Repository: tiiuae/sbomnix
Branch: main
Commit: 6fb9c6707ff3
Files: 186
Total size: 903.4 KB

Directory structure:
gitextract_rba_m7yr/
├── .envrc
├── .github/
│   ├── dependabot.yml
│   └── workflows/
│       ├── codeql.yml
│       ├── dependency-review.yml
│       ├── release_sbomnix.yml
│       ├── scorecards.yml
│       └── test_sbomnix.yml
├── .gitignore
├── .gitlint
├── LICENSES/
│   ├── Apache-2.0.txt
│   ├── BSD-3-Clause.txt
│   ├── CC-BY-3.0.txt
│   ├── CC-BY-SA-4.0.txt
│   └── MIT.txt
├── README.md
├── REUSE.toml
├── VERSION
├── default.nix
├── doc/
│   ├── nix_outdated.md
│   ├── nixgraph.md
│   ├── nixmeta.md
│   ├── provenance.md
│   ├── repology_cli.md
│   └── vulnxscan.md
├── flake.nix
├── nix/
│   ├── apps.nix
│   ├── default.nix
│   ├── formatter.nix
│   ├── git-hooks.nix
│   └── packages.nix
├── pyproject.toml
├── pyrightconfig.json
├── pytest.ini
├── scripts/
│   ├── check-fast.sh
│   ├── check-full.sh
│   ├── release-asset.sh
│   └── run-pytest-lane.sh
├── shell.nix
├── src/
│   ├── common/
│   │   ├── __init__.py
│   │   ├── cli_args.py
│   │   ├── columns.py
│   │   ├── df.py
│   │   ├── errors.py
│   │   ├── flakeref.py
│   │   ├── http.py
│   │   ├── log.py
│   │   ├── nix_utils.py
│   │   ├── package_names.py
│   │   ├── pkgmeta.py
│   │   ├── proc.py
│   │   ├── regex.py
│   │   ├── spdx.py
│   │   └── versioning.py
│   ├── nixgraph/
│   │   ├── __init__.py
│   │   ├── graph.py
│   │   ├── main.py
│   │   └── render.py
│   ├── nixmeta/
│   │   ├── __init__.py
│   │   ├── flake_metadata.py
│   │   ├── main.py
│   │   ├── metadata_json.py
│   │   └── scanner.py
│   ├── nixupdate/
│   │   ├── __init__.py
│   │   ├── nix_outdated.py
│   │   ├── nix_visualize.py
│   │   ├── pipeline.py
│   │   └── report.py
│   ├── provenance/
│   │   ├── __init__.py
│   │   ├── dependencies.py
│   │   ├── digests.py
│   │   ├── main.py
│   │   ├── nix_commands.py
│   │   ├── path_info.py
│   │   ├── schema.py
│   │   └── subjects.py
│   ├── repology/
│   │   ├── __init__.py
│   │   ├── adapter.py
│   │   ├── cves.py
│   │   ├── exceptions.py
│   │   ├── projects_parser.py
│   │   ├── repology_cli.py
│   │   ├── repology_cve.py
│   │   ├── reporting.py
│   │   ├── sbom.py
│   │   └── session.py
│   ├── sbomnix/
│   │   ├── __init__.py
│   │   ├── builder.py
│   │   ├── cdx.py
│   │   ├── cli_utils.py
│   │   ├── closure.py
│   │   ├── components.py
│   │   ├── cpe.py
│   │   ├── dependency_index.py
│   │   ├── derivation.py
│   │   ├── derivers.py
│   │   ├── dfcache.py
│   │   ├── exporters.py
│   │   ├── main.py
│   │   ├── meta.py
│   │   ├── meta_source.py
│   │   ├── runtime.py
│   │   └── vuln_enrichment.py
│   └── vulnxscan/
│       ├── __init__.py
│       ├── github_prs.py
│       ├── osv.py
│       ├── osv_client.py
│       ├── parsers.py
│       ├── repology_lookup.py
│       ├── reporting.py
│       ├── scanners.py
│       ├── triage.py
│       ├── utils.py
│       ├── vulnscan.py
│       ├── vulnxscan_cli.py
│       └── whitelist.py
└── tests/
    ├── __init__.py
    ├── compare_deps.py
    ├── compare_sboms.py
    ├── conftest.py
    ├── integration/
    │   ├── __init__.py
    │   ├── test_nixgraph_cli.py
    │   ├── test_nixmeta_cli.py
    │   ├── test_nixupdate_cli.py
    │   ├── test_provenance_cli.py
    │   ├── test_repology_cli.py
    │   ├── test_sbomnix_cli.py
    │   └── test_vulnxscan_cli.py
    ├── resources/
    │   ├── README.md
    │   ├── cdx_bom-1.3.schema.json
    │   ├── cdx_bom-1.4.schema.json
    │   ├── grype-test-db.tar.gz.license
    │   ├── jsf-0.82.schema.json
    │   ├── make_grype_test_db.py
    │   ├── nixmeta-package-set.nix
    │   ├── provenance-1.0.schema.json
    │   ├── repology/
    │   │   ├── cves_openssl.html
    │   │   ├── projects_empty.html
    │   │   └── projects_hello.html
    │   ├── sample_cdx_sbom.json
    │   ├── spdx.schema.json
    │   ├── spdx_bom-2.3.schema.json
    │   └── test-derivation-chain.nix
    ├── test_builder_runtime.py
    ├── test_buildtime_closure.py
    ├── test_cli_conventions.py
    ├── test_cli_error_boundaries.py
    ├── test_cli_smoke.py
    ├── test_common_log.py
    ├── test_common_versioning.py
    ├── test_compare_deps.py
    ├── test_components.py
    ├── test_cpe.py
    ├── test_dependency_index.py
    ├── test_derivation_hardening.py
    ├── test_flakeref_resolution.py
    ├── test_library_exceptions.py
    ├── test_nix_cli_argv.py
    ├── test_nix_outdated_pipeline.py
    ├── test_nix_target_resolution.py
    ├── test_nix_utils_parsing.py
    ├── test_nixgraph_graph.py
    ├── test_nixmeta_parsing.py
    ├── test_nixmeta_progress.py
    ├── test_nixmeta_source.py
    ├── test_nixmeta_source_export.py
    ├── test_osv_client.py
    ├── test_provenance_batching.py
    ├── test_provenance_path_info.py
    ├── test_provenance_subjects.py
    ├── test_repology_adapter.py
    ├── test_repology_cve.py
    ├── test_repology_projects_parser.py
    ├── test_repology_sbom.py
    ├── test_runtime_closure.py
    ├── test_sbom_closure.py
    ├── test_sbom_vuln_enrichment.py
    ├── test_schema_validation.py
    ├── test_store_batching.py
    ├── test_temp_sbom_generation.py
    ├── test_vulnix_test_support.py
    ├── test_vulnxscan_engine.py
    ├── test_vulnxscan_triage.py
    ├── test_whitelist.py
    ├── testpaths.py
    ├── testutils.py
    └── vulnix_test_support.py

================================================
FILE CONTENTS
================================================

================================================
FILE: .envrc
================================================
#! /usr/bin/env bash
# SPDX-FileCopyrightText: 2023 Technology Innovation Institute (TII)
#
# SPDX-License-Identifier: Apache-2.0

# try to use flake initially, fallback to non-flake use otherwise
if nix flake show &> /dev/null; then
  use flake
else
  use nix
fi

================================================
FILE: .github/dependabot.yml
================================================
version: 2
updates:
  - package-ecosystem: github-actions
    directory: /
    schedule:
      interval: daily

================================================
FILE: .github/workflows/codeql.yml
================================================
# For most projects, this workflow file will not need changing; you simply need
# to commit it to your repository.
#
# You may wish to alter this file to override the set of languages analyzed,
# or to provide custom queries or build logic.
#
# ******** NOTE ********
# We have attempted to detect the languages in your repository. Please check
# the `language` matrix defined below to confirm you have the correct set of
# supported CodeQL languages.
#
name: "CodeQL"

on:
  push:
    branches: ["main"]
  pull_request:
    # The branches below must be a subset of the branches above
    branches: ["main"]
  schedule:
    - cron: "0 0 * * 1"

permissions:
  contents: read

jobs:
  analyze:
    name: Analyze
    runs-on: ubuntu-latest
    permissions:
      actions: read
      contents: read
      security-events: write

    strategy:
      fail-fast: false
      matrix:
        language: ["python"]
        # CodeQL supports [ $supported-codeql-languages ]
        # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support

    steps:
      - name: Harden the runner (Audit all outbound calls)
        uses: step-security/harden-runner@8d3c67de8e2fe68ef647c8db1e6a09f647780f40 # v2.19.0
        with:
          egress-policy: audit

      - name: Checkout repository
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

      # Initializes the CodeQL tools for scanning.
      - name: Initialize CodeQL
        uses: github/codeql-action/init@95e58e9a2cdfd71adc6e0353d5c52f41a045d225 # v4.35.2
        with:
          languages: ${{ matrix.language }}
          # If you wish to specify custom queries, you can do so here or in a config file.
          # By default, queries listed here will override any specified in a config file.
          # Prefix the list here with "+" to use these queries and those in the config file.
      # Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
      # If this step fails, then you should remove it and run the build manually (see below)
      - name: Autobuild
        uses: github/codeql-action/autobuild@95e58e9a2cdfd71adc6e0353d5c52f41a045d225 # v4.35.2

      # ℹ️ Command-line programs to run using the OS shell.
      # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun

      # If the Autobuild fails above, remove it and uncomment the following three lines;
      # modify them (or add more) to build your code if your project needs a manual build,
      # referring to the example below for guidance.
      # - run: |
      #     echo "Run, Build Application using script"
      #     ./location_of_script_within_repo/buildscript.sh

      - name: Perform CodeQL Analysis
        uses: github/codeql-action/analyze@95e58e9a2cdfd71adc6e0353d5c52f41a045d225 # v4.35.2
        with:
          category: "/language:${{matrix.language}}"

================================================
FILE: .github/workflows/dependency-review.yml
================================================
# Dependency Review Action
#
# This Action will scan dependency manifest files that change as part of a Pull Request,
# surfacing known-vulnerable versions of the packages declared or updated in the PR.
# Once installed, if the workflow run is marked as required,
# PRs introducing known-vulnerable packages will be blocked from merging.
#
# Source repository: https://github.com/actions/dependency-review-action
name: 'Dependency Review'
on: [pull_request]

permissions:
  contents: read

jobs:
  dependency-review:
    runs-on: ubuntu-latest
    steps:
      - name: Harden the runner (Audit all outbound calls)
        uses: step-security/harden-runner@8d3c67de8e2fe68ef647c8db1e6a09f647780f40 # v2.19.0
        with:
          egress-policy: audit

      - name: 'Checkout Repository'
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
      - name: 'Dependency Review'
        uses: actions/dependency-review-action@2031cfc080254a8a887f58cffee85186f0e49e48 # v4.9.0

================================================
FILE: .github/workflows/release_sbomnix.yml
================================================
# SPDX-FileCopyrightText: 2023 Technology Innovation Institute (TII)
#
# SPDX-License-Identifier: Apache-2.0

name: Upload Release Asset

on:
  push:
    # Run on push events where tags match v*, e.g. v1.3.0
    tags:
      - 'v*'

permissions:
  contents: read

jobs:
  build:
    name: Upload Release Asset
    runs-on: ubuntu-latest
    permissions:
      contents: write
    steps:
      - name: Harden the runner (Audit all outbound calls)
        uses: step-security/harden-runner@8d3c67de8e2fe68ef647c8db1e6a09f647780f40 # v2.19.0
        with:
          egress-policy: audit

      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
      - uses: cachix/install-nix-action@ab739621df7a23f52766f9ccc97f38da6b7af14f # v31.10.5
        with:
          nix_path: nixpkgs=channel:nixpkgs-unstable
      - name: Build release asset
        run: ./scripts/release-asset.sh
      - name: Upload release asset
        uses: svenstaro/upload-release-action@29e53e917877a24fad85510ded594ab3c9ca12de # v2
        with:
          repo_token: ${{ secrets.GITHUB_TOKEN }}
          file: build/sbom*
          tag: ${{ github.ref }}
          overwrite: true
          file_glob: true

================================================
FILE: .github/workflows/scorecards.yml
================================================
# This workflow uses actions that are not certified by GitHub. They are provided
# by a third-party and are governed by separate terms of service, privacy
# policy, and support documentation.
name: Scorecard supply-chain security
on:
  # For Branch-Protection check.
  # Only the default branch is supported. See
  # https://github.com/ossf/scorecard/blob/main/docs/checks.md#branch-protection
  branch_protection_rule:
  # To guarantee Maintained check is occasionally updated. See
  # https://github.com/ossf/scorecard/blob/main/docs/checks.md#maintained
  schedule:
    - cron: '20 7 * * 2'
  push:
    branches: ["main"]

# Declare default permissions as read only.
permissions: read-all

jobs:
  analysis:
    name: Scorecard analysis
    runs-on: ubuntu-latest
    permissions:
      # Needed to upload the results to code-scanning dashboard.
      security-events: write
      # Needed to publish results and get a badge (see publish_results below).
      id-token: write
      contents: read
      actions: read
      # To allow GraphQL ListCommits to work
      issues: read
      pull-requests: read
      # To detect SAST tools
      checks: read

    steps:
      - name: Harden the runner (Audit all outbound calls)
        uses: step-security/harden-runner@8d3c67de8e2fe68ef647c8db1e6a09f647780f40 # v2.19.0
        with:
          egress-policy: audit

      - name: "Checkout code"
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          persist-credentials: false

      - name: "Run analysis"
        uses: ossf/scorecard-action@4eaacf0543bb3f2c246792bd56e8cdeffafb205a # v2.4.3
        with:
          results_file: results.sarif
          results_format: sarif
          # (Optional) "write" PAT token. Uncomment the `repo_token` line below if:
          # - you want to enable the Branch-Protection check on a *public* repository, or
          # - you are installing Scorecards on a *private* repository
          # To create the PAT, follow the steps in https://github.com/ossf/scorecard-action#authentication-with-pat.
          # repo_token: ${{ secrets.SCORECARD_TOKEN }}

          # Public repositories:
          # - Publish results to OpenSSF REST API for easy access by consumers
          # - Allows the repository to include the Scorecard badge.
          # - See https://github.com/ossf/scorecard-action#publishing-results.
          # For private repositories:
          # - `publish_results` will always be set to `false`, regardless
          #   of the value entered here.
          publish_results: true

      # Upload the results as artifacts (optional). Commenting out will disable uploads of run results in SARIF
      # format to the repository Actions tab.
      - name: "Upload artifact"
        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
        with:
          name: SARIF file
          path: results.sarif
          retention-days: 5

      # Upload the results to GitHub's code scanning dashboard.
      - name: "Upload to code-scanning"
        uses: github/codeql-action/upload-sarif@95e58e9a2cdfd71adc6e0353d5c52f41a045d225 # v4.35.2
        with:
          sarif_file: results.sarif

================================================
FILE: .github/workflows/test_sbomnix.yml
================================================
# SPDX-FileCopyrightText: 2022-2023 Technology Innovation Institute (TII)
#
# SPDX-License-Identifier: Apache-2.0

name: sbomnix checks

on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main
  workflow_dispatch:

concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

permissions:
  contents: read

jobs:
  full:
    name: full lane (${{ matrix.os }})
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest, macos-latest]
    runs-on: ${{ matrix.os }}
    steps:
      - name: Harden the runner (Audit all outbound calls)
        uses: step-security/harden-runner@8d3c67de8e2fe68ef647c8db1e6a09f647780f40 # v2.19.0
        with:
          egress-policy: audit

      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
      - uses: cachix/install-nix-action@ab739621df7a23f52766f9ccc97f38da6b7af14f # v31.10.5
        with:
          nix_path: nixpkgs=channel:nixpkgs-unstable
      - name: Print nix version
        run: nix --version
      - name: Run full checks
        run: ./scripts/check-full.sh

================================================
FILE: .gitignore
================================================
# SPDX-FileCopyrightText: 2023 Technology Innovation Institute (TII)
#
# SPDX-License-Identifier: Apache-2.0

temp/
venv/
build/
*.egg-info/
.eggs/
__pycache__/
.coverage
.coverage.*
.hypothesis/
coverage.xml
htmlcov/
sbomnix_test_data/
result
*.py[cod]
*.sqlite
*.csv
/*.log
/*.json
!/pyrightconfig.json
/*.png
/*.jpg
/*.pdf
/*.dot
/*.svg
.idea
.direnv
.pre-commit-config.yaml

================================================
FILE: .gitlint
================================================
# SPDX-FileCopyrightText: 2025 TII (SSRC) and the Ghaf contributors
# SPDX-License-Identifier: Apache-2.0

[general]
# Ignore rules, reference them by id or name (comma-separated)
# https://jorisroovers.com/gitlint/latest/rules/builtin_rules/
ignore=body-is-missing

# Enable specific community contributed rules
# https://jorisroovers.com/gitlint/latest/rules/contrib_rules/#available-contrib-rules
contrib=contrib-body-requires-signed-off-by

================================================
FILE: LICENSES/Apache-2.0.txt
================================================
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/

TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. 
You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability.

END OF TERMS AND CONDITIONS

APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives.

Copyright [yyyy] [name of copyright owner]

Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.

================================================
FILE: LICENSES/BSD-3-Clause.txt
================================================
Copyright (c) .

Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:

1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.

3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

================================================
FILE: LICENSES/CC-BY-3.0.txt
================================================
Creative Commons Attribution 3.0 Unported

CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE LEGAL SERVICES. DISTRIBUTION OF THIS LICENSE DOES NOT CREATE AN ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES REGARDING THE INFORMATION PROVIDED, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM ITS USE.

License

THE WORK (AS DEFINED BELOW) IS PROVIDED UNDER THE TERMS OF THIS CREATIVE COMMONS PUBLIC LICENSE ("CCPL" OR "LICENSE"). THE WORK IS PROTECTED BY COPYRIGHT AND/OR OTHER APPLICABLE LAW. ANY USE OF THE WORK OTHER THAN AS AUTHORIZED UNDER THIS LICENSE OR COPYRIGHT LAW IS PROHIBITED. BY EXERCISING ANY RIGHTS TO THE WORK PROVIDED HERE, YOU ACCEPT AND AGREE TO BE BOUND BY THE TERMS OF THIS LICENSE. TO THE EXTENT THIS LICENSE MAY BE CONSIDERED TO BE A CONTRACT, THE LICENSOR GRANTS YOU THE RIGHTS CONTAINED HERE IN CONSIDERATION OF YOUR ACCEPTANCE OF SUCH TERMS AND CONDITIONS.

1. Definitions a. "Adaptation" means a work based upon the Work, or upon the Work and other pre-existing works, such as a translation, adaptation, derivative work, arrangement of music or other alterations of a literary or artistic work, or phonogram or performance and includes cinematographic adaptations or any other form in which the Work may be recast, transformed, or adapted including in any form recognizably derived from the original, except that a work that constitutes a Collection will not be considered an Adaptation for the purpose of this License. For the avoidance of doubt, where the Work is a musical work, performance or phonogram, the synchronization of the Work in timed-relation with a moving image ("synching") will be considered an Adaptation for the purpose of this License. b. "Collection" means a collection of literary or artistic works, such as encyclopedias and anthologies, or performances, phonograms or broadcasts, or other works or subject matter other than works listed in Section 1(f) below, which, by reason of the selection and arrangement of their contents, constitute intellectual creations, in which the Work is included in its entirety in unmodified form along with one or more other contributions, each constituting separate and independent works in themselves, which together are assembled into a collective whole. A work that constitutes a Collection will not be considered an Adaptation (as defined above) for the purposes of this License. c. "Distribute" means to make available to the public the original and copies of the Work or Adaptation, as appropriate, through sale or other transfer of ownership. d. "Licensor" means the individual, individuals, entity or entities that offer(s) the Work under the terms of this License. e.
"Original Author" means, in the case of a literary or artistic work, the individual, individuals, entity or entities who created the Work or if no individual or entity can be identified, the publisher; and in addition (i) in the case of a performance the actors, singers, musicians, dancers, and other persons who act, sing, deliver, declaim, play in, interpret or otherwise perform literary or artistic works or expressions of folklore; (ii) in the case of a phonogram the producer being the person or legal entity who first fixes the sounds of a performance or other sounds; and, (iii) in the case of broadcasts, the organization that transmits the broadcast. f. "Work" means the literary and/or artistic work offered under the terms of this License including without limitation any production in the literary, scientific and artistic domain, whatever may be the mode or form of its expression including digital form, such as a book, pamphlet and other writing; a lecture, address, sermon or other work of the same nature; a dramatic or dramatico-musical work; a choreographic work or entertainment in dumb show; a musical composition with or without words; a cinematographic work to which are assimilated works expressed by a process analogous to cinematography; a work of drawing, painting, architecture, sculpture, engraving or lithography; a photographic work to which are assimilated works expressed by a process analogous to photography; a work of applied art; an illustration, map, plan, sketch or three-dimensional work relative to geography, topography, architecture or science; a performance; a broadcast; a phonogram; a compilation of data to the extent it is protected as a copyrightable work; or a work performed by a variety or circus performer to the extent it is not otherwise considered a literary or artistic work. g. "You" means an individual or entity exercising rights under this License who has not previously violated the terms of this License with respect to the Work, or who has received express permission from the Licensor to exercise rights under this License despite a previous violation. h. "Publicly Perform" means to perform public recitations of the Work and to communicate to the public those public recitations, by any means or process, including by wire or wireless means or public digital performances; to make available to the public Works in such a way that members of the public may access these Works from a place and at a place individually chosen by them; to perform the Work to the public by any means or process and the communication to the public of the performances of the Work, including by public digital performance; to broadcast and rebroadcast the Work by any means including signs, sounds or images. i. "Reproduce" means to make copies of the Work by any means including without limitation by sound or visual recordings and the right of fixation and reproducing fixations of the Work, including storage of a protected performance or phonogram in digital form or other electronic medium. 2. Fair Dealing Rights. Nothing in this License is intended to reduce, limit, or restrict any uses free from copyright or rights arising from limitations or exceptions that are provided for in connection with the copyright protection under copyright law or other applicable laws. 3. License Grant. 
Subject to the terms and conditions of this License, Licensor hereby grants You a worldwide, royalty-free, non-exclusive, perpetual (for the duration of the applicable copyright) license to exercise the rights in the Work as stated below: a. to Reproduce the Work, to incorporate the Work into one or more Collections, and to Reproduce the Work as incorporated in the Collections; b. to create and Reproduce Adaptations provided that any such Adaptation, including any translation in any medium, takes reasonable steps to clearly label, demarcate or otherwise identify that changes were made to the original Work. For example, a translation could be marked "The original work was translated from English to Spanish," or a modification could indicate "The original work has been modified."; c. to Distribute and Publicly Perform the Work including as incorporated in Collections; and, d. to Distribute and Publicly Perform Adaptations. e. For the avoidance of doubt: i. Non-waivable Compulsory License Schemes. In those jurisdictions in which the right to collect royalties through any statutory or compulsory licensing scheme cannot be waived, the Licensor reserves the exclusive right to collect such royalties for any exercise by You of the rights granted under this License; ii. Waivable Compulsory License Schemes. In those jurisdictions in which the right to collect royalties through any statutory or compulsory licensing scheme can be waived, the Licensor waives the exclusive right to collect such royalties for any exercise by You of the rights granted under this License; and, iii. Voluntary License Schemes. The Licensor waives the right to collect royalties, whether individually or, in the event that the Licensor is a member of a collecting society that administers voluntary licensing schemes, via that society, from any exercise by You of the rights granted under this License. The above rights may be exercised in all media and formats whether now known or hereafter devised. The above rights include the right to make such modifications as are technically necessary to exercise the rights in other media and formats. Subject to Section 8(f), all rights not expressly granted by Licensor are hereby reserved. 4. Restrictions. The license granted in Section 3 above is expressly made subject to and limited by the following restrictions: a. You may Distribute or Publicly Perform the Work only under the terms of this License. You must include a copy of, or the Uniform Resource Identifier (URI) for, this License with every copy of the Work You Distribute or Publicly Perform. You may not offer or impose any terms on the Work that restrict the terms of this License or the ability of the recipient of the Work to exercise the rights granted to that recipient under the terms of the License. You may not sublicense the Work. You must keep intact all notices that refer to this License and to the disclaimer of warranties with every copy of the Work You Distribute or Publicly Perform. When You Distribute or Publicly Perform the Work, You may not impose any effective technological measures on the Work that restrict the ability of a recipient of the Work from You to exercise the rights granted to that recipient under the terms of the License. This Section 4(a) applies to the Work as incorporated in a Collection, but this does not require the Collection apart from the Work itself to be made subject to the terms of this License. 
If You create a Collection, upon notice from any Licensor You must, to the extent practicable, remove from the Collection any credit as required by Section 4(b), as requested. If You create an Adaptation, upon notice from any Licensor You must, to the extent practicable, remove from the Adaptation any credit as required by Section 4(b), as requested. b. If You Distribute, or Publicly Perform the Work or any Adaptations or Collections, You must, unless a request has been made pursuant to Section 4(a), keep intact all copyright notices for the Work and provide, reasonable to the medium or means You are utilizing: (i) the name of the Original Author (or pseudonym, if applicable) if supplied, and/or if the Original Author and/or Licensor designate another party or parties (e.g., a sponsor institute, publishing entity, journal) for attribution ("Attribution Parties") in Licensor's copyright notice, terms of service or by other reasonable means, the name of such party or parties; (ii) the title of the Work if supplied; (iii) to the extent reasonably practicable, the URI, if any, that Licensor specifies to be associated with the Work, unless such URI does not refer to the copyright notice or licensing information for the Work; and (iv) , consistent with Section 3(b), in the case of an Adaptation, a credit identifying the use of the Work in the Adaptation (e.g., "French translation of the Work by Original Author," or "Screenplay based on original Work by Original Author"). The credit required by this Section 4 (b) may be implemented in any reasonable manner; provided, however, that in the case of a Adaptation or Collection, at a minimum such credit will appear, if a credit for all contributing authors of the Adaptation or Collection appears, then as part of these credits and in a manner at least as prominent as the credits for the other contributing authors. For the avoidance of doubt, You may only use the credit required by this Section for the purpose of attribution in the manner set out above and, by exercising Your rights under this License, You may not implicitly or explicitly assert or imply any connection with, sponsorship or endorsement by the Original Author, Licensor and/or Attribution Parties, as appropriate, of You or Your use of the Work, without the separate, express prior written permission of the Original Author, Licensor and/or Attribution Parties. c. Except as otherwise agreed in writing by the Licensor or as may be otherwise permitted by applicable law, if You Reproduce, Distribute or Publicly Perform the Work either by itself or as part of any Adaptations or Collections, You must not distort, mutilate, modify or take other derogatory action in relation to the Work which would be prejudicial to the Original Author's honor or reputation. Licensor agrees that in those jurisdictions (e.g. Japan), in which any exercise of the right granted in Section 3(b) of this License (the right to make Adaptations) would be deemed to be a distortion, mutilation, modification or other derogatory action prejudicial to the Original Author's honor and reputation, the Licensor will waive or not assert, as appropriate, this Section, to the fullest extent permitted by the applicable national law, to enable You to reasonably exercise Your right under Section 3(b) of this License (right to make Adaptations) but not otherwise. 5. 
Representations, Warranties and Disclaimer UNLESS OTHERWISE MUTUALLY AGREED TO BY THE PARTIES IN WRITING, LICENSOR OFFERS THE WORK AS-IS AND MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE WORK, EXPRESS, IMPLIED, STATUTORY OR OTHERWISE, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF TITLE, MERCHANTIBILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, ACCURACY, OR THE PRESENCE OF ABSENCE OF ERRORS, WHETHER OR NOT DISCOVERABLE. SOME JURISDICTIONS DO NOT ALLOW THE EXCLUSION OF IMPLIED WARRANTIES, SO SUCH EXCLUSION MAY NOT APPLY TO YOU. 6. Limitation on Liability. EXCEPT TO THE EXTENT REQUIRED BY APPLICABLE LAW, IN NO EVENT WILL LICENSOR BE LIABLE TO YOU ON ANY LEGAL THEORY FOR ANY SPECIAL, INCIDENTAL, CONSEQUENTIAL, PUNITIVE OR EXEMPLARY DAMAGES ARISING OUT OF THIS LICENSE OR THE USE OF THE WORK, EVEN IF LICENSOR HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 7. Termination a. This License and the rights granted hereunder will terminate automatically upon any breach by You of the terms of this License. Individuals or entities who have received Adaptations or Collections from You under this License, however, will not have their licenses terminated provided such individuals or entities remain in full compliance with those licenses. Sections 1, 2, 5, 6, 7, and 8 will survive any termination of this License. b. Subject to the above terms and conditions, the license granted here is perpetual (for the duration of the applicable copyright in the Work). Notwithstanding the above, Licensor reserves the right to release the Work under different license terms or to stop distributing the Work at any time; provided, however that any such election will not serve to withdraw this License (or any other license that has been, or is required to be, granted under the terms of this License), and this License will continue in full force and effect unless terminated as stated above. 8. Miscellaneous a. Each time You Distribute or Publicly Perform the Work or a Collection, the Licensor offers to the recipient a license to the Work on the same terms and conditions as the license granted to You under this License. b. Each time You Distribute or Publicly Perform an Adaptation, Licensor offers to the recipient a license to the original Work on the same terms and conditions as the license granted to You under this License. c. If any provision of this License is invalid or unenforceable under applicable law, it shall not affect the validity or enforceability of the remainder of the terms of this License, and without further action by the parties to this agreement, such provision shall be reformed to the minimum extent necessary to make such provision valid and enforceable. d. No term or provision of this License shall be deemed waived and no breach consented to unless such waiver or consent shall be in writing and signed by the party to be charged with such waiver or consent. This License constitutes the entire agreement between the parties with respect to the Work licensed here. There are no understandings, agreements or representations with respect to the Work not specified here. Licensor shall not be bound by any additional provisions that may appear in any communication from You. e. This License may not be modified without the mutual written agreement of the Licensor and You. f. 
The rights granted under, and the subject matter referenced, in this License were drafted utilizing the terminology of the Berne Convention for the Protection of Literary and Artistic Works (as amended on September 28, 1979), the Rome Convention of 1961, the WIPO Copyright Treaty of 1996, the WIPO Performances and Phonograms Treaty of 1996 and the Universal Copyright Convention (as revised on July 24, 1971). These rights and subject matter take effect in the relevant jurisdiction in which the License terms are sought to be enforced according to the corresponding provisions of the implementation of those treaty provisions in the applicable national law. If the standard suite of rights granted under applicable copyright law includes additional rights not granted under this License, such additional rights are deemed to be included in the License; this License is not intended to restrict the license of any rights under applicable law.

Creative Commons Notice

Creative Commons is not a party to this License, and makes no warranty whatsoever in connection with the Work. Creative Commons will not be liable to You or any party on any legal theory for any damages whatsoever, including without limitation any general, special, incidental or consequential damages arising in connection to this license. Notwithstanding the foregoing two (2) sentences, if Creative Commons has expressly identified itself as the Licensor hereunder, it shall have all rights and obligations of Licensor. Except for the limited purpose of indicating to the public that the Work is licensed under the CCPL, Creative Commons does not authorize the use by either party of the trademark "Creative Commons" or any related trademark or logo of Creative Commons without the prior written consent of Creative Commons. Any permitted use will be in compliance with Creative Commons' then-current trademark usage guidelines, as may be published on its website or otherwise made available upon request from time to time. For the avoidance of doubt, this trademark restriction does not form part of this License. Creative Commons may be contacted at http://creativecommons.org/.

================================================
FILE: LICENSES/CC-BY-SA-4.0.txt
================================================
Creative Commons Attribution-ShareAlike 4.0 International

Creative Commons Corporation (“Creative Commons”) is not a law firm and does not provide legal services or legal advice. Distribution of Creative Commons public licenses does not create a lawyer-client or other relationship. Creative Commons makes its licenses and related information available on an “as-is” basis. Creative Commons gives no warranties regarding its licenses, any material licensed under their terms and conditions, or any related information. Creative Commons disclaims all liability for damages resulting from their use to the fullest extent possible.

Using Creative Commons Public Licenses

Creative Commons public licenses provide a standard set of terms and conditions that creators and other rights holders may use to share original works of authorship and other material subject to copyright and certain other rights specified in the public license below. The following considerations are for informational purposes only, are not exhaustive, and do not form part of our licenses. Considerations for licensors: Our public licenses are intended for use by those authorized to give the public permission to use material in ways otherwise restricted by copyright and certain other rights.
Our licenses are irrevocable. Licensors should read and understand the terms and conditions of the license they choose before applying it. Licensors should also secure all rights necessary before applying our licenses so that the public can reuse the material as expected. Licensors should clearly mark any material not subject to the license. This includes other CC-licensed material, or material used under an exception or limitation to copyright. More considerations for licensors. Considerations for the public: By using one of our public licenses, a licensor grants the public permission to use the licensed material under specified terms and conditions. If the licensor’s permission is not necessary for any reason–for example, because of any applicable exception or limitation to copyright–then that use is not regulated by the license. Our licenses grant only permissions under copyright and certain other rights that a licensor has authority to grant. Use of the licensed material may still be restricted for other reasons, including because others have copyright or other rights in the material. A licensor may make special requests, such as asking that all changes be marked or described. Although not required by our licenses, you are encouraged to respect those requests where reasonable. More considerations for the public. Creative Commons Attribution-ShareAlike 4.0 International Public License By exercising the Licensed Rights (defined below), You accept and agree to be bound by the terms and conditions of this Creative Commons Attribution-ShareAlike 4.0 International Public License ("Public License"). To the extent this Public License may be interpreted as a contract, You are granted the Licensed Rights in consideration of Your acceptance of these terms and conditions, and the Licensor grants You such rights in consideration of benefits the Licensor receives from making the Licensed Material available under these terms and conditions. Section 1 – Definitions. a. Adapted Material means material subject to Copyright and Similar Rights that is derived from or based upon the Licensed Material and in which the Licensed Material is translated, altered, arranged, transformed, or otherwise modified in a manner requiring permission under the Copyright and Similar Rights held by the Licensor. For purposes of this Public License, where the Licensed Material is a musical work, performance, or sound recording, Adapted Material is always produced where the Licensed Material is synched in timed relation with a moving image. b. Adapter's License means the license You apply to Your Copyright and Similar Rights in Your contributions to Adapted Material in accordance with the terms and conditions of this Public License. c. BY-SA Compatible License means a license listed at creativecommons.org/compatiblelicenses, approved by Creative Commons as essentially the equivalent of this Public License. d. Copyright and Similar Rights means copyright and/or similar rights closely related to copyright including, without limitation, performance, broadcast, sound recording, and Sui Generis Database Rights, without regard to how the rights are labeled or categorized. For purposes of this Public License, the rights specified in Section 2(b)(1)-(2) are not Copyright and Similar Rights. e. 
Effective Technological Measures means those measures that, in the absence of proper authority, may not be circumvented under laws fulfilling obligations under Article 11 of the WIPO Copyright Treaty adopted on December 20, 1996, and/or similar international agreements. f. Exceptions and Limitations means fair use, fair dealing, and/or any other exception or limitation to Copyright and Similar Rights that applies to Your use of the Licensed Material. g. License Elements means the license attributes listed in the name of a Creative Commons Public License. The License Elements of this Public License are Attribution and ShareAlike. h. Licensed Material means the artistic or literary work, database, or other material to which the Licensor applied this Public License. i. Licensed Rights means the rights granted to You subject to the terms and conditions of this Public License, which are limited to all Copyright and Similar Rights that apply to Your use of the Licensed Material and that the Licensor has authority to license. j. Licensor means the individual(s) or entity(ies) granting rights under this Public License. k. Share means to provide material to the public by any means or process that requires permission under the Licensed Rights, such as reproduction, public display, public performance, distribution, dissemination, communication, or importation, and to make material available to the public including in ways that members of the public may access the material from a place and at a time individually chosen by them. l. Sui Generis Database Rights means rights other than copyright resulting from Directive 96/9/EC of the European Parliament and of the Council of 11 March 1996 on the legal protection of databases, as amended and/or succeeded, as well as other essentially equivalent rights anywhere in the world. m. You means the individual or entity exercising the Licensed Rights under this Public License. Your has a corresponding meaning. Section 2 – Scope. a. License grant. 1. Subject to the terms and conditions of this Public License, the Licensor hereby grants You a worldwide, royalty-free, non-sublicensable, non-exclusive, irrevocable license to exercise the Licensed Rights in the Licensed Material to: A. reproduce and Share the Licensed Material, in whole or in part; and B. produce, reproduce, and Share Adapted Material. 2. Exceptions and Limitations. For the avoidance of doubt, where Exceptions and Limitations apply to Your use, this Public License does not apply, and You do not need to comply with its terms and conditions. 3. Term. The term of this Public License is specified in Section 6(a). 4. Media and formats; technical modifications allowed. The Licensor authorizes You to exercise the Licensed Rights in all media and formats whether now known or hereafter created, and to make technical modifications necessary to do so. The Licensor waives and/or agrees not to assert any right or authority to forbid You from making technical modifications necessary to exercise the Licensed Rights, including technical modifications necessary to circumvent Effective Technological Measures. For purposes of this Public License, simply making modifications authorized by this Section 2(a)(4) never produces Adapted Material. 5. Downstream recipients. A. Offer from the Licensor – Licensed Material. Every recipient of the Licensed Material automatically receives an offer from the Licensor to exercise the Licensed Rights under the terms and conditions of this Public License. B. 
Additional offer from the Licensor – Adapted Material. Every recipient of Adapted Material from You automatically receives an offer from the Licensor to exercise the Licensed Rights in the Adapted Material under the conditions of the Adapter’s License You apply. C. No downstream restrictions. You may not offer or impose any additional or different terms or conditions on, or apply any Effective Technological Measures to, the Licensed Material if doing so restricts exercise of the Licensed Rights by any recipient of the Licensed Material. 6. No endorsement. Nothing in this Public License constitutes or may be construed as permission to assert or imply that You are, or that Your use of the Licensed Material is, connected with, or sponsored, endorsed, or granted official status by, the Licensor or others designated to receive attribution as provided in Section 3(a)(1)(A)(i). b. Other rights. 1. Moral rights, such as the right of integrity, are not licensed under this Public License, nor are publicity, privacy, and/or other similar personality rights; however, to the extent possible, the Licensor waives and/or agrees not to assert any such rights held by the Licensor to the limited extent necessary to allow You to exercise the Licensed Rights, but not otherwise. 2. Patent and trademark rights are not licensed under this Public License. 3. To the extent possible, the Licensor waives any right to collect royalties from You for the exercise of the Licensed Rights, whether directly or through a collecting society under any voluntary or waivable statutory or compulsory licensing scheme. In all other cases the Licensor expressly reserves any right to collect such royalties. Section 3 – License Conditions. Your exercise of the Licensed Rights is expressly made subject to the following conditions. a. Attribution. 1. If You Share the Licensed Material (including in modified form), You must: A. retain the following if it is supplied by the Licensor with the Licensed Material: i. identification of the creator(s) of the Licensed Material and any others designated to receive attribution, in any reasonable manner requested by the Licensor (including by pseudonym if designated); ii. a copyright notice; iii. a notice that refers to this Public License; iv. a notice that refers to the disclaimer of warranties; v. a URI or hyperlink to the Licensed Material to the extent reasonably practicable; B. indicate if You modified the Licensed Material and retain an indication of any previous modifications; and C. indicate the Licensed Material is licensed under this Public License, and include the text of, or the URI or hyperlink to, this Public License. 2. You may satisfy the conditions in Section 3(a)(1) in any reasonable manner based on the medium, means, and context in which You Share the Licensed Material. For example, it may be reasonable to satisfy the conditions by providing a URI or hyperlink to a resource that includes the required information. 3. If requested by the Licensor, You must remove any of the information required by Section 3(a)(1)(A) to the extent reasonably practicable. b. ShareAlike.In addition to the conditions in Section 3(a), if You Share Adapted Material You produce, the following conditions also apply. 1. The Adapter’s License You apply must be a Creative Commons license with the same License Elements, this version or later, or a BY-SA Compatible License. 2. You must include the text of, or the URI or hyperlink to, the Adapter's License You apply. 
You may satisfy this condition in any reasonable manner based on the medium, means, and context in which You Share Adapted Material. 3. You may not offer or impose any additional or different terms or conditions on, or apply any Effective Technological Measures to, Adapted Material that restrict exercise of the rights granted under the Adapter's License You apply. Section 4 – Sui Generis Database Rights. Where the Licensed Rights include Sui Generis Database Rights that apply to Your use of the Licensed Material: a. for the avoidance of doubt, Section 2(a)(1) grants You the right to extract, reuse, reproduce, and Share all or a substantial portion of the contents of the database; b. if You include all or a substantial portion of the database contents in a database in which You have Sui Generis Database Rights, then the database in which You have Sui Generis Database Rights (but not its individual contents) is Adapted Material, including for purposes of Section 3(b); and c. You must comply with the conditions in Section 3(a) if You Share all or a substantial portion of the contents of the database. For the avoidance of doubt, this Section 4 supplements and does not replace Your obligations under this Public License where the Licensed Rights include other Copyright and Similar Rights. Section 5 – Disclaimer of Warranties and Limitation of Liability. a. Unless otherwise separately undertaken by the Licensor, to the extent possible, the Licensor offers the Licensed Material as-is and as-available, and makes no representations or warranties of any kind concerning the Licensed Material, whether express, implied, statutory, or other. This includes, without limitation, warranties of title, merchantability, fitness for a particular purpose, non-infringement, absence of latent or other defects, accuracy, or the presence or absence of errors, whether or not known or discoverable. Where disclaimers of warranties are not allowed in full or in part, this disclaimer may not apply to You. b. To the extent possible, in no event will the Licensor be liable to You on any legal theory (including, without limitation, negligence) or otherwise for any direct, special, indirect, incidental, consequential, punitive, exemplary, or other losses, costs, expenses, or damages arising out of this Public License or use of the Licensed Material, even if the Licensor has been advised of the possibility of such losses, costs, expenses, or damages. Where a limitation of liability is not allowed in full or in part, this limitation may not apply to You. c. The disclaimer of warranties and limitation of liability provided above shall be interpreted in a manner that, to the extent possible, most closely approximates an absolute disclaimer and waiver of all liability. Section 6 – Term and Termination. a. This Public License applies for the term of the Copyright and Similar Rights licensed here. However, if You fail to comply with this Public License, then Your rights under this Public License terminate automatically. b. Where Your right to use the Licensed Material has terminated under Section 6(a), it reinstates: 1. automatically as of the date the violation is cured, provided it is cured within 30 days of Your discovery of the violation; or 2. upon express reinstatement by the Licensor. c. For the avoidance of doubt, this Section 6(b) does not affect any right the Licensor may have to seek remedies for Your violations of this Public License. d. 
For the avoidance of doubt, the Licensor may also offer the Licensed Material under separate terms or conditions or stop distributing the Licensed Material at any time; however, doing so will not terminate this Public License. e. Sections 1, 5, 6, 7, and 8 survive termination of this Public License. Section 7 – Other Terms and Conditions. a. The Licensor shall not be bound by any additional or different terms or conditions communicated by You unless expressly agreed. b. Any arrangements, understandings, or agreements regarding the Licensed Material not stated herein are separate from and independent of the terms and conditions of this Public License. Section 8 – Interpretation. a. For the avoidance of doubt, this Public License does not, and shall not be interpreted to, reduce, limit, restrict, or impose conditions on any use of the Licensed Material that could lawfully be made without permission under this Public License. b. To the extent possible, if any provision of this Public License is deemed unenforceable, it shall be automatically reformed to the minimum extent necessary to make it enforceable. If the provision cannot be reformed, it shall be severed from this Public License without affecting the enforceability of the remaining terms and conditions. c. No term or condition of this Public License will be waived and no failure to comply consented to unless expressly agreed to by the Licensor. d. Nothing in this Public License constitutes or may be interpreted as a limitation upon, or waiver of, any privileges and immunities that apply to the Licensor or You, including from the legal processes of any jurisdiction or authority. Creative Commons is not a party to its public licenses. Notwithstanding, Creative Commons may elect to apply one of its public licenses to material it publishes and in those instances will be considered the “Licensor.” Except for the limited purpose of indicating that material is shared under a Creative Commons public license or as otherwise permitted by the Creative Commons policies published at creativecommons.org/policies, Creative Commons does not authorize the use of the trademark “Creative Commons” or any other trademark or logo of Creative Commons without its prior written consent including, without limitation, in connection with any unauthorized modifications to any of its public licenses or any other arrangements, understandings, or agreements concerning use of licensed material. For the avoidance of doubt, this paragraph does not form part of the public licenses. Creative Commons may be contacted at creativecommons.org. ================================================ FILE: LICENSES/MIT.txt ================================================ MIT License Copyright (c) Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

================================================
FILE: README.md
================================================

# sbomnix

This repository is home to various command line tools and Python libraries that aim to help with software supply chain challenges:

- [`sbomnix`](#generate-sbom) is a utility that generates SBOMs given a [Nix](https://nixos.org/) flake reference or store path.
- [`nixgraph`](./doc/nixgraph.md) helps query and visualize dependency graphs for [Nix](https://nixos.org/) packages.
- [`nixmeta`](./doc/nixmeta.md) summarizes nixpkgs meta-attributes from the given nixpkgs version.
- [`vulnxscan`](./doc/vulnxscan.md) is a vulnerability scanner demonstrating the usage of SBOMs in running vulnerability scans.
- [`repology_cli`](./doc/repology_cli.md) and [`repology_cve`](./doc/repology_cli.md#repology-cve-search) are command line clients to [repology.org](https://repology.org/).
- [`nix_outdated`](./doc/nix_outdated.md) is a utility that finds outdated nix dependencies for a given out path, listing the outdated packages in priority order based on how many other packages depend on the given outdated package.
- [`provenance`](./doc/provenance.md) is a command line tool to generate SLSA v1.0 compliant [provenance](https://slsa.dev/spec/v1.0/provenance) attestation files in json format for any nix flake or derivation.

For an example of how to use the tooling provided in this repository to automate daily vulnerability scans for a nix flake project, see: [ghafscan](https://github.com/tiiuae/ghafscan).

The [CycloneDX](https://cyclonedx.org/) and [SPDX](https://spdx.github.io/spdx-spec/v2.3/) SBOMs for each release of the sbomnix tooling are available in the [release assets](https://github.com/tiiuae/sbomnix/releases/latest).

All the tools in this repository originate from [Ghaf Framework](https://github.com/tiiuae/ghaf).

Table of Contents
=================

* [Getting Started](#getting-started)
  * [Running as Nix Flake](#running-as-nix-flake)
  * [Running from Nix Development Shell](#running-from-nix-development-shell)
* [Buildtime vs Runtime Dependencies](#buildtime-vs-runtime-dependencies)
  * [Buildtime Dependencies](#buildtime-dependencies)
  * [Runtime Dependencies](#runtime-dependencies)
* [Usage Examples](#usage-examples)
  * [Generate SBOM Based on Flake Reference](#generate-sbom-based-on-flake-reference)
  * [Generate SBOM Based on Derivation File or Out-path](#generate-sbom-based-on-derivation-file-or-out-path)
  * [Generate SBOM Including Buildtime Dependencies](#generate-sbom-including-buildtime-dependencies)
  * [Generate SBOM Based on a Store Path or Result Symlink](#generate-sbom-based-on-a-store-path-or-result-symlink)
  * [Nixpkgs Metadata Source Selection](#nixpkgs-metadata-source-selection)
  * [Visualize Package Dependencies](#visualize-package-dependencies)
* [Contribute](#contribute)
* [License](#license)
* [Acknowledgements](#acknowledgements)

## Getting Started

`sbomnix` requires the [Nix](https://nixos.org/download.html) command line tool to be in `$PATH`. Direct, non-flake usage requires a modern `nix` supporting `nix-command` and `--json-format 1`.
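As a quick prerequisite check, you can verify that the `nix` CLI is reachable and that flake commands work before running any of the tools. A minimal sketch (here, `nix flake metadata` is used only as a cheap probe that the `nix-command` and `flakes` features are enabled; any public flakeref would do as the probe target):

```bash
# Verify the nix CLI is in PATH and print its version
$ command -v nix && nix --version

# Probe that flake commands work
$ nix flake metadata github:tiiuae/sbomnix >/dev/null && echo "flakes enabled"
```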
### Running as Nix Flake

`sbomnix` can be run as a [Nix flake](https://nixos.wiki/wiki/Flakes) from the `tiiuae/sbomnix` repository:

```bash
# '--' signifies the end of argument list for `nix`.
# '--help' is the first argument to `sbomnix`
$ nix run github:tiiuae/sbomnix#sbomnix -- --help
```

or from a local repository:

```bash
$ git clone https://github.com/tiiuae/sbomnix
$ cd sbomnix
$ nix run .#sbomnix -- --help
```

See the full list of supported flake targets by running `nix flake show`.

### Running from Nix Development Shell

If you have nix flakes [enabled](https://nixos.wiki/wiki/Flakes#Enable_flakes), start a development shell:

```bash
$ git clone https://github.com/tiiuae/sbomnix
$ cd sbomnix
$ nix develop
```

The devshell adds all CLI entry points (`sbomnix`, `nixgraph`, `nixmeta`, `vulnxscan`, `repology_cli`, `repology_cve`, `nix_outdated`, `provenance`) to `PATH`. They run against the local source tree, so any edits are picked up immediately without reinstalling.

All tools support a consistent verbosity flag: no flag or `--verbose=0` shows INFO output, `-v` or `--verbose=1` enables VERBOSE progress details, `-vv` or `--verbose=2` enables DEBUG details, and `-vvv` or `--verbose=3` enables SPAM output. Repeated short flags are counted, so `-v -v`, `-vv`, and `--verbose=2` are equivalent.

## Buildtime vs Runtime Dependencies

#### Buildtime Dependencies

The buildtime dependencies of a Nix package are the [closure](https://nixos.org/manual/nix/stable/glossary.html#gloss-closure) of its derivation (`.drv` file): all the store paths Nix must have available to reproduce the build, including compilers, build tools, standard libraries, and the infrastructure to bootstrap them. Even a simple hello-world C program typically pulls in over 150 packages, including gcc, stdenv, glibc, and bash. Computing the buildtime dependency closure only requires evaluating the derivation; the target does not need to be built.

For reference, below is a graph of the first two layers of buildtime dependencies of an example hello-world C program (direct dependencies and the first level of transitive dependencies): [C hello-world buildtime, depth=2](doc/img/c_hello_world_buildtime_d2.svg).

#### Runtime Dependencies

[Runtime dependencies](https://nixos.org/manual/nix/stable/command-ref/new-cli/nix3-why-depends.html#description) are a subset of buildtime dependencies. When Nix builds a package, it scans the build outputs for references to other store paths and records them. The runtime closure is the transitive set of those recorded references: the store paths the built output actually needs at runtime. Because this information is captured during the build, the target must be built before its runtime dependencies can be determined. For reference, below is the complete runtime dependency graph of the same hello-world C program:

By default, the tools in this repository work with runtime dependencies. Specifically, unless told otherwise, `sbomnix` generates an SBOM of runtime dependencies, `nixgraph` graphs runtime dependencies, and `vulnxscan` and `nix_outdated` scan runtime dependencies. Since the target must be built to determine runtime dependencies, all these tools will build (force-realise) the target as part of their invocation. All tools also accept a `--buildtime` argument to work with buildtime dependencies instead; as noted above, using `--buildtime` does not require building the target.
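To see the difference between the two dependency sets for yourself, you can query the Nix store directly with plain nix commands. The sketch below illustrates the concepts above; it is not a description of how the tools in this repository are implemented:

```bash
# Runtime closure: requires the target to be built first
$ nix build nixpkgs#hello -o ./result
$ nix-store --query --requisites ./result

# Buildtime closure: query the requisites of the .drv file itself;
# this only needs the derivation to be evaluated, not built
$ nix-store --query --requisites "$(nix path-info --derivation nixpkgs#hello)"
```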
## Usage Examples

In the below examples, we use Nix package `wget` as an example target, referred to by flakeref `github:NixOS/nixpkgs/nixos-unstable#wget`.

#### Generate SBOM Based on Flake Reference

`sbomnix` accepts [flake references](https://nixos.org/manual/nix/stable/command-ref/new-cli/nix3-flake.html#flake-references) as targets:

```bash
$ sbomnix github:NixOS/nixpkgs?ref=nixos-unstable#wget
```

#### Generate SBOM Based on Derivation File or Out-path

Flake references are the recommended target for `sbomnix`. When the target is a flake reference, `sbomnix` can resolve the nixpkgs version used to build the package and enrich the SBOM with metadata such as descriptions, licenses, maintainers, and homepage links. When the target is a store path, there is no information about which nixpkgs version produced it, so metadata enrichment is skipped by default; see [Nixpkgs Metadata Source Selection](#nixpkgs-metadata-source-selection).

By default `sbomnix` scans the given target and generates an SBOM including the runtime dependencies. Notice: determining the target runtime dependencies in Nix requires building the target.

```bash
# Target can be specified as a flakeref or a nix store path, e.g.:
#   sbomnix .
#   sbomnix github:tiiuae/sbomnix
#   sbomnix nixpkgs#wget
#   sbomnix /nix/store/... (note: nixpkgs metadata not available for store path targets)
# Ref: https://nixos.org/manual/nix/stable/command-ref/new-cli/nix3-flake.html#flake-references
$ sbomnix github:NixOS/nixpkgs/nixos-unstable#wget
...
INFO Wrote: sbom.cdx.json
INFO Wrote: sbom.spdx.json
INFO Wrote: sbom.csv
```

The main outputs are the SBOM json files `sbom.cdx.json` and `sbom.spdx.json` in [CycloneDX](https://cyclonedx.org/) and [SPDX](https://spdx.github.io/spdx-spec/v2.3/) formats.

#### Generate SBOM Including Buildtime Dependencies

By default `sbomnix` scans the given target for runtime dependencies. You can tell `sbomnix` to determine the buildtime dependencies using the `--buildtime` argument. The example below generates an SBOM including buildtime dependencies. Notice: as opposed to runtime dependencies, determining the buildtime dependencies does not require building the target.

```bash
$ sbomnix github:NixOS/nixpkgs/nixos-unstable#wget --buildtime
```

#### Generate SBOM Based on a Store Path or Result Symlink

`sbomnix` accepts Nix store paths and result symlinks as targets:

```bash
$ sbomnix /path/to/result
```

Note: store paths carry no record of which nixpkgs version produced them, so nixpkgs metadata enrichment is skipped by default. Pass `--meta-nixpkgs` to supply a nixpkgs source explicitly, or see [Nixpkgs Metadata Source Selection](#nixpkgs-metadata-source-selection).

#### Nixpkgs Metadata Source Selection

`sbomnix` enriches packages with nixpkgs metadata, such as descriptions, licenses, maintainers, and homepage links, when it can select a nixpkgs source that is tied to the target. For flakeref targets, `sbomnix` uses the target flake context. NixOS toplevel flakerefs are handled through the selected NixOS package set, so overlays, package overrides, nixpkgs config, and system-specific package-set changes can be represented. Store-path targets skip nixpkgs metadata by default; pass `--meta-nixpkgs` to choose the source explicitly. `--meta-nixpkgs <source>` scans an explicit nixpkgs source. `--meta-nixpkgs nix-path` scans the `nixpkgs=` entry from `NIX_PATH` as an explicit opt-in source. `--exclude-meta` disables this enrichment and cannot be combined with `--meta-nixpkgs`.
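For example, a store-path scan that explicitly opts in to nixpkgs metadata through the `NIX_PATH` entry (a usage sketch based on the flags described above; `/path/to/result` stands in for a real store path or result symlink):

```bash
# Store-path target: enrich with metadata from the 'nixpkgs=' entry in NIX_PATH
$ sbomnix /path/to/result --meta-nixpkgs nix-path
```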
CycloneDX and SPDX outputs record the selected metadata source in document metadata, including fields such as `nixpkgs:metadata_source_method`, `nixpkgs:path`, `nixpkgs:rev`, `nixpkgs:flakeref`, `nixpkgs:version`, and `nixpkgs:message`.

#### Visualize Package Dependencies

`sbomnix` uses structured Nix JSON to find package dependencies where available. `nixgraph` can also be used as a stand-alone tool for visualizing package dependencies. Below, we show an example of visualizing package `wget` runtime dependencies:

```bash
$ nixgraph github:NixOS/nixpkgs/nixos-unstable#wget --depth=2
```

This outputs the dependency graph as an image (with maxdepth 2):

For more examples on querying and visualizing the package dependencies, see: [nixgraph](./doc/nixgraph.md).

## Contribute

Any pull requests, questions and error reports are welcome. To start development, we recommend using the Nix flakes development shell:

```bash
$ git clone https://github.com/tiiuae/sbomnix
$ cd sbomnix/
$ nix develop
```

Before opening a pull request, run at minimum:

```bash
$ ./scripts/check-fast.sh
```

This runs the formatter, a fast flake eval, and the fast test lane. CI runs `./scripts/check-full.sh`, which validates the flake and runs the full test lane with coverage.

To deactivate the Nix devshell, run `exit` in your shell. To see other Nix flake targets, run `nix flake show`.

## License

This project is licensed under the Apache-2.0 license - see the [Apache-2.0.txt](LICENSES/Apache-2.0.txt) file for details.

## Acknowledgements

Parts of the Nix store derivation loading code in `sbomnix` ([derivation.py](src/sbomnix/derivation.py) and [derivers.py](src/sbomnix/derivers.py)) originate from [vulnix](https://github.com/nix-community/vulnix).

================================================
FILE: REUSE.toml
================================================

# SPDX-FileCopyrightText: 2022-2025 Technology Innovation Institute (TII)
# SPDX-License-Identifier: Apache-2.0

version = 1
SPDX-PackageName = "sbomnix"
SPDX-PackageSupplier = "Technology Innovation Institute "
SPDX-PackageDownloadLocation = "https://github.com/tiiuae/sbomnix"

[[annotations]]
SPDX-License-Identifier = "CC-BY-3.0"
SPDX-FileCopyrightText = "2022-2025 Technology Innovation Institute (TII)"
precedence = "closest"
path = [
  "doc/img/*",
]

[[annotations]]
SPDX-License-Identifier = "Apache-2.0"
SPDX-FileCopyrightText = "2022-2025 Technology Innovation Institute (TII)"
precedence = "closest"
path = [
  "**.yml",
  "**.toml",
  "flake.lock",
  "pyrightconfig.json",
  "VERSION",
  "tests/resources/**",
]

================================================
FILE: VERSION
================================================

1.7.6

================================================
FILE: default.nix
================================================

# SPDX-FileCopyrightText: 2023 Technology Innovation Institute (TII)
# SPDX-FileCopyrightText: 2020-2023 Eelco Dolstra and the flake-compat contributors
#
# SPDX-License-Identifier: MIT

# This file originates from:
#   https://github.com/nix-community/flake-compat

# This file provides backward compatibility to nix < 2.4 clients
{
  system ?
    builtins.currentSystem,
}:
let
  lock = builtins.fromJSON (builtins.readFile ./flake.lock);
  inherit (lock.nodes.flake-compat.locked)
    owner
    repo
    rev
    narHash
    ;
  flake-compat = fetchTarball {
    url = "https://github.com/${owner}/${repo}/archive/${rev}.tar.gz";
    sha256 = narHash;
  };
  flake = import flake-compat {
    inherit system;
    src = ./.;
  };
in
flake.defaultNix

================================================
FILE: doc/nix_outdated.md
================================================

# Getting Started

To get started, follow the [Getting Started](../README.md#getting-started) section from the main [README](../README.md).

As an example, to run [`nix_outdated`](../src/nixupdate/nix_outdated.py) from the `tiiuae/sbomnix` repository:

```bash
# '--' signifies the end of argument list for `nix`.
# '--help' is the first argument to `nix_outdated`
$ nix run github:tiiuae/sbomnix#nix_outdated -- --help
```

## Example Target

We use Nix package `git` as an example target, referred to by flakeref `github:NixOS/nixpkgs/nixos-unstable#git`.

# nix_outdated

[`nix_outdated`](../src/nixupdate/nix_outdated.py) is a command line tool to list outdated nix dependencies for a given target nix out path or flakeref. By default, the script outputs the runtime dependencies of the given target that appear outdated in the nixpkgs 'nix_unstable' channel; each listed package would potentially need a PR to update it in nixpkgs to the package's latest upstream release version, specified in the output table column 'version_upstream'. The list of output packages is in priority order based on how many other packages depend on the potentially outdated package.

The command below finds `git` runtime dependencies that would have an update in the package's upstream repository based on repology, where the latest release version is not available in nix unstable. The captured output is illustrative; exact versions and findings will differ depending on the package versions resolved at run time.

```bash
# Target can be specified as a flakeref or a nix store path, e.g.:
#   nix_outdated .
#   nix_outdated github:tiiuae/sbomnix
#   nix_outdated nixpkgs#git
#   nix_outdated /nix/store/...
# Ref: https://nixos.org/manual/nix/stable/command-ref/new-cli/nix3-flake.html#flake-references
$ nix_outdated github:NixOS/nixpkgs/nixos-unstable#git
INFO Generating SBOM for target '/nix/store/...-git-'
INFO Dependencies that need update in nixpkgs (in priority order based on how many other packages depend on the potentially outdated package):

|   priority | nix_package        | version_local | version_nixpkgs | version_upstream      |
|------------+--------------------+---------------+-----------------+-----------------------|
|          9 | libidn2            | 2.3.2         | 2.3.2           | 2.3.4                 |
|          8 | glibc              | 2.35-224      | 2.35-224        | 2.37                  |
|          5 | perl:uri           | 5.05          | 5.05            | 5.17                  |
|          4 | perl:http-message  | 6.26          | 6.26            | 6.44                  |
|          4 | openssl            | 3.0.8         | 3.0.8           | 3.1.0                 |
|          3 | perl:html-parser   | 3.75          | 3.75            | 3.81                  |
|          3 | perl:try-tiny      | 0.30          | 0.30            | 0.31                  |
|          3 | perl:mozilla-ca    | 20200520      | 20200520        | 20221114;20221114.0.0 |
|          2 | perl:digest-hmac   | 1.03          | 1.03            | 1.04                  |
|          2 | sqlite             | 3.40.1        | 3.41.0          | 3.41.1                |
|          2 | perl:fcgi          | 0.79          | 0.79            | 0.82                  |
|          2 | perl:net-http      | 6.19          | 6.19            | 6.22                  |
|          2 | perl:io-socket-ssl | 2.068         | 2.068           | 2.081;2.81.0          |
|          2 | perl:file-listing  | 6.14          | 6.14            | 6.15                  |
|          2 | perl:http-daemon   | 6.14          | 6.14            | 6.16                  |
|          2 | perl:http-cookies  | 6.09          | 6.09            | 6.10;6.10.0           |
|          2 | perl:cgi           | 4.51          | 4.51            | 4.56                  |
|          2 | nghttp2            | 1.51.0        | 1.51.0          | 1.52.0                |
|          2 | perl:test-fatal    | 0.016         | 0.016           | 0.017;0.17.0          |
|          2 | perl:test-needs    | 0.002006      | 0.002006        | 0.002010              |
|          1 | perl:libnet        | 3.12          | 3.12            | 3.14                  |
|          1 | git                | 2.39.2        | 2.39.2          | 2.40.0                |
|          1 | gettext            | 0.21          | 0.21            | 0.21.1                |
|          1 | perl:libwww-perl   | 6.67          | 6.67            | 6.68                  |

INFO Wrote: nix_outdated.csv
```

As an example, the first row in the above output table means that:

- `libidn2` in nix unstable is not up-to-date with what repology.org knows is the package's newest upstream version.
- `libidn2` is at the top of the table, as it has the highest priority among the listed outdated packages. The priority is based on how many other packages depend on the given outdated package. This datapoint is based on [nix-visualize](https://github.com/craigmbooth/nix-visualize). The value of the `priority` column is directly the `level` value determined by [nix-visualize](https://github.com/craigmbooth/nix-visualize). For a full description of the `level` values, see the nix-visualize documentation: https://github.com/craigmbooth/nix-visualize#vertical-positioning.
- `libidn2` local version is 2.3.2.
- `libidn2` newest version in nix unstable is 2.3.2 (based on repology.org).
- `libidn2` newest release version in the package's upstream repository is 2.3.4 (based on repology.org).
- `libidn2` is considered outdated, because the version string in `version_upstream` is later than the version string in `version_nixpkgs`.

================================================
FILE: doc/nixgraph.md
================================================

# nixgraph

[`nixgraph`](../src/nixgraph/main.py) is a Python library and command line utility for querying and visualizing dependency graphs for [Nix](https://nixos.org/) packages.
Table of Contents
=================

* [Getting Started](#getting-started)
* [Usage examples](#usage-examples)
  * [Example: package runtime dependencies](#example-package-runtime-dependencies)
  * [Example: depth](#example-depth)
  * [Example: colorize](#example-colorize)
  * [Example: inverse](#example-inverse)
  * [Example: package buildtime dependencies](#example-package-buildtime-dependencies)
  * [Example: output format](#example-output-format)
  * [Example: pathnames](#example-pathnames)

## Getting Started

To get started, follow the [Getting Started](../README.md#getting-started) section from the main [README](../README.md).

As an example, to run [`nixgraph`](../src/nixgraph/main.py) from your local clone of the `tiiuae/sbomnix` repository:

```bash
# '--' signifies the end of argument list for `nix`.
# '--help' is the first argument to `nixgraph`
$ nix run .#nixgraph -- --help
```

## Usage examples

In the below examples, we use nix package `wget` as an example target, referred to by flakeref `github:NixOS/nixpkgs/nixos-unstable#wget`. The example graphs below are illustrative; the actual graph generated will reflect the dependency versions resolved at run time.

#### Example: package runtime dependencies

```bash
# Target can be specified as a flakeref or a nix store path, e.g.:
#   nixgraph .
#   nixgraph github:tiiuae/sbomnix
#   nixgraph nixpkgs#wget
#   nixgraph /nix/store/...
# Ref: https://nixos.org/manual/nix/stable/command-ref/new-cli/nix3-flake.html#flake-references
$ nixgraph github:NixOS/nixpkgs/nixos-unstable#wget
INFO Wrote: graph.png
```

By default `nixgraph` scans the given target and generates a graph that shows the direct runtime dependencies. The default output is a png image `graph.png`:

#### Example: depth

```bash
$ nixgraph github:NixOS/nixpkgs/nixos-unstable#wget --depth=2
```

By default, when the `--depth` argument is not specified, `nixgraph` shows the direct dependencies. Increasing the `--depth` makes `nixgraph` walk the dependency chain deeper. For instance, with `--depth=2`, the output graph for `wget` becomes:

The value of `--depth` sets the maximum distance from the target node to any node included in the resulting graph. For instance, in the above example, `libunistring-1.0` gets included with `--depth=2` because the shortest path between `wget` and `libunistring` is two hops deep (`wget --> libidn2 --> libunistring`).

#### Example: colorize

```bash
$ nixgraph github:NixOS/nixpkgs/nixos-unstable#wget --depth=2 --colorize='openssl|libidn'
```

`--colorize` allows highlighting nodes that match the specified regular expression:

#### Example: inverse

```bash
$ nixgraph github:NixOS/nixpkgs/nixos-unstable#wget --depth=2 --inverse='glibc'
```

`--inverse` makes it possible to draw the graph backwards, starting from nodes that match the specified regular expression. For instance, the above command would show all the dependency paths from `wget` that lead to `glibc`:

`--inverse` is especially useful when working with larger graphs. As an example, consider the following graph for `git`:
(`nixgraph github:NixOS/nixpkgs/nixos-unstable#git --depth=3 --colorize="openssl-3|sqlite-3"`)

To find all the runtime dependency paths from `git` to the highlighted nodes `openssl` or `sqlite` in the above graph, run the following command:

```bash
# --depth=100: make sure the output graph includes "long enough" dependency chains
# --inverse="openssl-3|sqlite-3": draw the graph backwards starting from nodes that
#                                 match the specified regular expression
# --colorize="openssl-3|sqlite-3": colorize the matching nodes
nixgraph github:NixOS/nixpkgs/nixos-unstable#git --depth=100 --colorize="openssl-3|sqlite-3" --inverse="openssl-3|sqlite-3"
```

The output now becomes:

The output graph shows that there are three dependency paths from `git` to `openssl-3.0.7` and one dependency path that leads to `sqlite-3.39.4`.

#### Example: package buildtime dependencies

```bash
$ nixgraph github:NixOS/nixpkgs/nixos-unstable#wget --buildtime
```

Specifying `--buildtime` makes `nixgraph` visualize the buildtime dependencies instead of runtime dependencies:

#### Example: output format

```bash
$ nixgraph github:NixOS/nixpkgs/nixos-unstable#wget --out="graph.dot"
```

By default `nixgraph` outputs the graph as a png image `graph.png`. To change the output file name and format, use the `--out` argument. The output filename extension determines the output format. As an example, the above command would output the graph in `dot` format. For a full list of supported output formats, see: https://graphviz.org/doc/info/output.html. In addition to the graphviz-supported output formats, the tool supports csv output to allow post-processing of the output data; a short sketch follows at the end of this page.

#### Example: pathnames

```bash
$ nixgraph github:NixOS/nixpkgs/nixos-unstable#wget --depth=1 --pathnames
```

The `--pathnames` argument adds the store path to each node label in the output graph:

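As a closing note on the output-format example above, the csv output is handy for scripted post-processing. A sketch (the csv column layout is not documented here, so `csvlook` is used just to pretty-print whatever columns the tool emits):

```bash
# Write the graph as csv (the filename extension selects the format), then inspect it
$ nixgraph github:NixOS/nixpkgs/nixos-unstable#wget --depth=2 --out="graph.csv"
$ csvlook graph.csv | head
```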
================================================
FILE: doc/nixmeta.md
================================================

# Getting Started

To get started, follow the [Getting Started](../README.md#getting-started) section from the main [README](../README.md).

As an example, to run [`nixmeta`](../src/nixmeta/main.py) from the `tiiuae/sbomnix` repository:

```bash
# '--' signifies the end of argument list for `nix`.
# '--help' is the first argument to `nixmeta`
$ nix run github:tiiuae/sbomnix#nixmeta -- --help
```

# nixmeta

[`nixmeta`](../src/nixmeta/main.py) is a command line tool to summarize nixpkgs meta-attributes from the given nixpkgs version. The output is written to a csv file.

The nixpkgs version is specified with a [`flakeref`](https://nixos.org/manual/nix/stable/command-ref/new-cli/nix3-flake#flake-references). As an example, `--flakeref=github:NixOS/nixpkgs?ref=master` would make `nixmeta` output the meta-attributes from the nixpkgs version in the [master](https://github.com/NixOS/nixpkgs/tree/master) branch. Similarly, `--flakeref=github:NixOS/nixpkgs?ref=release-23.11` would output the meta-attributes from the nixpkgs version in the [release-23.11](https://github.com/NixOS/nixpkgs/tree/release-23.11) branch.

Note that `--flakeref` does not necessarily have to reference `github:NixOS/nixpkgs`: any flakeref, or even the `NIX_PATH` environment variable, can be used to specify the nixpkgs version. As an example, `--flakeref=github:tiiuae/sbomnix` would make `nixmeta` output the meta-attributes from the nixpkgs version [pinned by the sbomnix flake](https://github.com/tiiuae/sbomnix/blob/c243db5272fb01c4d97cbbb01a095ae514cd2dcb/flake.lock#L68) in its default branch.

As an example, the command below outputs nixpkgs meta-attributes from the nixpkgs version pinned by flake `github:NixOS/nixpkgs?ref=master`:

```bash
$ nixmeta --flakeref=github:NixOS/nixpkgs?ref=master
INFO Finding meta-info for nixpkgs pinned in flake: github:NixOS/nixpkgs?ref=master
INFO Wrote: /home/foo/sbomnix-fork/nixmeta.csv
```

The output summarizes the meta-attributes of all the target nixpkgs packages enumerated by `nix-env --query --available`. For each package, the output includes the following details:

```bash
$ head -n2 nixmeta.csv | csvlook
| name       | pname | version | meta_homepage        | meta_unfree | meta_license_short               | meta_license_spdxid                    | meta_maintainers_email |
| ---------- | ----- | ------- | -------------------- | ----------- | -------------------------------- | -------------------------------------- | ---------------------- |
| 0ad-0.0.26 | 0ad   | 0.0.26  | https://play0ad.com/ | False       | gpl2;lgpl21;mit;cc-by-sa-30;zlib | GPL-2.0;LGPL-2.1;MIT;CC-BY-SA-3.0;Zlib | nixpkgs@cvpetegem.be   |
```

================================================
FILE: doc/provenance.md
================================================

# Getting Started

To get started, follow the [Getting Started](../README.md#getting-started) section from the main [README](../README.md).

As an example, to run the [`provenance`](../src/provenance/main.py) tool from the `tiiuae/sbomnix` repository:

```bash
# '--' signifies the end of argument list for `nix`.
# '--help' is the first argument to `provenance`
$ nix run github:tiiuae/sbomnix#provenance -- --help
```

# provenance

[`provenance`](../src/provenance/main.py) is a command line tool to generate SLSA v1.0 compliant [provenance](https://slsa.dev/spec/v1.0/provenance) attestation files in json format for any nix flake or derivation.
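Once a provenance file has been generated (see the examples below), it can be inspected with standard json tooling. A minimal sketch, assuming the output follows the in-toto statement layout that SLSA v1.0 provenance builds on (top-level `predicateType` and `subject` fields):

```bash
# List the predicate type and the names of the attested subjects
$ jq -r '.predicateType, .subject[].name' provenance.json
```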
To generate a provenance file for `nixpkgs#hello`:

```bash
provenance nixpkgs#hello
```

To generate a provenance file for `curl-8.6.0` in your nix store:

```bash
provenance /nix/store/fh7vxc5xgiwl6z7vwq5c3lj84mpcs4br-curl-8.6.0-bin
```

By default the dependencies are resolved only at the top level, i.e. only direct dependencies. To get all dependencies recursively, use the `--recursive` option. Note that this will result in a very long provenance file. The dependencies listed are the nix buildtime dependencies of the derivation.

An example of a recursive provenance saved into a file:

```bash
provenance nixpkgs#hello --recursive --out ./provenance.json
```

## Build metadata

The build metadata to be used in the provenance is supplied through environment variables. These fields cannot be automatically derived from the nix derivation as they are build platform dependent.

Variable | Type | Explanation
--- | --- | ---
PROVENANCE_BUILD_TYPE | str | Corresponds to SLSA [buildDefinition.buildType](https://slsa.dev/spec/v1.0/provenance#builddefinition)
PROVENANCE_BUILDER_ID | str | Corresponds to SLSA [runDetails.builder.id](https://slsa.dev/spec/v1.0/provenance#builder)
PROVENANCE_INVOCATION_ID | str/int | Corresponds to SLSA [buildMetadata.invocationId](https://slsa.dev/spec/v1.0/provenance#buildmetadata)
PROVENANCE_TIMESTAMP_BEGIN | int (unix timestamp) | Is parsed into SLSA [buildMetadata.startedOn](https://slsa.dev/spec/v1.0/provenance#buildmetadata)
PROVENANCE_TIMESTAMP_FINISHED | int (unix timestamp) | Is parsed into SLSA [buildMetadata.finishedOn](https://slsa.dev/spec/v1.0/provenance#buildmetadata)
PROVENANCE_EXTERNAL_PARAMS | json | Corresponds to SLSA [buildDefinition.externalParameters](https://slsa.dev/spec/v1.0/provenance#builddefinition)
PROVENANCE_INTERNAL_PARAMS | json | Corresponds to SLSA [buildDefinition.internalParameters](https://slsa.dev/spec/v1.0/provenance#builddefinition)
PROVENANCE_OUTPUT_FILE | path | Has the same function as the `--out` argument.

Example usage in a simplified build script:

```bash
target="nixpkgs#hello"
PROVENANCE_TIMESTAMP_BEGIN="$(date +%s)"
nix build $target
PROVENANCE_TIMESTAMP_FINISHED="$(date +%s)"
PROVENANCE_EXTERNAL_PARAMS="$(jq -n --arg target "$target" '$ARGS.named')"
PROVENANCE_INTERNAL_PARAMS="$(jq -n --arg nixVersion "$(nix --version)" '$ARGS.named')"
export PROVENANCE_TIMESTAMP_BEGIN
export PROVENANCE_TIMESTAMP_FINISHED
export PROVENANCE_EXTERNAL_PARAMS
export PROVENANCE_INTERNAL_PARAMS
provenance $target --out ./provenance.json
```

================================================
FILE: doc/repology_cli.md
================================================

# repology_cli

[`repology_cli`](../src/repology/repology_cli.py) is a command line interface to [repology.org](https://repology.org/). It supports querying package information via package search terms in the same manner as https://repology.org/projects/?search. In addition, it supports querying package information for all packages in a CycloneDX SBOM and printing out some simple statistics based on the input.
Table of Contents
=================

* [Getting Started](#getting-started)
* [Usage Examples](#usage-examples)
  * [Search by Package Name Exact Match](#search-by-package-name-exact-match)
  * [Search by Package Name Search Term](#search-by-package-name-search-term)
  * [Search by Package Names in SBOM](#search-by-package-names-in-sbom)
  * [Statistics: SBOM Packages](#statistics-sbom-packages)
  * [Repology CVE search](#repology-cve-search)

## Getting Started

To get started, follow the [Getting Started](../README.md#getting-started) section from the main [README](../README.md).

As an example, to run [`repology_cli`](../src/repology/repology_cli.py) from your local clone of the `tiiuae/sbomnix` repository:

```bash
# '--' signifies the end of argument list for `nix`.
# '--help' is the first argument to `repology_cli`
$ nix run .#repology_cli -- --help
```

## Usage Examples

### Search by Package Name Exact Match

The following query finds package name 'firefox' versions in the 'nix_unstable' repository:

```bash
$ repology_cli --pkg_exact "firefox" --repository nix_unstable
INFO Repology package info, packages:5

| repo         | package | version               | status   | potentially_vulnerable | newest_upstream_release | repo_version_classify |
|--------------+---------+-----------------------+----------+------------------------+-------------------------+-----------------------|
| nix_unstable | firefox | 102-unwrapped-102.8.0 | legacy   | 1                      | 110.0.1                 |                       |
| nix_unstable | firefox | 102.8.0               | legacy   | 1                      | 110.0.1                 |                       |
| nix_unstable | firefox | 110.0.1               | newest   | 0                      | 110.0.1                 |                       |
| nix_unstable | firefox | 111.0b7               | outdated | 0                      | 110.0.1                 | repo_pkg_needs_update |
| nix_unstable | firefox | 111.0b8               | devel    | 0                      | 110.0.1                 |                       |

For more details, see: https://repology.org/projects/?search=firefox&inrepo=nix_unstable
INFO Wrote: repology_report.csv
```

The output table includes the datapoints available in repology.org, as stated by each column name. As an example, the first row in the above output table means:

- package information was fetched for repository 'nix_unstable'
- package name is 'firefox'
- latest 'nix_unstable' includes a version of firefox with version string '102-unwrapped-102.8.0'
- firefox '102-unwrapped-102.8.0' status is 'legacy'. The details of each classification status are available in https://repology.org/docs/about.
- firefox '102-unwrapped-102.8.0' is potentially vulnerable, meaning the package version is associated with at least one CVE. For details of which CVEs repology determined the package is associated with, see: https://repology.org/project/firefox/cves or https://repology.org/project/firefox/cves?version=102-unwrapped-102.8.0
- newest upstream release version of firefox known to repology is '110.0.1'

In addition to the above datapoints, `repology_cli` adds the column 'repo_version_classify', which simply states whether the specific package version appears updatable in the given repository. As an example, in the above output, the second to last row states 'repo_pkg_needs_update', which means that it appears 'nix_unstable' should update firefox '111.0b7' to the latest firefox upstream release version '110.0.1'.

A full list of the repositories available in repology is at https://repology.org/repositories/statistics.
As an example, to repeat the earlier query for Debian 12, you would run:

```bash
$ repology_cli --pkg_exact "firefox" --repository debian_12
INFO Repology package info, packages:1

| repo      | package | version | status   | potentially_vulnerable | newest_upstream_release | repo_version_classify |
|-----------+---------+---------+----------+------------------------+-------------------------+-----------------------|
| debian_12 | firefox | 102.8.0 | outdated | 1                      | 110.0.1                 | repo_pkg_needs_update |

For more details, see: https://repology.org/projects/?search=firefox&inrepo=debian_12
INFO Wrote: repology_report.csv
```

### Search by Package Name Search Term

The following query finds 'debian_12' packages that include 'firefox' anywhere in the name string:

```bash
$ repology_cli --pkg_search "firefox" --repository debian_12
INFO Repology package info, packages:5

| repo      | package                     | version | status   | potentially_vulnerable | newest_upstream_release | repo_version_classify |
|-----------+-----------------------------+---------+----------+------------------------+-------------------------+-----------------------|
| debian_12 | activity-aware-firefox      | 0.4.1   | unique   | 0                      |                         |                       |
| debian_12 | firefox                     | 102.8.0 | outdated | 1                      | 110.0.1                 | repo_pkg_needs_update |
| debian_12 | firefox-esr-mobile-config   | 3.2.0   | unique   | 0                      |                         |                       |
| debian_12 | foxyproxy-firefox-extension | 7.5.1   | unique   | 0                      |                         |                       |
| debian_12 | perl:firefox-marionette     | 1.35    | newest   | 0                      | 1.35                    |                       |
```

Notice: using short search strings with `--pkg_search` might result in a large number of matches and, thus, potentially a large number of queries to repology.org. To avoid spamming repology.org with such queries, `repology_cli` limits the number of requests sent to repology.org to at most one request per second. In addition, it caches all responses locally for two hours.

### Search by Package Names in SBOM

The following query finds 'nix_unstable' packages that match the packages in the CycloneDX sbom 'wget.runtime.sbom.cdx.json':

```bash
$ repology_cli --sbom_cdx wget.runtime.sbom.cdx.json --repository nix_unstable
INFO Repology package info, packages:9

| repo         | package      | version  | status   | potentially_vulnerable | newest_upstream_release | version_sbom | sbom_version_classify | repo_version_classify |
|--------------+--------------+----------+----------+------------------------+-------------------------+--------------+-----------------------+-----------------------|
| nix_unstable | glibc        | 2.35-224 | outdated | 0                      | 2.37                    | 2.35-224     | sbom_pkg_needs_update | repo_pkg_needs_update |
| nix_unstable | libidn2      | 2.3.2    | outdated | 0                      | 2.3.4                   | 2.3.2        | sbom_pkg_needs_update | repo_pkg_needs_update |
| nix_unstable | libunistring | 1.0      | outdated | 0                      | 1.1                     | 1.0          | sbom_pkg_needs_update | repo_pkg_needs_update |
| nix_unstable | openssl      | 1.1.1t   | legacy   | 0                      | 3.0.8                   | 3.0.8        |                       |                       |
| nix_unstable | openssl      | 3.0.8    | newest   | 0                      | 3.0.8                   | 3.0.8        |                       |                       |
| nix_unstable | pcre         | 8.45     | newest   | 0                      | 8.45                    | 8.45         |                       |                       |
| nix_unstable | wget         | 1.21.3   | legacy   | 0                      | 2.0.1                   | 1.21.3       |                       |                       |
| nix_unstable | wget         | 2.0.1    | newest   | 0                      | 2.0.1                   | 1.21.3       | sbom_pkg_needs_update |                       |
| nix_unstable | zlib         | 1.2.13   | newest   | 0                      | 1.2.13                  | 1.2.13       |                       |                       |
```

The output includes package details for the packages in the given SBOM that were also found in repology.org.
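The CycloneDX input can be produced with `sbomnix` first. A sketch chaining the two tools (the sbom filename follows the `sbomnix` default shown in the main README; the `--sbom_cdx` and `--repository` flags are as documented above):

```bash
# Generate the CycloneDX SBOM for wget, then look its packages up in repology
$ sbomnix github:NixOS/nixpkgs/nixos-unstable#wget
$ repology_cli --sbom_cdx sbom.cdx.json --repository nix_unstable
```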
In addition to the datapoints covered in section [Search by Package Name Exact Match](#search-by-package-name-exact-match), `repology_cli` adds the column 'sbom_version_classify', which states whether the package version in the SBOM appears outdated. As an example, in the above output, the 'wget' version in the sbom is '1.21.3'. Column 'sbom_version_classify' states 'sbom_pkg_needs_update' because 'nix_unstable' would have an update to the 'wget' package, to version '2.0.1'.

### Statistics: SBOM Packages

The following is the same query as above, but with the command-line argument `--stats` added to print out some simple statistics that might help explain the results.

```bash
$ repology_cli --sbom_cdx wget.runtime.sbom.cdx.json --repository nix_unstable --stats
INFO Repology package info, packages:9

| repo         | package      | version  | status   | potentially_vulnerable | newest_upstream_release | version_sbom | sbom_version_classify | repo_version_classify |
|--------------+--------------+----------+----------+------------------------+-------------------------+--------------+-----------------------+-----------------------|
| nix_unstable | glibc        | 2.35-224 | outdated | 0                      | 2.37                    | 2.35-224     | sbom_pkg_needs_update | repo_pkg_needs_update |
| nix_unstable | libidn2      | 2.3.2    | outdated | 0                      | 2.3.4                   | 2.3.2        | sbom_pkg_needs_update | repo_pkg_needs_update |
| nix_unstable | libunistring | 1.0      | outdated | 0                      | 1.1                     | 1.0          | sbom_pkg_needs_update | repo_pkg_needs_update |
| nix_unstable | openssl      | 1.1.1t   | legacy   | 0                      | 3.0.8                   | 3.0.8        |                       |                       |
| nix_unstable | openssl      | 3.0.8    | newest   | 0                      | 3.0.8                   | 3.0.8        |                       |                       |
| nix_unstable | pcre         | 8.45     | newest   | 0                      | 8.45                    | 8.45         |                       |                       |
| nix_unstable | wget         | 1.21.3   | legacy   | 0                      | 2.0.1                   | 1.21.3       |                       |                       |
| nix_unstable | wget         | 2.0.1    | newest   | 0                      | 2.0.1                   | 1.21.3       | sbom_pkg_needs_update |                       |
| nix_unstable | zlib         | 1.2.13   | newest   | 0                      | 1.2.13                  | 1.2.13       |                       |                       |

For more details, see: https://repology.org/projects/
INFO Repology package statistics: (see the status descriptions in: https://repology.org/docs/about)
Unique compared packages: 7 (100%) (status in: ['newest', 'devel', 'unique', 'outdated'])
 ==> newest: 4 (57%)
 ==> outdated: 3 (43%)
 ==> devel or unique: 0 (0%)
 ==> potentially vulnerable: 0 (0%)
INFO Repology SBOM package statistics:
Unique packages: 10 (100%)
 ==> sbom packages in repology: 9 (90%)
 ==> sbom packages not in repology: 1 (10%)
     - IGNORED (sbom component is not a package in repology): 0
     - NO_VERSION (sbom component is missing the version number): 0
     - NOT_FOUND (sbom component was not found in repology): 1
INFO Wrote: repology_report.csv
```

Section 'Repology package statistics' in the console output indicates that:

- There were seven packages whose status was one of `['newest', 'devel', 'unique', 'outdated']`. These are the package statuses `repology_cli` considers in the statistics output.
- Four out of the total of seven packages had the status 'newest'. This number indicates how many packages are up-to-date with their known latest upstream release versions.
- Three out of seven packages have the status 'outdated'. This number indicates how many packages in the 'nix_unstable' repository are not up-to-date with their known latest upstream release versions.
- There were no devel or unique packages. 'devel' packages indicate latest development or unstable package versions, whereas 'unique' packages are only present in a single repository family, meaning there are no other sources for repology.org to compare them against.
- There were no packages with known vulnerabilities associated with them.

Section 'Repology SBOM package statistics' in the console output indicates that:

- The baseline for the SBOM package comparison is ten unique packages. This number includes the unique components in the cdx SBOM (as identified by the component name and version), as well as other current package versions in 'nix_unstable' known to repology.
- Nine component names in the SBOM can be matched with package names in repology.
- One package was not included in the comparison by `repology_cli`. The reason is 'NOT_FOUND', meaning the package was not found in repology.org. Other possible reasons for `repology_cli` to skip SBOM packages are IGNORED and NO_VERSION. IGNORED means the sbom component name indicates the component is not a package in repology.org; typical examples of IGNORED packages would be archives (.tar.gz) or patches (.patch). NO_VERSION means the sbom component was missing the version information. Typically, such packages are service files, scripts, or configuration files that are not considered packages in repology.org but can be included as separate components in the SBOM.

In addition to the console output, `repology_cli` outputs the full data set in a csv file. As an example, you could query the `repology_report.csv` for more details of the skipped packages:

```bash
$ csvsql --query "select * from repology_report where status == 'NOT_FOUND'" repology_report.csv | csvlook
| repo         | package            | version | status    | ... | version_sbom |
| ------------ | ------------------ | ------- | --------- | --- | ------------ |
| nix_unstable | util-linux-minimal | 2.38.1  | NOT_FOUND | ... | 2.38.1       |
```

Above, we can see that the package 'util-linux-minimal', which is one of the components in the example sbom 'wget.runtime.sbom.cdx.json', is not available (with that exact same name) in repology.org.

### Repology CVE search

The following query shows an example of using the [`repology_cve`](../src/repology/repology_cve.py) client to query CVEs known to repology.org that impact package `openssl` version `3.1.1`.

```bash
$ repology_cve openssl 3.1.1
INFO Repology affected CVE(s)

| package | version | cve           |
|---------+---------+---------------|
| openssl | 3.1.1   | CVE-2023-2975 |
| openssl | 3.1.1   | CVE-2023-3446 |
| openssl | 3.1.1   | CVE-2023-3817 |
| openssl | 3.1.1   | CVE-2023-4807 |
| openssl | 3.1.1   | CVE-2023-5363 |
| openssl | 3.1.1   | CVE-2023-5678 |

INFO Wrote: repology_cves.csv
```

================================================
FILE: doc/vulnxscan.md
================================================

# vulnxscan

[`vulnxscan`](../src/vulnxscan/vulnxscan_cli.py) is a command line utility that demonstrates running vulnerability scans using SBOM as input. It mainly targets nix packages; however, it can be used with other targets too, as long as the target is expressed as a valid CycloneDX SBOM.
Table of Contents
=================

* [Getting Started](#getting-started)
* [Example Target](#example-target)
* [Supported Scanners](#supported-scanners)
  * [Nix and OSV Vulnerability Database](#nix-and-osv-vulnerability-database)
  * [Nix and Grype](#nix-and-grype)
  * [Vulnix](#vulnix)
* [Vulnxscan Usage Examples](#vulnxscan-usage-examples)
  * [Find Vulnerabilities Impacting Runtime Dependencies](#find-vulnerabilities-impacting-runtime-dependencies)
  * [Whitelisting Vulnerabilities](#whitelisting-vulnerabilities)
  * [Find Vulnerabilities Given SBOM as Input](#find-vulnerabilities-given-sbom-as-input)
  * [Find Vulnerabilities Impacting Buildtime and Runtime Dependencies](#find-vulnerabilities-impacting-buildtime-and-runtime-dependencies)
  * [Using Whitelist to Record Manual Analysis Results](#using-whitelist-to-record-manual-analysis-results)
  * [Triage to Help Manual Analysis](#triage-to-help-manual-analysis)
* [Footnotes and Future Work](#footnotes-and-future-work)

## Getting Started

To get started, follow the [Getting Started](../README.md#getting-started) section from the main [README](../README.md).

As an example, to run `vulnxscan` from your local clone of the `tiiuae/sbomnix` repository:

```bash
# '--' signifies the end of argument list for `nix`.
# '--help' is the first argument to `vulnxscan`
$ nix run .#vulnxscan -- --help
```

## Example Target

In the below examples, we use `git` as an example target for `vulnxscan`, referred to by flakeref `github:NixOS/nixpkgs/nixos-unstable#git`.

## Supported Scanners

### Nix and OSV Vulnerability Database

[OSV](https://osv.dev/) is a vulnerability database for open-source projects [initiated by Google](https://security.googleblog.com/2021/02/launching-osv-better-vulnerability.html). The [OSV database](https://osv.dev/list?ecosystem=) currently [does not support the Nix ecosystem](https://ossf.github.io/osv-schema/#affectedpackage-field), so queries that specify Nix as the ecosystem would not return any matches. For this reason `vulnxscan` currently does not use Google's official [OSV-Scanner](https://security.googleblog.com/2022/12/announcing-osv-scanner-vulnerability.html) front-end, but implements its own OSV client demo in [osv.py](../src/vulnxscan/osv.py).

`osv.py` sends queries to the [OSV API](https://osv.dev/docs/) without specifying the ecosystem, only the target package name and version. At the time of writing, such queries to the OSV API return vulnerabilities that match the given package and version across all ecosystems. As a result, the OSV vulnerabilities for the Nix ecosystem will include false positives. Also, it is worth mentioning that OSV queries without an ecosystem are currently undocumented in the [API specification](https://osv.dev/docs/#tag/api/operation/OSV_QueryAffected).

### Nix and Grype

[Grype](https://github.com/anchore/grype) is a vulnerability scanner targeted at container images. It uses the vulnerability data from [a variety of publicly available data sources](https://github.com/anchore/grype#grypes-database). Grype also [supports input from CycloneDX SBOM](https://github.com/anchore/grype#supported-sources), which makes it possible to use Grype with SBOM input from `sbomnix`, thus allowing Grype scans against Nix targets.

### Vulnix

[Vulnix](https://github.com/nix-community/vulnix) is a vulnerability scanner intended for Nix targets. It uses the [NIST NVD](https://nvd.nist.gov/vuln) vulnerability database.
Vulnix matches vulnerabilities based on a [heuristic](https://github.com/nix-community/vulnix/blob/f56f3ac857626171b95e51d98cb6874278f789d3/src/vulnix/derivation.py#L104), which might result in more false positives compared to a direct match. False positives due to the rough heuristic are an [intended feature](https://github.com/nix-community/vulnix#whitelisting) in vulnix. On the other hand, vulnix accounts for [CVE patches](https://github.com/nix-community/vulnix#cve-patch-auto-detection) applied to Nix packages when matching vulnerabilities, something currently not directly supported by other scanners.

## Vulnxscan Usage Examples

### Find Vulnerabilities Impacting Runtime Dependencies

This example shows how to use `vulnxscan` to summarize vulnerabilities impacting the given target or any of its runtime dependencies. The captured output is illustrative; exact versions and findings will differ depending on the package versions resolved at run time.

```bash
# Target can be specified as a flakeref or a nix store path, e.g.:
#   vulnxscan .
#   vulnxscan github:tiiuae/sbomnix
#   vulnxscan nixpkgs#git
#   vulnxscan /nix/store/...
# Ref: https://nixos.org/manual/nix/stable/command-ref/new-cli/nix3-flake.html#flake-references
$ vulnxscan github:NixOS/nixpkgs/nixos-unstable#git
INFO Generating SBOM for target '/nix/store/...-git-'
INFO CVE-2023-2975 for 'openssl' is patched with: ['/nix/store/7gz0nj14469r9dlh8p0j5w5wjj3b6hw4-CVE-2023-2975.patch']
INFO CVE-2023-2975 for 'openssl' is patched with: ['/nix/store/7gz0nj14469r9dlh8p0j5w5wjj3b6hw4-CVE-2023-2975.patch']
INFO Console report

Potential vulnerabilities impacting version_local:

| vuln_id          | url                                               | package   | version | severity | grype | osv | vulnix | sum |
|------------------+---------------------------------------------------+-----------+---------+----------+-------+-----+--------+-----|
| CVE-2023-3817    | https://nvd.nist.gov/vuln/detail/CVE-2023-3817    | openssl   | 3.0.9   | 5.3      | 1     | 0   | 1      | 2   |
| CVE-2022-38663   | https://nvd.nist.gov/vuln/detail/CVE-2022-38663   | git       | 2.41.0  | 6.5      | 0     | 0   | 1      | 1   |
| CVE-2022-36884   | https://nvd.nist.gov/vuln/detail/CVE-2022-36884   | git       | 2.41.0  | 5.3      | 0     | 0   | 1      | 1   |
| CVE-2022-36883   | https://nvd.nist.gov/vuln/detail/CVE-2022-36883   | git       | 2.41.0  | 7.5      | 0     | 0   | 1      | 1   |
| CVE-2022-36882   | https://nvd.nist.gov/vuln/detail/CVE-2022-36882   | git       | 2.41.0  | 8.8      | 0     | 0   | 1      | 1   |
| CVE-2022-30949   | https://nvd.nist.gov/vuln/detail/CVE-2022-30949   | git       | 2.41.0  | 5.3      | 0     | 0   | 1      | 1   |
| CVE-2022-30948   | https://nvd.nist.gov/vuln/detail/CVE-2022-30948   | git       | 2.41.0  | 7.5      | 0     | 0   | 1      | 1   |
| CVE-2022-30947   | https://nvd.nist.gov/vuln/detail/CVE-2022-30947   | git       | 2.41.0  | 7.5      | 0     | 0   | 1      | 1   |
| MAL-2022-4301    | https://osv.dev/MAL-2022-4301                     | libidn2   | 2.3.4   |          | 0     | 1   | 0      | 1   |
| CVE-2021-21684   | https://nvd.nist.gov/vuln/detail/CVE-2021-21684   | git       | 2.41.0  | 6.1      | 0     | 0   | 1      | 1   |
| CVE-2020-2136    | https://nvd.nist.gov/vuln/detail/CVE-2020-2136    | git       | 2.41.0  | 5.4      | 0     | 0   | 1      | 1   |
| CVE-2019-1003010 | https://nvd.nist.gov/vuln/detail/CVE-2019-1003010 | git       | 2.41.0  | 4.3      | 0     | 0   | 1      | 1   |
| CVE-2018-1000182 | https://nvd.nist.gov/vuln/detail/CVE-2018-1000182 | git       | 2.41.0  | 6.4      | 0     | 0   | 1      | 1   |
| CVE-2018-1000110 | https://nvd.nist.gov/vuln/detail/CVE-2018-1000110 | git       | 2.41.0  | 5.3      | 0     | 0   | 1      | 1   |
| CVE-2016-2781    | https://nvd.nist.gov/vuln/detail/CVE-2016-2781    | coreutils | 9.3     | 6.5      | 1     | 0   | 0      | 1   |

INFO Wrote: vulns.csv
```

`vulnxscan` first creates an SBOM, then feeds the SBOM (or target path) as input to different
`vulnxscan` first creates an SBOM, then feeds the SBOM (or target path) as input to the vulnerability scanners ([vulnix](https://github.com/nix-community/vulnix), [grype](https://github.com/anchore/grype), and [osv.py](../src/vulnxscan/osv.py)), and creates a summary report. The summary report lists the newest vulnerabilities on top, with the `sum` column indicating how many scanners agreed with the exact same finding. In addition to the console output, `vulnxscan` writes the report to the CSV file `vulns.csv` to allow easier post-processing of the output.

It is worth mentioning that `vulnxscan` filters out vulnerabilities it detects are patched, as printed in the console output on lines like '`CVE-2023-2975 for 'openssl' is patched with: ['/nix/store/7gz0nj14469r9dlh8p0j5w5wjj3b6hw4-CVE-2023-2975.patch']`'. This patch auto-detection works in the same way as the [patch auto-detection in vulnix](https://github.com/nix-community/vulnix#cve-patch-auto-detection), that is, it is based on detecting vulnerability identifiers in the patch filenames.
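As a rough illustration of the idea (not the project's actual implementation), detecting patched CVEs from a derivation's patch filenames could be sketched as follows:

```python
import re

# Vulnerability identifiers are detected from the patch filenames; the
# patch list passed in below is a made-up example of a derivation's patches.
CVE_RE = re.compile(r"CVE-\d{4}-\d{4,}", re.IGNORECASE)

def patched_cves(patch_paths: list[str]) -> dict[str, list[str]]:
    """Map CVE identifiers to the patch files whose names mention them."""
    found: dict[str, list[str]] = {}
    for path in patch_paths:
        for cve in CVE_RE.findall(path):
            found.setdefault(cve.upper(), []).append(path)
    return found

print(patched_cves(["/nix/store/7gz0nj14469r9dlh8p0j5w5wjj3b6hw4-CVE-2023-2975.patch"]))
```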
### Whitelisting Vulnerabilities
`vulnxscan` supports whitelisting vulnerabilities to exclude false positives, unfixable issues, or vulnerabilities known to be addressed. The whitelist is a CSV file that contains rules for the vulnerabilities to be excluded from the `vulnxscan` console report.

Consider the following example whitelist:

```
$ csvlook whitelist.csv

| vuln_id        | package   | comment                                                                  |
| -------------- | --------- | ------------------------------------------------------------------------ |
| MAL-2022-4301  |           | Incorrect package: Issue refers npm libidn2, not libidn2.                |
| CVE-2016-2781  | coreutils | NVD data issue: CPE entry does not correctly state the version numbers.  |
| CVE-20.*       | git       | Incorrect package: Impacts Jenkins git plugin, not git.                  |
```

`vuln_id` and `comment` are mandatory columns. `vuln_id` specifies a regular expression that is matched against the vulnerability identifier (`vuln_id`) in the `vulnxscan` output. Vulnerabilities that match the regular expression are excluded from the `vulnxscan` console output. If the whitelist includes a `package` column, then in addition to matching `vuln_id`, an exact match is required against the `package` field in the `vulnxscan` output. If multiple rules match a vulnerability, rules nearer the top of the whitelist take priority.

To make it possible to verify which vulnerabilities are whitelisted, the `vulnxscan` CSV output `vulns.csv` includes both whitelisted and non-whitelisted vulnerabilities, flagged by the boolean column `whitelist`. `vulns.csv` also carries over the `comment` field from the whitelist, so the reason for whitelisting each vulnerability can be verified. The example below applies the above whitelist to the `git` vulnxscan output from the earlier example.

```bash
# Given the whitelist.csv contents:
$ cat whitelist.csv
"vuln_id","package","comment"
"MAL-2022-4301",,"Incorrect package: Issue refers npm libidn2, not libidn2."
"CVE-2016-2781","coreutils","NVD data issue: CPE entry does not correctly state the version numbers."
"CVE-20.*","git","Incorrect package: Impacts Jenkins git plugin, not git."

# Apply the whitelist to git vulnxscan output
$ vulnxscan github:NixOS/nixpkgs/nixos-unstable#git --whitelist=whitelist.csv

INFO Generating SBOM for target '/nix/store/...-git-'
INFO CVE-2023-2975 for 'openssl' is patched with: ['/nix/store/7gz0nj14469r9dlh8p0j5w5wjj3b6hw4-CVE-2023-2975.patch']
INFO CVE-2023-2975 for 'openssl' is patched with: ['/nix/store/7gz0nj14469r9dlh8p0j5w5wjj3b6hw4-CVE-2023-2975.patch']
INFO Console report

Potential vulnerabilities impacting version_local:

# Note: the console output now includes only non-whitelisted entries:
| vuln_id       | url                                            | package | version | severity | grype | osv | vulnix | sum |
|---------------+------------------------------------------------+---------+---------+----------+-------+-----+--------+-----|
| CVE-2023-3817 | https://nvd.nist.gov/vuln/detail/CVE-2023-3817 | openssl | 3.0.9   | 5.3      |     1 |   0 |      1 |   2 |

INFO Wrote: vulns.csv

# In addition to the console report, vulnxscan writes a detailed report in a csv file,
# by default 'vulns.csv', which includes the full details also from the whitelisted vulnerabilities:
$ csvlook vulns.csv

| vuln_id | url | package | version | severity | grype | osv | vulnix | sum | sortcol | whitelist | whitelist_comment |
| ---------------- | ------------------------------------------------- | --------- | ------- | -------- | ----- | ----- | ------ | --- | --------------- | --------- | ------------------------------------------------------------------------ |
| CVE-2023-3817 | https://nvd.nist.gov/vuln/detail/CVE-2023-3817 | openssl | 3.0.9 | 5.3 | True | False | True | 2 | 2023A0000003817 | False | |
| CVE-2022-38663 | https://nvd.nist.gov/vuln/detail/CVE-2022-38663 | git | 2.41.0 | 6.5 | False | False | True | 1 | 2022A0000038663 | True | Incorrect package: Impacts Jenkins git plugin, not git. |
| CVE-2022-36884 | https://nvd.nist.gov/vuln/detail/CVE-2022-36884 | git | 2.41.0 | 5.3 | False | False | True | 1 | 2022A0000036884 | True | Incorrect package: Impacts Jenkins git plugin, not git. |
| CVE-2022-36883 | https://nvd.nist.gov/vuln/detail/CVE-2022-36883 | git | 2.41.0 | 7.5 | False | False | True | 1 | 2022A0000036883 | True | Incorrect package: Impacts Jenkins git plugin, not git. |
| CVE-2022-36882 | https://nvd.nist.gov/vuln/detail/CVE-2022-36882 | git | 2.41.0 | 8.8 | False | False | True | 1 | 2022A0000036882 | True | Incorrect package: Impacts Jenkins git plugin, not git. |
| CVE-2022-30949 | https://nvd.nist.gov/vuln/detail/CVE-2022-30949 | git | 2.41.0 | 5.3 | False | False | True | 1 | 2022A0000030949 | True | Incorrect package: Impacts Jenkins git plugin, not git. |
| CVE-2022-30948 | https://nvd.nist.gov/vuln/detail/CVE-2022-30948 | git | 2.41.0 | 7.5 | False | False | True | 1 | 2022A0000030948 | True | Incorrect package: Impacts Jenkins git plugin, not git. |
| CVE-2022-30947 | https://nvd.nist.gov/vuln/detail/CVE-2022-30947 | git | 2.41.0 | 7.5 | False | False | True | 1 | 2022A0000030947 | True | Incorrect package: Impacts Jenkins git plugin, not git. |
| MAL-2022-4301 | https://osv.dev/MAL-2022-4301 | libidn2 | 2.3.4 | | False | True | False | 1 | 2022A0000004301 | True | Incorrect package: Issue refers npm libidn2, not libidn2. |
| CVE-2021-21684 | https://nvd.nist.gov/vuln/detail/CVE-2021-21684 | git | 2.41.0 | 6.1 | False | False | True | 1 | 2021A0000021684 | True | Incorrect package: Impacts Jenkins git plugin, not git. |
| CVE-2020-2136 | https://nvd.nist.gov/vuln/detail/CVE-2020-2136 | git | 2.41.0 | 5.4 | False | False | True | 1 | 2020A0000002136 | True | Incorrect package: Impacts Jenkins git plugin, not git. |
| CVE-2019-1003010 | https://nvd.nist.gov/vuln/detail/CVE-2019-1003010 | git | 2.41.0 | 4.3 | False | False | True | 1 | 2019A0001003010 | True | Incorrect package: Impacts Jenkins git plugin, not git. |
| CVE-2018-1000182 | https://nvd.nist.gov/vuln/detail/CVE-2018-1000182 | git | 2.41.0 | 6.4 | False | False | True | 1 | 2018A0001000182 | True | Incorrect package: Impacts Jenkins git plugin, not git. |
| CVE-2018-1000110 | https://nvd.nist.gov/vuln/detail/CVE-2018-1000110 | git | 2.41.0 | 5.3 | False | False | True | 1 | 2018A0001000110 | True | Incorrect package: Impacts Jenkins git plugin, not git. |
| CVE-2016-2781 | https://nvd.nist.gov/vuln/detail/CVE-2016-2781 | coreutils | 9.3 | 6.5 | True | False | False | 1 | 2016A0000002781 | True | NVD data issue: CPE entry does not correctly state the version numbers. |
```
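The rule semantics described above can be sketched roughly as follows. This is a simplified illustration of the matching rules (regular-expression `vuln_id`, exact `package`, first match wins), not the project's actual implementation; whether the regular expression is anchored is an assumption here:

```python
import csv
import re

def first_matching_rule(rules, vuln_id, package):
    """Return the first whitelist rule matching the finding, or None."""
    for rule in rules:  # rules nearer the top take priority
        if not re.fullmatch(rule["vuln_id"], vuln_id):
            continue
        if rule.get("package") and rule["package"] != package:
            continue  # 'package', when given, must match exactly
        return rule
    return None

with open("whitelist.csv", newline="", encoding="utf-8") as f:
    rules = list(csv.DictReader(f))

# CVE-2022-38663 on 'git' hits the 'CVE-20.*' rule, so it is whitelisted:
print(first_matching_rule(rules, "CVE-2022-38663", "git"))
```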
See the ghafscan [manual_analysis.csv](https://github.com/tiiuae/ghafscan/blob/main/manual_analysis.csv) for a more complete example and usage of the vulnxscan whitelisting feature.

### Find Vulnerabilities Given SBOM as Input
This example shows how to use `vulnxscan` to summarize vulnerabilities impacting components in a given CycloneDX SBOM.

First, we use `sbomnix` to generate an SBOM for the example target:

```bash
$ nix run .#sbomnix -- github:NixOS/nixpkgs/nixos-unstable#git
..
INFO Wrote: sbom.cdx.json
```

Then, give the generated SBOM as input to `vulnxscan`:

```bash
$ vulnxscan --sbom sbom.cdx.json

INFO Console report

Potential vulnerabilities impacting version_local:

| vuln_id       | url                                            | package   | version | severity | grype | osv | sum |
|---------------+------------------------------------------------+-----------+---------+----------+-------+-----+-----|
| CVE-2023-3817 | https://nvd.nist.gov/vuln/detail/CVE-2023-3817 | openssl   | 3.0.9   | 5.3      |     1 |   0 |   1 |
| CVE-2023-2975 | https://nvd.nist.gov/vuln/detail/CVE-2023-2975 | openssl   | 3.0.9   | 5.3      |     1 |   0 |   1 |
| MAL-2022-4301 | https://osv.dev/MAL-2022-4301                  | libidn2   | 2.3.4   |          |     0 |   1 |   1 |
| CVE-2016-2781 | https://nvd.nist.gov/vuln/detail/CVE-2016-2781 | coreutils | 9.3     | 6.5      |     1 |   0 |   1 |

INFO Wrote: vulns.csv
```

Notice that `vulnxscan` drops the vulnix scan when the input is an SBOM, because vulnix does not support SBOM input at the time of writing. Also notice that `vulnxscan` drops the patch auto-detection when the input is an SBOM: `vulnxscan` reads the patch information from nix derivations, so the patch information is only available when the given input is a Nix store path (e.g. a derivation or out-path), not an SBOM.
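For context, the scanners only need the component names and versions from the SBOM. A minimal sketch of pulling those pairs out of a CycloneDX JSON document, assuming the standard `components` array:

```python
import json

def sbom_components(path: str) -> list[tuple[str, str]]:
    """Return (name, version) pairs from a CycloneDX JSON SBOM."""
    with open(path, encoding="utf-8") as f:
        bom = json.load(f)
    return [
        (comp.get("name", ""), comp.get("version", ""))
        for comp in bom.get("components", [])
    ]

print(sbom_components("sbom.cdx.json"))
```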
### Find Vulnerabilities Impacting Buildtime and Runtime Dependencies
By default, `vulnxscan` scans the given target for vulnerabilities that impact its runtime dependencies only. This example shows how to also include buildtime dependencies in the scan:

```bash
$ vulnxscan ./result --buildtime

# ... output not included in this snippet ...
```

### Using Whitelist to Record Manual Analysis Results
`vulnxscan` supports using the whitelist CSV file as a more general record of manual analysis results by allowing non-whitelisting rules. That is, the whitelist CSV file can include a boolean `whitelist` column to indicate whether the matching vulnerabilities should be whitelisted. The default value for `whitelist` is True: if the `whitelist` column is missing or its value is empty, `vulnxscan` interprets the rule as if the `whitelist` value were True.

As an example, consider the following manual analysis record (i.e. 'whitelist'):

```
csvlook manual_analysis.csv

| vuln_id        | whitelist | package   | comment                                                            |
| -------------- | --------- | --------- | ------------------------------------------------------------------ |
| CVE-2022-0856  | False     | libcaca   | Not fixed upstream: https://github.com/cacalabs/libcaca/issues/65. |
| CVE-2021-32490 | False     | djvulibre | Pending merge: https://github.com/NixOS/nixpkgs/pull/246773.       |
```

The above example `manual_analysis.csv` includes two rules: one for `CVE-2022-0856` and one for `CVE-2021-32490`. For both, the `whitelist` column value is '`False`', indicating a non-whitelisting rule. This means that, in both cases, we want to record the manual analysis results as detailed in the `comment` column, but we don't want to whitelist the matching vulnerabilities. Specifically, in the case of `CVE-2022-0856`, we don't want to whitelist the issue since it's not fixed upstream, but we still want to record the link to the upstream issue to make it easier to follow the upstream progress. In the case of `CVE-2021-32490`, we don't want to whitelist the issue since the nixpkgs PR is still pending merge. In this case too, we still want to record the nixpkgs PR to allow following the progress.

See the ghafscan [manual_analysis.csv](https://github.com/tiiuae/ghafscan/blob/main/manual_analysis.csv) for a more complete example and usage of non-whitelisting rules to help manual analysis.

### Triage to Help Manual Analysis
`vulnxscan` can help manual analysis with the `--triage` and `--nixprs` command line options.

With the command line option `--triage`, `vulnxscan` queries repology.org for the nix-unstable and package upstream version information, as well as for the versions each CVE impacts. With the additional information from repology.org, `vulnxscan` classifies each vulnerability accordingly.
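A rough sketch of the kind of repology.org query this relies on is shown below. The endpoint follows the public Repology API (https://repology.org/api/v1), and the `repo`/`status` field names and the `nix_unstable` repository identifier are assumptions based on that API; the actual mapping from nix package names to repology project names is more involved than shown here:

```python
import requests

def repology_versions(project: str) -> tuple[list[str], list[str]]:
    """Return (nix-unstable versions, newest known versions) for a project."""
    resp = requests.get(
        f"https://repology.org/api/v1/project/{project}",
        headers={"User-Agent": "vulnxscan-example"},
        timeout=30,
    )
    resp.raise_for_status()
    packages = resp.json()
    # Each entry describes the project's packaging in one repository
    nix_unstable = [p["version"] for p in packages if p.get("repo") == "nix_unstable"]
    newest = [p["version"] for p in packages if p.get("status") == "newest"]
    return nix_unstable, newest

print(repology_versions("qemu"))
```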
Consider the following example, using [ghaf](https://github.com/tiiuae/ghaf) as the target:

```bash
# Run vulnxscan:
# --buildtime: Scan buildtime dependencies. Scanning buildtime dependencies does not
#              require building the target, which allows a relatively quick scan also
#              for targets not built earlier. Notice: nix 'buildtime' dependencies are
#              a superset of runtime dependencies.
# --whitelist: Use 'manual_analysis.csv' as a whitelist file.
# --triage   : Help manual analysis by querying version info from repology.org.

$ vulnxscan github:tiiuae/ghaf?ref=main#packages.x86_64-linux.generic-x86_64-release --buildtime --whitelist=manual_analysis.csv --triage

INFO Generating SBOM for target '/nix/store/...-nixos-disk-image.drv'
INFO CVE-2023-27371 for 'libmicrohttpd' is patched with: ['/nix/store/l53sq07v6hghm7cchcjbrwyvjyjag06r-CVE-2023-27371.patch']
INFO CVE-2023-2975 for 'openssl' is patched with: ['/nix/store/7gz0nj14469r9dlh8p0j5w5wjj3b6hw4-CVE-2023-2975.patch']
INFO CVE-2023-2975 for 'openssl' is patched with: ['/nix/store/7gz0nj14469r9dlh8p0j5w5wjj3b6hw4-CVE-2023-2975.patch']
INFO CVE-2023-2975 for 'openssl' is patched with: ['/nix/store/7gz0nj14469r9dlh8p0j5w5wjj3b6hw4-CVE-2023-2975.patch']
INFO CVE-2023-2975 for 'openssl' is patched with: ['/nix/store/7gz0nj14469r9dlh8p0j5w5wjj3b6hw4-CVE-2023-2975.patch']
INFO CVE-2023-2617 for 'opencv' is patched with: ['/nix/store/vw29nr5nrfs10vv5p3m7rpkqscwrh4sp-CVE-2023-2617.patch']
...

Potential vulnerabilities impacting version_local:

| vuln_id             | package    | severity | version_local | version_nixpkgs | version_upstream | classify                             |
|---------------------+------------+----------+---------------+-----------------+------------------+--------------------------------------|
| CVE-2023-40360      | qemu       | 5.5      | 8.0.2         | 8.1.0           | 8.1.0            | fix_update_to_version_nixpkgs        |
| CVE-2023-40359      | xterm      | 9.8      | 379           | 384             | 384              | fix_update_to_version_nixpkgs        |
| CVE-2023-39742      | giflib     | 5.5      | 5.2.1         | 5.2.1           | 5.2.1            | fix_not_available                    |
| CVE-2023-39533      | go         | 7.5      | 1.20.6        | 1.21.1          | 1.21.1           | fix_update_to_version_nixpkgs        |
| CVE-2023-38858      | faad2      | 6.5      | 2.10.1        | 2.10.1          | 2.10.1           | fix_not_available                    |
| CVE-2023-38857      | faad2      | 5.5      | 2.10.1        | 2.10.1          | 2.10.1           | fix_not_available                    |
| CVE-2023-38633      | librsvg    | 5.5      | 2.55.1        | 2.56.3          | 2.56.3           | fix_update_to_version_nixpkgs        |
| CVE-2023-37769      | pixman     | 6.5      | 0.42.2        | 0.42.2          | 0.42.2           | err_not_vulnerable_based_on_repology |
| CVE-2023-31484      | perl       | 8.1      | 5.36.0-env    | 5.38.0          | 5.38.0           | fix_update_to_version_nixpkgs        |
| CVE-2023-31484      | perl       | 8.1      | 5.36.0        | 5.38.0          | 5.38.0           | fix_update_to_version_nixpkgs        |
| CVE-2023-30571      | libarchive | 5.3      | 3.6.2         | 3.6.2           | 3.7.1            | fix_update_to_version_upstream       |
| CVE-2023-29409      | go         | 5.3      | 1.20.6        | 1.21.1          | 1.21.1           | fix_update_to_version_nixpkgs        |
| CVE-2023-29383      | shadow     | 3.3      | 4.13          | 4.13            | 4.14.0           | fix_update_to_version_upstream       |

... (output truncated) ...

INFO Wrote: /home/hrosten/projects/sbomnix-fork/vulns.csv
INFO Wrote: /home/hrosten/projects/sbomnix-fork/vulns.triage.csv
```

As an example, the output table states the following (a rough sketch of the classification logic follows the list):

- Package `qemu` 8.0.2, which is a dependency of ghaf, is potentially vulnerable to CVE-2023-40360.
- Based on repology.org, the newest `qemu` version in nix-unstable is 8.1.0, which is also the latest version in the `qemu` upstream.
- Since `qemu` 8.0.2 is vulnerable to CVE-2023-40360 but the nix-unstable version 8.1.0 is not, `vulnxscan` classifies the issue as `fix_update_to_version_nixpkgs`.
- Package `xterm` version 379 is potentially vulnerable to CVE-2023-40359. The latest version of `xterm` in nix-unstable is 384, which is not vulnerable to CVE-2023-40359. Therefore, `vulnxscan` classifies the issue as `fix_update_to_version_nixpkgs`.
- Package `libarchive` version 3.6.2 is potentially vulnerable to CVE-2023-30571. Both the local and nix-unstable versions (3.6.2) are vulnerable, but the upstream version 3.7.1 is not. Therefore, `vulnxscan` classifies the issue as `fix_update_to_version_upstream`.
- Package `giflib` version 5.2.1 is potentially vulnerable to CVE-2023-39742. Since there's no known fixed version available in nix-unstable or the package upstream, `vulnxscan` classifies the issue as `fix_not_available`. Notice that the classification is based only on the version numbers: it's still possible that an upstream patch available in an unreleased version of `giflib` would fix the issue.
- Package `pixman` version 0.42.2 is potentially vulnerable to CVE-2023-37769. However, based on repology.org, the vulnerability [does not impact](https://repology.org/project/pixman/cves?version=0.42.2) the given version of `pixman`. Therefore, `vulnxscan` classifies the issue as `err_not_vulnerable_based_on_repology`.
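The classification implied by these rules can be summarized with the following sketch. Here, `vulnerable(version)` stands in for the repology.org "is this version affected by the CVE?" lookup, which is not shown; the real logic also has to handle missing or ambiguous version data:

```python
def classify(version_local, version_nixpkgs, version_upstream, vulnerable):
    """Sketch of the triage classification, based only on version numbers."""
    if not vulnerable(version_local):
        return "err_not_vulnerable_based_on_repology"
    if not vulnerable(version_nixpkgs):
        return "fix_update_to_version_nixpkgs"
    if not vulnerable(version_upstream):
        return "fix_update_to_version_upstream"
    return "fix_not_available"

# Example: qemu 8.0.2 is affected, 8.1.0 is not
print(classify("8.0.2", "8.1.0", "8.1.0", lambda v: v == "8.0.2"))
```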
#### Nixpkgs PR Search
With the command line option `--nixprs`, `vulnxscan` queries GitHub for nixpkgs PRs that might include more information concerning possible nixpkgs fixes for the found vulnerabilities. `--nixprs` adds URLs for (at most five) PRs that appear relevant to each vulnerability based on a heuristic. The PR search takes significant time due to GitHub API rate limits, which is why it is not enabled by default.

Consider the following example, using the same Ghaf target as earlier:

```bash
# Run vulnxscan with --triage and --nixprs
$ vulnxscan github:tiiuae/ghaf?ref=main#packages.x86_64-linux.generic-x86_64-release --buildtime --whitelist=manual_analysis.csv --triage --nixprs

INFO Generating SBOM for target '/nix/store/...-nixos-disk-image.drv'
...

Potential vulnerabilities impacting version_local:

| vuln_id        | package    | severity | version_local | version_nixpkgs | version_upstream | classify                             | nixpkgs_pr                                   |
|----------------+------------+----------+---------------+-----------------+------------------+--------------------------------------+----------------------------------------------|
| CVE-2023-40360 | qemu       | 5.5      | 8.0.2         | 8.1.0           | 8.1.0            | fix_update_to_version_nixpkgs        | https://github.com/NixOS/nixpkgs/pull/251154 |
| CVE-2023-40359 | xterm      | 9.8      | 379           | 384             | 384              | fix_update_to_version_nixpkgs        | https://github.com/NixOS/nixpkgs/pull/244141 |
| CVE-2023-39742 | giflib     | 5.5      | 5.2.1         | 5.2.1           | 5.2.1            | fix_not_available                    |                                              |
| CVE-2023-39533 | go         | 7.5      | 1.20.6        | 1.21.1          | 1.21.1           | fix_update_to_version_nixpkgs        | https://github.com/NixOS/nixpkgs/pull/253738 |
| CVE-2023-38858 | faad2      | 6.5      | 2.10.1        | 2.10.1          | 2.10.1           | fix_not_available                    |                                              |
| CVE-2023-38857 | faad2      | 5.5      | 2.10.1        | 2.10.1          | 2.10.1           | fix_not_available                    |                                              |
| CVE-2023-38633 | librsvg    | 5.5      | 2.55.1        | 2.56.3          | 2.56.3           | fix_update_to_version_nixpkgs        | https://github.com/NixOS/nixpkgs/pull/246763 |
|                |            |          |               |                 |                  |                                      | https://github.com/NixOS/nixpkgs/pull/246860 |
| CVE-2023-37769 | pixman     | 6.5      | 0.42.2        | 0.42.2          | 0.42.2           | err_not_vulnerable_based_on_repology |                                              |
| CVE-2023-31484 | perl       | 8.1      | 5.36.0-env    | 5.38.0          | 5.38.0           | fix_update_to_version_nixpkgs        | https://github.com/NixOS/nixpkgs/pull/241848 |
|                |            |          |               |                 |                  |                                      | https://github.com/NixOS/nixpkgs/pull/247547 |
| CVE-2023-31484 | perl       | 8.1      | 5.36.0        | 5.38.0          | 5.38.0           | fix_update_to_version_nixpkgs        | https://github.com/NixOS/nixpkgs/pull/241848 |
|                |            |          |               |                 |                  |                                      | https://github.com/NixOS/nixpkgs/pull/247547 |
| CVE-2023-30571 | libarchive | 5.3      | 3.6.2         | 3.6.2           | 3.7.1            | fix_update_to_version_upstream       |                                              |
| CVE-2023-29409 | go         | 5.3      | 1.20.6        | 1.21.1          | 1.21.1           | fix_update_to_version_nixpkgs        | https://github.com/NixOS/nixpkgs/pull/247034 |
|                |            |          |               |                 |                  |                                      | https://github.com/NixOS/nixpkgs/pull/253738 |
| CVE-2023-29383 | shadow     | 3.3      | 4.13          | 4.13            | 4.14.0           | fix_update_to_version_upstream       | https://github.com/NixOS/nixpkgs/pull/233924 |
|                |            |          |               |                 |                  |                                      | https://github.com/NixOS/nixpkgs/pull/254143 |
```

The `vulnxscan` option `--nixprs` adds the column `nixpkgs_pr` to the output, helping manual analysis by listing PRs that appear relevant to the given issue.

## Footnotes and Future Work
For now, consider `vulnxscan` a demonstration. Some improvement ideas are listed below:
- Consider adding patch information to the SBOM (e.g. via the [pedigree](https://cyclonedx.org/use-cases/#pedigree) attribute) to be able to auto-detect patched vulnerabilities also when the input is an SBOM.
- Vulnerability scanners lack support for parsing the patch data: even if `sbomnix` added the patch data to the output SBOM, we suspect not many vulnerability scanners would read the information. As an example, the following discussion touches on this topic for DependencyTrack: https://github.com/DependencyTrack/dependency-track/issues/919.
- Identifying packages is hard, as pointed out in https://discourse.nixos.org/t/the-future-of-the-vulnerability-roundups/22424/5.
  As an example, CPEs are inaccurate, which causes issues in matching vulnerabilities: https://github.com/DependencyTrack/dependency-track/discussions/2290.
- The Nix ecosystem is not supported in OSV: the way `osv.py` uses OSV data for Nix targets -- as explained in section [Nix and OSV vulnerability database](#nix-and-osv-vulnerability-database) -- means the reported OSV vulnerabilities include false positives.

### Other Future Work
- [vulnxscan](../src/vulnxscan/vulnxscan_cli.py) could include more scanners in addition to [vulnix](https://github.com/nix-community/vulnix), [grype](https://github.com/anchore/grype), and [osv.py](../src/vulnxscan/osv.py). Suggestions for other open-source scanners, especially those that can digest CycloneDX or SPDX SBOMs, are welcome. Consider e.g. [bombon](https://github.com/nikstur/bombon) and [cve-bin-tool](https://github.com/intel/cve-bin-tool). Adding cve-bin-tool to vulnxscan was [demonstrated](https://github.com/tiiuae/sbomnix/pull/75) earlier, but not merged for the reasons explained in the [PR](https://github.com/tiiuae/sbomnix/pull/75#issuecomment-1670958503).



================================================
FILE: flake.nix
================================================
# SPDX-FileCopyrightText: 2023 Technology Innovation Institute (TII)
#
# SPDX-License-Identifier: Apache-2.0
{
  description = "Flakes file for sbomnix";

  inputs = {
    nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
    flake-parts.url = "github:hercules-ci/flake-parts";
    flake-root.url = "github:srid/flake-root";

    # For preserving compatibility with non-Flake users
    flake-compat = {
      url = "github:nix-community/flake-compat";
      flake = false;
    };

    # pre-commit hooks
    git-hooks-nix = {
      url = "github:cachix/git-hooks.nix";
      inputs = {
        nixpkgs.follows = "nixpkgs";
        flake-compat.follows = "flake-compat";
      };
    };
  };

  outputs =
    inputs@{ flake-parts, ... }:
    flake-parts.lib.mkFlake { inherit inputs; } {
      systems = [
        "x86_64-linux"
        "aarch64-linux"
        "aarch64-darwin"
      ];
      imports = [ ./nix ];
    };
}


================================================
FILE: nix/apps.nix
================================================
# SPDX-FileCopyrightText: 2023 Technology Innovation Institute (TII)
#
# SPDX-License-Identifier: Apache-2.0
{
  perSystem =
    {
      self',
      ...
}: { apps = let inherit (self'.packages) sbomnix; mkApp = program: description: { type = "app"; inherit program; meta = { inherit description; }; }; in { # nix run .#repology_cli repology_cli = mkApp "${sbomnix}/bin/repology_cli" "Query Repology using an SBOM as input"; # nix run .#repology_cve repology_cve = mkApp "${sbomnix}/bin/repology_cve" "Find CVEs for packages known to Repology"; # nix run .#nix_outdated nix_outdated = mkApp "${sbomnix}/bin/nix_outdated" "List outdated nix dependencies in priority order"; # nix run .#nixgraph nixgraph = mkApp "${sbomnix}/bin/nixgraph" "Visualize nix package dependencies"; # nix run .#nixmeta nixmeta = mkApp "${sbomnix}/bin/nixmeta" "Summarize nixpkgs meta-attributes"; # nix run .#vulnxscan vulnxscan = mkApp "${sbomnix}/bin/vulnxscan" "Scan nix artifacts or SBOMs for vulnerabilities"; # nix run .#provenance provenance = mkApp "${sbomnix}/bin/provenance" "Generate SLSA provenance for a nix target"; }; }; } ================================================ FILE: nix/default.nix ================================================ # SPDX-FileCopyrightText: 2023 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 { imports = [ ./apps.nix ./formatter.nix ./packages.nix ./git-hooks.nix ]; } ================================================ FILE: nix/formatter.nix ================================================ # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 { ... }: { perSystem = { config, pkgs, ... }: { formatter = let inherit (config.pre-commit.settings) package configFile; in pkgs.writeShellScriptBin "pre-commit-run" '' exec ${pkgs.lib.getExe package} run --all-files --config ${configFile} ''; }; } ================================================ FILE: nix/git-hooks.nix ================================================ # SPDX-FileCopyrightText: 2025-2026 TII (SSRC) and the Ghaf contributors # SPDX-License-Identifier: Apache-2.0 { inputs, ... }: { imports = with inputs; [ git-hooks-nix.flakeModule ]; perSystem = { pkgs, ... }: let pyrightPythonEnv = pkgs.python3.withPackages ( pp: with pp; [ beautifulsoup4 colorlog dfdiskcache filelock graphviz pp."license-expression" numpy packageurl-python packaging pandas pyrate-limiter reuse requests requests-cache requests-ratelimiter tabulate ] ); pyrightWrapper = pkgs.writeShellScriptBin "pyright-sbomnix" '' exec ${pkgs.lib.getExe pkgs.pyright} --pythonpath ${pyrightPythonEnv}/bin/python "$@" ''; in { pre-commit = { settings.hooks = { gitlint.enable = true; typos = { enable = true; excludes = [ "^LICENSES/.*" "^tests/resources/.*" ]; }; end-of-file-fixer = { enable = true; excludes = [ "^LICENSES/.*" "^tests/resources/.*" ]; }; trim-trailing-whitespace = { enable = true; excludes = [ "^LICENSES/.*" "^tests/resources/.*" ]; }; actionlint.enable = true; deadnix.enable = true; nixfmt.enable = true; pyright = { enable = true; pass_filenames = false; settings.binPath = "${pyrightWrapper}/bin/pyright-sbomnix"; }; ruff.enable = true; ruff-format.enable = true; reuse.enable = true; shellcheck.enable = true; statix = { enable = true; args = [ "fix" ]; }; }; }; }; } ================================================ FILE: nix/packages.nix ================================================ # SPDX-FileCopyrightText: 2023 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 { self, ... }: { perSystem = { pkgs, lib, config, self', ... 
}: let pp = pkgs.python3.pkgs; baseVersion = pkgs.lib.removeSuffix "\n" (builtins.readFile ../VERSION); # Append git state so local builds are distinguishable from release # artifacts. shortRev is set on a clean tree; dirtyShortRev (Nix >= 2.14) # is set when the working tree has uncommitted changes. gitSuffix = if self ? shortRev then "+g${self.shortRev}" else if self ? dirtyShortRev then "+g${self.dirtyShortRev}" else ""; # Thin wrapper that calls a module entry point via the ambient python3. # PYTHONPATH (set in shellHook) resolves to the local src/, so edits are # picked up without reinstalling. mkDevEntry = name: module: pkgs.writeShellScriptBin name '' exec python3 -c "import sys; sys.argv[0]='${name}'; from ${module} import main; main()" "$@" ''; prefix_path = with pkgs; [ git graphviz grype nix nix-visualize vulnix ]; check_inputs = with pp; [ hypothesis jsonschema pytest pytest-cov pytest-xdist ]; build_system = with pp; [ setuptools ]; build_inputs = with pp; [ beautifulsoup4 colorlog dfdiskcache filelock graphviz pp."license-expression" numpy packageurl-python packaging pandas pyrate-limiter reuse requests requests-cache requests-ratelimiter tabulate ]; in { packages = rec { default = sbomnix; sbomnix = pp.buildPythonPackage { pname = "sbomnix"; version = "${baseVersion}${gitSuffix}"; pyproject = true; src = lib.cleanSource ../.; postPatch = '' printf '%s' "${baseVersion}${gitSuffix}" > VERSION ''; build-system = build_system; nativeCheckInputs = check_inputs; dependencies = build_inputs; pythonImportsCheck = [ "sbomnix" ]; makeWrapperArgs = [ "--prefix PATH : ${lib.makeBinPath prefix_path}" ]; }; }; checks = # Force a build of all packages during a `nix flake check`. with lib; mapAttrs' (n: nameValuePair "package-${n}") self'.packages; devShells.default = pkgs.mkShell { name = "sbomnix-devshell"; packages = [ pkgs.pyright # for running pyright manually in devshell pkgs.ruff # for running ruff manually in devshell ] ++ check_inputs ++ build_system ++ build_inputs ++ [ (mkDevEntry "sbomnix" "sbomnix.main") (mkDevEntry "nixgraph" "nixgraph.main") (mkDevEntry "nixmeta" "nixmeta.main") (mkDevEntry "nix_outdated" "nixupdate.nix_outdated") (mkDevEntry "vulnxscan" "vulnxscan.vulnxscan_cli") (mkDevEntry "repology_cli" "repology.repology_cli") (mkDevEntry "repology_cve" "repology.repology_cve") (mkDevEntry "provenance" "provenance.main") ]; # Add the repo root to PYTHONPATH, so invoking entrypoints (and them being # able to find the python packages in the repo) becomes possible. # `pytest.ini` already sets this for invoking `pytest` # (cascading down to the processes it spawns), but this is for the developer # invoking entrypoints from inside the devshell. shellHook = '' ${config.pre-commit.installationScript} echo 1>&2 "Welcome to the development shell!" 
export PATH=${lib.makeBinPath prefix_path}:$PATH export PYTHONPATH="$PYTHONPATH:$(pwd)/src" # https://github.com/NixOS/nix/issues/1009: export TMPDIR="/tmp" ''; }; }; } ================================================ FILE: pyproject.toml ================================================ # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # SPDX-License-Identifier: Apache-2.0 [build-system] requires = ["setuptools>=61"] build-backend = "setuptools.build_meta" [project] name = "sbomnix" dynamic = ["version"] description = "Utility that generates SBOMs from nix packages" readme = { file = "README.md", content-type = "text/markdown" } requires-python = ">=3.10" license = { text = "Apache-2.0" } authors = [{ name = "TII", email = "henri.rosten@unikie.com" }] classifiers = [ "Development Status :: 3 - Alpha", "License :: OSI Approved :: Apache Software License", "Operating System :: POSIX :: Linux", "Programming Language :: Python :: 3 :: Only", ] dependencies = [ "beautifulsoup4", "colorlog", "df-diskcache", "filelock", "graphviz", "license-expression", "numpy", "packageurl-python", "packaging", "pandas", "reuse", "requests", "requests-cache", "requests-ratelimiter", "tabulate", ] [project.urls] Homepage = "https://github.com/tiiuae/sbomnix" [project.scripts] sbomnix = "sbomnix.main:main" nixgraph = "nixgraph.main:main" nixmeta = "nixmeta.main:main" nix_outdated = "nixupdate.nix_outdated:main" vulnxscan = "vulnxscan.vulnxscan_cli:main" repology_cli = "repology.repology_cli:main" repology_cve = "repology.repology_cve:main" provenance = "provenance.main:main" [tool.setuptools] license-files = ["LICENSES/Apache-2.0.txt", "LICENSES/BSD-3-Clause.txt"] [tool.setuptools.dynamic] version = { file = ["VERSION"] } [tool.setuptools.packages.find] where = ["src"] [tool.ruff] line-length = 88 target-version = "py310" [tool.ruff.lint] preview = true select = [ "B", "E4", "E7", "E9", "F", "I", "PLE", "PLW", "PLR0911", "PLR0912", "PLR0913", "PLR0914", "PLR0915", "PLR0917", "RUF100", ] [tool.ruff.lint.isort] known-first-party = [ "common", "nixgraph", "nixmeta", "nixupdate", "provenance", "repology", "sbomnix", "vulnxscan", ] ================================================ FILE: pyrightconfig.json ================================================ { "include": ["src"], "extraPaths": ["src"], "pythonVersion": "3.10", "typeCheckingMode": "standard", "reportMissingTypeStubs": false, } ================================================ FILE: pytest.ini ================================================ # SPDX-FileCopyrightText: 2022-2023 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 [pytest] pythonpath = . addopts = --strict-markers markers = integration: indicates a CLI or cross-module integration test. network: indicates a test that relies on external network access. slow: indicates a slow test. grype: indicates a test that invokes grype (triggers grype DB pre-warm). real_vulnix: opt-in tests that execute the real vulnix binary. 
================================================ FILE: scripts/check-fast.sh ================================================ #!/usr/bin/env bash # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 set -euo pipefail nix fmt nix --extra-experimental-features 'flakes nix-command' flake check --no-build nix develop --command ./scripts/run-pytest-lane.sh fast ================================================ FILE: scripts/check-full.sh ================================================ #!/usr/bin/env bash # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 set -euo pipefail nix --extra-experimental-features 'flakes nix-command' flake check nix develop --command ./scripts/run-pytest-lane.sh full ================================================ FILE: scripts/release-asset.sh ================================================ #!/usr/bin/env bash # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 set -euo pipefail mkdir -p build/ release_target=".#sbomnix" nix run .#sbomnix -- "$release_target" \ --cdx=./build/sbom.runtime.cdx.json \ --spdx=./build/sbom.runtime.spdx.json \ --csv=./build/sbom.runtime.csv nix run .#sbomnix -- --buildtime "$release_target" \ --cdx=./build/sbom.buildtime.cdx.json \ --spdx=./build/sbom.buildtime.spdx.json \ --csv=./build/sbom.buildtime.csv echo echo "Built release asset:" ls -la build ================================================ FILE: scripts/run-pytest-lane.sh ================================================ #!/usr/bin/env bash # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 set -euo pipefail usage() { echo "usage: $0 {fast|full}" >&2 exit 2 } lane="${1:-}" marker_expr="" coverage=false pytest_args=( -n auto -x ) case "$lane" in fast) marker_expr="not slow and not network" pytest_args+=(-v --durations=10) ;; full) coverage=true pytest_args+=(-v --durations=20) ;; *) usage ;; esac if $coverage; then pytest_args+=( --cov=src --cov-report=term-missing --cov-report=xml ) fi if [ -n "$marker_expr" ]; then pytest_args+=(-m "$marker_expr") fi pytest "${pytest_args[@]}" tests/ ================================================ FILE: shell.nix ================================================ # SPDX-FileCopyrightText: 2023 Technology Innovation Institute (TII) # SPDX-FileCopyrightText: 2020-2023 Eelco Dolstra and the flake-compat contributors # # SPDX-License-Identifier: MIT # This file originates from: # https://github.com/nix-community/flake-compat # This file provides backward compatibility to nix < 2.4 clients { system ? 
builtins.currentSystem, }: let lock = builtins.fromJSON (builtins.readFile ./flake.lock); inherit (lock.nodes.flake-compat.locked) owner repo rev narHash ; flake-compat = fetchTarball { url = "https://github.com/${owner}/${repo}/archive/${rev}.tar.gz"; sha256 = narHash; }; flake = import flake-compat { inherit system; src = ./.; }; in flake.shellNix ================================================ FILE: src/common/__init__.py ================================================ # SPDX-FileCopyrightText: 2022 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 ================================================ FILE: src/common/cli_args.py ================================================ # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Common argparse helper functions.""" import argparse import sys from weakref import WeakSet from common.pkgmeta import get_py_pkg_version _VERBOSE_COUNT_DEST = "_verbose_count" _VERBOSE_WRAPPED_PARSERS = WeakSet() class _VerboseCountAction(argparse.Action): """Count repeated short verbose flags without using parser defaults.""" def __init__(self, option_strings, dest, nargs=0, **kwargs): if nargs != 0: raise ValueError("nargs must be 0") super().__init__(option_strings, dest, nargs=0, **kwargs) def __call__(self, _parser, namespace, _values, _option_string=None): count = getattr(namespace, _VERBOSE_COUNT_DEST, 0) + 1 setattr(namespace, _VERBOSE_COUNT_DEST, count) setattr(namespace, self.dest, count) def check_positive(val): """Raise ArgumentTypeError if val is not a positive integer.""" intval = int(val) if intval <= 0: raise argparse.ArgumentTypeError(f"{val} is not a positive integer") return intval def _is_integer(value): """Return True if value can be parsed as an integer.""" try: int(value) except ValueError: return False return True def _normalize_verbose_args(args): """Normalize compact short verbose values before argparse sees positionals.""" normalized = [] args = list(sys.argv[1:] if args is None else args) idx = 0 while idx < len(args): arg = args[idx] if arg == "-v" and idx + 1 < len(args) and _is_integer(args[idx + 1]): normalized.append(f"--verbose={args[idx + 1]}") idx += 2 continue if arg.startswith("-v") and arg != "-v": value = arg[2:] if value.startswith("="): value = value[1:] if value and _is_integer(value): normalized.append(f"--verbose={value}") idx += 1 continue normalized.append(arg) idx += 1 return normalized def _finalize_verbose_namespace(namespace): """Remove internal argparse bookkeeping from the parsed namespace.""" if hasattr(namespace, _VERBOSE_COUNT_DEST): delattr(namespace, _VERBOSE_COUNT_DEST) return namespace def _wrap_verbose_parser(parser): """Teach parse_known_args to normalize compact short verbose values.""" if parser in _VERBOSE_WRAPPED_PARSERS: return parse_known_args = parser.parse_known_args def parse_known_args_with_verbose(args=None, namespace=None): namespace, extras = parse_known_args( _normalize_verbose_args(args), namespace, ) return _finalize_verbose_namespace(namespace), extras parser.parse_known_args = parse_known_args_with_verbose _VERBOSE_WRAPPED_PARSERS.add(parser) def add_verbose_argument(parser, default=0, max_level=3, root_parser=None): """Add a standard verbose flag to an argparse parser.""" _wrap_verbose_parser(root_parser or parser) parser.set_defaults(verbose=default, **{_VERBOSE_COUNT_DEST: 0}) levels = ["0=INFO", "1=VERBOSE", "2=DEBUG", "3=SPAM"] level_help = ", ".join(levels[: max_level + 1]) short_help = ( 
f"Increase verbosity; repeat as -vv for DEBUG (default: --verbose={default})" ) long_help = ( f"Set verbosity level explicitly ({level_help}) (default: --verbose={default})" ) parser.add_argument( "-v", action=_VerboseCountAction, dest="verbose", help=short_help, ) parser.add_argument( "--verbose", type=int, dest="verbose", metavar="N", help=long_help, ) def add_version_argument(parser, package="sbomnix"): """Add a standard version flag to an argparse parser.""" parser.add_argument( "--version", action="version", version=get_py_pkg_version(package) ) ================================================ FILE: src/common/columns.py ================================================ # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Shared DataFrame column names used across package boundaries.""" COMMENT = "comment" COUNT = "count" CPE = "cpe" CLASSIFY = "classify" DEPENDENCY_UID = "dependency_uid" LEVEL = "level" MODIFIED = "modified" NAME = "name" NEWEST_UPSTREAM_RELEASE = "newest_upstream_release" NIXPKGS_PR = "nixpkgs_pr" OUTPUTS = "outputs" PACKAGE = "package" PACKAGE_REPOLOGY = "package_repology" PATCHED = "patched" PNAME = "pname" POTENTIALLY_VULNERABLE = "potentially_vulnerable" RAW_NAME = "raw_name" REPO = "repo" REPO_VERSION_CLASSIFY = "repo_version_classify" SBOM_VERSION_CLASSIFY = "sbom_version_classify" SCANNER = "scanner" SEVERITY = "severity" SIMILARITY = "similarity" SORTCOL = "sortcol" SRC_PATH = "src_path" STATUS = "status" STORE_PATH = "store_path" SUM = "sum" TARGET_PATH = "target_path" URL = "url" VERSION = "version" VERSION_CMP = "version_cmp" VERSION_LOCAL = "version_local" VERSION_NIXPKGS = "version_nixpkgs" VERSION_REPOLOGY = "version_repology" VERSION_SBOM = "version_sbom" VERSION_UPSTREAM = "version_upstream" VULN_ID = "vuln_id" WHITELIST = "whitelist" WHITELIST_COMMENT = "whitelist_comment" ================================================ FILE: src/common/df.py ================================================ # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Shared dataframe helpers.""" import csv import logging import urllib.error from typing import Literal, cast, overload import pandas as pd from tabulate import tabulate from common.errors import CsvLoadError from common.log import LOG def df_to_csv_file(df, name, loglevel=logging.INFO): """Write dataframe to csv file.""" df.to_csv( path_or_buf=name, quoting=csv.QUOTE_ALL, sep=",", index=False, encoding="utf-8" ) LOG.log(loglevel, "Wrote: %s", name) @overload def df_from_csv_file(name, exit_on_error: Literal[True] = True) -> pd.DataFrame: ... @overload def df_from_csv_file( name, exit_on_error: Literal[False], ) -> pd.DataFrame | None: ... 
def df_from_csv_file(name, exit_on_error=True): """Read csv file into dataframe.""" LOG.debug("Reading: %s", name) try: df = pd.read_csv(name, keep_default_na=False, dtype=str) df.reset_index(drop=True, inplace=True) return df except ( pd.errors.EmptyDataError, pd.errors.ParserError, urllib.error.HTTPError, urllib.error.URLError, ) as error: if exit_on_error: raise CsvLoadError(name, error) from error LOG.debug("Error reading csv file '%s':\n%s", name, error) return None def df_regex_filter(df: pd.DataFrame, column: str, regex: str) -> pd.DataFrame: """Return rows where column `column` values match the given regex.""" LOG.debug("column:'%s', regex:'%s'", column, regex) return cast(pd.DataFrame, df[df[column].str.contains(regex, regex=True, na=False)]) def df_log(df, loglevel, tablefmt="presto"): """Log dataframe with given loglevel and tablefmt.""" if LOG.isEnabledFor(loglevel): if df is None or df.empty: return df = df.fillna("") table = tabulate( df, headers="keys", tablefmt=tablefmt, stralign="left", showindex=False ) LOG.log(loglevel, "\n%s\n", table) ================================================ FILE: src/common/errors.py ================================================ # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Shared exception types for expected user-facing failures.""" import os import shlex class SbomnixError(RuntimeError): """Base class for expected user-facing errors.""" class FlakeRefResolutionError(SbomnixError): """Raised when an input looks like a flakeref but cannot be resolved.""" def __init__(self, flakeref, stderr="", action="evaluating"): self.flakeref = flakeref self.stderr = "" if stderr is None else str(stderr) message = f"Failed {action} flakeref '{flakeref}'" stderr_summary = self.stderr.strip() if stderr_summary: message += f": {stderr_summary}" super().__init__(message) class FlakeRefRealisationError(FlakeRefResolutionError): """Raised when a flakeref resolves but cannot be force-realised.""" def __init__(self, flakeref, stderr=""): super().__init__(flakeref, stderr=stderr, action="force-realising") class CsvLoadError(SbomnixError): """Raised when a CSV input cannot be read.""" def __init__(self, name, error): self.name = name self.error = error super().__init__(f"Error reading csv file '{name}':\n{error}") class CommandNotFoundError(SbomnixError): """Raised when a required executable is not available in PATH.""" def __init__(self, name): self.name = name super().__init__(f"command '{name}' is not in PATH") class NixCommandError(SbomnixError): """Raised when a required Nix command fails.""" def __init__(self, command, stderr="", stdout=""): self.command = _format_command(command) self.stderr = "" if stderr is None else str(stderr) self.stdout = "" if stdout is None else str(stdout) message = f"Failed running Nix command `{self.command}`" detail = self.stderr.strip() or self.stdout.strip() if detail: message += f": {detail}" super().__init__(message) class InvalidNixArtifactError(SbomnixError): """Raised when a CLI target is not a valid nix artifact.""" def __init__(self, path): self.path = path super().__init__(f"Specified target is not a nix artifact: '{path}'") class InvalidNixJsonError(SbomnixError): """Raised when a Nix JSON interface returns an unsupported shape.""" def __init__(self, command, detail): self.command = command self.detail = detail super().__init__( f"Unexpected JSON from `{command}`: {detail}. " "The pinned Nix output schema may have changed; refusing to continue." 
        )


class MissingNixDeriverError(SbomnixError):
    """Raised when a nix artifact cannot be mapped back to a derivation."""

    def __init__(self, path):
        self.path = path
        super().__init__(f"No deriver found for: '{path}'")


class MissingNixDerivationMetadataError(SbomnixError):
    """Raised when an artifact has no derivation metadata to model as a package."""

    def __init__(self, path):
        self.path = path
        super().__init__(f"No derivation metadata found for: '{path}'")


class MissingNixOutPathError(SbomnixError):
    """Raised when a derivation does not expose an out path."""

    def __init__(self, path):
        self.path = path
        super().__init__(f"No outpath found for: '{path}'")


class InvalidCpeDictionaryError(SbomnixError):
    """Raised when the downloaded CPE dictionary has invalid columns."""

    def __init__(self, required_cols):
        self.required_cols = tuple(sorted(required_cols))
        super().__init__(
            f"Missing required columns {list(self.required_cols)} from cpedict"
        )


class WhitelistApplicationError(SbomnixError):
    """Raised when vulnerability whitelist application cannot proceed."""

    def __init__(self, message):
        super().__init__(message)


class InvalidSbomError(SbomnixError):
    """Raised when a supplied SBOM path is invalid."""

    def __init__(self, path):
        self.path = path
        super().__init__(f"Specified sbom target is not a json file: '{path}'")


def _format_command(command):
    if isinstance(command, bytes):
        return command.decode(errors="replace")
    if isinstance(command, str):
        return command
    return shlex.join(os.fspath(part) for part in command)



================================================
FILE: src/common/flakeref.py
================================================
# SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII)
#
# SPDX-License-Identifier: Apache-2.0

"""Flakeref resolution helpers."""

import logging
import pathlib
import re

from common.errors import FlakeRefRealisationError, FlakeRefResolutionError
from common.log import LOG, LOG_VERBOSE
from common.nix_utils import parse_nix_derivation_show
from common.proc import ExecCmdFn, exec_cmd, nix_cmd

NIXOS_CONFIGURATION_TOPLEVEL_SUFFIX = ".config.system.build.toplevel"

_NIXOS_CONFIGURATION_PREFIX_RE = re.compile(
    r"^(?P<flake>.+)#nixosConfigurations\.(?P<rest>.+)$"
)
_UNQUOTED_ATTR_SEGMENT_RE = re.compile(r"^[A-Za-z0-9_'-]+$")
_NIX_STRING_ESCAPES = {
    '"': '"',
    "\\": "\\",
    "n": "\n",
    "r": "\r",
    "t": "\t",
}


def try_resolve_flakeref(  # noqa: PLR0913
    flakeref: str,
    force_realise: bool = False,
    impure: bool = False,
    derivation: bool = False,
    *,
    exec_cmd_fn: ExecCmdFn | None = None,
    log: logging.Logger | None = None,
) -> str | None:
    """
    Resolve flakeref to out-path, force-realising the output if
    ``force_realise`` is True.
    """
    exec_cmd_fn = exec_cmd if exec_cmd_fn is None else exec_cmd_fn
    log = LOG if log is None else log
    looks_like_flakeref = _looks_like_flakeref(flakeref)
    if derivation and not force_realise and looks_like_flakeref:
        log.info("Evaluating flakeref '%s'", flakeref)
        cmd = nix_cmd("derivation", "show", flakeref, impure=impure)
        ret = exec_cmd_fn(
            cmd, raise_on_error=False, return_error=True, log_error=False
        )
        if ret is None or ret.returncode != 0:
            raise FlakeRefResolutionError(flakeref, ret.stderr if ret else "")
        drv_paths = parse_nix_derivation_show(ret.stdout)
        drv_path = next(iter(drv_paths), "")
        if not drv_path:
            raise FlakeRefResolutionError(
                flakeref,
                "nix derivation show returned no derivation path",
            )
        log.debug("flakeref='%s' maps to derivation='%s'", flakeref, drv_path)
        return drv_path
    if force_realise and looks_like_flakeref:
        log.info("Realising flakeref '%s'", flakeref)
        cmd = nix_cmd(
            "build",
            "--no-link",
            "--print-out-paths",
            flakeref,
            impure=impure,
        )
        ret = exec_cmd_fn(
            cmd, raise_on_error=False, return_error=True, log_error=False
        )
        if ret is None or ret.returncode != 0:
            raise FlakeRefRealisationError(flakeref, ret.stderr if ret else "")
        nixpath = _first_output_path(ret.stdout)
        if not nixpath:
            raise FlakeRefRealisationError(
                flakeref,
                "nix build returned no output path",
            )
        log.debug("flakeref='%s' maps to path='%s'", flakeref, nixpath)
        return nixpath
    if looks_like_flakeref:
        log.info("Evaluating flakeref '%s'", flakeref)
    else:
        log.log(LOG_VERBOSE, "Evaluating '%s'", flakeref)
    cmd = nix_cmd("eval", "--raw", flakeref, impure=impure)
    ret = exec_cmd_fn(cmd, raise_on_error=False, return_error=True, log_error=False)
    if ret is None or ret.returncode != 0:
        if looks_like_flakeref:
            raise FlakeRefResolutionError(flakeref, ret.stderr if ret else "")
        log.debug("not a flakeref: '%s'", flakeref)
        return None
    nixpath = ret.stdout.strip()
    log.debug("flakeref='%s' maps to path='%s'", flakeref, nixpath)
    if not force_realise:
        return nixpath
    log.info("Realising flakeref '%s'", flakeref)
    cmd = nix_cmd("build", "--no-link", flakeref, impure=impure)
    ret = exec_cmd_fn(cmd, raise_on_error=False, return_error=True, log_error=False)
    if ret is None or ret.returncode != 0:
        raise FlakeRefRealisationError(flakeref, ret.stderr if ret else "")
    return nixpath


def _first_output_path(stdout: str) -> str:
    """Return the first output path printed by ``nix build --print-out-paths``."""
    return next((line.strip() for line in stdout.splitlines() if line.strip()), "")


def parse_nixos_configuration_ref(
    flakeref: str,
    *,
    suffix: str = "",
) -> tuple[str, str] | None:
    """
    Parse ``<flake>#nixosConfigurations.<name>``.

    ``name`` may be either an unquoted attr segment or a quoted segment
    such as ``"host.example.com"``. The returned name is decoded and safe
    to re-quote.
""" match = _NIXOS_CONFIGURATION_PREFIX_RE.match(flakeref or "") if not match: return None parsed = _consume_nix_attr_segment(match.group("rest")) if not parsed: return None name, tail = parsed if tail != suffix: return None return match.group("flake"), name def quote_nix_attr_segment(name: str) -> str: """Return a safely quoted Nix attr path segment.""" escaped = [] idx = 0 while idx < len(name): if name.startswith("${", idx): escaped.append(r"\${") idx += 2 continue char = name[idx] if char == '"': escaped.append('\\"') elif char == "\\": escaped.append("\\\\") elif char == "\n": escaped.append("\\n") elif char == "\r": escaped.append("\\r") elif char == "\t": escaped.append("\\t") else: escaped.append(char) idx += 1 return '"' + "".join(escaped) + '"' def _consume_nix_attr_segment(value: str) -> tuple[str, str] | None: if not value: return None if value.startswith('"'): end = _find_quoted_attr_end(value) if end is None: return None raw_segment = value[: end + 1] segment = _decode_nix_quoted_attr_segment(raw_segment) if segment is None: return None return segment, value[end + 1 :] segment, separator, tail = value.partition(".") if not segment or not _UNQUOTED_ATTR_SEGMENT_RE.match(segment): return None return segment, f"{separator}{tail}" if separator else "" def _decode_nix_quoted_attr_segment(value: str) -> str | None: end = len(value) - 1 if len(value) < 2 or value[0] != '"' or value[end] != '"': return None decoded = [] idx = 1 while idx < end: char = value[idx] if char == "$" and idx + 1 < end and value[idx + 1] == "{": return None if char != "\\": decoded.append(char) idx += 1 continue idx += 1 if idx >= end: return None escaped = value[idx] if escaped == "$" and idx + 1 < end and value[idx + 1] == "{": decoded.append("${") idx += 2 continue decoded.append(_NIX_STRING_ESCAPES.get(escaped, f"\\{escaped}")) idx += 1 return "".join(decoded) def _find_quoted_attr_end(value: str) -> int | None: escaped = False for idx, char in enumerate(value[1:], start=1): if escaped: escaped = False continue if char == "\\": escaped = True continue if char == '"': return idx return None def _looks_like_flakeref(flakeref: str) -> bool: """Return true if the input is likely intended as a flake reference.""" looks_like = False if flakeref: path = pathlib.Path(flakeref) if path.exists(): looks_like = path.is_dir() and (path / "flake.nix").exists() else: looks_like = ( flakeref.startswith("nixpkgs=") or "#" in flakeref or "?" in flakeref or re.match(r"^[A-Za-z][A-Za-z0-9+.-]*:", flakeref) is not None ) return looks_like ================================================ FILE: src/common/http.py ================================================ # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Shared HTTP session primitives.""" from collections.abc import Collection from typing import Any from requests import Session from requests.adapters import HTTPAdapter from requests_cache import CacheMixin from requests_ratelimiter import LimiterMixin from urllib3.util.retry import Retry DEFAULT_RETRY_STATUS_CODES = (429, 500, 502, 503, 504) class CachedLimiterSession(CacheMixin, LimiterMixin, Session): # pyright: ignore[reportIncompatibleMethodOverride] """ Session class with caching and rate-limiting. 
https://requests-cache.readthedocs.io/en/stable/user_guide/compatibility.html """ def mount_retries( session: Session, *, allowed_methods: Collection[str] = frozenset(("GET", "HEAD")), ) -> Session: """Attach a retrying adapter to a requests session.""" retry = Retry( total=3, connect=3, read=3, status=3, backoff_factor=1, status_forcelist=DEFAULT_RETRY_STATUS_CODES, allowed_methods=allowed_methods, raise_on_status=False, respect_retry_after_header=True, ) adapter = HTTPAdapter(max_retries=retry) session.mount("http://", adapter) session.mount("https://", adapter) return session def create_cached_limited_session( *, per_second: int | None = None, per_minute: int | None = None, expire_after: int | None = None, user_agent: str | None = None, allowed_methods: Collection[str] = frozenset(("GET", "HEAD")), ) -> Session: """Create a cached, rate-limited session with retry policy attached.""" kwargs: dict[str, Any] = {} if per_second is not None: kwargs["per_second"] = per_second if per_minute is not None: kwargs["per_minute"] = per_minute if expire_after is not None: kwargs["expire_after"] = expire_after session = CachedLimiterSession(**kwargs) mount_retries(session, allowed_methods=allowed_methods) if user_agent: session.headers.update({"User-Agent": user_agent}) return session ================================================ FILE: src/common/log.py ================================================ # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Shared logging configuration and logger access.""" import logging import os from typing import Any, cast from colorlog import ColoredFormatter, default_log_colors LOG_VERBOSE = 15 LOG_SPAM = logging.DEBUG - 1 LOG_TRACE = LOG_SPAM LOG_LEVELS = [logging.INFO, LOG_VERBOSE, logging.DEBUG, LOG_SPAM] class SbomnixLogger(logging.Logger): """Project logger with sbomnix-specific verbose levels.""" def verbose(self, msg: object, *args: object, **kwargs: Any) -> None: """Log at the project VERBOSE level.""" if self.isEnabledFor(LOG_VERBOSE): kwargs.setdefault("stacklevel", 2) self._log(LOG_VERBOSE, msg, args, **kwargs) def spam(self, msg: object, *args: object, **kwargs: Any) -> None: """Log at the project SPAM level.""" if self.isEnabledFor(LOG_SPAM): kwargs.setdefault("stacklevel", 2) self._log(LOG_SPAM, msg, args, **kwargs) def trace(self, msg: object, *args: object, **kwargs: Any) -> None: """Log at the project TRACE level alias.""" if self.isEnabledFor(LOG_TRACE): kwargs.setdefault("stacklevel", 2) self._log(LOG_TRACE, msg, args, **kwargs) __all__ = [ "LOG", "LOG_SPAM", "LOG_TRACE", "LOG_VERBOSE", "is_debug_enabled", "set_log_verbosity", ] logging.addLevelName(LOG_VERBOSE, "VERBOSE") logging.addLevelName(LOG_SPAM, "SPAM") logging.setLoggerClass(SbomnixLogger) LOG = cast(SbomnixLogger, logging.getLogger(os.path.abspath(__file__))) def set_log_verbosity(verbosity=0): """Set logging verbosity.""" verbosity = min(len(LOG_LEVELS) - 1, max(verbosity, 0)) _init_logging(verbosity) def _init_logging(verbosity=0): """Initialize logging.""" level = LOG_LEVELS[verbosity] if level <= logging.DEBUG: logformat = ( "%(log_color)s%(levelname)-8s%(reset)s " "%(filename)s:%(funcName)s():%(lineno)d " "%(message)s" ) else: logformat = "%(log_color)s%(levelname)-8s%(reset)s %(message)s" log_colors = { **default_log_colors, "INFO": "fg_bold_white", "VERBOSE": "fg_bold_cyan", "DEBUG": "fg_bold_white", "SPAM": "fg_bold_white", } if LOG.handlers: stream = LOG.handlers[0] else: stream = logging.StreamHandler() formatter = 
ColoredFormatter(
        logformat,
        log_colors=log_colors,
        stream=getattr(stream, "stream", None),
    )
    stream.setFormatter(formatter)
    if not LOG.handlers:
        LOG.addHandler(stream)
    LOG.setLevel(level)


def is_debug_enabled():
    """Return True when project logging is enabled for DEBUG details."""
    return LOG.isEnabledFor(logging.DEBUG)


set_log_verbosity(0)



================================================
FILE: src/common/nix_utils.py
================================================
# SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII)
#
# SPDX-License-Identifier: Apache-2.0

"""Helpers for normalizing nix store paths and derivation JSON."""

import json
import os
import re

from common.errors import InvalidNixJsonError

RE_NIX_STORE_PATH_BASENAME = re.compile(r"^[0-9a-z]{32}-.+")
RE_NIX_STORE_PATH = re.compile(r"(?P<store_path>/(?:[^/\s:]+/)+[0-9a-z]{32}-[^/\s:]+)")

NIX_DERIVATION_SHOW_JSON = "nix derivation show"
NIX_PATH_INFO_JSON = "nix path-info --json --json-format 1"


def get_nix_store_dir(path=None, default: str | None = "/nix/store") -> str | None:
    """Infer the nix store directory from an absolute store path-like string."""
    if path:
        match = RE_NIX_STORE_PATH.search(str(path))
        if match:
            return os.path.dirname(match.group("store_path"))
    return default


def normalize_nix_store_path(path, store_dir="/nix/store"):
    """Return an absolute store path for basename-only store path strings."""
    if not isinstance(path, str) or not path:
        return path
    if os.path.isabs(path) or not RE_NIX_STORE_PATH_BASENAME.match(path):
        return path
    return os.path.join(store_dir, path)


def _iter_nix_store_dir_candidates(value):
    """Yield strings that may reveal the nix store directory."""
    if isinstance(value, str):
        yield value
    elif isinstance(value, dict):
        for item in value.values():
            yield from _iter_nix_store_dir_candidates(item)
    elif isinstance(value, (list, tuple)):
        for item in value:
            yield from _iter_nix_store_dir_candidates(item)


def _infer_nix_store_dir(drv_info, default="/nix/store"):
    """Infer the nix store directory from derivation fields when keys are relative."""
    if not isinstance(drv_info, dict):
        return default
    for candidate in _iter_nix_store_dir_candidates(
        {
            "builder": drv_info.get("builder"),
            "outputs": drv_info.get("outputs"),
            "env": drv_info.get("env"),
        }
    ):
        store_dir = get_nix_store_dir(candidate, default=None)
        if store_dir:
            return store_dir
    return default


def _normalize_nix_derivation_info(drv_info, store_dir):
    """Normalize basename-only store paths within derivation info."""
    if not isinstance(drv_info, dict):
        return drv_info
    normalized = dict(drv_info)
    outputs = normalized.get("outputs")
    if isinstance(outputs, dict):
        normalized["outputs"] = {}
        for name, output in outputs.items():
            normalized_output = output
            if isinstance(output, dict):
                normalized_output = dict(output)
                if normalized_output.get("path"):
                    normalized_output["path"] = normalize_nix_store_path(
                        normalized_output["path"], store_dir
                    )
            normalized["outputs"][name] = normalized_output
    env = normalized.get("env")
    if isinstance(env, dict):
        normalized["env"] = {
            key: normalize_nix_store_path(value, store_dir)
            for key, value in env.items()
        }
    inputs = normalized.get("inputs")
    if isinstance(inputs, dict):
        normalized_inputs = dict(inputs)
        srcs = normalized_inputs.get("srcs")
        if isinstance(srcs, list):
            normalized_inputs["srcs"] = [
                normalize_nix_store_path(src, store_dir) for src in srcs
            ]
        drvs = normalized_inputs.get("drvs")
        if isinstance(drvs, dict):
            normalized_inputs["drvs"] = {
                normalize_nix_store_path(path, store_dir): outputs
                for path, outputs in drvs.items()
            }
normalized["inputs"] = normalized_inputs return normalized def load_nix_json(stdout, command): """Load JSON produced by a Nix command and raise a user-facing error on drift.""" try: return json.loads(stdout) except json.JSONDecodeError as error: raise InvalidNixJsonError(command, f"invalid JSON: {error.msg}") from error def parse_nix_derivation_show(stdout, store_path_hint=None): """Normalize `nix derivation show` JSON across direct and wrapped formats.""" payload = load_nix_json(stdout, NIX_DERIVATION_SHOW_JSON) if not isinstance(payload, dict): raise InvalidNixJsonError( NIX_DERIVATION_SHOW_JSON, f"expected top-level object, got {type(payload).__name__}", ) derivations = payload.get("derivations", payload) if not isinstance(derivations, dict): raise InvalidNixJsonError( NIX_DERIVATION_SHOW_JSON, f"expected `derivations` object, got {type(derivations).__name__}", ) normalized = {} default_store_dir = get_nix_store_dir(store_path_hint) or "/nix/store" for drv_path, drv_info in derivations.items(): _validate_derivation_entry(drv_path, drv_info) store_dir = get_nix_store_dir(drv_path, default=None) if not store_dir: store_dir = _infer_nix_store_dir(drv_info, default=default_store_dir) normalized_drv_path = normalize_nix_store_path(drv_path, store_dir) normalized[normalized_drv_path] = _normalize_nix_derivation_info( drv_info, store_dir ) return normalized def _validate_derivation_entry(drv_path, drv_info): """Validate the `nix derivation show` fields consumed by this project.""" if not isinstance(drv_path, str) or not drv_path: raise InvalidNixJsonError( NIX_DERIVATION_SHOW_JSON, "expected derivation keys to be non-empty strings", ) if not isinstance(drv_info, dict): raise InvalidNixJsonError( NIX_DERIVATION_SHOW_JSON, f"expected derivation `{drv_path}` to be an object", ) _validate_optional_mapping(drv_info, "env", f"derivation `{drv_path}`") _validate_derivation_outputs(drv_path, drv_info) _check_optional_derivation_inputs(drv_path, drv_info) def _validate_optional_mapping(record, field, owner): value = record.get(field) if value is not None and not isinstance(value, dict): raise InvalidNixJsonError( NIX_DERIVATION_SHOW_JSON, f"expected `{field}` in {owner} to be an object", ) def _validate_derivation_outputs(drv_path, drv_info): outputs = drv_info.get("outputs") if outputs is None: return if not isinstance(outputs, dict): raise InvalidNixJsonError( NIX_DERIVATION_SHOW_JSON, f"expected `outputs` in derivation `{drv_path}` to be an object", ) for output_name, output in outputs.items(): if not isinstance(output_name, str) or not output_name: raise InvalidNixJsonError( NIX_DERIVATION_SHOW_JSON, f"expected output names in derivation `{drv_path}` to be strings", ) if not isinstance(output, dict): raise InvalidNixJsonError( NIX_DERIVATION_SHOW_JSON, f"expected output `{output_name}` in derivation `{drv_path}` " "to be an object", ) output_path = output.get("path") if output_path is not None and not isinstance(output_path, str): raise InvalidNixJsonError( NIX_DERIVATION_SHOW_JSON, f"expected output `{output_name}` path in derivation `{drv_path}` " "to be a string", ) def _check_optional_derivation_inputs(drv_path, drv_info): """Validate input shape without requiring callers to consume dependencies.""" inputs = drv_info.get("inputs") if inputs is not None: if not isinstance(inputs, dict): raise InvalidNixJsonError( NIX_DERIVATION_SHOW_JSON, f"expected `inputs` in derivation `{drv_path}` to be an object", ) _validate_optional_mapping(inputs, "drvs", f"`inputs` for `{drv_path}`") srcs = 
inputs.get("srcs") if srcs is not None: if not isinstance(srcs, list): raise InvalidNixJsonError( NIX_DERIVATION_SHOW_JSON, f"expected `inputs.srcs` in derivation `{drv_path}` to be a list", ) _validated_path_values( srcs, f"`inputs.srcs` in derivation `{drv_path}`", NIX_DERIVATION_SHOW_JSON, ) _reject_legacy_derivation_inputs(drv_path, drv_info) def _reject_legacy_derivation_inputs(drv_path, drv_info): for field in ("inputDrvs", "inputSrcs"): if field in drv_info: raise InvalidNixJsonError( NIX_DERIVATION_SHOW_JSON, f"unsupported legacy `{field}` in derivation `{drv_path}`", ) def nix_derivation_input_drv_paths(drv_path, drv_info): """Return validated input derivation paths from normalized derivation JSON.""" inputs = _require_derivation_inputs(drv_path, drv_info) if "drvs" not in inputs: raise InvalidNixJsonError( NIX_DERIVATION_SHOW_JSON, f"missing `inputs.drvs` in derivation `{drv_path}`", ) drvs = inputs["drvs"] if not isinstance(drvs, dict): raise InvalidNixJsonError( NIX_DERIVATION_SHOW_JSON, f"expected `inputs.drvs` in derivation `{drv_path}` to be an object", ) return _validated_path_keys( drvs, f"`inputs.drvs` in derivation `{drv_path}`", NIX_DERIVATION_SHOW_JSON, ) def nix_derivation_input_src_paths(drv_path, drv_info): """Return validated direct source inputs from normalized derivation JSON.""" inputs = _require_derivation_inputs(drv_path, drv_info) if "srcs" not in inputs: raise InvalidNixJsonError( NIX_DERIVATION_SHOW_JSON, f"missing `inputs.srcs` in derivation `{drv_path}`", ) srcs = inputs["srcs"] if not isinstance(srcs, list): raise InvalidNixJsonError( NIX_DERIVATION_SHOW_JSON, f"expected `inputs.srcs` in derivation `{drv_path}` to be a list", ) return _validated_path_values( srcs, f"`inputs.srcs` in derivation `{drv_path}`", NIX_DERIVATION_SHOW_JSON, ) def _require_derivation_inputs(drv_path, drv_info): """Return the validated modern derivation input object. Parsing derivation metadata only validates an optional ``inputs`` object because some callers use unrelated fields. Graph construction depends on the modern dependency schema, so this accessor requires ``inputs`` and the field-specific accessors require both ``inputs.drvs`` and ``inputs.srcs``. Real leaf derivations still expose those fields as empty containers. 
""" if not isinstance(drv_info, dict): raise InvalidNixJsonError( NIX_DERIVATION_SHOW_JSON, f"expected derivation `{drv_path}` to be an object", ) _reject_legacy_derivation_inputs(drv_path, drv_info) if "inputs" not in drv_info: raise InvalidNixJsonError( NIX_DERIVATION_SHOW_JSON, f"missing derivation inputs in `{drv_path}`", ) inputs = drv_info["inputs"] if not isinstance(inputs, dict): raise InvalidNixJsonError( NIX_DERIVATION_SHOW_JSON, f"expected `inputs` in derivation `{drv_path}` to be an object", ) return inputs def normalize_nix_path_info(path_info, *, command=NIX_PATH_INFO_JSON): """Normalize and validate Nix path-info JSON to a path-indexed dictionary.""" if isinstance(path_info, dict): normalized = {} for path, info in path_info.items(): if not isinstance(path, str) or not path: raise InvalidNixJsonError( command, "expected path-info object keys to be non-empty strings", ) if not isinstance(info, dict): raise InvalidNixJsonError( command, f"expected path-info record for `{path}` to be an object", ) normalized[path] = info return normalized if isinstance(path_info, list): normalized = {} for index, info in enumerate(path_info): if not isinstance(info, dict): raise InvalidNixJsonError( command, f"expected path-info list item {index} to be an object", ) path = info.get("path") or info.get("storePath") if not isinstance(path, str) or not path: raise InvalidNixJsonError( command, f"missing path string in path-info list item {index}", ) normalized[path] = info return normalized raise InvalidNixJsonError( command, f"expected top-level object or list, got {type(path_info).__name__}", ) def nix_path_info_references(info, path, *, command=NIX_PATH_INFO_JSON): """Return validated path-info references for a store path.""" if "references" not in info: raise InvalidNixJsonError( command, f"missing `references` in path-info for `{path}`", ) references = info["references"] if not isinstance(references, list): raise InvalidNixJsonError( command, f"expected `references` in path-info for `{path}` to be a list", ) for index, reference in enumerate(references): if not isinstance(reference, str) or not reference: raise InvalidNixJsonError( command, f"expected `references[{index}]` in path-info for `{path}` " "to be a non-empty string", ) return references def nix_path_info_deriver(info, path, *, command=NIX_PATH_INFO_JSON): """Return a validated path-info deriver value, or None when absent.""" deriver = info.get("deriver") if deriver is None or deriver == "": return None if not isinstance(deriver, str): raise InvalidNixJsonError( command, f"expected `deriver` in path-info for `{path}` to be a string or null", ) return deriver def nix_path_info_nar_hash(info, path, *, command=NIX_PATH_INFO_JSON): """Return a validated path-info NAR hash.""" nar_hash = info.get("narHash") if not isinstance(nar_hash, str) or not nar_hash: raise InvalidNixJsonError( command, f"missing `narHash` string in path-info for `{path}`", ) return nar_hash def _validated_path_keys(paths, owner, command): validated = [] for path in paths: if not isinstance(path, str) or not path: raise InvalidNixJsonError( command, f"expected keys in {owner} to be non-empty strings", ) validated.append(path) return validated def _validated_path_values(paths, owner, command): validated = [] for index, path in enumerate(paths): if not isinstance(path, str) or not path: raise InvalidNixJsonError( command, f"expected paths in {owner} to be non-empty strings " f"(invalid index {index})", ) validated.append(path) return validated 
================================================ FILE: src/common/package_names.py ================================================ # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Package name normalization helpers.""" import re import pandas as pd def nix_to_repology_pkg_name(nix_pkg_name): """Convert nix package name to repology package name.""" if not nix_pkg_name or pd.isnull(nix_pkg_name): return nix_pkg_name nix_pkg_name = nix_pkg_name.lower() re_nix_to_repo = re.compile( r"^(?:" r"(python)|(perl)|(emacs)|(vim)plugin|(ocaml)|" r"(gnome)-shell-extension|(lisp)|(ruby)|(lua)|" r"(php)[0-9]*Packages|(go)|(coq)|(rust)" r")" r"[0-9.]*-(.+)" ) match = re.match(re_nix_to_repo, nix_pkg_name) if match: matches = list(filter(None, match.groups())) assert len(matches) == 2, f"Unexpected package name '{nix_pkg_name}'" nix_pkg_name = f"{matches[0]}:{matches[1]}" if nix_pkg_name == "python3": nix_pkg_name = "python" if nix_pkg_name == "libtiff": nix_pkg_name = "tiff" return nix_pkg_name ================================================ FILE: src/common/pkgmeta.py ================================================ # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Package metadata helpers.""" import importlib.metadata import subprocess from pathlib import Path _REPO_ROOT = Path(__file__).resolve().parents[2] def get_py_pkg_version(package="sbomnix"): """Return package version, including git state when run from source.""" try: return importlib.metadata.version(package) except importlib.metadata.PackageNotFoundError: return _dev_version() def _dev_version(): """Derive version from git when the package is not installed. Produces the same format as the Nix package version so that devshell and nix-built invocations report identical strings for the same checkout: <base>+g<shorthash> clean tree with commits beyond the release tag <base>+g<shorthash>.dirty tree has tracked modifications (untracked files ignored) pip normalises '-' to '.' in local version identifiers, so '.dirty' is used here to match what importlib.metadata returns from the installed package.
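    For example (hypothetical short hash): base version 1.2.0 with local commits would report 1.2.0+g1a2b3c4, or 1.2.0+g1a2b3c4.dirty with uncommitted tracked changes.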
""" try: base = (_REPO_ROOT / "VERSION").read_text().strip() short_hash = subprocess.run( ["git", "rev-parse", "--short", "HEAD"], capture_output=True, text=True, check=True, cwd=_REPO_ROOT, ).stdout.strip() dirty = subprocess.run( ["git", "status", "--porcelain", "--untracked-files=no"], capture_output=True, text=True, check=True, cwd=_REPO_ROOT, ).stdout.strip() return f"{base}+g{short_hash}{'.dirty' if dirty else ''}" except Exception: try: return (_REPO_ROOT / "VERSION").read_text().strip() + ".dev" except Exception: return "0.0.0" ================================================ FILE: src/common/proc.py ================================================ # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Shared subprocess and nix command helpers.""" import logging import os import shlex import subprocess from collections.abc import Callable, Sequence from shutil import which from typing import IO, Literal, overload from common.errors import CommandNotFoundError, InvalidNixArtifactError from common.log import LOG, LOG_VERBOSE CommandPart = str | os.PathLike[str] ExecCmdResult = subprocess.CompletedProcess[str] | subprocess.CalledProcessError | None ExecCmdFn = Callable[..., ExecCmdResult] @overload def exec_cmd( cmd: Sequence[CommandPart], raise_on_error: Literal[True] = True, return_error: bool = False, log_error: bool = True, stdout: IO[str] | None = None, ) -> subprocess.CompletedProcess[str]: ... @overload def exec_cmd( cmd: Sequence[CommandPart], raise_on_error: Literal[False], return_error: Literal[True], log_error: bool = True, stdout: IO[str] | None = None, ) -> subprocess.CompletedProcess[str] | subprocess.CalledProcessError | None: ... @overload def exec_cmd( cmd: Sequence[CommandPart], raise_on_error: Literal[False], return_error: Literal[False] = False, log_error: bool = True, stdout: IO[str] | None = None, ) -> subprocess.CompletedProcess[str] | None: ... def exec_cmd( cmd: Sequence[CommandPart], raise_on_error: bool = True, return_error: bool = False, log_error: bool = True, stdout: IO[str] | None = None, ) -> ExecCmdResult: """Run shell command `cmd`.""" if isinstance(cmd, (str, bytes, os.PathLike)): raise TypeError("cmd must be an argv sequence, not a string-like value") argv = [os.fspath(part) for part in cmd] command_str = shlex.join(argv) LOG.debug("Running: %s", command_str) try: if stdout: ret = subprocess.run(argv, encoding="utf-8", check=True, stdout=stdout) else: ret = subprocess.run( argv, capture_output=True, encoding="utf-8", check=True, ) return ret except subprocess.CalledProcessError as error: if log_error: LOG.error( "Error running shell command:\n cmd: '%s'\n stdout: %s\n stderr: %s", command_str, error.stdout, error.stderr, ) if raise_on_error: raise error if return_error: return error return None def exit_unless_command_exists( name: str, *, which_fn: Callable[[str], str | None] | None = None, ) -> None: """Raise if `name` is not an executable in PATH.""" which_fn = which if which_fn is None else which_fn name_is_in_path = which_fn(name) is not None if not name_is_in_path: raise CommandNotFoundError(name) def exit_unless_nix_artifact( path: str, force_realise: bool = False, *, exec_cmd_fn: ExecCmdFn | None = None, log: logging.Logger | None = None, ) -> None: """ Raise if `path` is not a nix artifact. If `force_realise` is True, build the installable before querying path information. 
""" exec_cmd_fn = exec_cmd if exec_cmd_fn is None else exec_cmd_fn log = LOG if log is None else log log.debug("force_realize: %s", force_realise) try: if force_realise: log.log(LOG_VERBOSE, "Try force-realising store-path '%s'", path) exec_cmd_fn(nix_cmd("build", "--no-link", path)) exec_cmd_fn(nix_cmd("path-info", path)) return except subprocess.CalledProcessError: raise InvalidNixArtifactError(path) from None def nix_cmd(*args: str, impure: bool = False) -> list[str]: """Build argv for nix commands that require flakes + nix-command support.""" cmd = [ "nix", *args, "--extra-experimental-features", "flakes", "--extra-experimental-features", "nix-command", ] if impure: cmd.append("--impure") return cmd ================================================ FILE: src/common/regex.py ================================================ # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Small regex helpers kept for compatibility with older call sites.""" import re def regex_match(regex, string): """Return true if ``regex`` matches ``string``.""" if not regex or not string: return False return re.match(regex, string) is not None ================================================ FILE: src/common/spdx.py ================================================ # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Helpers for validating SPDX license identifiers.""" from functools import lru_cache from license_expression import ExpressionError, get_spdx_licensing @lru_cache(maxsize=1) def _spdx_licensing(): return get_spdx_licensing() def canonicalize_spdx_license_id(identifier): """Return a canonical SPDX identifier for a single license key.""" if not identifier: return None try: parsed = _spdx_licensing().parse(str(identifier), validate=True) except ExpressionError: return None return getattr(parsed, "key", None) ================================================ FILE: src/common/versioning.py ================================================ # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Shared version parsing and comparison helpers.""" import re import packaging.version from common.log import LOG, LOG_SPAM def number_distance(n1: object, n2: object) -> float: """ Return float value between [0.0,1.0] indicating the distance between two non-negative numbers. Returns 1.0 if the two numbers are equal. Returns 0.0 if either argument is not a non-negative number. """ if ( not isinstance(n1, (float, int)) or not isinstance(n2, (float, int)) or n1 < 0 or n2 < 0 ): return 0.0 min_n = min(n1, n2) max_n = max(n1, n2) if max_n == 0: return 1.0 if min_n == 0: min_n += 1 max_n += 1 return min_n / max_n def version_distance(v1: object, v2: object) -> float: """ Return float value between [0.0,1.0] indicating the closeness of the given two version number strings. """ v1 = str(v1) v2 = str(v2) v1_clean = re.sub(r"[^0-9.]+", "", v1) v2_clean = re.sub(r"[^0-9.]+", "", v2) re_vsplit = re.compile(r".*?(?P[0-9][0-9]*)(?P.*)$") match = re.match(re_vsplit, v1_clean) if not match: LOG.debug("Unexpected v1 version '%s'", v1) return 0.0 v1_major = match.group("ver_beg") v1_minor = match.group("ver_end").replace(".", "") v1_float = float(v1_major + "." 
+ v1_minor) match = re.match(re_vsplit, v2_clean) if not match: LOG.debug("Unexpected v2 version '%s'", v2) return 0.0 v2_major = match.group("ver_beg") v2_minor = match.group("ver_end").replace(".", "") v2_float = float(v2_major + "." + v2_minor) return number_distance(v1_float, v2_float) def parse_version(ver_str: object) -> packaging.version.Version | None: """ Return comparable version object from the given version string. Returns None if the version string can not be converted to version object. """ ver_str = str(ver_str) if not ver_str: return None re_ver = re.compile(r".*?(?P<ver_beg>[0-9][0-9.]*)(?P<ver_end>.*)$") match = re_ver.match(ver_str) if not match: LOG.debug("Unable to parse version '%s'", ver_str) return None ver_beg = match.group("ver_beg").rstrip(".") ver_end = match.group("ver_end") ver_end = re.sub(r"[^0-9.]+", "", ver_end).lstrip(".") if ver_end: ver_end = f"+{ver_end}" else: ver_end = "" ver_end = ver_end.rstrip(".") ver = f"{ver_beg}{ver_end}" ver = re.sub(r"\.+", ".", ver) LOG.log(LOG_SPAM, "%s --> %s", ver_str, ver) if not ver: LOG.debug("Invalid version '%s'", ver_str) return None try: return packaging.version.parse(ver) except packaging.version.InvalidVersion: LOG.debug("Invalid version '%s'", ver_str) return None ================================================ FILE: src/nixgraph/__init__.py ================================================ # SPDX-FileCopyrightText: 2022 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 ================================================ FILE: src/nixgraph/graph.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2022-2023 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Python script to query and visualize nix package dependencies.""" from dataclasses import dataclass import pandas as pd from common.df import df_to_csv_file from common.log import LOG, is_debug_enabled from nixgraph.render import NixDependencyGraph from sbomnix.closure import derivation_dependencies_df from sbomnix.derivation import load_recursive from sbomnix.derivers import require_deriver from sbomnix.runtime import load_runtime_closure @dataclass(frozen=True) class LoadedDependencies: """Dependency dataframe and graph traversal start path.""" start_path: str df: pd.DataFrame dtype: str def load_dependencies(nix_path, buildtime=False): """Load nixgraph dependency rows from structured Nix data.""" LOG.debug("nix_path: %s", nix_path) dtype = "buildtime" if buildtime else "runtime" LOG.info("Loading %s dependencies referenced by '%s'", dtype, nix_path) if buildtime: drv_path = require_deriver(nix_path) _derivations, drv_infos = load_recursive(drv_path) loaded = LoadedDependencies( start_path=drv_path, df=derivation_dependencies_df(drv_infos), dtype=dtype, ) else: runtime_closure = load_runtime_closure(nix_path) loaded = LoadedDependencies( start_path=nix_path, df=runtime_closure.df_deps, dtype=dtype, ) if loaded.df.empty: LOG.info("No %s dependencies", dtype) return loaded def draw_dependencies(loaded, args): """Draw loaded dependencies as a directed graph.""" if is_debug_enabled(): df_to_csv_file(loaded.df, f"nixgraph_deps_{loaded.dtype}.csv") digraph = NixDependencyGraph(loaded.df) return digraph.draw(loaded.start_path, args) ================================================ FILE: src/nixgraph/main.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2022-2023 Technology Innovation Institute (TII) # # SPDX-License-Identifier: 
Apache-2.0 """Python script to query and visualize nix package dependencies""" import argparse from common.cli_args import add_verbose_argument, add_version_argument, check_positive from common.errors import SbomnixError from common.log import LOG, set_log_verbosity from nixgraph.graph import draw_dependencies, load_dependencies from sbomnix.cli_utils import resolve_nix_target ############################################################################### def getargs(args=None): """Parse command line arguments""" desc = "Visualize nix artifact dependencies" epil = "Example: nixgraph /path/to/derivation.drv " parser = argparse.ArgumentParser(description=desc, epilog=epil) helps = ( "Target nix store path (e.g. derivation file or nix output path) or flakeref" ) parser.add_argument("NIXREF", help=helps, type=str) add_version_argument(parser) helps = "Scan buildtime dependencies instead of runtime dependencies" parser.add_argument("--buildtime", help=helps, action="store_true") helps = "Set the graph maxdepth (default: --depth=1)" parser.add_argument("--depth", help=helps, type=check_positive, default=1) helps = ( "Draw inverse graph starting from node (path) names that match the " "specified regular expression" ) parser.add_argument("--inverse", help=helps) helps = ( "Set the output file name, default is 'graph.png'. " "The output filename extension determines the output format. " "Common supported formats include: png, jpg, pdf, and dot. " "For a full list of supported output formats, see: " "https://graphviz.org/doc/info/output.html. In addition to graphviz " "supported output formats, the tool supports output in csv to " "allow post-processing the output data. Specify output file with " ".csv extension to output the query result in textual csv format." ) parser.add_argument("-o", "--out", nargs="?", help=helps, default="graph.png") helps = "Colorize nodes that match the specified regular expression" parser.add_argument("--colorize", help=helps) helps = ( "Keep drawing the dependencies until package name matches " "the specified regular expression. This option works together with " "--depth so that drawing stops when the first of the two " "conditions match: when the package name matches the given regex " "or when the specified graph depth is reached." 
) parser.add_argument("--until", help=helps) helps = "Show nix store path in node label, together with package name" parser.add_argument("--pathnames", help=helps, action="store_true") add_verbose_argument(parser) return parser.parse_args(args) ################################################################################ def main(): """main entry point""" args = getargs() set_log_verbosity(args.verbose) try: _run(args) except SbomnixError as error: LOG.fatal("%s", error) raise SystemExit(1) from error def _run(args): target = resolve_nix_target(args.NIXREF, buildtime=args.buildtime) deps = load_dependencies(target.path, args.buildtime) draw_dependencies(deps, args) if __name__ == "__main__": main() ################################################################################ ================================================ FILE: src/nixgraph/render.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Helpers for traversing and rendering nix dependency graphs.""" import html import os from typing import Any import graphviz as gv import pandas as pd from common import columns as cols from common.df import df_regex_filter, df_to_csv_file from common.log import LOG, LOG_SPAM from common.regex import regex_match from sbomnix.closure import walk_dependency_rows DBG_INDENT = " " GRAPHVIZ_RENDER_WARN_EDGES = 2000 class NixDependencyGraph: """Draw nix package dependencies as graph.""" def __init__(self, df_dependencies): self.df = df_dependencies self.digraph = None self.df_out_csv = None self.maxdepth = 1 self.inverse_regex = None self.until_regex = None self.colorize_regex = None self.pathnames = False def draw(self, start_path, args): """Draw dependency graph.""" self._init_df_out(args) self.maxdepth = args.depth if hasattr(args, "depth") else 1 self.inverse_regex = args.inverse if hasattr(args, "inverse") else None self.until_regex = args.until if hasattr(args, "until") else None self.colorize_regex = args.colorize if hasattr(args, "colorize") else None self.pathnames = args.pathnames if hasattr(args, "pathnames") else False self.digraph = gv.Digraph() self.nodes_drawn = set() self.digraph.attr("graph", rankdir="LR") self.digraph.attr("node", shape="box") self.digraph.attr("node", style="rounded") self.digraph.attr("node", margin="0.3,0.1") self.digraph.attr("graph", concentrate="false") initlen = len(self.digraph.body) walked_rows = self._walk_rows(start_path) if self.df_out_csv is not None: self.df_out_csv = self._walked_rows_to_dataframe(walked_rows) else: for walked in walked_rows: self._draw_row(walked.row, walked.depth) if len(self.digraph.body) > initlen: self._warn_if_large_graphviz_render(args.out, len(walked_rows)) self._render(args.out) elif self.df_out_csv is not None and not self.df_out_csv.empty: if hasattr(args, "return_df") and args.return_df: LOG.debug("Returning graph as dataframe") return self.df_out_csv df_to_csv_file(self.df_out_csv, args.out) else: LOG.warning("Nothing to draw") return None def _walk_rows(self, start_path): if self.inverse_regex: df = df_regex_filter(self.df, cols.SRC_PATH, self.inverse_regex) start_paths = df[cols.SRC_PATH].tolist() if not df.empty else [] for inverse_path in dict.fromkeys(start_paths): LOG.debug("Start path inverse: %s", inverse_path) return walk_dependency_rows( self.df, start_paths, self.maxdepth, inverse=True, stop_at=self._matches_until, ) LOG.debug("Start path: %s", start_path) return 
walk_dependency_rows( self.df, start_path, self.maxdepth, stop_at=self._matches_until, ) def _walked_rows_to_dataframe(self, walked_rows): rows = [{"graph_depth": walked.depth, **walked.row} for walked in walked_rows] if rows: return pd.DataFrame.from_records(rows) return pd.DataFrame() def _draw_row(self, row, depth): self._dbg_print_row(row, depth) if self._matches_until(row): LOG.debug("%sReached until_function", (DBG_INDENT * (depth - 1))) return self._add_node(row[cols.SRC_PATH], row["src_pname"]) self._add_node(row[cols.TARGET_PATH], row["target_pname"]) self._add_edge(row) def _init_df_out(self, args): if hasattr(args, "out"): _fname, extension = os.path.splitext(args.out) fileformat = extension[1:] if fileformat == "csv": self.df_out_csv = pd.DataFrame() elif hasattr(args, "return_df") and args.return_df: self.df_out_csv = pd.DataFrame() else: self.df_out_csv = None def _render(self, filename): if self.df_out_csv is not None: return if self.digraph is None: return fname, extension = os.path.splitext(filename) gformat = extension[1:] if gformat == "dot": self.digraph.save(filename) LOG.info("Wrote: %s", filename) return self.digraph.render(filename=fname, format=gformat, cleanup=True) LOG.info("Wrote: %s", filename) def _warn_if_large_graphviz_render(self, filename, edge_count): if edge_count < GRAPHVIZ_RENDER_WARN_EDGES: return _fname, extension = os.path.splitext(filename) if extension[1:] in ("csv", "dot"): return LOG.warning( "Rendering %s dependency edges with Graphviz may be slow; " "use --out graph.csv or --out graph.dot for faster output.", edge_count, ) def _matches_until(self, row): return regex_match(self.until_regex, row["target_pname"]) def _add_edge(self, row): if self.df_out_csv is not None: return if self.digraph is None: return self.digraph.edge(row[cols.TARGET_PATH], row[cols.SRC_PATH], style=None) def _add_node(self, path, pname): if self.df_out_csv is not None: return if self.digraph is None: return if path in self.nodes_drawn: return self.nodes_drawn.add(path) node_id = path node_name = html.escape(str(pname)) if self.pathnames: beg = '<FONT POINT-SIZE="8">' end = "</FONT>" label = f"<{node_name}<BR/>{beg}{str(path)}{end}>" else: label = node_name fillcolor = "#EEEEEE" if regex_match(self.colorize_regex, pname): fillcolor = "#FFE6E6" self.digraph.node(node_id, label, style="rounded,filled", fillcolor=fillcolor) def _dbg_print_row(self, row: dict[str, Any], depth): LOG.log( LOG_SPAM, "%sFound: %s ==> %s", (DBG_INDENT * (depth - 1)), row[cols.TARGET_PATH], row[cols.SRC_PATH], ) ================================================ FILE: src/nixmeta/__init__.py ================================================ # SPDX-FileCopyrightText: 2023 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 ================================================ FILE: src/nixmeta/flake_metadata.py ================================================ # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Helpers for resolving nixpkgs metadata from flakerefs.""" import json import pathlib import re from common.log import LOG, LOG_SPAM from common.proc import exec_cmd, nix_cmd def get_flake_metadata(flakeref, *, exec_cmd_fn=exec_cmd, nix_cmd_fn=nix_cmd, log=LOG): """Return ``nix flake metadata`` JSON for the given flakeref.""" if flakeref.startswith("nixpkgs="): flakeref = flakeref.removeprefix("nixpkgs=") log.info("Reading flake metadata for '%s'", flakeref) cmd = nix_cmd_fn("flake", "metadata", flakeref, "--json") ret = exec_cmd_fn(cmd, raise_on_error=False, return_error=True, log_error=False) if ret is None or ret.returncode != 0: log.warning("Failed reading flake metadata: %s", flakeref) return None meta_json = json.loads(ret.stdout) log.log(LOG_SPAM, meta_json) return meta_json def is_nixpkgs_metadata(meta_json): """Return true if the given metadata describes nixpkgs.""" try: if ( "path" in meta_json and "description" in meta_json and meta_json["description"] == "A collection of packages for the Nix package manager" ): return True if ( "path" in meta_json and meta_json["locked"]["owner"] == "NixOS" and meta_json["locked"]["repo"] == "nixpkgs" ): return True except (KeyError, TypeError): return False return False def _locked_obj_is_nixpkgs(node_name, locked_obj): try: if locked_obj.get("repo") == "nixpkgs": return True if node_name.startswith("nixpkgs") and locked_obj.get("type") == "path": return True except AttributeError: return False return False def _input_node_names(value): if isinstance(value, str): return [value] if isinstance(value, list) and value and isinstance(value[-1], str): # Lock-file override chains store the resolved input node as the last item.
return [value[-1]] return [] def _get_flake_nixpkgs_obj(meta_json): try: nodes = meta_json["locks"]["nodes"] root_name = meta_json["locks"]["root"] root_inputs = nodes[root_name].get("inputs", {}) except (KeyError, TypeError, AttributeError): return None for node_name in _input_node_names(root_inputs.get("nixpkgs")): try: return nodes[node_name]["locked"] except (KeyError, TypeError): continue candidates = [] for node_name, node in nodes.items(): try: locked_obj = node["locked"] except (KeyError, TypeError): continue if _locked_obj_is_nixpkgs(node_name, locked_obj): candidates.append(locked_obj) if len(candidates) == 1: return candidates[0] return None def _get_flake_nixpkgs_val(meta_json, key): nixpkgs_obj = _get_flake_nixpkgs_obj(meta_json) if nixpkgs_obj is None: return None try: return nixpkgs_obj[key] except (KeyError, TypeError): return None def _get_nixpkgs_flakeref_github(meta_json, *, log=LOG): owner = _get_flake_nixpkgs_val(meta_json, "owner") repo = _get_flake_nixpkgs_val(meta_json, "repo") rev = _get_flake_nixpkgs_val(meta_json, "rev") if None in [owner, repo, rev]: log.debug( "owner, repo, or rev not found: %s", _get_flake_nixpkgs_obj(meta_json), ) return None return f"github:{owner}/{repo}?rev={rev}" def _get_nixpkgs_flakeref_git(meta_json, *, log=LOG): url = _get_flake_nixpkgs_val(meta_json, "url") rev = _get_flake_nixpkgs_val(meta_json, "rev") ref = _get_flake_nixpkgs_val(meta_json, "ref") if None in [url, rev, ref]: log.debug("url, rev, or ref not found: %s", _get_flake_nixpkgs_obj(meta_json)) return None return f"git+{url}?ref={ref}&rev={rev}" def _get_nixpkgs_flakeref_path(meta_json, *, log=LOG): path = _get_flake_nixpkgs_val(meta_json, "path") if path is None: log.debug("path not found: %s", _get_flake_nixpkgs_obj(meta_json)) return None return f"path:{path}" def _get_nixpkgs_flakeref_tarball(meta_json, *, log=LOG): url = _get_flake_nixpkgs_val(meta_json, "url") if url is None: log.debug("url not found: %s", _get_flake_nixpkgs_obj(meta_json)) return None return f"{url}" def get_nixpkgs_flakeref(meta_json, *, log=LOG): """Given flake metadata, return the locked nixpkgs flakeref.""" locked_type = _get_flake_nixpkgs_val(meta_json, "type") if locked_type == "github": return _get_nixpkgs_flakeref_github(meta_json, log=log) if locked_type == "git": return _get_nixpkgs_flakeref_git(meta_json, log=log) if locked_type == "path": return _get_nixpkgs_flakeref_path(meta_json, log=log) if locked_type == "tarball": return _get_nixpkgs_flakeref_tarball(meta_json, log=log) log.debug("Unsupported nixpkgs locked type: %s", locked_type) return None def nixref_to_nixpkgs_path( flakeref, *, get_flake_metadata_fn=get_flake_metadata, log=LOG, log_spam=LOG_SPAM, ): """Return the nix store path of the nixpkgs pinned by ``flakeref``.""" if not flakeref: return None log.info("Resolving nixpkgs path for '%s'", flakeref) log.debug("Finding meta-info for nixpkgs pinned in nixref: %s", flakeref) match = re.match(r"([^#]+)#", flakeref) if match: flakeref = match.group(1) log.debug("Stripped target specifier: %s", flakeref) meta_json = get_flake_metadata_fn(flakeref) if not is_nixpkgs_metadata(meta_json): log.debug("non-nixpkgs flakeref: %s", flakeref) nixpkgs_flakeref = get_nixpkgs_flakeref(meta_json, log=log) if not nixpkgs_flakeref: log.warning("Failed parsing locked nixpkgs: %s", flakeref) return None log.log(log_spam, "using nixpkgs_flakeref: %s", nixpkgs_flakeref) meta_json = get_flake_metadata_fn(nixpkgs_flakeref) if not is_nixpkgs_metadata(meta_json): log.warning("Failed reading nixpkgs 
metadata: %s", flakeref) return None return pathlib.Path(meta_json["path"]).absolute() ================================================ FILE: src/nixmeta/main.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2023 Technology Innovation Institute (TII) # SPDX-License-Identifier: Apache-2.0 """Python script for summarizing nixpkgs meta-attributes""" import argparse import pathlib from common.cli_args import add_verbose_argument, add_version_argument from common.errors import SbomnixError from common.log import LOG, set_log_verbosity from common.proc import exit_unless_command_exists from nixmeta.scanner import NixMetaScanner ################################################################################ def _getargs(args=None): """Parse command line arguments""" desc = ( "Summarize nixpkgs meta-attributes from the given nixpkgs version " "to a csv output file." ) epil = "Example: nixmeta --flakeref=github:NixOS/nixpkgs?ref=master" parser = argparse.ArgumentParser(description=desc, epilog=epil) helps = ( "Flake reference specifying the location of the flake " "from which the pinned nixpkgs target version is read. " "The default value is the " "current nixpkgs version in its 'nixos-unstable' branch. " "For more details, see: " "https://nixos.org/manual/nix/stable/command-ref/new-cli/nix3-flake" "#flake-references and " "https://nixos.wiki/wiki/Nix_channels " "(default: --flakeref=github:NixOS/nixpkgs?ref=nixos-unstable)." ) parser.add_argument( "-f", "--flakeref", help=helps, type=str, default="github:NixOS/nixpkgs?ref=nixos-unstable", ) helps = "Path to output file (default: --out=nixmeta.csv)." parser.add_argument( "-o", "--out", help=helps, type=pathlib.Path, default="nixmeta.csv", ) helps = ( "Append to output file - removing duplicate entries - instead of " "completely overwriting possible earlier output file." 
) parser.add_argument( "-a", "--append", help=helps, action="store_true", ) add_version_argument(parser) add_verbose_argument(parser) return parser.parse_args(args) ############################################################################### def main(): """main entry point""" args = _getargs() set_log_verbosity(args.verbose) try: _run(args) except SbomnixError as error: LOG.fatal("%s", error) raise SystemExit(1) from error def _run(args): # Fail early if the following commands are not in PATH exit_unless_command_exists("nix") exit_unless_command_exists("nix-env") # Scan metadata from the flakeref pinned nixpkgs LOG.info("Scanning nixpkgs metadata for '%s'", args.flakeref) scanner = NixMetaScanner() scanner.scan(args.flakeref) # Output to csv file scanner.to_csv(args.out, args.append) ################################################################################ if __name__ == "__main__": main() ################################################################################ ================================================ FILE: src/nixmeta/metadata_json.py ================================================ # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Helpers for flattening nix-env metadata JSON.""" import json import pandas as pd from common import columns as cols from common.log import LOG def parse_meta_entry(meta, key): """Flatten nested metadata values for a single key into a string.""" items = [] if isinstance(meta, dict): items.extend([parse_meta_entry(meta.get(key, ""), key)]) elif isinstance(meta, list): items.extend([parse_meta_entry(item, key) for item in meta]) else: return str(meta) return ";".join(list(filter(None, items))) def parse_json_metadata(json_filename, *, log=LOG): """Parse package metadata from a ``nix-env --json`` output file.""" with open(json_filename, "r", encoding="utf-8") as inf: log.debug('Loading meta-info from "%s"', json_filename) json_dict = json.loads(inf.read()) dict_selected = {} setcol = dict_selected.setdefault for pkg in json_dict.values(): setcol(cols.NAME, []).append(pkg.get("name", "")) setcol("pname", []).append(pkg.get("pname", "")) setcol(cols.VERSION, []).append(pkg.get("version", "")) meta = pkg.get("meta", {}) setcol("meta_homepage", []).append(parse_meta_entry(meta, key="homepage")) setcol("meta_unfree", []).append(meta.get("unfree", "")) setcol("meta_description", []).append(meta.get("description", "")) setcol("meta_position", []).append(meta.get("position", "")) meta_license = meta.get("license", {}) setcol("meta_license_short", []).append( parse_meta_entry(meta_license, key="shortName") ) setcol("meta_license_spdxid", []).append( parse_meta_entry(meta_license, key="spdxId") ) meta_maintainers = meta.get("maintainers", {}) setcol("meta_maintainers_email", []).append( parse_meta_entry(meta_maintainers, key="email") ) return pd.DataFrame(dict_selected).astype(str) ================================================ FILE: src/nixmeta/scanner.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2023 Technology Innovation Institute (TII) # SPDX-License-Identifier: Apache-2.0 """Summarize nixpkgs meta-attributes""" import pathlib import subprocess from tempfile import NamedTemporaryFile import pandas as pd from common.df import df_from_csv_file, df_to_csv_file from common.log import LOG, LOG_SPAM from common.proc import exec_cmd, nix_cmd from nixmeta.flake_metadata import get_flake_metadata, nixref_to_nixpkgs_path from 
nixmeta.metadata_json import parse_json_metadata ############################################################################### def _run_nix_env_metadata(cmd, stdout): """Run nix-env metadata scan while keeping successful eval warnings quiet.""" ret = subprocess.run( cmd, encoding="utf-8", check=True, stdout=stdout, stderr=subprocess.PIPE, ) if ret.stderr: LOG.debug("nix-env metadata stderr:\n%s", ret.stderr.strip()) class NixMetaScanner: """Scan nixpkgs meta-info""" def __init__(self): self.df_meta = None def scan(self, nixref): """ Scan nixpkgs meta-info using nixpkgs version pinned in nixref; nixref can be a nix store path, flakeref or dynamical attribute set. """ nixpkgs_path = nixref_to_nixpkgs_path( nixref, get_flake_metadata_fn=lambda flakeref: get_flake_metadata( flakeref, exec_cmd_fn=exec_cmd, nix_cmd_fn=nix_cmd, log=LOG, ), log=LOG, log_spam=LOG_SPAM, ) if not nixpkgs_path: # try format which is understood by nix-env: # https://ianthehenry.com/posts/how-to-learn-nix/chipping-away-at-flakes/ # ownpkgs-nix-env.nix: # { ... }: # (builtins.getFlake "/tmp/ownpkgs-special-unstable"). # outputs.packages.${builtins.currentSystem} # and execute # NIX_PATH="nixpkgs=/tmp/ownpkgs-special-unstable/ownpkgs-nix-env.nix" # sbomnix /nix/store/outputpath-for-ownpkgs-special-unstable-flake-output nixpkgs_path = pathlib.Path(nixref) self.scan_path(nixpkgs_path) def scan_path(self, nixpkgs_path): """Scan nixpkgs meta-info using an already resolved nixpkgs path.""" nixpkgs_path = pathlib.Path(nixpkgs_path) if not nixpkgs_path.exists(): LOG.warning("Nixpkgs not in nix store: %s", nixpkgs_path.as_posix()) return LOG.debug("nixpkgs: %s", nixpkgs_path) self._read_nixpkgs_meta(nixpkgs_path) def scan_expression(self, expression, *, impure=False): """Scan nixpkgs meta-info using an expression returning a package set.""" prefix = "nixmeta_expr_" suffix = ".nix" with NamedTemporaryFile( mode="w", delete=True, encoding="utf-8", prefix=prefix, suffix=suffix, ) as f: f.write(expression) f.flush() self._read_nixpkgs_meta( pathlib.Path(f.name), enable_flakes=True, impure=impure, ) def to_csv(self, csv_path, append=False): """Export meta-info to a csv file""" csv_path = pathlib.Path(csv_path) if append and csv_path.exists(): df = df_from_csv_file(csv_path) self.df_meta = pd.concat([self.df_meta, df], ignore_index=True) self._drop_duplicates() if self.df_meta is None or self.df_meta.empty: LOG.info("Nothing to output") return csv_path.parent.mkdir(parents=True, exist_ok=True) df_to_csv_file(self.df_meta, csv_path.absolute().as_posix()) def to_df(self): """Return meta-info as dataframe""" return self.df_meta def _read_nixpkgs_meta( self, nixpkgs_path, *, enable_flakes=False, impure=False, ): prefix = "nixmeta_" suffix = ".json" with NamedTemporaryFile(delete=True, prefix=prefix, suffix=suffix) as f: LOG.info("Reading nixpkgs metadata from '%s'", nixpkgs_path.as_posix()) cmd = [ "nix-env", "-qa", "--meta", "--json", "-f", f"{nixpkgs_path.as_posix()}", ] if enable_flakes: cmd.extend(["--option", "experimental-features", "nix-command flakes"]) if impure: cmd.append("--impure") cmd.extend(["--arg", "config", "{allowAliases=false;}"]) _run_nix_env_metadata(cmd, stdout=f) LOG.debug("Generated meta.json: %s", f.name) LOG.info("Parsing nixpkgs metadata") self.df_meta = parse_json_metadata(f.name, log=LOG) self._drop_duplicates() def _drop_duplicates(self): if self.df_meta is None or self.df_meta.empty: return self.df_meta = self.df_meta.astype(str) self.df_meta.fillna("", inplace=True) uids = [ "name", "version", 
"meta_license_short", "meta_license_spdxid", "meta_homepage", ] self.df_meta.sort_values(by=uids, inplace=True) self.df_meta.drop_duplicates(subset=uids, keep="last", inplace=True) ############################################################################### ================================================ FILE: src/nixupdate/__init__.py ================================================ # SPDX-FileCopyrightText: 2023 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 ================================================ FILE: src/nixupdate/nix_outdated.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2023 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Command-line tool to list outdated nix dependencies in priority order""" import os from argparse import ArgumentParser from tempfile import NamedTemporaryFile from common.cli_args import add_verbose_argument, add_version_argument from common.errors import SbomnixError from common.log import LOG, set_log_verbosity from common.proc import exec_cmd from nixupdate.nix_visualize import ( nix_visualize_csv_to_df as _nix_visualize_csv_to_df_impl, ) from nixupdate.nix_visualize import run_nix_visualize as _run_nix_visualize_impl from nixupdate.pipeline import OutdatedScanHooks, collect_outdated_scan_data from nixupdate.pipeline import query_repology as _query_repology_impl from nixupdate.report import console_out_table as _console_out_table_impl from nixupdate.report import drop_newest_duplicates as _drop_newest_dups_impl from nixupdate.report import generate_report_df as _generate_report_df_impl from nixupdate.report import write_report as _write_report_impl from sbomnix.cli_utils import generate_temp_sbom, resolve_nix_target ############################################################################### def getargs(args=None): """Parse command line arguments""" desc = ( "Command line tool to list outdated nix dependencies for NIXREF. " "By default, the script outputs runtime dependencies of " "NIXREF that appear outdated in nixpkgs 'nix_unstable' channel - the " "list of output packages would potentially need a PR to update the " "package in nixpkgs to the latest upstream release version specified " "in the output table column 'version_upstream'. " "The list of output packages is in priority " "order based on how many other packages depend on the potentially " "outdated package." ) epil = f"Example: ./{os.path.basename(__file__)} '/nix/path/or/flakeref'" parser = ArgumentParser(description=desc, epilog=epil) # Arguments that specify the target: helps = ( "Target nix store path (e.g. derivation file or nix output path) or flakeref" ) parser.add_argument("NIXREF", help=helps, type=str) # Other arguments: helps = ( "Include locally outdated dependencies to the output. " "By default, the script " "outputs dependencies outdated in nixpkgs. With this option " "the tool also includes to the output the dependencies that are " "outdated locally (i.e. would need nix flake update or similar). " "The output list includes runtime dependencies that are locally " "outdated and would have an update available in nixpkgs nix_unstable " "channel, as well as runtime " "dependencies that are outdated in nixpkgs nix_unstable channel " "that would have an update in the package's upstream repository." ) parser.add_argument("--local", help=helps, action="store_true") helps = "Scan target buildtime instead of runtime dependencies." 
parser.add_argument("--buildtime", help=helps, action="store_true") helps = "Path to output file (default: ./nix_outdated.csv)" parser.add_argument( "-o", "--out", nargs="?", help=helps, default="nix_outdated.csv" ) add_version_argument(parser) add_verbose_argument(parser) return parser.parse_args(args) ################################################################################ def _query_repology(sbompath): return _query_repology_impl(sbompath) def _run_nix_visualize(target_path): return _run_nix_visualize_impl( target_path, exec_cmd_fn=exec_cmd, tempfile_factory=NamedTemporaryFile, log=LOG, ) def _nix_visualize_csv_to_df(csvpath): LOG.debug("Transforming nix-visualize csv to dataframe") return _nix_visualize_csv_to_df_impl(csvpath) def _generate_report_df(df_nv, df_repo): return _generate_report_df_impl(df_nv, df_repo, log=LOG) def _drop_newest_dups(df_con, df_cmp): return _drop_newest_dups_impl(df_con, df_cmp, log=LOG) def _report(df, args): _write_report_impl(df, args, log=LOG) def _console_out_table(table, local=False, buildtime=False): _console_out_table_impl(table, local=local, buildtime=buildtime, log=LOG) ################################################################################ def main(): """main entry point""" args = getargs() set_log_verbosity(args.verbose) try: _run(args) except SbomnixError as error: LOG.fatal("%s", error) raise SystemExit(1) from error def _run(args): target = resolve_nix_target(args.NIXREF, buildtime=args.buildtime) scan_data = collect_outdated_scan_data( target.path, args.buildtime, hooks=OutdatedScanHooks( query_repology=_query_repology, generate_temp_sbom=generate_temp_sbom, run_nix_visualize=_run_nix_visualize, parse_nix_visualize=_nix_visualize_csv_to_df, ), ) df_report = _generate_report_df(scan_data.nix_visualize, scan_data.repology) _report(df_report, args) ################################################################################ if __name__ == "__main__": main() ################################################################################ ================================================ FILE: src/nixupdate/nix_visualize.py ================================================ # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Helpers for running and parsing ``nix-visualize`` output.""" import pathlib from tempfile import NamedTemporaryFile from common import columns as cols from common.df import df_from_csv_file from common.log import LOG, LOG_VERBOSE from common.package_names import nix_to_repology_pkg_name from common.proc import exec_cmd def run_nix_visualize( target_path, *, exec_cmd_fn=exec_cmd, tempfile_factory=NamedTemporaryFile, log=LOG, ): """Run ``nix-visualize`` and return the generated CSV path.""" log.log(LOG_VERBOSE, "Running nix-visualize") with tempfile_factory( delete=False, prefix="nix-visualize_", suffix=".csv", ) as outfile: cmd = ["nix-visualize", f"--output={outfile.name}", target_path] exec_cmd_fn(cmd) return pathlib.Path(outfile.name) def nix_visualize_csv_to_df(csvpath): """Convert ``nix-visualize`` CSV output into Repology-comparable rows.""" df = df_from_csv_file(csvpath) re_split = ( r"^[^-]+?-" r"(.+?)-" r"(\d[-_.0-9pf]*g?b?(?:pre[0-9])*(?:\+git[0-9]*)?)" r"(?:-lib|-bin|-env|-man|-su|-dev|-doc|-info|-nc|-host|-p[0-9]+|\.drv|)" r"$" ) df[[cols.PACKAGE, cols.VERSION]] = df[cols.RAW_NAME].str.extract( re_split, expand=True, ) df[cols.PACKAGE] = df.apply( lambda row: nix_to_repology_pkg_name(row.package), axis=1, ) return df 
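# ---------------------------------------------------------------------------
# Editorial sketch, not part of the original module: demonstrates what the
# `re_split` pattern above extracts from a nix-visualize row name, and how the
# extracted pname feeds nix_to_repology_pkg_name(). The store basename below
# is hypothetical.
if __name__ == "__main__":
    import re

    _re_split = re.compile(
        r"^[^-]+?-"
        r"(.+?)-"
        r"(\d[-_.0-9pf]*g?b?(?:pre[0-9])*(?:\+git[0-9]*)?)"
        r"(?:-lib|-bin|-env|-man|-su|-dev|-doc|-info|-nc|-host|-p[0-9]+|\.drv|)"
        r"$"
    )
    _m = _re_split.match("abcd1234efgh-python3.11-requests-2.31.0")
    assert _m is not None
    print(_m.group(1), "|", _m.group(2))  # python3.11-requests | 2.31.0
    print(nix_to_repology_pkg_name(_m.group(1)))  # python:requests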
================================================ FILE: src/nixupdate/pipeline.py ================================================ # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Execution pipeline helpers for ``nix_outdated``.""" import logging from dataclasses import dataclass from typing import Any, Callable import pandas as pd from common.df import df_log from common.log import LOG, LOG_SPAM, LOG_VERBOSE from nixupdate.nix_visualize import nix_visualize_csv_to_df, run_nix_visualize from repology.adapter import RepologyAdapter, RepologyQuery from sbomnix.cli_utils import generate_temp_sbom HookFn = Callable[..., Any] @dataclass class OutdatedScanData: """Collected intermediate dataframes used by ``nix_outdated``.""" repology: pd.DataFrame nix_visualize: pd.DataFrame | None = None def query_repology(sbompath, *, adapter=None, log=LOG): """Query Repology package/version data for a generated SBOM.""" log.log(LOG_VERBOSE, "Querying repology") if adapter is None: adapter = RepologyAdapter() return adapter.query( RepologyQuery( repository="nix_unstable", sbom_cdx=sbompath, ) ) @dataclass class OutdatedScanHooks: """Injectable helpers used by ``collect_outdated_scan_data``.""" query_repology: HookFn = query_repology generate_temp_sbom: HookFn = generate_temp_sbom run_nix_visualize: HookFn = run_nix_visualize parse_nix_visualize: HookFn = nix_visualize_csv_to_df def collect_outdated_scan_data( target_path, buildtime, hooks=None, ): """Collect Repology and ``nix-visualize`` inputs for reporting.""" hooks = OutdatedScanHooks() if hooks is None else hooks dtype = "buildtime" if buildtime else "runtime" LOG.verbose("Checking %s dependencies referenced by '%s'", dtype, target_path) df_nix_visualize = None sbom_artifact = hooks.generate_temp_sbom( target_path, buildtime, prefix="nixdeps_", cdx_suffix=".cdx.json", ) try: sbom_path = sbom_artifact.cdx_path LOG.debug("Using SBOM '%s'", sbom_path) df_repology = hooks.query_repology(sbom_path) finally: if not LOG.isEnabledFor(logging.DEBUG): sbom_artifact.cleanup() df_log(df_repology, LOG_SPAM) if buildtime: LOG.verbose("Not running nix-visualize due to '--buildtime' argument") else: nix_visualize_out = hooks.run_nix_visualize(target_path) LOG.debug("Using nix-visualize out: '%s'", nix_visualize_out) try: df_nix_visualize = hooks.parse_nix_visualize(nix_visualize_out) df_log(df_nix_visualize, LOG_SPAM) finally: if not LOG.isEnabledFor(logging.DEBUG): nix_visualize_out.unlink(missing_ok=True) df_log(df_repology, logging.DEBUG) df_log(df_nix_visualize, logging.DEBUG) return OutdatedScanData( repology=df_repology, nix_visualize=df_nix_visualize, ) ================================================ FILE: src/nixupdate/report.py ================================================ # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Report shaping helpers for ``nix_outdated``.""" import logging from tabulate import tabulate from common import columns as cols from common.df import df_log, df_to_csv_file from common.log import LOG, LOG_SPAM, LOG_VERBOSE def generate_report_df(df_nix_visualize, df_repology, *, log=LOG, log_spam=LOG_SPAM): """Merge Repology and ``nix-visualize`` data into a reporting dataframe.""" if df_nix_visualize is None: df_repology = df_repology.copy(deep=True) df_repology[cols.LEVEL] = "0" df_repology.rename( columns={cols.VERSION: cols.VERSION_REPOLOGY}, inplace=True, ) return df_repology df = df_nix_visualize.merge( 
df_repology, how="left", left_on=[cols.PACKAGE, cols.VERSION], right_on=[cols.PACKAGE, cols.VERSION_SBOM], suffixes=["", "_repology"], ) log.log(log_spam, "Merged nix-visualize and repology data:") df_log(df, log_spam) return df def drop_newest_duplicates(df_console, df_compare, *, log=LOG): """Drop outdated rows when a corresponding ``newest`` row also exists.""" df_ret = df_console.copy(deep=True) for row in df_console.itertuples(): df_pkgs = df_compare[df_compare[cols.PACKAGE] == row.nix_package] df_newest = df_pkgs[df_pkgs[cols.STATUS] == "newest"] if not df_newest.empty: log.debug( "Ignoring outdated package '%s' since newest version is also available", row.nix_package, ) df_ret = df_ret[df_ret.nix_package != row.nix_package] return df_ret def console_out_table(table, *, local=False, buildtime=False, log=LOG): """Write the formatted console table.""" update_target = "in nixpkgs" if local: update_target = "locally" priority = ":" if not buildtime: priority = ( " (in priority order based on how many other " "packages depend on the potentially outdated package):" ) log.info( "Dependencies that need update %s%s\n\n%s\n\n", update_target, priority, table, ) def write_report(df, args, *, log=LOG): """Write the nix-outdated console and CSV reports.""" if df is None or df.empty: log.info("No outdated dependencies found") return log.log(LOG_VERBOSE, "Writing console report") select_cols = { cols.LEVEL: "priority", cols.PACKAGE: "nix_package", cols.VERSION_SBOM: cols.VERSION_LOCAL, cols.VERSION_REPOLOGY: cols.VERSION_NIXPKGS, cols.NEWEST_UPSTREAM_RELEASE: cols.VERSION_UPSTREAM, } if args.local: df_console = df[df[cols.SBOM_VERSION_CLASSIFY] == "sbom_pkg_needs_update"] df_console = df_console.rename(columns=select_cols)[select_cols.values()] df_console.drop_duplicates( df_console.columns.difference(["priority"]), keep="first", inplace=True ) if args.buildtime: df_console = df_console.drop(["priority"], axis=1) table = tabulate( df_console, headers="keys", tablefmt="orgtbl", numalign="center", showindex=False, ) console_out_table(table, local=args.local, buildtime=args.buildtime, log=log) else: df_console = df[df[cols.REPO_VERSION_CLASSIFY] == "repo_pkg_needs_update"] df_console = df_console.rename(columns=select_cols)[select_cols.values()] df_console.drop_duplicates( df_console.columns.difference(["priority"]), keep="first", inplace=True ) df_console = drop_newest_duplicates(df_console, df, log=log) if args.buildtime: df_console = df_console.drop(["priority"], axis=1) table = tabulate( df_console, headers="keys", tablefmt="orgtbl", numalign="center", showindex=False, ) console_out_table(table, local=args.local, buildtime=args.buildtime, log=log) if log.isEnabledFor(logging.DEBUG): df_to_csv_file(df, "df_nixoutdated_merged.csv") df_to_csv_file(df_console, args.out) ================================================ FILE: src/provenance/__init__.py ================================================ # SPDX-FileCopyrightText: 2024 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 ================================================ FILE: src/provenance/dependencies.py ================================================ # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Helpers for provenance dependency resolution.""" import logging from dataclasses import dataclass, field from typing import Any, Callable from common.errors import InvalidNixJsonError from common.log import LOG, LOG_VERBOSE from common.nix_utils import ( 
NIX_PATH_INFO_JSON, nix_path_info_references, parse_nix_derivation_show, ) from common.proc import exec_cmd, nix_cmd from provenance.digests import normalize_digest, output_digest from provenance.nix_commands import exec_required_nix_command from provenance.path_info import query_path_hashes, query_path_info from provenance.subjects import output_path HookFn = Callable[..., Any] @dataclass class DependencyHooks: """Injectable helpers used by provenance dependency resolution.""" exec_cmd_fn: HookFn = exec_cmd query_path_hashes_fn: HookFn = field(default_factory=lambda: query_path_hashes) parse_nix_derivation_show_fn: HookFn = parse_nix_derivation_show normalize_digest_fn: HookFn = normalize_digest output_digest_fn: HookFn = output_digest output_path_fn: HookFn = output_path log: logging.Logger = LOG def derivation_outputs_by_path(infos, hooks=None): """Index derivation info by absolute output path.""" hooks = DependencyHooks() if hooks is None else hooks outputs_by_path = {} for info in infos.values(): if not isinstance(info, dict): continue outputs = info.get("outputs") if not isinstance(outputs, dict): continue env = info.get("env") for name, output in outputs.items(): resolved_output_path = hooks.output_path_fn(name, output, env) if resolved_output_path: outputs_by_path[resolved_output_path] = (info, output) return outputs_by_path def dependency_paths(drv_path, recursive=False, outputs_by_path=None, hooks=None): """Return dependency store paths from structured path-info data.""" hooks = DependencyHooks() if hooks is None else hooks path_infos = query_path_info( [drv_path], exec_cmd_fn=hooks.exec_cmd_fn, recursive=recursive, ) if path_infos is None: return [] if recursive: paths = list(path_infos) for path in outputs_by_path or (): if path not in path_infos: paths.append(path) return paths drv_info = path_infos.get(drv_path) if drv_info is None: raise InvalidNixJsonError( NIX_PATH_INFO_JSON, f"missing path-info record for `{drv_path}`", ) return list(nix_path_info_references(drv_info, drv_path)) def dependency_package(drv, output_hash, infos, outputs_by_path, hooks=None): """Create a dependency package entry with a normalized digest.""" hooks = DependencyHooks() if hooks is None else hooks info = infos.get(drv) output_info = outputs_by_path.get(drv) if output_info: info = output_info[0] digest = hooks.output_digest_fn(output_info[1]) if output_info else None if digest is None: digest = hooks.normalize_digest_fn(output_hash) if digest is None: hooks.log.warning("Cannot determine digest for dependency '%s'", drv) return None package = { "name": drv.split("-", 1)[-1].removesuffix(".drv"), "uri": drv, "digest": digest, } if info: package["name"] = info["name"] if version := info["env"].get("version"): package["annotations"] = {"version": version} return package def get_dependencies(drv_path, recursive=False, hooks=None): """Get dependencies of derivation and parse them into ResourceDescriptors.""" hooks = DependencyHooks() if hooks is None else hooks hooks.log.log( LOG_VERBOSE, "Querying derivation dependencies %s", "recursively" if recursive else "", ) cmd = nix_cmd("derivation", "show", "-r", drv_path) infos = hooks.parse_nix_derivation_show_fn( exec_required_nix_command(cmd, hooks.exec_cmd_fn).stdout, store_path_hint=drv_path, ) outputs_by_path = derivation_outputs_by_path(infos, hooks=hooks) references = dependency_paths( drv_path, recursive=recursive, outputs_by_path=outputs_by_path, hooks=hooks, ) hashes = hooks.query_path_hashes_fn(references, exec_cmd_fn=hooks.exec_cmd_fn) 
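    # Illustrative sketch (not part of the original module): the DependencyHooks
    # dataclass above is the seam that lets tests replace the nix subprocess
    # helpers, e.g. roughly:
    #
    #   from types import SimpleNamespace
    #   fake = DependencyHooks(
    #       exec_cmd_fn=lambda cmd, **kwargs: SimpleNamespace(stdout="{}"),
    #       query_path_hashes_fn=lambda paths, **kwargs: [None] * len(paths),
    #   )
    #   get_dependencies("/nix/store/...-example.drv", hooks=fake)
    #
    # The stub return values and the store path are hypothetical; the
    # repository's tests may wire these hooks differently.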
dependencies = []
    for drv, output_hash in zip(references, hashes, strict=True):
        hooks.log.debug("Creating dependency entry for %s", drv)
        package = dependency_package(
            drv,
            output_hash,
            infos,
            outputs_by_path,
            hooks=hooks,
        )
        if package is not None:
            dependencies.append(package)
    return dependencies


================================================ FILE: src/provenance/digests.py ================================================
# SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII)
#
# SPDX-License-Identifier: Apache-2.0

"""Digest normalization helpers for provenance generation."""

import base64
import binascii
import re

HASH_SIZE_BYTES = {
    "blake3": 32,
    "md5": 16,
    "sha1": 20,
    "sha256": 32,
    "sha512": 64,
}

NIX32_ALPHABET = "0123456789abcdfghijklmnpqrsvwxyz"
NIX32_INDEX = {char: index for index, char in enumerate(NIX32_ALPHABET)}


def canonical_hash_algo(hash_algo):
    """Normalize Nix hash algorithm labels to plain algorithm names."""
    if not hash_algo:
        return None
    return str(hash_algo).removeprefix("r:")


def hash_size_bytes(hash_algo):
    """Return expected digest size for the given algorithm."""
    hash_algo = canonical_hash_algo(hash_algo)
    if hash_algo is None:
        return None
    return HASH_SIZE_BYTES.get(hash_algo)


def decode_nix32(hash_value, size_bytes):
    """Decode nix base32 digest strings into raw bytes."""
    try:
        value = 0
        for char in hash_value:
            value = value * 32 + NIX32_INDEX[char]
    except KeyError:
        return None
    if value.bit_length() > size_bytes * 8:
        return None
    encoded_size = (len(hash_value) * 5 + 7) // 8
    raw = value.to_bytes(encoded_size, "little")
    return raw[:size_bytes].ljust(size_bytes, b"\0")


def decode_hash_bytes(hash_value, hash_algo):
    """Decode known Nix hash encodings into raw bytes."""
    size_bytes = hash_size_bytes(hash_algo)
    if size_bytes is None:
        return None
    if re.fullmatch(rf"[0-9a-f]{{{size_bytes * 2}}}", hash_value):
        return bytes.fromhex(hash_value)
    if len(hash_value) == (size_bytes * 8 + 4) // 5:
        decoded = decode_nix32(hash_value, size_bytes)
        if decoded is not None:
            return decoded
    padding = "=" * (-len(hash_value) % 4)
    try:
        decoded = base64.b64decode(hash_value + padding, validate=True)
    except (ValueError, binascii.Error):
        return None
    if len(decoded) != size_bytes:
        return None
    return decoded


def split_hash_value(hash_value, hash_algo=None):
    """Split a typed hash string into canonical algorithm and raw value."""
    hash_algo = canonical_hash_algo(hash_algo)
    hash_value = str(hash_value).strip()
    if hash_algo:
        for separator in (":", "-"):
            resource_prefix = f"r:{hash_algo}{separator}"
            if hash_value.startswith(resource_prefix):
                return hash_algo, hash_value.removeprefix(resource_prefix)
            prefix = f"{hash_algo}{separator}"
            if hash_value.startswith(prefix):
                return hash_algo, hash_value.removeprefix(prefix)
    match = re.match(
        r"^(?P<algo>(?:r:)?[A-Za-z0-9]+)(?P<sep>[:-])(?P<rest>.+)$",
        hash_value,
    )
    if match:
        return canonical_hash_algo(match.group("algo")), match.group("rest")
    return hash_algo, hash_value


def normalize_digest(hash_value, hash_algo=None):
    """Return digest in a canonical base16 representation."""
    if not hash_value:
        return None
    hash_value = str(hash_value).strip()
    if not hash_value:
        return None
    hash_algo, raw_hash_value = split_hash_value(hash_value, hash_algo=hash_algo)
    if not hash_algo:
        return None
    decoded = decode_hash_bytes(raw_hash_value, hash_algo)
    if decoded is None:
        return None
    return {hash_algo: decoded.hex()}


def output_digest(data, *, normalize_digest_fn=normalize_digest):
    """Return digest from derivation output metadata when available."""
    if not isinstance(data, dict):
        return
None hash_value = data.get("hash") if not hash_value: return None return normalize_digest_fn(hash_value, hash_algo=data.get("hashAlgo")) ================================================ FILE: src/provenance/main.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2024 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Python script that generates SLSA v1.0 provenance file for a nix target""" import argparse import json import os from dataclasses import dataclass from common.cli_args import add_verbose_argument, add_version_argument from common.errors import SbomnixError from common.log import LOG, set_log_verbosity from common.nix_utils import parse_nix_derivation_show from common.proc import exec_cmd, nix_cmd from provenance.dependencies import DependencyHooks, get_dependencies from provenance.digests import normalize_digest, output_digest from provenance.schema import ( SchemaHooks, get_external_parameters, get_internal_parameters, provenance_document, timestamp, ) from provenance.subjects import SubjectHooks, get_subjects, output_path @dataclass class BuildMeta: """Dataclass for build metadata""" build_type: str builder_id: str invocation_id: str build_begin_ts: str build_finished_ts: str external_parameters: str internal_parameters: str def get_env_metadata(): """Read build metadata from env variables""" # these need to be in the same order as the fields in BuildMeta definition env_vars = [ "PROVENANCE_BUILD_TYPE", "PROVENANCE_BUILDER_ID", "PROVENANCE_INVOCATION_ID", "PROVENANCE_TIMESTAMP_BEGIN", "PROVENANCE_TIMESTAMP_FINISHED", "PROVENANCE_EXTERNAL_PARAMS", "PROVENANCE_INTERNAL_PARAMS", ] values = [os.environ.get(name, "") for name in env_vars] LOG.verbose("Reading metadata from environment:") for name, value in zip(env_vars, values, strict=True): LOG.verbose("| %s = %s", name, value) return BuildMeta(*values) def provenance(target: str, metadata: BuildMeta, recursive: bool = False) -> dict: """Create the provenance file""" return provenance_document( target, metadata, recursive=recursive, hooks=SchemaHooks( exec_cmd_fn=exec_cmd, nix_cmd_fn=nix_cmd, parse_nix_derivation_show_fn=parse_nix_derivation_show, get_subjects_fn=lambda outputs, env=None: get_subjects( outputs, env=env, hooks=SubjectHooks( exec_cmd_fn=exec_cmd, normalize_digest_fn=normalize_digest, output_digest_fn=output_digest, output_path_fn=output_path, log=LOG, ), ), get_dependencies_fn=lambda drv_path, recursive=False: get_dependencies( drv_path, recursive=recursive, hooks=DependencyHooks( exec_cmd_fn=exec_cmd, parse_nix_derivation_show_fn=parse_nix_derivation_show, normalize_digest_fn=normalize_digest, output_digest_fn=output_digest, output_path_fn=output_path, log=LOG, ), ), get_external_parameters_fn=get_external_parameters, get_internal_parameters_fn=get_internal_parameters, timestamp_fn=timestamp, log=LOG, ), ) def getargs(args=None): """Parse command line arguments""" parser = argparse.ArgumentParser( prog="nix-provenance", description="Get SLSA v1.0 provenance file from nix flake or derivation", ) parser.add_argument( "target", help="Flake reference or derivation path", ) parser.add_argument( "--recursive", action="store_true", help="Resolve every dependency recursively", ) parser.add_argument( "-o", "--out", help="Path to file where provenance should be saved", default=os.environ.get("PROVENANCE_OUTPUT_FILE"), ) add_verbose_argument(parser) add_version_argument(parser) return parser.parse_args(args) def main(): """main entry point""" 
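    # Illustrative invocation (assumed, not taken from the sources): the build
    # metadata consumed by get_env_metadata() arrives via the PROVENANCE_*
    # environment variables listed above, e.g. roughly:
    #
    #   PROVENANCE_BUILDER_ID=https://builder.example.org \
    #   PROVENANCE_TIMESTAMP_BEGIN=1700000000 \
    #   nix-provenance nixpkgs#hello --out provenance.json
    #
    # The builder id and flake reference are hypothetical placeholders.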
args = getargs() set_log_verbosity(args.verbose) build_metadata = get_env_metadata() try: schema = provenance(args.target, build_metadata, recursive=args.recursive) except SbomnixError as error: LOG.fatal("%s", error) raise SystemExit(1) from error if args.out: with open(args.out, "w", encoding="utf-8") as filepath: LOG.info("Writing provenance file into '%s'", args.out) filepath.write(json.dumps(schema, indent=2)) else: print(json.dumps(schema, indent=2)) if __name__ == "__main__": main() ================================================ FILE: src/provenance/nix_commands.py ================================================ # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Nix command helpers for provenance generation.""" import subprocess from common.errors import NixCommandError def exec_required_nix_command(cmd, exec_cmd_fn): """Run a required Nix command and raise a user-facing error on failure.""" try: return exec_cmd_fn(cmd) except subprocess.CalledProcessError as error: raise NixCommandError( cmd, stderr=error.stderr, stdout=error.stdout, ) from None ================================================ FILE: src/provenance/path_info.py ================================================ # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Structured Nix path-info helpers for provenance generation.""" import errno import subprocess from common.errors import InvalidNixJsonError, NixCommandError from common.nix_utils import ( NIX_PATH_INFO_JSON, load_nix_json, nix_path_info_nar_hash, normalize_nix_path_info, ) from common.proc import exec_cmd, nix_cmd def query_path_info( paths, *, exec_cmd_fn=exec_cmd, recursive=False, raise_on_error=True, ): """Return structured path-info records indexed by store path.""" if not paths: return {} recursive_args = ["--recursive"] if recursive else [] cmd = nix_cmd( "path-info", "--json", "--json-format", "1", *recursive_args, *paths, ) try: ret = exec_cmd_fn(cmd, raise_on_error=raise_on_error) except subprocess.CalledProcessError as error: raise NixCommandError( cmd, stderr=error.stderr, stdout=error.stdout, ) from None if ret is None: return None return normalize_nix_path_info(load_nix_json(ret.stdout, NIX_PATH_INFO_JSON)) def query_path_hashes(paths, *, exec_cmd_fn=exec_cmd): """Query NAR hashes for paths, splitting requests that exceed argv limits.""" paths = list(paths) if not paths: return [] try: path_infos = query_path_info(paths, exec_cmd_fn=exec_cmd_fn) except OSError as error: if error.errno != errno.E2BIG or len(paths) == 1: raise midpoint = len(paths) // 2 return query_path_hashes( paths[:midpoint], exec_cmd_fn=exec_cmd_fn, ) + query_path_hashes( paths[midpoint:], exec_cmd_fn=exec_cmd_fn, ) if path_infos is None: return [] return [nar_hash_for_path(path_infos, path) for path in paths] def nar_hash_for_path(path_infos, path): """Return the NAR hash for one path-info record.""" info = path_infos.get(path) if info is None: raise InvalidNixJsonError( NIX_PATH_INFO_JSON, f"missing path-info record for `{path}`", ) return nix_path_info_nar_hash(info, path) ================================================ FILE: src/provenance/schema.py ================================================ # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Helpers for assembling provenance documents.""" import json import logging from dataclasses import dataclass from datetime import datetime, 
timezone from typing import Any, Callable, Protocol from common.errors import InvalidNixJsonError, MissingNixDerivationMetadataError from common.log import LOG, LOG_VERBOSE from common.nix_utils import NIX_DERIVATION_SHOW_JSON, parse_nix_derivation_show from common.proc import exec_cmd, nix_cmd from provenance.dependencies import get_dependencies from provenance.nix_commands import exec_required_nix_command from provenance.subjects import get_subjects JsonDict = dict[str, Any] HookFn = Callable[..., Any] class ProvenanceMetadata(Protocol): """Build metadata fields consumed by provenance schema assembly.""" build_type: str builder_id: str invocation_id: str build_begin_ts: str build_finished_ts: str external_parameters: str internal_parameters: str def get_external_parameters(metadata: ProvenanceMetadata) -> JsonDict: """Get externalParameters from env variable.""" params = json.loads(metadata.external_parameters or "{}") return {key: value for key, value in params.items() if value} def get_internal_parameters(metadata: ProvenanceMetadata) -> JsonDict: """Get internalParameters from env variable.""" params = json.loads(metadata.internal_parameters or "{}") return {key: value for key, value in params.items() if value} def timestamp(unix_time: str) -> str: """Turn unix timestamp into RFC 3339 format.""" if not unix_time: return "" dtime = datetime.fromtimestamp( int(unix_time), tz=timezone.utc, ) return dtime.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-4] + "Z" @dataclass class SchemaHooks: """Injectable helpers used by provenance schema assembly.""" exec_cmd_fn: HookFn = exec_cmd nix_cmd_fn: HookFn = nix_cmd parse_nix_derivation_show_fn: HookFn = parse_nix_derivation_show get_subjects_fn: HookFn = get_subjects get_dependencies_fn: HookFn = get_dependencies get_external_parameters_fn: HookFn = get_external_parameters get_internal_parameters_fn: HookFn = get_internal_parameters timestamp_fn: HookFn = timestamp log: logging.Logger = LOG def provenance_document( target: str, metadata: ProvenanceMetadata, recursive: bool = False, hooks: SchemaHooks | None = None, ) -> JsonDict: """Create the provenance file.""" hooks = SchemaHooks() if hooks is None else hooks hooks.log.info("Generating provenance file for '%s'", target) cmd = hooks.nix_cmd_fn("derivation", "show", target) drv_json = hooks.parse_nix_derivation_show_fn( exec_required_nix_command(cmd, hooks.exec_cmd_fn).stdout, store_path_hint=target, ) if not drv_json: raise MissingNixDerivationMetadataError(target) drv_path, drv_json = next(iter(drv_json.items())) outputs = drv_json.get("outputs") if outputs is None: raise InvalidNixJsonError( NIX_DERIVATION_SHOW_JSON, f"missing `outputs` in target derivation `{drv_path}`", ) hooks.log.log(LOG_VERBOSE, "Resolved derivation path is '%s'", drv_path) return { "_type": "https://in-toto.io/Statement/v1", "subject": hooks.get_subjects_fn(outputs, env=drv_json.get("env")), "predicateType": "https://slsa.dev/provenance/v1", "predicate": { "buildDefinition": { "buildType": metadata.build_type, "externalParameters": hooks.get_external_parameters_fn(metadata), "internalParameters": hooks.get_internal_parameters_fn(metadata), "resolvedDependencies": hooks.get_dependencies_fn(drv_path, recursive), }, "runDetails": { "builder": { "id": metadata.builder_id, "builderDependencies": [], "version": {}, }, "metadata": { "invocationId": metadata.invocation_id, "startedOn": hooks.timestamp_fn(metadata.build_begin_ts), "finishedOn": hooks.timestamp_fn(metadata.build_finished_ts), }, "byproducts": [], }, }, } 
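A minimal usage sketch of the helpers above (not part of the repository;
assumes src/ is importable and uses hypothetical metadata values):

import json
from types import SimpleNamespace

from provenance.schema import get_external_parameters, timestamp

meta = SimpleNamespace(
    external_parameters=json.dumps({"target": "hello", "unused": ""}),
)
# Falsy values are dropped from the parameter dictionary:
assert get_external_parameters(meta) == {"target": "hello"}
# Unix timestamps render as RFC 3339 with two fractional digits:
assert timestamp("1700000000") == "2023-11-14T22:13:20.00Z"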
================================================ FILE: src/provenance/subjects.py ================================================ # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Helpers for deriving in-toto subjects from nix outputs.""" import logging from collections.abc import Mapping from dataclasses import dataclass from typing import Any, Callable from common.log import LOG, LOG_VERBOSE from common.proc import ExecCmdFn, exec_cmd from provenance.digests import normalize_digest, output_digest from provenance.path_info import nar_hash_for_path, query_path_info Digest = dict[str, str] Subject = dict[str, Any] OutputPathFn = Callable[[str, Any, Mapping[str, str] | None], str | None] OutputDigestFn = Callable[[Any], Digest | None] NormalizeDigestFn = Callable[..., Digest | None] def output_path( name: str, output: Any, env: Mapping[str, str] | None = None, ) -> str | None: """Return the resolved absolute output path from outputs or env.""" if isinstance(output, dict) and output.get("path"): return str(output["path"]) env = env or {} return env.get(name) @dataclass class SubjectHooks: """Injectable helpers used by ``get_subjects``.""" exec_cmd_fn: ExecCmdFn = exec_cmd normalize_digest_fn: NormalizeDigestFn = normalize_digest output_digest_fn: OutputDigestFn = output_digest output_path_fn: OutputPathFn = output_path log: logging.Logger = LOG def get_subjects( outputs: Mapping[str, Any], env: Mapping[str, str] | None = None, hooks: SubjectHooks | None = None, ) -> list[Subject]: """Parse derivation outputs into in-toto subjects.""" hooks = SubjectHooks() if hooks is None else hooks hooks.log.log(LOG_VERBOSE, "Parsing derivation outputs") env = env or {} subjects: list[Subject] = [] for name, data in outputs.items(): resolved_output_path = hooks.output_path_fn(name, data, env) subject: Subject = {"name": name} resolved_output_digest = hooks.output_digest_fn(data) if resolved_output_path: subject["uri"] = resolved_output_path if resolved_output_digest is not None: subject["digest"] = resolved_output_digest hooks.log.log( LOG_VERBOSE, "Using derivation metadata hash for fixed-output output '%s'", name, ) elif resolved_output_path: path_infos = query_path_info( [resolved_output_path], exec_cmd_fn=hooks.exec_cmd_fn, raise_on_error=False, ) if path_infos is None or resolved_output_path not in path_infos: hooks.log.warning( "Derivation output '%s' was not found in the nix store, " "assuming it was not built.", name, ) continue digest = hooks.normalize_digest_fn( nar_hash_for_path(path_infos, resolved_output_path) ) if digest is None: hooks.log.warning( "Cannot normalize NAR hash for derivation output '%s'", name, ) continue subject["digest"] = digest else: hooks.log.warning( "Cannot determine path or digest for derivation output '%s'", name, ) continue subjects.append(subject) return subjects ================================================ FILE: src/repology/__init__.py ================================================ # SPDX-FileCopyrightText: 2023 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Repology package exports.""" from . 
import cves, reporting __all__ = ["cves", "reporting"] ================================================ FILE: src/repology/adapter.py ================================================ # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Repology query adapter.""" import pathlib import re import urllib.parse from dataclasses import dataclass from typing import Optional, cast import numpy as np import pandas as pd import repology.exceptions from common import columns as cols from common.df import df_regex_filter from common.log import LOG from repology.session import DEFAULT_REPOLOGY_SESSION, REPOLOGY_REQUEST_TIMEOUT from .cves import parse_cve_html from .projects_parser import parse_projects_search_html from .sbom import ( is_ignored_sbom_package, make_sbom_status_row, merge_sbom_fields, parse_cdx_sbom, sbom_row_classify, ) REPOLOGY_PROJECTS_URL = "https://repology.org/projects/" REPOLOGY_PROJECT_URL = "https://repology.org/project/" @dataclass class RepologyQuery: """Repology query parameters independent of the CLI parser.""" repository: str pkg_exact: Optional[str] = None pkg_search: Optional[str] = None sbom_cdx: Optional[pathlib.Path] = None re_package: Optional[str] = None re_version: Optional[str] = None re_status: Optional[str] = None re_vuln: Optional[str] = None def __post_init__(self): if self.sbom_cdx and not isinstance(self.sbom_cdx, pathlib.Path): self.sbom_cdx = pathlib.Path(self.sbom_cdx) query_modes = ( bool(self.pkg_exact), bool(self.pkg_search), self.sbom_cdx is not None, ) if sum(query_modes) != 1: raise ValueError( "RepologyQuery requires exactly one of pkg_exact, " "pkg_search, or sbom_cdx" ) if not self.repository: raise ValueError("RepologyQuery requires a repository name") def repo_row_classify(row): """Classify repository-side version status.""" if row.status == "outdated": return "repo_pkg_needs_update" return "" class RepologyAdapter: """Query and parse Repology package data.""" def __init__(self, session=None, request_timeout=REPOLOGY_REQUEST_TIMEOUT): self.session = DEFAULT_REPOLOGY_SESSION if session is None else session self.request_timeout = request_timeout self.url_projects = REPOLOGY_PROJECTS_URL self._reset_state() def _reset_state(self): self.processed = set() self.pkgs_dict = {} self.df = pd.DataFrame() self.urlq = None self.df_sbom = None def _packages_to_df(self, query, re_pkg_internal=None): if not self.pkgs_dict: return LOG.debug("packages in pkgs_dict: %s", len(self.pkgs_dict[cols.PACKAGE])) df: pd.DataFrame = pd.DataFrame.from_dict(self.pkgs_dict) df_cols = list(df.columns) if query.repository and cols.REPO in df_cols: df = df_regex_filter(df, cols.REPO, re.escape(query.repository)) if re_pkg_internal and cols.PACKAGE in df_cols: re_pkg_internal = f"^(?:[a-z0-9]+:)?{re.escape(re_pkg_internal)}$" df = df_regex_filter(df, cols.PACKAGE, re_pkg_internal) if query.re_package and cols.PACKAGE in df_cols: df = df_regex_filter(df, cols.PACKAGE, query.re_package) if query.re_version and cols.VERSION in df_cols: df = df_regex_filter(df, cols.VERSION, query.re_version) if query.re_status and cols.STATUS in df_cols: df = df_regex_filter(df, cols.STATUS, query.re_status) if query.re_vuln and cols.POTENTIALLY_VULNERABLE in df_cols: df = df_regex_filter(df, cols.POTENTIALLY_VULNERABLE, query.re_vuln) self.df = pd.concat([self.df, cast(pd.DataFrame, df)]) self.df.replace(np.nan, "", regex=True, inplace=True) self.df.drop_duplicates(keep="first", inplace=True) 
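        # Illustrative note (not from the original module): with
        # re_pkg_internal="openssl" (a hypothetical name), the anchored
        # pattern built above is r"^(?:[a-z0-9]+:)?openssl$", so it matches
        # "openssl" as well as prefixed project names like "python:openssl".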
self.df.sort_values(by=self.df.columns.values.tolist(), inplace=True) self.df.reset_index(drop=True, inplace=True) def _append_package_rows(self, package_rows): for package_row in package_rows: for key, value in package_row.items(): self.pkgs_dict.setdefault(key, []).append(value) def _get_resp(self, url): LOG.debug("GET: %s", url) resp = self.session.get(url, timeout=self.request_timeout) LOG.debug("resp.status_code: %s", resp.status_code) if resp.status_code == 404: LOG.fatal("No matching packages found") raise repology.exceptions.RepologyNoMatchingPackages resp.raise_for_status() return resp def query_cves(self, pkg_name, pkg_version): """Query vulnerabilities for a single package/version pair.""" pkg = urllib.parse.quote(pkg_name) ver = urllib.parse.quote(pkg_version) query = f"{REPOLOGY_PROJECT_URL}{pkg}/cves?version={ver}" LOG.debug("GET: %s", query) resp = self.session.get(query, timeout=self.request_timeout) LOG.debug("resp.status_code: %s", resp.status_code) if resp.status_code == 404: LOG.warning("Repology package '%s' not found", pkg_name) return None resp.raise_for_status() return parse_cve_html(resp.text, pkg_name, pkg_version) def _query_pkg_search(self, pkg_search, repository, stop_pkg=None): pkg = urllib.parse.quote(pkg_search) repo = urllib.parse.quote(repository) search_term = f"?search={pkg}&inrepo={repo}" url = f"{self.url_projects}{search_term}" self.urlq = url while True: resp = self._get_resp(url) url_last = url page = parse_projects_search_html( resp.text, repository, self.processed, pkg_stop=stop_pkg, ) self.processed = page.processed_ids self._append_package_rows(page.package_rows) next_query_project = page.next_query_project if not next_query_project: LOG.debug("stopping (no next_query_project)") break next_query_project = urllib.parse.quote(next_query_project) url = f"{self.url_projects}{next_query_project}/{search_term}" if url == url_last: LOG.debug("stopping ('%s'=='%s')", url_last, url) break def _query_pkg_exact(self, pkg_name, repository): self._query_pkg_search(pkg_name, repository, stop_pkg=pkg_name) def _query_sbom_cdx(self, query): self.df_sbom = parse_cdx_sbom(query.sbom_cdx) for component in self.df_sbom.to_dict("records"): LOG.debug("Package: %s", component) name = component[cols.NAME] version = component.get(cols.VERSION, "") if not name: LOG.fatal("Missing package name: %s", component) raise repology.exceptions.RepologyUnexpectedResponse pkg_id = f"{query.repository}:{name}" if pkg_id in self.processed: LOG.debug("Package '%s' in sbom already processed", name) self._packages_to_df(query, re_pkg_internal=name) continue if not version: self._append_package_rows( [ make_sbom_status_row( query.repository, name, "", "NO_VERSION", ) ] ) self._packages_to_df(query, re_pkg_internal=name) continue if is_ignored_sbom_package(name): self._append_package_rows( [ make_sbom_status_row( query.repository, name, version, "IGNORED", ) ] ) self._packages_to_df(query, re_pkg_internal=name) continue try: self._query_pkg_exact(name, query.repository) except repology.exceptions.RepologyNoMatchingPackages: LOG.debug("Package '%s' not found in repology", name) if pkg_id not in self.processed: self._append_package_rows( [ make_sbom_status_row( query.repository, name, version, "NOT_FOUND", ) ] ) self._packages_to_df(query, re_pkg_internal=name) self.urlq = self.url_projects def query(self, query): """Query package information from repology.org.""" self._reset_state() if query.pkg_search: self._query_pkg_search(query.pkg_search, query.repository) elif query.pkg_exact: 
query.pkg_exact:
            self._query_pkg_exact(query.pkg_exact, query.repository)
        elif query.sbom_cdx:
            self._query_sbom_cdx(query)
        self._packages_to_df(query, re_pkg_internal=query.pkg_exact)
        if self.df.empty:
            LOG.debug("No matching packages found")
            raise repology.exceptions.RepologyNoMatchingPackages
        if self.df_sbom is not None:
            self.df = merge_sbom_fields(self.df_sbom, self.df)
            self.df[cols.SBOM_VERSION_CLASSIFY] = self.df.apply(
                sbom_row_classify,
                axis=1,
            )
        self.df[cols.REPO_VERSION_CLASSIFY] = self.df.apply(repo_row_classify, axis=1)
        self.df.replace(np.nan, "", regex=True, inplace=True)
        self.df.drop_duplicates(keep="first", inplace=True)
        self.df.sort_values(by=self.df.columns.values.tolist(), inplace=True)
        self.df.reset_index(drop=True, inplace=True)
        return self.df.copy(deep=True)


================================================ FILE: src/repology/cves.py ================================================
# SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII)
#
# SPDX-License-Identifier: Apache-2.0

"""Helpers for parsing Repology CVE pages."""

import re

import numpy as np
import pandas as pd
from bs4 import BeautifulSoup

import repology.exceptions
from common import columns as cols
from common.log import LOG, LOG_SPAM
from common.versioning import parse_version


def is_affected(version, affected_ver_str, *, log=LOG, log_spam=LOG_SPAM):
    """
    Return True if the version number is included in the repology affected
    version string. Also returns True if parsing the affected version string
    fails, in order to avoid false negatives.
    """
    log.log(log_spam, "Affected version(s): %s", affected_ver_str)
    version_local = parse_version(version)
    if not version_local:
        log.fatal("Unexpected local version string: %s", version)
        raise repology.exceptions.RepologyError
    affected_ver_str = f" {affected_ver_str} "
    ver_group = re.compile(
        r"(?P<beg_ind>[(\[])(?P<beg_ver>[^,]*), *(?P<end_ver>[^)\]]*)(?P<end_ind>[\])])"
    )
    matches = re.findall(ver_group, affected_ver_str)
    if matches:
        log.log(log_spam, "Parsed group version(s): %s", matches)
        for impacted_group in matches:
            if len(impacted_group) != 4:
                log.fatal("Unexpected version group: %s", affected_ver_str)
                raise repology.exceptions.RepologyUnexpectedResponse
            beg_ind = impacted_group[0]
            beg_ver_parsed = parse_version(impacted_group[1])
            if not beg_ver_parsed:
                return True
            end_ind = impacted_group[3]
            end_ver_parsed = parse_version(impacted_group[2])
            if not end_ver_parsed:
                return True
            beg_affected = False
            end_affected = False
            if (version_local > beg_ver_parsed) or (
                version_local == beg_ver_parsed and beg_ind == "["
            ):
                beg_affected = True
            if (version_local < end_ver_parsed) or (
                version_local == end_ver_parsed and end_ind == "]"
            ):
                end_affected = True
            if beg_affected and end_affected:
                return True
    ver_one = r"(?<= )(?P<ver>\d[^ $)]+)(?= )"
    matches = re.findall(ver_one, affected_ver_str)
    log.log(log_spam, "Parsed single version(s): %s", matches)
    for impacted_version_text in matches:
        impacted_version = parse_version(impacted_version_text)
        if impacted_version == version_local:
            return True
    return False


def parse_cve_html(html_text, pkg_name, pkg_version, *, log=LOG, log_spam=LOG_SPAM):
    """Parse a Repology CVE page into a dataframe."""
    soup = BeautifulSoup(html_text, "html.parser")
    tables = soup.find_all("table")
    if not tables:
        log.debug("Unexpected response: CVE table missing")
        return pd.DataFrame()
    cve_table = tables[0]
    if cve_table.thead is None or cve_table.tbody is None:
        log.debug("Unexpected response: CVE table missing header or body")
        return pd.DataFrame()
    headers = {}
    for idx, header in enumerate(cve_table.thead.find_all("th")):
headers[header.text] = idx if not headers or "CVE ID" not in headers: log.fatal("Unexpected response") raise repology.exceptions.RepologyUnexpectedResponse log.log(log_spam, headers) cve_table_rows = cve_table.tbody.find_all("tr") cve_dict = {} for row in cve_table_rows: affected_versions = row.find_all("span", {"class": "version version-outdated"}) if not affected_versions: continue cells = row.find_all("td") if not cells: continue cve_row = cells[headers["CVE ID"]] log.log(log_spam, "CVE: %s", cve_row) ver_row = cells[headers["Affected version(s)"]] log.log(log_spam, "Versions: %s", ver_row) if not is_affected(pkg_version, ver_row.text, log=log, log_spam=log_spam): continue cve_info = cve_row.text.strip().split("\n") log.debug("CVE info: %s", cve_info) cve_dict.setdefault(cols.PACKAGE, []).append(pkg_name) cve_dict.setdefault(cols.VERSION, []).append(pkg_version) cve_dict.setdefault("cve", []).append(cve_info[0]) df = pd.DataFrame.from_dict(cve_dict) df.replace(np.nan, "", regex=True, inplace=True) df.drop_duplicates(keep="first", inplace=True) return df ================================================ FILE: src/repology/exceptions.py ================================================ # SPDX-FileCopyrightText: 2023 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Repology exceptions""" class RepologyError(Exception): """Base class for exceptions raised in the repology modules""" pass class RepologyNoMatchingPackages(RepologyError): """Raised when no matching repology packages found""" pass class RepologyUnexpectedResponse(RepologyError): """Raised when repology sends unexpected response""" pass ================================================ FILE: src/repology/projects_parser.py ================================================ # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """HTML parser helpers for Repology project search pages.""" import re from dataclasses import dataclass from bs4 import BeautifulSoup import repology.exceptions from common import columns as cols from common.log import LOG, LOG_SPAM @dataclass class ParsedProjectsPage: """Parsed data extracted from a Repology projects response.""" package_rows: list[dict[str, str]] next_query_project: str processed_ids: set[str] def parse_projects_search_html( # noqa: PLR0912, PLR0914, PLR0915 html, repository, processed_ids=None, pkg_stop=None ): """Parse a Repology package search response.""" processed_ids = set() if processed_ids is None else set(processed_ids) next_query_project = "" package_rows = [] soup = BeautifulSoup(html, "html.parser") tables = soup.find_all("table") if not tables: LOG.debug("Projects table missing: no matching packages") return ParsedProjectsPage(package_rows, next_query_project, processed_ids) projects_table = tables[0] if projects_table.thead is None or projects_table.tbody is None: LOG.fatal("Unexpected response, malformed projects table") raise repology.exceptions.RepologyUnexpectedResponse headers = {} for idx, header in enumerate(projects_table.thead.find_all("th")): headers[header.text] = idx if not headers: LOG.fatal("Unexpected response, missing headers") raise repology.exceptions.RepologyUnexpectedResponse LOG.log(LOG_SPAM, headers) rows = 0 stop_query = False for row in projects_table.tbody.find_all("tr"): cells = row.find_all("td") if not cells: LOG.log(LOG_SPAM, "No columns on row: %s", row) continue rows += 1 LOG.log(LOG_SPAM, "cols: %s", cells) pkg = cells[headers["Project"]] pkg_links = 
pkg.find_all("a") if not pkg_links: LOG.fatal("Unexpected response, missing project link") raise repology.exceptions.RepologyUnexpectedResponse pkg_name = pkg_links[0].string if not stop_query and pkg_stop and pkg_name == pkg_stop: stop_query = True LOG.debug("Stopping queries after parsing the current response") pkg_id = f"{repository}:{pkg_name}" if pkg_id in processed_ids: LOG.debug("Package '%s' in search resp already processed", pkg_name) continue LOG.debug("Adding package '%s' to processed_ids", pkg_name) processed_ids.add(pkg_id) newest = cells[headers["Newest"]] newest_releases = [] for nspan in newest.find_all("span", {"class": "version-newest"}): rel_version = re.sub(r"[^\x00-\x7f]+", "", nspan.text) newest_releases.append(rel_version) sel = cells[headers["Selected"]] statuses = re.findall(r'version-([^"]+)"', str(sel)) vspans = sel.find_all("span", {"class": "version"}) for idx, vspan in enumerate(vspans): ver = re.sub(r"[^\x00-\x7f]+", "", vspan.text) vulnerable = bool(vspan.find_all("span", {"class": "vulnerable"})) status = statuses[idx] package_rows.append( { cols.REPO: repository, cols.PACKAGE: pkg_name, cols.VERSION: ver, cols.STATUS: status, cols.POTENTIALLY_VULNERABLE: str(int(vulnerable)), cols.NEWEST_UPSTREAM_RELEASE: ";".join(newest_releases), } ) LOG.log(LOG_SPAM, "Added: %s:%s:%s", pkg_name, ver, status) if rows == 200 and not stop_query: next_query_project = pkg_name if rows > 200: LOG.warning( "Unexpected response: raising this warning to notify the " "possibility the repology API has changed and might no longer " "match what this client expects" ) return ParsedProjectsPage(package_rows, next_query_project, processed_ids) ================================================ FILE: src/repology/repology_cli.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2023 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Command-line interface to query repology.org for package information.""" import os import pathlib from argparse import SUPPRESS, ArgumentParser, ArgumentTypeError import repology.exceptions from common.cli_args import add_verbose_argument, add_version_argument from common.log import LOG, set_log_verbosity from repology.adapter import RepologyAdapter, RepologyQuery from repology.reporting import write_query_report ############################################################################### def _pkg_str(str_obj): if isinstance(str_obj, str) and len(str_obj) > 0: return str_obj raise ArgumentTypeError("Value must be a non-empty string") def getargs(args=None): """ Parse arguments: by default parses the sys.argv if `args` is not specified, otherwise, parses arguments from the `args` list of strings. This is simply a wrapper for function ArgumentParser.parse_args(), returning argument attributes in argparse.Namespace object. """ desc = "Command line client to query repology.org for package information." 
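    # Illustrative invocation (assumed, not from the sources): besides the
    # --pkg_search example in the epilog below, the SBOM mode reads package
    # names and versions from a CycloneDX document, e.g. roughly:
    #
    #   repology_cli --sbom_cdx ./sbom.cdx.json --repository nix_unstable
    #
    # The command name and SBOM path are hypothetical; results are written to
    # ./repology_report.csv unless --out is given.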
epil = ( f"Example: ./{os.path.basename(__file__)} --pkg_search 'firef' " " --repository 'nix_unstable'" ) parser = ArgumentParser(description=desc, epilog=epil, add_help=False) required = parser.add_argument_group( "Required arguments", "Following arguments are mutually exclusive:", ) exclusiveq = required.add_mutually_exclusive_group(required=True) requiredo = parser.add_argument_group("Required other arguments") filtergr = parser.add_argument_group( "Optional output filter arguments (regular expressions)" ) optional = parser.add_argument_group("Optional other arguments") helps = "Show this help message and exit" optional.add_argument("-h", "--help", action="help", default=SUPPRESS, help=helps) helps = "Package name exact match (see: https://repology.org/projects/)" exclusiveq.add_argument("--pkg_exact", help=helps, type=_pkg_str) helps = "Package name search term (see: https://repology.org/projects/)" exclusiveq.add_argument("--pkg_search", help=helps, type=_pkg_str) helps = "Read the package names and versions from the given cdx SBOM" exclusiveq.add_argument("--sbom_cdx", help=helps, type=pathlib.Path) helps = "Repository name exact match (see: https://repology.org/repositories)" requiredo.add_argument( "--repository", required=True, help=helps, type=str, default="" ) helps = "Filter reported results based on package name" filtergr.add_argument("-p", "--re_package", help=helps, type=str, default=None) helps = "Filter reported results based on version string" filtergr.add_argument("-V", "--re_version", help=helps, type=str, default=None) helps = "Filter reported results based on status string" filtergr.add_argument("-s", "--re_status", help=helps, type=str, default=None) helps = "Filter reported results based on vulnerability status" filtergr.add_argument("-c", "--re_vuln", help=helps, type=str, default=None) helps = "Summarize output result statistics" optional.add_argument("--stats", help=helps, action="store_true") add_verbose_argument(optional, root_parser=parser) helps = "Path to output report file (default: ./repology_report.csv)" optional.add_argument("-o", "--out", help=helps, default="repology_report.csv") add_version_argument(optional) if args: return parser.parse_args(args) return parser.parse_args() ################################################################################ def _query_from_args(args): return RepologyQuery( repository=args.repository, pkg_exact=args.pkg_exact, pkg_search=args.pkg_search, sbom_cdx=args.sbom_cdx, re_package=args.re_package, re_version=args.re_version, re_status=args.re_status, re_vuln=args.re_vuln, ) class Repology: """Compatibility wrapper that keeps CLI reporting separate from queries.""" def __init__(self, adapter=None): self.adapter = RepologyAdapter() if adapter is None else adapter self.df = None self.urlq = None self.df_sbom = None def query(self, args, stdout_report=True, file_report=True): """Query package information from repology.org.""" if not file_report: args.out = None self.df = self.adapter.query(_query_from_args(args)) self.urlq = self.adapter.urlq self.df_sbom = self.adapter.df_sbom if stdout_report or args.out is not None: write_query_report( self.df, args, query_url=self.urlq, df_sbom=self.df_sbom, console_report=stdout_report, ) return self.df.copy(deep=True) ################################################################################ def main(): """main entry point""" args = getargs() set_log_verbosity(args.verbose) repology_cli = Repology() try: repology_cli.query(args) except 
repology.exceptions.RepologyNoMatchingPackages:
        LOG.warning("No matching packages found")


################################################################################

if __name__ == "__main__":
    main()

################################################################################


================================================ FILE: src/repology/repology_cve.py ================================================
#!/usr/bin/env python3

# SPDX-FileCopyrightText: 2023 Technology Innovation Institute (TII)
#
# SPDX-License-Identifier: Apache-2.0

"""Command-line interface to query CVE info from repology.org."""

import os
from argparse import ArgumentParser, ArgumentTypeError

from common.cli_args import add_verbose_argument, add_version_argument
from common.df import df_to_csv_file
from common.log import set_log_verbosity
from repology.adapter import RepologyAdapter
from repology.reporting import report_cves
from repology.session import REPOLOGY_REQUEST_TIMEOUT

###############################################################################


def _pkg_str(str_obj):
    if isinstance(str_obj, str) and len(str_obj) > 0:
        return str_obj
    raise ArgumentTypeError("Value must be a non-empty string")


def getargs(args=None):
    """Parse command line arguments."""
    desc = (
        "Query repology.org for CVEs that impact package PKG_NAME version PKG_VERSION."
    )
    epil = f"Example: ./{os.path.basename(__file__)} openssl 3.1.0"
    parser = ArgumentParser(description=desc, epilog=epil)
    helps = "Target package name"
    parser.add_argument("PKG_NAME", help=helps, type=_pkg_str)
    helps = "Target package version"
    parser.add_argument("PKG_VERSION", help=helps, type=str)
    add_verbose_argument(parser, max_level=2)
    helps = "Path to output file (default: ./repology_cves.csv)"
    parser.add_argument(
        "-o", "--out", nargs="?", help=helps, default="repology_cves.csv"
    )
    add_version_argument(parser)
    return parser.parse_args(args)


################################################################################


def query_cve(
    pkg_name, pkg_version, session=None, request_timeout=REPOLOGY_REQUEST_TIMEOUT
):
    """
    Return vulnerabilities known to repology that impact the given package
    name and version. Results are returned in a pandas dataframe.
""" adapter = RepologyAdapter(session=session, request_timeout=request_timeout) return adapter.query_cves(pkg_name, pkg_version) ################################################################################ def main(): """main entry point.""" args = getargs() set_log_verbosity(args.verbose) df = query_cve(args.PKG_NAME, args.PKG_VERSION) if not report_cves(df): return df_to_csv_file(df, args.out) ################################################################################ if __name__ == "__main__": main() ################################################################################ ================================================ FILE: src/repology/reporting.py ================================================ # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Console and CSV reporting helpers for Repology commands.""" from tabulate import tabulate from common import columns as cols from common.df import df_to_csv_file from common.log import LOG def _stats_sbom(df, *, log=LOG): # noqa: PLR0914 df = df.copy() df = df.drop_duplicates(keep="first", subset=[cols.PACKAGE, cols.VERSION]) repo_rows_n = df.shape[0] repo_skipped_cols = ["NO_VERSION", "IGNORED", "NOT_FOUND"] df_skipped = df[df.status.isin(repo_skipped_cols)] repo_skipped_n = df_skipped.shape[0] repo_skipped_pct = f"{repo_skipped_n / repo_rows_n:.0%}" df_ignored = df[df.status.isin(["IGNORED"])] ignored_n = df_ignored.shape[0] df_no_version = df[df.status.isin(["NO_VERSION"])] no_version_n = df_no_version.shape[0] df_not_found = df[df.status.isin(["NOT_FOUND"])] not_found_n = df_not_found.shape[0] df_repology = df[~df.status.isin(repo_skipped_cols)] repology_rows_n = df_repology.shape[0] sbom_in_repo = f"{repology_rows_n / repo_rows_n:.0%}" sbom_rows = f"Unique packages: {repo_rows_n} ({1:.0%})" sbom_skipped = ( f"sbom packages not in repology: {repo_skipped_n} ({repo_skipped_pct})" ) ignored = f"IGNORED (sbom component is not a package in repology): {ignored_n}" no_version = ( f"NO_VERSION (sbom component is missing the version number): {no_version_n}" ) not_found = f"NOT_FOUND (sbom component was not found in repology): {not_found_n}" sbom_pkgs_in_repo = f"sbom packages in repology: {repology_rows_n} ({sbom_in_repo})" log.info( "\n\tRepology SBOM package statistics:\n" "\t %s\n" "\t ==> %s\n" "\t ==> %s\n" "\t - %s\n" "\t - %s\n" "\t - %s\n", sbom_rows, sbom_pkgs_in_repo, sbom_skipped, ignored, no_version, not_found, ) def _stats_repology(df, *, log=LOG): # noqa: PLR0914 df = df.copy(deep=True) base_cols = ["newest", "devel", "unique", "outdated"] df = df[df.status.isin(base_cols)] df = df.drop_duplicates(keep="first", subset=[cols.PACKAGE, cols.VERSION]) base_rows_n = df.shape[0] if base_rows_n <= 0: log.debug("No base packages, skipping stats") return df_newest = df[df.status.isin(["newest"])] newest_rows_n = df_newest.shape[0] newest_pct = f"{newest_rows_n / base_rows_n:.0%}" df_outdated = df[df.status.isin(["outdated"])] outdated_rows_n = df_outdated.shape[0] outdated_pct = f"{outdated_rows_n / base_rows_n:.0%}" df_dev_uniq = df[df.status.isin(["devel", "unique"])] dev_uniq_rows_n = df_dev_uniq.shape[0] dev_uniq_pct = f"{dev_uniq_rows_n / base_rows_n:.0%}" df_vuln = df[df.potentially_vulnerable.isin(["1"])] vuln_rows_n = df_vuln.shape[0] vuln_pct = f"{vuln_rows_n / base_rows_n:.0%}" base_rows = ( f"Unique compared packages: {base_rows_n} ({1:.0%})\t(status in: {base_cols})" ) new_rows = f"newest: {newest_rows_n} ({newest_pct})" outdated_rows = f"outdated: 
{outdated_rows_n} ({outdated_pct})" dev_uniq_rows = f"devel or unique: {dev_uniq_rows_n} ({dev_uniq_pct})" vuln_rows = f"potentially vulnerable: {vuln_rows_n} ({vuln_pct})" about = "https://repology.org/docs/about" log.info( "\n\tRepology package statistics:\n" "\t (see the status descriptions in: %s)\n" "\t %s\n" "\t ==> %s\n" "\t ==> %s\n" "\t ==> %s\n" "\t ==> %s\n", about, base_rows, new_rows, outdated_rows, dev_uniq_rows, vuln_rows, ) def report_cves(df, *, log=LOG): """Render a CVE table to the console when rows exist.""" if df is None or df.empty: log.warning("No matching vulnerabilities found") return False table = tabulate( df, headers="keys", tablefmt="orgtbl", numalign="center", showindex=False, ) log.info("Repology affected CVE(s)\n\n%s\n\n", table) return True def write_query_report( # noqa: PLR0913 df, args, *, query_url, df_sbom, console_report=True, log=LOG ): """Generate result report to console and to csv file.""" report_df = df.copy(deep=True) console_df = report_df.copy(deep=True) col = cols.NEWEST_UPSTREAM_RELEASE console_df[col] = console_df[col].str.slice(0, 26) console_df = console_df[~console_df.status.isin(["IGNORED", "NO_VERSION"])] console_df = console_df.drop_duplicates(keep="first") if console_report: table = tabulate( console_df, headers="keys", tablefmt="orgtbl", numalign="center", showindex=False, ) log.info( "Repology package info, packages:%s\n\n%s\n\nFor more details, see: %s\n", console_df.shape[0], table, query_url, ) if args.stats: _stats_repology(report_df, log=log) if df_sbom is not None: _stats_sbom(report_df, log=log) if args.out is not None: df_to_csv_file(report_df, args.out) ================================================ FILE: src/repology/sbom.py ================================================ # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """CycloneDX SBOM helpers used by Repology queries.""" import json import re import pandas as pd from common import columns as cols from common.log import LOG from common.package_names import nix_to_repology_pkg_name from common.versioning import parse_version IGNORE_SBOM_PACKAGE_PATTERNS = ( r".*\.gz", r".*\.patch", r".*\.xz", r".*\.bz2", r".*\.zip", r".*\.gem", r".*\.tgz", r".*\.h", r".*\.c", r".*\.diff", r".*\?.*", r".*\&.*", ) IGNORE_SBOM_REGEX = re.compile(f"(?:{'|'.join(IGNORE_SBOM_PACKAGE_PATTERNS)})") def parse_cdx_sbom(path): """Parse CycloneDX SBOM components into a normalized dataframe.""" LOG.debug("Parsing cdx sbom: %s", path) with open(path, encoding="utf-8") as inf: json_dict = json.loads(inf.read()) metadata = json_dict.get("metadata", {}) components = list(json_dict.get("components", [])) if "component" in metadata: components.append(metadata["component"]) components_dict = {} for component in components: name = nix_to_repology_pkg_name(component["name"]) components_dict.setdefault(cols.NAME, []).append(name) components_dict.setdefault(cols.VERSION, []).append(component["version"]) if not components_dict: return pd.DataFrame({cols.NAME: [], cols.VERSION: []}) df_components = pd.DataFrame(components_dict) df_components.fillna("", inplace=True) df_components = df_components.astype(str) df_components.sort_values(cols.NAME, inplace=True) df_components.reset_index(drop=True, inplace=True) return df_components def is_ignored_sbom_package(package_name): """Return true if a SBOM component should be ignored for Repology lookup.""" return re.match(IGNORE_SBOM_REGEX, package_name) is not None def make_sbom_status_row(repository, package, 
version, status): """Build a synthetic Repology result row for a SBOM component.""" return { cols.REPO: repository, cols.PACKAGE: package, cols.VERSION: version, cols.STATUS: status, cols.POTENTIALLY_VULNERABLE: "", cols.NEWEST_UPSTREAM_RELEASE: "", } def merge_sbom_fields(df_sbom, df_repo): """Join SBOM package/version fields into Repology query results.""" df = pd.merge( left=df_sbom, right=df_repo, how="left", left_on=[cols.NAME], right_on=[cols.PACKAGE], suffixes=("_sbom", ""), ) df[cols.VERSION_SBOM] = df.pop(cols.VERSION_SBOM) df.drop(cols.NAME, axis=1, inplace=True) return df def sbom_row_classify(row): """Classify whether the SBOM version appears outdated.""" if row.status == "outdated": return "sbom_pkg_needs_update" if row.status in ["devel", "unique", "newest"]: ver_sbom = parse_version(row.version_sbom) ver_repo = parse_version(row.version) if not ver_sbom or not ver_repo or ver_sbom < ver_repo: return "sbom_pkg_needs_update" return "" ================================================ FILE: src/repology/session.py ================================================ # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Shared HTTP helpers for repology.org clients.""" from common.http import create_cached_limited_session REPOLOGY_CACHE_SECONDS = 6 * 60 * 60 REPOLOGY_REQUEST_TIMEOUT = 60 REPOLOGY_USER_AGENT = "repology_cli/0 (https://github.com/tiiuae/sbomnix/)" def create_repology_session(): """Return a cached, rate-limited, retrying HTTP session.""" return create_cached_limited_session( per_second=1, expire_after=REPOLOGY_CACHE_SECONDS, user_agent=REPOLOGY_USER_AGENT, ) DEFAULT_REPOLOGY_SESSION = create_repology_session() ================================================ FILE: src/sbomnix/__init__.py ================================================ # SPDX-FileCopyrightText: 2022 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 ================================================ FILE: src/sbomnix/builder.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2022-2023 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """SBOM builder orchestration.""" import logging import uuid from dataclasses import dataclass from typing import Any import numpy as np import pandas as pd from common import columns as cols from common.df import df_to_csv_file from common.errors import ( MissingNixDerivationMetadataError, MissingNixDeriverError, SbomnixError, ) from common.log import LOG, is_debug_enabled from sbomnix.closure import ( DEPENDENCY_COLUMNS, dependencies_to_depth, dependency_paths, derivation_dependencies_df, ) from sbomnix.components import ( recursive_derivations_to_dataframe, runtime_derivations_to_dataframe, ) from sbomnix.dependency_index import build_dependency_index from sbomnix.derivation import load_recursive from sbomnix.derivers import find_deriver, is_loadable_deriver_path, require_deriver from sbomnix.exporters import build_cdx_document, build_spdx_document, write_json from sbomnix.meta import Meta, NixpkgsMetaSource from sbomnix.runtime import ( load_runtime_closure, ) from sbomnix.vuln_enrichment import enrich_cdx_with_vulnerabilities ############################################################################### # Namespace UUID (a UUIDv4) for stable UUIDv5 identifiers. # See RFC9562, *6.6. Namespace ID Usage and Allocation*. 
SBOMNIX_UUID_NAMESPACE = uuid.UUID("136af32e-0d0e-48bc-912c-31b26af294b9") @dataclass(frozen=True) class StructuredClosure: """Structured dependency data used to assemble an SBOM.""" df_deps: pd.DataFrame recursive_buildtime_derivations: dict[str, Any] | None = None runtime_output_paths_by_load_path: dict[str, set[str]] | None = None def _runtime_output_paths_by_load_path(output_paths_by_drv): output_paths_by_load_path = {} for drv_path, output_paths in output_paths_by_drv.items(): if is_loadable_deriver_path(drv_path): output_paths_by_load_path.setdefault(drv_path, set()).update(output_paths) continue for output_path in output_paths: output_paths_by_load_path.setdefault(output_path, set()).add(output_path) return output_paths_by_load_path def _mapped_runtime_output_paths(output_paths_by_load_path): if not output_paths_by_load_path: return set() return set().union(*output_paths_by_load_path.values()) class SbomBuilder: """Generate SBOMs in various formats.""" def __init__( # noqa: PLR0913, PLR0917 self, nix_path, buildtime=False, depth=None, flakeref=None, original_ref=None, meta_nixpkgs=None, impure=False, include_meta=True, include_vulns=False, include_cpe=True, ): # self.uid specifies the attribute that identifies SBOM components. # See the column names in # self.df_sbomdb (sbom.csv) for a list of all components' attributes. self.uid = cols.STORE_PATH self.nix_path = nix_path self.buildtime = buildtime self.target_deriver = self._resolve_target_deriver(nix_path) self.target_component_ref = None self._recursive_buildtime_derivations = None self._runtime_output_paths_by_load_path = None self.df_deps = None self.depth = depth self._structured_closure = self._load_structured_closure(nix_path) self._init_dependencies(self._structured_closure) self.df_sbomdb = None self.df_sbomdb_outputs_exploded = None self.dependency_index = None self.flakeref = flakeref self.original_ref = original_ref self.meta_nixpkgs = meta_nixpkgs self.impure = impure self.meta = None # "disabled" records explicit opt-out; "none" means auto-selection # found no source. self.nixpkgs_meta_source = NixpkgsMetaSource(method="disabled") self.include_cpe = include_cpe self._init_components(include_meta) target_component_ref = self._resolve_target_component_ref() self.target_component_ref = target_component_ref self.include_vulns = include_vulns # Use a random UUID as the serial number when any data source that is # not strictly coming from the resolved target component is used. if include_vulns or include_meta or include_cpe: LOG.verbose("Using random UUIDv4") self.uuid = uuid.uuid4() else: LOG.verbose("Using stable UUIDv5 for '%s'", target_component_ref) # This uses a UUIDv5, resulting in a stable UUID across runs for # the same SBOM subject. 
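            # Hypothetical illustration (not from the original file): with a
            # fixed ref such as "/nix/store/...-hello-2.12",
            # uuid.uuid5(SBOMNIX_UUID_NAMESPACE, ref) evaluates to the same
            # UUID on every run, whereas uuid.uuid4() above yields a fresh
            # value each time.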
self.uuid = uuid.uuid5(SBOMNIX_UUID_NAMESPACE, target_component_ref) self.sbom_type = "runtime_and_buildtime" if not self.buildtime: self.sbom_type = "runtime_only" def _resolve_target_deriver(self, nix_path): if self.buildtime: return require_deriver(nix_path) try: return find_deriver(nix_path) except SbomnixError: raise except RuntimeError: LOG.debug( "Runtime target has no loadable deriver: %s", nix_path, exc_info=True, ) return None def _load_structured_closure(self, nix_path): """Load structured dependency data for the configured SBOM type.""" if self.buildtime: if self.target_deriver is None: raise MissingNixDeriverError(nix_path) return self._load_recursive_buildtime_closure() return self._load_runtime_path_info_closure(nix_path) def _init_dependencies(self, closure): """Initialize dependency attributes from loaded structured data.""" self.df_deps = closure.df_deps self._recursive_buildtime_derivations = closure.recursive_buildtime_derivations self._runtime_output_paths_by_load_path = ( closure.runtime_output_paths_by_load_path ) def _load_recursive_buildtime_closure(self): """Load build-time dependencies from recursive derivation JSON.""" if self.target_deriver is None: raise MissingNixDeriverError(self.nix_path) derivations, drv_infos = load_recursive(self.target_deriver) df_deps = derivation_dependencies_df(drv_infos) if self.depth: df_deps = self._filter_dependencies_to_depth( df_deps, self.target_deriver, self.depth, ) return StructuredClosure( df_deps=df_deps, recursive_buildtime_derivations=derivations, ) def _load_runtime_path_info_closure(self, nix_path): """Load runtime dependencies from structured path-info JSON.""" runtime_closure = load_runtime_closure(nix_path) output_paths_by_load_path = _runtime_output_paths_by_load_path( runtime_closure.output_paths_by_drv ) mapped_paths = _mapped_runtime_output_paths(output_paths_by_load_path) if nix_path not in mapped_paths: load_path = self.target_deriver or nix_path output_paths_by_load_path.setdefault(load_path, set()).add(nix_path) mapped_paths.add(nix_path) graph_only_paths = dependency_paths(runtime_closure.df_deps) - mapped_paths if graph_only_paths: LOG.debug( "Runtime path-info references graph-only paths: %s", sorted(graph_only_paths), ) df_deps = runtime_closure.df_deps if self.depth: df_deps = self._filter_dependencies_to_depth( df_deps, nix_path, self.depth, ) return StructuredClosure( df_deps=df_deps, runtime_output_paths_by_load_path=output_paths_by_load_path, ) def _init_runtime_components(self, paths): if self._runtime_output_paths_by_load_path is None: raise AssertionError("Runtime output metadata was not initialized") df_components = runtime_derivations_to_dataframe( paths, self._runtime_output_paths_by_load_path, include_cpe=self.include_cpe, ) if df_components.empty: raise MissingNixDerivationMetadataError(self.nix_path) return df_components def _filter_dependencies_to_depth( self, df_deps, start_path, depth, columns=DEPENDENCY_COLUMNS, ): """Return dependency rows reachable from ``start_path`` up to ``depth``.""" LOG.debug("Reading dependencies until depth=%s", depth) return dependencies_to_depth(df_deps, start_path, depth, columns=columns) def _init_components(self, include_meta): """Initialize the SBOM component dataframe.""" paths = self._sbom_component_paths() # Populate store based on the dependencies if self._recursive_buildtime_derivations is not None: self.df_sbomdb = recursive_derivations_to_dataframe( paths, self._recursive_buildtime_derivations, include_cpe=self.include_cpe, ) elif 
self._runtime_output_paths_by_load_path is not None: self.df_sbomdb = self._init_runtime_components(paths) else: # _load_structured_closure always selects exactly one metadata source. raise AssertionError("Structured dependency metadata was not initialized") # Join with meta information if include_meta: self._join_meta() # Clean, drop duplicates, sort self.df_sbomdb.replace(np.nan, "", regex=True, inplace=True) self.df_sbomdb.drop_duplicates(subset=[self.uid], keep="first", inplace=True) self.df_sbomdb.sort_values(by=[cols.NAME, self.uid], inplace=True) self.df_sbomdb_outputs_exploded = self.df_sbomdb.explode(cols.OUTPUTS) self._init_dependency_index() def _sbom_component_paths(self): if self.df_deps is None or self.df_deps.empty: if self._runtime_output_paths_by_load_path is not None: return set().union(*self._runtime_output_paths_by_load_path.values()) # No dependencies, so the only component in the sbom # will be the target itself. if self.target_deriver: return {self.target_deriver} return {self.nix_path} return dependency_paths(self.df_deps) def _resolve_target_component_ref(self) -> str: """Return the component reference that represents the SBOM subject.""" if self.df_sbomdb is None: raise AssertionError("SBOM component metadata was not initialized") if self.target_deriver: df_target = self.df_sbomdb[ self.df_sbomdb[cols.STORE_PATH] == self.target_deriver ] if not df_target.empty: return self.target_deriver for component in self.df_sbomdb.to_dict("records"): store_path = component.get(cols.STORE_PATH) if not isinstance(store_path, str): continue outputs = component.get(cols.OUTPUTS, []) if isinstance(outputs, str): outputs = [outputs] elif not isinstance(outputs, (list, tuple, set)): continue if self.nix_path in outputs: return store_path if self.target_deriver: return self.target_deriver raise MissingNixDerivationMetadataError(self.nix_path) def _init_dependency_index(self): """Build indexed dependency lookups used during export.""" self.dependency_index = build_dependency_index( self.df_deps, self.df_sbomdb, self.df_sbomdb_outputs_exploded, uid=self.uid, ) def _join_meta(self): """Join component rows with nixpkgs metadata.""" if self.df_sbomdb is None: raise AssertionError("SBOM component metadata was not initialized") self.meta = Meta() df_meta, source = self.meta.get_nixpkgs_meta_with_source( target_path=self.nix_path, flakeref=self.flakeref, original_ref=self.original_ref, explicit_nixpkgs=self.meta_nixpkgs, impure=self.impure, ) self.nixpkgs_meta_source = source if df_meta is None or df_meta.empty: if source.message: LOG.info("%s", source.message) if source.path: LOG.warning( "Failed reading nix meta information: " "SBOM will include only minimum set of attributes" ) else: LOG.info( "Skipping nix meta information: " "SBOM will include only minimum set of attributes" ) return if is_debug_enabled(): df_to_csv_file(df_meta, "meta.csv") # Join based on package name including the version number self.df_sbomdb = self.df_sbomdb.merge( df_meta, how="left", left_on=[cols.NAME], right_on=[cols.NAME], suffixes=("", "_meta"), ) def lookup_dependencies(self, drv, uid=cols.STORE_PATH): """Return indexed dependency values for one SBOM component.""" dependency_index = getattr(self, "dependency_index", None) if dependency_index is None: return None return dependency_index.lookup(drv, uid=uid) def to_cdx_data(self): """Return the SBOM as a CycloneDX document.""" return build_cdx_document(self) def enrich_cdx_with_vulnerabilities(self, cdx): """Add vulnerability scan results to an existing 
CycloneDX document.""" return enrich_cdx_with_vulnerabilities(self, cdx) def to_spdx_data(self): """Return the SBOM as an SPDX document.""" return build_spdx_document(self) def write_json(self, pathname, data, printinfo=False): """Write a JSON document to a file.""" write_json(pathname, data, printinfo=printinfo) def to_cdx(self, cdx_path, printinfo=True): """Export SBOM components to a CycloneDX JSON file.""" cdx = self.to_cdx_data() self.write_json(cdx_path, cdx, printinfo) def to_spdx(self, spdx_path, printinfo=True): """Export SBOM components to an SPDX JSON file.""" spdx = self.to_spdx_data() self.write_json(spdx_path, spdx, printinfo) def to_csv(self, csv_path, loglevel=logging.INFO): """Export SBOM components to a CSV file.""" df_to_csv_file(self.df_sbomdb, csv_path, loglevel) ================================================ FILE: src/sbomnix/cdx.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2022-2023 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """CycloneDX utils""" import re from common import columns as cols from common.log import LOG, LOG_SPAM from common.spdx import canonicalize_spdx_license_id from vulnxscan.utils import _vuln_source, _vuln_url def _drv_to_cdx_licenses_entry(drv, column_name, cdx_license_type): """Parse license entries of type cdx_license_type from column_name""" licenses = [] if column_name not in drv._asdict(): # Return empty list if column name is not in drv return licenses license_str = getattr(drv, column_name) if not license_str: # Return empty list if license string is empty return licenses # Parse the ";" separated licenses to cdx license format license_strings = license_str.split(";") for license_string in license_strings: license_value = license_string # Give up generating the 'licenses' entry if license id should be # spdx but it's not: if "spdxid" in column_name: canonical = canonicalize_spdx_license_id(license_value) if not canonical: LOG.debug("Invalid spdxid license '%s':'%s'", drv.name, license_string) return [] license_value = canonical license_dict = {"license": {cdx_license_type: license_value}} licenses.append(license_dict) return licenses def _cdx_component_add_licenses(component, drv): """Add licenses array to cdx component (if any)""" licenses = [] # First, try reading the license in spdxid-format licenses = _drv_to_cdx_licenses_entry(drv, "meta_license_spdxid", "id") # If it fails, try reading the license short name if not licenses: licenses = _drv_to_cdx_licenses_entry(drv, "meta_license_short", "name") # Give up if package does not have license information associated if not licenses: LOG.log(LOG_SPAM, "No license info found for '%s'", drv.name) return # Otherwise, add the licenses entry component["licenses"] = licenses def _cdx_component_add_patches(component, drv): """Add security patch information to cdx component (if any)""" if drv.patches: security_patches = [] for p in drv.patches.split(" "): ids = re.findall(r"CVE-\d{4}-\d+", p, re.IGNORECASE) if ids: resolves = [] for i in ids: resolves.append( { "type": "security", "id": i.upper(), "references": [f"file://{p}"], } ) security_patches.append( { "type": "unofficial", "resolves": resolves, } ) if security_patches: pedigree = {} pedigree["patches"] = security_patches component["pedigree"] = pedigree def _drv_to_cdx_component(drv, uid=cols.STORE_PATH): """Convert one SBOM component row to a CycloneDX component.""" component = {} # Set the cdx component type based on the following heuristic: # - 
Set the default component type to 'library' # - Set the component type to 'file' if the drv version string is missing # and out-path matches the below pattern component["type"] = "library" if not drv.version: if drv.out and re.search(r"(\.tar\.|\?|\.[a-z]+$)", drv.out): component["type"] = "file" component["bom-ref"] = getattr(drv, uid) component["name"] = drv.pname component["version"] = drv.version if drv.purl: component["purl"] = drv.purl if drv.cpe: component["cpe"] = drv.cpe if "meta_description" in drv._asdict() and drv.meta_description: component["description"] = drv.meta_description _cdx_component_add_licenses(component, drv) _cdx_component_add_patches(component, drv) properties = [] for output_path in drv.outputs: prop = {} prop["name"] = "nix:output_path" prop["value"] = output_path properties.append(prop) if drv.store_path: prop = {} prop["name"] = "nix:drv_path" prop["value"] = drv.store_path properties.append(prop) # To externalReferences? if drv.urls: prop = {} prop["name"] = "nix:fetch_url" prop["value"] = drv.urls properties.append(prop) if "meta_homepage" in drv._asdict() and drv.meta_homepage: prop = {} prop["name"] = "homepage" prop["value"] = drv.meta_homepage properties.append(prop) if "meta_position" in drv._asdict() and drv.meta_position: prop = {} prop["name"] = "nix:position" prop["value"] = drv.meta_position properties.append(prop) if properties: component["properties"] = properties return component def _drv_to_cdx_dependency(drv, deps_list, uid=cols.STORE_PATH): """Return CycloneDX dependency structure for one component row.""" dependency = {} dependency["ref"] = getattr(drv, uid) if deps_list: dependency["dependsOn"] = deps_list return dependency def _vuln_to_cdx_vuln(vuln): """Return cdx vulnerability entry from vulnix row""" vulnerability = {} vulnerability["bom-ref"] = vuln.store_path vulnerability["id"] = vuln.vuln_id source = {} source["url"] = _vuln_url(vuln) source["name"] = _vuln_source(vuln) vulnerability["source"] = source vulnerability["ratings"] = [] # If the vulnerability is still being assessed, it may not yet have a valid severity score if vuln.severity != "": rating = {} rating["source"] = source rating["score"] = vuln.severity vulnerability["ratings"].append(rating) vulnerability["tools"] = [] for scanner in vuln.scanner: tool = {} tool["name"] = scanner vulnerability["tools"].append(tool) return vulnerability ================================================ FILE: src/sbomnix/cli_utils.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Shared CLI orchestration helpers.""" import logging import pathlib import subprocess from dataclasses import dataclass from tempfile import NamedTemporaryFile from common.errors import InvalidNixArtifactError, MissingNixOutPathError from common.flakeref import ( NIXOS_CONFIGURATION_TOPLEVEL_SUFFIX, parse_nixos_configuration_ref, quote_nix_attr_segment, try_resolve_flakeref, ) from common.log import LOG from common.proc import exec_cmd, exit_unless_nix_artifact, nix_cmd from sbomnix.builder import SbomBuilder @dataclass(frozen=True) class ResolvedNixTarget: """Resolved nix CLI target.""" path: str flakeref: str | None = None original_ref: str | None = None @dataclass(frozen=True) class GeneratedSbom: """Paths of generated temporary SBOM artifacts.""" cdx_path: pathlib.Path csv_path: pathlib.Path | None = None def cleanup(self): """Remove generated artifacts if they exist."""
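# unlink(missing_ok=True) makes cleanup idempotent: repeated calls, or cleanup after the artifacts were already removed externally, do not raise FileNotFoundError.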
self.cdx_path.unlink(missing_ok=True) if self.csv_path is not None: self.csv_path.unlink(missing_ok=True) def resolve_nix_target(nixref, buildtime=False, impure=False): """Resolve a CLI target to a nix path, preserving flakeref context.""" runtime = not buildtime resolved_ref = _normalize_nixos_configuration_ref(nixref) target_path = try_resolve_flakeref( resolved_ref, force_realise=runtime, impure=impure, derivation=buildtime, ) if target_path: return ResolvedNixTarget( path=target_path, flakeref=resolved_ref, original_ref=nixref, ) target_path = pathlib.Path(nixref).resolve().as_posix() if runtime and target_path.endswith(".drv"): target_path = _realise_derivation_output(target_path) else: exit_unless_nix_artifact(nixref, force_realise=runtime) return ResolvedNixTarget(path=target_path, original_ref=nixref) def _realise_derivation_output(path): try: ret = exec_cmd( nix_cmd( "build", "--no-link", "--print-out-paths", f"{path}^*", ) ) except subprocess.CalledProcessError: raise InvalidNixArtifactError(path) from None out_path = next( (line.strip() for line in ret.stdout.splitlines() if line.strip()), "" ) if not out_path: raise MissingNixOutPathError(path) LOG.debug("runtime derivation target '%s' maps to output '%s'", path, out_path) return out_path def _normalize_nixos_configuration_ref(nixref): parsed = parse_nixos_configuration_ref(nixref) if not parsed: return nixref flake, name = parsed attr = quote_nix_attr_segment(name) return f"{flake}#nixosConfigurations.{attr}{NIXOS_CONFIGURATION_TOPLEVEL_SUFFIX}" def generate_temp_sbom( target_path, buildtime=False, prefix="sbomnix_", cdx_suffix=".cdx.json", include_csv=False, ): """Generate temporary SBOM artifact files for downstream CLI workflows.""" LOG.info("Generating SBOM for target '%s'", target_path) sbom = SbomBuilder(target_path, buildtime, include_meta=False) cdx_path = None csv_path = None try: with NamedTemporaryFile(delete=False, prefix=prefix, suffix=cdx_suffix) as fcdx: cdx_path = pathlib.Path(fcdx.name) if not include_csv: sbom.to_cdx(cdx_path, printinfo=False) return GeneratedSbom(cdx_path=cdx_path) with NamedTemporaryFile(delete=False, prefix=prefix, suffix=".csv") as fcsv: csv_path = pathlib.Path(fcsv.name) sbom.to_cdx(cdx_path, printinfo=False) sbom.to_csv(csv_path, loglevel=logging.DEBUG) return GeneratedSbom(cdx_path=cdx_path, csv_path=csv_path) except Exception: if cdx_path is not None: cdx_path.unlink(missing_ok=True) if csv_path is not None: csv_path.unlink(missing_ok=True) raise ================================================ FILE: src/sbomnix/closure.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Dependency closure helpers shared by SBOM generation paths.""" from dataclasses import dataclass from typing import Any, Callable, Iterable import pandas as pd from common import columns as cols from common.nix_utils import ( nix_derivation_input_drv_paths, nix_derivation_input_src_paths, ) DEPENDENCY_COLUMNS = [ cols.SRC_PATH, "src_pname", cols.TARGET_PATH, "target_pname", ] @dataclass(frozen=True) class DependencyWalkRow: """One dependency row reached during graph traversal.""" row: dict[str, Any] depth: int def dependency_paths(df_deps): """Return all source and target paths referenced by dependency rows.""" if df_deps is None or df_deps.empty: return set() src_paths = df_deps[cols.SRC_PATH].unique().tolist() target_paths = df_deps[cols.TARGET_PATH].unique().tolist() return 
set(src_paths + target_paths) def dependencies_to_depth(df_deps, start_path, depth, columns=DEPENDENCY_COLUMNS): """Return dependency rows reachable from ``start_path`` up to ``depth``.""" rows = [walked.row for walked in walk_dependency_rows(df_deps, start_path, depth)] if not rows: return pd.DataFrame(columns=pd.Index(columns)) return pd.DataFrame.from_records(rows, columns=pd.Index(columns)) def walk_dependency_rows( df_deps, start_paths: str | Iterable[str], depth, *, inverse=False, stop_at: Callable[[dict[str, Any]], bool] | None = None, ): """Return dependency rows reached by a depth-limited graph walk.""" if df_deps is None or df_deps.empty: return [] if isinstance(start_paths, str): normalized_start_paths = [start_paths] else: normalized_start_paths = list(start_paths) match_column = cols.SRC_PATH if inverse else cols.TARGET_PATH next_column = cols.TARGET_PATH if inverse else cols.SRC_PATH rows_by_path = _dependency_rows_by_path(df_deps, match_column) rows = [] visited_edges = set() def walk(current_path, curr_depth=0): curr_depth += 1 if curr_depth > depth: return for row in rows_by_path.get(current_path, ()): edge_key = (row[cols.TARGET_PATH], row[cols.SRC_PATH]) if edge_key in visited_edges: continue visited_edges.add(edge_key) rows.append(DependencyWalkRow(row=row, depth=curr_depth)) if stop_at is not None and stop_at(row): continue walk(row[next_column], curr_depth) for start_path in dict.fromkeys(normalized_start_paths): walk(start_path) return rows def _dependency_rows_by_path(df_deps, match_column): """Return dependency row records indexed by the path column used for walking.""" rows_by_path = {} for row in df_deps.to_dict("records"): rows_by_path.setdefault(row[match_column], []).append(row) return rows_by_path def derivation_dependencies_df(drv_infos): """Return build-time dependency edges from recursive derivation JSON.""" rows = [] for target_path, drv_info in drv_infos.items(): for src_path in _iter_input_paths(drv_info, target_path): rows.append( { cols.SRC_PATH: src_path, "src_pname": store_path_label(src_path), cols.TARGET_PATH: target_path, "target_pname": store_path_label(target_path), } ) return dependency_rows_to_dataframe(rows) def dependency_rows_to_dataframe(rows, columns=DEPENDENCY_COLUMNS): """Return sorted dependency dataframe from row dictionaries.""" df_deps = pd.DataFrame.from_records(rows, columns=pd.Index(columns)) if not df_deps.empty: df_deps.drop_duplicates(inplace=True) df_deps.sort_values( by=["src_pname", cols.SRC_PATH, "target_pname", cols.TARGET_PATH], inplace=True, ) return df_deps def store_path_label(path): """Return the Nix store graph-style label for a store path.""" basename = str(path).rstrip("/").rsplit("/", maxsplit=1)[-1] _hash, separator, name = basename.partition("-") return name if separator else basename def _iter_input_paths(drv_info, target_path=None): """Yield validated input derivation and source paths from derivation JSON.""" yield from nix_derivation_input_drv_paths(target_path, drv_info) yield from nix_derivation_input_src_paths(target_path, drv_info) ================================================ FILE: src/sbomnix/components.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """SBOM component dataframe helpers.""" import pandas as pd from common.log import LOG from sbomnix.cpe import CPE from sbomnix.derivation import load_many def recursive_derivations_to_dataframe(paths, derivations, 
include_cpe=True): """Return component rows from an already-loaded derivation closure.""" drvs = [] for path in sorted(paths): drv = derivations.get(path) if not drv: LOG.debug("Recursive buildtime closure missing path: %s", path) continue drvs.append(drv) return derivations_to_dataframe(drvs, include_cpe=include_cpe) def runtime_derivations_to_dataframe( paths, output_paths_by_load_path, include_cpe=True ): """Return component rows from runtime output-to-load-path mappings.""" filtered_outputs_by_load_path = filter_runtime_outputs_by_load_path( paths, output_paths_by_load_path, ) derivations = load_many( sorted(filtered_outputs_by_load_path), output_paths_by_drv=filtered_outputs_by_load_path, ignore_missing=True, ).values() return derivations_to_dataframe(derivations, include_cpe=include_cpe) def derivations_to_dataframe(derivations, include_cpe=True): """Return component rows for loaded derivations.""" cpe_generator = CPE(include_cpe=include_cpe) drv_dicts = [] for drv in derivations: drv.set_cpe(cpe_generator) drv_dicts.append(drv.to_dict()) return pd.DataFrame.from_records(drv_dicts) def filter_runtime_outputs_by_load_path(paths, output_paths_by_load_path): """Filter runtime output mappings to the selected component paths.""" selected_paths = set(paths) filtered_outputs_by_load_path = {} for load_path, output_paths in output_paths_by_load_path.items(): filtered_output_paths = set(output_paths) & selected_paths if filtered_output_paths: filtered_outputs_by_load_path[load_path] = filtered_output_paths return filtered_outputs_by_load_path ================================================ FILE: src/sbomnix/cpe.py ================================================ # SPDX-FileCopyrightText: 2022-2023 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Generate CPE (Common Platform Enumeration) identifiers""" import string from common.df import df_from_csv_file, df_log from common.errors import InvalidCpeDictionaryError from common.log import LOG, LOG_SPAM from sbomnix.dfcache import LockedDfCache ############################################################################### _CPE_CSV_URL = "https://github.com/tiiuae/cpedict/raw/main/data/cpes.csv" # Update local cached version of _CPE_CSV_URL once a day or when local cache # is cleaned: _CPE_CSV_CACHE_TTL = 60 * 60 * 24 ############################################################################### class CPE: """Generate Common Platform Enumeration identifiers""" def __init__( self, include_cpe=True, ): self._product_vendor = {} self._ambiguous_products = set() # Initialize the lookup fields above even when CPE matching is disabled below.
if not include_cpe: self.df_cpedict = None return self.cache = LockedDfCache() self.df_cpedict = self.cache.get(_CPE_CSV_URL) if self.df_cpedict is not None and not self.df_cpedict.empty: LOG.debug("read CPE dictionary from cache") else: LOG.debug("CPE cache miss, downloading: %s", _CPE_CSV_URL) self.df_cpedict = df_from_csv_file(_CPE_CSV_URL, exit_on_error=False) if self.df_cpedict is None or self.df_cpedict.empty: LOG.warning( "Failed downloading cpedict: CPE information might not be accurate" ) else: self.cache.set(_CPE_CSV_URL, self.df_cpedict, ttl=_CPE_CSV_CACHE_TTL) if self.df_cpedict is not None: # Verify the loaded cpedict contains at least the following columns required_cols = {"vendor", "product"} if not required_cols.issubset(self.df_cpedict): raise InvalidCpeDictionaryError(required_cols) self._init_product_vendor_index() def _init_product_vendor_index(self): df_cpedict = self.df_cpedict if df_cpedict is None: return product_counts = df_cpedict.groupby("product", sort=False).size() unique_products = [ product for product, count in product_counts.items() if count == 1 ] self._ambiguous_products = { product for product, count in product_counts.items() if count != 1 } df_unique = df_cpedict[df_cpedict["product"].isin(unique_products)] self._product_vendor = dict( zip(df_unique["product"], df_unique["vendor"], strict=False) ) def _cpedict_vendor(self, product): if not product or len(product) == 1: LOG.debug("invalid product name '%s'", product) return None if self.df_cpedict is None: LOG.log(LOG_SPAM, "missing cpedict") return None vendor = self._product_vendor.get(product) if vendor: LOG.log(LOG_SPAM, "found vendor for product '%s': '%s'", product, vendor) return vendor if product not in self._ambiguous_products: LOG.log(LOG_SPAM, "no matches for product '%s'", product) return None # If there is more than one product with the same name, we cannot # determine which vendor name should be used for the CPE. Therefore, # treat it the same way as no matches. LOG.log(LOG_SPAM, "more than one match for product '%s':", product) if LOG.isEnabledFor(LOG_SPAM): df = self.df_cpedict[self.df_cpedict["product"] == product] df_log(df, LOG_SPAM) return None def _candidate_vendor(self, product): """ Return vendor name based on the product name: - Try finding exact match from the CPE dictionary - Try finding exact match based on variations of the product name - Use product name as vendor name if other attempts failed """ vendor = self._cpedict_vendor(product) if not vendor: # No exact match found from cpe dictionary based on product name: # try finding vendor for the product name we get by removing # possible trailing digits from the original product name product_mod = product.rstrip(string.digits) if product != product_mod: LOG.log(LOG_SPAM, "re-trying with product name '%s'", product_mod) vendor = self._cpedict_vendor(product_mod) if not vendor: # Use the product name when no CPE dictionary vendor matches. 
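# (Hypothetical example: product 'foo2' is first retried as 'foo'; if neither is in the dictionary, the vendor falls back to 'foo2', so the resulting CPE becomes cpe:2.3:a:foo2:foo2:<version>:...)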
vendor = product LOG.log(LOG_SPAM, "using product name as vendor '%s'", vendor) return vendor def generate(self, name, version): """Generate CPE identifier, given the product name and version""" cpe_vendor = self._candidate_vendor(name.strip()) cpe_product = name.strip() cpe_version = version.strip() cpe_end = "*:*:*:*:*:*:*" ret = f"cpe:2.3:a:{cpe_vendor}:{cpe_product}:{cpe_version}:{cpe_end}" LOG.log(LOG_SPAM, "CPE: '%s'", ret) return ret ############################################################################### ================================================ FILE: src/sbomnix/dependency_index.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Indexed dependency lookups for SBOM export.""" from dataclasses import dataclass, field import pandas as pd from common import columns as cols def _sorted_unique(values): return sorted({value for value in values if value}) def _normalize_outputs(outputs): if isinstance(outputs, (list, tuple)): return [output for output in outputs if output] if isinstance(outputs, str) and outputs: return [outputs] return [] def _group_dependency_rows(df, dep_col): if df.empty: return {} return { target_path: _sorted_unique(group[dep_col].tolist()) for target_path, group in df.groupby(cols.TARGET_PATH) } @dataclass class DependencyIndex: """Lookup dependency identifiers without repeated dataframe merges.""" by_store_path: dict[str, list[str]] component_frame: pd.DataFrame _uid_maps: dict[str, dict[str, str]] = field(default_factory=dict) def lookup(self, drv, uid=cols.STORE_PATH): """Return dependency identifiers for ``drv`` using the requested column.""" dep_store_paths = self.by_store_path.get(drv.store_path, []) if not dep_store_paths: return None if uid == cols.STORE_PATH: return dep_store_paths uid_map = self._get_uid_map(uid) if uid_map is None: return None self_uid = getattr(drv, uid, None) dep_uids = sorted( { uid_map[dep_store_path] for dep_store_path in dep_store_paths if dep_store_path in uid_map and uid_map[dep_store_path] } ) if self_uid is not None: dep_uids = [dep_uid for dep_uid in dep_uids if dep_uid != self_uid] return dep_uids or None def _get_uid_map(self, uid): if uid in self._uid_maps: return self._uid_maps[uid] if uid not in self.component_frame.columns: return None uid_map = dict( self.component_frame.loc[:, [cols.STORE_PATH, uid]].itertuples( index=False, name=None, ) ) self._uid_maps[uid] = uid_map return uid_map def build_dependency_index(df_deps, df_sbomdb, df_sbomdb_outputs_exploded, uid): """Build an indexed dependency map for all SBOM components.""" if df_sbomdb is None or df_sbomdb.empty: return DependencyIndex(by_store_path={}, component_frame=pd.DataFrame()) by_store_path = {drv.store_path: [] for drv in df_sbomdb.itertuples()} if df_deps is None or df_deps.empty: return DependencyIndex(by_store_path=by_store_path, component_frame=df_sbomdb) runtime_sources = df_sbomdb_outputs_exploded.loc[:, [cols.OUTPUTS, uid]].rename( columns={uid: cols.DEPENDENCY_UID} ) runtime_edges = df_deps.merge( runtime_sources, how="inner", left_on=[cols.SRC_PATH], right_on=[cols.OUTPUTS], ) runtime_by_target = _group_dependency_rows(runtime_edges, cols.DEPENDENCY_UID) buildtime_sources = df_sbomdb.loc[:, [cols.STORE_PATH]].copy() buildtime_sources[cols.DEPENDENCY_UID] = df_sbomdb[uid] buildtime_edges = df_deps.merge( buildtime_sources, how="inner", left_on=[cols.SRC_PATH], right_on=[cols.STORE_PATH], ) 
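# Grouping the buildtime edges by target path (below) mirrors the runtime grouping above, so the per-component loop can assemble dependency sets with plain dict lookups instead of repeated dataframe merges.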
buildtime_by_target = _group_dependency_rows(buildtime_edges, cols.DEPENDENCY_UID) for drv in df_sbomdb.itertuples(): deps: set[str] = set(buildtime_by_target.get(drv.store_path, ())) for output in _normalize_outputs(drv.outputs): deps.update(runtime_by_target.get(output, ())) self_uid = getattr(drv, uid, None) if self_uid is not None: deps.discard(self_uid) by_store_path[drv.store_path] = sorted(deps) return DependencyIndex( by_store_path=by_store_path, component_frame=df_sbomdb, ) ================================================ FILE: src/sbomnix/derivation.py ================================================ # From: https://github.com/flyingcircusio/vulnix/blob/1.10.1/LICENSE: # SPDX-License-Identifier: BSD-3-Clause # SPDX-FileCopyrightText: Flying Circus Internet Operations GmbH # SPDX-FileCopyrightText: 2022-2023 Technology Innovation Institute (TII) """Nix derivation, originally from https://github.com/flyingcircusio/vulnix""" import bisect import json import subprocess from itertools import islice from packageurl import PackageURL from common.errors import NixCommandError from common.log import LOG, LOG_SPAM from common.nix_utils import parse_nix_derivation_show from common.proc import exec_cmd, nix_cmd ############################################################################### def _batched(iterable, size): iterator = iter(iterable) while batch := list(islice(iterator, size)): yield batch def load(path, outpath): """Load derivation from path""" cmd = nix_cmd("derivation", "show", path) drv_infos = parse_nix_derivation_show( _exec_required_nix_command(cmd).stdout, store_path_hint=path, ) drv_path = path drv_info = drv_infos.get(path) if drv_info is None and drv_infos: drv_path, drv_info = next(iter(drv_infos.items())) if drv_info is None: raise NixCommandError( cmd, stderr=f"No derivation metadata returned for '{path}'", ) if outpath is None and path != drv_path and not path.endswith(".drv"): outpath = path d_obj = Derive.from_nix_derivation_info(drv_path, drv_info, outpath) LOG.log(LOG_SPAM, "load derivation: %s", d_obj) LOG.log(LOG_SPAM, "derivation attrs: %s", d_obj.to_dict()) return d_obj def load_many(paths, output_paths_by_drv=None, batch_size=200, ignore_missing=False): """Load many derivations with batched `nix derivation show` calls.""" if not paths: return {} output_paths_by_drv = {} if output_paths_by_drv is None else output_paths_by_drv loaded = {} for batch in _batched(dict.fromkeys(paths), batch_size): drv_infos = _load_derivation_infos( batch, store_path_hint=batch[0], ignore_missing=ignore_missing, ) query_to_drv_path = _query_paths_to_derivations(batch, drv_infos) output_paths_by_loaded_drv = {} missing_paths = [] for query_path in batch: drv_path = query_to_drv_path.get(query_path) if drv_path is None: missing_paths.append(query_path) continue output_paths = output_paths_by_loaded_drv.setdefault(drv_path, set()) output_paths.update(output_paths_by_drv.get(drv_path, ())) output_paths.update(output_paths_by_drv.get(query_path, ())) if query_path != drv_path and not query_path.endswith(".drv"): output_paths.add(query_path) for drv_path, output_paths in output_paths_by_loaded_drv.items(): drv_info = drv_infos[drv_path] sorted_output_paths = sorted(output_paths) drv = Derive.from_nix_derivation_info( drv_path, drv_info, sorted_output_paths[0] if sorted_output_paths else None, ) for outpath in sorted_output_paths[1:]: drv.add_output_path(outpath) LOG.log(LOG_SPAM, "load derivation: %s", drv) LOG.log(LOG_SPAM, "derivation attrs: %s", drv.to_dict()) loaded[drv_path] = 
drv for path in missing_paths: if ignore_missing: LOG.debug("Skipping path without derivation metadata: %s", path) continue loaded[path] = load( path, next(iter(output_paths_by_drv.get(path, ())), None), ) return loaded def _load_derivation_infos(paths, store_path_hint=None, ignore_missing=False): if ignore_missing: ret = exec_cmd( nix_cmd("derivation", "show", *paths), raise_on_error=False, log_error=False, ) else: ret = _exec_required_nix_command(nix_cmd("derivation", "show", *paths)) if ret is not None: return parse_nix_derivation_show(ret.stdout, store_path_hint=store_path_hint) if len(paths) == 1: return {} midpoint = len(paths) // 2 left = _load_derivation_infos( paths[:midpoint], store_path_hint=paths[0], ignore_missing=ignore_missing, ) right = _load_derivation_infos( paths[midpoint:], store_path_hint=paths[midpoint], ignore_missing=ignore_missing, ) return {**left, **right} def _query_paths_to_derivations(query_paths, drv_infos): output_to_drv_path = {} for drv_path, drv_info in drv_infos.items(): for output_path in _derivation_output_paths(drv_info): output_to_drv_path.setdefault(output_path, drv_path) query_to_drv_path = {} for query_path in query_paths: if query_path in drv_infos: query_to_drv_path[query_path] = query_path continue drv_path = output_to_drv_path.get(query_path) if drv_path: query_to_drv_path[query_path] = drv_path return query_to_drv_path def _derivation_output_paths(drv_info): outputs = drv_info.get("outputs", {}) env_vars = drv_info.get("env", {}) if not isinstance(outputs, dict): outputs = {} if not isinstance(env_vars, dict): env_vars = {} output_paths = [] def add_output_path(path): if path and path not in output_paths: output_paths.append(path) for output_name, output in outputs.items(): path = _derivation_output_path(outputs, output_name) if path: add_output_path(path) elif isinstance(output, str): add_output_path(output) else: add_output_path(env_vars.get(output_name)) for output_name in str(env_vars.get("outputs", "")).split(): add_output_path(env_vars.get(output_name)) return output_paths def load_recursive(path): """Load a derivation and its recursive build-time closure.""" cmd = nix_cmd("derivation", "show", "--recursive", path) drv_infos = parse_nix_derivation_show( _exec_required_nix_command(cmd).stdout, store_path_hint=path, ) if not drv_infos: raise NixCommandError( cmd, stderr=f"No derivation metadata returned for '{path}'", ) loaded = {} for drv_path, drv_info in drv_infos.items(): drv = Derive.from_nix_derivation_info(drv_path, drv_info) LOG.log(LOG_SPAM, "load derivation: %s", drv) LOG.log(LOG_SPAM, "derivation attrs: %s", drv.to_dict()) loaded[drv_path] = drv return loaded, drv_infos def _exec_required_nix_command(cmd): try: return exec_cmd(cmd) except subprocess.CalledProcessError as error: raise NixCommandError( cmd, stderr=error.stderr, stdout=error.stdout, ) from None def destructure(env): """Decodes Nix 2.0 __structuredAttrs.""" if "__json" in env: return json.loads(env["__json"]) return {} class Derive: """Nix derivation as found as .drv files in the Nix store.""" def __init__( self, _outputs=None, _system=None, _builder=None, _args=None, envVars=None, _derivations=None, name=None, patches=None, ): """Create a derivation from a .drv file. The derivation files are just accidentally Python-syntax, but hey! 
:-) """ if envVars is None: envVars = {} envVars = dict(envVars) LOG.log(LOG_SPAM, envVars) self.name = name or envVars.get("name") if not self.name: self.name = destructure(envVars)["name"] pname = envVars.get("pname", self.name) # pname read from envVars might not match the pname in nixpkgs. # As an example 'Authen-SASL' full pname is 'perl5.36.0-Authen-SASL' # Below, we reconstruct the full pname based on self.name which # contains the full pname: self.pname = self.name.partition(pname)[0] + pname self.version = envVars.get("version", "") self.patches = patches or envVars.get("patches", "") self.system = envVars.get("system", "") self.out = envVars.get("out", "") self.outputs = [] self.store_path = None outputs = envVars.get("outputs", "").split() for output in outputs: path = envVars.get(output, None) self.add_output_path(path) LOG.log(LOG_SPAM, "%s outputs: %s", self, self.outputs) # pname 'source' in Nix has special meaning - it is the default name # for all fetchFromGitHub derivations. As such, it should not be used # to construct cpe or purl, rather, cpe and purl should be empty # for such packages. self.cpe = "" self.purl = "" self._refresh_purl() self.urls = envVars.get("urls", "") @classmethod def from_nix_derivation_info(cls, path, drv_info, outpath=None): """Create a derivation from normalized `nix derivation show` JSON.""" env_vars = dict(drv_info.get("env", {})) name = _coerce_derivation_string(drv_info.get("name")) or env_vars.get("name") if not name: name = destructure(env_vars).get("name") outputs = drv_info.get("outputs", {}) if not isinstance(outputs, dict): outputs = {} drv = cls( envVars=env_vars, name=name, patches=env_vars.get("patches", ""), ) drv.system = _coerce_derivation_string(drv_info.get("system")) or drv.system drv.version = env_vars.get("version", "") if not drv.version: drv.version = _coerce_derivation_string(drv_info.get("version")) drv.out = drv.out or _derivation_output_path(outputs, "out") drv._refresh_purl() drv.outputs = [] _set_derivation_output_paths(drv, outputs, env_vars) drv.init(path, outpath) return drv def init(self, path, outpath): """Initialize self.store_path and self.outputs""" if self.store_path is not None: raise AssertionError("Derivation is already initialized") LOG.log(LOG_SPAM, "path:%s, outpath:%s", path, outpath) self.store_path = path outpath = outpath if outpath and outpath != path else self.out self.add_output_path(outpath) def __repr__(self): return f"" def set_cpe(self, cpe_generator): """Generate cpe identifier""" if self.pname != "source" and cpe_generator is not None: self.cpe = cpe_generator.generate(self.pname, self.version) def add_output_path(self, path): """Add an output path to derivation""" if path and path not in self.outputs and path != self.store_path: LOG.log(LOG_SPAM, "adding outpath to %s:%s", self, path) bisect.insort(self.outputs, path) def _refresh_purl(self): self.purl = "" if self.pname != "source": self.purl = str( PackageURL(type="nix", name=self.pname, version=self.version) ) def to_dict(self): """Return derivation as dictionary""" ret = {} for attr in vars(self): ret[attr] = getattr(self, attr) return ret def _derivation_output_path(outputs, output_name): output = outputs.get(output_name) if isinstance(output, dict): return output.get("path", "") if isinstance(output, str): return output return "" def _coerce_derivation_string(value): if isinstance(value, str): return value return "" def _set_derivation_output_paths(drv, outputs, env_vars): for output in outputs.values(): if isinstance(output, dict): 
drv.add_output_path(output.get("path")) else: drv.add_output_path(output) if drv.outputs: return for output_name in str(env_vars.get("outputs", "")).split(): drv.add_output_path(env_vars.get(output_name)) ================================================ FILE: src/sbomnix/derivers.py ================================================ # From: https://github.com/flyingcircusio/vulnix/blob/1.10.1/LICENSE: # SPDX-License-Identifier: BSD-3-Clause # SPDX-FileCopyrightText: Flying Circus Internet Operations GmbH # SPDX-FileCopyrightText: 2022-2023 Technology Innovation Institute (TII) """Deriver lookup helpers for Nix store paths.""" import os from common.errors import MissingNixDeriverError, SbomnixError from common.log import LOG, LOG_SPAM from common.nix_utils import parse_nix_derivation_show from common.proc import exec_cmd, nix_cmd def is_loadable_deriver_path(path): """Return whether path names an existing Nix derivation file.""" return ( isinstance(path, str) and path != "unknown-deriver" and path.endswith(".drv") and os.path.exists(path) ) def find_deriver(path): """Return drv path for the given nix store artifact path.""" LOG.log(LOG_SPAM, path) if path.endswith(".drv"): return path cmd = nix_cmd("derivation", "show", path) ret = exec_cmd(cmd, raise_on_error=False, log_error=False) if not ret: LOG.log(LOG_SPAM, "Deriver not found for '%s'", path) return None qvd_json_keys = list( parse_nix_derivation_show(ret.stdout, store_path_hint=path).keys() ) if not qvd_json_keys: LOG.log(LOG_SPAM, "No qvd_deriver found for '%s'", path) return None qvd_deriver = qvd_json_keys[0] LOG.log(LOG_SPAM, "qvd_deriver: %s", qvd_deriver) if is_loadable_deriver_path(qvd_deriver): return qvd_deriver if qvd_deriver and qvd_deriver != "unknown-deriver": raise RuntimeError( f"Deriver `{qvd_deriver}` does not exist. " f"Couldn't find deriver for path `{path}`" ) raise RuntimeError( "Cannot determine deriver. Is this really a path into the nix store?", path, ) def require_deriver(path, *, find_deriver_fn=find_deriver, log=LOG): """Return the deriver for ``path`` or raise a typed error.""" try: drv_path = find_deriver_fn(path) except SbomnixError: raise except RuntimeError as error: raise MissingNixDeriverError(path) from error if not drv_path: raise MissingNixDeriverError(path) log.debug("nix_drv: %s", drv_path) return drv_path ================================================ FILE: src/sbomnix/dfcache.py ================================================ # SPDX-FileCopyrightText: 2022-2024 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Thread-safe DataFrameDiskCache""" import pathlib import tempfile from getpass import getuser from dfdiskcache import DataFrameDiskCache from filelock import FileLock ############################################################################### # DataFrameDiskCache cache local path and lock file DFCACHE_PATH = pathlib.Path(tempfile.gettempdir()) / f"{getuser()}_sbomnix_df_cache" DFCACHE_LOCK = DFCACHE_PATH / "dfcache.lock" ################################################################################ class LockedDfCache: """Thread-safe (and process-safe) wrapper for DataFrameDiskCache""" def __init__(self): self.dflock = FileLock(DFCACHE_LOCK) def __getattr__(self, name): def wrap(*a, **k): with self.dflock: # We intentionally do not store the dfcache as an object variable # but re-instantiate it every time any LockedDfCache method # is called. DataFrameDiskCache internally makes use of sqlite # which does not allow concurrent connections to the database.
# Having the dfcache initiated once in __init__() and then # re-used here would mean the connection would remain reserved # for the first thread making other threads throw with # 'database locked' etc. even if we otherwise protect # concurrent writes. dfcache = DataFrameDiskCache(cache_dir_path=DFCACHE_PATH) return getattr(dfcache, name)(*a, **k) return wrap ############################################################################### ================================================ FILE: src/sbomnix/exporters.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """SBOM document exporters.""" import json import re from datetime import datetime, timezone from common import columns as cols from common.log import LOG from common.pkgmeta import get_py_pkg_version from common.spdx import canonicalize_spdx_license_id from sbomnix.cdx import _drv_to_cdx_component, _drv_to_cdx_dependency _NIXPKGS_META_SOURCE_FIELDS = ( ("nixpkgs:metadata_source_method", "method"), ("nixpkgs:path", "path"), ("nixpkgs:rev", "rev"), ("nixpkgs:flakeref", "flakeref"), ("nixpkgs:version", "version"), ("nixpkgs:message", "message"), ) def write_json(pathname, data, printinfo=False): """Write JSON data to a file.""" with open(pathname, "w", encoding="utf-8") as outfile: json_string = json.dumps(data, indent=2) outfile.write(json_string) if printinfo: LOG.info("Wrote: %s", outfile.name) def _nixpkgs_meta_source_properties(sbomdb): """Return non-empty document properties for nixpkgs metadata source.""" source = getattr(sbomdb, "nixpkgs_meta_source", None) if source is None: return [] properties = [] for property_name, attr_name in _NIXPKGS_META_SOURCE_FIELDS: value = getattr(source, attr_name) if value: properties.append({"name": property_name, "value": str(value)}) return properties def _spdx_nixpkgs_meta_source_comment(sbomdb): """Return a compact SPDX comment line for nixpkgs metadata source.""" source = getattr(sbomdb, "nixpkgs_meta_source", None) if source is None: return None fields = [] for property_name, attr_name in _NIXPKGS_META_SOURCE_FIELDS: value = getattr(source, attr_name) if value: fields.append(f"{property_name.removeprefix('nixpkgs:')}={value}") if not fields: return None return "nixpkgs metadata source: " + "; ".join(fields) def build_cdx_document(sbomdb): """Build a CycloneDX document from an SBOM builder.""" cdx = {} cdx["bomFormat"] = "CycloneDX" cdx["specVersion"] = "1.4" cdx["version"] = 1 cdx["serialNumber"] = f"urn:uuid:{sbomdb.uuid}" cdx["metadata"] = {} cdx["metadata"]["timestamp"] = datetime.now(timezone.utc).astimezone().isoformat() cdx["metadata"]["properties"] = [] prop = {} prop["name"] = "sbom_type" prop["value"] = sbomdb.sbom_type cdx["metadata"]["properties"].append(prop) if sbomdb.depth: prop = {} prop["name"] = "sbom_dependencies_depth" prop["value"] = str(sbomdb.depth) cdx["metadata"]["properties"].append(prop) cdx["metadata"]["properties"].extend(_nixpkgs_meta_source_properties(sbomdb)) tool = {} tool["vendor"] = "TII" tool["name"] = "sbomnix" tool["version"] = get_py_pkg_version() cdx["metadata"]["tools"] = [] cdx["metadata"]["tools"].append(tool) cdx["components"] = [] cdx["dependencies"] = [] for drv in sbomdb.df_sbomdb.itertuples(): component = _drv_to_cdx_component(drv, uid=sbomdb.uid) if drv.store_path == sbomdb.target_component_ref: cdx["metadata"]["component"] = component else: cdx["components"].append(component) deps = 
sbomdb.lookup_dependencies(drv, uid=sbomdb.uid) dependency = _drv_to_cdx_dependency(drv, deps, uid=sbomdb.uid) cdx["dependencies"].append(dependency) return cdx def _str_to_spdxid(strval): # Only letters, numbers, '.', and '-' are allowed in spdx idstring, # replace all other characters with '-' idstring = re.sub(r"[^\-.a-zA-Z0-9]", "-", strval) # Return idstring with prefix "SPDXRef-" if idstring.startswith("-"): return f"SPDXRef{idstring}" return f"SPDXRef-{idstring}" def _drv_to_spdx_license_list(drv): license_attr_name = "meta_license_spdxid" if license_attr_name not in drv._asdict(): return [] license_str = getattr(drv, license_attr_name) if not license_str: return [] license_strings = license_str.split(";") licenses = [] for license_string in license_strings: canonical = canonicalize_spdx_license_id(license_string) if not canonical: continue licenses.append(canonical) return licenses def _drv_to_spdx_extrefs(drv): extrefs = [] if drv.cpe: cpe_ref = {} cpe_ref["referenceCategory"] = "SECURITY" cpe_ref["referenceType"] = "cpe23Type" cpe_ref["referenceLocator"] = drv.cpe extrefs.append(cpe_ref) if drv.purl: purl_ref = {} purl_ref["referenceCategory"] = "PACKAGE-MANAGER" purl_ref["referenceType"] = "purl" purl_ref["referenceLocator"] = drv.purl extrefs.append(purl_ref) return extrefs def _drv_to_spdx_package(drv, uid=cols.STORE_PATH): """Convert one entry from sbomdb (drv) to an SPDX package.""" pkg = {} pkg["name"] = drv.pname pkg["SPDXID"] = _str_to_spdxid(getattr(drv, uid)) pkg["versionInfo"] = drv.version pkg["downloadLocation"] = "NOASSERTION" if drv.urls: pkg["downloadLocation"] = drv.urls if "meta_homepage" in drv._asdict() and drv.meta_homepage: pkg["homepage"] = drv.meta_homepage if "meta_description" in drv._asdict() and drv.meta_description: pkg["summary"] = drv.meta_description licenses = _drv_to_spdx_license_list(drv) if licenses: pkg["licenseInfoFromFiles"] = licenses licence_entry = licenses[0] if len(licenses) == 1 else "NOASSERTION" pkg["licenseConcluded"] = licence_entry pkg["licenseDeclared"] = licence_entry pkg["copyrightText"] = "NOASSERTION" extrefs = _drv_to_spdx_extrefs(drv) if extrefs: pkg["externalRefs"] = extrefs return pkg def _drv_to_spdx_relationships(drv, deps_list, uid=cols.STORE_PATH): """Return list of SPDX relationships for one sbomdb row.""" relationships = [] if not deps_list: return relationships drv_spdxid = _str_to_spdxid(getattr(drv, uid)) relationship_type = "DEPENDS_ON" for dep in deps_list: relationship = {} relationship["spdxElementId"] = drv_spdxid relationship["relationshipType"] = relationship_type relationship["relatedSpdxElement"] = _str_to_spdxid(dep) relationships.append(relationship) return relationships def build_spdx_document(sbomdb): """Build an SPDX document from an SBOM builder.""" spdx = {} spdx["spdxVersion"] = "SPDX-2.3" spdx["dataLicense"] = "CC0-1.0" spdx["SPDXID"] = "SPDXRef-DOCUMENT" spdx["name"] = "" spdx["documentNamespace"] = f"sbomnix://{sbomdb.uuid}" creation_info = {} creation_info["created"] = datetime.now(timezone.utc).astimezone().isoformat() creation_info["creators"] = [] creation_info["creators"].append(f"Tool: sbomnix-{get_py_pkg_version()}") spdx["creationInfo"] = creation_info comments = [f"included dependencies: '{sbomdb.sbom_type}'"] source_comment = _spdx_nixpkgs_meta_source_comment(sbomdb) if source_comment: comments.append(source_comment) spdx["comment"] = "\n".join(comments) spdx["packages"] = [] spdx["relationships"] = [] for drv in sbomdb.df_sbomdb.itertuples(): package = _drv_to_spdx_package(drv, 
uid=sbomdb.uid) spdx["packages"].append(package) if drv.store_path == sbomdb.target_component_ref: spdx["name"] = _str_to_spdxid(getattr(drv, sbomdb.uid)) deps = sbomdb.lookup_dependencies(drv, uid=sbomdb.uid) relationships = _drv_to_spdx_relationships(drv, deps, uid=sbomdb.uid) for relation in relationships: spdx["relationships"].append(relation) return spdx ================================================ FILE: src/sbomnix/main.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2022-2023 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Python script that generates SBOMs from nix packages""" import argparse from common.cli_args import add_verbose_argument, add_version_argument, check_positive from common.errors import SbomnixError from common.log import LOG, set_log_verbosity from sbomnix.builder import SbomBuilder from sbomnix.cli_utils import resolve_nix_target ############################################################################### def getargs(args=None): """Parse command line arguments""" desc = ( "This tool finds dependencies of the specified nix store path " "or flake reference NIXREF and " "writes SBOM file(s) as specified in output arguments." ) epil = "Example: sbomnix /nix/store/path/or/flakeref" parser = argparse.ArgumentParser(description=desc, epilog=epil) helps = ( "Target nix store path (e.g. derivation file or nix output path) or flakeref" ) parser.add_argument("NIXREF", help=helps, type=str) helps = "Scan buildtime dependencies instead of runtime dependencies" parser.add_argument("--buildtime", help=helps, action="store_true") helps = ( "Set the depth of the included dependencies. As an example, --depth=1 " "indicates the SBOM should include only the NIXREF direct dependencies. " "With --depth=2, the output SBOM includes the direct dependencies and the " "first level of transitive dependencies. " "By default, when --depth is not specified, the output SBOM includes " "all dependencies all the way to the root of the dependency tree." ) parser.add_argument("--depth", help=helps, type=check_positive) add_version_argument(parser) add_verbose_argument(parser) helps = "Include vulnerabilities in the output of CyloneDX SBOM" parser.add_argument("--include-vulns", help=helps, action="store_true") helps = "Exclude Nixpkgs metadata information in the output" parser.add_argument( "--exclude-meta", help=helps, action="store_true", default=False ) helps = ( "Nixpkgs source used for metadata enrichment. Accepts a nixpkgs " "flakeref, a nixpkgs source path, or nix-path. Overrides automatic " "metadata-source detection." 
) parser.add_argument("--meta-nixpkgs", help=helps, metavar="META_NIXPKGS") helps = "Exclude using heuristics-based CPE matches in the output" parser.add_argument( "--exclude-cpe-matching", help=helps, action="store_true", default=False ) group = parser.add_argument_group("output arguments") helps = "Path to csv output file (default: ./sbom.csv)" group.add_argument("--csv", nargs="?", help=helps, default="sbom.csv") helps = "Path to cyclonedx json output file (default: ./sbom.cdx.json)" group.add_argument("--cdx", nargs="?", help=helps, default="sbom.cdx.json") helps = "Path to spdx json output file (default: ./sbom.spdx.json)" group.add_argument("--spdx", nargs="?", help=helps, default="sbom.spdx.json") helps = "Run nix command with --impure" parser.add_argument("--impure", help=helps, action="store_true") return parser.parse_args(args) ################################################################################ def main(): """main entry point""" args = getargs() set_log_verbosity(args.verbose) try: _run(args) except SbomnixError as error: LOG.fatal("%s", error) raise SystemExit(1) from error def _run(args): if args.exclude_meta and args.meta_nixpkgs: raise SbomnixError("--exclude-meta cannot be used with --meta-nixpkgs") target = resolve_nix_target( args.NIXREF, buildtime=args.buildtime, impure=args.impure ) LOG.info("Generating SBOM for target '%s'", target.path) sbom = SbomBuilder( nix_path=target.path, buildtime=args.buildtime, depth=args.depth, flakeref=target.flakeref, original_ref=target.original_ref, meta_nixpkgs=args.meta_nixpkgs, impure=args.impure, include_meta=not args.exclude_meta, include_vulns=args.include_vulns, include_cpe=not args.exclude_cpe_matching, ) if args.cdx: cdx = sbom.to_cdx_data() if args.include_vulns: sbom.enrich_cdx_with_vulnerabilities(cdx) sbom.write_json(args.cdx, cdx, printinfo=True) if args.spdx: sbom.to_spdx(args.spdx) if args.csv: sbom.to_csv(args.csv) ################################################################################ if __name__ == "__main__": main() ################################################################################ ================================================ FILE: src/sbomnix/meta.py ================================================ # SPDX-FileCopyrightText: 2022-2023 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Cache and scan nixpkgs meta information.""" import pathlib import tempfile from dataclasses import replace from getpass import getuser from filelock import FileLock from common.log import LOG from nixmeta.scanner import NixMetaScanner from sbomnix.dfcache import LockedDfCache from sbomnix.meta_source import ( META_NIXPKGS_NIX_PATH, SCAN_EXCEPTIONS, NixpkgsMetaSource, NixpkgsMetaSourceResolver, classify_meta_nixpkgs, ) ############################################################################### # Update locally generated nixpkgs meta-info every 30 days or when local cache # is cleaned. _NIXMETA_NIXPKGS_TTL = 60 * 60 * 24 * 30 # FileLock lock path _FLOCK = pathlib.Path(tempfile.gettempdir()) / f"{getuser()}_sbomnix_meta.lock" ############################################################################### __all__ = [ "META_NIXPKGS_NIX_PATH", "Meta", "NixpkgsMetaSource", "classify_meta_nixpkgs", ] class Meta: """Cache nixpkgs meta information.""" def __init__(self): self.lock = FileLock(_FLOCK) self.cache = LockedDfCache() self.source_resolver = NixpkgsMetaSourceResolver() def get_nixpkgs_meta(self, nixref=None): """ Return nixpkgs meta pinned in `nixref`. 
`nixref` can point to a nix store path or flake reference. If nixref is None, attempt to read the nixpkgs store path from NIX_PATH environment variable. """ source = self.source_resolver.resolve_default_source(nixref) return self._scan_source(source) def get_nixpkgs_meta_with_source( self, *, target_path=None, flakeref=None, original_ref=None, explicit_nixpkgs=None, impure=False, ): """Return nixpkgs metadata and selected metadata source.""" source = self._resolve_source( target_path=target_path, flakeref=flakeref, original_ref=original_ref, explicit_nixpkgs=explicit_nixpkgs, impure=impure, ) return self._scan_source_with_source(source) def _resolve_source( self, *, target_path=None, flakeref=None, original_ref=None, explicit_nixpkgs=None, impure=False, ): if explicit_nixpkgs: return self.source_resolver.resolve_meta_nixpkgs_option( explicit_nixpkgs, target_path=target_path, ) if flakeref: source = self.source_resolver.resolve_flakeref_target_source( flakeref, impure=impure, ) if source is not None: return source return self.source_resolver.resolve_flakeref_lock_source(flakeref) return self.source_resolver.path_target_without_source( target_path=target_path, original_ref=original_ref, ) def _scan_source(self, source): df, _source = self._scan_source_with_source(source) return df def _scan_source_with_source(self, source): if not source.path: return None, source if source.expression: LOG.debug("Scanning meta-info using nix expression for: %s", source.path) df = self._scan_expression( source.expression, cache_key=source.expression_cache_key, impure=source.expression_impure, ) if df is not None and not df.empty: return df, source LOG.warning( "Failed scanning evaluated package set: %s", source.path, ) return None, replace( source, message=( "Evaluated package-set metadata scan failed. " "Skipping nixpkgs metadata." ), ) LOG.debug("Scanning meta-info using nixpkgs path: %s", source.path) return self._scan(source.path), source def _scan_expression(self, expression, *, cache_key=None, impure=False): if cache_key is None: with self.lock: LOG.debug("cache disabled, scanning expression") df = self._try_scan_expression(expression, impure=impure) if df is None or df.empty: LOG.warning("Failed scanning uncached nixmeta expression") return None return df cache_key = f"expr:{cache_key}" with self.lock: df = self.cache.get(cache_key) if df is not None and not df.empty: LOG.debug("found from cache: %s", cache_key) return df LOG.debug("cache miss, scanning expression: %s", cache_key) df = self._try_scan_expression(expression, impure=impure) if df is None or df.empty: LOG.warning("Failed scanning nixmeta expression: %s", cache_key) return None self.cache.set(key=cache_key, value=df, ttl=_NIXMETA_NIXPKGS_TTL) return df @staticmethod def _try_scan_expression(expression, *, impure=False): try: scanner = NixMetaScanner() scanner.scan_expression(expression, impure=impure) return scanner.to_df() except SCAN_EXCEPTIONS: LOG.debug("Failed scanning nixmeta expression", exc_info=True) return None def _scan(self, nixpkgs_path): # In case sbomnix is run concurrently, we want to make sure there's # only one instance of NixMetaScanner.scan_path() running at a time. # The reason is, NixMetaScanner.scan_path() potentially invokes # `nix-env -qa --meta --json -f /path/to/nixpkgs` which is very # memory intensive. The locking needs to happen here (and not in # NixMetaScanner) because this object caches the nixmeta info. 
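        # (FileLock is an inter-process lock backed by a lock file in the
        # system temp directory, so concurrent sbomnix invocations serialize
        # here too, not only threads within this process.)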
# First scan generates the cache, after which the consecutive scans # will read the scan results from the cache, not having to run # the nix-env command again, making the consecutive scans relatively # fast and light-weight. with self.lock: df = self.cache.get(nixpkgs_path) if df is not None and not df.empty: LOG.debug("found from cache: %s", nixpkgs_path) return df LOG.debug("cache miss, scanning: %s", nixpkgs_path) scanner = NixMetaScanner() scanner.scan_path(nixpkgs_path) df = scanner.to_df() if df is None or df.empty: LOG.warning("Failed scanning nixmeta: %s", nixpkgs_path) return None # Cache requires some TTL, so we set it to some value here. # Although, we could as well store it indefinitely as it should # not change given the same key (nixpkgs store path). self.cache.set(key=nixpkgs_path, value=df, ttl=_NIXMETA_NIXPKGS_TTL) return df ############################################################################### ================================================ FILE: src/sbomnix/meta_source.py ================================================ # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Resolve nixpkgs metadata sources from target context and CLI options.""" import json import os import pathlib import re from dataclasses import dataclass, replace from subprocess import CalledProcessError from urllib.parse import urlencode from common.errors import SbomnixError from common.flakeref import ( NIXOS_CONFIGURATION_TOPLEVEL_SUFFIX, parse_nixos_configuration_ref, quote_nix_attr_segment, ) from common.log import LOG from common.proc import exec_cmd, nix_cmd from nixmeta.scanner import nixref_to_nixpkgs_path META_NIXPKGS_NIX_PATH = "nix-path" RESERVED_META_NIXPKGS_MODES = frozenset({META_NIXPKGS_NIX_PATH}) SCAN_EXCEPTIONS = (KeyError, OSError, CalledProcessError, TypeError, ValueError) _NIXREF_RESOLUTION_EXCEPTIONS = (AttributeError, *SCAN_EXCEPTIONS) @dataclass(frozen=True) class NixpkgsMetaSource: """Description of the nixpkgs source used for metadata enrichment.""" method: str path: str | None = None flakeref: str | None = None rev: str | None = None version: str | None = None message: str | None = None expression: str | None = None expression_cache_key: str | None = None expression_impure: bool = False def classify_meta_nixpkgs(value): """Classify a --meta-nixpkgs value as a reserved mode or explicit source.""" if value in RESERVED_META_NIXPKGS_MODES: return value return "explicit" def read_nixpkgs_version(nixpkgs_path): """Read nixpkgs version from a source path if available.""" try: return ( (pathlib.Path(nixpkgs_path) / "lib" / ".version") .read_text(encoding="utf-8") .strip() ) except OSError: return None def is_nix_store_path(path): """Return true when path syntactically points into /nix/store.""" return pathlib.Path(path).as_posix().startswith("/nix/store/") def nixpkgs_meta_source_with_path(source): """Attach path-local nixpkgs version to a metadata source.""" if not source.path: return source return replace(source, version=read_nixpkgs_version(source.path)) class NixpkgsMetaSourceResolver: """Resolve a nixpkgs metadata source without scanning metadata.""" @staticmethod def path_target_without_source(target_path=None, original_ref=None): """Return the no-source result for store-path targets.""" LOG.debug( "No automatic nixpkgs metadata source for target path=%s original_ref=%s", target_path, original_ref, ) return NixpkgsMetaSource( method="none", message=( "No nixpkgs metadata source was provided for store-path 
target. " "Skipping nixpkgs metadata. Re-run with " "--meta-nixpkgs to include metadata." ), ) def resolve_meta_nixpkgs_option(self, meta_nixpkgs, *, target_path=None): """Resolve an explicit --meta-nixpkgs source or reserved mode.""" LOG.debug( "Resolving explicit nixpkgs metadata source for target path=%s", target_path, ) mode = classify_meta_nixpkgs(meta_nixpkgs) if mode == META_NIXPKGS_NIX_PATH: return self.resolve_nix_path_source( message="NIX_PATH metadata source may not match the target", required=True, ) return self.resolve_explicit_source(meta_nixpkgs) def resolve_flakeref_target_source(self, flakeref, *, impure=False): """Resolve target-specific nixpkgs metadata for known flakeref outputs.""" parsed = self._parse_nixos_toplevel_flakeref(flakeref) if not parsed: return None flake, name = parsed name_attr = quote_nix_attr_segment(name) pkgs_path_ref = f"{flake}#nixosConfigurations.{name_attr}.pkgs.path" pkgs_path = self._nix_eval_raw(pkgs_path_ref, impure=impure) if pkgs_path: expression_flake = self._flake_ref_for_expression( flake, impure=impure, ) return nixpkgs_meta_source_with_path( NixpkgsMetaSource( method="flakeref-target", path=pkgs_path, flakeref=pkgs_path_ref, message="Scanning evaluated NixOS package set from flakeref", expression=self._nixos_pkgs_expression(expression_flake, name), expression_cache_key=self._nixos_pkgs_expression_cache_key( expression_flake, name, impure=impure, ), expression_impure=impure, ), ) return self._nixos_toplevel_without_source() @staticmethod def _nixos_toplevel_without_source(): return NixpkgsMetaSource( method="none", message=( "Failed resolving target-specific nixpkgs metadata source from " "NixOS configuration flakeref. Skipping nixpkgs metadata. Re-run " "with --meta-nixpkgs to include metadata." 
), ) @staticmethod def _parse_nixos_toplevel_flakeref(flakeref): return parse_nixos_configuration_ref( flakeref, suffix=NIXOS_CONFIGURATION_TOPLEVEL_SUFFIX, ) @staticmethod def _nixos_pkgs_expression(flake, name): flake_json = json.dumps(flake) name_attr = quote_nix_attr_segment(name) return ( "let\n" f" flake = builtins.getFlake {flake_json};\n" "in\n" f" flake.nixosConfigurations.{name_attr}.pkgs\n" ) def _flake_ref_for_expression(self, flake, *, impure=False): if self._flake_ref_has_stable_lock(flake): return flake if self._should_lock_flake_ref_for_expression(flake): locked_ref = self._locked_flake_ref_from_metadata(flake, impure=impure) if locked_ref: return locked_ref return self._normalize_local_flake_ref_for_expression(flake) @staticmethod def _flake_ref_has_stable_lock(flake): return re.search(r"(?:[?&])(?:narHash|rev)=", flake) is not None @classmethod def _should_lock_flake_ref_for_expression(cls, flake): if cls._flake_ref_has_stable_lock(flake): return False if cls._is_existing_local_flake_ref(flake): return True return re.match(r"^[A-Za-z][A-Za-z0-9+.-]*:", flake or "") is not None @staticmethod def _is_existing_local_flake_ref(flake): path_text = flake if flake.startswith("path:"): path_text = flake.removeprefix("path:").partition("?")[0] elif re.match(r"^[A-Za-z][A-Za-z0-9+.-]*:", flake or ""): return False return pathlib.Path(path_text).expanduser().exists() @staticmethod def _locked_flake_ref_from_metadata(flake, *, impure=False): meta_json = NixpkgsMetaSourceResolver._nix_flake_metadata( flake, impure=impure, ) if meta_json is None: return None try: source_path = meta_json["path"] locked = meta_json["locked"] nar_hash = locked["narHash"] except (KeyError, TypeError): return None if not source_path or not nar_hash or not is_nix_store_path(source_path): return None query = {"narHash": nar_hash} locked_dir = locked.get("dir") if locked_dir: query["dir"] = locked_dir return f"path:{source_path}?{urlencode(query, safe='/')}" @staticmethod def _nix_flake_metadata(flake, *, impure=False): LOG.debug("Reading flake metadata for nixpkgs metadata expression: %s", flake) ret = exec_cmd( nix_cmd("flake", "metadata", flake, "--json", impure=impure), raise_on_error=False, return_error=True, log_error=False, ) if ret is None or ret.returncode != 0: LOG.debug("Failed reading flake metadata for expression: %s", flake) return None try: return json.loads(ret.stdout) except ValueError: LOG.debug("Failed parsing flake metadata for expression: %s", flake) return None @staticmethod def _normalize_local_flake_ref_for_expression(flake): if flake.startswith("path:"): path_text, separator, query = flake.removeprefix("path:").partition("?") path = pathlib.Path(path_text).expanduser() if not path.is_absolute(): path_text = path.resolve().as_posix() return f"path:{path_text}{separator}{query}" if re.match(r"^[A-Za-z][A-Za-z0-9+.-]*:", flake or ""): return flake path = pathlib.Path(flake).expanduser() if path.exists() or flake.startswith((".", "/", "~")): return path.resolve().as_posix() return flake @classmethod def _nixos_pkgs_expression_cache_key(cls, flake, name, *, impure=False): if impure: return None stable_ref = cls._stable_flake_ref_for_expression_cache(flake) if not stable_ref: return None cache_parts = json.dumps([stable_ref, name], separators=(",", ":")) return f"nixos-pkgs:{cache_parts}" @staticmethod def _stable_flake_ref_for_expression_cache(flake): if flake.startswith("path:/nix/store/"): return flake if flake.startswith("/nix/store/"): return flake if re.search(r"(?:[?&])rev=", flake): 
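            # A flakeref pinned to an exact rev evaluates reproducibly, so
            # the ref itself can serve as a stable cache key.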
return flake return None @staticmethod def _nix_eval_raw(flakeref, *, impure=False): LOG.debug("Evaluating nixpkgs metadata helper flakeref '%s'", flakeref) ret = exec_cmd( nix_cmd("eval", "--raw", flakeref, impure=impure), raise_on_error=False, return_error=True, log_error=False, ) if ret is None or ret.returncode != 0: LOG.debug( "Failed evaluating nixpkgs metadata helper flakeref: %s", flakeref ) return None return ret.stdout.strip() or None def resolve_explicit_source(self, meta_nixpkgs): """Resolve an explicit --meta-nixpkgs path or flakeref.""" path = pathlib.Path(meta_nixpkgs) if path.exists(): resolved_path = path.resolve() if is_nix_store_path(resolved_path): return nixpkgs_meta_source_with_path( NixpkgsMetaSource( method="explicit", path=resolved_path.as_posix(), ), ) nixpath = self._try_normalize_mutable_path(resolved_path) if nixpath: return nixpkgs_meta_source_with_path( NixpkgsMetaSource( method="explicit", path=nixpath.as_posix(), flakeref=resolved_path.as_posix(), ), ) raise SbomnixError( "Explicit --meta-nixpkgs path must resolve to an immutable " f"/nix/store source before scanning: '{meta_nixpkgs}'" ) try: nixpath = nixref_to_nixpkgs_path(meta_nixpkgs) except _NIXREF_RESOLUTION_EXCEPTIONS as error: raise SbomnixError( f"Failed resolving --meta-nixpkgs source: '{meta_nixpkgs}'" ) from error if not nixpath: raise SbomnixError( f"Failed resolving --meta-nixpkgs source: '{meta_nixpkgs}'" ) return nixpkgs_meta_source_with_path( NixpkgsMetaSource( method="explicit", path=nixpath.as_posix(), flakeref=meta_nixpkgs, ), ) @staticmethod def _try_normalize_mutable_path(path): try: nixpath = nixref_to_nixpkgs_path(path.as_posix()) except _NIXREF_RESOLUTION_EXCEPTIONS: LOG.debug( "Failed normalizing mutable nixpkgs path: %s", path.as_posix(), exc_info=True, ) return None if nixpath and is_nix_store_path(nixpath): return nixpath return None def resolve_flakeref_lock_source(self, nixref): """Return the nixpkgs source selected by a flakeref lock graph.""" if nixref: LOG.debug("Reading nixpkgs path from nixref: %s", nixref) nixpath = nixref_to_nixpkgs_path(nixref) if nixpath: return nixpkgs_meta_source_with_path( NixpkgsMetaSource( method="flakeref-lock", path=nixpath.as_posix(), flakeref=nixref, ), ) return NixpkgsMetaSource(method="none") def resolve_default_source(self, nixref=None): """Return the metadata source for the older direct Meta API.""" if nixref: return self.resolve_flakeref_lock_source(nixref) if "NIX_PATH" in os.environ: return self.resolve_nix_path_source() return NixpkgsMetaSource(method="none") def resolve_nix_path_source(self, *, message=None, required=False): """Return the nixpkgs source referenced by NIX_PATH.""" LOG.debug("Reading nixpkgs path from NIX_PATH environment") nix_path = os.environ.get("NIX_PATH", "") m_nixpkgs = re.search(r"(?:^|:)nixpkgs=([^:]+)", nix_path) if m_nixpkgs: return nixpkgs_meta_source_with_path( NixpkgsMetaSource( method="nix-path", path=m_nixpkgs.group(1), message=message, ), ) if required: raise SbomnixError( "NIX_PATH does not contain a nixpkgs= entry required by " "--meta-nixpkgs nix-path" ) return NixpkgsMetaSource(method="none") ================================================ FILE: src/sbomnix/runtime.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Runtime closure helpers based on structured Nix path-info JSON.""" import subprocess from dataclasses import dataclass import pandas as pd from common 
import columns as cols from common.errors import NixCommandError from common.nix_utils import ( NIX_PATH_INFO_JSON, load_nix_json, nix_path_info_deriver, nix_path_info_references, normalize_nix_path_info, ) from common.proc import exec_cmd, nix_cmd from sbomnix.closure import ( dependency_rows_to_dataframe, store_path_label, ) @dataclass(frozen=True) class RuntimeClosure: """Runtime dependency edges and output-to-deriver mapping.""" df_deps: pd.DataFrame output_paths_by_drv: dict[str, set[str]] def load_runtime_closure(path): """Load runtime closure information using ``nix path-info`` JSON.""" cmd = nix_cmd( "path-info", "--json", "--json-format", "1", "--recursive", path, ) try: ret = exec_cmd(cmd) except subprocess.CalledProcessError as error: raise NixCommandError( cmd, stderr=error.stderr, stdout=error.stdout, ) from None return runtime_closure_from_path_info(load_nix_json(ret.stdout, NIX_PATH_INFO_JSON)) def runtime_closure_from_path_info(path_info): """Return runtime closure data from parsed ``nix path-info`` JSON.""" rows = [] output_paths_by_drv = {} for target_path, info in normalize_nix_path_info(path_info).items(): deriver = nix_path_info_deriver(info, target_path) if deriver: output_paths_by_drv.setdefault(deriver, set()).add(target_path) for src_path in nix_path_info_references(info, target_path): if src_path == target_path: continue rows.append( { cols.SRC_PATH: src_path, "src_pname": store_path_label(src_path), cols.TARGET_PATH: target_path, "target_pname": store_path_label(target_path), } ) return RuntimeClosure( df_deps=dependency_rows_to_dataframe(rows), output_paths_by_drv=output_paths_by_drv, ) ================================================ FILE: src/sbomnix/vuln_enrichment.py ================================================ # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """CycloneDX vulnerability enrichment helpers.""" import pathlib from tempfile import NamedTemporaryFile from typing import cast import pandas as pd from common import columns as cols from sbomnix.cdx import _vuln_to_cdx_vuln from vulnxscan.vulnscan import VulnScan def enrich_cdx_with_vulnerabilities(sbomdb, cdx): """Add vulnerability scan results to an existing CycloneDX document.""" scanner = VulnScan() scanner.scan_vulnix(_vulnix_target_path(sbomdb), sbomdb.buildtime) temp_cdx_path = None try: with NamedTemporaryFile( delete=False, prefix="vulnxscan_", suffix=".json", ) as outfile: temp_cdx_path = outfile.name sbomdb.write_json(temp_cdx_path, cdx, printinfo=False) scanner.scan_grype(temp_cdx_path) scanner.scan_osv(temp_cdx_path) finally: if temp_cdx_path is not None: pathlib.Path(temp_cdx_path).unlink(missing_ok=True) cdx["vulnerabilities"] = [] vuln_frames = [ df for df in [scanner.df_grype, scanner.df_osv, scanner.df_vulnix] if df is not None ] if not vuln_frames: return cdx df_vulns = pd.concat(vuln_frames, ignore_index=True) if df_vulns.empty: return cdx if cols.MODIFIED in df_vulns.columns: df_vulns = df_vulns.drop(cols.MODIFIED, axis=1) vuln_grouped = cast( pd.DataFrame, df_vulns.groupby( [cols.PACKAGE, cols.VERSION, cols.SEVERITY, cols.VULN_ID], as_index=False, ).agg({cols.SCANNER: pd.Series.unique}), ) vuln_components = pd.merge( left=vuln_grouped, right=sbomdb.df_sbomdb, how="inner", left_on=[cols.PACKAGE, cols.VERSION], right_on=[cols.PNAME, cols.VERSION], ) for vuln in vuln_components.itertuples(): cdx["vulnerabilities"].append(_vuln_to_cdx_vuln(vuln)) return cdx def _vulnix_target_path(sbomdb): """Return the target path to 
use for vulnix scans.""" if sbomdb.buildtime: return sbomdb.target_deriver return sbomdb.nix_path ================================================ FILE: src/vulnxscan/__init__.py ================================================ # SPDX-FileCopyrightText: 2022 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 ================================================ FILE: src/vulnxscan/github_prs.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """GitHub PR search helpers for vulnerability triage.""" import json import time import urllib.parse from common.http import create_cached_limited_session from common.log import LOG, LOG_SPAM GITHUB_API_CACHE_SECONDS = 6 * 60 * 60 GITHUB_API_REQUEST_TIMEOUT = 60 GITHUB_API_USER_AGENT = "sbomnix-github-prs/0 (https://github.com/tiiuae/sbomnix/)" def append_search_results(prs, result, max_results=5): """Append GitHub issue search result URLs to ``result``.""" for item in prs["items"]: if len(result) >= max_results: LOG.log( LOG_SPAM, "More than %s PRs, skipping: %s", max_results, item["html_url"], ) continue result.add(item["html_url"]) return result class GitHubPrLookup: """Search likely nixpkgs PRs related to a vulnerability.""" def __init__( self, session=None, sleeper=None, request_timeout=GITHUB_API_REQUEST_TIMEOUT, ): self.session = ( create_cached_limited_session( per_minute=9, per_second=1, expire_after=GITHUB_API_CACHE_SECONDS, user_agent=GITHUB_API_USER_AGENT, ) if session is None else session ) self.sleeper = time.sleep if sleeper is None else sleeper self.request_timeout = request_timeout def query(self, query_str, delay=60): """Query the GitHub issues search API.""" query_str_quoted = urllib.parse.quote(query_str, safe=":/") query = f"https://api.github.com/search/issues?q={query_str_quoted}" LOG.debug("GET: %s", query) resp = self.session.get(query, timeout=self.request_timeout) if not resp.ok and "rate limit exceeded" in resp.text: max_delay = 60 if delay > max_delay: LOG.warning("Rate limit exceeded requesting %s", query) return {"items": []} LOG.debug("Sleeping %s seconds before re-requesting", delay) self.sleeper(delay) LOG.debug("Re-requesting") return self.query(query_str, delay * 2) resp.raise_for_status() resp_json = json.loads(resp.text) LOG.log(LOG_SPAM, "total_count=%s", resp_json["total_count"]) return resp_json def find_nixpkgs_prs(self, row): """Return likely nixpkgs PR URLs for a vulnerable package row.""" if hasattr(row, "whitelist") and row.whitelist: LOG.log(LOG_SPAM, "Whitelisted, skipping PR query: %s", row) return "" nixpr = "repo:NixOS/nixpkgs is:pr" unmerged = "is:unmerged is:open" merged = "is:merged" version = None result = set() append_search_results(self.query(f"{nixpr} {unmerged} {row.vuln_id}"), result) append_search_results(self.query(f"{nixpr} {merged} {row.vuln_id}"), result) if row.classify == "fix_update_to_version_nixpkgs": version = row.version_nixpkgs elif row.classify == "fix_update_to_version_upstream": version = row.version_upstream if version: pkg = row.package append_search_results( self.query(f"{nixpr} {unmerged} {pkg} in:title {version} in:title"), result, ) append_search_results( self.query(f"{nixpr} {merged} {pkg} in:title {version} in:title"), result, ) return " \n".join(sorted(result)) ================================================ FILE: src/vulnxscan/osv.py ================================================ #!/usr/bin/env python3 # 
SPDX-FileCopyrightText: 2023 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Demonstrate querying OSV db for vulnerabilities based on cdx SBOM""" import argparse import os import pathlib from common.cli_args import add_verbose_argument, add_version_argument from common.df import df_to_csv_file from common.errors import InvalidSbomError, SbomnixError from common.log import LOG, set_log_verbosity from vulnxscan.osv_client import OSV ############################################################################### def getargs(args=None): """Parse command line arguments""" desc = "Scan CycloneDX SBOM components for OSV vulnerabilities" epil = f"Example: ./{os.path.basename(__file__)} /path/to/sbom.json" parser = argparse.ArgumentParser(description=desc, epilog=epil) add_verbose_argument(parser) helps = "Path to CycloneDX SBOM json file" parser.add_argument("SBOM", help=helps, type=pathlib.Path) helps = "Path to output file (default: ./osv.csv)" parser.add_argument("-o", "--out", nargs="?", help=helps, default="osv.csv") helps = ( 'List of ecosystems to query (default: "GIT,OSS-Fuzz"). ' "For more details, see https://osv.dev" ) parser.add_argument("--ecosystems", type=str, help=helps, default="GIT,OSS-Fuzz") add_version_argument(parser) return parser.parse_args(args) def _run(args): if not args.SBOM.exists(): raise InvalidSbomError(args.SBOM) osv = OSV() ecosystems = [str(x).strip() for x in args.ecosystems.split(",")] osv.query_vulns(args.SBOM.as_posix(), ecosystems) df_vulns = osv.to_dataframe() df_to_csv_file(df_vulns, args.out) def main(): """main entry point""" args = getargs() set_log_verbosity(args.verbose) try: _run(args) except SbomnixError as error: LOG.fatal("%s", error) raise SystemExit(1) from error ################################################################################ if __name__ == "__main__": main() ################################################################################ ================================================ FILE: src/vulnxscan/osv_client.py ================================================ # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Reusable OSV querying helpers.""" import json import pandas as pd from common import columns as cols from common.http import create_cached_limited_session from common.log import LOG, LOG_SPAM OSV_CACHE_SECONDS = 6 * 60 * 60 OSV_QUERY_URL = "https://api.osv.dev/v1/querybatch" OSV_REQUEST_TIMEOUT = 60 OSV_USER_AGENT = "sbomnix-osv/0 (https://github.com/tiiuae/sbomnix/)" def create_osv_session(): """Return a retrying HTTP session for OSV requests.""" return create_cached_limited_session( per_second=1, expire_after=OSV_CACHE_SECONDS, user_agent=OSV_USER_AGENT, allowed_methods=frozenset(("GET", "HEAD", "POST")), ) class OSV: """Query and parse OSV vulnerability data.""" def __init__(self, session=None, request_timeout=OSV_REQUEST_TIMEOUT): self.session = create_osv_session() if session is None else session self.request_timeout = request_timeout self.vulns_dict = {} def _parse_vulns(self, package, vulns): setcol = self.vulns_dict.setdefault for vuln in vulns["vulns"]: setcol(cols.VULN_ID, []).append(vuln["id"]) setcol(cols.MODIFIED, []).append(vuln["modified"]) setcol(cols.PACKAGE, []).append(package["package"]["name"]) setcol(cols.VERSION, []).append(package["version"]) def _parse_batch_response(self, query, results): # Preserve the previous tolerant behavior if the API returns fewer results. 
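        # zip(..., strict=False) stops at the shorter sequence, so queries
        # that got no corresponding result entry are silently skipped.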
for package, vulns in zip(query["queries"], results, strict=False): if not package or not vulns: continue LOG.debug("package: %s", package) LOG.debug("vulns: %s", vulns) if "vulns" not in vulns: continue self._parse_vulns(package, vulns) def _post_batch_query(self, query): LOG.log(LOG_SPAM, "query: %s", query) LOG.log(LOG_SPAM, "sending request to '%s'", OSV_QUERY_URL) resp = self.session.post( OSV_QUERY_URL, json=query, timeout=self.request_timeout, ) LOG.debug("resp.status_code: %s", resp.status_code) LOG.log(LOG_SPAM, "resp.json: %s", resp.json()) resp.raise_for_status() payload = resp.json() self._parse_batch_response(query, payload.get("results", [])) def _parse_sbom(self, path): LOG.debug("Parsing sbom: %s", path) with open(path, encoding="utf-8") as inf: json_dict = json.loads(inf.read()) components = json_dict["components"] + [json_dict["metadata"]["component"]] components_dict = {} setcol = components_dict.setdefault for component in components: setcol(cols.NAME, []).append(component["name"]) setcol(cols.VERSION, []).append(component["version"]) df_components = pd.DataFrame(components_dict) df_components.fillna("", inplace=True) df_components = df_components.astype(str) df_components.sort_values( cols.NAME, inplace=True, key=lambda col: col.str.lower(), ) df_components.reset_index(drop=True, inplace=True) return df_components def query_vulns(self, sbom_path, ecosystems=None): """Query each package in an SBOM for OSV vulnerabilities.""" LOG.verbose("Querying vulnerabilities") df_sbom = self._parse_sbom(sbom_path) max_queries = 1000 batchquery = {"queries": []} if ecosystems is None: ecosystems = ["GIT", "OSS-Fuzz"] for component in df_sbom.to_dict("records"): name = component[cols.NAME] version = component.get(cols.VERSION, "") if not version: LOG.debug("skipping osv query (unknown version): %s", name) continue for ecosystem in ecosystems: query = { "version": version, "package": { "name": name, "ecosystem": ecosystem, }, } batchquery["queries"].append(query) if len(batchquery["queries"]) >= max_queries: self._post_batch_query(batchquery) batchquery["queries"] = [] if batchquery["queries"]: self._post_batch_query(batchquery) def to_dataframe(self): """Return found vulnerabilities as a pandas dataframe.""" return pd.DataFrame.from_dict(self.vulns_dict) ================================================ FILE: src/vulnxscan/parsers.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Parsing helpers for scanner output formats.""" import json import numpy as np import pandas as pd from common import columns as cols from common.log import LOG, LOG_SPAM def _severity_from_cache(cvss_cache, vuln_id): if cvss_cache is None: return "" return cvss_cache.get(vuln_id, "") def parse_vulnix_json(json_str, *, cvss_cache=None, log=LOG): """Parse vulnix JSON output into a normalized dataframe.""" vulnerable_packages = json.loads(json_str) vulnix_vulns_dict = {} setcol = vulnix_vulns_dict.setdefault for package in vulnerable_packages: cvss = package["cvssv3_basescore"] for cve in package["affected_by"]: severity = _severity_from_cache(cvss_cache, cve) if not severity and cve in cvss: severity = cvss[cve] if cvss_cache is not None: cvss_cache[cve] = severity setcol(cols.PACKAGE, []).append(package["pname"]) setcol(cols.VERSION, []).append(package["version"]) setcol(cols.VULN_ID, []).append(cve) setcol(cols.SEVERITY, []).append(severity) setcol(cols.SCANNER, 
[]).append("vulnix") df_vulnix = pd.DataFrame.from_dict(vulnix_vulns_dict) if not df_vulnix.empty: log.debug("Vulnix found vulnerabilities") df_vulnix.replace(np.nan, "", regex=True, inplace=True) df_vulnix.drop_duplicates(keep="first", inplace=True) return df_vulnix def parse_grype_json(json_str, *, cvss_cache=None, log=LOG, log_spam=LOG_SPAM): """Parse grype JSON output into a normalized dataframe.""" vulnerabilities = json.loads(json_str) log.log(log_spam, json.dumps(vulnerabilities, indent=2)) grype_vulns_dict = {} setcol = grype_vulns_dict.setdefault for vuln in vulnerabilities["matches"]: if not vuln["artifact"]["version"]: log.log( log_spam, "'%s' missing version information: skipping", vuln["artifact"]["name"], ) continue vid = vuln["vulnerability"]["id"] severity = _severity_from_cache(cvss_cache, vid) if not severity and vuln["vulnerability"]["cvss"]: for cvss in vuln["vulnerability"]["cvss"]: if float(cvss["version"]) >= 3: log.log(log_spam, "selected cvss: %s", cvss) severity = cvss["metrics"]["baseScore"] if cvss_cache is not None: cvss_cache[vid] = severity break setcol(cols.PACKAGE, []).append(vuln["artifact"]["name"]) setcol(cols.VERSION, []).append(vuln["artifact"]["version"]) setcol(cols.VULN_ID, []).append(vuln["vulnerability"]["id"]) setcol(cols.SEVERITY, []).append(severity) setcol(cols.SCANNER, []).append("grype") df_grype = pd.DataFrame.from_dict(grype_vulns_dict) if not df_grype.empty: log.debug("Grype found vulnerabilities") df_grype.replace(np.nan, "", regex=True, inplace=True) df_grype.drop_duplicates(keep="first", inplace=True) return df_grype def normalize_osv_dataframe(df_osv, *, cvss_cache=None, log=LOG, log_spam=LOG_SPAM): """Normalize OSV query results into vulnxscan's dataframe shape.""" if df_osv is None: return pd.DataFrame() df_osv = df_osv.copy(deep=True) if not df_osv.empty: df_osv[cols.SCANNER] = "osv" df_osv.replace(np.nan, "", regex=True, inplace=True) df_osv.drop_duplicates(keep="first", inplace=True) df_osv[cols.MODIFIED] = pd.to_datetime( df_osv[cols.MODIFIED], format="%Y-%m-%d", exact=False, ) df_osv[cols.SEVERITY] = df_osv[cols.VULN_ID].apply( lambda vuln_id: _severity_from_cache(cvss_cache, vuln_id) ) log.log(log_spam, "osv data:\n%s", df_osv.to_markdown()) log.debug("OSV scan found vulnerabilities") return df_osv ================================================ FILE: src/vulnxscan/repology_lookup.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Repology-backed lookup helpers for vulnerability triage.""" from pathlib import Path import pandas as pd from common import columns as cols from common.df import df_log from common.log import LOG, LOG_SPAM from common.package_names import nix_to_repology_pkg_name from common.versioning import version_distance from repology.adapter import RepologyAdapter, RepologyQuery from repology.exceptions import RepologyNoMatchingPackages from repology.repology_cve import query_cve def select_newest(df): """Return the newest rows per package.""" selected = [] for pkg_name in df[cols.PACKAGE].unique(): df_pkg = df[df[cols.PACKAGE] == str(pkg_name)] df_newest = df_pkg[df_pkg[cols.STATUS] == "newest"] if df_newest.empty: df_newest = df_pkg.sort_values(by=[cols.VERSION]).iloc[[-1]] selected.append(df_newest) if not selected: return pd.DataFrame() return pd.concat(selected, ignore_index=True) def _add_triage_item(out_dict, vuln, whitelist_cols, df_repo=None): if df_repo is None: 
out_dict.setdefault(cols.VULN_ID, []).append(vuln.vuln_id) out_dict.setdefault(cols.URL, []).append(vuln.url) out_dict.setdefault(cols.PACKAGE, []).append(vuln.package) out_dict.setdefault(cols.SEVERITY, []).append(vuln.severity) out_dict.setdefault(cols.VERSION_LOCAL, []).append(vuln.version) out_dict.setdefault(cols.VERSION_NIXPKGS, []).append("") out_dict.setdefault(cols.VERSION_UPSTREAM, []).append("") out_dict.setdefault(cols.PACKAGE_REPOLOGY, []).append("") out_dict.setdefault(cols.SORTCOL, []).append(vuln.sortcol) if whitelist_cols: out_dict.setdefault(cols.WHITELIST, []).append(vuln.whitelist) out_dict.setdefault(cols.WHITELIST_COMMENT, []).append( vuln.whitelist_comment ) return for item in df_repo.itertuples(): out_dict.setdefault(cols.VULN_ID, []).append(vuln.vuln_id) out_dict.setdefault(cols.URL, []).append(vuln.url) out_dict.setdefault(cols.PACKAGE, []).append(vuln.package) out_dict.setdefault(cols.SEVERITY, []).append(vuln.severity) out_dict.setdefault(cols.VERSION_LOCAL, []).append(vuln.version) out_dict.setdefault(cols.VERSION_NIXPKGS, []).append(item.version) if item.newest_upstream_release and ";" in item.newest_upstream_release: version_upstream_str = item.newest_upstream_release.split(";")[0] else: version_upstream_str = item.newest_upstream_release out_dict.setdefault(cols.VERSION_UPSTREAM, []).append(version_upstream_str) out_dict.setdefault(cols.PACKAGE_REPOLOGY, []).append(item.package) out_dict.setdefault(cols.SORTCOL, []).append(vuln.sortcol) if whitelist_cols: out_dict.setdefault(cols.WHITELIST, []).append(vuln.whitelist) out_dict.setdefault(cols.WHITELIST_COMMENT, []).append( vuln.whitelist_comment ) def _version_similarity(row): ratio = version_distance(row.version, row.version_cmp) LOG.log( LOG_SPAM, "Version similarity ('%s' vs '%s' ==> %s)", row.version, row.version_cmp, ratio, ) return ratio class RepologyVulnerabilityLookup: """Cache and query Repology/CVE data used by triage.""" def __init__(self, adapter=None, cve_query=None): self.adapter = RepologyAdapter() if adapter is None else adapter self.cve_query = query_cve if cve_query is None else cve_query self._repology_cve_dfs = {} self._repology_dfs = {} def is_vulnerable(self, repo_pkg_name, pkg_version, cve_id=None): """ Return true if given pkg version is vulnerable. If ``cve_id`` is specified, return true only if pkg is affected by the given cve id. 
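
        Illustrative call (the package and version are hypothetical):
            lookup.is_vulnerable("openssl", "3.0.7", cve_id="CVE-2022-3602")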
""" LOG.debug("Finding vulnerability status for %s:%s", repo_pkg_name, pkg_version) key = f"{repo_pkg_name}:{pkg_version}" if key in self._repology_cve_dfs: LOG.log(LOG_SPAM, "Using cached repology_cve results") df = self._repology_cve_dfs[key] else: df = self.cve_query(str(repo_pkg_name), str(pkg_version)) if df is None: df = pd.DataFrame() df_log(df, LOG_SPAM) self._repology_cve_dfs[key] = df if cve_id and not df.empty: df = df[df["cve"] == cve_id] return not df.empty def query_repology(self, pname, match_type="pkg_exact"): """Return cached Repology results for a package name.""" LOG.log(LOG_SPAM, "Querying repology for '%s'", pname) cache_key = f"{match_type}:{pname}" if cache_key in self._repology_dfs: LOG.log(LOG_SPAM, "Using cached repology results") return self._repology_dfs[cache_key].copy(deep=True) if match_type == "pkg_search": query = RepologyQuery( repository="nix_unstable", pkg_search=pname, re_status="outdated|newest|devel|unique", ) elif match_type == "sbom_cdx": query = RepologyQuery( repository="nix_unstable", sbom_cdx=Path(pname), re_status="outdated|newest|devel|unique", ) elif match_type == "pkg_exact": query = RepologyQuery( repository="nix_unstable", pkg_exact=pname, re_status="outdated|newest|devel|unique", ) else: raise ValueError(f"Unknown match_type: {match_type!r}") try: df_repology = self.adapter.query(query) except RepologyNoMatchingPackages: df_repology = None if df_repology is None or df_repology.empty: LOG.debug("No results from repology") return None df_repology = select_newest(df_repology) self._repology_dfs[cache_key] = df_repology.copy(deep=True) df_log(df_repology, LOG_SPAM) return df_repology def query_repology_versions(self, df_vuln_pkgs): """Augment vulnerable package rows with Repology version data.""" LOG.verbose("Querying repology") result_dict = {} whitelist_cols = cols.WHITELIST in df_vuln_pkgs.columns for vuln in df_vuln_pkgs.itertuples(): if whitelist_cols and vuln.whitelist: LOG.log(LOG_SPAM, "Whitelisted, skipping repology query: %s", vuln) _add_triage_item(result_dict, vuln, whitelist_cols) continue repo_pkg = nix_to_repology_pkg_name(vuln.package) LOG.log(LOG_SPAM, "Package '%s' ==> '%s'", vuln.package, repo_pkg) df_repology = self.query_repology(repo_pkg) if df_repology is None or df_repology.empty: _add_triage_item(result_dict, vuln, whitelist_cols) continue if df_repology.shape[0] == 1: LOG.log(LOG_SPAM, "One repology package matches") _add_triage_item(result_dict, vuln, whitelist_cols, df_repology) continue df_exact = df_repology[df_repology[cols.VERSION] == vuln.version] if not df_exact.empty: LOG.log(LOG_SPAM, "Exact version match '%s'", vuln.version) _add_triage_item(result_dict, vuln, whitelist_cols, df_exact) continue df_repology = df_repology.copy(deep=True) df_repology[cols.VERSION_CMP] = vuln.version df_repology[cols.SIMILARITY] = df_repology.apply( _version_similarity, axis=1, ) df_similar = df_repology[df_repology[cols.SIMILARITY] >= 0.7] if not df_similar.empty: LOG.log(LOG_SPAM, "Version similarity match:\n%s", df_similar) best_match = df_similar[cols.SIMILARITY].max() df_similar = df_similar[df_similar[cols.SIMILARITY] == best_match] LOG.log( LOG_SPAM, "Selecting best match based on version:\n%s", df_similar, ) _add_triage_item(result_dict, vuln, whitelist_cols, df_similar) continue LOG.log(LOG_SPAM, "Vague match in repology pkg, adding vuln only") _add_triage_item(result_dict, vuln, whitelist_cols) df_result = pd.DataFrame(result_dict) df_result.fillna("", inplace=True) df_result.reset_index(drop=True, inplace=True) 
return df_result

================================================
FILE: src/vulnxscan/reporting.py
================================================
#!/usr/bin/env python3

# SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII)
#
# SPDX-License-Identifier: Apache-2.0

"""Reporting helpers for vulnxscan findings."""

import pathlib
from typing import cast

import pandas as pd
from tabulate import tabulate

from common import columns as cols
from common.df import df_from_csv_file, df_to_csv_file
from common.log import LOG, LOG_VERBOSE
from vulnxscan.utils import _is_patched, _reformat_scanner, _vuln_sortcol, _vuln_url
from vulnxscan.whitelist import df_apply_whitelist, df_drop_whitelisted, load_whitelist


def build_report_dataframe(df_vulnix, df_grype, df_osv, *, log=LOG):
    """Combine scanner findings into the final report dataframe."""
    scanner_dfs = [df for df in [df_vulnix, df_grype, df_osv] if df is not None]
    if not scanner_dfs:
        log.debug("No scanners reported any findings")
        return pd.DataFrame()
    df = pd.concat(scanner_dfs, ignore_index=True)
    if df.empty:
        log.debug("No scanners reported any findings")
        return pd.DataFrame()
    if cols.MODIFIED not in df.columns:
        df[cols.MODIFIED] = pd.NaT
    df[cols.SORTCOL] = df.apply(_vuln_sortcol, axis=1)
    df[cols.COUNT] = 1
    group_cols = [
        cols.VULN_ID,
        cols.PACKAGE,
        cols.SEVERITY,
        cols.VERSION,
        cols.SORTCOL,
    ]
    df = df.pivot_table(index=group_cols, columns=cols.SCANNER, values=cols.COUNT)
    df.reset_index(drop=False, inplace=True)
    scanners = ["grype", "osv"]
    if df_vulnix is not None:
        scanners.append("vulnix")
    df = df.reindex(group_cols + scanners, axis=1)
    for scanner_col in scanners:
        if scanner_col not in df:
            df[scanner_col] = 0
    df[cols.SUM] = df[scanners].sum(axis=1).astype(int)
    df["grype"] = df.apply(lambda row: _reformat_scanner(row.grype), axis=1)
    df["osv"] = df.apply(lambda row: _reformat_scanner(row.osv), axis=1)
    if "vulnix" in scanners:
        df["vulnix"] = df.apply(lambda row: _reformat_scanner(row.vulnix), axis=1)
    df[cols.URL] = df.apply(_vuln_url, axis=1)
    sort_cols = [cols.SORTCOL, cols.PACKAGE, cols.SEVERITY, cols.VERSION]
    df.sort_values(by=sort_cols, ascending=False, inplace=True)
    report_cols = (
        [cols.VULN_ID, cols.URL, cols.PACKAGE, cols.VERSION, cols.SEVERITY]
        + scanners
        + [cols.SUM, cols.SORTCOL]
    )
    return df[report_cols]


def filter_patched_report(df_report, sbom_csv, *, log=LOG):
    """Filter out vulnerabilities that are marked as patched in the SBOM CSV."""
    log.log(LOG_VERBOSE, "Filtering patched vulnerabilities")
    df_sbom_csv = df_from_csv_file(sbom_csv)
    df = pd.merge(
        left=df_report,
        right=df_sbom_csv,
        how="left",
        left_on=[cols.PACKAGE, cols.VERSION],
        right_on=[cols.PNAME, cols.VERSION],
        suffixes=("", "_sbom_csv"),
    )
    df[cols.PATCHED] = df.apply(_is_patched, axis=1)
    df = df[~df[cols.PATCHED]]
    df = cast(pd.DataFrame, df[list(df_report.columns)])
    return df.drop_duplicates(keep="first")


def apply_whitelist_annotations(df_report, whitelist_csv):
    """Apply whitelist annotations in-place when a whitelist is provided."""
    if whitelist_csv is None:
        return
    df_whitelist = load_whitelist(whitelist_csv)
    if df_whitelist is None:
        return
    df_apply_whitelist(df_whitelist, df_report)


def render_console_report(df_report, *, df_triaged=None, log=LOG):
    """Render the console report for the final vulnerability dataframe."""
    log.debug("")
    if df_triaged is not None:
        df = df_triaged.copy()
        if cols.PACKAGE_REPOLOGY in df:
            df = df.drop(cols.PACKAGE_REPOLOGY, axis=1)
    else:
        df = df_report.copy()
        df = df.drop(cols.SORTCOL, axis=1)
    df = df_drop_whitelisted(df)
    if df.empty:
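        # Every remaining finding was whitelisted; nothing to print.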
log.info("Whitelisted all vulnerabilities") return version_cols = [col for col in df.columns if "version" in col] for col in version_cols: df[col] = df[col].str.slice(0, 16) table = tabulate( df, headers="keys", tablefmt="orgtbl", numalign="center", showindex=False, ) log.info( "Console report\n\n" "Potential vulnerabilities impacting version_local: " "\n\n%s\n\n", table, ) def write_reports(df_report, out_path, *, df_triaged=None): """Write the main CSV report and optional triage report.""" out_path = pathlib.Path(out_path) df_to_csv_file(df_report, out_path.resolve().as_posix()) if df_triaged is not None: parents = out_path.parents[0].resolve().as_posix() triage_out = f"{parents}/{out_path.stem}.triage{out_path.suffix}" df_to_csv_file(df_triaged, triage_out) ================================================ FILE: src/vulnxscan/scanners.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Scanner command execution helpers for vulnxscan.""" from common.log import LOG, LOG_VERBOSE from common.proc import exec_cmd from vulnxscan.osv_client import OSV def run_vulnix_scan(target_path, buildtime=False, *, exec_cmd_fn=exec_cmd, log=LOG): """Run vulnix and return its process result.""" log.log(LOG_VERBOSE, "Running vulnix scan") extra_opts = ["-C", "--json"] if buildtime: extra_opts = ["--json"] cmd = ["vulnix", target_path, *extra_opts] return exec_cmd_fn( cmd, raise_on_error=False, return_error=True, log_error=False, ) def run_grype_scan(sbom_path, *, exec_cmd_fn=exec_cmd, log=LOG): """Run grype against the given CycloneDX SBOM path.""" log.log(LOG_VERBOSE, "Running grype scan") cmd = ["grype", f"sbom:{sbom_path}", "--add-cpes-if-none", "--output", "json"] return exec_cmd_fn(cmd) def run_osv_scan(sbom_path, *, osv_factory=OSV, log=LOG): """Run OSV queries for the given CycloneDX SBOM path.""" log.log(LOG_VERBOSE, "Running OSV scan") osv = osv_factory() osv.query_vulns(sbom_path) return osv.to_dataframe() ================================================ FILE: src/vulnxscan/triage.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Vulnerability triage helpers.""" from common import columns as cols from common.df import df_log from common.log import LOG, LOG_SPAM from common.versioning import parse_version from vulnxscan.github_prs import GitHubPrLookup from vulnxscan.repology_lookup import RepologyVulnerabilityLookup _DEFAULT_REPOLOGY_LOOKUP = None _DEFAULT_GITHUB_PR_LOOKUP = None def _get_default_repology_lookup(): global _DEFAULT_REPOLOGY_LOOKUP # noqa: PLW0603 if _DEFAULT_REPOLOGY_LOOKUP is None: _DEFAULT_REPOLOGY_LOOKUP = RepologyVulnerabilityLookup() return _DEFAULT_REPOLOGY_LOOKUP def _get_default_github_lookup(): global _DEFAULT_GITHUB_PR_LOOKUP # noqa: PLW0603 if _DEFAULT_GITHUB_PR_LOOKUP is None: _DEFAULT_GITHUB_PR_LOOKUP = GitHubPrLookup() return _DEFAULT_GITHUB_PR_LOOKUP def classify_vulnerability(row, repology_lookup=None): # noqa: PLR0911 """Classify a vulnerable package row using Repology/CVE data.""" repology_lookup = ( _get_default_repology_lookup() if repology_lookup is None else repology_lookup ) if not row.version_nixpkgs and not row.version_upstream: return "err_missing_repology_version" if row.version_local and not repology_lookup.is_vulnerable( row.package_repology, row.version_local, row.vuln_id ): return 
"err_not_vulnerable_based_on_repology" version_local = parse_version(row.version_local) version_nixpkgs = parse_version(row.version_nixpkgs) if not version_local or not version_nixpkgs: return "err_invalid_version" if row.version_nixpkgs and version_local < version_nixpkgs: if not repology_lookup.is_vulnerable( row.package_repology, row.version_nixpkgs, row.vuln_id ): return "fix_update_to_version_nixpkgs" version_upstream = parse_version(row.version_upstream) if not version_upstream: return "err_invalid_version" if row.version_upstream and version_local < version_upstream: if not repology_lookup.is_vulnerable( row.package_repology, version_upstream, row.vuln_id ): return "fix_update_to_version_upstream" return "fix_not_available" def triage_vulnerabilities( df_report, search_nix_prs, repology_lookup=None, github_lookup=None, ): """Enrich and classify a vulnerability report.""" repology_lookup = ( _get_default_repology_lookup() if repology_lookup is None else repology_lookup ) github_lookup = ( _get_default_github_lookup() if github_lookup is None else github_lookup ) LOG.debug("") df = df_report.copy() uids = [ cols.VULN_ID, cols.PACKAGE, cols.SEVERITY, cols.VERSION, cols.URL, cols.SORTCOL, ] if cols.WHITELIST in df.columns: uids.append(cols.WHITELIST) uids.append(cols.WHITELIST_COMMENT) df_vuln_pkgs = df.groupby(by=uids).size().reset_index(name=cols.COUNT) LOG.debug("Number of vulnerable packages: %s", df_vuln_pkgs.shape[0]) if df_vuln_pkgs.empty: return df_vuln_pkgs df_log(df_vuln_pkgs, LOG_SPAM) df_vuln_pkgs = repology_lookup.query_repology_versions(df_vuln_pkgs) LOG.debug("Vulnerable pkgs with repology version info: %s", df_vuln_pkgs.shape[0]) df_log(df_vuln_pkgs, LOG_SPAM) df_vuln_pkgs[cols.CLASSIFY] = df_vuln_pkgs.apply( lambda row: classify_vulnerability(row, repology_lookup=repology_lookup), axis=1, ) if search_nix_prs: LOG.verbose("Querying nixpkgs github PRs") df_vuln_pkgs[cols.NIXPKGS_PR] = df_vuln_pkgs.apply( github_lookup.find_nixpkgs_prs, axis=1, ) sort_cols = [cols.SORTCOL, cols.PACKAGE, cols.SEVERITY, cols.VERSION_LOCAL] df_vuln_pkgs.sort_values(by=sort_cols, ascending=False, inplace=True) return df_vuln_pkgs ================================================ FILE: src/vulnxscan/utils.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2023 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Shared report and file helpers for vulnxscan.""" import json import re import pandas as pd from common.log import LOG ################################################################################ def _reformat_scanner(val): if val and not pd.isnull(val): return "1" return "0" def _vuln_sortcol(row): # Return a string that should make the vulns we want to see high # on the report list to bubble up when sorted in ascending order based # on the returned string match = re.match(r".*[A-Za-z][-_]([1-2][0-9]{3})[-_]([0-9]+).*", row.vuln_id) if match: year = match.group(1) number = str(match.group(2)).zfill(10) return f"{year}A{number}" if row.modified and not pd.isnull(row.modified): return f"{row.modified.year}A{int(row.modified.timestamp())}" return str(row.vuln_id) def _vuln_url(row): osv_url = "https://osv.dev/" nvd_url = "https://nvd.nist.gov/vuln/detail/" if row.vuln_id.lower().startswith("cve"): return f"{nvd_url}{row.vuln_id}" if getattr(row, "osv", False) or ("osv" in getattr(row, "scanner", [])): return f"{osv_url}{row.vuln_id}" return "" def _vuln_source(row): if row.vuln_id.lower().startswith("cve"): 
return "NVD" if getattr(row, "osv", False) or ("osv" in getattr(row, "scanner", [])): return "OSV" return "" def _is_patched(row): if row.vuln_id and str(row.vuln_id).lower() in str(row.patches).lower(): patches = row.patches.split() patch = [p for p in patches if str(row.vuln_id).lower() in str(p).lower()] LOG.info("%s for '%s' is patched with: %s", row.vuln_id, row.package, patch) return True return False def _is_json(path): try: with open(path, encoding="utf-8") as f: json_obj = json.load(f) if json_obj: return True return False except (json.JSONDecodeError, OSError, UnicodeError): return False ================================================ FILE: src/vulnxscan/vulnscan.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2023 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """ VulnScan abstracts over querying and collecting vulnerability information from grype, vulnix, and osv databases """ import pandas as pd from common import columns as cols from common.df import df_to_csv_file from common.log import LOG, LOG_SPAM, is_debug_enabled from common.proc import exec_cmd from vulnxscan import parsers as vulnxscan_parsers from vulnxscan import reporting as vulnxscan_reporting from vulnxscan import scanners as vulnxscan_scanners from vulnxscan.triage import triage_vulnerabilities from vulnxscan.utils import _vuln_sortcol class VulnScan: """Run vulnerability scans, generate reports""" def __init__(self): self.df_vulnix = None self.df_grype = None self.df_osv = None self.df_report = None self.df_triaged = None # Key:vuln_id, value:severity self.cvss = {} def _parse_vulnix(self, json_str): self.df_vulnix = vulnxscan_parsers.parse_vulnix_json( json_str, cvss_cache=self.cvss, log=LOG, ) if not self.df_vulnix.empty: if is_debug_enabled(): df_to_csv_file(self.df_vulnix, "df_vulnix.csv") def scan_vulnix(self, target_path, buildtime=False): """Run vulnix scan for nix artifact at target_path""" self.df_vulnix = pd.DataFrame() ret = vulnxscan_scanners.run_vulnix_scan( target_path, buildtime=buildtime, exec_cmd_fn=exec_cmd, log=LOG, ) if ret and hasattr(ret, "stderr") and ret.stderr: LOG.warning(ret) LOG.warning(ret.stderr) self.df_vulnix = None if ret and hasattr(ret, "stdout") and ret.stdout: self._parse_vulnix(ret.stdout) def _parse_grype(self, json_str): self.df_grype = vulnxscan_parsers.parse_grype_json( json_str, cvss_cache=self.cvss, log=LOG, log_spam=LOG_SPAM, ) if not self.df_grype.empty: if is_debug_enabled(): df_to_csv_file(self.df_grype, "df_grype.csv") def scan_grype(self, sbom_path): """Run grype scan using the SBOM at sbom_path as input""" ret = vulnxscan_scanners.run_grype_scan( sbom_path, exec_cmd_fn=exec_cmd, log=LOG, ) if ret.stdout: self._parse_grype(ret.stdout) def _parse_osv(self, df_osv): self.df_osv = vulnxscan_parsers.normalize_osv_dataframe( df_osv, cvss_cache=self.cvss, log=LOG, log_spam=LOG_SPAM, ) if not self.df_osv.empty: if is_debug_enabled(): df_to_csv_file(self.df_osv, "df_osv.csv") def scan_osv(self, sbom_path): """Run osv scan using the SBOM at sbom_path as input""" df_osv = vulnxscan_scanners.run_osv_scan(sbom_path, log=LOG) self._parse_osv(df_osv) def _generate_report(self): self.df_report = vulnxscan_reporting.build_report_dataframe( self.df_vulnix, self.df_grype, self.df_osv, log=LOG, ) if self.df_report.empty: self.df_report = None return if is_debug_enabled(): df_report_raw = pd.concat( [ df for df in [self.df_vulnix, self.df_grype, self.df_osv] if df is not None ], ignore_index=True, 
            )
            if not df_report_raw.empty:
                df_report_raw[cols.SORTCOL] = df_report_raw.apply(
                    _vuln_sortcol,
                    axis=1,
                )
                df_to_csv_file(df_report_raw, "df_report_raw.csv")

    def _filter_patched(self, sbom_csv):
        self.df_report = vulnxscan_reporting.filter_patched_report(
            self.df_report,
            sbom_csv,
            log=LOG,
        )

    def _apply_whitelist(self, whitelist_csv):
        vulnxscan_reporting.apply_whitelist_annotations(self.df_report, whitelist_csv)

    def _console_report(self):
        vulnxscan_reporting.render_console_report(
            self.df_report,
            df_triaged=self.df_triaged,
            log=LOG,
        )

    def report(self, args, sbom_csv):
        """Generate the vulnerability reports: csv file and a table to console"""
        self._generate_report()
        if self.df_report is None or self.df_report.empty:
            LOG.info("No vulnerabilities found")
            return
        if sbom_csv:
            self._filter_patched(sbom_csv)
        if args.whitelist:
            LOG.verbose("Applying whitelist '%s'", args.whitelist)
            self._apply_whitelist(args.whitelist)
        if args.triage:
            LOG.verbose("Running vulnerability triage")
            self.df_triaged = triage_vulnerabilities(self.df_report, args.nixprs)
        # Rename 'version' to 'version_local'
        self.df_report.columns = [
            cols.VERSION_LOCAL if col == cols.VERSION else col
            for col in self.df_report.columns
        ]
        LOG.debug("Writing reports")
        # Console report
        self._console_report()
        # File report
        vulnxscan_reporting.write_reports(
            self.df_report,
            args.out,
            df_triaged=self.df_triaged if args.triage else None,
        )

================================================
FILE: src/vulnxscan/vulnxscan_cli.py
================================================
#!/usr/bin/env python3

# SPDX-FileCopyrightText: 2023 Technology Innovation Institute (TII)
#
# SPDX-License-Identifier: Apache-2.0

"""
Scan nix artifact or CycloneDX SBOM for vulnerabilities with various
open-source vulnerability scanners.
"""

import argparse
import logging
import pathlib

from common.cli_args import add_verbose_argument, add_version_argument
from common.errors import InvalidSbomError, SbomnixError
from common.log import LOG, set_log_verbosity
from common.proc import exit_unless_command_exists
from sbomnix.cli_utils import generate_temp_sbom, resolve_nix_target
from vulnxscan.utils import _is_json
from vulnxscan.vulnscan import VulnScan

###############################################################################


def getargs(args=None):
    """Parse command line arguments"""
    desc = (
        "Scan nix artifact or CycloneDX SBOM for vulnerabilities with "
        "various open-source vulnerability scanners."
    )
    epil = "Example: ./vulnxscan.py /path/to/nix/out/or/drv/or/flakeref"
    parser = argparse.ArgumentParser(description=desc, epilog=epil)
    helps = (
        "Target nix store path (e.g. derivation file or nix output path) or flakeref"
    )
    parser.add_argument("TARGET", help=helps, type=str)
    add_verbose_argument(parser)
    helps = "Path to output file (default: ./vulns.csv)"
    parser.add_argument("-o", "--out", nargs="?", help=helps, default="vulns.csv")
    helps = (
        "Scan target buildtime instead of runtime dependencies. This option "
        "has no impact if the scan target is SBOM (ref: --sbom)."
    )
    parser.add_argument("--buildtime", help=helps, action="store_true")
    helps = (
        "Indicate that TARGET is a cdx SBOM instead of a path to a nix artifact. "
        "This allows running vulnxscan using input SBOMs from any tool "
        "capable of generating cdx SBOMs. This option makes it possible to run "
        "vulnxscan postmortem against any (potentially earlier) release of "
        "the TARGET. "
        "Moreover, this option allows using vulnxscan against non-nix targets "
        "as long as the SBOM includes valid CPE identifiers and purls. "
        "If this option is specified, vulnix scan will not run, since vulnix "
        "is nix-only and requires components' nix store paths. "
        "Also, if this option is specified, option '--buildtime' will be "
        "ignored since target packages will be read from the given SBOM."
    )
    parser.add_argument("--sbom", help=helps, action="store_true")
    helps = (
        "Path to whitelist file. Vulnerabilities that match any whitelisted "
        "entries will not be included in the console output and are annotated "
        "accordingly in the output csv. See more details in the vulnxscan "
        "README.md."
    )
    parser.add_argument("--whitelist", help=helps, type=pathlib.Path)
    helps = (
        "Add more information to vulnxscan output by querying "
        "repology.org for available package versions in nix-unstable and "
        "package upstream. This option is intended to help manual analysis. "
        "Output is written to a separate OUT file with 'triage' infix, "
        "by default: 'vulns.triage.csv'."
    )
    parser.add_argument("--triage", help=helps, action="store_true")
    triagegr = parser.add_argument_group("Other arguments")
    helps = (
        "Search nixpkgs github for PRs that might include more information "
        "concerning possible nixpkgs fixes for the found vulnerabilities. "
        "This option adds URLs to (at most five) PRs that appear valid "
        "for each vulnerability based on heuristics. "
        "The PR search takes significant "
        "time due to github API rate limits, which is why this feature is "
        "not enabled by default. This option has no impact unless '--triage' "
        "is also specified."
    )
    triagegr.add_argument("--nixprs", help=helps, action="store_true")
    add_version_argument(parser)
    return parser.parse_args(args)


################################################################################


def main():
    """main entry point"""
    args = getargs()
    set_log_verbosity(args.verbose)
    try:
        _run(args)
    except SbomnixError as error:
        LOG.fatal("%s", error)
        raise SystemExit(1) from error


def _run(args):
    # Fail early if the following commands are not in PATH
    exit_unless_command_exists("grype")
    exit_unless_command_exists("vulnix")
    scanner = VulnScan()
    sbom_artifact = None
    if args.sbom:
        target_path = pathlib.Path(args.TARGET).resolve().as_posix()
        if not _is_json(target_path):
            raise InvalidSbomError(args.TARGET)
        sbom_cdx_path = target_path
        sbom_csv_path = None
    else:
        target = resolve_nix_target(args.TARGET, buildtime=args.buildtime)
        target_path = target.path
        sbom_artifact = generate_temp_sbom(
            target_path,
            args.buildtime,
            prefix="vulnxscan_",
            cdx_suffix=".json",
            include_csv=True,
        )
        sbom_cdx_path = sbom_artifact.cdx_path
        sbom_csv_path = sbom_artifact.csv_path
    LOG.debug("Using cdx SBOM '%s'", sbom_cdx_path)
    LOG.debug("Using csv SBOM '%s'", sbom_csv_path)
    try:
        if not args.sbom:
            scanner.scan_vulnix(target_path, args.buildtime)
        scanner.scan_grype(sbom_cdx_path)
        scanner.scan_osv(sbom_cdx_path)
        scanner.report(args, sbom_csv_path)
    finally:
        if (
            not args.sbom
            and not LOG.isEnabledFor(logging.DEBUG)
            and sbom_artifact is not None
        ):
            # Remove generated temp files unless verbosity is DEBUG or more verbose
            sbom_artifact.cleanup()


if __name__ == "__main__":
    main()

################################################################################

================================================
FILE: src/vulnxscan/whitelist.py
================================================
#!/usr/bin/env python3

# SPDX-FileCopyrightText: 2023 Technology Innovation Institute (TII)
#
# SPDX-License-Identifier: Apache-2.0

"""
Utility functions when dealing with whitelists
"""
################################################################################ # Whitelist from common import columns as cols from common.df import df_from_csv_file, df_log from common.errors import WhitelistApplicationError from common.log import LOG, LOG_SPAM def load_whitelist(whitelist_csv_path): """ Load vulnerability whitelist from the given path. Returns None if the whitelist is not a valid vulnerability whitelist. Otherwise returns whitelist_csv_path as dataframe. """ df = df_from_csv_file(whitelist_csv_path, exit_on_error=False) if df is None: return None # Whitelist must have the following columns if not set([cols.VULN_ID, cols.COMMENT]).issubset(df.columns): LOG.warning("Whitelist csv missing required columns") return None if cols.WHITELIST in df.columns: # Interpret possible string values in "whitelist" column # to boolean as follows: df[cols.WHITELIST] = df[cols.WHITELIST].replace({"": True}) df[cols.WHITELIST] = ( df[cols.WHITELIST].astype(str).replace({"False": False, "0": False}) ) df[cols.WHITELIST] = df[cols.WHITELIST].astype("bool") return df def df_apply_whitelist(df_whitelist, df_vulns): """ Apply df_whitelist to vulnerabilities in df_vulns, changing df_vulns in-place. Adds columns "whitelist" and "whitelist_comment" to df_vulns based on whitelisting regular expressions in column df_whitelist["vuln_id"]. If df_whitelist["package"] exists and is not empty, require strict match in df_whitelist["package"] and df_vulns["package"]. If df_whitelist["whitelist"] exists and is False, do *not* whitelist the entry even if the rule matches, but only apply the column "whitelist_comment" to matching entries. """ # Add default values to whitelist columns df_vulns[cols.WHITELIST] = False df_vulns[cols.WHITELIST_COMMENT] = "" if cols.VULN_ID not in df_vulns: raise WhitelistApplicationError("Missing 'vuln_id' column from df_vulns") if cols.VULN_ID not in df_whitelist: LOG.warning("Whitelist ignored: missing 'vuln_id' column from whitelist") return check_pkg_name = False if cols.PACKAGE in df_whitelist.columns and cols.PACKAGE in df_vulns.columns: check_pkg_name = True check_whitelist = False if cols.WHITELIST in df_whitelist.columns: check_whitelist = True # Iterate rows in df_whitelist in reverse order so the whitelist rules # on top of the file get higher priority df_whitelist_rev = df_whitelist[::-1] for whitelist_entry in df_whitelist_rev.itertuples(): LOG.log(LOG_SPAM, "whitelist_entry: %s", whitelist_entry) regex = str(whitelist_entry.vuln_id).strip() LOG.log(LOG_SPAM, "whitelist regex: %s", regex) df_matches = df_vulns[cols.VULN_ID].str.fullmatch(regex) if check_pkg_name and whitelist_entry.package: LOG.log(LOG_SPAM, "filtering by package name: %s", whitelist_entry.package) df_matches = df_matches & ( df_vulns[cols.PACKAGE] == whitelist_entry.package ) df_vulns.loc[df_matches, cols.WHITELIST] = True if check_whitelist: LOG.log(LOG_SPAM, "entry[whitelist]=%s", bool(whitelist_entry.whitelist)) df_vulns.loc[df_matches, cols.WHITELIST] = bool(whitelist_entry.whitelist) df_vulns.loc[df_matches, cols.WHITELIST_COMMENT] = whitelist_entry.comment LOG.log(LOG_SPAM, "matches %s vulns", len(df_vulns[df_matches])) df_log(df_vulns[df_matches], LOG_SPAM) def df_drop_whitelisted(df): """ Drop whitelisted vulnerabilities from `df` as well as the related columns. 
""" if cols.WHITELIST in df.columns: # Convert possible string to boolean df = df[~df[cols.WHITELIST]] df = df.drop(cols.WHITELIST, axis=1) if cols.WHITELIST_COMMENT in df.columns: df = df.drop(cols.WHITELIST_COMMENT, axis=1) return df ================================================ FILE: tests/__init__.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 ================================================ FILE: tests/compare_deps.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2023 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Python script that compares dependencies between sbomnix and nixgraph""" import argparse import json import os import pathlib import sys import pandas as pd from common.cli_args import add_verbose_argument from common.df import df_from_csv_file, df_to_csv_file from common.log import LOG, LOG_SPAM, is_debug_enabled, set_log_verbosity from common.regex import regex_match ############################################################################### def getargs(): """Parse command line arguments""" desc = "Compare nixgraph and sbomnix output to cross-validate" epil = ( f"Example: ./{os.path.basename(__file__)} " "--sbom /path/to/sbom.json --graph /path/to/graph.csv" ) parser = argparse.ArgumentParser(description=desc, epilog=epil) add_verbose_argument(parser) helps = "Path to sbom in csv format" parser.add_argument("--sbom", help=helps, type=pathlib.Path, required=True) helps = "Path to graph in csv format" parser.add_argument("--graph", help=helps, type=pathlib.Path, required=True) return parser.parse_args() ################################################################################ def _parse_sbom(path): LOG.info("Loading sbom data from '%s'", path) with path.open(encoding="utf-8") as inf: json_dict = json.loads(inf.read()) # Parse sbom type sbom_type = "" for prop_dict in json_dict["metadata"]["properties"]: if "sbom_type" in prop_dict["name"]: sbom_type = prop_dict["value"] if not sbom_type: LOG.fatal("Failed to find sbom_type") sys.exit(1) LOG.debug(sbom_type) # Parse components components = json_dict["components"] + [json_dict["metadata"]["component"]] comp_parsed_dict = {} setcol = comp_parsed_dict.setdefault for cmp in components: # setcol("bom_ref", []).append(cmp["bom-ref"]) outpaths = [] for prop_dict in cmp["properties"]: if "output_path" in prop_dict["name"]: outpaths.append(prop_dict["value"]) elif "drv_path" in prop_dict["name"]: setcol("drv_path", []).append(prop_dict["value"]) setcol("output_path", []).append(outpaths) df_components = pd.DataFrame(comp_parsed_dict) # Parse dependencies deps = json_dict["dependencies"] deps_parsed_dict = {} setcol = deps_parsed_dict.setdefault for dep in deps: if "dependsOn" not in dep: setcol("ref", []).append(dep["ref"]) setcol("depends_on", []).append("") continue for dependson in dep["dependsOn"]: setcol("ref", []).append(dep["ref"]) setcol("depends_on", []).append(dependson) df_dependencies = pd.DataFrame(deps_parsed_dict) # Join df_components with df_dependencies df_parsed = df_components.merge( df_dependencies, how="outer", left_on=["drv_path"], right_on=["ref"], ) df_parsed.fillna("", inplace=True) if is_debug_enabled(): df_to_csv_file(df_parsed, "df_sbom_parsed.csv") return df_parsed, sbom_type def _parse_graph(path): LOG.info("Loading graph data from '%s'", path) df_graph = 
df_from_csv_file(path) df_graph.fillna("", inplace=True) df_graph = df_graph.astype(str) src_path = df_graph["src_path"].iloc[0] graph_type = "buildtime" if regex_match(r".*\.[a-z]+$", src_path) else "runtime" return df_graph, graph_type def _filter_set(re_filter_out_list, target_set): matching_set = set() for target in target_set: for regex in re_filter_out_list: if regex_match(regex, target): matching_set.add(target) break return target_set - matching_set ################################################################################ def sbom_internal_checks(df_sbom): """Cross-check sbom components vs dependencies""" passed = True # Empty "output_path" indicates component is referenced in the # sbom "dependency" section, but missing from the "components" section df = df_sbom[df_sbom["output_path"].isna()] if not df.empty: missing = df["ref"].to_list() LOG.fatal("sbom component missing: %s", missing) passed = False # Empty "ref" indicates component is listed in the sbom # "components" section, but missing from the "dependencies" df = df_sbom[df_sbom["ref"].isna()] if not df.empty: missing = df["drv_path"].to_list() LOG.fatal("sbom dependency missing for component: %s", missing) passed = False return passed def compare_dependencies(df_sbom, df_graph, sbom_type, graph_type): """Compare dependencies in df_sbom and df_braph""" LOG.debug("sbom_type=%s", sbom_type) LOG.debug("graph_type=%s", graph_type) deps_sbom_all = set() deps_graph_all = set() df_sbom = df_sbom.explode("output_path") df_sbom = df_sbom.astype(str) if (graph_type == "runtime" and sbom_type != "runtime_only") or ( graph_type == "buildtime" and sbom_type == "runtime_only" ): LOG.fatal("Unable to compare: graph='%s' vs sbom='%s'", graph_type, sbom_type) return False if graph_type == "runtime": LOG.info("Comparing runtime dependencies") for out_path in df_sbom["output_path"].unique().tolist(): LOG.log(LOG_SPAM, "target: %s", out_path) df_sbom_deps = df_sbom[df_sbom["output_path"] == out_path] sbom_deps = list(filter(None, df_sbom_deps["depends_on"].unique().tolist())) LOG.log(LOG_SPAM, "sbom depends-ons: %s", sbom_deps) deps_sbom_all.update(set(sbom_deps)) df_graph_deps = df_graph[df_graph["target_path"] == out_path] # Map graph src_path to sbom paths dfr = df_sbom.merge( df_graph_deps, how="inner", left_on="output_path", right_on="src_path" ).loc[:, ["drv_path"]] graph_deps = list(filter(None, dfr["drv_path"].unique().tolist())) LOG.log(LOG_SPAM, "graph depends-ons: %s", graph_deps) deps_graph_all.update(set(graph_deps)) if graph_type == "buildtime": LOG.info("Comparing buildtime dependencies") for drv_path in df_sbom["drv_path"].unique().tolist(): LOG.log(LOG_SPAM, "target: %s", drv_path) df_sbom_deps = df_sbom[df_sbom["drv_path"] == drv_path] sbom_deps = list(filter(None, df_sbom_deps["depends_on"].unique().tolist())) LOG.log(LOG_SPAM, "sbom depends-ons: %s", sbom_deps) deps_sbom_all.update(set(sbom_deps)) dfr = df_graph[df_graph["target_path"] == drv_path] graph_deps = list(filter(None, dfr["src_path"].unique().tolist())) LOG.log(LOG_SPAM, "graph depends-ons: %s", graph_deps) deps_graph_all.update(set(graph_deps)) deps_only_in_sbom = set() deps_only_in_graph = set() deps_only_in_sbom.update(deps_sbom_all - deps_graph_all) deps_only_in_graph.update(deps_graph_all - deps_sbom_all) # Filter out the following dependencies from the "deps_only_in_graph": # Store paths that match these regular expressions have no known derivers. # As such, they are not included in the sbom, but they are still drawn in # the graph. 
Not including such paths in the sbom is not an error, so # we filter them out here: re_no_known_drvs = [ r".*\.patch$", r".*\.patch.gz$", r".*\.sh$", r".*\.bash$", r".*\.diff$", r".*\.c$", r".*\.h$", r".*\.py$", r".*\.pl$", r".*\.xsl$", r".*\.lock$", r".*\.cnf$", r".*\.conf$", r".*\.crt$", r".*\.nix$", r".*\.in$", r".*\.plist$", r".*\.options$", r".*\.build$", r".*\.xcspec$", r".*\.toml$", r".*\.tmac$", r".*\.ds$", r".*\.key$", r".*\-source$", r".*\-builder$", r".*\-prefetch-git$", r".*\-inputrc$", r".*\-patch-registry-deps$", r".*\-make-initrd-ng$", r".*\.kaem$", r".*\.mk$", r".*\-nuke-refs$", r".*\-setup-hook$", r".*\-remove-references-to$", ] deps_only_in_graph = _filter_set(re_no_known_drvs, deps_only_in_graph) passed = True if deps_only_in_sbom: passed = False LOG.fatal("Dependencies only in sbom:") for dep in sorted(deps_only_in_sbom): LOG.fatal(" %s", dep) if deps_only_in_graph: passed = False LOG.fatal("Dependencies only in graph:") for dep in sorted(deps_only_in_graph): LOG.fatal(" %s", dep) return passed ################################################################################ def main(): """main entry point""" args = getargs() set_log_verbosity(args.verbose) if not args.sbom.exists(): LOG.fatal("Invalid path: '%s'", args.sbom) sys.exit(1) if not args.graph.exists(): LOG.fatal("Invalid path: '%s'", args.graph) sys.exit(1) df_sbom, sbom_type = _parse_sbom(args.sbom) df_graph, graph_type = _parse_graph(args.graph) # Checks sbom_check = sbom_internal_checks(df_sbom) deps_check = compare_dependencies(df_sbom, df_graph, sbom_type, graph_type) if sbom_check and deps_check: sys.exit(0) else: sys.exit(1) ################################################################################ if __name__ == "__main__": main() ################################################################################ ================================================ FILE: tests/compare_sboms.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2023 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Python script that compares two sboms""" import argparse import json import os import pathlib import sys import pandas as pd from common.cli_args import add_verbose_argument from common.df import df_to_csv_file from common.log import LOG, is_debug_enabled, set_log_verbosity ############################################################################### def getargs(): """Parse command line arguments""" desc = "Compare CycloneDX or SPDX sbom json files" epil = ( f"Example: ./{os.path.basename(__file__)} " "/path/to/sbom.cdx.json /path/to/sbom.cdx.json" ) parser = argparse.ArgumentParser(description=desc, epilog=epil) add_verbose_argument(parser) helps = "Path to first sbom json file" parser.add_argument("FILE1", help=helps, type=pathlib.Path) helps = "Path to second sbom json file" parser.add_argument("FILE2", help=helps, type=pathlib.Path) helps = ( "Set the SBOM component attribute(s) used as unique identifier" "(default: --uid='name,version')" ) parser.add_argument("--uid", help=helps, type=str, default="name,version") return parser.parse_args() ################################################################################ def _sbom_df_from_dict(dict_obj): df_ret = pd.DataFrame(dict_obj) df_ret.fillna("", inplace=True) df_ret = df_ret.astype(str) df_ret.sort_values("name", inplace=True, key=lambda col: col.str.lower()) df_ret.reset_index(drop=True, inplace=True) return df_ret def _parse_sbom_cdx(json_dict): components = 
json_dict["components"] + [json_dict["metadata"]["component"]] components_dict = {} setcol = components_dict.setdefault for cmp in components: setcol("uid", []).append(cmp["bom-ref"]) setcol("name", []).append(cmp["name"]) setcol("version", []).append(cmp["version"]) return _sbom_df_from_dict(components_dict) def _parse_sbom_spdx(json_dict): packages = json_dict["packages"] packages_dict = {} setcol = packages_dict.setdefault for cmp in packages: setcol("uid", []).append(cmp["SPDXID"]) setcol("name", []).append(cmp["name"]) setcol("version", []).append(cmp["versionInfo"]) return _sbom_df_from_dict(packages_dict) def _parse_sbom(path): with path.open(encoding="utf-8") as inf: json_dict = json.loads(inf.read()) sbom_format = "" if "SPDXID" in json_dict: sbom_format = "SPDX" return _parse_sbom_spdx(json_dict) if "bomFormat" in json_dict: sbom_format = json_dict["bomFormat"] return _parse_sbom_cdx(json_dict) LOG.fatal("Unsupported SBOM format: '%s'", sbom_format) sys.exit(1) def _log_rows(df, name): for row in df.itertuples(index=False, name=name): LOG.info(row) def _compare_sboms(args, df1, df2): """Describe diff of sboms df1 and df2, return True if they are equal""" if is_debug_enabled(): df_to_csv_file(df1, "df_sbom_file1.csv") df_to_csv_file(df2, "df_sbom_file2.csv") uid_list = [str(uid) for uid in args.uid.split(",")] df1_uidg = df1.groupby(by=uid_list).size().reset_index(name="count") df1_non_uniq = df1_uidg[df1_uidg["count"] > 1] df2_uidg = df2.groupby(by=uid_list).size().reset_index(name="count") df2_non_uniq = df2_uidg[df2_uidg["count"] > 1] df_common = pd.merge(left=df1, right=df2, how="inner", on=uid_list) df_common.drop_duplicates(subset=uid_list, inplace=True) df1_only = pd.merge(left=df1, right=df2, how="left", on=uid_list) df1_only = df1_only[df1_only["uid_y"].isna()] df1_only.drop_duplicates(subset=uid_list, inplace=True) df2_only = pd.merge(left=df2, right=df1, how="left", on=uid_list) df2_only = df2_only[df2_only["uid_y"].isna()] df2_only.drop_duplicates(subset=uid_list, inplace=True) LOG.info("Using uid: '%s'", uid_list) LOG.info("") LOG.info("FILE1 path '%s'", args.FILE1) LOG.info("FILE1 number of unique entries: %s", len(df1_uidg.index)) if not df1_non_uniq.empty: LOG.info("FILE1 number of non-unique entries: %s", len(df1_non_uniq)) _log_rows(df1_non_uniq, "non_unique") LOG.info("") LOG.info("FILE2 path '%s'", args.FILE2) LOG.info("FILE2 number of unique components: %s", len(df2_uidg.index)) if not df2_non_uniq.empty: LOG.info("FILE2 number of non-unique entries: %s", len(df2_non_uniq)) _log_rows(df2_non_uniq, "non_unique") LOG.info("") LOG.info("FILE1 and FILE2 common entries: %s", len(df_common)) if not df_common.empty: _log_rows(df_common[uid_list], "common") LOG.info("") LOG.info("FILE1 only entries: %s", len(df1_only)) if not df1_only.empty: _log_rows(df1_only[uid_list], "file1_only") LOG.info("") LOG.info("FILE2 only entries: %s", len(df2_only)) if not df2_only.empty: _log_rows(df2_only[uid_list], "file2_only") LOG.info("") return len(df1_only) == 0 and len(df2_only) == 0 ################################################################################ def main(): """main entry point""" args = getargs() set_log_verbosity(args.verbose) if not args.FILE1.exists(): LOG.fatal("Invalid path: '%s'", args.sbom) sys.exit(1) if not args.FILE2.exists(): LOG.fatal("Invalid path: '%s'", args.graph) sys.exit(1) df_sbom_f1 = _parse_sbom(args.FILE1) df_sbom_f2 = _parse_sbom(args.FILE2) equal = _compare_sboms(args, df_sbom_f1, df_sbom_f2) if equal: sys.exit(0) else: sys.exit(1) 
################################################################################ if __name__ == "__main__": main() ################################################################################ ================================================ FILE: tests/conftest.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Shared pytest fixtures for the test suite.""" import os import re import shutil import subprocess import time from pathlib import Path import pytest from tests import vulnix_test_support REPOROOT = Path(__file__).resolve().parent.parent INTEGRATION_DIR = REPOROOT / "tests" / "integration" RESOURCES_DIR = REPOROOT / "tests" / "resources" _GRYPE_TEST_DB = RESOURCES_DIR / "grype-test-db.tar.gz" _GRYPE_CACHE_SUBDIR = "grype-cache" def _output_mentions_repology_host(output): patterns = ( r"https://repology\.org(?:/|$)", r"host=['\"]repology\.org['\"]", r"HTTPSConnectionPool\(host=['\"]repology\.org['\"]", ) return any(re.search(pattern, output) for pattern in patterns) def _pythonpath_with_repo_root(env): repo_root = REPOROOT.as_posix() pythonpath = env.get("PYTHONPATH", "") if not pythonpath: env["PYTHONPATH"] = repo_root return env paths = pythonpath.split(os.pathsep) if repo_root not in paths: env["PYTHONPATH"] = f"{pythonpath}{os.pathsep}{repo_root}" return env @pytest.fixture(scope="session", autouse=True) def _warm_grype_db(request, tmp_path_factory): """Import the committed minimal grype DB into a session-scoped temp dir. Returns the cache Path so _run_python_script can point GRYPE_DB_CACHE_DIR at it. Returns None when no grype-marked tests are collected (non-grype sessions are unaffected and no DB import runs). Each pytest-xdist worker imports into its own getbasetemp() subdir, so there is no shared state and no locking is needed. 
""" has_grype_test = any( item.get_closest_marker("grype") for item in request.session.items ) if not has_grype_test: return None cache_dir = tmp_path_factory.getbasetemp() / _GRYPE_CACHE_SUBDIR cache_dir.mkdir(exist_ok=True) subprocess.run( ["grype", "db", "import", str(_GRYPE_TEST_DB)], env={ **os.environ, "GRYPE_DB_CACHE_DIR": str(cache_dir), "GRYPE_DB_VALIDATE_AGE": "false", }, check=True, ) return cache_dir @pytest.fixture(scope="session") def _configure_test_vulnix(request, tmp_path_factory): """Prepare the vulnix test wrapper with a deterministic default mode.""" requested_mode = os.environ.get("SBOMNIX_TEST_VULNIX_MODE", "dummy") if requested_mode not in {"dummy", "real"}: raise ValueError( "invalid SBOMNIX_TEST_VULNIX_MODE " f"{requested_mode!r}; expected one of: dummy, real" ) tmp_root = tmp_path_factory.getbasetemp().parent real_vulnix = shutil.which("vulnix") if requested_mode == "real" and real_vulnix is None: raise RuntimeError( "real vulnix selected for tests, but 'vulnix' is not available in PATH" ) cache_dir = vulnix_test_support.default_vulnix_cache_dir() reporter = request.config.pluginmanager.get_plugin("terminalreporter") if reporter is not None: reporter.write_sep( "=", f"vulnix test mode: {requested_mode}", bold=True, ) if requested_mode == "real": reporter.write_line(f"real vulnix binary: {real_vulnix}") reporter.write_line(f"real vulnix cache dir: {cache_dir}") else: reporter.write_line("using dummy vulnix wrapper for deterministic tests") return vulnix_test_support.configure_vulnix_for_tests( tmp_root=tmp_root, effective_mode=requested_mode, cache_dir=cache_dir, real_vulnix=real_vulnix, ) @pytest.fixture(name="test_work_dir") def fixture_test_work_dir(tmp_path): """Return a per-test working directory.""" return Path(tmp_path) @pytest.fixture(name="test_nix_drv", scope="session") def fixture_test_nix_drv(): """Instantiate a small test derivation chain once per test session.""" test_derivation = RESOURCES_DIR / "test-derivation-chain.nix" ret = subprocess.run( ["nix-instantiate", test_derivation.as_posix()], capture_output=True, encoding="utf-8", check=True, ) drv = Path(ret.stdout.strip()) assert drv.exists() return drv @pytest.fixture(name="test_nix_result", scope="session") def fixture_test_nix_result(test_nix_drv, tmp_path_factory): """Build nixpkgs.hello once per test session.""" build_dir = tmp_path_factory.mktemp("nix-build") result = build_dir / "result" cmd = ["nix-build", test_nix_drv.as_posix(), "-o", result.as_posix()] subprocess.run(cmd, check=True) assert result.exists() return result @pytest.fixture(name="test_cdx_sbom", scope="session") def fixture_test_cdx_sbom(): """Return a static CycloneDX SBOM fixture for offline SBOM-input tests.""" sbom = RESOURCES_DIR / "sample_cdx_sbom.json" assert sbom.exists() return sbom @pytest.fixture(name="_run_python_script") def fixture_run_python_script(test_work_dir, _warm_grype_db, _configure_test_vulnix): """Invoke a Python entrypoint from an isolated test workdir.""" def _run(args, **kwargs): env = _pythonpath_with_repo_root(os.environ.copy()) env.setdefault("GRYPE_DB_AUTO_UPDATE", "false") env.setdefault("GRYPE_DB_VALIDATE_AGE", "false") if _warm_grype_db is not None: env["GRYPE_DB_CACHE_DIR"] = str(_warm_grype_db) env = vulnix_test_support.build_vulnix_test_env( env, config=_configure_test_vulnix, ) kwargs.setdefault("cwd", test_work_dir) check = kwargs.pop("check", True) return subprocess.run(args, check=check, env=env, **kwargs) return _run 
@pytest.fixture(name="_run_python_script_retry_on_repology_network_error") def fixture_run_python_script_retry_on_repology_network_error(_run_python_script): """Retry transient repology.org connectivity failures before failing.""" def _run(args): markers = [ "requests.exceptions.ConnectTimeout", "requests.exceptions.ConnectionError", "requests.exceptions.ReadTimeout", "urllib3.exceptions.ConnectTimeoutError", "urllib3.exceptions.ReadTimeoutError", "Max retries exceeded", "Connection timed out", "Temporary failure in name resolution", "Name or service not known", ] retry_delays = [15, 45] last_ret = None for attempt in range(len(retry_delays) + 1): ret = _run_python_script(args, check=False, capture_output=True, text=True) if ret.returncode == 0: return ret last_ret = ret output = "\n".join(filter(None, [ret.stdout, ret.stderr])) is_repology_network_error = _output_mentions_repology_host(output) and any( marker in output for marker in markers ) if not is_repology_network_error or attempt >= len(retry_delays): ret.check_returncode() delay = retry_delays[attempt] print( f"repology.org request failed with a transient network error; " f"retrying in {delay}s (attempt {attempt + 2}/{len(retry_delays) + 1})" ) time.sleep(delay) last_ret.check_returncode() return last_ret return _run def pytest_collection_modifyitems(items): """Mark integration tests based on their path.""" run_real_vulnix = os.environ.get("SBOMNIX_RUN_REAL_VULNIX_TESTS") == "1" skip_real_vulnix = pytest.mark.skip( reason="real vulnix tests are opt-in; set SBOMNIX_RUN_REAL_VULNIX_TESTS=1" ) for item in items: path = Path(str(item.fspath)).resolve() if INTEGRATION_DIR in path.parents: item.add_marker(pytest.mark.integration) if item.get_closest_marker("real_vulnix") and not run_real_vulnix: item.add_marker(skip_real_vulnix) ================================================ FILE: tests/integration/__init__.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 ================================================ FILE: tests/integration/test_nixgraph_cli.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """CLI integration tests for nixgraph and dependency comparisons.""" from textwrap import dedent import pandas as pd import pytest from tests.testpaths import COMPARE_DEPS, NIXGRAPH, SBOMNIX from tests.testutils import df_difference, df_to_string def _write_nixgraph_test_flake(flake_dir): flake_dir.mkdir() (flake_dir / "flake.nix").write_text( dedent( """ { outputs = { self }: let mkPackage = system: let mkTestDerivation = { name, pname, version, command }: builtins.derivation { inherit name pname system version; builder = "/bin/sh"; args = [ "-c" command ]; }; first = mkTestDerivation { name = "sbomnix-flake-first-1.0"; pname = "sbomnix-flake-first"; version = "1.0"; command = "echo first > $out"; }; second = mkTestDerivation { name = "sbomnix-flake-second-1.0"; pname = "sbomnix-flake-second"; version = "1.0"; command = "echo ${first} > $out"; }; in mkTestDerivation { name = "sbomnix-flake-third-1.0"; pname = "sbomnix-flake-third"; version = "1.0"; command = "echo ${second} > $out"; }; in { packages.x86_64-linux.default = mkPackage "x86_64-linux"; packages.aarch64-linux.default = mkPackage "aarch64-linux"; packages.x86_64-darwin.default = mkPackage "x86_64-darwin"; 
packages.aarch64-darwin.default = mkPackage "aarch64-darwin"; }; } """ ), encoding="utf-8", ) return f"{flake_dir.as_posix()}#" def test_nixgraph_help(_run_python_script): """Test nixgraph command line argument: '-h'.""" _run_python_script([NIXGRAPH, "-h"]) def test_nixgraph_png(_run_python_script, test_nix_result, test_work_dir): """Test nixgraph with png output generates valid png image.""" png_out = test_work_dir / "graph.png" _run_python_script([NIXGRAPH, test_nix_result, "--out", png_out, "--depth", "3"]) assert png_out.exists() def test_nixgraph_csv(_run_python_script, test_nix_result, test_work_dir): """Test nixgraph with csv output generates valid csv.""" csv_out = test_work_dir / "graph.csv" _run_python_script([NIXGRAPH, test_nix_result, "--out", csv_out, "--depth", "3"]) assert csv_out.exists() df_out = pd.read_csv(csv_out) assert not df_out.empty def test_nixgraph_csv_runtime_drv(_run_python_script, test_nix_drv, test_work_dir): """Test nixgraph runtime graph generation from a direct derivation path.""" csv_out = test_work_dir / "graph_runtime_drv.csv" _run_python_script([NIXGRAPH, test_nix_drv, "--out", csv_out, "--depth", "3"]) assert csv_out.exists() df_out = pd.read_csv(csv_out) assert not df_out.empty assert set(df_out["target_pname"]) >= { "sbomnix-test-third-1.0", "sbomnix-test-second-1.0", } def test_nixgraph_csv_buildtime(_run_python_script, test_nix_drv, test_work_dir): """Test nixgraph with buildtime csv output generates valid csv.""" csv_out = test_work_dir / "graph_buildtime.csv" _run_python_script([NIXGRAPH, test_nix_drv, "--out", csv_out, "--buildtime"]) assert csv_out.exists() df_out = pd.read_csv(csv_out) assert not df_out.empty def test_nixgraph_dot_includes_edges_labels_and_style( _run_python_script, test_nix_result, test_work_dir, ): """Test DOT output for graph shape, labels, pathnames, and colorized nodes.""" dot_out = test_work_dir / "graph.dot" _run_python_script( [ NIXGRAPH, test_nix_result, "--out", dot_out, "--depth=3", "--pathnames", "--colorize=.*second.*", ] ) dot = dot_out.read_text(encoding="utf-8") assert "->" in dot assert "sbomnix-test-third-1.0" in dot assert "sbomnix-test-second-1.0" in dot assert "
" in dot assert 'fillcolor="#FFE6E6"' in dot def test_nixgraph_depth_and_until_limit_traversal( _run_python_script, test_nix_result, test_work_dir, ): """Test traversal limiting with --depth and --until.""" depth_one_csv = test_work_dir / "graph_depth_one.csv" _run_python_script( [ NIXGRAPH, test_nix_result, "--out", depth_one_csv, "--depth=1", ] ) df_depth_one = pd.read_csv(depth_one_csv) assert df_depth_one["graph_depth"].max() == 1 until_dot = test_work_dir / "graph_until.dot" _run_python_script( [ NIXGRAPH, test_nix_result, "--out", until_dot, "--depth=100", "--until=.*second.*", ] ) dot = until_dot.read_text(encoding="utf-8") assert "sbomnix-test-second-1.0" in dot assert "sbomnix-test-first-1.0" not in dot def test_nixgraph_csv_runtime_flakeref(_run_python_script, test_work_dir): """Test nixgraph runtime graph generation from a flakeref.""" flakeref = _write_nixgraph_test_flake(test_work_dir / "runtime-flake") csv_out = test_work_dir / "graph_runtime_flake.csv" _run_python_script([NIXGRAPH, flakeref, "--out", csv_out, "--depth=3"]) assert csv_out.exists() df_out = pd.read_csv(csv_out) assert set(df_out["target_pname"]) >= { "sbomnix-flake-third-1.0", "sbomnix-flake-second-1.0", } def test_nixgraph_csv_buildtime_flakeref(_run_python_script, test_work_dir): """Test nixgraph buildtime graph generation from a flakeref.""" flakeref = _write_nixgraph_test_flake(test_work_dir / "buildtime-flake") csv_out = test_work_dir / "graph_buildtime_flake.csv" _run_python_script( [NIXGRAPH, flakeref, "--out", csv_out, "--buildtime", "--depth=3"] ) assert csv_out.exists() df_out = pd.read_csv(csv_out) assert set(df_out["target_pname"]) >= { "sbomnix-flake-third-1.0.drv", "sbomnix-flake-second-1.0.drv", } def test_nixgraph_csv_graph_inverse(_run_python_script, test_nix_result, test_work_dir): """Test nixgraph with '--inverse' argument.""" csv_out = test_work_dir / "graph.csv" _run_python_script( [ NIXGRAPH, test_nix_result, "--out", csv_out, "--depth=100", ] ) assert csv_out.exists() df_out = pd.read_csv(csv_out) assert not df_out.empty csv_out_inv = test_work_dir / "graph_inverse.csv" _run_python_script( [ NIXGRAPH, test_nix_result, "--out", csv_out_inv, "--depth=100", "--inverse=.*", ] ) assert csv_out_inv.exists() df_out_inv = pd.read_csv(csv_out_inv) assert not df_out_inv.empty df_out = df_out.drop("graph_depth", axis=1).sort_values(by=["src_path"]) df_out_inv = df_out_inv.drop("graph_depth", axis=1).sort_values(by=["src_path"]) df_diff = df_difference(df_out, df_out_inv) assert df_diff.empty, df_to_string(df_diff) def test_compare_deps_runtime(_run_python_script, test_nix_result, test_work_dir): """Compare nixgraph vs sbom runtime dependencies.""" graph_csv_out = test_work_dir / "graph.csv" _run_python_script( [ NIXGRAPH, test_nix_result, "--out", graph_csv_out, "--depth=100", ] ) assert graph_csv_out.exists() out_path_cdx = test_work_dir / "sbom_cdx_test.json" _run_python_script( [ SBOMNIX, test_nix_result, "--cdx", out_path_cdx.as_posix(), ] ) assert out_path_cdx.exists() _run_python_script( [ COMPARE_DEPS, "--sbom", out_path_cdx, "--graph", graph_csv_out, ] ) @pytest.mark.slow def test_compare_deps_buildtime(_run_python_script, test_nix_drv, test_work_dir): """Compare nixgraph vs sbom buildtime dependencies.""" graph_csv_out = test_work_dir / "graph.csv" _run_python_script( [ NIXGRAPH, test_nix_drv, "--out", graph_csv_out, "--depth=100", "--buildtime", ] ) assert graph_csv_out.exists() out_path_cdx = test_work_dir / "sbom_cdx_test.json" _run_python_script( [ SBOMNIX, test_nix_drv, "--cdx", 
out_path_cdx.as_posix(), "--buildtime", ] ) assert out_path_cdx.exists() _run_python_script( [ COMPARE_DEPS, "--sbom", out_path_cdx, "--graph", graph_csv_out, ] ) ================================================ FILE: tests/integration/test_nixmeta_cli.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """CLI integration tests for nixmeta.""" import pytest from common.df import df_from_csv_file from tests.testpaths import NIXMETA, RESOURCES_DIR def test_nixmeta_help(_run_python_script): """Test nixmeta command line argument: '-h'.""" _run_python_script([NIXMETA, "-h"]) @pytest.mark.slow def test_nixmeta_sbomnix_flakeref(_run_python_script, test_work_dir): """Test nixmeta with a small package-set path.""" out_path = test_work_dir / "nixmeta.csv" package_set = RESOURCES_DIR / "nixmeta-package-set.nix" _run_python_script( [ NIXMETA, "--out", out_path.as_posix(), "--flakeref", package_set, ] ) assert out_path.exists() df_meta = df_from_csv_file(out_path) assert df_meta is not None assert set(df_meta["name"]) == { "sbomnix-meta-first-1.0", "sbomnix-meta-second-2.0", } ================================================ FILE: tests/integration/test_nixupdate_cli.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """CLI integration tests for nix_outdated.""" import pytest from tests.testpaths import NIX_OUTDATED def test_nix_outdated_help(_run_python_script): """Test nix_outdated command line argument: '-h'.""" _run_python_script([NIX_OUTDATED, "-h"]) @pytest.mark.network @pytest.mark.slow def test_nix_outdated_result( _run_python_script_retry_on_repology_network_error, test_nix_result, test_work_dir ): """Test nix_outdated with the nix result as input.""" out_path_nix_outdated = test_work_dir / "nix_outdated.csv" _run_python_script_retry_on_repology_network_error( [ NIX_OUTDATED, "--out", out_path_nix_outdated.as_posix(), test_nix_result, ] ) assert out_path_nix_outdated.exists() ================================================ FILE: tests/integration/test_provenance_cli.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """CLI integration tests for provenance.""" from tests.testpaths import PROVENANCE, RESOURCES_DIR from tests.testutils import validate_json def test_provenance_help(_run_python_script): """Test provenance command line argument: '-h'.""" _run_python_script([PROVENANCE, "-h"]) def test_provenance_schema(_run_python_script, test_nix_drv, test_work_dir): """Test provenance generates valid schema.""" out_path = test_work_dir / "provenance_test.json" _run_python_script( [ PROVENANCE, test_nix_drv, "--out", out_path.as_posix(), ] ) assert out_path.exists() schema_path = RESOURCES_DIR / "provenance-1.0.schema.json" assert schema_path.exists() validate_json(out_path.as_posix(), schema_path) def test_provenance_schema_recursive(_run_python_script, test_nix_drv, test_work_dir): """Test provenance generates valid schema with recursive option.""" out_path = test_work_dir / "recursive_provenance_test.json" _run_python_script( [ PROVENANCE, test_nix_drv, "--recursive", "--out", out_path.as_posix(), ] ) assert out_path.exists() schema_path = RESOURCES_DIR / "provenance-1.0.schema.json" assert 
schema_path.exists() validate_json(out_path.as_posix(), schema_path) ================================================ FILE: tests/integration/test_repology_cli.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """CLI integration tests for repology.""" import pytest from tests.testpaths import REPOLOGY_CLI def test_repology_cli_help(_run_python_script): """Test repology_cli command line argument: '-h'.""" _run_python_script([REPOLOGY_CLI, "-h"]) @pytest.mark.network @pytest.mark.slow def test_repology_cli_sbom( _run_python_script_retry_on_repology_network_error, test_cdx_sbom, test_work_dir, ): """Test repology_cli with SBOM as input.""" out_path_repology = test_work_dir / "repology.csv" _run_python_script_retry_on_repology_network_error( [ REPOLOGY_CLI, "--sbom_cdx", test_cdx_sbom.as_posix(), "--repository", "nix_unstable", "--out", out_path_repology.as_posix(), ] ) assert out_path_repology.exists() ================================================ FILE: tests/integration/test_sbomnix_cli.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """CLI integration tests for sbomnix.""" import pandas as pd import pytest from tests.testpaths import COMPARE_SBOMS, RESOURCES_DIR, SBOMNIX from tests.testutils import df_difference, df_to_string, validate_json def test_sbomnix_help(_run_python_script): """Test sbomnix command line argument: '-h'.""" _run_python_script([SBOMNIX, "-h"]) def test_sbomnix_type_runtime(_run_python_script, test_nix_result, test_work_dir): """Test sbomnix generates valid CycloneDX json with runtime dependencies.""" out_path_cdx = test_work_dir / "sbom_cdx_test.json" out_path_spdx = test_work_dir / "sbom_spdx_test.json" _run_python_script( [ SBOMNIX, test_nix_result, "--cdx", out_path_cdx.as_posix(), "--spdx", out_path_spdx.as_posix(), ] ) assert out_path_cdx.exists() assert out_path_spdx.exists() cdx_schema_path = RESOURCES_DIR / "cdx_bom-1.4.schema.json" assert cdx_schema_path.exists() validate_json(out_path_cdx.as_posix(), cdx_schema_path) spdx_schema_path = RESOURCES_DIR / "spdx_bom-2.3.schema.json" assert spdx_schema_path.exists() validate_json(out_path_spdx.as_posix(), spdx_schema_path) @pytest.mark.slow def test_sbomnix_type_buildtime(_run_python_script, test_nix_drv, test_work_dir): """Test sbomnix generates valid CycloneDX json with buildtime dependencies.""" out_path_cdx = test_work_dir / "sbom_cdx_test.json" out_path_spdx = test_work_dir / "sbom_spdx_test.json" _run_python_script( [ SBOMNIX, test_nix_drv, "--cdx", out_path_cdx.as_posix(), "--spdx", out_path_spdx.as_posix(), "--buildtime", ] ) assert out_path_cdx.exists() assert out_path_spdx.exists() cdx_schema_path = RESOURCES_DIR / "cdx_bom-1.4.schema.json" assert cdx_schema_path.exists() validate_json(out_path_cdx.as_posix(), cdx_schema_path) spdx_schema_path = RESOURCES_DIR / "spdx_bom-2.3.schema.json" assert spdx_schema_path.exists() validate_json(out_path_spdx.as_posix(), spdx_schema_path) @pytest.mark.slow def test_sbomnix_depth(_run_python_script, test_nix_drv, test_work_dir): """Test sbomnix '--depth' option.""" out_path_csv_1 = test_work_dir / "sbom_csv_test_1.csv" out_path_csv_2 = test_work_dir / "sbom_csv_test_2.csv" _run_python_script( [ SBOMNIX, test_nix_drv, "--buildtime", "--csv", out_path_csv_1.as_posix(), "--depth=2", ] ) assert 
out_path_csv_1.exists() df_out_1 = pd.read_csv(out_path_csv_1) assert not df_out_1.empty _run_python_script( [ SBOMNIX, test_nix_drv, "--buildtime", "--csv", out_path_csv_2.as_posix(), "--depth=1", ] ) assert out_path_csv_2.exists() df_out_2 = pd.read_csv(out_path_csv_2) assert not df_out_2.empty df_diff = df_difference(df_out_1, df_out_2) assert not df_diff.empty, df_to_string(df_diff) df_right_only = df_diff[df_diff["_merge"] == "right_only"] assert df_right_only.empty, df_to_string(df_diff) @pytest.mark.slow def test_compare_subsequent_cdx_sboms(_run_python_script, test_nix_drv, test_work_dir): """Compare two sbomnix runs with same target produce the same cdx sbom.""" out_path_cdx_1 = test_work_dir / "sbom_cdx_test_1.json" _run_python_script( [ SBOMNIX, test_nix_drv, "--cdx", out_path_cdx_1.as_posix(), "--buildtime", ] ) assert out_path_cdx_1.exists() out_path_cdx_2 = test_work_dir / "sbom_cdx_test_2.json" _run_python_script( [ SBOMNIX, test_nix_drv, "--cdx", out_path_cdx_2.as_posix(), "--buildtime", ] ) assert out_path_cdx_2.exists() _run_python_script([COMPARE_SBOMS, out_path_cdx_1, out_path_cdx_2]) @pytest.mark.slow def test_compare_subsequent_spdx_sboms(_run_python_script, test_nix_drv, test_work_dir): """Compare two sbomnix runs with same target produce the same spdx sbom.""" out_path_spdx_1 = test_work_dir / "sbom_spdx_test_1.json" _run_python_script( [ SBOMNIX, test_nix_drv, "--spdx", out_path_spdx_1.as_posix(), "--buildtime", ] ) assert out_path_spdx_1.exists() out_path_spdx_2 = test_work_dir / "sbom_spdx_test_2.json" _run_python_script( [ SBOMNIX, test_nix_drv, "--spdx", out_path_spdx_2.as_posix(), "--buildtime", ] ) assert out_path_spdx_2.exists() _run_python_script([COMPARE_SBOMS, out_path_spdx_1, out_path_spdx_2]) @pytest.mark.slow def test_compare_spdx_and_cdx_sboms(_run_python_script, test_nix_drv, test_work_dir): """Compare spdx and cdx sboms from the same sbomnix invocation.""" out_path_spdx = test_work_dir / "sbom_spdx_test.json" out_path_cdx = test_work_dir / "sbom_cdx_test.json" _run_python_script( [ SBOMNIX, test_nix_drv, "--cdx", out_path_cdx.as_posix(), "--spdx", out_path_spdx.as_posix(), "--buildtime", ] ) assert out_path_cdx.exists() assert out_path_spdx.exists() _run_python_script([COMPARE_SBOMS, out_path_cdx, out_path_spdx]) ================================================ FILE: tests/integration/test_vulnxscan_cli.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """CLI integration tests for vulnxscan.""" import pandas as pd import pytest from tests.testpaths import RESOURCES_DIR, VULNXSCAN # Synthetic CVE committed in tests/resources/grype-test-db.tar.gz. # It targets sbomnix-test-first==1.0, which is in the test derivation chain. 
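# The test DB is imported once per test session (and per pytest-xdist worker)
# by the session-scoped _warm_grype_db fixture in tests/conftest.py, which
# points GRYPE_DB_CACHE_DIR at a temp directory for grype-marked tests.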
_SYNTHETIC_CVE = "CVE-TEST-2026-00001" def test_vulnxscan_help(_run_python_script): """Test vulnxscan command line argument: '-h'.""" _run_python_script([VULNXSCAN, "--help"]) @pytest.mark.network @pytest.mark.grype def test_vulnxscan_scan_nix_result(_run_python_script, test_nix_result, test_work_dir): """Test vulnxscan scan with the nix result as input.""" out_path_vulns = test_work_dir / "vulnxscan_test.csv" _run_python_script( [ VULNXSCAN, test_nix_result.as_posix(), "--out", out_path_vulns.as_posix(), ] ) df = pd.read_csv(out_path_vulns) assert _SYNTHETIC_CVE in df["vuln_id"].values, ( f"{_SYNTHETIC_CVE} not found in scan output — " "check grype-test-db.tar.gz matches the test fixture packages" ) @pytest.mark.network @pytest.mark.grype def test_vulnxscan_scan_sbom(_run_python_script, test_cdx_sbom, test_work_dir): """Test vulnxscan scan with SBOM as input.""" out_path_vulns = test_work_dir / "vulnxscan_test.csv" _run_python_script( [ VULNXSCAN, "--sbom", test_cdx_sbom.as_posix(), "--out", out_path_vulns.as_posix(), ] ) @pytest.mark.network @pytest.mark.grype def test_vulnxscan_triage(_run_python_script, test_nix_result, test_work_dir): """Test vulnxscan scan with --triage.""" out_path_vulns = test_work_dir / "vulnxscan_test.csv" _run_python_script( [ VULNXSCAN, "--triage", "--out", out_path_vulns.as_posix(), test_nix_result.as_posix(), ] ) df = pd.read_csv(out_path_vulns) assert _SYNTHETIC_CVE in df["vuln_id"].values, ( f"{_SYNTHETIC_CVE} not found in triage output" ) @pytest.mark.network @pytest.mark.grype def test_vulnxscan_triage_whitelist(_run_python_script, test_nix_result, test_work_dir): """Test vulnxscan scan with --triage and --whitelist.""" # Positive case: CVE is present without --whitelist out_no_whitelist = test_work_dir / "vulnxscan_no_whitelist.csv" ret_no_wl = _run_python_script( [ VULNXSCAN, "--triage", "--out", out_no_whitelist.as_posix(), test_nix_result.as_posix(), ], capture_output=True, text=True, ) assert "Potential vulnerabilities impacting version_local" in ret_no_wl.stderr df_no_wl = pd.read_csv(out_no_whitelist) assert _SYNTHETIC_CVE in df_no_wl["vuln_id"].values # Suppressed case: CVE is whitelisted away out_path_vulns = test_work_dir / "vulnxscan_test.csv" whitelist_csv = RESOURCES_DIR / "whitelist_all.csv" assert whitelist_csv.exists() ret = _run_python_script( [ VULNXSCAN, "--triage", "--whitelist", whitelist_csv.as_posix(), "--out", out_path_vulns.as_posix(), test_nix_result.as_posix(), ], capture_output=True, text=True, ) assert "Potential vulnerabilities impacting version_local" not in ret.stderr ================================================ FILE: tests/resources/README.md ================================================ # Test resources ## CycloneDX 1.4 json schema - cdx_bom-1.4.schema.json - ## CycloneDX 1.3 json schema - cdx_bom-1.3.schema.json - ## SPDX 2.3 json schema - spdx_bom-2.3.schema.json - ## CycloneDX SPDX companion schema - spdx.schema.json - Local minimal schema shim used by offline CycloneDX jsonschema validation ## JSON Signature Format 0.82 schema - jsf-0.82.schema.json - Local minimal schema shim used by offline CycloneDX schema validation ## SLSA v1.0 provenance schema - provenance-1.0.schema.json - translated and rewritten into jsonschema format. 
## Sample CycloneDX SBOM - sample_cdx_sbom.json - Small static SBOM fixture for offline SBOM-input tests ================================================ FILE: tests/resources/cdx_bom-1.3.schema.json ================================================ { "$schema": "http://json-schema.org/draft-07/schema#", "$id": "http://cyclonedx.org/schema/bom-1.3a.schema.json", "type": "object", "title": "CycloneDX Software Bill-of-Material Specification", "$comment" : "CycloneDX JSON schema is published under the terms of the Apache License 2.0.", "required": [ "bomFormat", "specVersion", "version" ], "properties": { "bomFormat": { "$id": "#/properties/bomFormat", "type": "string", "title": "BOM Format", "description": "Specifies the format of the BOM. This helps to identify the file as CycloneDX since BOMs do not have a filename convention nor does JSON schema support namespaces.", "enum": [ "CycloneDX" ] }, "specVersion": { "$id": "#/properties/specVersion", "type": "string", "title": "CycloneDX Specification Version", "description": "The version of the CycloneDX specification a BOM is written to (starting at version 1.2)", "examples": ["1.3"] }, "serialNumber": { "$id": "#/properties/serialNumber", "type": "string", "title": "BOM Serial Number", "description": "Every BOM generated should have a unique serial number, even if the contents of the BOM being generated have not changed over time. The process or tool responsible for creating the BOM should create random UUID's for every BOM generated.", "examples": ["urn:uuid:3e671687-395b-41f5-a30f-a58921a69b79"], "pattern": "^urn:uuid:[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$" }, "version": { "$id": "#/properties/version", "type": "integer", "title": "BOM Version", "description": "The version allows component publishers/authors to make changes to existing BOMs to update various aspects of the document such as description or licenses. When a system is presented with multiple BOMs for the same component, the system should use the most recent version of the BOM. The default version is '1' and should be incremented for each version of the BOM that is published. Each version of a component should have a unique BOM and if no changes are made to the BOMs, then each BOM will have a version of '1'.", "default": 1, "examples": [1] }, "metadata": { "$id": "#/properties/metadata", "$ref": "#/definitions/metadata", "title": "BOM Metadata", "description": "Provides additional information about a BOM." }, "components": { "$id": "#/properties/components", "type": "array", "items": {"$ref": "#/definitions/component"}, "uniqueItems": true, "title": "Components" }, "services": { "$id": "#/properties/services", "type": "array", "items": {"$ref": "#/definitions/service"}, "uniqueItems": true, "title": "Services" }, "externalReferences": { "$id": "#/properties/externalReferences", "type": "array", "items": {"$ref": "#/definitions/externalReference"}, "title": "External References", "description": "External references provide a way to document systems, sites, and information that may be relevant but which are not included with the BOM." }, "dependencies": { "$id": "#/properties/dependencies", "type": "array", "items": {"$ref": "#/definitions/dependency"}, "uniqueItems": true, "title": "Dependencies", "description": "Provides the ability to document dependency relationships." 
}, "compositions": { "$id": "#/properties/compositions", "type": "array", "items": {"$ref": "#/definitions/compositions"}, "uniqueItems": true, "title": "Compositions", "description": "Compositions describe constituent parts (including components, services, and dependency relationships) and their completeness." } }, "definitions": { "metadata": { "type": "object", "title": "BOM Metadata Object", "properties": { "timestamp": { "type": "string", "format": "date-time", "title": "Timestamp", "description": "The date and time (timestamp) when the document was created." }, "tools": { "type": "array", "title": "Creation Tools", "description": "The tool(s) used in the creation of the BOM.", "items": {"$ref": "#/definitions/tool"} }, "authors" :{ "type": "array", "title": "Authors", "description": "The person(s) who created the BOM. Authors are common in BOMs created through manual processes. BOMs created through automated means may not have authors.", "items": {"$ref": "#/definitions/organizationalContact"} }, "component": { "title": "Component", "description": "The component that the BOM describes.", "$ref": "#/definitions/component" }, "manufacture": { "title": "Manufacture", "description": "The organization that manufactured the component that the BOM describes.", "$ref": "#/definitions/organizationalEntity" }, "supplier": { "title": "Supplier", "description": " The organization that supplied the component that the BOM describes. The supplier may often be the manufacturer, but may also be a distributor or repackager.", "$ref": "#/definitions/organizationalEntity" }, "licenses": { "type": "array", "title": "BOM License(s)", "items": {"$ref": "#/definitions/licenseChoice"} }, "properties": { "type": "array", "title": "Properties", "description": "Provides the ability to document properties in a name-value store. This provides flexibility to include data not officially supported in the standard without having to use additional namespaces or create extensions. Unlike key-value stores, properties support duplicate names, each potentially having different values.", "items": {"$ref": "#/definitions/property"} } } }, "tool": { "type": "object", "title": "Tool", "description": "The tool used to create the BOM.", "properties": { "vendor": { "type": "string", "title": "Tool Vendor", "description": "The date and time (timestamp) when the document was created." }, "name": { "type": "string", "title": "Tool Name", "description": "The date and time (timestamp) when the document was created." }, "version": { "type": "string", "title": "Tool Version", "description": "The date and time (timestamp) when the document was created." }, "hashes": { "$id": "#/definitions/tool/properties/hashes", "type": "array", "items": {"$ref": "#/definitions/hash"}, "title": "Hashes", "description": "The hashes of the tool (if applicable)." } } }, "organizationalEntity": { "type": "object", "title": "Organizational Entity Object", "description": "", "properties": { "name": { "type": "string", "title": "Name", "description": "The name of the organization", "examples": [ "Example Inc." ] }, "url": { "type": "array", "items": { "type": "string", "format": "iri-reference" }, "title": "URL", "description": "The URL of the organization. Multiple URLs are allowed.", "examples": ["https://example.com"] }, "contact": { "type": "array", "title": "Contact", "description": "A contact at the organization. 
Multiple contacts are allowed.", "items": {"$ref": "#/definitions/organizationalContact"} } } }, "organizationalContact": { "type": "object", "title": "Organizational Contact Object", "description": "", "properties": { "name": { "type": "string", "title": "Name", "description": "The name of a contact", "examples": ["Contact name"] }, "email": { "type": "string", "title": "Email Address", "description": "The email address of the contact.", "examples": ["firstname.lastname@example.com"] }, "phone": { "type": "string", "title": "Phone", "description": "The phone number of the contact.", "examples": ["800-555-1212"] } } }, "component": { "type": "object", "title": "Component Object", "required": [ "type", "name", "version" ], "properties": { "type": { "type": "string", "enum": [ "application", "framework", "library", "container", "operating-system", "device", "firmware", "file" ], "title": "Component Type", "description": "Specifies the type of component. For software components, classify as application if no more specific appropriate classification is available or cannot be determined for the component.", "examples": ["library"] }, "mime-type": { "type": "string", "title": "Mime-Type", "description": "The optional mime-type of the component. When used on file components, the mime-type can provide additional context about the kind of file being represented such as an image, font, or executable. Some library or framework components may also have an associated mime-type.", "examples": ["image/jpeg"], "pattern": "^[-+a-z0-9.]+/[-+a-z0-9.]+$" }, "bom-ref": { "type": "string", "title": "BOM Reference", "description": "An optional identifier which can be used to reference the component elsewhere in the BOM. Every bom-ref should be unique." }, "supplier": { "title": "Component Supplier", "description": " The organization that supplied the component. The supplier may often be the manufacturer, but may also be a distributor or repackager.", "$ref": "#/definitions/organizationalEntity" }, "author": { "type": "string", "title": "Component Author", "description": "The person(s) or organization(s) that authored the component", "examples": ["Acme Inc"] }, "publisher": { "type": "string", "title": "Component Publisher", "description": "The person(s) or organization(s) that published the component", "examples": ["Acme Inc"] }, "group": { "type": "string", "title": "Component Group", "description": "The grouping name or identifier. This will often be a shortened, single name of the company or project that produced the component, or the source package or domain name. Whitespace and special characters should be avoided. Examples include: apache, org.apache.commons, and apache.org.", "examples": ["com.acme"] }, "name": { "type": "string", "title": "Component Name", "description": "The name of the component. This will often be a shortened, single name of the component. Examples: commons-lang3 and jquery", "examples": ["tomcat-catalina"] }, "version": { "type": "string", "title": "Component Version", "description": "The component version. The version should ideally comply with semantic versioning but is not enforced.", "examples": ["9.0.14"] }, "description": { "type": "string", "title": "Component Description", "description": "Specifies a description for the component" }, "scope": { "type": "string", "enum": [ "required", "optional", "excluded" ], "title": "Component Scope", "description": "Specifies the scope of the component. 
If scope is not specified, 'required' scope should be assumed by the consumer of the BOM", "default": "required" }, "hashes": { "type": "array", "title": "Component Hashes", "items": {"$ref": "#/definitions/hash"} }, "licenses": { "type": "array", "items": {"$ref": "#/definitions/licenseChoice"}, "title": "Component License(s)" }, "copyright": { "type": "string", "title": "Component Copyright", "description": "An optional copyright notice informing users of the underlying claims to copyright ownership in a published work.", "examples": ["Acme Inc"] }, "cpe": { "type": "string", "title": "Component Common Platform Enumeration (CPE)", "description": "DEPRECATED - DO NOT USE. This will be removed in a future version. Specifies a well-formed CPE name. See https://nvd.nist.gov/products/cpe", "examples": ["cpe:2.3:a:acme:component_framework:-:*:*:*:*:*:*:*"] }, "purl": { "type": "string", "title": "Component Package URL (purl)", "examples": ["pkg:maven/com.acme/tomcat-catalina@9.0.14?packaging=jar"] }, "swid": { "$ref": "#/definitions/swid", "title": "SWID Tag", "description": "Specifies metadata and content for ISO-IEC 19770-2 Software Identification (SWID) Tags." }, "modified": { "type": "boolean", "title": "Component Modified From Original", "description": "DEPRECATED - DO NOT USE. This will be removed in a future version. Use the pedigree element instead to supply information on exactly how the component was modified. A boolean value indicating is the component has been modified from the original. A value of true indicates the component is a derivative of the original. A value of false indicates the component has not been modified from the original." }, "pedigree": { "type": "object", "title": "Component Pedigree", "description": "Component pedigree is a way to document complex supply chain scenarios where components are created, distributed, modified, redistributed, combined with other components, etc. Pedigree supports viewing this complex chain from the beginning, the end, or anywhere in the middle. It also provides a way to document variants where the exact relation may not be known.", "properties": { "ancestors": { "type": "array", "title": "Ancestors", "description": "Describes zero or more components in which a component is derived from. This is commonly used to describe forks from existing projects where the forked version contains a ancestor node containing the original component it was forked from. For example, Component A is the original component. Component B is the component being used and documented in the BOM. However, Component B contains a pedigree node with a single ancestor documenting Component A - the original component from which Component B is derived from.", "items": {"$ref": "#/definitions/component"} }, "descendants": { "type": "array", "title": "Descendants", "description": "Descendants are the exact opposite of ancestors. This provides a way to document all forks (and their forks) of an original or root component.", "items": {"$ref": "#/definitions/component"} }, "variants": { "type": "array", "title": "Variants", "description": "Variants describe relations where the relationship between the components are not known. For example, if Component A contains nearly identical code to Component B. 
They are both related, but it is unclear if one is derived from the other, or if they share a common ancestor.", "items": {"$ref": "#/definitions/component"} }, "commits": { "type": "array", "title": "Commits", "description": "A list of zero or more commits which provide a trail describing how the component deviates from an ancestor, descendant, or variant.", "items": {"$ref": "#/definitions/commit"} }, "patches": { "type": "array", "title": "Patches", "description": ">A list of zero or more patches describing how the component deviates from an ancestor, descendant, or variant. Patches may be complimentary to commits or may be used in place of commits.", "items": {"$ref": "#/definitions/patch"} }, "notes": { "type": "string", "title": "Notes", "description": "Notes, observations, and other non-structured commentary describing the components pedigree." } } }, "externalReferences": { "type": "array", "items": {"$ref": "#/definitions/externalReference"}, "title": "External References" }, "components": { "$id": "#/definitions/component/properties/components", "type": "array", "items": {"$ref": "#/definitions/component"}, "uniqueItems": true, "title": "Components" }, "evidence": { "$ref": "#/definitions/componentEvidence", "title": "Evidence", "description": "Provides the ability to document evidence collected through various forms of extraction or analysis." }, "properties": { "type": "array", "title": "Properties", "description": "Provides the ability to document properties in a name-value store. This provides flexibility to include data not officially supported in the standard without having to use additional namespaces or create extensions. Unlike key-value stores, properties support duplicate names, each potentially having different values.", "items": {"$ref": "#/definitions/property"} } } }, "swid": { "type": "object", "title": "SWID Tag", "description": "Specifies metadata and content for ISO-IEC 19770-2 Software Identification (SWID) Tags.", "required": [ "tagId", "name" ], "properties": { "tagId": { "type": "string", "title": "Tag ID", "description": "Maps to the tagId of a SoftwareIdentity." }, "name": { "type": "string", "title": "Name", "description": "Maps to the name of a SoftwareIdentity." }, "version": { "type": "string", "title": "Version", "default": "0.0", "description": "Maps to the version of a SoftwareIdentity." }, "tagVersion": { "type": "integer", "title": "Tag Version", "default": 0, "description": "Maps to the tagVersion of a SoftwareIdentity." }, "patch": { "type": "boolean", "title": "Patch", "default": false, "description": "Maps to the patch of a SoftwareIdentity." }, "text": { "title": "Attachment text", "description": "Specifies the metadata and content of the SWID tag.", "$ref": "#/definitions/attachment" }, "url": { "type": "string", "title": "URL", "description": "The URL to the SWID file.", "format": "iri-reference" } } }, "attachment": { "type": "object", "title": "Attachment", "description": "Specifies the metadata and content for an attachment.", "required": [ "content" ], "properties": { "contentType": { "type": "string", "title": "Content-Type", "description": "Specifies the content type of the text. 
Defaults to text/plain if not specified.", "default": "text/plain" }, "encoding": { "type": "string", "title": "Encoding", "description": "Specifies the optional encoding the text is represented in.", "enum": [ "base64" ] }, "content": { "type": "string", "title": "Attachment Text", "description": "The attachment data" } } }, "hash": { "type": "object", "title": "Hash Objects", "required": [ "alg", "content" ], "properties": { "alg": { "$ref": "#/definitions/hash-alg" }, "content": { "$ref": "#/definitions/hash-content" } } }, "hash-alg": { "type": "string", "enum": [ "MD5", "SHA-1", "SHA-256", "SHA-384", "SHA-512", "SHA3-256", "SHA3-384", "SHA3-512", "BLAKE2b-256", "BLAKE2b-384", "BLAKE2b-512", "BLAKE3" ], "title": "Hash Algorithm" }, "hash-content": { "type": "string", "title": "Hash Content (value)", "examples": ["3942447fac867ae5cdb3229b658f4d48"], "pattern": "^([a-fA-F0-9]{32}|[a-fA-F0-9]{40}|[a-fA-F0-9]{64}|[a-fA-F0-9]{96}|[a-fA-F0-9]{128})$" }, "license": { "type": "object", "title": "License Object", "oneOf": [ { "required": ["id"] }, { "required": ["name"] } ], "properties": { "id": { "$ref": "spdx.schema.json", "title": "License ID (SPDX)", "description": "A valid SPDX license ID", "examples": ["Apache-2.0"] }, "name": { "type": "string", "title": "License Name", "description": "If SPDX does not define the license used, this field may be used to provide the license name", "examples": ["Acme Software License"] }, "text": { "title": "License text", "description": "An optional way to include the textual content of a license.", "$ref": "#/definitions/attachment" }, "url": { "type": "string", "title": "License URL", "description": "The URL to the license file. If specified, a 'license' externalReference should also be specified for completeness", "examples": ["https://www.apache.org/licenses/LICENSE-2.0.txt"], "format": "iri-reference" } } }, "licenseChoice": { "type": "object", "title": "License(s)", "properties": { "license": { "$ref": "#/definitions/license" }, "expression": { "type": "string", "title": "SPDX License Expression", "examples": [ "Apache-2.0 AND (MIT OR GPL-2.0-only)", "GPL-3.0-only WITH Classpath-exception-2.0" ] } }, "oneOf":[ { "required": ["license"] }, { "required": ["expression"] } ] }, "commit": { "type": "object", "title": "Commit", "description": "Specifies an individual commit", "properties": { "uid": { "type": "string", "title": "UID", "description": "A unique identifier of the commit. This may be version control specific. For example, Subversion uses revision numbers whereas git uses commit hashes." }, "url": { "type": "string", "title": "URL", "description": "The URL to the commit. 
This URL will typically point to a commit in a version control system.", "format": "iri-reference" }, "author": { "title": "Author", "description": "The author who created the changes in the commit", "$ref": "#/definitions/identifiableAction" }, "committer": { "title": "Committer", "description": "The person who committed or pushed the commit", "$ref": "#/definitions/identifiableAction" }, "message": { "type": "string", "title": "Message", "description": "The text description of the contents of the commit" } } }, "patch": { "type": "object", "title": "Patch", "description": "Specifies an individual patch", "required": [ "type" ], "properties": { "type": { "type": "string", "enum": [ "unofficial", "monkey", "backport", "cherry-pick" ], "title": "Type", "description": "Specifies the purpose for the patch including the resolution of defects, security issues, or new behavior or functionality" }, "diff": { "title": "Diff", "description": "The patch file (or diff) that show changes. Refer to https://en.wikipedia.org/wiki/Diff", "$ref": "#/definitions/diff" }, "resolves": { "type": "array", "items": {"$ref": "#/definitions/issue"}, "title": "Resolves", "description": "A collection of issues the patch resolves" } } }, "diff": { "type": "object", "title": "Diff", "description": "The patch file (or diff) that show changes. Refer to https://en.wikipedia.org/wiki/Diff", "properties": { "text": { "title": "Diff text", "description": "Specifies the optional text of the diff", "$ref": "#/definitions/attachment" }, "url": { "type": "string", "title": "URL", "description": "Specifies the URL to the diff", "format": "iri-reference" } } }, "issue": { "type": "object", "title": "Issue", "description": "An individual issue that has been resolved.", "required": [ "type" ], "properties": { "type": { "type": "string", "enum": [ "defect", "enhancement", "security" ], "title": "Type", "description": "Specifies the type of issue" }, "id": { "type": "string", "title": "ID", "description": "The identifier of the issue assigned by the source of the issue" }, "name": { "type": "string", "title": "Name", "description": "The name of the issue" }, "description": { "type": "string", "title": "Description", "description": "A description of the issue" }, "source": { "type": "object", "title": "Source", "description": "The source of the issue where it is documented", "properties": { "name": { "type": "string", "title": "Name", "description": "The name of the source. For example 'National Vulnerability Database', 'NVD', and 'Apache'" }, "url": { "type": "string", "title": "URL", "description": "The url of the issue documentation as provided by the source", "format": "iri-reference" } } }, "references": { "type": "array", "items": { "type": "string", "format": "iri-reference" }, "title": "References", "description": "A collection of URL's for reference. 
Multiple URLs are allowed.", "examples": ["https://example.com"] } } }, "identifiableAction": { "type": "object", "title": "Identifiable Action", "description": "Specifies an individual commit", "properties": { "timestamp": { "type": "string", "format": "date-time", "title": "Timestamp", "description": "The timestamp in which the action occurred" }, "name": { "type": "string", "title": "Name", "description": "The name of the individual who performed the action" }, "email": { "type": "string", "format": "idn-email", "title": "E-mail", "description": "The email address of the individual who performed the action" } } }, "externalReference": { "type": "object", "title": "External Reference", "description": "Specifies an individual external reference", "required": [ "url", "type" ], "properties": { "url": { "type": "string", "title": "URL", "description": "The URL to the external reference", "format": "iri-reference" }, "comment": { "type": "string", "title": "Comment", "description": "An optional comment describing the external reference" }, "type": { "type": "string", "title": "Type", "description": "Specifies the type of external reference. There are built-in types to describe common references. If a type does not exist for the reference being referred to, use the \"other\" type.", "enum": [ "vcs", "issue-tracker", "website", "advisories", "bom", "mailing-list", "social", "chat", "documentation", "support", "distribution", "license", "build-meta", "build-system", "other" ] }, "hashes": { "$id": "#/definitions/externalReference/properties/hashes", "type": "array", "items": {"$ref": "#/definitions/hash"}, "title": "Hashes", "description": "The hashes of the external reference (if applicable)." } } }, "dependency": { "type": "object", "title": "Dependency", "description": "Defines the direct dependencies of a component. Components that do not have their own dependencies MUST be declared as empty elements within the graph. Components that are not represented in the dependency graph MAY have unknown dependencies. It is RECOMMENDED that implementations assume this to be opaque and not an indicator of a component being dependency-free.", "required": [ "ref" ], "properties": { "ref": { "type": "string", "title": "Reference", "description": "References a component by the components bom-ref attribute" }, "dependsOn": { "type": "array", "uniqueItems": true, "items": { "type": "string" }, "title": "Depends On", "description": "The bom-ref identifiers of the components that are dependencies of this dependency object." } } }, "service": { "type": "object", "title": "Service Object", "required": [ "name" ], "properties": { "bom-ref": { "type": "string", "title": "BOM Reference", "description": "An optional identifier which can be used to reference the service elsewhere in the BOM. Every bom-ref should be unique." }, "provider": { "title": "Provider", "description": "The organization that provides the service.", "$ref": "#/definitions/organizationalEntity" }, "group": { "type": "string", "title": "Service Group", "description": "The grouping name, namespace, or identifier. This will often be a shortened, single name of the company or project that produced the service or domain name. Whitespace and special characters should be avoided.", "examples": ["com.acme"] }, "name": { "type": "string", "title": "Service Name", "description": "The name of the service. 
This will often be a shortened, single name of the service.", "examples": ["ticker-service"] }, "version": { "type": "string", "title": "Service Version", "description": "The service version.", "examples": ["1.0.0"] }, "description": { "type": "string", "title": "Service Description", "description": "Specifies a description for the service" }, "endpoints": { "type": "array", "items": { "type": "string", "format": "iri-reference" }, "title": "Endpoints", "description": "The endpoint URIs of the service. Multiple endpoints are allowed.", "examples": ["https://example.com/api/v1/ticker"] }, "authenticated": { "type": "boolean", "title": "Authentication Required", "description": "A boolean value indicating if the service requires authentication. A value of true indicates the service requires authentication prior to use. A value of false indicates the service does not require authentication." }, "x-trust-boundary": { "type": "boolean", "title": "Crosses Trust Boundary", "description": "A boolean value indicating if use of the service crosses a trust zone or boundary. A value of true indicates that by using the service, a trust boundary is crossed. A value of false indicates that by using the service, a trust boundary is not crossed." }, "data": { "type": "array", "items": {"$ref": "#/definitions/dataClassification"}, "title": "Data Classification", "description": "Specifies the data classification." }, "licenses": { "type": "array", "items": {"$ref": "#/definitions/licenseChoice"}, "title": "Component License(s)" }, "externalReferences": { "type": "array", "items": {"$ref": "#/definitions/externalReference"}, "title": "External References" }, "services": { "$id": "#/definitions/service/properties/services", "type": "array", "items": {"$ref": "#/definitions/service"}, "uniqueItems": true, "title": "Services" }, "properties": { "type": "array", "title": "Properties", "description": "Provides the ability to document properties in a name-value store. This provides flexibility to include data not officially supported in the standard without having to use additional namespaces or create extensions. Unlike key-value stores, properties support duplicate names, each potentially having different values.", "items": {"$ref": "#/definitions/property"} } } }, "dataClassification": { "type": "object", "title": "Hash Objects", "required": [ "flow", "classification" ], "properties": { "flow": { "$ref": "#/definitions/dataFlow" }, "classification": { "type": "string" } } }, "dataFlow": { "type": "string", "enum": [ "inbound", "outbound", "bi-directional", "unknown" ], "title": "Data flow direction" }, "copyright": { "type": "object", "title": "Copyright", "required": [ "text" ], "properties": { "text": { "type": "string", "title": "Copyright Text" } } }, "componentEvidence": { "type": "object", "title": "Evidence", "description": "Provides the ability to document evidence collected through various forms of extraction or analysis.", "properties": { "licenses": { "type": "array", "items": {"$ref": "#/definitions/licenseChoice"}, "title": "Component License(s)" }, "copyright": { "type": "array", "items": {"$ref": "#/definitions/copyright"}, "title": "Copyright" } } }, "compositions": { "type": "object", "title": "Compositions", "required": [ "aggregate" ], "properties": { "aggregate": { "$ref": "#/definitions/aggregateType", "title": "Aggregate", "description": "Specifies an aggregate type that describe how complete a relationship is." 
}, "assemblies": { "type": "array", "uniqueItems": true, "items": { "type": "string" }, "title": "BOM references", "description": "The bom-ref identifiers of the components or services being described. Assemblies refer to nested relationships whereby a constituent part may include other constituent parts. References do not cascade to child parts. References are explicit for the specified constituent part only." }, "dependencies": { "type": "array", "uniqueItems": true, "items": { "type": "string" }, "title": "BOM references", "description": "The bom-ref identifiers of the components or services being described. Dependencies refer to a relationship whereby an independent constituent part requires another independent constituent part. References do not cascade to transitive dependencies. References are explicit for the specified dependency only." } } }, "aggregateType": { "type": "string", "default": "not_specified", "enum": [ "complete", "incomplete", "incomplete_first_party_only", "incomplete_third_party_only", "unknown", "not_specified" ] }, "property": { "type": "object", "title": "Lightweight name-value pair", "properties": { "name": { "type": "string", "title": "Name", "description": "The name of the property. Duplicate names are allowed, each potentially having a different value." }, "value": { "type": "string", "title": "Value", "description": "The value of the property." } } } } } ================================================ FILE: tests/resources/cdx_bom-1.4.schema.json ================================================ { "$schema": "http://json-schema.org/draft-07/schema#", "$id": "http://cyclonedx.org/schema/bom-1.4.schema.json", "type": "object", "title": "CycloneDX Software Bill of Materials Standard", "$comment" : "CycloneDX JSON schema is published under the terms of the Apache License 2.0.", "required": [ "bomFormat", "specVersion", "version" ], "additionalProperties": false, "properties": { "$schema": { "type": "string", "enum": [ "http://cyclonedx.org/schema/bom-1.4.schema.json" ] }, "bomFormat": { "type": "string", "title": "BOM Format", "description": "Specifies the format of the BOM. This helps to identify the file as CycloneDX since BOMs do not have a filename convention nor does JSON schema support namespaces. This value MUST be \"CycloneDX\".", "enum": [ "CycloneDX" ] }, "specVersion": { "type": "string", "title": "CycloneDX Specification Version", "description": "The version of the CycloneDX specification a BOM conforms to (starting at version 1.2).", "examples": ["1.4"] }, "serialNumber": { "type": "string", "title": "BOM Serial Number", "description": "Every BOM generated SHOULD have a unique serial number, even if the contents of the BOM have not changed over time. If specified, the serial number MUST conform to RFC-4122. Use of serial numbers are RECOMMENDED.", "examples": ["urn:uuid:3e671687-395b-41f5-a30f-a58921a69b79"], "pattern": "^urn:uuid:[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$" }, "version": { "type": "integer", "title": "BOM Version", "description": "Whenever an existing BOM is modified, either manually or through automated processes, the version of the BOM SHOULD be incremented by 1. When a system is presented with multiple BOMs with identical serial numbers, the system SHOULD use the most recent version of the BOM. The default version is '1'.", "default": 1, "examples": [1] }, "metadata": { "$ref": "#/definitions/metadata", "title": "BOM Metadata", "description": "Provides additional information about a BOM." 
}, "components": { "type": "array", "additionalItems": false, "items": {"$ref": "#/definitions/component"}, "uniqueItems": true, "title": "Components", "description": "A list of software and hardware components." }, "services": { "type": "array", "additionalItems": false, "items": {"$ref": "#/definitions/service"}, "uniqueItems": true, "title": "Services", "description": "A list of services. This may include microservices, function-as-a-service, and other types of network or intra-process services." }, "externalReferences": { "type": "array", "additionalItems": false, "items": {"$ref": "#/definitions/externalReference"}, "title": "External References", "description": "External references provide a way to document systems, sites, and information that may be relevant but which are not included with the BOM." }, "dependencies": { "type": "array", "additionalItems": false, "items": {"$ref": "#/definitions/dependency"}, "uniqueItems": true, "title": "Dependencies", "description": "Provides the ability to document dependency relationships." }, "compositions": { "type": "array", "additionalItems": false, "items": {"$ref": "#/definitions/compositions"}, "uniqueItems": true, "title": "Compositions", "description": "Compositions describe constituent parts (including components, services, and dependency relationships) and their completeness." }, "vulnerabilities": { "type": "array", "additionalItems": false, "items": {"$ref": "#/definitions/vulnerability"}, "uniqueItems": true, "title": "Vulnerabilities", "description": "Vulnerabilities identified in components or services." }, "signature": { "$ref": "#/definitions/signature", "title": "Signature", "description": "Enveloped signature in [JSON Signature Format (JSF)](https://cyberphone.github.io/doc/security/jsf.html)." } }, "definitions": { "refType": { "$comment": "Identifier-DataType for interlinked elements.", "type": "string" }, "metadata": { "type": "object", "title": "BOM Metadata Object", "additionalProperties": false, "properties": { "timestamp": { "type": "string", "format": "date-time", "title": "Timestamp", "description": "The date and time (timestamp) when the BOM was created." }, "tools": { "type": "array", "title": "Creation Tools", "description": "The tool(s) used in the creation of the BOM.", "additionalItems": false, "items": {"$ref": "#/definitions/tool"} }, "authors" :{ "type": "array", "title": "Authors", "description": "The person(s) who created the BOM. Authors are common in BOMs created through manual processes. BOMs created through automated means may not have authors.", "additionalItems": false, "items": {"$ref": "#/definitions/organizationalContact"} }, "component": { "title": "Component", "description": "The component that the BOM describes.", "$ref": "#/definitions/component" }, "manufacture": { "title": "Manufacture", "description": "The organization that manufactured the component that the BOM describes.", "$ref": "#/definitions/organizationalEntity" }, "supplier": { "title": "Supplier", "description": " The organization that supplied the component that the BOM describes. The supplier may often be the manufacturer, but may also be a distributor or repackager.", "$ref": "#/definitions/organizationalEntity" }, "licenses": { "type": "array", "title": "BOM License(s)", "additionalItems": false, "items": {"$ref": "#/definitions/licenseChoice"} }, "properties": { "type": "array", "title": "Properties", "description": "Provides the ability to document properties in a name-value store. 
This provides flexibility to include data not officially supported in the standard without having to use additional namespaces or create extensions. Unlike key-value stores, properties support duplicate names, each potentially having different values. Property names of interest to the general public are encouraged to be registered in the [CycloneDX Property Taxonomy](https://github.com/CycloneDX/cyclonedx-property-taxonomy). Formal registration is OPTIONAL.", "additionalItems": false, "items": {"$ref": "#/definitions/property"} } } }, "tool": { "type": "object", "title": "Tool", "description": "Information about the automated or manual tool used", "additionalProperties": false, "properties": { "vendor": { "type": "string", "title": "Tool Vendor", "description": "The name of the vendor who created the tool" }, "name": { "type": "string", "title": "Tool Name", "description": "The name of the tool" }, "version": { "type": "string", "title": "Tool Version", "description": "The version of the tool" }, "hashes": { "type": "array", "additionalItems": false, "items": {"$ref": "#/definitions/hash"}, "title": "Hashes", "description": "The hashes of the tool (if applicable)." }, "externalReferences": { "type": "array", "additionalItems": false, "items": {"$ref": "#/definitions/externalReference"}, "title": "External References", "description": "External references provide a way to document systems, sites, and information that may be relevant but which are not included with the BOM." } } }, "organizationalEntity": { "type": "object", "title": "Organizational Entity Object", "description": "", "additionalProperties": false, "properties": { "name": { "type": "string", "title": "Name", "description": "The name of the organization", "examples": [ "Example Inc." ] }, "url": { "type": "array", "items": { "type": "string", "format": "iri-reference" }, "title": "URL", "description": "The URL of the organization. Multiple URLs are allowed.", "examples": ["https://example.com"] }, "contact": { "type": "array", "title": "Contact", "description": "A contact at the organization. Multiple contacts are allowed.", "additionalItems": false, "items": {"$ref": "#/definitions/organizationalContact"} } } }, "organizationalContact": { "type": "object", "title": "Organizational Contact Object", "description": "", "additionalProperties": false, "properties": { "name": { "type": "string", "title": "Name", "description": "The name of a contact", "examples": ["Contact name"] }, "email": { "type": "string", "format": "idn-email", "title": "Email Address", "description": "The email address of the contact.", "examples": ["firstname.lastname@example.com"] }, "phone": { "type": "string", "title": "Phone", "description": "The phone number of the contact.", "examples": ["800-555-1212"] } } }, "component": { "type": "object", "title": "Component Object", "required": [ "type", "name" ], "additionalProperties": false, "properties": { "type": { "type": "string", "enum": [ "application", "framework", "library", "container", "operating-system", "device", "firmware", "file" ], "title": "Component Type", "description": "Specifies the type of component. For software components, classify as application if no more specific appropriate classification is available or cannot be determined for the component. Types include:\n\n* __application__ = A software application. 
Refer to [https://en.wikipedia.org/wiki/Application_software](https://en.wikipedia.org/wiki/Application_software) for information about applications.\n* __framework__ = A software framework. Refer to [https://en.wikipedia.org/wiki/Software_framework](https://en.wikipedia.org/wiki/Software_framework) for information on how frameworks vary slightly from libraries.\n* __library__ = A software library. Refer to [https://en.wikipedia.org/wiki/Library_(computing)](https://en.wikipedia.org/wiki/Library_(computing))\n for information about libraries. All third-party and open source reusable components will likely be a library. If the library also has key features of a framework, then it should be classified as a framework. If not, or is unknown, then specifying library is RECOMMENDED.\n* __container__ = A packaging and/or runtime format, not specific to any particular technology, which isolates software inside the container from software outside of a container through virtualization technology. Refer to [https://en.wikipedia.org/wiki/OS-level_virtualization](https://en.wikipedia.org/wiki/OS-level_virtualization)\n* __operating-system__ = A software operating system without regard to deployment model (i.e. installed on physical hardware, virtual machine, image, etc) Refer to [https://en.wikipedia.org/wiki/Operating_system](https://en.wikipedia.org/wiki/Operating_system)\n* __device__ = A hardware device such as a processor, or chip-set. A hardware device containing firmware SHOULD include a component for the physical hardware itself, and another component of type 'firmware' or 'operating-system' (whichever is relevant), describing information about the software running on the device.\n See also the list of [known device properties](https://github.com/CycloneDX/cyclonedx-property-taxonomy/blob/main/cdx/device.md).\n* __firmware__ = A special type of software that provides low-level control over a devices hardware. Refer to [https://en.wikipedia.org/wiki/Firmware](https://en.wikipedia.org/wiki/Firmware)\n* __file__ = A computer file. Refer to [https://en.wikipedia.org/wiki/Computer_file](https://en.wikipedia.org/wiki/Computer_file) for information about files.", "examples": ["library"] }, "mime-type": { "type": "string", "title": "Mime-Type", "description": "The optional mime-type of the component. When used on file components, the mime-type can provide additional context about the kind of file being represented such as an image, font, or executable. Some library or framework components may also have an associated mime-type.", "examples": ["image/jpeg"], "pattern": "^[-+a-z0-9.]+/[-+a-z0-9.]+$" }, "bom-ref": { "$ref": "#/definitions/refType", "title": "BOM Reference", "description": "An optional identifier which can be used to reference the component elsewhere in the BOM. Every bom-ref MUST be unique within the BOM." }, "supplier": { "title": "Component Supplier", "description": " The organization that supplied the component. The supplier may often be the manufacturer, but may also be a distributor or repackager.", "$ref": "#/definitions/organizationalEntity" }, "author": { "type": "string", "title": "Component Author", "description": "The person(s) or organization(s) that authored the component", "examples": ["Acme Inc"] }, "publisher": { "type": "string", "title": "Component Publisher", "description": "The person(s) or organization(s) that published the component", "examples": ["Acme Inc"] }, "group": { "type": "string", "title": "Component Group", "description": "The grouping name or identifier. 
This will often be a shortened, single name of the company or project that produced the component, or the source package or domain name. Whitespace and special characters should be avoided. Examples include: apache, org.apache.commons, and apache.org.", "examples": ["com.acme"] }, "name": { "type": "string", "title": "Component Name", "description": "The name of the component. This will often be a shortened, single name of the component. Examples: commons-lang3 and jquery", "examples": ["tomcat-catalina"] }, "version": { "type": "string", "title": "Component Version", "description": "The component version. The version should ideally comply with semantic versioning but is not enforced.", "examples": ["9.0.14"] }, "description": { "type": "string", "title": "Component Description", "description": "Specifies a description for the component" }, "scope": { "type": "string", "enum": [ "required", "optional", "excluded" ], "title": "Component Scope", "description": "Specifies the scope of the component. If scope is not specified, 'required' scope SHOULD be assumed by the consumer of the BOM.", "default": "required" }, "hashes": { "type": "array", "title": "Component Hashes", "additionalItems": false, "items": {"$ref": "#/definitions/hash"} }, "licenses": { "type": "array", "additionalItems": false, "items": {"$ref": "#/definitions/licenseChoice"}, "title": "Component License(s)" }, "copyright": { "type": "string", "title": "Component Copyright", "description": "A copyright notice informing users of the underlying claims to copyright ownership in a published work.", "examples": ["Acme Inc"] }, "cpe": { "type": "string", "title": "Component Common Platform Enumeration (CPE)", "description": "Specifies a well-formed CPE name that conforms to the CPE 2.2 or 2.3 specification. See [https://nvd.nist.gov/products/cpe](https://nvd.nist.gov/products/cpe)", "examples": ["cpe:2.3:a:acme:component_framework:-:*:*:*:*:*:*:*"] }, "purl": { "type": "string", "title": "Component Package URL (purl)", "description": "Specifies the package-url (purl). The purl, if specified, MUST be valid and conform to the specification defined at: [https://github.com/package-url/purl-spec](https://github.com/package-url/purl-spec)", "examples": ["pkg:maven/com.acme/tomcat-catalina@9.0.14?packaging=jar"] }, "swid": { "$ref": "#/definitions/swid", "title": "SWID Tag", "description": "Specifies metadata and content for [ISO-IEC 19770-2 Software Identification (SWID) Tags](https://www.iso.org/standard/65666.html)." }, "modified": { "type": "boolean", "title": "Component Modified From Original", "description": "[Deprecated] - DO NOT USE. This will be removed in a future version. Use the pedigree element instead to supply information on exactly how the component was modified. A boolean value indicating if the component has been modified from the original. A value of true indicates the component is a derivative of the original. A value of false indicates the component has not been modified from the original." }, "pedigree": { "type": "object", "title": "Component Pedigree", "description": "Component pedigree is a way to document complex supply chain scenarios where components are created, distributed, modified, redistributed, combined with other components, etc. Pedigree supports viewing this complex chain from the beginning, the end, or anywhere in the middle. 
It also provides a way to document variants where the exact relation may not be known.", "additionalProperties": false, "properties": { "ancestors": { "type": "array", "title": "Ancestors", "description": "Describes zero or more components in which a component is derived from. This is commonly used to describe forks from existing projects where the forked version contains a ancestor node containing the original component it was forked from. For example, Component A is the original component. Component B is the component being used and documented in the BOM. However, Component B contains a pedigree node with a single ancestor documenting Component A - the original component from which Component B is derived from.", "additionalItems": false, "items": {"$ref": "#/definitions/component"} }, "descendants": { "type": "array", "title": "Descendants", "description": "Descendants are the exact opposite of ancestors. This provides a way to document all forks (and their forks) of an original or root component.", "additionalItems": false, "items": {"$ref": "#/definitions/component"} }, "variants": { "type": "array", "title": "Variants", "description": "Variants describe relations where the relationship between the components are not known. For example, if Component A contains nearly identical code to Component B. They are both related, but it is unclear if one is derived from the other, or if they share a common ancestor.", "additionalItems": false, "items": {"$ref": "#/definitions/component"} }, "commits": { "type": "array", "title": "Commits", "description": "A list of zero or more commits which provide a trail describing how the component deviates from an ancestor, descendant, or variant.", "additionalItems": false, "items": {"$ref": "#/definitions/commit"} }, "patches": { "type": "array", "title": "Patches", "description": ">A list of zero or more patches describing how the component deviates from an ancestor, descendant, or variant. Patches may be complimentary to commits or may be used in place of commits.", "additionalItems": false, "items": {"$ref": "#/definitions/patch"} }, "notes": { "type": "string", "title": "Notes", "description": "Notes, observations, and other non-structured commentary describing the components pedigree." } } }, "externalReferences": { "type": "array", "additionalItems": false, "items": {"$ref": "#/definitions/externalReference"}, "title": "External References", "description": "External references provide a way to document systems, sites, and information that may be relevant but which are not included with the BOM." }, "components": { "type": "array", "additionalItems": false, "items": {"$ref": "#/definitions/component"}, "uniqueItems": true, "title": "Components", "description": "A list of software and hardware components included in the parent component. This is not a dependency tree. It provides a way to specify a hierarchical representation of component assemblies, similar to system → subsystem → parts assembly in physical supply chains." }, "evidence": { "$ref": "#/definitions/componentEvidence", "title": "Evidence", "description": "Provides the ability to document evidence collected through various forms of extraction or analysis." }, "releaseNotes": { "$ref": "#/definitions/releaseNotes", "title": "Release notes", "description": "Specifies optional release notes." }, "properties": { "type": "array", "title": "Properties", "description": "Provides the ability to document properties in a name-value store. 
This provides flexibility to include data not officially supported in the standard without having to use additional namespaces or create extensions. Unlike key-value stores, properties support duplicate names, each potentially having different values. Property names of interest to the general public are encouraged to be registered in the [CycloneDX Property Taxonomy](https://github.com/CycloneDX/cyclonedx-property-taxonomy). Formal registration is OPTIONAL.", "additionalItems": false, "items": {"$ref": "#/definitions/property"} }, "signature": { "$ref": "#/definitions/signature", "title": "Signature", "description": "Enveloped signature in [JSON Signature Format (JSF)](https://cyberphone.github.io/doc/security/jsf.html)." } } }, "swid": { "type": "object", "title": "SWID Tag", "description": "Specifies metadata and content for ISO-IEC 19770-2 Software Identification (SWID) Tags.", "required": [ "tagId", "name" ], "additionalProperties": false, "properties": { "tagId": { "type": "string", "title": "Tag ID", "description": "Maps to the tagId of a SoftwareIdentity." }, "name": { "type": "string", "title": "Name", "description": "Maps to the name of a SoftwareIdentity." }, "version": { "type": "string", "title": "Version", "default": "0.0", "description": "Maps to the version of a SoftwareIdentity." }, "tagVersion": { "type": "integer", "title": "Tag Version", "default": 0, "description": "Maps to the tagVersion of a SoftwareIdentity." }, "patch": { "type": "boolean", "title": "Patch", "default": false, "description": "Maps to the patch of a SoftwareIdentity." }, "text": { "title": "Attachment text", "description": "Specifies the metadata and content of the SWID tag.", "$ref": "#/definitions/attachment" }, "url": { "type": "string", "title": "URL", "description": "The URL to the SWID file.", "format": "iri-reference" } } }, "attachment": { "type": "object", "title": "Attachment", "description": "Specifies the metadata and content for an attachment.", "required": [ "content" ], "additionalProperties": false, "properties": { "contentType": { "type": "string", "title": "Content-Type", "description": "Specifies the content type of the text. Defaults to text/plain if not specified.", "default": "text/plain" }, "encoding": { "type": "string", "title": "Encoding", "description": "Specifies the optional encoding the text is represented in.", "enum": [ "base64" ] }, "content": { "type": "string", "title": "Attachment Text", "description": "The attachment data. Proactive controls such as input validation and sanitization should be employed to prevent misuse of attachment text." 
} } }, "hash": { "type": "object", "title": "Hash Objects", "required": [ "alg", "content" ], "additionalProperties": false, "properties": { "alg": { "$ref": "#/definitions/hash-alg" }, "content": { "$ref": "#/definitions/hash-content" } } }, "hash-alg": { "type": "string", "enum": [ "MD5", "SHA-1", "SHA-256", "SHA-384", "SHA-512", "SHA3-256", "SHA3-384", "SHA3-512", "BLAKE2b-256", "BLAKE2b-384", "BLAKE2b-512", "BLAKE3" ], "title": "Hash Algorithm" }, "hash-content": { "type": "string", "title": "Hash Content (value)", "examples": ["3942447fac867ae5cdb3229b658f4d48"], "pattern": "^([a-fA-F0-9]{32}|[a-fA-F0-9]{40}|[a-fA-F0-9]{64}|[a-fA-F0-9]{96}|[a-fA-F0-9]{128})$" }, "license": { "type": "object", "title": "License Object", "oneOf": [ { "required": ["id"] }, { "required": ["name"] } ], "additionalProperties": false, "properties": { "id": { "$ref": "spdx.schema.json", "title": "License ID (SPDX)", "description": "A valid SPDX license ID", "examples": ["Apache-2.0"] }, "name": { "type": "string", "title": "License Name", "description": "If SPDX does not define the license used, this field may be used to provide the license name", "examples": ["Acme Software License"] }, "text": { "title": "License text", "description": "An optional way to include the textual content of a license.", "$ref": "#/definitions/attachment" }, "url": { "type": "string", "title": "License URL", "description": "The URL to the license file. If specified, a 'license' externalReference should also be specified for completeness", "examples": ["https://www.apache.org/licenses/LICENSE-2.0.txt"], "format": "iri-reference" } } }, "licenseChoice": { "type": "object", "title": "License(s)", "additionalProperties": false, "properties": { "license": { "$ref": "#/definitions/license" }, "expression": { "type": "string", "title": "SPDX License Expression", "examples": [ "Apache-2.0 AND (MIT OR GPL-2.0-only)", "GPL-3.0-only WITH Classpath-exception-2.0" ] } }, "oneOf":[ { "required": ["license"] }, { "required": ["expression"] } ] }, "commit": { "type": "object", "title": "Commit", "description": "Specifies an individual commit", "additionalProperties": false, "properties": { "uid": { "type": "string", "title": "UID", "description": "A unique identifier of the commit. This may be version control specific. For example, Subversion uses revision numbers whereas git uses commit hashes." }, "url": { "type": "string", "title": "URL", "description": "The URL to the commit. This URL will typically point to a commit in a version control system.", "format": "iri-reference" }, "author": { "title": "Author", "description": "The author who created the changes in the commit", "$ref": "#/definitions/identifiableAction" }, "committer": { "title": "Committer", "description": "The person who committed or pushed the commit", "$ref": "#/definitions/identifiableAction" }, "message": { "type": "string", "title": "Message", "description": "The text description of the contents of the commit" } } }, "patch": { "type": "object", "title": "Patch", "description": "Specifies an individual patch", "required": [ "type" ], "additionalProperties": false, "properties": { "type": { "type": "string", "enum": [ "unofficial", "monkey", "backport", "cherry-pick" ], "title": "Type", "description": "Specifies the purpose for the patch including the resolution of defects, security issues, or new behavior or functionality.\n\n* __unofficial__ = A patch which is not developed by the creators or maintainers of the software being patched. 
Refer to [https://en.wikipedia.org/wiki/Unofficial_patch](https://en.wikipedia.org/wiki/Unofficial_patch)\n* __monkey__ = A patch which dynamically modifies runtime behavior. Refer to [https://en.wikipedia.org/wiki/Monkey_patch](https://en.wikipedia.org/wiki/Monkey_patch)\n* __backport__ = A patch which takes code from a newer version of software and applies it to older versions of the same software. Refer to [https://en.wikipedia.org/wiki/Backporting](https://en.wikipedia.org/wiki/Backporting)\n* __cherry-pick__ = A patch created by selectively applying commits from other versions or branches of the same software." }, "diff": { "title": "Diff", "description": "The patch file (or diff) that show changes. Refer to [https://en.wikipedia.org/wiki/Diff](https://en.wikipedia.org/wiki/Diff)", "$ref": "#/definitions/diff" }, "resolves": { "type": "array", "additionalItems": false, "items": {"$ref": "#/definitions/issue"}, "title": "Resolves", "description": "A collection of issues the patch resolves" } } }, "diff": { "type": "object", "title": "Diff", "description": "The patch file (or diff) that show changes. Refer to https://en.wikipedia.org/wiki/Diff", "additionalProperties": false, "properties": { "text": { "title": "Diff text", "description": "Specifies the optional text of the diff", "$ref": "#/definitions/attachment" }, "url": { "type": "string", "title": "URL", "description": "Specifies the URL to the diff", "format": "iri-reference" } } }, "issue": { "type": "object", "title": "Diff", "description": "An individual issue that has been resolved.", "required": [ "type" ], "additionalProperties": false, "properties": { "type": { "type": "string", "enum": [ "defect", "enhancement", "security" ], "title": "Type", "description": "Specifies the type of issue" }, "id": { "type": "string", "title": "ID", "description": "The identifier of the issue assigned by the source of the issue" }, "name": { "type": "string", "title": "Name", "description": "The name of the issue" }, "description": { "type": "string", "title": "Description", "description": "A description of the issue" }, "source": { "type": "object", "title": "Source", "description": "The source of the issue where it is documented", "additionalProperties": false, "properties": { "name": { "type": "string", "title": "Name", "description": "The name of the source. For example 'National Vulnerability Database', 'NVD', and 'Apache'" }, "url": { "type": "string", "title": "URL", "description": "The url of the issue documentation as provided by the source", "format": "iri-reference" } } }, "references": { "type": "array", "items": { "type": "string", "format": "iri-reference" }, "title": "References", "description": "A collection of URL's for reference. 
Multiple URLs are allowed.", "examples": ["https://example.com"] } } }, "identifiableAction": { "type": "object", "title": "Identifiable Action", "description": "Specifies an individual commit", "additionalProperties": false, "properties": { "timestamp": { "type": "string", "format": "date-time", "title": "Timestamp", "description": "The timestamp in which the action occurred" }, "name": { "type": "string", "title": "Name", "description": "The name of the individual who performed the action" }, "email": { "type": "string", "format": "idn-email", "title": "E-mail", "description": "The email address of the individual who performed the action" } } }, "externalReference": { "type": "object", "title": "External Reference", "description": "Specifies an individual external reference", "required": [ "url", "type" ], "additionalProperties": false, "properties": { "url": { "type": "string", "title": "URL", "description": "The URL to the external reference", "format": "iri-reference" }, "comment": { "type": "string", "title": "Comment", "description": "An optional comment describing the external reference" }, "type": { "type": "string", "title": "Type", "description": "Specifies the type of external reference. There are built-in types to describe common references. If a type does not exist for the reference being referred to, use the \"other\" type.", "enum": [ "vcs", "issue-tracker", "website", "advisories", "bom", "mailing-list", "social", "chat", "documentation", "support", "distribution", "license", "build-meta", "build-system", "release-notes", "other" ] }, "hashes": { "type": "array", "additionalItems": false, "items": {"$ref": "#/definitions/hash"}, "title": "Hashes", "description": "The hashes of the external reference (if applicable)." } } }, "dependency": { "type": "object", "title": "Dependency", "description": "Defines the direct dependencies of a component. Components that do not have their own dependencies MUST be declared as empty elements within the graph. Components that are not represented in the dependency graph MAY have unknown dependencies. It is RECOMMENDED that implementations assume this to be opaque and not an indicator of a component being dependency-free.", "required": [ "ref" ], "additionalProperties": false, "properties": { "ref": { "$ref": "#/definitions/refType", "title": "Reference", "description": "References a component by the components bom-ref attribute" }, "dependsOn": { "type": "array", "uniqueItems": true, "additionalItems": false, "items": { "$ref": "#/definitions/refType" }, "title": "Depends On", "description": "The bom-ref identifiers of the components that are dependencies of this dependency object." } } }, "service": { "type": "object", "title": "Service Object", "required": [ "name" ], "additionalProperties": false, "properties": { "bom-ref": { "$ref": "#/definitions/refType", "title": "BOM Reference", "description": "An optional identifier which can be used to reference the service elsewhere in the BOM. Every bom-ref MUST be unique within the BOM." }, "provider": { "title": "Provider", "description": "The organization that provides the service.", "$ref": "#/definitions/organizationalEntity" }, "group": { "type": "string", "title": "Service Group", "description": "The grouping name, namespace, or identifier. This will often be a shortened, single name of the company or project that produced the service or domain name. 
Whitespace and special characters should be avoided.", "examples": ["com.acme"] }, "name": { "type": "string", "title": "Service Name", "description": "The name of the service. This will often be a shortened, single name of the service.", "examples": ["ticker-service"] }, "version": { "type": "string", "title": "Service Version", "description": "The service version.", "examples": ["1.0.0"] }, "description": { "type": "string", "title": "Service Description", "description": "Specifies a description for the service" }, "endpoints": { "type": "array", "items": { "type": "string", "format": "iri-reference" }, "title": "Endpoints", "description": "The endpoint URIs of the service. Multiple endpoints are allowed.", "examples": ["https://example.com/api/v1/ticker"] }, "authenticated": { "type": "boolean", "title": "Authentication Required", "description": "A boolean value indicating if the service requires authentication. A value of true indicates the service requires authentication prior to use. A value of false indicates the service does not require authentication." }, "x-trust-boundary": { "type": "boolean", "title": "Crosses Trust Boundary", "description": "A boolean value indicating if use of the service crosses a trust zone or boundary. A value of true indicates that by using the service, a trust boundary is crossed. A value of false indicates that by using the service, a trust boundary is not crossed." }, "data": { "type": "array", "additionalItems": false, "items": {"$ref": "#/definitions/dataClassification"}, "title": "Data Classification", "description": "Specifies the data classification." }, "licenses": { "type": "array", "additionalItems": false, "items": {"$ref": "#/definitions/licenseChoice"}, "title": "Component License(s)" }, "externalReferences": { "type": "array", "additionalItems": false, "items": {"$ref": "#/definitions/externalReference"}, "title": "External References", "description": "External references provide a way to document systems, sites, and information that may be relevant but which are not included with the BOM." }, "services": { "type": "array", "additionalItems": false, "items": {"$ref": "#/definitions/service"}, "uniqueItems": true, "title": "Services", "description": "A list of services included or deployed behind the parent service. This is not a dependency tree. It provides a way to specify a hierarchical representation of service assemblies." }, "releaseNotes": { "$ref": "#/definitions/releaseNotes", "title": "Release notes", "description": "Specifies optional release notes." }, "properties": { "type": "array", "title": "Properties", "description": "Provides the ability to document properties in a name-value store. This provides flexibility to include data not officially supported in the standard without having to use additional namespaces or create extensions. Unlike key-value stores, properties support duplicate names, each potentially having different values. Property names of interest to the general public are encouraged to be registered in the [CycloneDX Property Taxonomy](https://github.com/CycloneDX/cyclonedx-property-taxonomy). Formal registration is OPTIONAL.", "additionalItems": false, "items": {"$ref": "#/definitions/property"} }, "signature": { "$ref": "#/definitions/signature", "title": "Signature", "description": "Enveloped signature in [JSON Signature Format (JSF)](https://cyberphone.github.io/doc/security/jsf.html)." 
} } }, "dataClassification": { "type": "object", "title": "Hash Objects", "required": [ "flow", "classification" ], "additionalProperties": false, "properties": { "flow": { "$ref": "#/definitions/dataFlow", "title": "Directional Flow", "description": "Specifies the flow direction of the data. Direction is relative to the service. Inbound flow states that data enters the service. Outbound flow states that data leaves the service. Bi-directional states that data flows both ways, and unknown states that the direction is not known." }, "classification": { "type": "string", "title": "Classification", "description": "Data classification tags data according to its type, sensitivity, and value if altered, stolen, or destroyed." } } }, "dataFlow": { "type": "string", "enum": [ "inbound", "outbound", "bi-directional", "unknown" ], "title": "Data flow direction", "description": "Specifies the flow direction of the data. Direction is relative to the service. Inbound flow states that data enters the service. Outbound flow states that data leaves the service. Bi-directional states that data flows both ways, and unknown states that the direction is not known." }, "copyright": { "type": "object", "title": "Copyright", "required": [ "text" ], "additionalProperties": false, "properties": { "text": { "type": "string", "title": "Copyright Text" } } }, "componentEvidence": { "type": "object", "title": "Evidence", "description": "Provides the ability to document evidence collected through various forms of extraction or analysis.", "additionalProperties": false, "properties": { "licenses": { "type": "array", "additionalItems": false, "items": {"$ref": "#/definitions/licenseChoice"}, "title": "Component License(s)" }, "copyright": { "type": "array", "additionalItems": false, "items": {"$ref": "#/definitions/copyright"}, "title": "Copyright" } } }, "compositions": { "type": "object", "title": "Compositions", "required": [ "aggregate" ], "additionalProperties": false, "properties": { "aggregate": { "$ref": "#/definitions/aggregateType", "title": "Aggregate", "description": "Specifies an aggregate type that describe how complete a relationship is." }, "assemblies": { "type": "array", "uniqueItems": true, "items": { "type": "string" }, "title": "BOM references", "description": "The bom-ref identifiers of the components or services being described. Assemblies refer to nested relationships whereby a constituent part may include other constituent parts. References do not cascade to child parts. References are explicit for the specified constituent part only." }, "dependencies": { "type": "array", "uniqueItems": true, "items": { "type": "string" }, "title": "BOM references", "description": "The bom-ref identifiers of the components or services being described. Dependencies refer to a relationship whereby an independent constituent part requires another independent constituent part. References do not cascade to transitive dependencies. References are explicit for the specified dependency only." }, "signature": { "$ref": "#/definitions/signature", "title": "Signature", "description": "Enveloped signature in [JSON Signature Format (JSF)](https://cyberphone.github.io/doc/security/jsf.html)." 
} } }, "aggregateType": { "type": "string", "default": "not_specified", "enum": [ "complete", "incomplete", "incomplete_first_party_only", "incomplete_third_party_only", "unknown", "not_specified" ] }, "property": { "type": "object", "title": "Lightweight name-value pair", "properties": { "name": { "type": "string", "title": "Name", "description": "The name of the property. Duplicate names are allowed, each potentially having a different value." }, "value": { "type": "string", "title": "Value", "description": "The value of the property." } } }, "localeType": { "type": "string", "pattern": "^([a-z]{2})(-[A-Z]{2})?$", "title": "Locale", "description": "Defines a syntax for representing two character language code (ISO-639) followed by an optional two character country code. The language code MUST be lower case. If the country code is specified, the country code MUST be upper case. The language code and country code MUST be separated by a minus sign. Examples: en, en-US, fr, fr-CA" }, "releaseType": { "type": "string", "examples": [ "major", "minor", "patch", "pre-release", "internal" ], "description": "The software versioning type. It is RECOMMENDED that the release type use one of 'major', 'minor', 'patch', 'pre-release', or 'internal'. Representing all possible software release types is not practical, so standardizing on the recommended values, whenever possible, is strongly encouraged.\n\n* __major__ = A major release may contain significant changes or may introduce breaking changes.\n* __minor__ = A minor release, also known as an update, may contain a smaller number of changes than major releases.\n* __patch__ = Patch releases are typically unplanned and may resolve defects or important security issues.\n* __pre-release__ = A pre-release may include alpha, beta, or release candidates and typically have limited support. They provide the ability to preview a release prior to its general availability.\n* __internal__ = Internal releases are not for public consumption and are intended to be used exclusively by the project or manufacturer that produced it." }, "note": { "type": "object", "title": "Note", "description": "A note containing the locale and content.", "required": [ "text" ], "additionalProperties": false, "properties": { "locale": { "$ref": "#/definitions/localeType", "title": "Locale", "description": "The ISO-639 (or higher) language code and optional ISO-3166 (or higher) country code. Examples include: \"en\", \"en-US\", \"fr\" and \"fr-CA\"" }, "text": { "title": "Release note content", "description": "Specifies the full content of the release note.", "$ref": "#/definitions/attachment" } } }, "releaseNotes": { "type": "object", "title": "Release notes", "required": [ "type" ], "additionalProperties": false, "properties": { "type": { "$ref": "#/definitions/releaseType", "title": "Type", "description": "The software versioning type the release note describes." }, "title": { "type": "string", "title": "Title", "description": "The title of the release." }, "featuredImage": { "type": "string", "format": "iri-reference", "title": "Featured image", "description": "The URL to an image that may be prominently displayed with the release note." }, "socialImage": { "type": "string", "format": "iri-reference", "title": "Social image", "description": "The URL to an image that may be used in messaging on social media platforms." }, "description": { "type": "string", "title": "Description", "description": "A short description of the release." 
}, "timestamp": { "type": "string", "format": "date-time", "title": "Timestamp", "description": "The date and time (timestamp) when the release note was created." }, "aliases": { "type": "array", "items": { "type": "string" }, "title": "Aliases", "description": "One or more alternate names the release may be referred to. This may include unofficial terms used by development and marketing teams (e.g. code names)." }, "tags": { "type": "array", "items": { "type": "string" }, "title": "Tags", "description": "One or more tags that may aid in search or retrieval of the release note." }, "resolves": { "type": "array", "additionalItems": false, "items": {"$ref": "#/definitions/issue"}, "title": "Resolves", "description": "A collection of issues that have been resolved." }, "notes": { "type": "array", "additionalItems": false, "items": {"$ref": "#/definitions/note"}, "title": "Notes", "description": "Zero or more release notes containing the locale and content. Multiple note objects may be specified to support release notes in a wide variety of languages." }, "properties": { "type": "array", "title": "Properties", "description": "Provides the ability to document properties in a name-value store. This provides flexibility to include data not officially supported in the standard without having to use additional namespaces or create extensions. Unlike key-value stores, properties support duplicate names, each potentially having different values. Property names of interest to the general public are encouraged to be registered in the [CycloneDX Property Taxonomy](https://github.com/CycloneDX/cyclonedx-property-taxonomy). Formal registration is OPTIONAL.", "additionalItems": false, "items": {"$ref": "#/definitions/property"} } } }, "advisory": { "type": "object", "title": "Advisory", "description": "Title and location where advisory information can be obtained. An advisory is a notification of a threat to a component, service, or system.", "required": ["url"], "additionalProperties": false, "properties": { "title": { "type": "string", "title": "Title", "description": "An optional name of the advisory." }, "url": { "type": "string", "title": "URL", "format": "iri-reference", "description": "Location where the advisory can be obtained." } } }, "cwe": { "type": "integer", "minimum": 1, "title": "CWE", "description": "Integer representation of a Common Weaknesses Enumerations (CWE). For example 399 (of https://cwe.mitre.org/data/definitions/399.html)" }, "severity": { "type": "string", "title": "Severity", "description": "Textual representation of the severity of the vulnerability adopted by the analysis method. 
If the analysis method uses values other than what is provided, the user is expected to translate appropriately.", "enum": [ "critical", "high", "medium", "low", "info", "none", "unknown" ] }, "scoreMethod": { "type": "string", "title": "Method", "description": "Specifies the severity or risk scoring methodology or standard used.\n\n* CVSSv2 - [Common Vulnerability Scoring System v2](https://www.first.org/cvss/v2/)\n* CVSSv3 - [Common Vulnerability Scoring System v3](https://www.first.org/cvss/v3-0/)\n* CVSSv31 - [Common Vulnerability Scoring System v3.1](https://www.first.org/cvss/v3-1/)\n* OWASP - [OWASP Risk Rating Methodology](https://owasp.org/www-community/OWASP_Risk_Rating_Methodology)", "enum": [ "CVSSv2", "CVSSv3", "CVSSv31", "OWASP", "other" ] }, "impactAnalysisState": { "type": "string", "title": "Impact Analysis State", "description": "Declares the current state of an occurrence of a vulnerability, after automated or manual analysis. \n\n* __resolved__ = the vulnerability has been remediated. \n* __resolved\\_with\\_pedigree__ = the vulnerability has been remediated and evidence of the changes are provided in the affected components pedigree containing verifiable commit history and/or diff(s). \n* __exploitable__ = the vulnerability may be directly or indirectly exploitable. \n* __in\\_triage__ = the vulnerability is being investigated. \n* __false\\_positive__ = the vulnerability is not specific to the component or service and was falsely identified or associated. \n* __not\\_affected__ = the component or service is not affected by the vulnerability. Justification should be specified for all not_affected cases.", "enum": [ "resolved", "resolved_with_pedigree", "exploitable", "in_triage", "false_positive", "not_affected" ] }, "impactAnalysisJustification": { "type": "string", "title": "Impact Analysis Justification", "description": "The rationale of why the impact analysis state was asserted. \n\n* __code\\_not\\_present__ = the code has been removed or tree-shaked. \n* __code\\_not\\_reachable__ = the vulnerable code is not invoked at runtime. \n* __requires\\_configuration__ = exploitability requires a configurable option to be set/unset. \n* __requires\\_dependency__ = exploitability requires a dependency that is not present. \n* __requires\\_environment__ = exploitability requires a certain environment which is not present. \n* __protected\\_by\\_compiler__ = exploitability requires a compiler flag to be set/unset. \n* __protected\\_at\\_runtime__ = exploits are prevented at runtime. \n* __protected\\_at\\_perimeter__ = attacks are blocked at physical, logical, or network perimeter. \n* __protected\\_by\\_mitigating\\_control__ = preventative measures have been implemented that reduce the likelihood and/or impact of the vulnerability.", "enum": [ "code_not_present", "code_not_reachable", "requires_configuration", "requires_dependency", "requires_environment", "protected_by_compiler", "protected_at_runtime", "protected_at_perimeter", "protected_by_mitigating_control" ] }, "rating": { "type": "object", "title": "Rating", "description": "Defines the severity or risk ratings of a vulnerability.", "additionalProperties": false, "properties": { "source": { "$ref": "#/definitions/vulnerabilitySource", "description": "The source that calculated the severity or risk rating of the vulnerability." }, "score": { "type": "number", "title": "Score", "description": "The numerical score of the rating." 
}, "severity": { "$ref": "#/definitions/severity", "description": "Textual representation of the severity that corresponds to the numerical score of the rating." }, "method": { "$ref": "#/definitions/scoreMethod" }, "vector": { "type": "string", "title": "Vector", "description": "Textual representation of the metric values used to score the vulnerability" }, "justification": { "type": "string", "title": "Justification", "description": "An optional reason for rating the vulnerability as it was" } } }, "vulnerabilitySource": { "type": "object", "title": "Source", "description": "The source of vulnerability information. This is often the organization that published the vulnerability.", "additionalProperties": false, "properties": { "url": { "type": "string", "title": "URL", "description": "The url of the vulnerability documentation as provided by the source.", "examples": [ "https://nvd.nist.gov/vuln/detail/CVE-2021-39182" ] }, "name": { "type": "string", "title": "Name", "description": "The name of the source.", "examples": [ "NVD", "National Vulnerability Database", "OSS Index", "VulnDB", "GitHub Advisories" ] } } }, "vulnerability": { "type": "object", "title": "Vulnerability", "description": "Defines a weakness in an component or service that could be exploited or triggered by a threat source.", "additionalProperties": false, "properties": { "bom-ref": { "$ref": "#/definitions/refType", "title": "BOM Reference", "description": "An optional identifier which can be used to reference the vulnerability elsewhere in the BOM. Every bom-ref MUST be unique within the BOM." }, "id": { "type": "string", "title": "ID", "description": "The identifier that uniquely identifies the vulnerability.", "examples": [ "CVE-2021-39182", "GHSA-35m5-8cvj-8783", "SNYK-PYTHON-ENROCRYPT-1912876" ] }, "source": { "$ref": "#/definitions/vulnerabilitySource", "description": "The source that published the vulnerability." }, "references": { "type": "array", "title": "References", "description": "Zero or more pointers to vulnerabilities that are the equivalent of the vulnerability specified. Often times, the same vulnerability may exist in multiple sources of vulnerability intelligence, but have different identifiers. References provide a way to correlate vulnerabilities across multiple sources of vulnerability intelligence.", "additionalItems": false, "items": { "required": [ "id", "source" ], "additionalProperties": false, "properties": { "id": { "type": "string", "title": "ID", "description": "An identifier that uniquely identifies the vulnerability.", "examples": [ "CVE-2021-39182", "GHSA-35m5-8cvj-8783", "SNYK-PYTHON-ENROCRYPT-1912876" ] }, "source": { "$ref": "#/definitions/vulnerabilitySource", "description": "The source that published the vulnerability." } } } }, "ratings": { "type": "array", "title": "Ratings", "description": "List of vulnerability ratings", "additionalItems": false, "items": { "$ref": "#/definitions/rating" } }, "cwes": { "type": "array", "title": "CWEs", "description": "List of Common Weaknesses Enumerations (CWEs) codes that describes this vulnerability. For example 399 (of https://cwe.mitre.org/data/definitions/399.html)", "examples": [399], "additionalItems": false, "items": { "$ref": "#/definitions/cwe" } }, "description": { "type": "string", "title": "Description", "description": "A description of the vulnerability as provided by the source." 
}, "detail": { "type": "string", "title": "Details", "description": "If available, an in-depth description of the vulnerability as provided by the source organization. Details often include examples, proof-of-concepts, and other information useful in understanding root cause." }, "recommendation": { "type": "string", "title": "Details", "description": "Recommendations of how the vulnerability can be remediated or mitigated." }, "advisories": { "type": "array", "title": "Advisories", "description": "Published advisories of the vulnerability if provided.", "additionalItems": false, "items": { "$ref": "#/definitions/advisory" } }, "created": { "type": "string", "format": "date-time", "title": "Created", "description": "The date and time (timestamp) when the vulnerability record was created in the vulnerability database." }, "published": { "type": "string", "format": "date-time", "title": "Published", "description": "The date and time (timestamp) when the vulnerability record was first published." }, "updated": { "type": "string", "format": "date-time", "title": "Updated", "description": "The date and time (timestamp) when the vulnerability record was last updated." }, "credits": { "type": "object", "title": "Credits", "description": "Individuals or organizations credited with the discovery of the vulnerability.", "additionalProperties": false, "properties": { "organizations": { "type": "array", "title": "Organizations", "description": "The organizations credited with vulnerability discovery.", "additionalItems": false, "items": { "$ref": "#/definitions/organizationalEntity" } }, "individuals": { "type": "array", "title": "Individuals", "description": "The individuals, not associated with organizations, that are credited with vulnerability discovery.", "additionalItems": false, "items": { "$ref": "#/definitions/organizationalContact" } } } }, "tools": { "type": "array", "title": "Creation Tools", "description": "The tool(s) used to identify, confirm, or score the vulnerability.", "additionalItems": false, "items": {"$ref": "#/definitions/tool"} }, "analysis": { "type": "object", "title": "Impact Analysis", "description": "An assessment of the impact and exploitability of the vulnerability.", "additionalProperties": false, "properties": { "state": { "$ref": "#/definitions/impactAnalysisState" }, "justification": { "$ref": "#/definitions/impactAnalysisJustification" }, "response": { "type": "array", "title": "Response", "description": "A response to the vulnerability by the manufacturer, supplier, or project responsible for the affected component or service. More than one response is allowed. Responses are strongly encouraged for vulnerabilities where the analysis state is exploitable.", "additionalItems": false, "items": { "type": "string", "enum": [ "can_not_fix", "will_not_fix", "update", "rollback", "workaround_available" ] } }, "detail": { "type": "string", "title": "Detail", "description": "Detailed description of the impact including methods used during assessment. If a vulnerability is not exploitable, this field should include specific details on why the component or service is not impacted by this vulnerability." 
} } }, "affects": { "type": "array", "uniqueItems": true, "additionalItems": false, "items": { "required": [ "ref" ], "additionalProperties": false, "properties": { "ref": { "$ref": "#/definitions/refType", "title": "Reference", "description": "References a component or service by the objects bom-ref" }, "versions": { "type": "array", "title": "Versions", "description": "Zero or more individual versions or range of versions.", "additionalItems": false, "items": { "oneOf": [ { "required": ["version"] }, { "required": ["range"] } ], "additionalProperties": false, "properties": { "version": { "description": "A single version of a component or service.", "$ref": "#/definitions/version" }, "range": { "description": "A version range specified in Package URL Version Range syntax (vers) which is defined at https://github.com/package-url/purl-spec/VERSION-RANGE-SPEC.rst", "$ref": "#/definitions/range" }, "status": { "description": "The vulnerability status for the version or range of versions.", "$ref": "#/definitions/affectedStatus", "default": "affected" } } } } } }, "title": "Affects", "description": "The components or services that are affected by the vulnerability." }, "properties": { "type": "array", "title": "Properties", "description": "Provides the ability to document properties in a name-value store. This provides flexibility to include data not officially supported in the standard without having to use additional namespaces or create extensions. Unlike key-value stores, properties support duplicate names, each potentially having different values. Property names of interest to the general public are encouraged to be registered in the [CycloneDX Property Taxonomy](https://github.com/CycloneDX/cyclonedx-property-taxonomy). Formal registration is OPTIONAL.", "additionalItems": false, "items": { "$ref": "#/definitions/property" } } } }, "affectedStatus": { "description": "The vulnerability status of a given version or range of versions of a product. The statuses 'affected' and 'unaffected' indicate that the version is affected or unaffected by the vulnerability. The status 'unknown' indicates that it is unknown or unspecified whether the given version is affected. There can be many reasons for an 'unknown' status, including that an investigation has not been undertaken or that a vendor has not disclosed the status.", "type": "string", "enum": [ "affected", "unaffected", "unknown" ] }, "version": { "description": "A single version of a component or service.", "type": "string", "minLength": 1, "maxLength": 1024 }, "range": { "description": "A version range specified in Package URL Version Range syntax (vers) which is defined at https://github.com/package-url/purl-spec/VERSION-RANGE-SPEC.rst", "type": "string", "minLength": 1, "maxLength": 1024 }, "signature": { "$ref": "jsf-0.82.schema.json#/definitions/signature", "title": "Signature", "description": "Enveloped signature in [JSON Signature Format (JSF)](https://cyberphone.github.io/doc/security/jsf.html)." 
} } } ================================================ FILE: tests/resources/grype-test-db.tar.gz.license ================================================ SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) SPDX-License-Identifier: Apache-2.0 ================================================ FILE: tests/resources/jsf-0.82.schema.json ================================================ { "$schema": "http://json-schema.org/draft-07/schema#", "$id": "http://cyclonedx.org/schema/jsf-0.82.schema.json", "type": "object", "title": "JSON Signature Format (JSF) standard", "definitions": { "signature": { "type": "object" } } } ================================================ FILE: tests/resources/make_grype_test_db.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Generate the minimal grype vulnerability DB used by the integration tests. The output is tests/resources/grype-test-db.tar.gz — a ~2 KB archive containing a minimal SQLite database seeded with a single synthetic CVE entry. Committing this artifact avoids the ~50 s cold-cache grype DB download during test runs. Schema target: grype DB model version 6 (grype v0.79+). Find the current model: grype db status | grep Schema or: SELECT model FROM db_metadata; in a freshly downloaded DB. When grype bumps the model integer: 1. Update MODEL below to match. 2. Verify the vulnerability and affected-CPE blob JSON schemas against grype's source (db/v6/models/) or a live DB row. 3. Re-run this script and commit the updated grype-test-db.tar.gz. Synthetic CVE: CVE-TEST-2026-00001 affects sbomnix-test-first == 1.0. Grype matches pkg:nix packages via auto-generated CPEs (--add-cpes-if-none). The generated CPE for 'sbomnix-test-first' 1.0 is: cpe:2.3:a:sbomnix-test-first:sbomnix-test-first:1.0:*:*:*:*:*:*:* so the DB entry uses vendor=product='sbomnix-test-first'. """ import json import sqlite3 import tarfile from pathlib import Path HERE = Path(__file__).resolve().parent OUT_ARCHIVE = HERE / "grype-test-db.tar.gz" MODEL = 6 REVISION = 1 ADDITION = 4 SYNTHETIC_CVE = "CVE-TEST-2026-00001" TEST_PACKAGE = "sbomnix-test-first" TEST_VERSION = "1.0" VULN_BLOB = json.dumps( { "id": SYNTHETIC_CVE, "assigner": ["test"], "description": ( "Synthetic vulnerability for sbomnix grype integration tests. " "Not a real CVE." ), "refs": [], "severities": [ { "scheme": "CVSS", "value": {"vector": "AV:N/AC:L/Au:N/C:N/I:N/A:P", "version": "2.0"}, "source": "test", "rank": 1, } ], } ) # Blob linked from affected_cpe_handles; constraint targets the test version. AFFECTED_CPE_BLOB = json.dumps( { "cves": [SYNTHETIC_CVE], "ranges": [{"version": {"constraint": f"= {TEST_VERSION}"}}], } ) # Exact DDL as created by grype — constraint names must match for migration.
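# The tables below follow grype's handle/blob indirection: *_handles rows
# hold identity and foreign keys, while the JSON payloads live in `blobs`
# and are referenced by blob_id. build() wires up a single match chain,
# vulnerability_handles -> affected_cpe_handles -> cpes, with the version
# constraint carried in AFFECTED_CPE_BLOB above.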
DDL = """ CREATE TABLE `affected_cpe_handles` (`id` integer PRIMARY KEY AUTOINCREMENT,`vulnerability_id` integer NOT NULL,`cpe_id` integer,`blob_id` integer,CONSTRAINT `fk_affected_cpe_handles_cpe` FOREIGN KEY (`cpe_id`) REFERENCES `cpes`(`id`),CONSTRAINT `fk_affected_cpe_handles_vulnerability` FOREIGN KEY (`vulnerability_id`) REFERENCES `vulnerability_handles`(`id`)); CREATE TABLE `affected_package_handles` (`id` integer PRIMARY KEY AUTOINCREMENT,`vulnerability_id` integer NOT NULL,`operating_system_id` integer,`package_id` integer,`blob_id` integer,CONSTRAINT `fk_affected_package_handles_vulnerability` FOREIGN KEY (`vulnerability_id`) REFERENCES `vulnerability_handles`(`id`),CONSTRAINT `fk_affected_package_handles_operating_system` FOREIGN KEY (`operating_system_id`) REFERENCES `operating_systems`(`id`),CONSTRAINT `fk_affected_package_handles_package` FOREIGN KEY (`package_id`) REFERENCES `packages`(`id`)); CREATE TABLE `blobs` (`id` integer PRIMARY KEY AUTOINCREMENT,`value` text NOT NULL); CREATE TABLE `cpes` (`id` integer PRIMARY KEY AUTOINCREMENT,`part` text NOT NULL,`vendor` text,`product` text NOT NULL,`edition` text,`language` text,`software_edition` text,`target_hardware` text,`target_software` text,`other` text); CREATE TABLE `cwe_handles` (`id` integer PRIMARY KEY AUTOINCREMENT,`cve` text NOT NULL,`cwe` text NOT NULL,`source` text,`type` text); CREATE TABLE `db_metadata` (`build_timestamp` datetime NOT NULL,`model` integer NOT NULL,`revision` integer NOT NULL,`addition` integer NOT NULL); CREATE TABLE `epss_handles` (`id` integer PRIMARY KEY AUTOINCREMENT,`cve` text NOT NULL,`epss` real NOT NULL,`percentile` real NOT NULL); CREATE TABLE `epss_metadata` (`date` datetime NOT NULL); CREATE TABLE `known_exploited_vulnerability_handles` (`id` integer PRIMARY KEY AUTOINCREMENT,`cve` text NOT NULL,`blob_id` integer); CREATE TABLE `operating_system_specifier_overrides` (`alias` text,`version` text,`version_pattern` text,`codename` text,`channel` text,`replacement` text,`replacement_major_version` text,`replacement_minor_version` text,`replacement_label_version` text,`replacement_channel` text,`rolling` numeric,`applicable_client_db_schemas` text,PRIMARY KEY (`alias`,`version`,`version_pattern`,`replacement`,`replacement_major_version`,`replacement_minor_version`,`replacement_label_version`,`replacement_channel`,`rolling`)); CREATE TABLE `operating_systems` (`id` integer PRIMARY KEY AUTOINCREMENT,`name` text,`release_id` text,`major_version` text,`minor_version` text,`label_version` text,`codename` text,`channel` text,`eol_date` datetime,`eoas_date` datetime); CREATE TABLE `package_cpes` (`cpe_id` integer,`package_id` integer,PRIMARY KEY (`cpe_id`,`package_id`),CONSTRAINT `fk_package_cpes_cpe` FOREIGN KEY (`cpe_id`) REFERENCES `cpes`(`id`),CONSTRAINT `fk_package_cpes_package` FOREIGN KEY (`package_id`) REFERENCES `packages`(`id`)); CREATE TABLE `package_specifier_overrides` (`ecosystem` text,`replacement_ecosystem` text,PRIMARY KEY (`ecosystem`,`replacement_ecosystem`)); CREATE TABLE `packages` (`id` integer PRIMARY KEY AUTOINCREMENT,`ecosystem` text,`name` text); CREATE TABLE `providers` (`id` text,`version` text,`processor` text,`date_captured` datetime,`input_digest` text,PRIMARY KEY (`id`)); CREATE TABLE `unaffected_cpe_handles` (`id` integer PRIMARY KEY AUTOINCREMENT,`vulnerability_id` integer NOT NULL,`cpe_id` integer,`blob_id` integer,CONSTRAINT `fk_unaffected_cpe_handles_vulnerability` FOREIGN KEY (`vulnerability_id`) REFERENCES `vulnerability_handles`(`id`),CONSTRAINT 
`fk_unaffected_cpe_handles_cpe` FOREIGN KEY (`cpe_id`) REFERENCES `cpes`(`id`)); CREATE TABLE `unaffected_package_handles` (`id` integer PRIMARY KEY AUTOINCREMENT,`vulnerability_id` integer NOT NULL,`operating_system_id` integer,`package_id` integer,`blob_id` integer,CONSTRAINT `fk_unaffected_package_handles_vulnerability` FOREIGN KEY (`vulnerability_id`) REFERENCES `vulnerability_handles`(`id`),CONSTRAINT `fk_unaffected_package_handles_operating_system` FOREIGN KEY (`operating_system_id`) REFERENCES `operating_systems`(`id`),CONSTRAINT `fk_unaffected_package_handles_package` FOREIGN KEY (`package_id`) REFERENCES `packages`(`id`)); CREATE TABLE `vulnerability_aliases` (`name` text,`alias` text NOT NULL,PRIMARY KEY (`name`,`alias`)); CREATE TABLE `vulnerability_handles` (`id` integer PRIMARY KEY AUTOINCREMENT,`name` text NOT NULL,`status` text NOT NULL,`published_date` datetime,`modified_date` datetime,`withdrawn_date` datetime,`provider_id` text NOT NULL,`blob_id` integer,CONSTRAINT `fk_vulnerability_handles_provider` FOREIGN KEY (`provider_id`) REFERENCES `providers`(`id`)); """ def build(db_path: Path) -> None: db_path.unlink(missing_ok=True) con = sqlite3.connect(db_path) con.executescript(DDL) con.execute( "INSERT INTO db_metadata VALUES (datetime('now'), ?, ?, ?)", (MODEL, REVISION, ADDITION), ) con.execute( "INSERT INTO providers VALUES ('test', '1', 'test', datetime('now'), " "'xxh64:0000000000000000')" ) # Vulnerability detail blob (blob_id=1) con.execute("INSERT INTO blobs(value) VALUES (?)", (VULN_BLOB,)) # Affected-CPE constraint blob (blob_id=2) con.execute("INSERT INTO blobs(value) VALUES (?)", (AFFECTED_CPE_BLOB,)) con.execute( "INSERT INTO vulnerability_handles" "(name, status, published_date, modified_date, provider_id, blob_id)" " VALUES (?, 'active', datetime('now'), datetime('now'), 'test', 1)", (SYNTHETIC_CVE,), ) # CPE: cpe:2.3:a:sbomnix-test-first:sbomnix-test-first:*:*:*:*:*:*:*:* con.execute( "INSERT INTO cpes(part, vendor, product) VALUES ('a', ?, ?)", (TEST_PACKAGE, TEST_PACKAGE), ) con.execute( "INSERT INTO affected_cpe_handles(vulnerability_id, cpe_id, blob_id)" " VALUES (1, 1, 2)" ) con.commit() con.close() def main() -> None: db_path = HERE / "vulnerability.db" build(db_path) print(f"DB: {db_path.stat().st_size // 1024} KB") with tarfile.open(OUT_ARCHIVE, "w:gz") as tf: tf.add(db_path, arcname="vulnerability.db") db_path.unlink() print(f"Archive: {OUT_ARCHIVE.stat().st_size // 1024} KB → {OUT_ARCHIVE}") if __name__ == "__main__": main() ================================================ FILE: tests/resources/nixmeta-package-set.nix ================================================ # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 { ... 
}: let mkPackage = { name, pname, version, description, homepage, }: builtins.derivation { inherit name pname version; system = builtins.currentSystem; builder = "/bin/sh"; args = [ "-c" "echo ${name} > $out" ]; meta = { inherit description homepage; license = { shortName = "Apache-2.0"; spdxId = "Apache-2.0"; }; }; }; in { first = mkPackage { name = "sbomnix-meta-first-1.0"; pname = "sbomnix-meta-first"; version = "1.0"; description = "First sbomnix metadata fixture package"; homepage = "https://example.test/sbomnix-meta-first"; }; second = mkPackage { name = "sbomnix-meta-second-2.0"; pname = "sbomnix-meta-second"; version = "2.0"; description = "Second sbomnix metadata fixture package"; homepage = "https://example.test/sbomnix-meta-second"; }; } ================================================ FILE: tests/resources/provenance-1.0.schema.json ================================================ { "$schema": "http://json-schema.org/draft-07/schema#", "$id": "https://in-toto.io/Statement/v1", "title": "SLSA Provenance v1.0", "type": "object", "additionalProperties": false, "required": [ "_type", "subject", "predicateType", "predicate" ], "properties": { "_type": { "description": "Identifier for the schema of the Statement. Always https://in-toto.io/Statement/v1 for this version of the spec.", "type": "string" }, "subject": { "description": "Set of software artifacts that the attestation applies to. Each element represents a single software artifact.", "type": "array", "items": { "type": "object", "properties": { "name": { "description": "Identifier to distinguish this artifact from others within the subject.", "type": "string" }, "digest": { "description": "Collection of cryptographic digests for the contents of this artifact.", "type": "object" } } } }, "predicateType": { "description": "URI identifying the type of the Predicate.", "type": "string" }, "predicate": { "type": "object", "additionalProperties": false, "required": [ "buildDefinition", "runDetails" ], "properties": { "buildDefinition": { "type": "object", "additionalProperties": false, "minProperties": 4, "properties": { "buildType": { "description": "Identifies the template for how to perform the build and interpret the parameters and dependencies.", "type": "string" }, "externalParameters": { "description": "The parameters that are under external control, such as those set by a user or tenant of the build platform.", "type": "object" }, "internalParameters": { "description": "The parameters that are under the control of the entity represented by builder.id.", "type": "object" }, "resolvedDependencies": { "description": "Unordered collection of artifacts needed at build time.", "type": "array", "items": { "$ref": "#/$defs/ResourceDescriptor" } } } }, "runDetails": { "type": "object", "additionalProperties": false, "required": [ "builder", "metadata", "byproducts" ], "properties": { "builder": { "description": "Identifies the build platform that executed the invocation.", "type": "object", "properties": { "id": { "description": "URI indicating the transitive closure of the trusted build platform.", "type": "string" }, "builderDependencies": { "description": "Dependencies used by the orchestrator that are not run within the workload and that do not affect the build", "type": "array", "items": { "$ref": "#/$defs/ResourceDescriptor" } }, "version": { "description": "Map of names of components of the build platform to their version.", "type": "object" } } }, "metadata": { "description": "Metadata about this particular execution of the
build.", "type": "object", "properties": { "invocationId": { "description": "Identifies this particular build invocation", "type": "string" }, "startedOn": { "description": "The timestamp of when the build started.", "type": "string" }, "finishedOn": { "description": "The timestamp of when the build completed.", "type": "string" } } }, "byproducts": { "description": "Additional artifacts generated during the build that are not considered the “output” of the build", "type": "array", "items": { "$ref": "#/$defs/ResourceDescriptor" } } } } } } }, "$defs": { "ResourceDescriptor": { "$id": "/schema/ResourceDescriptor", "$schema": "http://json-schema.org/draft-07/schema#", "type": "object", "properties": { "name": { "description": "Machine-readable identifier for distinguishing between descriptors.", "type": "string" }, "uri": { "description": "A URI used to identify the resource or artifact globally.", "type": "string" }, "digest": { "description": "A set of cryptographic digests of the contents of the resource or artifact.", "type": "object" }, "content": { "description": "The contents of the resource or artifact.", "type": "string" }, "downloadLocation": { "description": "The location of the described resource or artifact, if different from the uri.", "type": "string" }, "mediaType": { "description": "The MIME Type (i.e., media type) of the described resource or artifact.", "type": "string" }, "annotations": { "description": "This field MAY be used to provide additional information or metadata about the resource or artifact that may be useful to the consumer when evaluating the attestation against a policy.", "type": "object" } } } } } ================================================ FILE: tests/resources/repology/cves_openssl.html ================================================
CVE ID Affected version(s)
CVE-2024-1111 [3.0.0, 3.1.0]
CVE-2024-2222 [1.0.0, 2.0.0]
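The two rows above pair a CVE id with a bracketed list of affected versions. A minimal sketch, not the repo's actual parser, of how such a row splits into its CVE id and version tokens (the helper name and the plain-text row format are assumptions taken from this fixture alone):

import re

def parse_cve_row(row: str) -> tuple[str, list[str]]:
    """Split 'CVE-2024-1111 [3.0.0, 3.1.0]' into a CVE id and version strings."""
    cve, _, versions = row.partition(" ")
    return cve, re.findall(r"[0-9][\w.]*", versions)

assert parse_cve_row("CVE-2024-2222 [1.0.0, 2.0.0]") == ("CVE-2024-2222", ["1.0.0", "2.0.0"])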
================================================ FILE: tests/resources/repology/projects_empty.html ================================================

No matches

================================================ FILE: tests/resources/repology/projects_hello.html ================================================
Project Newest Selected
hello 2.11 2.12-rc1 2.10 2.11
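These three HTML files act as canned repology responses for the parser tests. A small illustrative loader, assuming only the fixture paths visible in the tree above (the load_fixture helper is hypothetical, not the repo's test code):

from pathlib import Path

REPOLOGY_DIR = Path("tests/resources/repology")

def load_fixture(name: str) -> str:
    """Return fixture HTML, e.g. load_fixture('projects_hello.html')."""
    return (REPOLOGY_DIR / name).read_text(encoding="utf-8")

assert "No matches" in load_fixture("projects_empty.html")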
================================================ FILE: tests/resources/sample_cdx_sbom.json ================================================ { "bomFormat": "CycloneDX", "specVersion": "1.4", "serialNumber": "urn:uuid:11111111-1111-4111-8111-111111111111", "version": 1, "metadata": { "component": { "type": "library", "name": "openssl", "version": "3.1.0", "licenses": [ { "license": { "id": "Apache-2.0" } } ], "purl": "pkg:generic/openssl@3.1.0" } }, "components": [] } ================================================ FILE: tests/resources/spdx.schema.json ================================================ { "$schema": "http://json-schema.org/draft-07/schema#", "$id": "http://cyclonedx.org/schema/spdx.schema.json", "type": "string", "title": "SPDX license identifier", "description": "Local offline companion schema for CycloneDX license id validation." } ================================================ FILE: tests/resources/spdx_bom-2.3.schema.json ================================================ { "$schema" : "http://json-schema.org/draft-07/schema#", "$id" : "http://spdx.org/rdf/terms/2.3", "title" : "SPDX 2.3", "type" : "object", "properties" : { "SPDXID" : { "type" : "string", "description" : "Uniquely identify any element in an SPDX document which may be referenced by other elements." }, "annotations" : { "description" : "Provide additional information about an SpdxElement.", "type" : "array", "items" : { "type" : "object", "properties" : { "annotationDate" : { "description" : "Identify when the comment was made. This is to be specified according to the combined date and time in the UTC format, as specified in the ISO 8601 standard.", "type" : "string" }, "annotationType" : { "description" : "Type of the annotation.", "type" : "string", "enum" : [ "OTHER", "REVIEW" ] }, "annotator" : { "description" : "This field identifies the person, organization, or tool that has commented on a file, package, snippet, or the entire document.", "type" : "string" }, "comment" : { "type" : "string" } }, "required" : [ "annotationDate", "annotationType", "annotator", "comment" ], "additionalProperties" : false, "description" : "An Annotation is a comment on an SpdxItem by an agent." } }, "comment" : { "type" : "string" }, "creationInfo" : { "type" : "object", "properties" : { "comment" : { "type" : "string" }, "created" : { "description" : "Identify when the SPDX document was originally created. The date is to be specified according to combined date and time in UTC format as specified in ISO 8601 standard.", "type" : "string" }, "creators" : { "description" : "Identify who (or what, in the case of a tool) created the SPDX document. If the SPDX document was created by an individual, indicate the person's name. If the SPDX document was created on behalf of a company or organization, indicate the entity name. If the SPDX document was created using a software tool, indicate the name and version for that tool. If multiple participants or tools were involved, use multiple instances of this field. Person name or organization name may be designated as “anonymous” if appropriate.", "minItems" : 1, "type" : "array", "items" : { "description" : "Identify who (or what, in the case of a tool) created the SPDX document. If the SPDX document was created by an individual, indicate the person's name. If the SPDX document was created on behalf of a company or organization, indicate the entity name. If the SPDX document was created using a software tool, indicate the name and version for that tool. 
If multiple participants or tools were involved, use multiple instances of this field. Person name or organization name may be designated as “anonymous” if appropriate.", "type" : "string" } }, "licenseListVersion" : { "description" : "An optional field for creators of the SPDX file to provide the version of the SPDX License List used when the SPDX file was created.", "type" : "string" } }, "required" : [ "created", "creators" ], "additionalProperties" : false, "description" : "One instance is required for each SPDX file produced. It provides the necessary information for forward and backward compatibility for processing tools." }, "dataLicense" : { "description" : "License expression for dataLicense. See SPDX Annex D for the license expression syntax. Compliance with the SPDX specification includes populating the SPDX fields therein with data related to such fields (\"SPDX-Metadata\"). The SPDX specification contains numerous fields where an SPDX document creator may provide relevant explanatory text in SPDX-Metadata. Without opining on the lawfulness of \"database rights\" (in jurisdictions where applicable), such explanatory text is copyrightable subject matter in most Berne Convention countries. By using the SPDX specification, or any portion hereof, you hereby agree that any copyright rights (as determined by your jurisdiction) in any SPDX-Metadata, including without limitation explanatory text, shall be subject to the terms of the Creative Commons CC0 1.0 Universal license. For SPDX-Metadata not containing any copyright rights, you hereby agree and acknowledge that the SPDX-Metadata is provided to you \"as-is\" and without any representations or warranties of any kind concerning the SPDX-Metadata, express, implied, statutory or otherwise, including without limitation warranties of title, merchantability, fitness for a particular purpose, non-infringement, or the absence of latent or other defects, accuracy, or the presence or absence of errors, whether or not discoverable, all to the greatest extent permissible under applicable law.", "type" : "string" }, "externalDocumentRefs" : { "description" : "Identify any external SPDX documents referenced within this SPDX document.", "type" : "array", "items" : { "type" : "object", "properties" : { "checksum" : { "type" : "object", "properties" : { "algorithm" : { "description" : "Identifies the algorithm used to produce the subject Checksum. Currently, SHA-1 is the only supported algorithm. It is anticipated that other algorithms will be supported at a later time.", "type" : "string", "enum" : [ "SHA1", "BLAKE3", "SHA3-384", "SHA256", "SHA384", "BLAKE2b-512", "BLAKE2b-256", "SHA3-512", "MD2", "ADLER32", "MD4", "SHA3-256", "BLAKE2b-384", "SHA512", "MD6", "MD5", "SHA224" ] }, "checksumValue" : { "description" : "The checksumValue property provides a lower case hexadecimal encoded digest value produced using a specific algorithm.", "type" : "string" } }, "required" : [ "algorithm", "checksumValue" ], "additionalProperties" : false, "description" : "A Checksum is a value that allows the contents of a file to be authenticated. Even small changes to the content of the file will change its checksum. This class allows the results of a variety of checksum and cryptographic message digest algorithms to be represented."
}, "externalDocumentId" : { "description" : "externalDocumentId is a string containing letters, numbers, ., - and/or + which uniquely identifies an external document within this document.", "type" : "string" }, "spdxDocument" : { "description" : "SPDX ID for SpdxDocument. A property containing an SPDX document.", "type" : "string" } }, "required" : [ "checksum", "externalDocumentId", "spdxDocument" ], "additionalProperties" : false, "description" : "Information about an external SPDX document reference including the checksum. This allows for verification of the external references." } }, "hasExtractedLicensingInfos" : { "description" : "Indicates that a particular ExtractedLicensingInfo was defined in the subject SpdxDocument.", "type" : "array", "items" : { "type" : "object", "properties" : { "comment" : { "type" : "string" }, "crossRefs" : { "description" : "Cross Reference Detail for a license SeeAlso URL", "type" : "array", "items" : { "type" : "object", "properties" : { "isLive" : { "description" : "Indicate a URL is still a live accessible location on the public internet", "type" : "boolean" }, "isValid" : { "description" : "True if the URL is a valid well formed URL", "type" : "boolean" }, "isWayBackLink" : { "description" : "True if the License SeeAlso URL points to a Wayback archive", "type" : "boolean" }, "match" : { "description" : "Status of a License List SeeAlso URL reference if it refers to a website that matches the license text.", "type" : "string" }, "order" : { "description" : "The ordinal order of this element within a list", "type" : "integer" }, "timestamp" : { "description" : "Timestamp", "type" : "string" }, "url" : { "description" : "URL Reference", "type" : "string" } }, "required" : [ "url" ], "additionalProperties" : false, "description" : "Cross reference details for the a URL reference" } }, "extractedText" : { "description" : "Provide a copy of the actual text of the license reference extracted from the package, file or snippet that is associated with the License Identifier to aid in future analysis.", "type" : "string" }, "licenseId" : { "description" : "A human readable short form license identifier for a license. The license ID is either on the standard license list or the form \"LicenseRef-[idString]\" where [idString] is a unique string containing letters, numbers, \".\" or \"-\". When used within a license expression, the license ID can optionally include a reference to an external document in the form \"DocumentRef-[docrefIdString]:LicenseRef-[idString]\" where docRefIdString is an ID for an external document reference.", "type" : "string" }, "name" : { "description" : "Identify name of this SpdxElement.", "type" : "string" }, "seeAlsos" : { "type" : "array", "items" : { "type" : "string" } } }, "required" : [ "extractedText", "licenseId" ], "additionalProperties" : false, "description" : "An ExtractedLicensingInfo represents a license or licensing notice that was found in a package, file or snippet. Any license text that is recognized as a license may be represented as a License rather than an ExtractedLicensingInfo." } }, "name" : { "description" : "Identify name of this SpdxElement.", "type" : "string" }, "revieweds" : { "description" : "Reviewed", "type" : "array", "items" : { "type" : "object", "properties" : { "comment" : { "type" : "string" }, "reviewDate" : { "description" : "The date and time at which the SpdxDocument was reviewed. 
This value must be in UTC and have 'Z' as its timezone indicator.", "type" : "string" }, "reviewer" : { "description" : "The name and, optionally, contact information of the person who performed the review. Values of this property must conform to the agent and tool syntax. The reviewer property is deprecated in favor of Annotation with an annotationType review.", "type" : "string" } }, "required" : [ "reviewDate" ], "additionalProperties" : false, "description" : "This class has been deprecated in favor of an Annotation with an Annotation type of review." } }, "spdxVersion" : { "description" : "Provide a reference number that can be used to understand how to parse and interpret the rest of the file. It will enable both future changes to the specification and to support backward compatibility. The version number consists of a major and minor version indicator. The major field will be incremented when incompatible changes between versions are made (one or more sections are created, modified or deleted). The minor field will be incremented when backwards compatible changes are made.", "type" : "string" }, "documentNamespace" : { "type" : "string", "description" : "The URI provides an unambiguous mechanism for other SPDX documents to reference SPDX elements within this SPDX document." }, "documentDescribes" : { "description" : "Packages, files and/or Snippets described by this SPDX document", "type" : "array", "items" : { "type" : "string", "description" : "SPDX ID for each Package, File, or Snippet." } }, "packages" : { "description" : "Packages referenced in the SPDX document", "type" : "array", "items" : { "type" : "object", "properties" : { "SPDXID" : { "type" : "string", "description" : "Uniquely identify any element in an SPDX document which may be referenced by other elements." }, "annotations" : { "description" : "Provide additional information about an SpdxElement.", "type" : "array", "items" : { "type" : "object", "properties" : { "annotationDate" : { "description" : "Identify when the comment was made. This is to be specified according to the combined date and time in the UTC format, as specified in the ISO 8601 standard.", "type" : "string" }, "annotationType" : { "description" : "Type of the annotation.", "type" : "string", "enum" : [ "OTHER", "REVIEW" ] }, "annotator" : { "description" : "This field identifies the person, organization, or tool that has commented on a file, package, snippet, or the entire document.", "type" : "string" }, "comment" : { "type" : "string" } }, "required" : [ "annotationDate", "annotationType", "annotator", "comment" ], "additionalProperties" : false, "description" : "An Annotation is a comment on an SpdxItem by an agent." } }, "attributionTexts" : { "description" : "This field provides a place for the SPDX data creator to record acknowledgements that may be required to be communicated in some contexts. This is not meant to include the actual complete license text (see licenseConcluded and licenseDeclared), and may or may not include copyright notices (see also copyrightText). The SPDX data creator may use this field to record other acknowledgements, such as particular clauses from license texts, which may be necessary or desirable to reproduce.", "type" : "array", "items" : { "description" : "This field provides a place for the SPDX data creator to record acknowledgements that may be required to be communicated in some contexts.
This is not meant to include the actual complete license text (see licenseConcluded and licenseDeclared), and may or may not include copyright notices (see also copyrightText). The SPDX data creator may use this field to record other acknowledgements, such as particular clauses from license texts, which may be necessary or desirable to reproduce.", "type" : "string" } }, "builtDate" : { "description" : "This field provides a place for recording the actual date the package was built.", "type" : "string" }, "checksums" : { "description" : "The checksum property provides a mechanism that can be used to verify that the contents of a File or Package have not changed.", "type" : "array", "items" : { "type" : "object", "properties" : { "algorithm" : { "description" : "Identifies the algorithm used to produce the subject Checksum. Currently, SHA-1 is the only supported algorithm. It is anticipated that other algorithms will be supported at a later time.", "type" : "string", "enum" : [ "SHA1", "BLAKE3", "SHA3-384", "SHA256", "SHA384", "BLAKE2b-512", "BLAKE2b-256", "SHA3-512", "MD2", "ADLER32", "MD4", "SHA3-256", "BLAKE2b-384", "SHA512", "MD6", "MD5", "SHA224" ] }, "checksumValue" : { "description" : "The checksumValue property provides a lower case hexadecimal encoded digest value produced using a specific algorithm.", "type" : "string" } }, "required" : [ "algorithm", "checksumValue" ], "additionalProperties" : false, "description" : "A Checksum is a value that allows the contents of a file to be authenticated. Even small changes to the content of the file will change its checksum. This class allows the results of a variety of checksum and cryptographic message digest algorithms to be represented." } }, "comment" : { "type" : "string" }, "copyrightText" : { "description" : "The text of copyright declarations recited in the package, file or snippet.\n\nIf the copyrightText field is not present, it implies an equivalent meaning to NOASSERTION.", "type" : "string" }, "description" : { "description" : "Provides a detailed description of the package.", "type" : "string" }, "downloadLocation" : { "description" : "The URI at which this package is available for download. Private (i.e., not publicly reachable) URIs are acceptable as values of this property. The values http://spdx.org/rdf/terms#none and http://spdx.org/rdf/terms#noassertion may be used to specify that the package is not downloadable or that no attempt was made to determine its download location, respectively.", "type" : "string" }, "externalRefs" : { "description" : "An External Reference allows a Package to reference an external source of additional information, metadata, enumerations, asset identifiers, or downloadable content believed to be relevant to the Package.", "type" : "array", "items" : { "type" : "object", "properties" : { "comment" : { "type" : "string" }, "referenceCategory" : { "description" : "Category for the external reference", "type" : "string", "enum" : [ "OTHER", "PERSISTENT-ID", "SECURITY", "PACKAGE-MANAGER" ] }, "referenceLocator" : { "description" : "The unique string with no spaces necessary to access the package-specific information, metadata, or content within the target location. The format of the locator is subject to constraints defined by the referenceType.", "type" : "string" }, "referenceType" : { "description" : "Type of the external reference.
These are defined in an appendix in the SPDX specification.", "type" : "string" } }, "required" : [ "referenceCategory", "referenceLocator", "referenceType" ], "additionalProperties" : false, "description" : "An External Reference allows a Package to reference an external source of additional information, metadata, enumerations, asset identifiers, or downloadable content believed to be relevant to the Package." } }, "filesAnalyzed" : { "description" : "Indicates whether the file content of this package has been available for or subjected to analysis when creating the SPDX document. If false indicates packages that represent metadata or URI references to a project, product, artifact, distribution or a component. If set to false, the package must not contain any files.", "type" : "boolean" }, "hasFiles" : { "description" : "Indicates that a particular file belongs to a package.", "type" : "array", "items" : { "description" : "SPDX ID for File. Indicates that a particular file belongs to a package.", "type" : "string" } }, "homepage" : { "type" : "string" }, "licenseComments" : { "description" : "The licenseComments property allows the preparer of the SPDX document to describe why the licensing in spdx:licenseConcluded was chosen.", "type" : "string" }, "licenseConcluded" : { "description" : "License expression for licenseConcluded. See SPDX Annex D for the license expression syntax. The licensing that the preparer of this SPDX document has concluded, based on the evidence, actually applies to the SPDX Item.\n\nIf the licenseConcluded field is not present for an SPDX Item, it implies an equivalent meaning to NOASSERTION.", "type" : "string" }, "licenseDeclared" : { "description" : "License expression for licenseDeclared. See SPDX Annex D for the license expression syntax. The licensing that the creators of the software in the package, or the packager, have declared. Declarations by the original software creator should be preferred, if they exist.", "type" : "string" }, "licenseInfoFromFiles" : { "description" : "The licensing information that was discovered directly within the package. There will be an instance of this property for each distinct value of all licenseInfoInFile properties of all files contained in the package.\n\nIf the licenseInfoFromFiles field is not present for a package and filesAnalyzed property for that same package is true or omitted, it implies an equivalent meaning to NOASSERTION.", "type" : "array", "items" : { "description" : "License expression for licenseInfoFromFiles. See SPDX Annex D for the license expression syntax. The licensing information that was discovered directly within the package. There will be an instance of this property for each distinct value of all licenseInfoInFile properties of all files contained in the package.\n\nIf the licenseInfoFromFiles field is not present for a package and filesAnalyzed property for that same package is true or omitted, it implies an equivalent meaning to NOASSERTION.", "type" : "string" } }, "name" : { "description" : "Identify name of this SpdxElement.", "type" : "string" }, "originator" : { "description" : "The name and, optionally, contact information of the person or organization that originally created the package. Values of this property must conform to the agent and tool syntax.", "type" : "string" }, "packageFileName" : { "description" : "The base name of the package file name.
For example, zlib-1.2.5.tar.gz.", "type" : "string" }, "packageVerificationCode" : { "type" : "object", "properties" : { "packageVerificationCodeExcludedFiles" : { "description" : "A file that was excluded when calculating the package verification code. This is usually a file containing SPDX data regarding the package. If a package contains more than one SPDX file all SPDX files must be excluded from the package verification code. If this is not done it would be impossible to correctly calculate the verification codes in both files.", "type" : "array", "items" : { "description" : "A file that was excluded when calculating the package verification code. This is usually a file containing SPDX data regarding the package. If a package contains more than one SPDX file all SPDX files must be excluded from the package verification code. If this is not done it would be impossible to correctly calculate the verification codes in both files.", "type" : "string" } }, "packageVerificationCodeValue" : { "description" : "The actual package verification code as a hex encoded value.", "type" : "string" } }, "required" : [ "packageVerificationCodeValue" ], "additionalProperties" : false, "description" : "A manifest based verification code (the algorithm is defined in section 4.7 of the full specification) of the SPDX Item. This allows consumers of this data and/or database to determine if an SPDX item they have in hand is identical to the SPDX item from which the data was produced. This algorithm works even if the SPDX document is included in the SPDX item." }, "primaryPackagePurpose" : { "description" : "This field provides information about the primary purpose of the identified package. Package Purpose is intrinsic to how the package is being used rather than the content of the package.", "type" : "string", "enum" : [ "OTHER", "INSTALL", "ARCHIVE", "FIRMWARE", "APPLICATION", "FRAMEWORK", "LIBRARY", "CONTAINER", "SOURCE", "DEVICE", "OPERATING_SYSTEM", "FILE" ] }, "releaseDate" : { "description" : "This field provides a place for recording the date the package was released.", "type" : "string" }, "sourceInfo" : { "description" : "Allows the producer(s) of the SPDX document to describe how the package was acquired and/or changed from the original source.", "type" : "string" }, "summary" : { "description" : "Provides a short description of the package.", "type" : "string" }, "supplier" : { "description" : "The name and, optionally, contact information of the person or organization who was the immediate supplier of this package to the recipient. The supplier may be different than originator when the software has been repackaged. Values of this property must conform to the agent and tool syntax.", "type" : "string" }, "validUntilDate" : { "description" : "This field provides a place for recording the end of the support period for a package from the supplier.", "type" : "string" }, "versionInfo" : { "description" : "Provides an indication of the version of the package that is described by this SpdxDocument.", "type" : "string" } }, "required" : [ "SPDXID", "downloadLocation", "name" ], "additionalProperties" : false } }, "files" : { "description" : "Files referenced in the SPDX document", "type" : "array", "items" : { "type" : "object", "properties" : { "SPDXID" : { "type" : "string", "description" : "Uniquely identify any element in an SPDX document which may be referenced by other elements." 
}, "annotations" : { "description" : "Provide additional information about an SpdxElement.", "type" : "array", "items" : { "type" : "object", "properties" : { "annotationDate" : { "description" : "Identify when the comment was made. This is to be specified according to the combined date and time in the UTC format, as specified in the ISO 8601 standard.", "type" : "string" }, "annotationType" : { "description" : "Type of the annotation.", "type" : "string", "enum" : [ "OTHER", "REVIEW" ] }, "annotator" : { "description" : "This field identifies the person, organization, or tool that has commented on a file, package, snippet, or the entire document.", "type" : "string" }, "comment" : { "type" : "string" } }, "required" : [ "annotationDate", "annotationType", "annotator", "comment" ], "additionalProperties" : false, "description" : "An Annotation is a comment on an SpdxItem by an agent." } }, "artifactOfs" : { "description" : "Indicates the project in which the SpdxElement originated. Tools must preserve doap:homepage and doap:name properties and the URI (if one is known) of doap:Project resources that are values of this property. All other properties of doap:Projects are not directly supported by SPDX and may be dropped when translating to or from some SPDX formats.", "type" : "array", "items" : { "type" : "object" } }, "attributionTexts" : { "description" : "This field provides a place for the SPDX data creator to record acknowledgements that may be required to be communicated in some contexts. This is not meant to include the actual complete license text (see licenseConculded and licenseDeclared), and may or may not include copyright notices (see also copyrightText). The SPDX data creator may use this field to record other acknowledgements, such as particular clauses from license texts, which may be necessary or desirable to reproduce.", "type" : "array", "items" : { "description" : "This field provides a place for the SPDX data creator to record acknowledgements that may be required to be communicated in some contexts. This is not meant to include the actual complete license text (see licenseConculded and licenseDeclared), and may or may not include copyright notices (see also copyrightText). The SPDX data creator may use this field to record other acknowledgements, such as particular clauses from license texts, which may be necessary or desirable to reproduce.", "type" : "string" } }, "checksums" : { "description" : "The checksum property provides a mechanism that can be used to verify that the contents of a File or Package have not changed.", "minItems" : 1, "type" : "array", "items" : { "type" : "object", "properties" : { "algorithm" : { "description" : "Identifies the algorithm used to produce the subject Checksum. Currently, SHA-1 is the only supported algorithm. It is anticipated that other algorithms will be supported at a later time.", "type" : "string", "enum" : [ "SHA1", "BLAKE3", "SHA3-384", "SHA256", "SHA384", "BLAKE2b-512", "BLAKE2b-256", "SHA3-512", "MD2", "ADLER32", "MD4", "SHA3-256", "BLAKE2b-384", "SHA512", "MD6", "MD5", "SHA224" ] }, "checksumValue" : { "description" : "The checksumValue property provides a lower case hexidecimal encoded digest value produced using a specific algorithm.", "type" : "string" } }, "required" : [ "algorithm", "checksumValue" ], "additionalProperties" : false, "description" : "A Checksum is value that allows the contents of a file to be authenticated. Even small changes to the content of the file will change its checksum. 
This class allows the results of a variety of checksum and cryptographic message digest algorithms to be represented." } }, "comment" : { "type" : "string" }, "copyrightText" : { "description" : "The text of copyright declarations recited in the package, file or snippet.\n\nIf the copyrightText field is not present, it implies an equivalent meaning to NOASSERTION.", "type" : "string" }, "fileContributors" : { "description" : "This field provides a place for the SPDX file creator to record file contributors. Contributors could include names of copyright holders and/or authors who may not be copyright holders yet contributed to the file content.", "type" : "array", "items" : { "description" : "This field provides a place for the SPDX file creator to record file contributors. Contributors could include names of copyright holders and/or authors who may not be copyright holders yet contributed to the file content.", "type" : "string" } }, "fileDependencies" : { "description" : "This field is deprecated since SPDX 2.0 in favor of using Section 7 which provides more granularity about relationships.", "type" : "array", "items" : { "description" : "SPDX ID for File. This field is deprecated since SPDX 2.0 in favor of using Section 7 which provides more granularity about relationships.", "type" : "string" } }, "fileName" : { "description" : "The name of the file relative to the root of the package.", "type" : "string" }, "fileTypes" : { "description" : "The type of the file.", "type" : "array", "items" : { "description" : "The type of the file.", "type" : "string", "enum" : [ "OTHER", "DOCUMENTATION", "IMAGE", "VIDEO", "ARCHIVE", "SPDX", "APPLICATION", "SOURCE", "BINARY", "TEXT", "AUDIO" ] } }, "licenseComments" : { "description" : "The licenseComments property allows the preparer of the SPDX document to describe why the licensing in spdx:licenseConcluded was chosen.", "type" : "string" }, "licenseConcluded" : { "description" : "License expression for licenseConcluded. See SPDX Annex D for the license expression syntax. The licensing that the preparer of this SPDX document has concluded, based on the evidence, actually applies to the SPDX Item.\n\nIf the licenseConcluded field is not present for an SPDX Item, it implies an equivalent meaning to NOASSERTION.", "type" : "string" }, "licenseInfoInFiles" : { "description" : "Licensing information that was discovered directly in the subject file. This is also considered a declared license for the file.\n\nIf the licenseInfoInFile field is not present for a file, it implies an equivalent meaning to NOASSERTION.", "type" : "array", "items" : { "description" : "License expression for licenseInfoInFile. See SPDX Annex D for the license expression syntax. Licensing information that was discovered directly in the subject file. This is also considered a declared license for the file.\n\nIf the licenseInfoInFile field is not present for a file, it implies an equivalent meaning to NOASSERTION.", "type" : "string" } }, "noticeText" : { "description" : "This field provides a place for the SPDX file creator to record potential legal notices found in the file. 
This may or may not include copyright statements.", "type" : "string" } }, "required" : [ "SPDXID", "checksums", "fileName" ], "additionalProperties" : false } }, "snippets" : { "description" : "Snippets referenced in the SPDX document", "type" : "array", "items" : { "type" : "object", "properties" : { "SPDXID" : { "type" : "string", "description" : "Uniquely identify any element in an SPDX document which may be referenced by other elements." }, "annotations" : { "description" : "Provide additional information about an SpdxElement.", "type" : "array", "items" : { "type" : "object", "properties" : { "annotationDate" : { "description" : "Identify when the comment was made. This is to be specified according to the combined date and time in the UTC format, as specified in the ISO 8601 standard.", "type" : "string" }, "annotationType" : { "description" : "Type of the annotation.", "type" : "string", "enum" : [ "OTHER", "REVIEW" ] }, "annotator" : { "description" : "This field identifies the person, organization, or tool that has commented on a file, package, snippet, or the entire document.", "type" : "string" }, "comment" : { "type" : "string" } }, "required" : [ "annotationDate", "annotationType", "annotator", "comment" ], "additionalProperties" : false, "description" : "An Annotation is a comment on an SpdxItem by an agent." } }, "attributionTexts" : { "description" : "This field provides a place for the SPDX data creator to record acknowledgements that may be required to be communicated in some contexts. This is not meant to include the actual complete license text (see licenseConcluded and licenseDeclared), and may or may not include copyright notices (see also copyrightText). The SPDX data creator may use this field to record other acknowledgements, such as particular clauses from license texts, which may be necessary or desirable to reproduce.", "type" : "array", "items" : { "description" : "This field provides a place for the SPDX data creator to record acknowledgements that may be required to be communicated in some contexts. This is not meant to include the actual complete license text (see licenseConcluded and licenseDeclared), and may or may not include copyright notices (see also copyrightText). The SPDX data creator may use this field to record other acknowledgements, such as particular clauses from license texts, which may be necessary or desirable to reproduce.", "type" : "string" } }, "comment" : { "type" : "string" }, "copyrightText" : { "description" : "The text of copyright declarations recited in the package, file or snippet.\n\nIf the copyrightText field is not present, it implies an equivalent meaning to NOASSERTION.", "type" : "string" }, "licenseComments" : { "description" : "The licenseComments property allows the preparer of the SPDX document to describe why the licensing in spdx:licenseConcluded was chosen.", "type" : "string" }, "licenseConcluded" : { "description" : "License expression for licenseConcluded. See SPDX Annex D for the license expression syntax. The licensing that the preparer of this SPDX document has concluded, based on the evidence, actually applies to the SPDX Item.\n\nIf the licenseConcluded field is not present for an SPDX Item, it implies an equivalent meaning to NOASSERTION.", "type" : "string" }, "licenseInfoInSnippets" : { "description" : "Licensing information that was discovered directly in the subject snippet.
This is also considered a declared license for the snippet.\n\nIf the licenseInfoInSnippet field is not present for a snippet, it implies an equivalent meaning to NOASSERTION.", "type" : "array", "items" : { "description" : "License expression for licenseInfoInSnippet. See SPDX Annex D for the license expression syntax. Licensing information that was discovered directly in the subject snippet. This is also considered a declared license for the snippet.\n\nIf the licenseInfoInSnippet field is not present for a snippet, it implies an equivalent meaning to NOASSERTION.", "type" : "string" } }, "name" : { "description" : "Identify name of this SpdxElement.", "type" : "string" }, "ranges" : { "description" : "This field defines the byte range in the original host file (in X.2) that the snippet information applies to", "minItems" : 1, "type" : "array", "items" : { "type" : "object", "properties" : { "endPointer" : { "type" : "object", "properties" : { "reference" : { "description" : "SPDX ID for File", "type" : "string" }, "offset" : { "type" : "integer", "description" : "Byte offset in the file" }, "lineNumber" : { "type" : "integer", "description" : "line number offset in the file" } }, "required" : [ "reference" ], "additionalProperties" : false }, "startPointer" : { "type" : "object", "properties" : { "reference" : { "description" : "SPDX ID for File", "type" : "string" }, "offset" : { "type" : "integer", "description" : "Byte offset in the file" }, "lineNumber" : { "type" : "integer", "description" : "line number offset in the file" } }, "required" : [ "reference" ], "additionalProperties" : false } }, "required" : [ "endPointer", "startPointer" ], "additionalProperties" : false } }, "snippetFromFile" : { "description" : "SPDX ID for File. File containing the SPDX element (e.g. the file containing a snippet).", "type" : "string" } }, "required" : [ "SPDXID", "name", "ranges", "snippetFromFile" ], "additionalProperties" : false } }, "relationships" : { "description" : "Relationships referenced in the SPDX document", "type" : "array", "items" : { "type" : "object", "properties" : { "spdxElementId" : { "type" : "string", "description" : "Id to which the SPDX element is related" }, "comment" : { "type" : "string" }, "relatedSpdxElement" : { "description" : "SPDX ID for SpdxElement.
A related SpdxElement.", "type" : "string" }, "relationshipType" : { "description" : "Describes the type of relationship between two SPDX elements.", "type" : "string", "enum" : [ "VARIANT_OF", "COPY_OF", "PATCH_FOR", "TEST_DEPENDENCY_OF", "CONTAINED_BY", "DATA_FILE_OF", "OPTIONAL_COMPONENT_OF", "ANCESTOR_OF", "GENERATES", "CONTAINS", "OPTIONAL_DEPENDENCY_OF", "FILE_ADDED", "REQUIREMENT_DESCRIPTION_FOR", "DEV_DEPENDENCY_OF", "DEPENDENCY_OF", "BUILD_DEPENDENCY_OF", "DESCRIBES", "PREREQUISITE_FOR", "HAS_PREREQUISITE", "PROVIDED_DEPENDENCY_OF", "DYNAMIC_LINK", "DESCRIBED_BY", "METAFILE_OF", "DEPENDENCY_MANIFEST_OF", "PATCH_APPLIED", "RUNTIME_DEPENDENCY_OF", "TEST_OF", "TEST_TOOL_OF", "DEPENDS_ON", "SPECIFICATION_FOR", "FILE_MODIFIED", "DISTRIBUTION_ARTIFACT", "AMENDS", "DOCUMENTATION_OF", "GENERATED_FROM", "STATIC_LINK", "OTHER", "BUILD_TOOL_OF", "TEST_CASE_OF", "PACKAGE_OF", "DESCENDANT_OF", "FILE_DELETED", "EXPANDED_FROM_ARCHIVE", "DEV_TOOL_OF", "EXAMPLE_OF" ] } }, "required" : [ "spdxElementId", "relatedSpdxElement", "relationshipType" ], "additionalProperties" : false } } }, "required" : [ "SPDXID", "creationInfo", "dataLicense", "name", "spdxVersion" ], "additionalProperties" : false } ================================================ FILE: tests/resources/test-derivation-chain.nix ================================================ # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 { system ? builtins.currentSystem, }: let mkTestDerivation = { name, pname, version, command, }: builtins.derivation { inherit name pname system version ; builder = "/bin/sh"; args = [ "-c" command ]; }; first = mkTestDerivation { name = "sbomnix-test-first-1.0"; pname = "sbomnix-test-first"; version = "1.0"; command = "echo first > $out"; }; second = mkTestDerivation { name = "sbomnix-test-second-1.0"; pname = "sbomnix-test-second"; version = "1.0"; command = "echo ${first} > $out"; }; in mkTestDerivation { name = "sbomnix-test-third-1.0"; pname = "sbomnix-test-third"; version = "1.0"; command = "echo ${second} > $out"; } ================================================ FILE: tests/test_builder_runtime.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Focused tests for SBOM builder runtime closure selection.""" import pandas as pd import pytest from common import columns as cols from common.errors import MissingNixDerivationMetadataError, SbomnixError from sbomnix import builder as sbomnix_builder from sbomnix.builder import SbomBuilder from sbomnix.closure import dependency_rows_to_dataframe from sbomnix.runtime import RuntimeClosure TARGET_PATH = "/nix/store/11111111111111111111111111111111-target-1.0" TARGET_DERIVER = "/nix/store/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-target-1.0.drv" GRAPH_ONLY_PATH = "/nix/store/bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb-source" def _builder_double(): builder = object.__new__(SbomBuilder) builder.nix_path = TARGET_PATH builder.buildtime = False builder.target_deriver = TARGET_DERIVER builder.target_component_ref = None builder.include_cpe = False builder.depth = None builder.df_deps = None builder._runtime_output_paths_by_load_path = None return builder def _runtime_closure(output_paths_by_drv, rows=None): return RuntimeClosure( df_deps=dependency_rows_to_dataframe([] if rows is None else rows), output_paths_by_drv=output_paths_by_drv, ) def 
test_runtime_path_info_dependencies_accepts_existing_derivers(monkeypatch): closure = _runtime_closure({TARGET_DERIVER: {TARGET_PATH}}) monkeypatch.setattr( sbomnix_builder, "load_runtime_closure", lambda _path: closure, ) monkeypatch.setattr( sbomnix_builder, "is_loadable_deriver_path", lambda path: path == TARGET_DERIVER, ) builder = _builder_double() loaded = builder._load_runtime_path_info_closure(TARGET_PATH) builder._init_dependencies(loaded) assert loaded.runtime_output_paths_by_load_path == {TARGET_DERIVER: {TARGET_PATH}} assert builder._runtime_output_paths_by_load_path == {TARGET_DERIVER: {TARGET_PATH}} assert builder.df_deps.equals(closure.df_deps) def test_runtime_components_propagate_derivation_loading_failures(monkeypatch): def fail_runtime_components(*_args, **_kwargs): raise ValueError("broken derivation metadata") monkeypatch.setattr( sbomnix_builder, "runtime_derivations_to_dataframe", fail_runtime_components, ) builder = _builder_double() builder._runtime_output_paths_by_load_path = {TARGET_DERIVER: {TARGET_PATH}} with pytest.raises(ValueError, match="broken derivation metadata"): builder._init_runtime_components({TARGET_PATH}) assert builder._runtime_output_paths_by_load_path == {TARGET_DERIVER: {TARGET_PATH}} def test_runtime_components_reject_missing_derivation_metadata(monkeypatch): monkeypatch.setattr( sbomnix_builder, "runtime_derivations_to_dataframe", lambda *_args, **_kwargs: pd.DataFrame(), ) builder = _builder_double() builder._runtime_output_paths_by_load_path = {TARGET_PATH: {TARGET_PATH}} with pytest.raises(MissingNixDerivationMetadataError, match=TARGET_PATH): builder._init_runtime_components({TARGET_PATH}) def test_runtime_deriver_lookup_preserves_typed_errors(monkeypatch): def fail_find_deriver(_path): raise SbomnixError("schema drift") monkeypatch.setattr(sbomnix_builder, "find_deriver", fail_find_deriver) builder = _builder_double() with pytest.raises(SbomnixError, match="schema drift"): builder._resolve_target_deriver(TARGET_PATH) @pytest.mark.parametrize( "deriver", [ "unknown-deriver", "/nix/store/bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb-target-1.0", "/nix/store/cccccccccccccccccccccccccccccccc-missing-1.0.drv", ], ) def test_runtime_path_info_dependencies_uses_output_queries_for_unloadable_derivers( monkeypatch, deriver, ): closure = _runtime_closure({deriver: {TARGET_PATH}}) monkeypatch.setattr( sbomnix_builder, "load_runtime_closure", lambda _path: closure, ) monkeypatch.setattr( sbomnix_builder, "is_loadable_deriver_path", lambda _path: False, ) builder = _builder_double() loaded = builder._load_runtime_path_info_closure(TARGET_PATH) builder._init_dependencies(loaded) assert loaded.runtime_output_paths_by_load_path == {TARGET_PATH: {TARGET_PATH}} assert builder._runtime_output_paths_by_load_path == {TARGET_PATH: {TARGET_PATH}} assert builder.df_deps.equals(closure.df_deps) def test_runtime_path_info_dependencies_accepts_graph_only_references(monkeypatch): rows = [ { "src_path": GRAPH_ONLY_PATH, "src_pname": "source", "target_path": TARGET_PATH, "target_pname": "target-1.0", } ] closure = _runtime_closure({TARGET_DERIVER: {TARGET_PATH}}, rows=rows) monkeypatch.setattr( sbomnix_builder, "load_runtime_closure", lambda _path: closure, ) monkeypatch.setattr( sbomnix_builder, "is_loadable_deriver_path", lambda path: path == TARGET_DERIVER, ) builder = _builder_double() loaded = builder._load_runtime_path_info_closure(TARGET_PATH) builder._init_dependencies(loaded) assert loaded.runtime_output_paths_by_load_path == {TARGET_DERIVER: {TARGET_PATH}} assert 
builder._runtime_output_paths_by_load_path == {TARGET_DERIVER: {TARGET_PATH}} assert builder.df_deps.equals(closure.df_deps) def test_runtime_path_info_dependencies_supports_targets_without_derivers( monkeypatch, ): closure = _runtime_closure({}) monkeypatch.setattr( sbomnix_builder, "load_runtime_closure", lambda _path: closure, ) builder = _builder_double() builder.target_deriver = None loaded = builder._load_runtime_path_info_closure(TARGET_PATH) builder._init_dependencies(loaded) assert loaded.runtime_output_paths_by_load_path == {TARGET_PATH: {TARGET_PATH}} assert builder._runtime_output_paths_by_load_path == {TARGET_PATH: {TARGET_PATH}} def test_target_component_ref_uses_runtime_output_when_deriver_is_unavailable(): builder = _builder_double() builder.target_deriver = None builder.df_sbomdb = pd.DataFrame( [ { cols.STORE_PATH: "/nix/store/runtime-load-path", cols.OUTPUTS: [TARGET_PATH], } ] ) assert builder._resolve_target_component_ref() == "/nix/store/runtime-load-path" def test_target_component_ref_skips_missing_outputs_when_deriver_is_unavailable(): builder = _builder_double() builder.target_deriver = None builder.df_sbomdb = pd.DataFrame( [ { cols.STORE_PATH: "/nix/store/no-outputs", cols.OUTPUTS: float("nan"), }, { cols.STORE_PATH: "/nix/store/runtime-load-path", cols.OUTPUTS: [TARGET_PATH], }, ] ) assert builder._resolve_target_component_ref() == "/nix/store/runtime-load-path" def test_target_component_ref_handles_non_identifier_output_column(monkeypatch): monkeypatch.setattr(cols, "OUTPUTS", "store-outputs") builder = _builder_double() builder.target_deriver = None builder.df_sbomdb = pd.DataFrame( [ { cols.STORE_PATH: "/nix/store/runtime-load-path", cols.OUTPUTS: [TARGET_PATH], } ] ) assert builder._resolve_target_component_ref() == "/nix/store/runtime-load-path" def test_target_component_ref_rejects_missing_runtime_target_metadata(): builder = _builder_double() builder.target_deriver = None builder.df_sbomdb = pd.DataFrame( [ { cols.STORE_PATH: "/nix/store/runtime-load-path", cols.OUTPUTS: ["/nix/store/other-output"], } ] ) with pytest.raises(MissingNixDerivationMetadataError, match=TARGET_PATH): builder._resolve_target_component_ref() ================================================ FILE: tests/test_buildtime_closure.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Focused tests for recursive build-time derivation parsing.""" import pytest from common.errors import InvalidNixJsonError from sbomnix.closure import derivation_dependencies_df def test_recursive_buildtime_dependencies_df_reads_new_derivation_inputs(): drv_infos = { "/nix/store/11111111111111111111111111111111-target-1.0.drv": { "inputs": { "drvs": { "/nix/store/22222222222222222222222222222222-dep-a-1.0.drv": [ "out" ], "/nix/store/33333333333333333333333333333333-dep-b-1.0.drv": [ "out" ], }, "srcs": [ "/nix/store/44444444444444444444444444444444-builder.sh", ], } } } df = derivation_dependencies_df(drv_infos) rows = df.sort_values("src_path").to_dict("records") assert rows == [ { "src_path": "/nix/store/22222222222222222222222222222222-dep-a-1.0.drv", "src_pname": "dep-a-1.0.drv", "target_path": "/nix/store/11111111111111111111111111111111-target-1.0.drv", "target_pname": "target-1.0.drv", }, { "src_path": "/nix/store/33333333333333333333333333333333-dep-b-1.0.drv", "src_pname": "dep-b-1.0.drv", "target_path": "/nix/store/11111111111111111111111111111111-target-1.0.drv", 
"target_pname": "target-1.0.drv", }, { "src_path": "/nix/store/44444444444444444444444444444444-builder.sh", "src_pname": "builder.sh", "target_path": "/nix/store/11111111111111111111111111111111-target-1.0.drv", "target_pname": "target-1.0.drv", }, ] def test_recursive_buildtime_dependencies_df_rejects_legacy_input_drvs(): drv_infos = { "/nix/store/11111111111111111111111111111111-target-1.0.drv": { "inputDrvs": { "/nix/store/22222222222222222222222222222222-dep-a-1.0.drv": ["out"], } } } with pytest.raises(InvalidNixJsonError, match="unsupported legacy `inputDrvs`"): derivation_dependencies_df(drv_infos) def test_recursive_buildtime_dependencies_df_rejects_missing_input_schema(): drv_infos = { "/nix/store/11111111111111111111111111111111-target-1.0.drv": { "name": "target-1.0", } } with pytest.raises(InvalidNixJsonError, match="missing derivation inputs"): derivation_dependencies_df(drv_infos) def test_recursive_buildtime_dependencies_df_accepts_empty_modern_inputs(): drv_infos = { "/nix/store/11111111111111111111111111111111-leaf-1.0.drv": { "inputs": { "drvs": {}, "srcs": [], } } } df = derivation_dependencies_df(drv_infos) assert df.empty assert list(df.columns) == [ "src_path", "src_pname", "target_path", "target_pname", ] def test_recursive_buildtime_dependencies_df_rejects_missing_source_inputs(): drv_infos = { "/nix/store/11111111111111111111111111111111-target-1.0.drv": { "inputs": { "drvs": {}, } } } with pytest.raises(InvalidNixJsonError, match="missing `inputs.srcs`"): derivation_dependencies_df(drv_infos) def test_recursive_buildtime_dependencies_df_rejects_missing_derivation_inputs(): drv_infos = { "/nix/store/11111111111111111111111111111111-target-1.0.drv": { "inputs": { "srcs": [], } } } with pytest.raises(InvalidNixJsonError, match="missing `inputs.drvs`"): derivation_dependencies_df(drv_infos) ================================================ FILE: tests/test_cli_conventions.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Focused tests for shared CLI conventions.""" import re import subprocess from pathlib import Path import pytest from common.pkgmeta import _dev_version, get_py_pkg_version from nixgraph import main as nixgraph_main from nixmeta import main as nixmeta_main from nixupdate import nix_outdated from provenance import main as provenance_main from repology import repology_cli, repology_cve from sbomnix import main as sbomnix_main from vulnxscan import osv as osv_cli from vulnxscan import vulnxscan_cli def _stringify(value): if isinstance(value, Path): return value.as_posix() return str(value) CLI_ARG_CASES = [ (sbomnix_main.getargs, [".#pkg"]), (nixgraph_main.getargs, [".#pkg"]), (nixmeta_main._getargs, []), (nix_outdated.getargs, [".#pkg"]), (vulnxscan_cli.getargs, [".#pkg"]), (osv_cli.getargs, ["sbom.json"]), ( repology_cli.getargs, ["--pkg_exact", "openssl", "--repository", "nix_unstable"], ), (repology_cve.getargs, ["openssl", "3.1.0"]), (provenance_main.getargs, [".#pkg"]), ] @pytest.mark.parametrize( "getargs", [ sbomnix_main.getargs, nixgraph_main.getargs, nixmeta_main._getargs, nix_outdated.getargs, vulnxscan_cli.getargs, osv_cli.getargs, repology_cli.getargs, repology_cve.getargs, provenance_main.getargs, ], ) def test_cli_version_flags_exit_zero(getargs, capsys): with pytest.raises(SystemExit) as excinfo: getargs(["--version"]) assert excinfo.value.code == 0 assert capsys.readouterr().out.strip() == 
get_py_pkg_version() @pytest.mark.parametrize( ("getargs", "base_argv"), CLI_ARG_CASES, ) def test_cli_verbose_default_is_normal_info(getargs, base_argv): assert getargs(base_argv).verbose == 0 @pytest.mark.parametrize( ("getargs", "base_argv"), CLI_ARG_CASES, ) @pytest.mark.parametrize( "verbose_argv", [ ["-v"], ["--verbose=1"], ["--verbose", "1"], ], ) def test_cli_verbose_level_one_forms_match(getargs, base_argv, verbose_argv): assert getargs([*verbose_argv, *base_argv]).verbose == 1 @pytest.mark.parametrize( ("getargs", "base_argv"), CLI_ARG_CASES, ) @pytest.mark.parametrize( "verbose_argv", [ ["-v", "-v"], ["-vv"], ["-v", "2"], ["--verbose=2"], ["--verbose", "2"], ], ) def test_cli_verbose_level_two_forms_match(getargs, base_argv, verbose_argv): assert getargs([*verbose_argv, *base_argv]).verbose == 2 @pytest.mark.parametrize( ("getargs", "argv", "expected_out"), [ (nixgraph_main.getargs, ["-o", "graph.dot", ".#pkg"], "graph.dot"), (nixmeta_main._getargs, ["-o", "meta.csv"], "meta.csv"), (nix_outdated.getargs, ["-o", "nix_outdated.csv", ".#pkg"], "nix_outdated.csv"), (vulnxscan_cli.getargs, ["-o", "vulns.csv", ".#pkg"], "vulns.csv"), (osv_cli.getargs, ["-o", "osv.csv", "sbom.json"], "osv.csv"), ( repology_cli.getargs, [ "-o", "repology.csv", "--pkg_exact", "openssl", "--repository", "nix_unstable", ], "repology.csv", ), ( repology_cve.getargs, ["-o", "repology_cves.csv", "openssl", "3.1.0"], "repology_cves.csv", ), ( provenance_main.getargs, ["-o", "provenance.json", ".#pkg"], "provenance.json", ), ], ) def test_single_output_clis_accept_short_o_alias(getargs, argv, expected_out): assert _stringify(getargs(argv).out) == expected_out _REPO_ROOT = Path(__file__).resolve().parents[1] _DEV_VERSION_RE = re.compile( r"^(?P<base>\d+\.\d+\.\d+)\+g(?P<hash>[0-9a-f]+)(?P<dirty>\.dirty)?$" ) def test_dev_version_format_matches_nix_package_format(): """_dev_version() must produce the same PEP 440 local-version format as the Nix postPatch hook so that devshell and packaged invocations report identical strings for the same checkout. Expected format: <base>+g<hash>[.dirty] The '.dirty' suffix matches what pip writes to METADATA after normalising the '-dirty' suffix that the Nix dirtyShortRev attribute appends. """ version = _dev_version() m = _DEV_VERSION_RE.match(version) assert m, f"_dev_version() returned {version!r}; expected <base>+g<hash>[.dirty]" expected_base = (_REPO_ROOT / "VERSION").read_text().strip() assert m.group("base") == expected_base, ( f"base {m.group('base')!r} does not match VERSION file {expected_base!r}" ) expected_hash = subprocess.run( ["git", "rev-parse", "--short", "HEAD"], capture_output=True, text=True, check=True, cwd=_REPO_ROOT, ).stdout.strip() assert m.group("hash") == expected_hash, ( f"hash {m.group('hash')!r} does not match HEAD {expected_hash!r}" ) is_dirty = bool( subprocess.run( ["git", "status", "--porcelain", "--untracked-files=no"], capture_output=True, text=True, check=True, cwd=_REPO_ROOT, ).stdout.strip() ) assert bool(m.group("dirty")) == is_dirty, ( f"dirty flag in {version!r} does not match working-tree state (dirty={is_dirty})" ) @pytest.mark.slow def test_dev_version_parity_with_nix_package_version(): """_dev_version() must equal the version written into the Nix-built package's dist-info METADATA for the same checkout.
This exercises the full packaging pipeline — gitSuffix in nix/packages.nix, the postPatch VERSION rewrite, setuptools wheel build, and pip normalisation — and compares the result with _dev_version(), so any drift between the Nix packaging path and the Python fallback is caught. Evaluating the Nix version attribute alone is not sufficient because postPatch could write a different string than the attribute implies. """ system = subprocess.run( ["nix", "eval", "--impure", "--raw", "--expr", "builtins.currentSystem"], capture_output=True, text=True, check=True, cwd=_REPO_ROOT, ).stdout.strip() out_path = subprocess.run( [ "nix", "build", f".#packages.{system}.sbomnix", "--print-out-paths", "--no-link", ], capture_output=True, text=True, check=True, cwd=_REPO_ROOT, ).stdout.strip() metadata_files = list( Path(out_path).glob("lib/python*/site-packages/sbomnix-*.dist-info/METADATA") ) assert metadata_files, f"no sbomnix dist-info METADATA found under {out_path}" version_line = next( line for line in metadata_files[0].read_text().splitlines() if line.startswith("Version:") ) installed_version = version_line.split(":", 1)[1].strip() assert _dev_version() == installed_version, ( f"devshell version {_dev_version()!r} != " f"installed METADATA version {installed_version!r}" ) def test_repology_cli_uses_uppercase_v_for_version_filter(): args = repology_cli.getargs( [ "-V", "^3\\.1\\.", "--pkg_exact", "openssl", "--repository", "nix_unstable", ] ) assert args.re_version == "^3\\.1\\." ================================================ FILE: tests/test_cli_error_boundaries.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Focused tests for CLI error-to-exit-code boundaries.""" from types import SimpleNamespace import pytest from common.errors import SbomnixError from nixgraph import main as nixgraph_main from nixmeta import main as nixmeta_main from nixupdate import nix_outdated from provenance import main as provenance_main from vulnxscan import osv as osv_cli from vulnxscan import vulnxscan_cli def test_vulnxscan_invalid_sbom_exits_nonzero(tmp_path, monkeypatch): invalid_sbom = tmp_path / "invalid.json" invalid_sbom.write_text("not json", encoding="utf-8") args = SimpleNamespace( TARGET=invalid_sbom.as_posix(), verbose=0, out="vulns.csv", buildtime=False, sbom=True, whitelist=None, triage=False, nixprs=False, ) monkeypatch.setattr(vulnxscan_cli, "getargs", lambda: args) monkeypatch.setattr(vulnxscan_cli, "set_log_verbosity", lambda _verbosity: None) monkeypatch.setattr( vulnxscan_cli, "exit_unless_command_exists", lambda _command: None ) with pytest.raises(SystemExit) as excinfo: vulnxscan_cli.main() assert excinfo.value.code == 1 def test_osv_invalid_sbom_exits_nonzero(tmp_path, monkeypatch): missing_sbom = tmp_path / "missing.json" args = SimpleNamespace( SBOM=missing_sbom, verbose=0, out="osv.csv", ecosystems="GIT", ) monkeypatch.setattr(osv_cli, "getargs", lambda: args) monkeypatch.setattr(osv_cli, "set_log_verbosity", lambda _verbosity: None) with pytest.raises(SystemExit) as excinfo: osv_cli.main() assert excinfo.value.code == 1 @pytest.mark.parametrize( ("module", "args", "prep", "patched_name"), [ ( nix_outdated, SimpleNamespace( NIXREF=".#broken", buildtime=False, local=False, out="nix_outdated.csv", verbose=0, ), lambda monkeypatch: None, "resolve_nix_target", ), ( nixgraph_main, SimpleNamespace( NIXREF=".#broken", buildtime=False, depth=1, inverse=None, 
out="graph.png", colorize=None, until=None, pathnames=False, verbose=0, ), lambda monkeypatch: None, "resolve_nix_target", ), ( vulnxscan_cli, SimpleNamespace( TARGET=".#broken", verbose=0, out="vulns.csv", buildtime=False, sbom=False, whitelist=None, triage=False, nixprs=False, ), lambda monkeypatch: monkeypatch.setattr( vulnxscan_cli, "exit_unless_command_exists", lambda _command: None ), "resolve_nix_target", ), ( nixmeta_main, SimpleNamespace( flakeref="github:NixOS/nixpkgs?ref=nixos-unstable", out="nixmeta.csv", append=False, verbose=0, ), lambda monkeypatch: None, "exit_unless_command_exists", ), ( provenance_main, SimpleNamespace( target="/nix/store/broken.drv", recursive=False, out=None, verbose=0, ), lambda monkeypatch: monkeypatch.setattr( provenance_main, "get_env_metadata", lambda: provenance_main.BuildMeta("", "", "", "", "", "{}", "{}"), ), "provenance", ), ], ) def test_cli_translates_sbomnix_errors_to_exit_code_1( monkeypatch, module, args, prep, patched_name ): prep(monkeypatch) monkeypatch.setattr(module, "getargs", lambda: args, raising=False) monkeypatch.setattr(module, "_getargs", lambda: args, raising=False) monkeypatch.setattr(module, "set_log_verbosity", lambda _verbosity: None) monkeypatch.setattr( module, patched_name, lambda *_args, **_kwargs: (_ for _ in ()).throw( SbomnixError("expected failure") ), ) with pytest.raises(SystemExit) as excinfo: module.main() assert excinfo.value.code == 1 ================================================ FILE: tests/test_cli_smoke.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Offline smoke tests for lightweight CLI entrypoint boundaries.""" from types import SimpleNamespace import pandas as pd from common.df import df_from_csv_file from repology import repology_cve from vulnxscan import osv as osv_cli def test_repology_cve_main_writes_output_csv(tmp_path, monkeypatch): out_path = tmp_path / "repology_cves.csv" reported = [] monkeypatch.setattr( repology_cve, "getargs", lambda: SimpleNamespace( PKG_NAME="openssl", PKG_VERSION="3.1.0", out=out_path.as_posix(), verbose=0, ), ) monkeypatch.setattr(repology_cve, "set_log_verbosity", lambda _verbosity: None) monkeypatch.setattr( repology_cve, "query_cve", lambda pkg_name, pkg_version: pd.DataFrame( [ { "package": pkg_name, "version": pkg_version, "cve": "CVE-2024-1111", } ] ), ) monkeypatch.setattr( repology_cve, "report_cves", lambda df: reported.append(df.copy(deep=True)) or True, ) repology_cve.main() assert len(reported) == 1 assert df_from_csv_file(out_path).to_dict(orient="records") == [ { "package": "openssl", "version": "3.1.0", "cve": "CVE-2024-1111", } ] def test_osv_main_writes_output_csv_with_requested_ecosystems(tmp_path, monkeypatch): sbom_path = tmp_path / "sbom.cdx.json" out_path = tmp_path / "osv.csv" sbom_path.write_text( '{"metadata":{"component":{"name":"hello","version":"1.0"}},"components":[]}', encoding="utf-8", ) class FakeOSV: def __init__(self): self.calls = [] def query_vulns(self, sbom, ecosystems): self.calls.append((sbom, ecosystems)) def to_dataframe(self): return pd.DataFrame( [ { "vuln_id": "OSV-1", "modified": "2024-01-01", "package": "hello", "version": "1.0", } ] ) fake_osv = FakeOSV() monkeypatch.setattr( osv_cli, "getargs", lambda: SimpleNamespace( SBOM=sbom_path, ecosystems="GIT, OSS-Fuzz", out=out_path.as_posix(), verbose=0, ), ) monkeypatch.setattr(osv_cli, "set_log_verbosity", lambda _verbosity: None) 
monkeypatch.setattr(osv_cli, "OSV", lambda: fake_osv) osv_cli.main() assert fake_osv.calls == [(sbom_path.as_posix(), ["GIT", "OSS-Fuzz"])] assert df_from_csv_file(out_path).to_dict(orient="records") == [ { "vuln_id": "OSV-1", "modified": "2024-01-01", "package": "hello", "version": "1.0", } ] ================================================ FILE: tests/test_common_log.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Focused tests for shared logging levels.""" import logging import pytest from common.log import LOG, LOG_SPAM, LOG_TRACE, LOG_VERBOSE, set_log_verbosity @pytest.mark.parametrize( ("verbosity", "level"), [ (0, logging.INFO), (1, LOG_VERBOSE), (2, logging.DEBUG), (3, LOG_SPAM), (99, LOG_SPAM), (-1, logging.INFO), ], ) def test_set_log_verbosity_maps_cli_levels_to_logging_levels(verbosity, level): try: set_log_verbosity(verbosity) assert LOG.level == level finally: set_log_verbosity(0) def test_custom_log_level_names_are_registered(): assert logging.getLevelName(LOG_VERBOSE) == "VERBOSE" assert logging.getLevelName(LOG_SPAM) == "SPAM" assert LOG_TRACE == LOG_SPAM def test_verbose_level_is_between_info_and_debug(): try: set_log_verbosity(1) assert LOG.isEnabledFor(logging.INFO) assert LOG.isEnabledFor(LOG_VERBOSE) assert not LOG.isEnabledFor(logging.DEBUG) finally: set_log_verbosity(0) ================================================ FILE: tests/test_common_versioning.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Unit tests for shared version and package-name helpers.""" import pytest from hypothesis import example, given from hypothesis import strategies as st from packaging.version import Version from common.package_names import nix_to_repology_pkg_name from common.versioning import number_distance, parse_version, version_distance NON_NEGATIVE_NUMBERS = st.one_of( st.integers(min_value=0, max_value=10**18), st.floats(min_value=0.0, allow_nan=False, allow_infinity=False), ) NEGATIVE_NUMBERS = st.one_of( st.integers(max_value=-1), st.floats(max_value=-0.001, allow_nan=False, allow_infinity=False), ) VERSION_TEXT = st.text(max_size=120) @pytest.mark.parametrize( ("left", "right", "expected"), [ (0, 0, 1.0), (0, 1, 0.5), (-1, 1, 0.0), ], ) def test_number_distance_documents_edge_cases(left, right, expected): assert number_distance(left, right) == expected @given(NON_NEGATIVE_NUMBERS, NON_NEGATIVE_NUMBERS) @example(0, 0) @example(0, 1) def test_number_distance_is_symmetric_for_non_negative_numbers(left, right): assert number_distance(left, right) == number_distance(right, left) @given(NON_NEGATIVE_NUMBERS, NON_NEGATIVE_NUMBERS) @example(0, 0) @example(0, 1) def test_number_distance_is_bounded_for_non_negative_numbers(left, right): result = number_distance(left, right) assert 0.0 <= result <= 1.0 @given(NON_NEGATIVE_NUMBERS) @example(0) def test_number_distance_identity_for_non_negative_numbers(value): assert number_distance(value, value) == 1.0 @given(NEGATIVE_NUMBERS, NON_NEGATIVE_NUMBERS) def test_number_distance_returns_zero_for_negative_arguments(negative, value): assert number_distance(negative, value) == 0.0 assert number_distance(value, negative) == 0.0 def test_parse_version_normalizes_suffixes(): parsed = parse_version("openssl-3.0p1") assert parsed == Version("3.0+1") @given(VERSION_TEXT) def 
test_parse_version_never_raises_for_text(value): parse_version(value) @given(VERSION_TEXT) def test_parse_version_is_idempotent_after_string_roundtrip(value): parsed = parse_version(value) if parsed is not None: assert parse_version(str(parsed)) == parsed def test_version_distance_handles_identical_and_invalid_versions(): assert version_distance("1.2.3", "1.2.3") == 1.0 assert version_distance("release", "1.2.3") == 0.0 @given(VERSION_TEXT, VERSION_TEXT) def test_version_distance_is_bounded_for_text(left, right): result = version_distance(left, right) assert 0.0 <= result <= 1.0 def test_nix_to_repology_pkg_name_handles_prefixes_and_special_cases(): assert nix_to_repology_pkg_name("python311-requests") == "python:requests" assert nix_to_repology_pkg_name("ruby-rake") == "ruby:rake" assert nix_to_repology_pkg_name("python3") == "python" assert nix_to_repology_pkg_name("libtiff") == "tiff" ================================================ FILE: tests/test_compare_deps.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Tests for the dependency comparison test harness.""" import pandas as pd from tests.compare_deps import compare_dependencies def test_compare_dependencies_filters_darwin_buildtime_source_paths(): """Darwin nixgraph output may include graph-only source paths without derivers.""" target_drv = "/nix/store/hash-hello-2.12.3.drv" dependency_drv = "/nix/store/hash-dependency.drv" graph_only_paths = [ "/nix/store/hash-meson.build.in", "/nix/store/hash-Info.plist", "/nix/store/hash-meson.options", "/nix/store/hash-meson.build", "/nix/store/hash-lua-setup-hook", "/nix/store/hash-remove-references-to", "/nix/store/hash-Architectures.xcspec", "/nix/store/hash-ToolchainInfo.plist", "/nix/store/hash-ProductTypes.xcspec", "/nix/store/hash-PackageTypes.xcspec", ] df_sbom = pd.DataFrame( { "drv_path": [target_drv, dependency_drv], "output_path": ["/nix/store/hash-hello", "/nix/store/hash-dependency"], "ref": [target_drv, dependency_drv], "depends_on": [dependency_drv, ""], } ) df_graph = pd.DataFrame( { "target_path": [target_drv] * (1 + len(graph_only_paths)), "src_path": [dependency_drv, *graph_only_paths], } ) assert compare_dependencies( df_sbom, df_graph, sbom_type="runtime_and_buildtime", graph_type="buildtime", ) ================================================ FILE: tests/test_components.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Focused tests for SBOM component dataframe helpers.""" from sbomnix import components as sbomnix_components class FakeDrv: """Minimal derivation double for component dataframe tests.""" def __init__(self, store_path, name): self.store_path = store_path self.name = name self.outputs = [] self.cpe_set = False def set_cpe(self, _generator): self.cpe_set = True def to_dict(self): return { "store_path": self.store_path, "name": self.name, "outputs": self.outputs, "cpe_set": self.cpe_set, } def test_recursive_derivations_to_dataframe_skips_missing_paths(): derivations = { "/nix/store/first.drv": FakeDrv("/nix/store/first.drv", "first"), "/nix/store/second.drv": FakeDrv("/nix/store/second.drv", "second"), } # Keep the test focused on component assembly without loading CPE data. 
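# The "missing.drv" path below has no derivation double in the mapping, so it must be dropped from the result.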
df_components = sbomnix_components.recursive_derivations_to_dataframe( [ "/nix/store/missing.drv", "/nix/store/second.drv", "/nix/store/first.drv", ], derivations, include_cpe=False, ) assert df_components.to_dict("records") == [ { "store_path": "/nix/store/first.drv", "name": "first", "outputs": [], "cpe_set": True, }, { "store_path": "/nix/store/second.drv", "name": "second", "outputs": [], "cpe_set": True, }, ] def test_runtime_derivations_to_dataframe_filters_outputs_before_loading(monkeypatch): load_calls = [] def fake_load_many(paths, output_paths_by_drv=None, ignore_missing=False): load_calls.append((paths, output_paths_by_drv, ignore_missing)) return { "/nix/store/first.drv": FakeDrv("/nix/store/first.drv", "first"), "/nix/store/second.drv": FakeDrv("/nix/store/second.drv", "second"), } monkeypatch.setattr(sbomnix_components, "load_many", fake_load_many) df_components = sbomnix_components.runtime_derivations_to_dataframe( { "/nix/store/first-out", "/nix/store/second-out", }, { "/nix/store/first.drv": { "/nix/store/first-out", "/nix/store/ignored-first-out", }, "/nix/store/second.drv": { "/nix/store/second-out", }, "/nix/store/ignored.drv": { "/nix/store/ignored-out", }, }, include_cpe=False, ) assert load_calls == [ ( ["/nix/store/first.drv", "/nix/store/second.drv"], { "/nix/store/first.drv": {"/nix/store/first-out"}, "/nix/store/second.drv": {"/nix/store/second-out"}, }, True, ) ] assert df_components["store_path"].to_list() == [ "/nix/store/first.drv", "/nix/store/second.drv", ] ================================================ FILE: tests/test_cpe.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Focused tests for CPE generation.""" import pandas as pd from sbomnix import cpe class FakeCache: def __init__(self, df): self.df = df def get(self, _url): return self.df def set(self, *_args, **_kwargs): raise AssertionError("cache set should not be called for populated data") def test_cpe_uses_indexed_unique_product_vendor(monkeypatch): monkeypatch.setattr( cpe, "LockedDfCache", lambda: FakeCache( pd.DataFrame( { "product": ["openssl", "curl"], "vendor": ["openssl_project", "curl_project"], } ) ), ) generated = cpe.CPE().generate("openssl", "3.0.0") assert generated == "cpe:2.3:a:openssl_project:openssl:3.0.0:*:*:*:*:*:*:*" def test_cpe_ambiguous_product_falls_back_to_product_name(monkeypatch): monkeypatch.setattr( cpe, "LockedDfCache", lambda: FakeCache( pd.DataFrame( { "product": ["openssl", "openssl"], "vendor": ["first_vendor", "second_vendor"], } ) ), ) generated = cpe.CPE().generate("openssl", "3.0.0") assert generated == "cpe:2.3:a:openssl:openssl:3.0.0:*:*:*:*:*:*:*" ================================================ FILE: tests/test_dependency_index.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Focused tests for indexed SBOM dependency lookups.""" import pandas as pd from sbomnix.dependency_index import build_dependency_index def test_build_dependency_index_combines_runtime_and_buildtime_edges(): """Index both output-path and derivation-path dependencies for one component.""" df_sbomdb = pd.DataFrame( [ { "store_path": "/nix/store/target.drv", "purl": "pkg:nix/target@1.0", "outputs": ["/nix/store/target-out"], }, { "store_path": "/nix/store/runtime-dep.drv", "purl": "pkg:nix/runtime-dep@1.0", "outputs": 
["/nix/store/runtime-dep-out"], }, { "store_path": "/nix/store/build-dep.drv", "purl": "pkg:nix/build-dep@1.0", "outputs": ["/nix/store/build-dep-out"], }, ] ) df_outputs = df_sbomdb.explode("outputs") df_deps = pd.DataFrame( [ { "src_path": "/nix/store/runtime-dep-out", "target_path": "/nix/store/target-out", }, { "src_path": "/nix/store/build-dep.drv", "target_path": "/nix/store/target.drv", }, ] ) index = build_dependency_index(df_deps, df_sbomdb, df_outputs, uid="store_path") target_drv = next(df_sbomdb.itertuples()) assert index.lookup(target_drv) == [ "/nix/store/build-dep.drv", "/nix/store/runtime-dep.drv", ] assert index.lookup(target_drv, uid="purl") == [ "pkg:nix/build-dep@1.0", "pkg:nix/runtime-dep@1.0", ] def test_build_dependency_index_returns_none_without_dependencies(): """Return no lookup entries when the component has no indexed dependencies.""" df_sbomdb = pd.DataFrame( [ { "store_path": "/nix/store/target.drv", "purl": "pkg:nix/target@1.0", "outputs": ["/nix/store/target-out"], } ] ) index = build_dependency_index( pd.DataFrame(), df_sbomdb, df_sbomdb.explode("outputs"), uid="store_path", ) assert index.lookup(next(df_sbomdb.itertuples())) is None ================================================ FILE: tests/test_derivation_hardening.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Unit tests for derivation loading and SPDX hardening.""" import json from collections import namedtuple from types import SimpleNamespace from common import spdx as common_spdx from sbomnix import cdx as sbomnix_cdx from sbomnix import derivation as sbomnix_derivation from sbomnix import exporters as sbomnix_exporters def test_load_derivation_uses_nix_derivation_show(monkeypatch): drv_path = "/nix/store/0aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-hello-2.12.3.drv" out_path = "/nix/store/1bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb-hello-2.12.3" doc_path = "/nix/store/2ccccccccccccccccccccccccccccccc-hello-2.12.3-doc" calls = [] def fake_nix_cmd(*args): return ["nix", *args] def fake_exec_cmd(cmd, **_kwargs): calls.append(cmd) return SimpleNamespace( stdout=json.dumps( { "version": 4, "derivations": { drv_path: { "name": "hello-2.12.3", "version": 4, "system": "x86_64-linux", "outputs": { "doc": {"path": doc_path}, "out": {"path": out_path}, }, "env": { "name": "hello-2.12.3", "pname": "hello", "out": out_path, "outputs": "out doc", "version": "2.12.3", "urls": "https://example.test/hello.tar.gz", }, } }, } ) ) monkeypatch.setattr(sbomnix_derivation, "nix_cmd", fake_nix_cmd) monkeypatch.setattr(sbomnix_derivation, "exec_cmd", fake_exec_cmd) drv = sbomnix_derivation.load(drv_path, None) assert calls == [["nix", "derivation", "show", drv_path]] assert drv.store_path == drv_path assert drv.name == "hello-2.12.3" assert drv.pname == "hello" assert drv.version == "2.12.3" assert drv.system == "x86_64-linux" assert drv.out == out_path assert drv.outputs == [out_path, doc_path] assert drv.urls == "https://example.test/hello.tar.gz" assert drv.purl == "pkg:nix/hello@2.12.3" def test_canonicalize_spdx_license_id_canonicalizes_aliases(): expected_canonical_ids = { "GPL-2.0+": "GPL-2.0-or-later", "GPL-3.0": "GPL-3.0-only", "GPL-3.0+": "GPL-3.0-or-later", "LGPL-2.1": "LGPL-2.1-only", "LGPL-2.1+": "LGPL-2.1-or-later", } for license_id, canonical_id in expected_canonical_ids.items(): assert common_spdx.canonicalize_spdx_license_id(license_id) == canonical_id assert ( 
common_spdx.canonicalize_spdx_license_id("LicenseRef-scancode-free-unknown") == "LicenseRef-scancode-free-unknown" ) assert common_spdx.canonicalize_spdx_license_id("MIT AND Apache-2.0") is None assert common_spdx.canonicalize_spdx_license_id("not-a-license") is None def test_cdx_and_spdx_license_exporters_use_canonical_spdx_ids(): drv_type = namedtuple( "Drv", [ "name", "pname", "version", "purl", "cpe", "meta_description", "meta_license_spdxid", "meta_license_short", "patches", "outputs", "store_path", "out", "urls", "meta_homepage", "meta_position", ], ) drv = drv_type( name="hello-2.12.3", pname="hello", version="2.12.3", purl="pkg:nix/hello@2.12.3", cpe="", meta_description="Hello", meta_license_spdxid=( "GPL-3.0;GPL-3.0+;LGPL-2.1;LGPL-2.1+;LicenseRef-scancode-free-unknown" ), meta_license_short="GPL2+", patches="", outputs=["/nix/store/out"], store_path="/nix/store/0aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-hello-2.12.3.drv", out="/nix/store/out", urls="", meta_homepage="", meta_position="", ) component = sbomnix_cdx._drv_to_cdx_component(drv) package = sbomnix_exporters._drv_to_spdx_package(drv) assert component["licenses"] == [ {"license": {"id": "GPL-3.0-only"}}, {"license": {"id": "GPL-3.0-or-later"}}, {"license": {"id": "LGPL-2.1-only"}}, {"license": {"id": "LGPL-2.1-or-later"}}, {"license": {"id": "LicenseRef-scancode-free-unknown"}}, ] assert package["licenseInfoFromFiles"] == [ "GPL-3.0-only", "GPL-3.0-or-later", "LGPL-2.1-only", "LGPL-2.1-or-later", "LicenseRef-scancode-free-unknown", ] def test_cdx_falls_back_to_license_short_name_when_spdx_id_is_invalid(): drv_type = namedtuple( "Drv", [ "name", "pname", "version", "purl", "cpe", "meta_description", "meta_license_spdxid", "meta_license_short", "patches", "outputs", "store_path", "out", "urls", "meta_homepage", "meta_position", ], ) drv = drv_type( name="hello-2.12.3", pname="hello", version="2.12.3", purl="pkg:nix/hello@2.12.3", cpe="", meta_description="Hello", meta_license_spdxid="not-a-license", meta_license_short="Custom Short Name", patches="", outputs=["/nix/store/out"], store_path="/nix/store/0aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-hello-2.12.3.drv", out="/nix/store/out", urls="", meta_homepage="", meta_position="", ) component = sbomnix_cdx._drv_to_cdx_component(drv) package = sbomnix_exporters._drv_to_spdx_package(drv) assert component["licenses"] == [{"license": {"name": "Custom Short Name"}}] assert "licenseInfoFromFiles" not in package assert package["licenseConcluded"] == "NOASSERTION" ================================================ FILE: tests/test_flakeref_resolution.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Focused tests for flakeref resolution helpers.""" import string from types import SimpleNamespace import pytest from hypothesis import given from hypothesis import strategies as st from common.errors import FlakeRefRealisationError, FlakeRefResolutionError from common.flakeref import ( parse_nixos_configuration_ref, quote_nix_attr_segment, try_resolve_flakeref, ) from common.log import LOG_VERBOSE class CapturingLogger: def __init__(self): self.records = [] def info(self, msg, *args): self.records.append(("info", msg, args)) def log(self, level, msg, *args): self.records.append(("log", level, msg, args)) def debug(self, msg, *args): self.records.append(("debug", msg, args)) SAFE_PATH_CHARS = string.ascii_letters + string.digits + "._-" PATH_SEGMENTS = st.text(SAFE_PATH_CHARS, 
min_size=1, max_size=16).filter( lambda segment: segment not in {".", ".."} ) PLAIN_MISSING_PATHS = st.lists(PATH_SEGMENTS, min_size=1, max_size=3).map( lambda parts: "hypothesis-missing/" + "/".join(parts) ) FLAKE_ATTRS = st.text(SAFE_PATH_CHARS, min_size=1, max_size=24) FLAKE_REFS = st.one_of( FLAKE_ATTRS.map(lambda attr: f".#{attr}"), FLAKE_ATTRS.map(lambda attr: f"nixpkgs?ref=nixos-unstable#{attr}"), st.builds( lambda owner, repo, attr: f"github:{owner}/{repo}#{attr}", PATH_SEGMENTS, PATH_SEGMENTS, FLAKE_ATTRS, ), ) @pytest.mark.parametrize( ("name", "quoted"), [ ("host${foo}.é", r'"host\${foo}.é"'), ('quote"slash\\tab\tnewline\n', r'"quote\"slash\\tab\tnewline\n"'), ], ) def test_nixos_configuration_attr_segments_use_nix_string_escaping(name, quoted): assert quote_nix_attr_segment(name) == quoted assert parse_nixos_configuration_ref(f"/flake#nixosConfigurations.{quoted}") == ( "/flake", name, ) def test_nixos_configuration_parser_rejects_unescaped_interpolation(): assert parse_nixos_configuration_ref('/flake#nixosConfigurations."${foo}"') is None def test_try_resolve_flakeref_uses_argv_lists(): calls = [] def fake_exec_cmd(cmd, **kwargs): calls.append((cmd, kwargs)) return SimpleNamespace(stdout="/nix/store/resolved\n", stderr="", returncode=0) resolved = try_resolve_flakeref( "/tmp/my flake#pkg", force_realise=True, impure=True, exec_cmd_fn=fake_exec_cmd, ) assert resolved == "/nix/store/resolved" assert calls == [ ( [ "nix", "build", "--no-link", "--print-out-paths", "/tmp/my flake#pkg", "--extra-experimental-features", "flakes", "--extra-experimental-features", "nix-command", "--impure", ], {"raise_on_error": False, "return_error": True, "log_error": False}, ), ] def test_try_resolve_flakeref_can_return_derivation_path(): calls = [] def fake_exec_cmd(cmd, **kwargs): calls.append((cmd, kwargs)) return SimpleNamespace( stdout=( '{"derivations": {' '"11111111111111111111111111111111-package-1.0.drv":' '{"name": "package-1.0"}}, "version": 4}' ), stderr="", returncode=0, ) resolved = try_resolve_flakeref( "nixpkgs#package", derivation=True, impure=True, exec_cmd_fn=fake_exec_cmd, ) assert resolved == "/nix/store/11111111111111111111111111111111-package-1.0.drv" assert calls == [ ( [ "nix", "derivation", "show", "nixpkgs#package", "--extra-experimental-features", "flakes", "--extra-experimental-features", "nix-command", "--impure", ], {"raise_on_error": False, "return_error": True, "log_error": False}, ) ] def test_try_resolve_flakeref_logs_flake_progress_at_info(): logger = CapturingLogger() def fake_exec_cmd(_cmd, **_kwargs): return SimpleNamespace( stdout="/nix/store/resolved\n", stderr="", returncode=0, ) resolved = try_resolve_flakeref( ".#hello", force_realise=True, exec_cmd_fn=fake_exec_cmd, log=logger, ) assert resolved == "/nix/store/resolved" assert ( "info", "Realising flakeref '%s'", (".#hello",), ) in logger.records def test_try_resolve_flakeref_keeps_plain_path_probe_verbose(): logger = CapturingLogger() def fake_exec_cmd(_cmd, **_kwargs): return SimpleNamespace(stdout="", stderr="dummy eval failure", returncode=1) resolved = try_resolve_flakeref( "/nix/store/not-a-flake-output", exec_cmd_fn=fake_exec_cmd, log=logger, ) assert resolved is None assert ( "log", LOG_VERBOSE, "Evaluating '%s'", ("/nix/store/not-a-flake-output",), ) in (logger.records) assert not [record for record in logger.records if record[0] == "info"] def test_try_resolve_flakeref_raises_on_failed_force_realise(): def fake_exec_cmd(_cmd, **_kwargs): return SimpleNamespace(stdout="", stderr="build failed", 
returncode=1) with pytest.raises(FlakeRefRealisationError, match="build failed"): try_resolve_flakeref( "/tmp/my flake#pkg", force_realise=True, exec_cmd_fn=fake_exec_cmd, ) def test_try_resolve_flakeref_raises_when_force_realise_prints_no_path(): def fake_exec_cmd(_cmd, **_kwargs): return SimpleNamespace(stdout="\n", stderr="", returncode=0) with pytest.raises(FlakeRefRealisationError, match="returned no output path"): try_resolve_flakeref( "/tmp/my flake#pkg", force_realise=True, exec_cmd_fn=fake_exec_cmd, ) def test_try_resolve_flakeref_raises_on_failed_eval_for_flakeref(): def fake_exec_cmd(_cmd, **_kwargs): return SimpleNamespace(stdout="", stderr="attribute missing", returncode=1) with pytest.raises(FlakeRefResolutionError, match="attribute missing"): try_resolve_flakeref(".#missing", exec_cmd_fn=fake_exec_cmd) def test_try_resolve_flakeref_returns_none_for_non_flake_path(): def fake_exec_cmd(_cmd, **_kwargs): return SimpleNamespace( stdout="", stderr="does not contain a 'flake.nix'", returncode=1, ) resolved = try_resolve_flakeref( "/nix/store/not-a-flake-output", exec_cmd_fn=fake_exec_cmd, ) assert resolved is None @given(PLAIN_MISSING_PATHS) def test_try_resolve_flakeref_returns_none_for_generated_plain_paths(path): def fake_exec_cmd(_cmd, **_kwargs): return SimpleNamespace(stdout="", stderr="dummy eval failure", returncode=1) resolved = try_resolve_flakeref(path, exec_cmd_fn=fake_exec_cmd) assert resolved is None @pytest.mark.parametrize("path", ["missing", "./missing", "foo/bar"]) def test_try_resolve_flakeref_returns_none_for_missing_relative_paths(path): def fake_exec_cmd(_cmd, **_kwargs): return SimpleNamespace(stdout="", stderr="dummy eval failure", returncode=1) resolved = try_resolve_flakeref(path, exec_cmd_fn=fake_exec_cmd) assert resolved is None def test_try_resolve_flakeref_returns_none_for_existing_fragment_path_when_eval_fails( tmp_path, ): existing_path = tmp_path / "contains#hash" existing_path.mkdir() calls = [] def fake_exec_cmd(cmd, **kwargs): calls.append((cmd, kwargs)) return SimpleNamespace( stdout="", stderr="does not contain a 'flake.nix'", returncode=1, ) resolved = try_resolve_flakeref(existing_path.as_posix(), exec_cmd_fn=fake_exec_cmd) assert resolved is None assert calls @given(FLAKE_REFS) def test_try_resolve_flakeref_raises_for_generated_flakeref_failures(flakeref): calls = [] def fake_exec_cmd(cmd, **kwargs): calls.append((cmd, kwargs)) return SimpleNamespace(stdout="", stderr="attribute missing", returncode=1) with pytest.raises(FlakeRefResolutionError, match="attribute missing"): try_resolve_flakeref(flakeref, exec_cmd_fn=fake_exec_cmd) assert flakeref in calls[0][0] assert calls[0][1] == { "raise_on_error": False, "return_error": True, "log_error": False, } @given(FLAKE_REFS) def test_try_resolve_flakeref_strips_generated_eval_output(flakeref): resolved_path = "/nix/store/00000000000000000000000000000000-package" def fake_exec_cmd(cmd, **_kwargs): assert flakeref in cmd return SimpleNamespace(stdout=f"{resolved_path}\n", stderr="", returncode=0) resolved = try_resolve_flakeref(flakeref, exec_cmd_fn=fake_exec_cmd) assert resolved == resolved_path def test_flakeref_realisation_error_accepts_none_stderr(): error = FlakeRefRealisationError(".#pkg", stderr=None) assert error.stderr == "" assert str(error) == "Failed force-realising flakeref '.#pkg'" def test_flake_ref_resolution_error_preserves_stderr_verbatim(): error = FlakeRefResolutionError(".#pkg", stderr="stderr details\n") assert error.stderr == "stderr details\n" assert str(error) == "Failed 
evaluating flakeref '.#pkg': stderr details" ================================================ FILE: tests/test_library_exceptions.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Unit tests for typed library exceptions.""" import subprocess from types import SimpleNamespace import pandas as pd import pytest from common import df as common_df from common.errors import ( CommandNotFoundError, CsvLoadError, InvalidCpeDictionaryError, InvalidNixArtifactError, InvalidNixJsonError, MissingNixDeriverError, WhitelistApplicationError, ) from common.proc import exit_unless_command_exists, exit_unless_nix_artifact from repology.reporting import report_cves from sbomnix import cpe from sbomnix.derivers import require_deriver from vulnxscan import whitelist def test_df_from_csv_file_raises_csv_load_error(monkeypatch): def fail_read_csv(*_args, **_kwargs): raise pd.errors.ParserError("bad csv") monkeypatch.setattr(common_df.pd, "read_csv", fail_read_csv) with pytest.raises(CsvLoadError, match="Error reading csv file 'broken.csv'"): common_df.df_from_csv_file("broken.csv") def test_df_log_ignores_none(): common_df.df_log(None, 0) def test_exit_unless_command_exists_raises_typed_error(): with pytest.raises(CommandNotFoundError, match="command 'nix' is not in PATH"): exit_unless_command_exists("nix", which_fn=lambda _name: None) def test_exit_unless_nix_artifact_raises_typed_error(): def fail_exec_cmd(*_args, **_kwargs): raise subprocess.CalledProcessError(1, ["nix", "path-info", "missing"]) with pytest.raises( InvalidNixArtifactError, match="Specified target is not a nix artifact: 'missing'", ): exit_unless_nix_artifact("missing", exec_cmd_fn=fail_exec_cmd) def test_exit_unless_nix_artifact_uses_modern_nix_commands(): calls = [] def fake_exec_cmd(cmd, **_kwargs): calls.append(cmd) return SimpleNamespace(stdout="/nix/store/target\n") exit_unless_nix_artifact( "/nix/store/target", force_realise=True, exec_cmd_fn=fake_exec_cmd, ) assert calls == [ [ "nix", "build", "--no-link", "/nix/store/target", "--extra-experimental-features", "flakes", "--extra-experimental-features", "nix-command", ], [ "nix", "path-info", "/nix/store/target", "--extra-experimental-features", "flakes", "--extra-experimental-features", "nix-command", ], ] def test_find_deriver_raises_typed_error(): with pytest.raises(MissingNixDeriverError, match="No deriver found for: 'missing'"): require_deriver("missing", find_deriver_fn=lambda _path: None) def test_require_deriver_wraps_lookup_runtime_errors(): def fail_find_deriver(_path): raise RuntimeError("deriver metadata exists but is not loadable") with pytest.raises( MissingNixDeriverError, match="No deriver found for: 'missing'", ): require_deriver("missing", find_deriver_fn=fail_find_deriver) def test_require_deriver_preserves_typed_lookup_errors(): def fail_find_deriver(_path): raise InvalidNixJsonError("nix derivation show", "bad schema") with pytest.raises(InvalidNixJsonError, match="bad schema"): require_deriver("missing", find_deriver_fn=fail_find_deriver) def test_cpe_raises_typed_error_when_required_columns_are_missing(monkeypatch): class FakeCache: def get(self, _url): return pd.DataFrame({"product": ["openssl"]}) def set(self, *_args, **_kwargs): raise AssertionError("cache set should not be called for populated data") monkeypatch.setattr(cpe, "LockedDfCache", FakeCache) with pytest.raises(InvalidCpeDictionaryError, match="cpedict"): cpe.CPE() 
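

# Illustrative sketch, not part of the original suite: one way a library
# consumer might recover from the typed CsvLoadError instead of letting it
# propagate. The helper `_load_optional_csv` is hypothetical and exists only
# for this example; it relies solely on names already imported above.


def _load_optional_csv(path):
    """Return a dataframe for `path`, or None when the csv cannot be parsed."""
    try:
        return common_df.df_from_csv_file(path)
    except CsvLoadError:
        return None


def test_sketch_caller_recovers_from_csv_load_error(monkeypatch):
    def fail_read_csv(*_args, **_kwargs):
        raise pd.errors.ParserError("bad csv")

    monkeypatch.setattr(common_df.pd, "read_csv", fail_read_csv)
    assert _load_optional_csv("broken.csv") is None
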
def test_df_apply_whitelist_raises_typed_error_without_vuln_id_column(): df_whitelist = pd.DataFrame({"vuln_id": ["CVE-.*"], "comment": ["reason"]}) df_vulns = pd.DataFrame({"package": ["openssl"]}) with pytest.raises( WhitelistApplicationError, match="Missing 'vuln_id' column from df_vulns", ): whitelist.df_apply_whitelist(df_whitelist, df_vulns) def test_repology_cve_report_returns_false_on_empty_results(): assert report_cves(None) is False assert report_cves(pd.DataFrame()) is False ================================================ FILE: tests/test_nix_cli_argv.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Whitespace-safe argv construction tests for nix-facing helpers.""" import json from types import SimpleNamespace import pytest from common.proc import exec_cmd, nix_cmd from nixmeta import flake_metadata from nixupdate import nix_outdated from sbomnix import derivers as sbomnix_derivers from sbomnix.meta import Meta def test_exec_cmd_rejects_string_commands(): with pytest.raises( TypeError, match="cmd must be an argv sequence, not a string-like value", ): exec_cmd("echo hello") def test_find_deriver_uses_argv_list(monkeypatch): calls = [] drv_basename = "my target.drv" def fake_exec_cmd(cmd, **kwargs): calls.append((cmd, kwargs)) if cmd[:3] == ["nix", "derivation", "show"]: return SimpleNamespace( stdout=json.dumps( { "derivations": { drv_basename: {"name": "target"}, }, "version": 4, } ), returncode=0, stderr="", ) raise AssertionError(f"unexpected command: {cmd} kwargs={kwargs}") monkeypatch.setattr(sbomnix_derivers, "exec_cmd", fake_exec_cmd) monkeypatch.setattr("os.path.exists", lambda path: path.endswith(".drv")) drv_path = sbomnix_derivers.find_deriver("/nix/store/my target") assert drv_path == "my target.drv" assert calls == [ ( [ "nix", "derivation", "show", "/nix/store/my target", "--extra-experimental-features", "flakes", "--extra-experimental-features", "nix-command", ], {"raise_on_error": False, "log_error": False}, ), ] def test_find_deriver_supports_nix_2_33_wrapped_json(monkeypatch): target_path = "/custom/store/0aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-root" drv_basename = "1bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb-root.drv" def fake_exec_cmd(cmd, **kwargs): if cmd[:3] == ["nix", "derivation", "show"]: return SimpleNamespace( stdout=json.dumps( { "derivations": {drv_basename: {"name": "root"}}, "version": 4, } ), returncode=0, stderr="", ) raise AssertionError(f"unexpected command: {cmd} kwargs={kwargs}") monkeypatch.setattr(sbomnix_derivers, "exec_cmd", fake_exec_cmd) monkeypatch.setattr("os.path.exists", lambda path: path.endswith(".drv")) drv_path = sbomnix_derivers.find_deriver(target_path) assert drv_path == f"/custom/store/{drv_basename}" def test_find_deriver_rejects_unloadable_structured_deriver(monkeypatch): calls = [] target_path = "/nix/store/target" drv_path = "/nix/store/missing-target.drv" def fake_exec_cmd(cmd, **kwargs): calls.append((cmd, kwargs)) if cmd[:3] == ["nix", "derivation", "show"]: return SimpleNamespace( stdout=json.dumps( { "derivations": {drv_path: {"name": "target"}}, "version": 4, } ), returncode=0, stderr="", ) raise AssertionError(f"unexpected command: {cmd} kwargs={kwargs}") monkeypatch.setattr(sbomnix_derivers, "exec_cmd", fake_exec_cmd) monkeypatch.setattr("os.path.exists", lambda _path: False) with pytest.raises(RuntimeError, match="missing-target.drv"): sbomnix_derivers.find_deriver(target_path) assert calls == 
[ ( [ "nix", "derivation", "show", target_path, "--extra-experimental-features", "flakes", "--extra-experimental-features", "nix-command", ], {"raise_on_error": False, "log_error": False}, ) ] def test_get_flake_metadata_uses_argv_list(): calls = [] def fake_exec_cmd(cmd, **kwargs): calls.append((cmd, kwargs)) return SimpleNamespace(stdout='{"path": "/nix/store/nixpkgs"}', returncode=0) meta = flake_metadata.get_flake_metadata( "/tmp/my flake", exec_cmd_fn=fake_exec_cmd, nix_cmd_fn=nix_cmd, ) assert meta == {"path": "/nix/store/nixpkgs"} assert calls == [ ( [ "nix", "flake", "metadata", "/tmp/my flake", "--json", "--extra-experimental-features", "flakes", "--extra-experimental-features", "nix-command", ], {"raise_on_error": False, "return_error": True, "log_error": False}, ) ] def test_get_flake_metadata_strips_nixpkgs_prefix_without_splitting_spaces(): calls = [] def fake_exec_cmd(cmd, **kwargs): calls.append((cmd, kwargs)) return SimpleNamespace(stdout='{"path": "/nix/store/nixpkgs"}', returncode=0) flake_metadata.get_flake_metadata( "nixpkgs=/tmp/my flake", exec_cmd_fn=fake_exec_cmd, nix_cmd_fn=nix_cmd, ) assert calls[0][0][3] == "/tmp/my flake" def test_run_nix_visualize_uses_argv_list(tmp_path, monkeypatch): calls = [] output_path = tmp_path / "graph output.csv" class FakeTempFile: """Minimal context manager compatible with NamedTemporaryFile.""" def __init__(self, path): self.name = path.as_posix() def __enter__(self): return self def __exit__(self, exc_type, exc, traceback): return False def fake_exec_cmd(cmd, **kwargs): calls.append((cmd, kwargs)) return SimpleNamespace(stdout="", returncode=0) monkeypatch.setattr( nix_outdated, "NamedTemporaryFile", lambda **_kwargs: FakeTempFile(output_path), ) monkeypatch.setattr(nix_outdated, "exec_cmd", fake_exec_cmd) returned_path = nix_outdated._run_nix_visualize("/nix/store/my target") assert returned_path == output_path assert calls == [ ( [ "nix-visualize", f"--output={output_path.as_posix()}", "/nix/store/my target", ], {}, ) ] def test_meta_reads_nix_path_entry_with_spaces(monkeypatch): scanned = [] monkeypatch.setenv("NIX_PATH", "foo=/tmp/other:nixpkgs=/tmp/my flake") monkeypatch.setattr(Meta, "_scan", lambda self, path: scanned.append(path) or path) resolved = Meta().get_nixpkgs_meta() assert resolved == "/tmp/my flake" assert scanned == ["/tmp/my flake"] ================================================ FILE: tests/test_nix_outdated_pipeline.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Offline tests for nix_outdated pipeline and reporting behavior.""" from contextlib import contextmanager from types import SimpleNamespace import pandas as pd from common.df import df_from_csv_file from common.log import LOG, LOG_LEVELS, set_log_verbosity from nixupdate import pipeline from nixupdate.report import generate_report_df, write_report class FakeSbomArtifact: def __init__(self, cdx_path): self.cdx_path = cdx_path self.cleaned = False def cleanup(self): self.cleaned = True def _repology_df(): return pd.DataFrame( [ { "package": "hello", "version": "2.11", "version_sbom": "2.10", "newest_upstream_release": "2.12", "status": "outdated", "repo_version_classify": "repo_pkg_needs_update", "sbom_version_classify": "sbom_pkg_needs_update", } ] ) @contextmanager def _log_verbosity(verbosity): previous_level = LOG.level set_log_verbosity(verbosity) try: yield finally: if previous_level in LOG_LEVELS: 
set_log_verbosity(LOG_LEVELS.index(previous_level)) else: LOG.setLevel(previous_level) def test_collect_outdated_scan_data_runtime_uses_hooks_and_cleans_outputs(tmp_path): artifact = FakeSbomArtifact(tmp_path / "deps.cdx.json") graph_csv = tmp_path / "graph.csv" calls = [] def generate_temp_sbom(target_path, buildtime, prefix, cdx_suffix): calls.append(("generate_temp_sbom", target_path, buildtime, prefix, cdx_suffix)) return artifact def query_repology(sbom_path): calls.append(("query_repology", sbom_path)) return _repology_df() def run_nix_visualize(target_path): calls.append(("run_nix_visualize", target_path)) graph_csv.write_text("package,version,level\nhello,2.10,1\n", encoding="utf-8") return graph_csv def parse_nix_visualize(csv_path): calls.append(("parse_nix_visualize", csv_path, csv_path.exists())) return pd.DataFrame( [ { "package": "hello", "version": "2.10", "level": "1", } ] ) with _log_verbosity(0): data = pipeline.collect_outdated_scan_data( "/nix/store/root", buildtime=False, hooks=pipeline.OutdatedScanHooks( query_repology=query_repology, generate_temp_sbom=generate_temp_sbom, run_nix_visualize=run_nix_visualize, parse_nix_visualize=parse_nix_visualize, ), ) assert calls == [ ("generate_temp_sbom", "/nix/store/root", False, "nixdeps_", ".cdx.json"), ("query_repology", artifact.cdx_path), ("run_nix_visualize", "/nix/store/root"), ("parse_nix_visualize", graph_csv, True), ] assert artifact.cleaned assert not graph_csv.exists() assert data.repology.to_dict(orient="records") == _repology_df().to_dict( orient="records" ) assert data.nix_visualize is not None assert data.nix_visualize.to_dict(orient="records") == [ { "package": "hello", "version": "2.10", "level": "1", } ] def test_collect_outdated_scan_data_buildtime_skips_nix_visualize(tmp_path): artifact = FakeSbomArtifact(tmp_path / "deps.cdx.json") with _log_verbosity(0): data = pipeline.collect_outdated_scan_data( "/nix/store/root.drv", buildtime=True, hooks=pipeline.OutdatedScanHooks( query_repology=lambda _sbom_path: _repology_df(), generate_temp_sbom=lambda *_args, **_kwargs: artifact, run_nix_visualize=lambda _target_path: (_ for _ in ()).throw( AssertionError("nix-visualize should not run for buildtime scans") ), parse_nix_visualize=lambda _csv_path: (_ for _ in ()).throw( AssertionError("nix-visualize output should not be parsed") ), ), ) assert artifact.cleaned assert data.nix_visualize is None assert data.repology.to_dict(orient="records") == _repology_df().to_dict( orient="records" ) def test_collect_outdated_scan_data_buildtime_debug_keeps_nix_visualize_optional( tmp_path, ): artifact = FakeSbomArtifact(tmp_path / "deps.cdx.json") with _log_verbosity(2): data = pipeline.collect_outdated_scan_data( "/nix/store/root.drv", buildtime=True, hooks=pipeline.OutdatedScanHooks( query_repology=lambda _sbom_path: _repology_df(), generate_temp_sbom=lambda *_args, **_kwargs: artifact, run_nix_visualize=lambda _target_path: (_ for _ in ()).throw( AssertionError("nix-visualize should not run for buildtime scans") ), parse_nix_visualize=lambda _csv_path: (_ for _ in ()).throw( AssertionError("nix-visualize output should not be parsed") ), ), ) assert not artifact.cleaned assert data.nix_visualize is None assert data.repology.to_dict(orient="records") == _repology_df().to_dict( orient="records" ) def test_generate_report_df_buildtime_adds_default_priority_and_renames_version(): df_report = generate_report_df(None, _repology_df()) assert list(df_report["level"]) == ["0"] assert list(df_report["version_repology"]) == ["2.11"] 
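    # The buildtime report renames repology's `version` column to
    # `version_repology`; the original column name must no longer be present.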
assert "version" not in df_report.columns def test_write_report_defaults_to_nixpkgs_updates_and_drops_newest_duplicates(tmp_path): out_path = tmp_path / "nix_outdated.csv" df = pd.DataFrame( [ { "level": "1", "package": "openssl", "version": "3.0", "version_sbom": "3.0", "version_repology": "3.1", "newest_upstream_release": "3.2", "status": "outdated", "repo_version_classify": "repo_pkg_needs_update", "sbom_version_classify": "", }, { "level": "2", "package": "hello", "version": "2.10", "version_sbom": "2.10", "version_repology": "2.11", "newest_upstream_release": "2.12", "status": "outdated", "repo_version_classify": "repo_pkg_needs_update", "sbom_version_classify": "", }, { "level": "3", "package": "hello", "version": "2.12", "version_sbom": "2.12", "version_repology": "2.12", "newest_upstream_release": "2.12", "status": "newest", "repo_version_classify": "", "sbom_version_classify": "", }, { "level": "4", "package": "local-only", "version": "1.0", "version_sbom": "1.0", "version_repology": "1.1", "newest_upstream_release": "1.1", "status": "outdated", "repo_version_classify": "", "sbom_version_classify": "sbom_pkg_needs_update", }, ] ) write_report( df, SimpleNamespace(local=False, buildtime=False, out=out_path.as_posix()), ) report = df_from_csv_file(out_path) assert report.to_dict(orient="records") == [ { "priority": "1", "nix_package": "openssl", "version_local": "3.0", "version_nixpkgs": "3.1", "version_upstream": "3.2", } ] def test_write_report_local_buildtime_outputs_local_updates_without_priority(tmp_path): out_path = tmp_path / "nix_outdated_local.csv" df = pd.DataFrame( [ { "level": "0", "package": "local-only", "version": "1.0", "version_sbom": "1.0", "version_repology": "1.1", "newest_upstream_release": "1.2", "status": "outdated", "repo_version_classify": "", "sbom_version_classify": "sbom_pkg_needs_update", }, { "level": "0", "package": "repo-only", "version": "2.0", "version_sbom": "2.0", "version_repology": "2.1", "newest_upstream_release": "2.2", "status": "outdated", "repo_version_classify": "repo_pkg_needs_update", "sbom_version_classify": "", }, ] ) write_report( df, SimpleNamespace(local=True, buildtime=True, out=out_path.as_posix()), ) report = df_from_csv_file(out_path) assert list(report.columns) == [ "nix_package", "version_local", "version_nixpkgs", "version_upstream", ] assert report.to_dict(orient="records") == [ { "nix_package": "local-only", "version_local": "1.0", "version_nixpkgs": "1.1", "version_upstream": "1.2", } ] ================================================ FILE: tests/test_nix_target_resolution.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Focused tests for shared nix target resolution helpers.""" import subprocess from types import SimpleNamespace import pytest from common.errors import ( FlakeRefRealisationError, FlakeRefResolutionError, InvalidNixArtifactError, MissingNixOutPathError, ) from sbomnix import cli_utils as sbomnix_cli_utils def test_resolve_nix_target_preserves_flakeref_on_success(monkeypatch): monkeypatch.setattr( sbomnix_cli_utils, "try_resolve_flakeref", lambda *_args, **_kwargs: "/nix/store/resolved", ) resolved = sbomnix_cli_utils.resolve_nix_target(".#hello", buildtime=False) assert resolved == sbomnix_cli_utils.ResolvedNixTarget( path="/nix/store/resolved", flakeref=".#hello", original_ref=".#hello", ) def 
test_resolve_nix_target_requests_derivation_for_buildtime_flakeref(monkeypatch): calls = [] def fake_resolve(flakeref, **kwargs): calls.append((flakeref, kwargs)) return "/nix/store/resolved.drv" monkeypatch.setattr( sbomnix_cli_utils, "try_resolve_flakeref", fake_resolve, ) resolved = sbomnix_cli_utils.resolve_nix_target(".#hello", buildtime=True) assert resolved.path == "/nix/store/resolved.drv" assert calls == [ ( ".#hello", { "force_realise": False, "impure": False, "derivation": True, }, ) ] def test_resolve_nix_target_normalizes_plain_nixos_configuration(monkeypatch): calls = [] def fake_resolve(flakeref, **_kwargs): calls.append(flakeref) return "/nix/store/resolved" monkeypatch.setattr( sbomnix_cli_utils, "try_resolve_flakeref", fake_resolve, ) resolved = sbomnix_cli_utils.resolve_nix_target( "/flake#nixosConfigurations.host", buildtime=False, ) assert calls == ['/flake#nixosConfigurations."host".config.system.build.toplevel'] assert resolved == sbomnix_cli_utils.ResolvedNixTarget( path="/nix/store/resolved", flakeref='/flake#nixosConfigurations."host".config.system.build.toplevel', original_ref="/flake#nixosConfigurations.host", ) def test_resolve_nix_target_normalizes_quoted_nixos_configuration(monkeypatch): calls = [] def fake_resolve(flakeref, **_kwargs): calls.append(flakeref) return "/nix/store/resolved" monkeypatch.setattr( sbomnix_cli_utils, "try_resolve_flakeref", fake_resolve, ) resolved = sbomnix_cli_utils.resolve_nix_target( '/flake#nixosConfigurations."host.example.com"', buildtime=False, ) assert calls == [ '/flake#nixosConfigurations."host.example.com".config.system.build.toplevel' ] assert resolved == sbomnix_cli_utils.ResolvedNixTarget( path="/nix/store/resolved", flakeref=( '/flake#nixosConfigurations."host.example.com".config.system.build.toplevel' ), original_ref='/flake#nixosConfigurations."host.example.com"', ) @pytest.mark.parametrize( "nixref", [ "/flake#nixosConfigurations.", '/flake#nixosConfigurations."unterminated', '/flake#nixosConfigurations."trailing\\', ], ) def test_resolve_nix_target_leaves_malformed_nixos_configuration_refs( nixref, monkeypatch, ): calls = [] def fake_resolve(flakeref, **_kwargs): calls.append(flakeref) return "/nix/store/resolved" monkeypatch.setattr( sbomnix_cli_utils, "try_resolve_flakeref", fake_resolve, ) resolved = sbomnix_cli_utils.resolve_nix_target(nixref, buildtime=False) assert calls == [nixref] assert resolved == sbomnix_cli_utils.ResolvedNixTarget( path="/nix/store/resolved", flakeref=nixref, original_ref=nixref, ) def test_resolve_nix_target_propagates_flakeref_realisation_failure_without_path_probe( monkeypatch, ): artifact_checks = [] def raise_realisation_error(*_args, **_kwargs): raise FlakeRefRealisationError(".#broken", "build failed") monkeypatch.setattr( sbomnix_cli_utils, "try_resolve_flakeref", raise_realisation_error, ) monkeypatch.setattr( sbomnix_cli_utils, "exit_unless_nix_artifact", lambda path, force_realise=False: artifact_checks.append((path, force_realise)), ) with pytest.raises(FlakeRefRealisationError) as excinfo: sbomnix_cli_utils.resolve_nix_target(".#broken", buildtime=False) assert ( str(excinfo.value) == "Failed force-realising flakeref '.#broken': build failed" ) assert not artifact_checks def test_resolve_nix_target_propagates_flakeref_eval_failure_without_path_probe( monkeypatch, ): artifact_checks = [] def raise_resolution_error(*_args, **_kwargs): raise FlakeRefResolutionError(".#broken", "attribute missing") monkeypatch.setattr( sbomnix_cli_utils, "try_resolve_flakeref", 
raise_resolution_error, ) monkeypatch.setattr( sbomnix_cli_utils, "exit_unless_nix_artifact", lambda path, force_realise=False: artifact_checks.append((path, force_realise)), ) with pytest.raises(FlakeRefResolutionError) as excinfo: sbomnix_cli_utils.resolve_nix_target(".#broken", buildtime=False) assert ( str(excinfo.value) == "Failed evaluating flakeref '.#broken': attribute missing" ) assert not artifact_checks def test_resolve_nix_target_uses_plain_path_validation(monkeypatch): artifact_checks = [] monkeypatch.setattr( sbomnix_cli_utils, "try_resolve_flakeref", lambda *_args, **_kwargs: None, ) monkeypatch.setattr( sbomnix_cli_utils, "exit_unless_nix_artifact", lambda path, force_realise=False: artifact_checks.append((path, force_realise)), ) resolved = sbomnix_cli_utils.resolve_nix_target("/nix/store/not-a-flake") assert resolved == sbomnix_cli_utils.ResolvedNixTarget( path="/nix/store/not-a-flake", flakeref=None, original_ref="/nix/store/not-a-flake", ) assert artifact_checks == [("/nix/store/not-a-flake", True)] def test_resolve_nix_target_realises_runtime_drv_target(monkeypatch): calls = [] monkeypatch.setattr( sbomnix_cli_utils, "try_resolve_flakeref", lambda *_args, **_kwargs: None, ) def fake_exec_cmd(cmd, **kwargs): calls.append((cmd, kwargs)) return SimpleNamespace(stdout="/nix/store/resolved-output\n") monkeypatch.setattr(sbomnix_cli_utils, "exec_cmd", fake_exec_cmd) resolved = sbomnix_cli_utils.resolve_nix_target( "/nix/store/target.drv", buildtime=False, ) assert resolved == sbomnix_cli_utils.ResolvedNixTarget( path="/nix/store/resolved-output", flakeref=None, original_ref="/nix/store/target.drv", ) assert calls == [ ( [ "nix", "build", "--no-link", "--print-out-paths", "/nix/store/target.drv^*", "--extra-experimental-features", "flakes", "--extra-experimental-features", "nix-command", ], {}, ) ] def test_resolve_nix_target_uses_first_runtime_drv_output(monkeypatch): monkeypatch.setattr( sbomnix_cli_utils, "try_resolve_flakeref", lambda *_args, **_kwargs: None, ) monkeypatch.setattr( sbomnix_cli_utils, "exec_cmd", lambda *_args, **_kwargs: SimpleNamespace( stdout="\n/nix/store/first-output\n/nix/store/second-output\n" ), ) resolved = sbomnix_cli_utils.resolve_nix_target( "/nix/store/target.drv", buildtime=False, ) assert resolved.path == "/nix/store/first-output" def test_resolve_nix_target_rejects_empty_runtime_drv_output(monkeypatch): monkeypatch.setattr( sbomnix_cli_utils, "try_resolve_flakeref", lambda *_args, **_kwargs: None, ) monkeypatch.setattr( sbomnix_cli_utils, "exec_cmd", lambda *_args, **_kwargs: SimpleNamespace(stdout="\n"), ) with pytest.raises(MissingNixOutPathError): sbomnix_cli_utils.resolve_nix_target( "/nix/store/target.drv", buildtime=False, ) def test_resolve_nix_target_rejects_failed_runtime_drv_realisation(monkeypatch): monkeypatch.setattr( sbomnix_cli_utils, "try_resolve_flakeref", lambda *_args, **_kwargs: None, ) def fake_exec_cmd(*_args, **_kwargs): raise subprocess.CalledProcessError(1, ["nix", "build"]) monkeypatch.setattr(sbomnix_cli_utils, "exec_cmd", fake_exec_cmd) with pytest.raises(InvalidNixArtifactError): sbomnix_cli_utils.resolve_nix_target( "/nix/store/target.drv", buildtime=False, ) ================================================ FILE: tests/test_nix_utils_parsing.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Focused tests for nix derivation JSON normalization helpers.""" import json import 
pytest from common.errors import InvalidNixJsonError from common.nix_utils import get_nix_store_dir, parse_nix_derivation_show def test_parse_nix_derivation_show_normalizes_nix_2_33_store_paths(): parsed = parse_nix_derivation_show( json.dumps( { "version": 4, "derivations": { "0aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-root.drv": { "name": "root", "outputs": { "out": { "path": "1bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb-root", } }, "inputs": { "drvs": { "2ccccccccccccccccccccccccccccccc-dep.drv": ["out"], }, "srcs": [ "3ddddddddddddddddddddddddddddddd-source", ], }, } }, } ), store_path_hint="/custom/store/0aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-root.drv", ) assert parsed == { "/custom/store/0aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-root.drv": { "name": "root", "outputs": { "out": { "path": "/custom/store/1bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb-root", } }, "inputs": { "drvs": { "/custom/store/2ccccccccccccccccccccccccccccccc-dep.drv": ["out"], }, "srcs": [ "/custom/store/3ddddddddddddddddddddddddddddddd-source", ], }, } } def test_get_nix_store_dir_ignores_colon_separated_env_paths(): assert ( get_nix_store_dir( "/custom/store/1bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb-bin:" "/custom/store/2ccccccccccccccccccccccccccccccc-sbin" ) == "/custom/store" ) def test_parse_nix_derivation_show_infers_store_dir_from_path_like_env_values(): drv_basename = "0aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-root.drv" out_basename = "1bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb-root" parsed = parse_nix_derivation_show( json.dumps( { "version": 4, "derivations": { drv_basename: { "name": "root", "outputs": {"out": {"method": "nar"}}, "env": { "out": out_basename, "PATH": ( "/custom/store/3ddddddddddddddddddddddddddddddd-coreutils/bin:" "/custom/store/4eeeeeeeeeeeeeeeeeeeeeeeeeeeeeee-git/bin:" "/custom/store/5fffffffffffffffffffffffffffffff-graphviz/bin" ), }, } }, } ) ) drv_path = f"/custom/store/{drv_basename}" assert list(parsed) == [drv_path] assert parsed[drv_path]["env"]["out"] == f"/custom/store/{out_basename}" def test_parse_nix_derivation_show_rejects_changed_wrapper_shape(): with pytest.raises(InvalidNixJsonError, match="expected `derivations` object"): parse_nix_derivation_show(json.dumps({"version": 4, "derivations": []})) def test_parse_nix_derivation_show_rejects_changed_output_shape(): with pytest.raises(InvalidNixJsonError, match="expected `outputs`"): parse_nix_derivation_show( json.dumps( { "version": 4, "derivations": { "/nix/store/0aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-root.drv": { "outputs": [], } }, } ) ) def test_parse_nix_derivation_show_rejects_invalid_json(): with pytest.raises(InvalidNixJsonError, match="invalid JSON"): parse_nix_derivation_show("not-json") ================================================ FILE: tests/test_nixgraph_graph.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Focused tests for nixgraph loading and traversal.""" from types import SimpleNamespace import pandas as pd from nixgraph import graph as nixgraph_graph from nixgraph import render as nixgraph_render from nixgraph.render import NixDependencyGraph from sbomnix.closure import dependency_rows_to_dataframe from sbomnix.runtime import RuntimeClosure class CapturingLogger: def __init__(self): self.records = [] def debug(self, msg, *args): self.records.append(("debug", msg, args)) def info(self, msg, *args): self.records.append(("info", msg, args)) def warning(self, msg, *args): self.records.append(("warning", msg, args)) def log(self, level, msg, *args): 
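        # Capture LOG.log(level, ...) calls together with their numeric level
        # so tests can assert on records emitted at custom verbosity levels.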
self.records.append(("log", level, msg, args)) def test_dependency_graph_returns_dataframe_for_csv_output(): """Return the traversed graph rows directly when CSV mode is requested.""" df_dependencies = pd.DataFrame.from_records( [ { "src_path": "/nix/store/bash", "src_pname": "bash", "target_path": "/nix/store/hello", "target_pname": "hello", }, { "src_path": "/nix/store/glibc", "src_pname": "glibc", "target_path": "/nix/store/bash", "target_pname": "bash", }, ] ) args = SimpleNamespace( out="graph.csv", depth=3, inverse=None, until=None, colorize=None, pathnames=False, return_df=True, ) df_out = NixDependencyGraph(df_dependencies).draw("/nix/store/hello", args) df_out = df_out.sort_values(["graph_depth", "src_path"]).reset_index(drop=True) assert list(df_out["graph_depth"]) == [1, 2] assert list(df_out["target_path"]) == ["/nix/store/hello", "/nix/store/bash"] assert list(df_out["src_path"]) == ["/nix/store/bash", "/nix/store/glibc"] def test_dependency_graph_inverse_returns_dataframe_for_csv_output(): """Return inverse traversal rows through the shared dependency walker.""" df_dependencies = pd.DataFrame.from_records( [ { "src_path": "/nix/store/bash", "src_pname": "bash", "target_path": "/nix/store/hello", "target_pname": "hello", }, { "src_path": "/nix/store/glibc", "src_pname": "glibc", "target_path": "/nix/store/bash", "target_pname": "bash", }, ] ) args = SimpleNamespace( out="graph.csv", depth=3, inverse="glibc", until=None, colorize=None, pathnames=False, return_df=True, ) df_out = NixDependencyGraph(df_dependencies).draw("/nix/store/hello", args) df_out = df_out.sort_values(["graph_depth", "target_path"]).reset_index(drop=True) assert list(df_out["graph_depth"]) == [1, 2] assert list(df_out["target_path"]) == ["/nix/store/bash", "/nix/store/hello"] assert list(df_out["src_path"]) == ["/nix/store/glibc", "/nix/store/bash"] def test_dependency_graph_writes_raw_dot_without_graphviz_render(tmp_path): class FakeDigraph: def __init__(self): self.saved = [] self.rendered = [] def save(self, filename): self.saved.append(filename) def render(self, **kwargs): self.rendered.append(kwargs) fake = FakeDigraph() graph = NixDependencyGraph(pd.DataFrame()) graph.digraph = fake dot_path = tmp_path / "graph.dot" graph._render(dot_path.as_posix()) assert fake.saved == [dot_path.as_posix()] assert fake.rendered == [] def test_dependency_graph_deduplicates_rendered_nodes(): node_calls = [] class FakeDigraph: def node(self, *args, **kwargs): node_calls.append((args, kwargs)) graph = NixDependencyGraph(pd.DataFrame()) graph.digraph = FakeDigraph() graph.nodes_drawn = set() graph._add_node("/nix/store/bash", "bash") graph._add_node("/nix/store/bash", "bash") assert len(node_calls) == 1 def test_dependency_graph_warns_before_large_graphviz_render(monkeypatch): logger = CapturingLogger() monkeypatch.setattr(nixgraph_render, "LOG", logger) monkeypatch.setattr(nixgraph_render, "GRAPHVIZ_RENDER_WARN_EDGES", 1) monkeypatch.setattr(NixDependencyGraph, "_render", lambda self, filename: None) df_dependencies = pd.DataFrame.from_records( [ { "src_path": "/nix/store/bash", "src_pname": "bash", "target_path": "/nix/store/hello", "target_pname": "hello", }, ] ) args = SimpleNamespace( out="graph.png", depth=1, inverse=None, until=None, colorize=None, pathnames=False, ) NixDependencyGraph(df_dependencies).draw("/nix/store/hello", args) assert ( "warning", "Rendering %s dependency edges with Graphviz may be slow; " "use --out graph.csv or --out graph.dot for faster output.", (1,), ) in logger.records def 
test_load_dependencies_logs_dependency_loading_at_info(monkeypatch): logger = CapturingLogger() monkeypatch.setattr(nixgraph_graph, "LOG", logger) monkeypatch.setattr( nixgraph_graph, "load_runtime_closure", lambda *_args, **_kwargs: RuntimeClosure( df_deps=dependency_rows_to_dataframe([]), output_paths_by_drv={}, ), ) nixgraph_graph.load_dependencies("/nix/store/target") assert ( "info", "Loading %s dependencies referenced by '%s'", ("runtime", "/nix/store/target"), ) in logger.records def test_load_dependencies_buildtime_uses_derivation_json(monkeypatch): drv_infos = { "/nix/store/11111111111111111111111111111111-target.drv": { "inputs": { "drvs": { "/nix/store/22222222222222222222222222222222-dep.drv": ["out"], }, "srcs": [ "/nix/store/33333333333333333333333333333333-source", ], } } } monkeypatch.setattr( nixgraph_graph, "require_deriver", lambda path: path, ) monkeypatch.setattr( nixgraph_graph, "load_recursive", lambda path: ({path: object()}, drv_infos), ) deps = nixgraph_graph.load_dependencies( "/nix/store/target.drv", buildtime=True, ) assert deps.start_path == "/nix/store/target.drv" assert deps.df.to_dict("records") == [ { "src_path": "/nix/store/22222222222222222222222222222222-dep.drv", "src_pname": "dep.drv", "target_path": "/nix/store/11111111111111111111111111111111-target.drv", "target_pname": "target.drv", }, { "src_path": "/nix/store/33333333333333333333333333333333-source", "src_pname": "source", "target_path": "/nix/store/11111111111111111111111111111111-target.drv", "target_pname": "target.drv", }, ] def test_load_dependencies_runtime_uses_resolved_output_path(monkeypatch): monkeypatch.setattr( nixgraph_graph, "load_runtime_closure", lambda *_args, **_kwargs: RuntimeClosure( df_deps=dependency_rows_to_dataframe( [ { "src_path": "/nix/store/dep", "src_pname": "dep", "target_path": "/nix/store/target", "target_pname": "target", } ] ), output_paths_by_drv={}, ), ) deps = nixgraph_graph.load_dependencies("/nix/store/target") assert deps.start_path == "/nix/store/target" assert deps.df.to_dict("records") == [ { "src_path": "/nix/store/dep", "src_pname": "dep", "target_path": "/nix/store/target", "target_pname": "target", } ] def test_nixgraph_no_longer_exposes_removed_graph_helpers(): assert not hasattr(nixgraph_graph, "NixDependencies") assert not hasattr(nixgraph_graph, "parse_nix_query_out") assert not hasattr(nixgraph_graph, "runtime_query_output") assert not hasattr(nixgraph_graph, "buildtime_query_output") assert not hasattr(nixgraph_graph, "find_output_path") ================================================ FILE: tests/test_nixmeta_parsing.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Focused tests for nixmeta parsing helpers.""" import json from nixmeta import metadata_json def test_parse_json_metadata_flattens_nested_fields(tmp_path): json_path = tmp_path / "meta.json" json_path.write_text( json.dumps( { "hello": { "name": "hello-2.12.1", "pname": "hello", "version": "2.12.1", "meta": { "homepage": ["https://example.invalid/hello"], "unfree": False, "description": "GNU hello", "position": "pkgs/tools/misc/hello/default.nix:1", "license": [ {"shortName": "GPLv3+", "spdxId": "GPL-3.0-or-later"} ], "maintainers": [ {"email": "maintainer@example.invalid"}, ], }, } } ), encoding="utf-8", ) df = metadata_json.parse_json_metadata(json_path) assert df.to_dict(orient="records") == [ { "name": "hello-2.12.1", "pname": "hello", 
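            # Nested `meta` fields are flattened into meta_-prefixed string
            # columns in the parsed dataframe, as asserted below.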
"version": "2.12.1", "meta_homepage": "https://example.invalid/hello", "meta_unfree": "False", "meta_description": "GNU hello", "meta_position": "pkgs/tools/misc/hello/default.nix:1", "meta_license_short": "GPLv3+", "meta_license_spdxid": "GPL-3.0-or-later", "meta_maintainers_email": "maintainer@example.invalid", } ] ================================================ FILE: tests/test_nixmeta_progress.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Focused tests for nixmeta progress logging.""" import json import subprocess from types import SimpleNamespace from nixmeta import flake_metadata from nixmeta import main as nixmeta_main from nixmeta import scanner as nixmeta_scanner class CapturingLogger: def __init__(self): self.records = [] def debug(self, msg, *args): self.records.append(("debug", msg, args)) def info(self, msg, *args): self.records.append(("info", msg, args)) def warning(self, msg, *args): self.records.append(("warning", msg, args)) def fatal(self, msg, *args): self.records.append(("fatal", msg, args)) def log(self, level, msg, *args): self.records.append(("log", level, msg, args)) def test_nixmeta_main_logs_scan_start(monkeypatch): args = SimpleNamespace( flakeref="github:NixOS/nixpkgs?ref=nixos-unstable", out="nixmeta.csv", append=False, ) logger = CapturingLogger() events = [] class FakeScanner: def scan(self, flakeref): events.append(("scan", flakeref)) def to_csv(self, out, append): events.append(("to_csv", out, append)) monkeypatch.setattr(nixmeta_main, "LOG", logger) monkeypatch.setattr( nixmeta_main, "exit_unless_command_exists", lambda command: events.append(("command", command)), ) monkeypatch.setattr(nixmeta_main, "NixMetaScanner", FakeScanner) nixmeta_main._run(args) assert ( "info", "Scanning nixpkgs metadata for '%s'", ("github:NixOS/nixpkgs?ref=nixos-unstable",), ) in logger.records assert events == [ ("command", "nix"), ("command", "nix-env"), ("scan", "github:NixOS/nixpkgs?ref=nixos-unstable"), ("to_csv", "nixmeta.csv", False), ] def test_get_flake_metadata_logs_metadata_read(): logger = CapturingLogger() def fake_exec_cmd(_cmd, **_kwargs): return SimpleNamespace(stdout='{"path": "/nix/store/nixpkgs"}', returncode=0) meta = flake_metadata.get_flake_metadata( "nixpkgs=/tmp/my flake", exec_cmd_fn=fake_exec_cmd, log=logger, ) assert meta == {"path": "/nix/store/nixpkgs"} assert ( "info", "Reading flake metadata for '%s'", ("/tmp/my flake",), ) in logger.records def test_get_nixpkgs_flakeref_uses_root_nixpkgs_input_with_renamed_node(): meta_json = { "locks": { "root": "root", "nodes": { "root": {"inputs": {"nixpkgs": "nixpkgs_3"}}, "nixpkgs": { "locked": { "type": "github", "owner": "NixOS", "repo": "nixpkgs", "rev": "wrong", } }, "nixpkgs_3": { "locked": { "type": "github", "owner": "NixOS", "repo": "nixpkgs", "rev": "right", } }, }, } } assert ( flake_metadata.get_nixpkgs_flakeref(meta_json) == "github:NixOS/nixpkgs?rev=right" ) def test_nixmeta_scanner_logs_nix_env_progress(tmp_path, monkeypatch): nixpkgs_path = tmp_path / "nixpkgs" nixpkgs_path.mkdir() logger = CapturingLogger() commands = [] def fake_run_nix_env_metadata(_cmd, stdout): commands.append(_cmd) stdout.write( json.dumps( { "hello": { "name": "hello-2.12.1", "pname": "hello", "version": "2.12.1", "meta": { "homepage": "https://example.invalid/hello", "unfree": False, "description": "GNU hello", "position": "pkgs/tools/misc/hello/default.nix:1", "license": { 
"shortName": "GPLv3+", "spdxId": "GPL-3.0-or-later", }, "maintainers": { "email": "maintainer@example.invalid", }, }, } } ).encode("utf-8") ) stdout.flush() monkeypatch.setattr(nixmeta_scanner, "LOG", logger) monkeypatch.setattr( nixmeta_scanner, "nixref_to_nixpkgs_path", lambda *_args, **_kwargs: nixpkgs_path, ) monkeypatch.setattr( nixmeta_scanner, "_run_nix_env_metadata", fake_run_nix_env_metadata, ) scanner = nixmeta_scanner.NixMetaScanner() scanner.scan("github:NixOS/nixpkgs?ref=nixos-unstable") assert ( "info", "Reading nixpkgs metadata from '%s'", (nixpkgs_path.as_posix(),), ) in logger.records assert ("info", "Parsing nixpkgs metadata", ()) in logger.records assert commands assert scanner.to_df() is not None def test_run_nix_env_metadata_captures_successful_stderr(monkeypatch, tmp_path): calls = [] logger = CapturingLogger() def fake_run(cmd, **kwargs): calls.append((cmd, kwargs)) return SimpleNamespace(stderr="warning: noisy eval\n") monkeypatch.setattr(nixmeta_scanner.subprocess, "run", fake_run) monkeypatch.setattr(nixmeta_scanner, "LOG", logger) out_path = tmp_path / "meta.json" with out_path.open("w", encoding="utf-8") as out: nixmeta_scanner._run_nix_env_metadata(["nix-env"], stdout=out) assert calls assert calls[0][1]["stderr"] is subprocess.PIPE assert calls[0][1]["stdout"].name == out_path.as_posix() assert ( "debug", "nix-env metadata stderr:\n%s", ("warning: noisy eval",), ) in logger.records def test_nixmeta_scanner_tolerates_empty_metadata_json(tmp_path, monkeypatch): nixpkgs_path = tmp_path / "nixpkgs" nixpkgs_path.mkdir() def fake_run_nix_env_metadata(_cmd, stdout): stdout.write(b"{}") stdout.flush() monkeypatch.setattr( nixmeta_scanner, "_run_nix_env_metadata", fake_run_nix_env_metadata, ) scanner = nixmeta_scanner.NixMetaScanner() scanner.scan_path(nixpkgs_path) assert scanner.to_df() is not None assert scanner.to_df().empty def test_nixmeta_expression_scan_enables_flakes(monkeypatch): commands = [] def fake_run_nix_env_metadata(cmd, stdout): commands.append(cmd) stdout.write(b"{}") stdout.flush() monkeypatch.setattr( nixmeta_scanner, "_run_nix_env_metadata", fake_run_nix_env_metadata, ) scanner = nixmeta_scanner.NixMetaScanner() scanner.scan_expression('builtins.getFlake "github:NixOS/nixpkgs"') assert commands assert [ "--option", "experimental-features", "nix-command flakes", ] in [commands[0][idx : idx + 3] for idx in range(len(commands[0]) - 2)] def test_nixmeta_expression_scan_honors_impure(monkeypatch): commands = [] def fake_run_nix_env_metadata(cmd, stdout): commands.append(cmd) stdout.write(b"{}") stdout.flush() monkeypatch.setattr( nixmeta_scanner, "_run_nix_env_metadata", fake_run_nix_env_metadata, ) scanner = nixmeta_scanner.NixMetaScanner() scanner.scan_expression( 'builtins.getFlake "path:/tmp/local-flake"', impure=True, ) assert commands assert "--impure" in commands[0] ================================================ FILE: tests/test_nixmeta_source.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Focused tests for nixpkgs metadata source selection.""" import json import pathlib from types import SimpleNamespace import pytest from common.errors import SbomnixError from sbomnix import meta as sbomnix_meta from sbomnix import meta_source as sbomnix_meta_source def test_classify_meta_nixpkgs_reserved_modes_before_explicit_source(): assert ( sbomnix_meta.classify_meta_nixpkgs(sbomnix_meta.META_NIXPKGS_NIX_PATH) == 
sbomnix_meta.META_NIXPKGS_NIX_PATH ) assert sbomnix_meta.classify_meta_nixpkgs("/nix/store/source") == "explicit" def test_get_nixpkgs_meta_with_source_records_flakeref_lock(monkeypatch, tmp_path): nixpkgs_path = tmp_path / "nixpkgs" (nixpkgs_path / "lib").mkdir(parents=True) (nixpkgs_path / "lib" / ".version").write_text("25.11\n", encoding="utf-8") scanned = [] monkeypatch.setattr( sbomnix_meta_source, "nixref_to_nixpkgs_path", lambda _nixref: nixpkgs_path, ) monkeypatch.setattr( sbomnix_meta.Meta, "_scan", lambda self, path: scanned.append(path) or "df", ) df_meta, source = sbomnix_meta.Meta().get_nixpkgs_meta_with_source( target_path="/nix/store/target", flakeref=".#target", original_ref=".#target", ) assert df_meta == "df" assert scanned == [nixpkgs_path.as_posix()] assert source == sbomnix_meta.NixpkgsMetaSource( method="flakeref-lock", path=nixpkgs_path.as_posix(), flakeref=".#target", version="25.11", ) def test_get_nixpkgs_meta_with_source_records_opt_in_nix_path(monkeypatch): scanned = [] monkeypatch.setenv("NIX_PATH", "foo=/tmp/other:nixpkgs=/tmp/my flake") monkeypatch.setattr( sbomnix_meta.Meta, "_scan", lambda self, path: scanned.append(path) or "df", ) df_meta, source = sbomnix_meta.Meta().get_nixpkgs_meta_with_source( target_path="/nix/store/target", flakeref=None, original_ref="/nix/store/target", explicit_nixpkgs=sbomnix_meta.META_NIXPKGS_NIX_PATH, ) assert df_meta == "df" assert scanned == ["/tmp/my flake"] assert source == sbomnix_meta.NixpkgsMetaSource( method="nix-path", path="/tmp/my flake", message="NIX_PATH metadata source may not match the target", ) def test_explicit_nix_path_source_requires_nixpkgs_entry(monkeypatch): def fail_if_scanned(self, path): raise AssertionError(f"nix-path scan should not run: {path}") monkeypatch.setenv("NIX_PATH", "foo=/tmp/other") monkeypatch.setattr(sbomnix_meta.Meta, "_scan", fail_if_scanned) with pytest.raises(SbomnixError, match="NIX_PATH.*nixpkgs="): sbomnix_meta.Meta().get_nixpkgs_meta_with_source( target_path="/nix/store/target", flakeref=None, original_ref="/nix/store/target", explicit_nixpkgs=sbomnix_meta.META_NIXPKGS_NIX_PATH, ) def test_path_target_without_source_skips_nix_path_metadata(monkeypatch): def fail_if_scanned(self, path): raise AssertionError(f"path-target scan should be skipped: {path}") monkeypatch.setenv("NIX_PATH", "nixpkgs=/tmp/nixpkgs") monkeypatch.setattr(sbomnix_meta.Meta, "_scan", fail_if_scanned) df_meta, source = sbomnix_meta.Meta().get_nixpkgs_meta_with_source( target_path="/nix/store/resolved-target", flakeref=None, original_ref="./result", ) assert df_meta is None assert source.method == "none" assert source.path is None assert "store-path target" in source.message assert "./result" not in source.message assert "--meta-nixpkgs" in source.message def test_explicit_store_path_source_records_explicit_method(monkeypatch, tmp_path): nixpkgs_path = tmp_path / "nixpkgs" (nixpkgs_path / "lib").mkdir(parents=True) (nixpkgs_path / "lib" / ".version").write_text("25.11\n", encoding="utf-8") scanned = [] monkeypatch.setattr( sbomnix_meta.Meta, "_scan", lambda self, path: scanned.append(path) or "df", ) monkeypatch.setattr( sbomnix_meta_source, "is_nix_store_path", lambda path: path.as_posix() == nixpkgs_path.as_posix(), ) df_meta, source = sbomnix_meta.Meta().get_nixpkgs_meta_with_source( target_path="/nix/store/target", explicit_nixpkgs=nixpkgs_path.as_posix(), ) assert df_meta == "df" assert scanned == [nixpkgs_path.as_posix()] assert source == sbomnix_meta.NixpkgsMetaSource( method="explicit", 
path=nixpkgs_path.as_posix(), version="25.11", ) def test_explicit_flakeref_source_resolves_nixpkgs_path(monkeypatch): nixpkgs_path = "/nix/store/abc-source" scanned = [] monkeypatch.setattr( sbomnix_meta_source, "nixref_to_nixpkgs_path", lambda _nixref: pathlib.Path(nixpkgs_path), ) monkeypatch.setattr( sbomnix_meta.Meta, "_scan", lambda self, path: scanned.append(path) or "df", ) df_meta, source = sbomnix_meta.Meta().get_nixpkgs_meta_with_source( target_path="/nix/store/target", explicit_nixpkgs="github:NixOS/nixpkgs?rev=abc", ) assert df_meta == "df" assert scanned == [nixpkgs_path] assert source == sbomnix_meta.NixpkgsMetaSource( method="explicit", path=nixpkgs_path, flakeref="github:NixOS/nixpkgs?rev=abc", ) def test_mutable_explicit_path_is_normalized_before_scanning(monkeypatch, tmp_path): mutable_path = tmp_path / "nixpkgs-checkout" mutable_path.mkdir() store_path = pathlib.Path("/nix/store/normalized-source") scanned = [] monkeypatch.setattr( sbomnix_meta_source, "nixref_to_nixpkgs_path", lambda nixref: store_path if nixref == mutable_path.as_posix() else None, ) monkeypatch.setattr( sbomnix_meta.Meta, "_scan", lambda self, path: scanned.append(path) or "df", ) df_meta, source = sbomnix_meta.Meta().get_nixpkgs_meta_with_source( target_path="/nix/store/target", explicit_nixpkgs=mutable_path.as_posix(), ) assert df_meta == "df" assert scanned == [store_path.as_posix()] assert source == sbomnix_meta.NixpkgsMetaSource( method="explicit", path=store_path.as_posix(), flakeref=mutable_path.as_posix(), ) def test_mutable_explicit_path_is_rejected_if_not_cache_safe(monkeypatch, tmp_path): mutable_path = tmp_path / "nixpkgs-checkout" mutable_path.mkdir() monkeypatch.setattr( sbomnix_meta_source, "nixref_to_nixpkgs_path", lambda _nixref: None ) with pytest.raises(SbomnixError, match="immutable /nix/store source"): sbomnix_meta.Meta().get_nixpkgs_meta_with_source( target_path="/nix/store/target", explicit_nixpkgs=mutable_path.as_posix(), ) def test_nixos_toplevel_flakeref_prefers_configuration_pkgs_path( monkeypatch, tmp_path, ): nixpkgs_path = tmp_path / "nixpkgs" nixpkgs_path.mkdir() calls = [] expressions = [] fake_df = SimpleNamespace(empty=False) def fake_exec_cmd(cmd, **_kwargs): calls.append(cmd) if cmd == [ "nix", "eval", "--raw", '/flake#nixosConfigurations."host".pkgs.path', ]: return SimpleNamespace(stdout=f"{nixpkgs_path}\n", returncode=0) raise AssertionError(f"unexpected command: {cmd}") monkeypatch.setattr( sbomnix_meta_source, "nix_cmd", lambda *args, impure=False: ["nix", *args] + (["--impure"] if impure else []), ) monkeypatch.setattr(sbomnix_meta_source, "exec_cmd", fake_exec_cmd) monkeypatch.setattr( sbomnix_meta_source, "nixref_to_nixpkgs_path", lambda _nixref: (_ for _ in ()).throw( AssertionError("lock-node lookup should not run") ), ) monkeypatch.setattr( sbomnix_meta.Meta, "_scan_expression", lambda self, expression, *, cache_key=None, impure=False: ( expressions.append((expression, cache_key, impure)) or fake_df ), ) df_meta, source = sbomnix_meta.Meta().get_nixpkgs_meta_with_source( target_path="/nix/store/target", flakeref="/flake#nixosConfigurations.host.config.system.build.toplevel", original_ref="/flake#nixosConfigurations.host.config.system.build.toplevel", ) assert df_meta is fake_df assert calls == [ ["nix", "eval", "--raw", '/flake#nixosConfigurations."host".pkgs.path'] ] assert source.method == "flakeref-target" assert source.path == nixpkgs_path.as_posix() assert source.flakeref == '/flake#nixosConfigurations."host".pkgs.path' assert source.message == 
"Scanning evaluated NixOS package set from flakeref" assert expressions == [ ( 'let\n flake = builtins.getFlake "/flake";\nin\n' ' flake.nixosConfigurations."host".pkgs\n', None, False, ) ] def test_nixos_toplevel_expression_locks_relative_flake_refs( monkeypatch, tmp_path, ): nixpkgs_path = tmp_path / "nixpkgs" nixpkgs_path.mkdir() source_path = "/nix/store/root-source" calls = [] expressions = [] fake_df = SimpleNamespace(empty=False) def fake_exec_cmd(cmd, **_kwargs): calls.append(cmd) if cmd == [ "nix", "eval", "--raw", '.#nixosConfigurations."host".pkgs.path', ]: return SimpleNamespace(stdout=f"{nixpkgs_path}\n", returncode=0) if cmd == ["nix", "flake", "metadata", ".", "--json"]: return SimpleNamespace( stdout=json.dumps( { "path": source_path, "locked": {"narHash": "sha256-abc"}, } ), returncode=0, ) raise AssertionError(f"unexpected command: {cmd}") monkeypatch.setattr( sbomnix_meta_source, "nix_cmd", lambda *args, impure=False: ["nix", *args] + (["--impure"] if impure else []), ) monkeypatch.setattr(sbomnix_meta_source, "exec_cmd", fake_exec_cmd) monkeypatch.setattr( sbomnix_meta.Meta, "_scan_expression", lambda self, expression, *, cache_key=None, impure=False: ( expressions.append((expression, cache_key, impure)) or fake_df ), ) df_meta, source = sbomnix_meta.Meta().get_nixpkgs_meta_with_source( target_path="/nix/store/target", flakeref=".#nixosConfigurations.host.config.system.build.toplevel", original_ref=".#nixosConfigurations.host.config.system.build.toplevel", ) locked_ref = f"path:{source_path}?narHash=sha256-abc" assert df_meta is fake_df assert source.method == "flakeref-target" assert source.flakeref == '.#nixosConfigurations."host".pkgs.path' assert calls == [ ["nix", "eval", "--raw", '.#nixosConfigurations."host".pkgs.path'], ["nix", "flake", "metadata", ".", "--json"], ] cache_key = "nixos-pkgs:" + json.dumps( [locked_ref, "host"], separators=(",", ":"), ) assert expressions == [ ( f'let\n flake = builtins.getFlake "{locked_ref}";\nin\n' ' flake.nixosConfigurations."host".pkgs\n', cache_key, False, ) ] def test_nixos_toplevel_expression_preserves_locked_subflake_dir( monkeypatch, tmp_path, ): nixpkgs_path = tmp_path / "nixpkgs" nixpkgs_path.mkdir() source_path = "/nix/store/root-source" calls = [] expressions = [] fake_df = SimpleNamespace(empty=False) def fake_exec_cmd(cmd, **_kwargs): calls.append(cmd) if cmd == [ "nix", "eval", "--raw", 'path:.?dir=sub/flake#nixosConfigurations."host".pkgs.path', ]: return SimpleNamespace(stdout=f"{nixpkgs_path}\n", returncode=0) if cmd == ["nix", "flake", "metadata", "path:.?dir=sub/flake", "--json"]: return SimpleNamespace( stdout=json.dumps( { "path": source_path, "locked": { "narHash": "sha256-abc", "dir": "sub/flake", }, } ), returncode=0, ) raise AssertionError(f"unexpected command: {cmd}") monkeypatch.setattr( sbomnix_meta_source, "nix_cmd", lambda *args, impure=False: ["nix", *args] + (["--impure"] if impure else []), ) monkeypatch.setattr(sbomnix_meta_source, "exec_cmd", fake_exec_cmd) monkeypatch.setattr( sbomnix_meta.Meta, "_scan_expression", lambda self, expression, *, cache_key=None, impure=False: ( expressions.append((expression, cache_key, impure)) or fake_df ), ) df_meta, source = sbomnix_meta.Meta().get_nixpkgs_meta_with_source( target_path="/nix/store/target", flakeref=( "path:.?dir=sub/flake#nixosConfigurations.host.config.system.build.toplevel" ), original_ref=( "path:.?dir=sub/flake#nixosConfigurations.host.config.system.build.toplevel" ), ) locked_ref = f"path:{source_path}?narHash=sha256-abc&dir=sub/flake" 
assert df_meta is fake_df assert source.method == "flakeref-target" assert calls == [ [ "nix", "eval", "--raw", 'path:.?dir=sub/flake#nixosConfigurations."host".pkgs.path', ], ["nix", "flake", "metadata", "path:.?dir=sub/flake", "--json"], ] cache_key = "nixos-pkgs:" + json.dumps( [locked_ref, "host"], separators=(",", ":"), ) assert expressions == [ ( f'let\n flake = builtins.getFlake "{locked_ref}";\nin\n' ' flake.nixosConfigurations."host".pkgs\n', cache_key, False, ) ] def test_nixos_toplevel_flakeref_handles_quoted_configuration_names( monkeypatch, tmp_path, ): nixpkgs_path = tmp_path / "nixpkgs" nixpkgs_path.mkdir() expressions = [] fake_df = SimpleNamespace(empty=False) def fake_exec_cmd(cmd, **_kwargs): if cmd == [ "nix", "eval", "--raw", '/flake#nixosConfigurations."host.example.com".pkgs.path', ]: return SimpleNamespace(stdout=f"{nixpkgs_path}\n", returncode=0) raise AssertionError(f"unexpected command: {cmd}") monkeypatch.setattr( sbomnix_meta_source, "nix_cmd", lambda *args, impure=False: ["nix", *args] + (["--impure"] if impure else []), ) monkeypatch.setattr(sbomnix_meta_source, "exec_cmd", fake_exec_cmd) monkeypatch.setattr( sbomnix_meta.Meta, "_scan_expression", lambda self, expression, *, cache_key=None, impure=False: ( expressions.append((expression, cache_key, impure)) or fake_df ), ) df_meta, source = sbomnix_meta.Meta().get_nixpkgs_meta_with_source( target_path="/nix/store/target", flakeref=( '/flake#nixosConfigurations."host.example.com".config.system.build.toplevel' ), original_ref=( '/flake#nixosConfigurations."host.example.com".config.system.build.toplevel' ), ) assert df_meta is fake_df assert source.method == "flakeref-target" assert source.flakeref == ( '/flake#nixosConfigurations."host.example.com".pkgs.path' ) assert expressions == [ ( 'let\n flake = builtins.getFlake "/flake";\nin\n' ' flake.nixosConfigurations."host.example.com".pkgs\n', None, False, ) ] def test_nixos_toplevel_flakeref_metadata_eval_honors_impure(monkeypatch, tmp_path): nixpkgs_path = tmp_path / "nixpkgs" nixpkgs_path.mkdir() calls = [] expressions = [] fake_df = SimpleNamespace(empty=False) def fake_exec_cmd(cmd, **_kwargs): calls.append(cmd) return SimpleNamespace(stdout=f"{nixpkgs_path}\n", returncode=0) monkeypatch.setattr( sbomnix_meta_source, "nix_cmd", lambda *args, impure=False: ["nix", *args] + (["--impure"] if impure else []), ) monkeypatch.setattr(sbomnix_meta_source, "exec_cmd", fake_exec_cmd) monkeypatch.setattr( sbomnix_meta.Meta, "_scan_expression", lambda self, expression, *, cache_key=None, impure=False: ( expressions.append((expression, cache_key, impure)) or fake_df ), ) df_meta, source = sbomnix_meta.Meta().get_nixpkgs_meta_with_source( target_path="/nix/store/target", flakeref="/flake#nixosConfigurations.host.config.system.build.toplevel", original_ref="/flake#nixosConfigurations.host.config.system.build.toplevel", impure=True, ) assert df_meta is fake_df assert source.method == "flakeref-target" assert calls == [ [ "nix", "eval", "--raw", '/flake#nixosConfigurations."host".pkgs.path', "--impure", ] ] assert expressions == [ ( 'let\n flake = builtins.getFlake "/flake";\nin\n' ' flake.nixosConfigurations."host".pkgs\n', None, True, ) ] assert source.expression_cache_key is None assert source.expression_impure is True def test_nixos_toplevel_expression_cache_uses_only_stable_refs(monkeypatch, tmp_path): nixpkgs_path = tmp_path / "nixpkgs" nixpkgs_path.mkdir() expressions = [] fake_df = SimpleNamespace(empty=False) def fake_exec_cmd(_cmd, **_kwargs): return 
SimpleNamespace(stdout=f"{nixpkgs_path}\n", returncode=0) monkeypatch.setattr( sbomnix_meta_source, "nix_cmd", lambda *args, impure=False: ["nix", *args] + (["--impure"] if impure else []), ) monkeypatch.setattr(sbomnix_meta_source, "exec_cmd", fake_exec_cmd) monkeypatch.setattr( sbomnix_meta.Meta, "_scan_expression", lambda self, expression, *, cache_key=None, impure=False: ( expressions.append((expression, cache_key, impure)) or fake_df ), ) df_meta, source = sbomnix_meta.Meta().get_nixpkgs_meta_with_source( target_path="/nix/store/target", flakeref=( "github:example/flake?rev=abc" '#nixosConfigurations."host:8080".config.system.build.toplevel' ), original_ref=( "github:example/flake?rev=abc" '#nixosConfigurations."host:8080".config.system.build.toplevel' ), ) cache_key = "nixos-pkgs:" + json.dumps( ["github:example/flake?rev=abc", "host:8080"], separators=(",", ":"), ) assert df_meta is fake_df assert source.method == "flakeref-target" assert expressions == [ ( 'let\n flake = builtins.getFlake "github:example/flake?rev=abc";\n' "in\n" ' flake.nixosConfigurations."host:8080".pkgs\n', cache_key, False, ) ] def test_nixos_toplevel_expression_scan_failure_skips_metadata( monkeypatch, tmp_path, ): nixpkgs_path = tmp_path / "nixpkgs" nixpkgs_path.mkdir() scanned = [] def fake_exec_cmd(_cmd, **_kwargs): return SimpleNamespace(stdout=f"{nixpkgs_path}\n", returncode=0) monkeypatch.setattr( sbomnix_meta_source, "nix_cmd", lambda *args, impure=False: ["nix", *args] + (["--impure"] if impure else []), ) monkeypatch.setattr(sbomnix_meta_source, "exec_cmd", fake_exec_cmd) monkeypatch.setattr( sbomnix_meta.Meta, "_scan_expression", lambda *_args, **_kwargs: None, ) monkeypatch.setattr( sbomnix_meta.Meta, "_scan", lambda self, path: scanned.append(path), ) df_meta, source = sbomnix_meta.Meta().get_nixpkgs_meta_with_source( target_path="/nix/store/target", flakeref="/flake#nixosConfigurations.host.config.system.build.toplevel", original_ref="/flake#nixosConfigurations.host.config.system.build.toplevel", ) assert df_meta is None assert scanned == [] assert source.method == "flakeref-target" assert source.expression is not None assert source.path == nixpkgs_path.as_posix() assert source.message == ( "Evaluated package-set metadata scan failed. Skipping nixpkgs metadata." 
) def test_nixos_toplevel_flakeref_without_pkgs_path_returns_message(monkeypatch): calls = [] def fake_exec_cmd(cmd, **_kwargs): calls.append(cmd) if cmd == [ "nix", "eval", "--raw", '/flake#nixosConfigurations."host".pkgs.path', ]: return SimpleNamespace(stdout="", stderr="missing", returncode=1) raise AssertionError(f"unexpected command: {cmd}") monkeypatch.setattr( sbomnix_meta_source, "nix_cmd", lambda *args, impure=False: ["nix", *args] + (["--impure"] if impure else []), ) monkeypatch.setattr(sbomnix_meta_source, "exec_cmd", fake_exec_cmd) df_meta, source = sbomnix_meta.Meta().get_nixpkgs_meta_with_source( target_path="/nix/store/target", flakeref="/flake#nixosConfigurations.host.config.system.build.toplevel", original_ref="/flake#nixosConfigurations.host.config.system.build.toplevel", ) assert df_meta is None assert calls == [ ["nix", "eval", "--raw", '/flake#nixosConfigurations."host".pkgs.path'], ] assert source.method == "none" assert source.path is None assert "NixOS configuration flakeref" in source.message assert "--meta-nixpkgs" in source.message def test_nixos_toplevel_flakeref_without_pkgs_returns_message( monkeypatch, ): calls = [] def fake_exec_cmd(cmd, **_kwargs): calls.append(cmd) return SimpleNamespace(stdout="", stderr="missing", returncode=1) monkeypatch.setattr( sbomnix_meta_source, "nix_cmd", lambda *args, impure=False: ["nix", *args] + (["--impure"] if impure else []), ) monkeypatch.setattr(sbomnix_meta_source, "exec_cmd", fake_exec_cmd) monkeypatch.setattr( sbomnix_meta_source, "nixref_to_nixpkgs_path", lambda _nixref: None, ) df_meta, source = sbomnix_meta.Meta().get_nixpkgs_meta_with_source( target_path="/nix/store/target", flakeref="/flake#nixosConfigurations.host.config.system.build.toplevel", original_ref="/flake#nixosConfigurations.host.config.system.build.toplevel", ) assert df_meta is None assert calls == [ ["nix", "eval", "--raw", '/flake#nixosConfigurations."host".pkgs.path'], ] assert source.method == "none" assert source.path is None assert "NixOS configuration flakeref" in source.message assert "--meta-nixpkgs" in source.message def test_plain_nixos_configuration_attrset_is_not_target_inferred( monkeypatch, tmp_path, ): nixpkgs_path = tmp_path / "lock-source" nixpkgs_path.mkdir() scanned = [] monkeypatch.setattr( sbomnix_meta.Meta, "_scan", lambda self, path: scanned.append(path) or "df", ) monkeypatch.setattr( sbomnix_meta_source, "nixref_to_nixpkgs_path", lambda _nixref: nixpkgs_path, ) df_meta, source = sbomnix_meta.Meta().get_nixpkgs_meta_with_source( target_path="/nix/store/target", flakeref="/flake#nixosConfigurations.host", original_ref="/flake#nixosConfigurations.host", ) assert df_meta == "df" assert scanned == [nixpkgs_path.as_posix()] assert source.method == "flakeref-lock" def test_meta_scan_uses_already_resolved_scanner_path(monkeypatch): calls = [] fake_df = SimpleNamespace(empty=False) class FakeScanner: """Scanner stand-in that records normalized scan paths.""" def scan(self, path): raise AssertionError(f"scan should not resolve path again: {path}") def scan_path(self, path): calls.append(path) def to_df(self): return fake_df meta = sbomnix_meta.Meta() monkeypatch.setattr(meta.cache, "get", lambda _key: None) monkeypatch.setattr(meta.cache, "set", lambda **_kwargs: None) monkeypatch.setattr(sbomnix_meta, "NixMetaScanner", FakeScanner) assert meta._scan("/nix/store/source") is fake_df assert calls == ["/nix/store/source"] ================================================ FILE: tests/test_nixmeta_source_export.py 
================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Tests for SBOM-level nixpkgs metadata source export.""" import uuid import pandas as pd from sbomnix.builder import SbomBuilder from sbomnix.meta import NixpkgsMetaSource def _make_minimal_sbom(): sbomdb = object.__new__(SbomBuilder) sbomdb.uid = "store_path" sbomdb.nix_path = "/nix/store/target" sbomdb.buildtime = False sbomdb.target_deriver = "/nix/store/target.drv" sbomdb.target_component_ref = "/nix/store/target.drv" sbomdb.depth = None sbomdb.uuid = uuid.uuid4() sbomdb.sbom_type = "runtime_only" sbomdb.nixpkgs_meta_source = NixpkgsMetaSource( method="flakeref-target", path="/nix/store/source", rev="1234", flakeref=".#target", version="25.11", message="base nixpkgs source metadata", ) sbomdb.df_sbomdb = pd.DataFrame( [ { "store_path": "/nix/store/target.drv", "pname": "target", "name": "target", "version": "1.0", "outputs": ["/nix/store/target"], "out": "/nix/store/target", "purl": "", "cpe": "", "urls": "", "patches": "", } ] ) return sbomdb def test_cdx_document_records_nixpkgs_metadata_source(monkeypatch): sbomdb = _make_minimal_sbom() monkeypatch.setattr( SbomBuilder, "lookup_dependencies", lambda *_args, **_kwargs: None ) cdx = sbomdb.to_cdx_data() properties = {prop["name"]: prop["value"] for prop in cdx["metadata"]["properties"]} assert properties["nixpkgs:metadata_source_method"] == "flakeref-target" assert properties["nixpkgs:path"] == "/nix/store/source" assert properties["nixpkgs:rev"] == "1234" assert properties["nixpkgs:flakeref"] == ".#target" assert properties["nixpkgs:version"] == "25.11" assert properties["nixpkgs:message"] == "base nixpkgs source metadata" def test_spdx_document_records_nixpkgs_metadata_source(monkeypatch): sbomdb = _make_minimal_sbom() monkeypatch.setattr( SbomBuilder, "lookup_dependencies", lambda *_args, **_kwargs: None ) spdx = sbomdb.to_spdx_data() assert "included dependencies: 'runtime_only'" in spdx["comment"] assert ( "nixpkgs metadata source: metadata_source_method=flakeref-target" in spdx["comment"] ) assert "path=/nix/store/source" in spdx["comment"] assert "rev=1234" in spdx["comment"] assert "message=base nixpkgs source metadata" in spdx["comment"] assert "warning=" not in spdx["comment"] ================================================ FILE: tests/test_osv_client.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Focused tests for the reusable OSV client.""" from vulnxscan.osv_client import OSV class FakeResponse: def __init__(self, payload): self._payload = payload self.status_code = 200 def json(self): return self._payload def raise_for_status(self): return None class FakeSession: def __init__(self): self.calls = [] def post(self, url, json=None, timeout=None): self.calls.append((url, json, timeout)) return FakeResponse( { "results": [ { "vulns": [ { "id": "OSV-1", "modified": "2024-01-01", } ] } ] } ) def test_osv_client_posts_with_timeout_and_parses_results(tmp_path): sbom_path = tmp_path / "sbom.json" sbom_path.write_text( ('{"metadata":{"component":{"name":"hello","version":"1.0"}},"components":[]}'), encoding="utf-8", ) session = FakeSession() osv = OSV(session=session, request_timeout=17) osv.query_vulns(sbom_path.as_posix(), ecosystems=["GIT"]) assert session.calls == [ ( "https://api.osv.dev/v1/querybatch", { 
"queries": [ { "version": "1.0", "package": { "name": "hello", "ecosystem": "GIT", }, } ] }, 17, ) ] assert osv.to_dataframe().to_dict(orient="records") == [ { "vuln_id": "OSV-1", "modified": "2024-01-01", "package": "hello", "version": "1.0", } ] ================================================ FILE: tests/test_provenance_batching.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Offline provenance tests that do not require CLI execution.""" import errno import json import subprocess from provenance.path_info import query_path_hashes def _path_info_paths(cmd): assert cmd[:5] == ["nix", "path-info", "--json", "--json-format", "1"] args = cmd[5:] if "--extra-experimental-features" in args: args = args[: args.index("--extra-experimental-features")] return args def test_provenance_hash_query_batches_on_e2big(): """Test provenance splits oversized path-info hash queries and preserves order.""" references = [f"/nix/store/hash-{idx}" for idx in range(5)] calls = [] def fake_exec_cmd(cmd, **_kwargs): if cmd[:5] == ["nix", "path-info", "--json", "--json-format", "1"]: batch = _path_info_paths(cmd) calls.append(batch) if len(batch) > 2: raise OSError(errno.E2BIG, "Argument list too long") path_info = { path: {"narHash": f"sha256:hash-{path.rsplit('-', 1)[-1]}"} for path in batch } return subprocess.CompletedProcess( cmd, 0, stdout=json.dumps(path_info), stderr="", ) raise AssertionError(f"unexpected command: {cmd}") hashes = query_path_hashes( references, exec_cmd_fn=fake_exec_cmd, ) assert hashes == [f"sha256:hash-{idx}" for idx in range(5)] assert calls == [ references, references[:2], references[2:], references[2:3], references[3:], ] ================================================ FILE: tests/test_provenance_path_info.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Focused tests for strict provenance path-info handling.""" import json import subprocess from types import SimpleNamespace import pytest from common.errors import InvalidNixJsonError, NixCommandError from common.nix_utils import normalize_nix_path_info from provenance.dependencies import DependencyHooks, dependency_paths from provenance.path_info import nar_hash_for_path, query_path_info def test_normalize_path_info_rejects_malformed_list_records(): with pytest.raises(InvalidNixJsonError, match="missing path string"): normalize_nix_path_info([{"narHash": "sha256-test"}]) def test_normalize_path_info_rejects_malformed_object_records(): with pytest.raises(InvalidNixJsonError, match="expected path-info record"): normalize_nix_path_info({"/nix/store/target": None}) def test_normalize_path_info_supports_list_records(): first = "/nix/store/11111111111111111111111111111111-first" second = "/nix/store/22222222222222222222222222222222-second" assert normalize_nix_path_info( [ {"path": first, "references": []}, {"storePath": second, "references": [first]}, ] ) == { first: {"path": first, "references": []}, second: {"storePath": second, "references": [first]}, } def test_nar_hash_for_path_rejects_missing_hash(): with pytest.raises(InvalidNixJsonError, match="missing `narHash`"): nar_hash_for_path({"/nix/store/target": {}}, "/nix/store/target") def test_nar_hash_for_path_rejects_missing_record(): with pytest.raises(InvalidNixJsonError, match="missing path-info record"): 
nar_hash_for_path({}, "/nix/store/target") def test_dependency_paths_rejects_mismatched_path_info_record(): requested = "/nix/store/11111111111111111111111111111111-requested.drv" returned = "/nix/store/22222222222222222222222222222222-other.drv" def fake_exec_cmd(cmd, **_kwargs): return SimpleNamespace(stdout=json.dumps({returned: {"references": []}})) with pytest.raises(InvalidNixJsonError, match="missing path-info record"): dependency_paths( requested, hooks=DependencyHooks(exec_cmd_fn=fake_exec_cmd), ) def test_dependency_paths_recursive_includes_derivation_outputs(): root_drv = "/nix/store/11111111111111111111111111111111-root.drv" dep_drv = "/nix/store/22222222222222222222222222222222-dependency.drv" root_out = "/nix/store/33333333333333333333333333333333-root" dep_out = "/nix/store/44444444444444444444444444444444-dependency" def fake_exec_cmd(cmd, **_kwargs): assert "--recursive" in cmd return SimpleNamespace( stdout=json.dumps( { root_drv: {"references": [dep_drv]}, dep_drv: {"references": []}, } ) ) assert dependency_paths( root_drv, recursive=True, outputs_by_path={ root_out: ({}, {}), dep_out: ({}, {}), }, hooks=DependencyHooks(exec_cmd_fn=fake_exec_cmd), ) == [ root_drv, dep_drv, root_out, dep_out, ] def test_query_path_info_wraps_nix_command_failures(): def fail_exec_cmd(cmd, **_kwargs): raise subprocess.CalledProcessError( returncode=1, cmd=cmd, stderr="unsupported path-info json format", ) with pytest.raises(NixCommandError, match="unsupported path-info json format"): query_path_info( ["/nix/store/11111111111111111111111111111111-target-1.0"], exec_cmd_fn=fail_exec_cmd, ) ================================================ FILE: tests/test_provenance_subjects.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Focused tests for provenance digest and subject handling.""" import json import logging import subprocess from types import SimpleNamespace import pytest from common.errors import ( InvalidNixJsonError, MissingNixDerivationMetadataError, NixCommandError, ) from common.log import LOG from common.nix_utils import parse_nix_derivation_show from provenance import main as provenance_main from provenance.dependencies import ( DependencyHooks, dependency_package, get_dependencies, ) from provenance.digests import normalize_digest, output_digest from provenance.subjects import SubjectHooks, get_subjects, output_path def _dependency_hooks(*, exec_cmd_fn, query_path_hashes_fn=None): if query_path_hashes_fn is None: return DependencyHooks( exec_cmd_fn=exec_cmd_fn, parse_nix_derivation_show_fn=parse_nix_derivation_show, normalize_digest_fn=normalize_digest, output_digest_fn=output_digest, output_path_fn=output_path, log=LOG, ) return DependencyHooks( exec_cmd_fn=exec_cmd_fn, query_path_hashes_fn=query_path_hashes_fn, parse_nix_derivation_show_fn=parse_nix_derivation_show, normalize_digest_fn=normalize_digest, output_digest_fn=output_digest, output_path_fn=output_path, log=LOG, ) def _subject_hooks(exec_cmd_fn): return SubjectHooks( exec_cmd_fn=exec_cmd_fn, normalize_digest_fn=normalize_digest, output_digest_fn=output_digest, output_path_fn=output_path, log=LOG, ) def _path_info_paths(cmd): if cmd[:5] != ["nix", "path-info", "--json", "--json-format", "1"]: return None args = cmd[5:] if "--extra-experimental-features" in args: args = args[: args.index("--extra-experimental-features")] return args def 
test_get_dependencies_supports_nix_2_33_wrapped_json(): drv_path = "/nix/store/0aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-root.drv" dep_basename = "1bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb-dependency.drv" dep_path = f"/nix/store/{dep_basename}" digest = "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad" def fake_exec_cmd(cmd, **kwargs): if _path_info_paths(cmd) == [drv_path]: return SimpleNamespace( stdout=json.dumps({drv_path: {"references": [dep_path]}}) ) if cmd[:4] == ["nix", "derivation", "show", "-r"]: return SimpleNamespace( stdout=json.dumps( { "derivations": { dep_basename: { "name": "dependency", "env": {"version": "1.2.3"}, } }, "version": 4, } ) ) raise AssertionError(f"unexpected command: {cmd} kwargs={kwargs}") assert get_dependencies( drv_path, hooks=_dependency_hooks( exec_cmd_fn=fake_exec_cmd, query_path_hashes_fn=lambda _paths, **_kwargs: [ "sha256:1b8m03r63zqhnjf7l5wnldhh7c134ap5vpj0850ymkq1iyzicy5s" ], ), ) == [ { "name": "dependency", "uri": dep_path, "digest": {"sha256": digest}, "annotations": {"version": "1.2.3"}, } ] def test_normalize_digest_does_not_shell_out(): assert normalize_digest( "sha256:1b8m03r63zqhnjf7l5wnldhh7c134ap5vpj0850ymkq1iyzicy5s" ) == {"sha256": "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad"} assert normalize_digest("sha256-ungWv48Bz+pBQUDeXa4iI7ADYaOWF3qctBD/YfIAFa0=") == { "sha256": "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad" } assert normalize_digest( "77a94a83ccab42a68278ac5d3e340dcefecd736dd4feff1de71dec137b6b44ce", "r:sha256", ) == {"sha256": "77a94a83ccab42a68278ac5d3e340dcefecd736dd4feff1de71dec137b6b44ce"} def test_normalize_digest_rejects_overflowing_nix32_values(): assert normalize_digest("sha256:" + ("z" * 52)) is None def test_dependency_package_skips_non_normalized_digest(caplog): drv_path = "/nix/store/1bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb-dependency.drv" with caplog.at_level(logging.WARNING, logger=LOG.name): package = dependency_package( drv_path, "sha999:abc", {}, {}, hooks=DependencyHooks( normalize_digest_fn=normalize_digest, output_digest_fn=output_digest, log=LOG, ), ) assert package is None assert "Cannot determine digest" in caplog.text def test_get_dependencies_prefers_fixed_output_digest_for_output_paths(): drv_path = "/nix/store/0aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-root.drv" dep_drv_basename = "1bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb-source.drv" dep_out_basename = "2ccccccccccccccccccccccccccccccc-source" dep_out_path = f"/nix/store/{dep_out_basename}" metadata_digest = "77a94a83ccab42a68278ac5d3e340dcefecd736dd4feff1de71dec137b6b44ce" def fake_exec_cmd(cmd, **kwargs): if _path_info_paths(cmd) == [drv_path]: return SimpleNamespace( stdout=json.dumps({drv_path: {"references": [dep_out_path]}}) ) if cmd[:4] == ["nix", "derivation", "show", "-r"]: return SimpleNamespace( stdout=json.dumps( { "derivations": { dep_drv_basename: { "name": "source", "outputs": { "out": { "path": dep_out_basename, "hash": metadata_digest, "hashAlgo": "r:sha256", } }, "env": {"version": "1.2.3"}, } }, "version": 4, } ) ) raise AssertionError(f"unexpected command: {cmd} kwargs={kwargs}") assert get_dependencies( drv_path, hooks=_dependency_hooks( exec_cmd_fn=fake_exec_cmd, query_path_hashes_fn=lambda _paths, **_kwargs: [ "sha256:09i0w2qz3i5yp7m3yziq4z2n2r2v9s6d3n8j4x1q8k0m5a6b7c8d" ], ), ) == [ { "name": "source", "uri": dep_out_path, "digest": {"sha256": metadata_digest}, "annotations": {"version": "1.2.3"}, } ] def test_get_dependencies_maps_env_only_output_paths_back_to_derivations(): drv_path = 
"/nix/store/0aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-root.drv" dep_out_basename = "2ccccccccccccccccccccccccccccccc-source" dep_out_path = f"/nix/store/{dep_out_basename}" digest = "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad" def fake_exec_cmd(cmd, **kwargs): if _path_info_paths(cmd) == [drv_path]: return SimpleNamespace( stdout=json.dumps({drv_path: {"references": [dep_out_path]}}) ) if cmd[:4] == ["nix", "derivation", "show", "-r"]: return SimpleNamespace( stdout=json.dumps( { "derivations": { "1bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb-source.drv": { "name": "special-source", "outputs": {"out": {"method": "nar"}}, "env": { "out": dep_out_basename, "version": "1.2.3", }, } }, "version": 4, } ) ) raise AssertionError(f"unexpected command: {cmd} kwargs={kwargs}") assert get_dependencies( drv_path, hooks=_dependency_hooks( exec_cmd_fn=fake_exec_cmd, query_path_hashes_fn=lambda _paths, **_kwargs: [ "sha256:1b8m03r63zqhnjf7l5wnldhh7c134ap5vpj0850ymkq1iyzicy5s" ], ), ) == [ { "name": "special-source", "uri": dep_out_path, "digest": {"sha256": digest}, "annotations": {"version": "1.2.3"}, } ] def test_get_dependencies_wraps_derivation_show_failures(): drv_path = "/nix/store/0aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-root.drv" def fake_exec_cmd(cmd, **_kwargs): raise subprocess.CalledProcessError( returncode=1, cmd=cmd, stderr="derivation show failed", ) with pytest.raises(NixCommandError, match="derivation show failed"): get_dependencies( drv_path, hooks=_dependency_hooks(exec_cmd_fn=fake_exec_cmd), ) def test_get_subjects_falls_back_to_env_output_paths(): output_path_value = "/custom/store/1bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb-nghttp2-1.68.1" output_hash = "1b8m03r63zqhnjf7l5wnldhh7c134ap5vpj0850ymkq1iyzicy5s" digest = "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad" def fake_exec_cmd(cmd, **kwargs): if _path_info_paths(cmd) == [output_path_value]: return SimpleNamespace( stdout=json.dumps( {output_path_value: {"narHash": f"sha256:{output_hash}"}} ) ) raise AssertionError(f"unexpected command: {cmd} kwargs={kwargs}") assert get_subjects( {"out": {"method": "nar"}}, env={"out": output_path_value}, hooks=_subject_hooks(fake_exec_cmd), ) == [ { "name": "out", "uri": output_path_value, "digest": {"sha256": digest}, } ] def test_get_subjects_prefers_derivation_hash_for_realized_flat_outputs(): output_path_value = "/custom/store/1bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb-nghttp2-1.68.1" output_hash = "sha256-ungWv48Bz+pBQUDeXa4iI7ADYaOWF3qctBD/YfIAFa0=" digest = "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad" def fail_exec_cmd(cmd, **kwargs): raise AssertionError(f"unexpected command: {cmd} kwargs={kwargs}") assert get_subjects( {"out": {"method": "flat", "hash": output_hash}}, env={"out": output_path_value}, hooks=_subject_hooks(fail_exec_cmd), ) == [ { "name": "out", "uri": output_path_value, "digest": {"sha256": digest}, } ] def test_get_subjects_uses_derivation_hash_when_output_is_not_realized(): output_path_value = "/custom/store/1bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb-nghttp2-1.68.1" output_hash = "sha256-ungWv48Bz+pBQUDeXa4iI7ADYaOWF3qctBD/YfIAFa0=" digest = "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad" def fail_exec_cmd(cmd, **kwargs): raise AssertionError(f"unexpected command: {cmd} kwargs={kwargs}") assert get_subjects( {"out": {"method": "nar", "hash": output_hash}}, env={"out": output_path_value}, hooks=_subject_hooks(fail_exec_cmd), ) == [ { "name": "out", "uri": output_path_value, "digest": {"sha256": digest}, } ] def 
test_get_subjects_supports_resource_sha256_metadata(): output_path_value = "/custom/store/1bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb-nghttp2-1.68.1" digest = "77a94a83ccab42a68278ac5d3e340dcefecd736dd4feff1de71dec137b6b44ce" def fail_exec_cmd(cmd, **kwargs): raise AssertionError(f"unexpected command: {cmd} kwargs={kwargs}") assert get_subjects( { "out": { "hash": digest, "hashAlgo": "r:sha256", } }, env={"out": output_path_value}, hooks=_subject_hooks(fail_exec_cmd), ) == [ { "name": "out", "uri": output_path_value, "digest": {"sha256": digest}, } ] def test_get_subjects_skips_unrealized_outputs_without_digest(): output_path_value = "/custom/store/2ccccccccccccccccccccccccccccccc-nghttp2-doc" def fake_exec_cmd(cmd, **_kwargs): assert _path_info_paths(cmd) == [output_path_value] assert not get_subjects( {"out": {"method": "nar"}}, env={"out": output_path_value}, hooks=_subject_hooks(fake_exec_cmd), ) def test_get_subjects_skip_only_missing_unrealized_outputs(): output_path_value = "/custom/store/1bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb-nghttp2-1.68.1" missing_path = "/custom/store/2ccccccccccccccccccccccccccccccc-nghttp2-doc" output_hash = "1b8m03r63zqhnjf7l5wnldhh7c134ap5vpj0850ymkq1iyzicy5s" digest = "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad" def fake_exec_cmd(cmd, **kwargs): if _path_info_paths(cmd) == [output_path_value]: return SimpleNamespace( stdout=json.dumps( {output_path_value: {"narHash": f"sha256:{output_hash}"}} ) ) if _path_info_paths(cmd) == [missing_path]: return None raise AssertionError(f"unexpected command: {cmd} kwargs={kwargs}") assert get_subjects( {"out": {"path": output_path_value}, "doc": {"path": missing_path}}, hooks=_subject_hooks(fake_exec_cmd), ) == [ { "name": "out", "uri": output_path_value, "digest": {"sha256": digest}, } ] def test_provenance_uses_store_path_hint_for_nix_2_33_outputs_without_path(monkeypatch): target = "/custom/store/0aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-root.drv" drv_basename = "0aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-root.drv" out_basename = "1bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb-root" output_hash = "sha256-ungWv48Bz+pBQUDeXa4iI7ADYaOWF3qctBD/YfIAFa0=" digest = "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad" def fake_exec_cmd(cmd, **kwargs): if cmd[:3] == ["nix", "derivation", "show"]: assert cmd[3] == target return SimpleNamespace( stdout=json.dumps( { "version": 4, "derivations": { drv_basename: { "name": "root", "outputs": { "out": { "method": "nar", "hash": output_hash, } }, "env": {"out": out_basename}, } }, } ) ) raise AssertionError(f"unexpected command: {cmd} kwargs={kwargs}") monkeypatch.setattr(provenance_main, "exec_cmd", fake_exec_cmd) monkeypatch.setattr( provenance_main, "get_dependencies", lambda *_args, **_kwargs: [] ) metadata = provenance_main.BuildMeta("", "", "", "", "", "{}", "{}") provenance = provenance_main.provenance(target, metadata) assert provenance["subject"] == [ { "name": "out", "uri": f"/custom/store/{out_basename}", "digest": {"sha256": digest}, } ] def test_provenance_wraps_target_derivation_show_failures(monkeypatch): target = "/custom/store/0aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-root.drv" def fake_exec_cmd(cmd, **_kwargs): raise subprocess.CalledProcessError( returncode=1, cmd=cmd, stderr="target derivation show failed", ) monkeypatch.setattr(provenance_main, "exec_cmd", fake_exec_cmd) metadata = provenance_main.BuildMeta("", "", "", "", "", "{}", "{}") with pytest.raises(NixCommandError, match="target derivation show failed"): provenance_main.provenance(target, metadata) def 
test_provenance_rejects_empty_target_derivation_metadata(monkeypatch): target = "/custom/store/0aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-root.drv" def fake_exec_cmd(cmd, **_kwargs): if cmd[:3] == ["nix", "derivation", "show"]: return SimpleNamespace(stdout=json.dumps({"version": 4, "derivations": {}})) raise AssertionError(f"unexpected command: {cmd}") monkeypatch.setattr(provenance_main, "exec_cmd", fake_exec_cmd) metadata = provenance_main.BuildMeta("", "", "", "", "", "{}", "{}") with pytest.raises( MissingNixDerivationMetadataError, match="No derivation metadata found", ): provenance_main.provenance(target, metadata) def test_provenance_rejects_target_derivation_without_outputs(monkeypatch): target = "/custom/store/0aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-root.drv" drv_basename = "0aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-root.drv" def fake_exec_cmd(cmd, **_kwargs): if cmd[:3] == ["nix", "derivation", "show"]: return SimpleNamespace( stdout=json.dumps( { "version": 4, "derivations": { drv_basename: { "name": "root", "env": {"name": "root"}, } }, } ) ) raise AssertionError(f"unexpected command: {cmd}") monkeypatch.setattr(provenance_main, "exec_cmd", fake_exec_cmd) metadata = provenance_main.BuildMeta("", "", "", "", "", "{}", "{}") with pytest.raises( InvalidNixJsonError, match=r"missing `outputs` in target derivation", ): provenance_main.provenance(target, metadata) def test_provenance_keeps_fixed_output_subjects_when_output_is_not_realized( monkeypatch, ): target = "/custom/store/0aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-root.drv" drv_basename = "0aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-root.drv" out_basename = "1bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb-root" output_hash = "sha256-ungWv48Bz+pBQUDeXa4iI7ADYaOWF3qctBD/YfIAFa0=" digest = "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad" def fake_exec_cmd(cmd, **kwargs): if cmd[:3] == ["nix", "derivation", "show"]: assert cmd[3] == target return SimpleNamespace( stdout=json.dumps( { "version": 4, "derivations": { drv_basename: { "name": "root", "outputs": { "out": {"method": "nar", "hash": output_hash} }, "env": {"out": out_basename}, } }, } ) ) raise AssertionError(f"unexpected command: {cmd} kwargs={kwargs}") monkeypatch.setattr(provenance_main, "exec_cmd", fake_exec_cmd) monkeypatch.setattr( provenance_main, "get_dependencies", lambda *_args, **_kwargs: [] ) metadata = provenance_main.BuildMeta("", "", "", "", "", "{}", "{}") provenance = provenance_main.provenance(target, metadata) assert provenance["subject"] == [ { "name": "out", "uri": f"/custom/store/{out_basename}", "digest": {"sha256": digest}, } ] ================================================ FILE: tests/test_repology_adapter.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Offline tests for the Repology adapter.""" import json import pytest from repology.adapter import RepologyAdapter, RepologyQuery from repology.exceptions import RepologyNoMatchingPackages from repology.session import REPOLOGY_REQUEST_TIMEOUT from tests.testpaths import RESOURCES_DIR REPOLOGY_FIXTURES_DIR = RESOURCES_DIR / "repology" class FakeResponse: def __init__(self, text, status_code=200): self.text = text self.status_code = status_code def raise_for_status(self): if self.status_code >= 400: raise RuntimeError(f"unexpected status code: {self.status_code}") class MappingSession: def __init__(self, responses): self.responses = responses self.calls = [] def get(self, url, timeout=None): 
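        # Record each (url, timeout) pair so tests can assert the exact
        # request order and the timeout passed to the session.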
self.calls.append((url, timeout)) if url not in self.responses: raise AssertionError(f"unexpected URL requested: {url}") return self.responses[url] def _fixture_text(name): return (REPOLOGY_FIXTURES_DIR / name).read_text(encoding="utf-8") def test_repology_adapter_pkg_exact_parses_fixture_and_uses_timeout(): url = "https://repology.org/projects/?search=hello&inrepo=nix_unstable" session = MappingSession( { url: FakeResponse(_fixture_text("projects_hello.html")), } ) df = RepologyAdapter(session=session).query( RepologyQuery( repository="nix_unstable", pkg_exact="hello", ) ) assert session.calls == [(url, REPOLOGY_REQUEST_TIMEOUT)] assert list(df["package"].unique()) == ["hello"] assert set(df["status"]) == {"newest", "outdated"} outdated = df[df["status"] == "outdated"].iloc[0] assert outdated["version"] == "2.10" assert outdated["potentially_vulnerable"] == "1" assert outdated["newest_upstream_release"] == "2.11;2.12-rc1" assert outdated["repo_version_classify"] == "repo_pkg_needs_update" def test_repology_adapter_pkg_exact_raises_for_empty_results(): url = "https://repology.org/projects/?search=missing&inrepo=nix_unstable" session = MappingSession( { url: FakeResponse(_fixture_text("projects_empty.html")), } ) with pytest.raises(RepologyNoMatchingPackages): RepologyAdapter(session=session).query( RepologyQuery( repository="nix_unstable", pkg_exact="missing", ) ) assert session.calls == [(url, REPOLOGY_REQUEST_TIMEOUT)] def test_repology_adapter_sbom_query_marks_special_statuses(tmp_path): sbom_path = tmp_path / "sbom.cdx.json" sbom_path.write_text( json.dumps( { "metadata": {}, "components": [ {"name": "hello", "version": "2.10"}, {"name": "archive.tar.gz", "version": "1.0"}, {"name": "missingver", "version": ""}, {"name": "missingpkg", "version": "9.9"}, ], } ), encoding="utf-8", ) hello_url = "https://repology.org/projects/?search=hello&inrepo=nix_unstable" missing_url = "https://repology.org/projects/?search=missingpkg&inrepo=nix_unstable" session = MappingSession( { hello_url: FakeResponse(_fixture_text("projects_hello.html")), missing_url: FakeResponse(_fixture_text("projects_empty.html")), } ) df = RepologyAdapter(session=session).query( RepologyQuery( repository="nix_unstable", sbom_cdx=sbom_path, ) ) assert session.calls == [ (hello_url, REPOLOGY_REQUEST_TIMEOUT), (missing_url, REPOLOGY_REQUEST_TIMEOUT), ] assert set(df["status"]) == { "IGNORED", "NOT_FOUND", "NO_VERSION", "newest", "outdated", } hello_rows = df[df["package"] == "hello"] assert set(hello_rows["sbom_version_classify"]) == {"sbom_pkg_needs_update"} assert set(hello_rows["repo_version_classify"]) == { "", "repo_pkg_needs_update", } assert df[df["package"] == "archive.tar.gz"].iloc[0]["status"] == "IGNORED" assert df[df["package"] == "missingver"].iloc[0]["status"] == "NO_VERSION" assert df[df["package"] == "missingpkg"].iloc[0]["status"] == "NOT_FOUND" def test_repology_adapter_query_cves_parses_fixture_and_uses_timeout(): url = "https://repology.org/project/openssl/cves?version=3.1.0" session = MappingSession( { url: FakeResponse(_fixture_text("cves_openssl.html")), } ) df = RepologyAdapter(session=session).query_cves("openssl", "3.1.0") assert session.calls == [(url, REPOLOGY_REQUEST_TIMEOUT)] assert list(df["package"]) == ["openssl"] assert list(df["version"]) == ["3.1.0"] assert list(df["cve"]) == ["CVE-2024-1111"] ================================================ FILE: tests/test_repology_cve.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology 
Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Offline tests for Repology CVE queries.""" from repology.repology_cve import query_cve from repology.session import REPOLOGY_REQUEST_TIMEOUT from tests.testpaths import RESOURCES_DIR REPOLOGY_FIXTURES_DIR = RESOURCES_DIR / "repology" class FakeResponse: def __init__(self, text, status_code=200): self.text = text self.status_code = status_code def raise_for_status(self): if self.status_code >= 400: raise RuntimeError(f"unexpected status code: {self.status_code}") class MappingSession: def __init__(self, responses): self.responses = responses self.calls = [] def get(self, url, timeout=None): self.calls.append((url, timeout)) if url not in self.responses: raise AssertionError(f"unexpected URL requested: {url}") return self.responses[url] def test_query_cve_parses_fixture_and_uses_timeout(): url = "https://repology.org/project/openssl/cves?version=3.1.0" session = MappingSession( { url: FakeResponse( (REPOLOGY_FIXTURES_DIR / "cves_openssl.html").read_text( encoding="utf-8" ) ), } ) df = query_cve("openssl", "3.1.0", session=session) assert session.calls == [(url, REPOLOGY_REQUEST_TIMEOUT)] assert list(df["package"]) == ["openssl"] assert list(df["version"]) == ["3.1.0"] assert list(df["cve"]) == ["CVE-2024-1111"] ================================================ FILE: tests/test_repology_projects_parser.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Offline tests for the Repology projects-page parser.""" import pytest from repology.exceptions import RepologyUnexpectedResponse from repology.projects_parser import parse_projects_search_html from tests.testpaths import RESOURCES_DIR REPOLOGY_FIXTURES_DIR = RESOURCES_DIR / "repology" def _fixture_text(name): return (REPOLOGY_FIXTURES_DIR / name).read_text(encoding="utf-8") def test_parse_projects_search_html_parses_fixture_rows(): parsed = parse_projects_search_html( _fixture_text("projects_hello.html"), "nix_unstable", ) assert parsed.next_query_project == "" assert parsed.processed_ids == {"nix_unstable:hello"} assert parsed.package_rows == [ { "repo": "nix_unstable", "package": "hello", "version": "2.10", "status": "outdated", "potentially_vulnerable": "1", "newest_upstream_release": "2.11;2.12-rc1", }, { "repo": "nix_unstable", "package": "hello", "version": "2.11", "status": "newest", "potentially_vulnerable": "0", "newest_upstream_release": "2.11;2.12-rc1", }, ] def test_parse_projects_search_html_respects_already_processed_packages(): parsed = parse_projects_search_html( _fixture_text("projects_hello.html"), "nix_unstable", processed_ids={"nix_unstable:hello"}, ) assert parsed.next_query_project == "" assert parsed.processed_ids == {"nix_unstable:hello"} assert not parsed.package_rows def test_parse_projects_search_html_raises_for_malformed_table(): malformed = """
<table><tr><th>Project</th></tr></table>
""" with pytest.raises(RepologyUnexpectedResponse): parse_projects_search_html(malformed, "nix_unstable") ================================================ FILE: tests/test_repology_sbom.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Unit tests for Repology SBOM helpers.""" import json import pandas as pd from repology.sbom import ( is_ignored_sbom_package, make_sbom_status_row, merge_sbom_fields, parse_cdx_sbom, sbom_row_classify, ) def test_parse_cdx_sbom_normalizes_names_and_includes_metadata_component(tmp_path): sbom_path = tmp_path / "sbom.cdx.json" sbom_path.write_text( json.dumps( { "metadata": { "component": {"name": "libtiff", "version": "4.6.0"}, }, "components": [ {"name": "python311-requests", "version": "2.32.0"}, ], } ), encoding="utf-8", ) df = parse_cdx_sbom(sbom_path) assert df.to_dict("records") == [ {"name": "python:requests", "version": "2.32.0"}, {"name": "tiff", "version": "4.6.0"}, ] def test_merge_sbom_fields_and_classify_outdated_versions(): df_sbom = pd.DataFrame([{"name": "hello", "version": "2.10"}]) df_repo = pd.DataFrame( [ { "repo": "nix_unstable", "package": "hello", "version": "2.11", "status": "newest", "potentially_vulnerable": "0", "newest_upstream_release": "2.12", } ] ) df = merge_sbom_fields(df_sbom, df_repo) df["sbom_version_classify"] = df.apply(sbom_row_classify, axis=1) assert df["version_sbom"].tolist() == ["2.10"] assert df["sbom_version_classify"].tolist() == ["sbom_pkg_needs_update"] def test_sbom_status_helpers_cover_ignored_rows(): assert is_ignored_sbom_package("archive.tar.gz") is True assert is_ignored_sbom_package("openssl") is False assert make_sbom_status_row("nix_unstable", "archive.tar.gz", "1.0", "IGNORED") == { "repo": "nix_unstable", "package": "archive.tar.gz", "version": "1.0", "status": "IGNORED", "potentially_vulnerable": "", "newest_upstream_release": "", } ================================================ FILE: tests/test_runtime_closure.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Focused tests for structured runtime closure parsing.""" import subprocess import pytest from common.errors import InvalidNixJsonError, NixCommandError from sbomnix import runtime as sbomnix_runtime from sbomnix.runtime import runtime_closure_from_path_info def test_runtime_closure_from_path_info_extracts_edges_and_derivers(): closure = runtime_closure_from_path_info( { "/nix/store/11111111111111111111111111111111-target-1.0": { "deriver": "/nix/store/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-target-1.0.drv", "references": [ "/nix/store/11111111111111111111111111111111-target-1.0", "/nix/store/22222222222222222222222222222222-dep-1.0", ], }, "/nix/store/22222222222222222222222222222222-dep-1.0": { "deriver": "/nix/store/bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb-dep-1.0.drv", "references": ["/nix/store/22222222222222222222222222222222-dep-1.0"], }, } ) assert closure.df_deps.to_dict("records") == [ { "src_path": "/nix/store/22222222222222222222222222222222-dep-1.0", "src_pname": "dep-1.0", "target_path": "/nix/store/11111111111111111111111111111111-target-1.0", "target_pname": "target-1.0", } ] assert closure.output_paths_by_drv == { "/nix/store/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-target-1.0.drv": { "/nix/store/11111111111111111111111111111111-target-1.0" }, 
"/nix/store/bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb-dep-1.0.drv": { "/nix/store/22222222222222222222222222222222-dep-1.0" }, } def test_runtime_closure_from_path_info_supports_list_payloads(): closure = runtime_closure_from_path_info( [ { "path": "/nix/store/11111111111111111111111111111111-target-1.0", "deriver": "/nix/store/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-target-1.0.drv", "references": [], } ] ) assert closure.df_deps.empty assert closure.output_paths_by_drv == { "/nix/store/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-target-1.0.drv": { "/nix/store/11111111111111111111111111111111-target-1.0" } } def test_runtime_closure_from_path_info_rejects_missing_references(): with pytest.raises(InvalidNixJsonError, match="missing `references`"): runtime_closure_from_path_info( { "/nix/store/11111111111111111111111111111111-target-1.0": { "deriver": None, } } ) def test_runtime_closure_from_path_info_rejects_malformed_reference_items(): with pytest.raises(InvalidNixJsonError, match=r"references\[0\]"): runtime_closure_from_path_info( { "/nix/store/11111111111111111111111111111111-target-1.0": { "references": [None], } } ) def test_load_runtime_closure_wraps_nix_command_failures(monkeypatch): def fail_exec_cmd(cmd): raise subprocess.CalledProcessError( returncode=1, cmd=cmd, stderr="unsupported path-info json format", ) monkeypatch.setattr(sbomnix_runtime, "exec_cmd", fail_exec_cmd) with pytest.raises(NixCommandError, match="unsupported path-info json format"): sbomnix_runtime.load_runtime_closure( "/nix/store/11111111111111111111111111111111-target-1.0" ) ================================================ FILE: tests/test_sbom_closure.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Focused tests for SBOM dependency closure helpers.""" import pandas as pd from sbomnix.closure import ( dependencies_to_depth, dependency_paths, walk_dependency_rows, ) def _dependency_df(): return pd.DataFrame.from_records( [ { "src_path": "/nix/store/bash", "src_pname": "bash", "target_path": "/nix/store/hello", "target_pname": "hello", }, { "src_path": "/nix/store/glibc", "src_pname": "glibc", "target_path": "/nix/store/bash", "target_pname": "bash", }, { "src_path": "/nix/store/zlib", "src_pname": "zlib", "target_path": "/nix/store/glibc", "target_pname": "glibc", }, ] ) def test_dependencies_to_depth_returns_reachable_dependency_rows(): df_depth = dependencies_to_depth(_dependency_df(), "/nix/store/hello", depth=2) assert df_depth.to_dict("records") == [ { "src_path": "/nix/store/bash", "src_pname": "bash", "target_path": "/nix/store/hello", "target_pname": "hello", }, { "src_path": "/nix/store/glibc", "src_pname": "glibc", "target_path": "/nix/store/bash", "target_pname": "bash", }, ] def test_walk_dependency_rows_supports_inverse_traversal(): walked = walk_dependency_rows( _dependency_df(), "/nix/store/zlib", depth=2, inverse=True, ) assert [row.depth for row in walked] == [1, 2] assert [row.row["target_path"] for row in walked] == [ "/nix/store/glibc", "/nix/store/bash", ] assert [row.row["src_path"] for row in walked] == [ "/nix/store/zlib", "/nix/store/glibc", ] def test_walk_dependency_rows_stops_after_matching_boundary_row(): walked = walk_dependency_rows( _dependency_df(), "/nix/store/hello", depth=3, stop_at=lambda row: row["target_pname"] == "bash", ) assert [row.depth for row in walked] == [1, 2] assert [row.row["target_path"] for row in walked] == [ "/nix/store/hello", "/nix/store/bash", 
] assert [row.row["src_path"] for row in walked] == [ "/nix/store/bash", "/nix/store/glibc", ] def test_dependencies_to_depth_returns_empty_dataframe_for_missing_start(): df_depth = dependencies_to_depth(_dependency_df(), "/nix/store/missing", depth=2) assert df_depth.empty assert list(df_depth.columns) == [ "src_path", "src_pname", "target_path", "target_pname", ] def test_dependencies_to_depth_deduplicates_shared_diamond_edges(): df_deps = pd.DataFrame.from_records( [ { "src_path": "/nix/store/left", "src_pname": "left", "target_path": "/nix/store/root", "target_pname": "root", }, { "src_path": "/nix/store/right", "src_pname": "right", "target_path": "/nix/store/root", "target_pname": "root", }, { "src_path": "/nix/store/shared", "src_pname": "shared", "target_path": "/nix/store/left", "target_pname": "left", }, { "src_path": "/nix/store/shared", "src_pname": "shared", "target_path": "/nix/store/right", "target_pname": "right", }, { "src_path": "/nix/store/leaf", "src_pname": "leaf", "target_path": "/nix/store/shared", "target_pname": "shared", }, ] ) df_depth = dependencies_to_depth(df_deps, "/nix/store/root", depth=3) assert df_depth.to_dict("records") == [ { "src_path": "/nix/store/left", "src_pname": "left", "target_path": "/nix/store/root", "target_pname": "root", }, { "src_path": "/nix/store/shared", "src_pname": "shared", "target_path": "/nix/store/left", "target_pname": "left", }, { "src_path": "/nix/store/leaf", "src_pname": "leaf", "target_path": "/nix/store/shared", "target_pname": "shared", }, { "src_path": "/nix/store/right", "src_pname": "right", "target_path": "/nix/store/root", "target_pname": "root", }, { "src_path": "/nix/store/shared", "src_pname": "shared", "target_path": "/nix/store/right", "target_pname": "right", }, ] def test_dependency_paths_returns_all_source_and_target_paths(): assert dependency_paths(_dependency_df()) == { "/nix/store/bash", "/nix/store/glibc", "/nix/store/hello", "/nix/store/zlib", } ================================================ FILE: tests/test_sbom_vuln_enrichment.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Focused tests for SBOM vulnerability enrichment boundaries.""" import uuid from pathlib import Path from types import SimpleNamespace import pandas as pd import pytest from common.errors import SbomnixError from sbomnix import cli_utils as sbomnix_cli_utils from sbomnix import main as sbomnix_main from sbomnix import vuln_enrichment as sbomnix_vuln_enrichment from sbomnix.builder import SbomBuilder class CapturingLogger: def __init__(self): self.records = [] def info(self, msg, *args): self.records.append(("info", msg, args)) def fatal(self, msg, *args): self.records.append(("fatal", msg, args)) def test_sbomnix_getargs_accepts_meta_nixpkgs(): args = sbomnix_main.getargs( [ "/nix/store/target", "--meta-nixpkgs", "nix-path", ] ) assert args.meta_nixpkgs == "nix-path" def test_sbomnix_run_rejects_exclude_meta_with_meta_nixpkgs(): args = SimpleNamespace( NIXREF="/nix/store/target", buildtime=False, depth=None, verbose=0, include_vulns=False, exclude_meta=True, meta_nixpkgs="nix-path", exclude_cpe_matching=False, csv=None, cdx=None, spdx=None, impure=True, ) with pytest.raises(SbomnixError, match="--exclude-meta"): sbomnix_main._run(args) def test_sbomnix_main_enriches_cdx_explicitly_when_include_vulns_is_set(monkeypatch): args = SimpleNamespace( NIXREF=".#target", buildtime=False, depth=None, 
verbose=0, include_vulns=True, exclude_meta=False, meta_nixpkgs=None, exclude_cpe_matching=False, csv=None, cdx="sbom.cdx.json", spdx=None, impure=True, ) events = [] class FakeSbomBuilder: def __init__(self, **kwargs): events.append(("init", kwargs)) def to_cdx_data(self): events.append(("to_cdx_data",)) return {"bomFormat": "CycloneDX"} def enrich_cdx_with_vulnerabilities(self, cdx): events.append(("enrich", dict(cdx))) cdx["vulnerabilities"] = [] def write_json(self, path, data, printinfo=False): events.append(("write_json", path, dict(data), printinfo)) def to_spdx(self, _path): raise AssertionError("to_spdx should not run in this test") def to_csv(self, _path): raise AssertionError("to_csv should not run in this test") monkeypatch.setattr(sbomnix_main, "getargs", lambda: args) monkeypatch.setattr(sbomnix_main, "set_log_verbosity", lambda _verbosity: None) monkeypatch.setattr( sbomnix_main, "resolve_nix_target", lambda *_args, **_kwargs: sbomnix_cli_utils.ResolvedNixTarget( path="/nix/store/target", flakeref=".#target", ), ) monkeypatch.setattr(sbomnix_main, "SbomBuilder", FakeSbomBuilder) sbomnix_main.main() assert events == [ ( "init", { "nix_path": "/nix/store/target", "buildtime": False, "depth": None, "flakeref": ".#target", "original_ref": None, "meta_nixpkgs": None, "impure": True, "include_meta": True, "include_vulns": True, "include_cpe": True, }, ), ("to_cdx_data",), ("enrich", {"bomFormat": "CycloneDX"}), ( "write_json", "sbom.cdx.json", {"bomFormat": "CycloneDX", "vulnerabilities": []}, True, ), ] def test_sbomnix_main_logs_generation_before_initializing_builder(monkeypatch): args = SimpleNamespace( NIXREF=".#target", buildtime=False, depth=None, verbose=0, include_vulns=False, exclude_meta=False, meta_nixpkgs=None, exclude_cpe_matching=False, csv=None, cdx=None, spdx=None, impure=False, ) logger = CapturingLogger() events = [] class FakeSbomBuilder: def __init__(self, **kwargs): events.append(("init", kwargs)) monkeypatch.setattr(sbomnix_main, "LOG", logger) monkeypatch.setattr( sbomnix_main, "resolve_nix_target", lambda *_args, **_kwargs: sbomnix_cli_utils.ResolvedNixTarget( path="/nix/store/target", flakeref=".#target", ), ) monkeypatch.setattr(sbomnix_main, "SbomBuilder", FakeSbomBuilder) sbomnix_main._run(args) assert logger.records == [ ("info", "Generating SBOM for target '%s'", ("/nix/store/target",)) ] assert events == [ ( "init", { "nix_path": "/nix/store/target", "buildtime": False, "depth": None, "flakeref": ".#target", "original_ref": None, "meta_nixpkgs": None, "impure": False, "include_meta": True, "include_vulns": False, "include_cpe": True, }, ) ] def test_to_cdx_no_longer_triggers_vulnerability_scans(tmp_path, monkeypatch): seen_calls = [] def no_dependencies(_self, _drv, **_kwargs): return None class FailIfCalledScanner: def __init__(self): seen_calls.append("init") def scan_vulnix(self, _target_path, _buildtime): raise AssertionError("scan_vulnix should not run during plain export") def scan_grype(self, _sbom_path): raise AssertionError("scan_grype should not run during plain export") def scan_osv(self, _sbom_path): raise AssertionError("scan_osv should not run during plain export") # Bypass __init__ to keep the test focused on export behavior without Nix IO. 
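    # object.__new__ allocates the builder without running SbomBuilder.__init__;
    # the attributes the export path needs are filled in by hand below.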
sbomdb = object.__new__(SbomBuilder) sbomdb.uid = "store_path" sbomdb.nix_path = "/nix/store/target" sbomdb.buildtime = False sbomdb.target_deriver = "/nix/store/target.drv" sbomdb.target_component_ref = "/nix/store/target.drv" sbomdb.depth = None sbomdb.uuid = uuid.uuid4() sbomdb.include_vulns = True sbomdb.sbom_type = "runtime_only" sbomdb.df_sbomdb = pd.DataFrame( [ { "store_path": "/nix/store/target.drv", "pname": "target", "name": "target", "version": "1.0", "outputs": ["/nix/store/target"], "out": "/nix/store/target", "purl": "", "cpe": "", "urls": "", "patches": "", } ] ) monkeypatch.setattr("sbomnix.vuln_enrichment.VulnScan", FailIfCalledScanner) monkeypatch.setattr(SbomBuilder, "lookup_dependencies", no_dependencies) out_path = tmp_path / "out.cdx.json" sbomdb.to_cdx(out_path, printinfo=False) assert out_path.exists() assert not seen_calls @pytest.mark.parametrize( ("buildtime", "expected_target"), [ (False, "/nix/store/target-output"), (True, "/nix/store/target.drv"), ], ) def test_sbom_vuln_enrichment_scans_expected_nix_target( buildtime, expected_target, monkeypatch, ): seen_vulnix_calls = [] class CapturingScanner: def __init__(self): self.df_grype = pd.DataFrame() self.df_osv = pd.DataFrame() self.df_vulnix = pd.DataFrame() def scan_vulnix(self, target_path, scan_buildtime): seen_vulnix_calls.append((target_path, scan_buildtime)) def scan_grype(self, _sbom_path): return None def scan_osv(self, _sbom_path): return None # Bypass __init__ to keep the test focused on enrichment target selection. sbomdb = object.__new__(SbomBuilder) sbomdb.nix_path = "/nix/store/target-output" sbomdb.buildtime = buildtime sbomdb.target_deriver = "/nix/store/target.drv" sbomdb.target_component_ref = "/nix/store/target.drv" sbomdb.df_sbomdb = pd.DataFrame() monkeypatch.setattr(sbomnix_vuln_enrichment, "VulnScan", CapturingScanner) cdx = {"bomFormat": "CycloneDX"} sbomdb.enrich_cdx_with_vulnerabilities(cdx) assert seen_vulnix_calls == [(expected_target, buildtime)] assert cdx["vulnerabilities"] == [] def test_sbom_vuln_tempfile_is_removed_on_scan_failure(tmp_path, monkeypatch): temp_cdx_path = tmp_path / "vulnscan_temp.json" seen_paths = [] def no_dependencies(_self, _drv, **_kwargs): return None class FakeTempFile: def __init__(self, path): self.name = path.as_posix() def __enter__(self): Path(self.name).touch() return self def __exit__(self, exc_type, exc, traceback): return False class FailingScanner: def __init__(self): self.df_grype = pd.DataFrame() self.df_osv = pd.DataFrame() self.df_vulnix = pd.DataFrame() def scan_vulnix(self, _target_path, _buildtime): return None def scan_grype(self, sbom_path): sbom_path = Path(sbom_path) seen_paths.append(sbom_path) assert sbom_path.exists() def scan_osv(self, sbom_path): sbom_path = Path(sbom_path) seen_paths.append(sbom_path) raise RuntimeError("osv scan failed") # Bypass __init__ to keep the test focused on enrichment tempfile cleanup. 
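    # The NamedTemporaryFile replacement below always resolves to
    # temp_cdx_path, letting the final assertions verify the tempfile is
    # removed even when the OSV scan raises.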
sbomdb = object.__new__(SbomBuilder) sbomdb.uid = "store_path" sbomdb.nix_path = "/nix/store/target" sbomdb.buildtime = False sbomdb.target_deriver = "/nix/store/target.drv" sbomdb.target_component_ref = "/nix/store/target.drv" sbomdb.depth = None sbomdb.uuid = uuid.uuid4() sbomdb.include_vulns = True sbomdb.sbom_type = "runtime_only" sbomdb.df_sbomdb = pd.DataFrame( [ { "store_path": "/nix/store/target.drv", "pname": "target", "name": "target", "version": "1.0", "outputs": ["/nix/store/target"], "out": "/nix/store/target", "purl": "", "cpe": "", "urls": "", "patches": "", } ] ) monkeypatch.setattr( sbomnix_vuln_enrichment, "NamedTemporaryFile", lambda **_kwargs: FakeTempFile(temp_cdx_path), ) monkeypatch.setattr(sbomnix_vuln_enrichment, "VulnScan", FailingScanner) monkeypatch.setattr(SbomBuilder, "lookup_dependencies", no_dependencies) cdx = sbomdb.to_cdx_data() with pytest.raises(RuntimeError, match="osv scan failed"): sbomdb.enrich_cdx_with_vulnerabilities(cdx) assert seen_paths == [temp_cdx_path, temp_cdx_path] assert not temp_cdx_path.exists() ================================================ FILE: tests/test_schema_validation.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Focused tests for offline schema validation helpers.""" from tests.testpaths import RESOURCES_DIR, SAMPLE_CDX_SBOM from tests.testutils import resolve_local_schema_path, validate_json def test_local_schema_aliases_resolve_to_vendored_resources(): """Resolve the vendored schema aliases used by local validation.""" assert resolve_local_schema_path("spdx.schema.json", RESOURCES_DIR).name == ( "spdx.schema.json" ) assert ( resolve_local_schema_path( "http://cyclonedx.org/schema/spdx.schema.json", RESOURCES_DIR, ).name == "spdx.schema.json" ) assert ( resolve_local_schema_path( "jsf-0.82.schema.json#/definitions/signature", RESOURCES_DIR, ).name == "jsf-0.82.schema.json" ) def test_validate_json_uses_only_local_schema_resources(): """Validate a sample CycloneDX SBOM without network access.""" validate_json(SAMPLE_CDX_SBOM, RESOURCES_DIR / "cdx_bom-1.4.schema.json") ================================================ FILE: tests/test_store_batching.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Focused tests for batched store and derivation loading.""" import json import subprocess from types import SimpleNamespace import pytest from common.errors import NixCommandError from sbomnix import derivation as sbomnix_derivation def test_load_many_batches_nix_derivation_show_and_preserves_outputs(monkeypatch): calls = [] def fake_exec_cmd(cmd, **kwargs): calls.append((cmd, kwargs)) return SimpleNamespace( stdout=json.dumps( { "derivations": { "/nix/store/first.drv": { "name": "first", "env": { "name": "first", "pname": "first", "version": "1.0", }, "outputs": { "out": {"path": "/nix/store/first-out"}, }, }, "/nix/store/second.drv": { "name": "second", "env": { "name": "second", "pname": "second", "version": "2.0", }, "outputs": { "out": {"path": "/nix/store/second-out"}, }, }, }, "version": 4, } ), returncode=0, stderr="", ) monkeypatch.setattr(sbomnix_derivation, "exec_cmd", fake_exec_cmd) loaded = sbomnix_derivation.load_many( ["/nix/store/first.drv", "/nix/store/second.drv"], output_paths_by_drv={ "/nix/store/first.drv": {"/nix/store/first-extra-out"}, 
"/nix/store/second.drv": {"/nix/store/second-extra-out"}, }, batch_size=50, ) assert calls == [ ( [ "nix", "derivation", "show", "/nix/store/first.drv", "/nix/store/second.drv", "--extra-experimental-features", "flakes", "--extra-experimental-features", "nix-command", ], {}, ) ] assert loaded["/nix/store/first.drv"].outputs == [ "/nix/store/first-extra-out", "/nix/store/first-out", ] assert loaded["/nix/store/second.drv"].outputs == [ "/nix/store/second-extra-out", "/nix/store/second-out", ] def test_load_many_supports_output_path_queries(monkeypatch): calls = [] def fake_exec_cmd(cmd, **kwargs): calls.append((cmd, kwargs)) return SimpleNamespace( stdout=json.dumps( { "derivations": { "/nix/store/canonical.drv": { "name": "first", "env": { "name": "first", "pname": "first", "version": "1.0", }, "outputs": { "out": {"path": "/nix/store/first-out"}, "dev": {"path": "/nix/store/first-dev"}, }, }, }, "version": 4, } ), returncode=0, stderr="", ) monkeypatch.setattr(sbomnix_derivation, "exec_cmd", fake_exec_cmd) loaded = sbomnix_derivation.load_many( ["/nix/store/first-out"], output_paths_by_drv={ "/nix/store/first-out": {"/nix/store/first-out"}, }, batch_size=50, ) assert calls == [ ( [ "nix", "derivation", "show", "/nix/store/first-out", "--extra-experimental-features", "flakes", "--extra-experimental-features", "nix-command", ], {}, ) ] assert list(loaded) == ["/nix/store/canonical.drv"] assert loaded["/nix/store/canonical.drv"].store_path == "/nix/store/canonical.drv" assert loaded["/nix/store/canonical.drv"].outputs == [ "/nix/store/first-dev", "/nix/store/first-out", ] def test_load_many_maps_output_queries_from_derivation_env(monkeypatch): def fake_exec_cmd(cmd, **kwargs): return SimpleNamespace( stdout=json.dumps( { "derivations": { "/nix/store/fixed.drv": { "name": "fixed", "env": { "name": "fixed", "out": "/nix/store/fixed-out", "outputs": "out", "pname": "fixed", "version": "1.0", }, "outputs": { "out": { "hash": "sha256-test", "method": "flat", }, }, }, }, "version": 4, } ), returncode=0, stderr="", ) monkeypatch.setattr(sbomnix_derivation, "exec_cmd", fake_exec_cmd) loaded = sbomnix_derivation.load_many( ["/nix/store/fixed-out"], output_paths_by_drv={ "/nix/store/fixed-out": {"/nix/store/fixed-out"}, }, batch_size=50, ) assert list(loaded) == ["/nix/store/fixed.drv"] assert loaded["/nix/store/fixed.drv"].outputs == ["/nix/store/fixed-out"] def test_load_many_can_ignore_missing_output_derivations(monkeypatch): calls = [] def fake_exec_cmd(cmd, **kwargs): calls.append((cmd, kwargs)) query_paths = cmd[3:-4] if "/nix/store/missing-out" in query_paths: assert kwargs == {"raise_on_error": False, "log_error": False} return None return SimpleNamespace( stdout=json.dumps( { "derivations": { "/nix/store/good.drv": { "name": "good", "env": { "name": "good", "pname": "good", "version": "1.0", }, "outputs": { "out": {"path": "/nix/store/good-out"}, }, }, }, "version": 4, } ), returncode=0, stderr="", ) monkeypatch.setattr(sbomnix_derivation, "exec_cmd", fake_exec_cmd) loaded = sbomnix_derivation.load_many( ["/nix/store/good-out", "/nix/store/missing-out"], output_paths_by_drv={ "/nix/store/good-out": {"/nix/store/good-out"}, "/nix/store/missing-out": {"/nix/store/missing-out"}, }, batch_size=50, ignore_missing=True, ) assert list(loaded) == ["/nix/store/good.drv"] assert [call[0][3:-4] for call in calls] == [ ["/nix/store/good-out", "/nix/store/missing-out"], ["/nix/store/good-out"], ["/nix/store/missing-out"], ] def test_load_recursive_wraps_nix_command_failures(monkeypatch): def 

def test_load_recursive_wraps_nix_command_failures(monkeypatch):
    def fail_exec_cmd(cmd):
        raise subprocess.CalledProcessError(
            returncode=1,
            cmd=cmd,
            stderr="recursive derivation show failed",
        )

    monkeypatch.setattr(sbomnix_derivation, "exec_cmd", fail_exec_cmd)

    with pytest.raises(NixCommandError, match="recursive derivation show failed"):
        sbomnix_derivation.load_recursive(
            "/nix/store/11111111111111111111111111111111-target-1.0.drv"
        )


def test_load_rejects_empty_derivation_metadata(monkeypatch):
    monkeypatch.setattr(
        sbomnix_derivation,
        "exec_cmd",
        lambda _cmd: SimpleNamespace(stdout="{}", stderr="", returncode=0),
    )

    with pytest.raises(NixCommandError, match="No derivation metadata returned"):
        sbomnix_derivation.load(
            "/nix/store/11111111111111111111111111111111-target-1.0",
            None,
        )


def test_load_recursive_rejects_empty_derivation_metadata(monkeypatch):
    monkeypatch.setattr(
        sbomnix_derivation,
        "exec_cmd",
        lambda _cmd: SimpleNamespace(stdout="{}", stderr="", returncode=0),
    )

    with pytest.raises(NixCommandError, match="No derivation metadata returned"):
        sbomnix_derivation.load_recursive(
            "/nix/store/11111111111111111111111111111111-target-1.0.drv"
        )


================================================
FILE: tests/test_temp_sbom_generation.py
================================================
#!/usr/bin/env python3

# SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII)
#
# SPDX-License-Identifier: Apache-2.0

"""Focused tests for temporary SBOM generation and cleanup."""

from pathlib import Path
from types import SimpleNamespace

import pytest

from sbomnix import cli_utils as sbomnix_cli_utils
from vulnxscan import vulnxscan_cli


def test_vulnxscan_cleans_generated_tempfiles_on_failure(tmp_path, monkeypatch):
    sbom_cdx_path = tmp_path / "generated.cdx.json"
    sbom_csv_path = tmp_path / "generated.csv"
    sbom_cdx_path.write_text("{}", encoding="utf-8")
    sbom_csv_path.write_text("", encoding="utf-8")

    args = SimpleNamespace(
        TARGET="target",
        verbose=0,
        out="vulns.csv",
        buildtime=False,
        sbom=False,
        whitelist=None,
        triage=False,
        nixprs=False,
    )

    class FailingScanner:
        def scan_vulnix(self, _target_path, _buildtime):
            return None

        def scan_grype(self, _sbom_path):
            raise RuntimeError("scan failed")

        def scan_osv(self, _sbom_path):
            raise AssertionError("scan_osv should not run after grype failure")

        def report(self, _args, _sbom_csv_path):
            raise AssertionError("report should not run after scan failure")

    monkeypatch.setattr(vulnxscan_cli, "getargs", lambda: args)
    monkeypatch.setattr(vulnxscan_cli, "set_log_verbosity", lambda _verbosity: None)
    monkeypatch.setattr(
        vulnxscan_cli, "exit_unless_command_exists", lambda _command: None
    )
    monkeypatch.setattr(
        vulnxscan_cli,
        "resolve_nix_target",
        lambda _target, buildtime=False: sbomnix_cli_utils.ResolvedNixTarget(
            path="/nix/store/target"
        ),
    )
    monkeypatch.setattr(
        vulnxscan_cli,
        "generate_temp_sbom",
        lambda _target_path, _buildtime, **_kwargs: sbomnix_cli_utils.GeneratedSbom(
            cdx_path=sbom_cdx_path,
            csv_path=sbom_csv_path,
        ),
    )
    monkeypatch.setattr(vulnxscan_cli, "VulnScan", FailingScanner)

    with pytest.raises(RuntimeError, match="scan failed"):
        vulnxscan_cli.main()

    assert not sbom_cdx_path.exists()
    assert not sbom_csv_path.exists()
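
# Hedged usage sketch: callers are expected to pair generate_temp_sbom with
# GeneratedSbom.cleanup(), typically in a try/finally, so tempfiles do not
# leak even when a later step raises (the behavior the test above pins down
# for vulnxscan_cli.main):
#
#     generated = generate_temp_sbom(target_path, buildtime=False)
#     try:
#         ...  # scan or otherwise consume generated.cdx_path
#     finally:
#         generated.cleanup()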

def test_generate_temp_sbom_without_csv_returns_only_cdx_path(tmp_path, monkeypatch):
    sbom_cdx_path = tmp_path / "generated.cdx.json"

    class FakeTempFile:
        def __init__(self, path):
            self.name = path.as_posix()

        def __enter__(self):
            Path(self.name).touch()
            return self

        def __exit__(self, exc_type, exc, traceback):
            return False

    class DummySbomBuilder:
        def __init__(self, _target_path, _buildtime, include_meta=False):
            assert include_meta is False

        def to_cdx(self, sbom_path, printinfo=False):
            Path(sbom_path).write_text("{}", encoding="utf-8")
            assert printinfo is False

        def to_csv(self, _sbom_path, loglevel=None):
            raise AssertionError("to_csv should not run when include_csv is False")

    monkeypatch.setattr(
        sbomnix_cli_utils,
        "NamedTemporaryFile",
        lambda **_kwargs: FakeTempFile(sbom_cdx_path),
    )
    monkeypatch.setattr(sbomnix_cli_utils, "SbomBuilder", DummySbomBuilder)

    generated = sbomnix_cli_utils.generate_temp_sbom(
        "/nix/store/target",
        buildtime=False,
        prefix="nixdeps_",
        cdx_suffix=".cdx.json",
    )

    assert generated == sbomnix_cli_utils.GeneratedSbom(
        cdx_path=sbom_cdx_path,
        csv_path=None,
    )
    assert sbom_cdx_path.exists()
    generated.cleanup()
    assert not sbom_cdx_path.exists()


def test_generate_temp_sbom_cleans_tempfiles_on_generation_failure(
    tmp_path, monkeypatch
):
    sbom_cdx_path = tmp_path / "generated.cdx.json"
    sbom_csv_path = tmp_path / "generated.csv"

    class FakeTempFile:
        def __init__(self, path):
            self.name = path.as_posix()

        def __enter__(self):
            Path(self.name).touch()
            return self

        def __exit__(self, exc_type, exc, traceback):
            return False

    class FailingSbomBuilder:
        def __init__(self, _target_path, _buildtime, include_meta=False):
            assert include_meta is False

        def to_cdx(self, sbom_path, printinfo=False):
            Path(sbom_path).write_text("{}", encoding="utf-8")
            assert printinfo is False

        def to_csv(self, sbom_path, loglevel=None):
            Path(sbom_path).write_text("", encoding="utf-8")
            assert loglevel is not None
            raise RuntimeError("sbom csv generation failed")

    monkeypatch.setattr(
        sbomnix_cli_utils,
        "NamedTemporaryFile",
        lambda **kwargs: FakeTempFile(
            sbom_cdx_path if kwargs["suffix"] == ".json" else sbom_csv_path
        ),
    )
    monkeypatch.setattr(sbomnix_cli_utils, "SbomBuilder", FailingSbomBuilder)

    with pytest.raises(RuntimeError, match="sbom csv generation failed"):
        sbomnix_cli_utils.generate_temp_sbom(
            "/nix/store/target",
            buildtime=False,
            prefix="vulnxscan_",
            cdx_suffix=".json",
            include_csv=True,
        )

    assert not sbom_cdx_path.exists()
    assert not sbom_csv_path.exists()


def test_generate_temp_sbom_cleans_first_tempfile_if_second_creation_fails(
    tmp_path, monkeypatch
):
    sbom_cdx_path = tmp_path / "generated.cdx.json"

    class FakeTempFile:
        def __init__(self, path):
            self.name = path.as_posix()

        def __enter__(self):
            Path(self.name).touch()
            return self

        def __exit__(self, exc_type, exc, traceback):
            return False

    class DummySbomBuilder:
        def __init__(self, _target_path, _buildtime, include_meta=False):
            assert include_meta is False

        def to_cdx(self, _sbom_path, printinfo=False):
            raise AssertionError("to_cdx should not run if csv tempfile creation fails")

        def to_csv(self, _sbom_path, loglevel=None):
            raise AssertionError("to_csv should not run if csv tempfile creation fails")

    def fake_named_temporary_file(**kwargs):
        if kwargs["suffix"] == ".json":
            return FakeTempFile(sbom_cdx_path)
        raise RuntimeError("csv tempfile creation failed")

    monkeypatch.setattr(
        sbomnix_cli_utils,
        "NamedTemporaryFile",
        fake_named_temporary_file,
    )
    monkeypatch.setattr(sbomnix_cli_utils, "SbomBuilder", DummySbomBuilder)

    with pytest.raises(RuntimeError, match="csv tempfile creation failed"):
        sbomnix_cli_utils.generate_temp_sbom(
            "/nix/store/target",
            buildtime=False,
            prefix="vulnxscan_",
            cdx_suffix=".json",
            include_csv=True,
        )

    assert not sbom_cdx_path.exists()


================================================
FILE: tests/test_vulnix_test_support.py
================================================
#!/usr/bin/env python3

# SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII)
#
# SPDX-License-Identifier: Apache-2.0

"""Tests for the vulnix test wrapper helpers."""

from __future__ import annotations

import os
import shutil
import subprocess

import pytest

from tests import vulnix_test_support


def test_build_vulnix_test_env_prepends_wrapper_dir(tmp_path):
    """Wrapper dir should take precedence on PATH for test subprocesses."""
    wrapper_dir = tmp_path / "bin"
    config = vulnix_test_support.VulnixTestConfig(
        wrapper_dir=wrapper_dir,
        effective_mode="dummy",
        effective_cache_dir=None,
        real_vulnix=None,
    )

    env = vulnix_test_support.build_vulnix_test_env(
        {"PATH": "/usr/bin"},
        config=config,
    )

    assert env["PATH"] == os.pathsep.join([str(wrapper_dir), "/usr/bin"])
    assert env["SBOMNIX_TEST_VULNIX_EFFECTIVE_MODE"] == "dummy"
    assert env["SBOMNIX_TEST_REAL_VULNIX"] == ""
    assert "SBOMNIX_TEST_VULNIX_EFFECTIVE_CACHE_DIR" not in env


def test_dummy_vulnix_wrapper_returns_empty_json(tmp_path):
    """Dummy mode should behave like a no-op vulnix process."""
    config = vulnix_test_support.configure_vulnix_for_tests(
        tmp_root=tmp_path,
        effective_mode="dummy",
        cache_dir=tmp_path / "cache",
        real_vulnix=None,
    )
    env = vulnix_test_support.build_vulnix_test_env({}, config=config)

    ret = subprocess.run(
        [str(config.wrapper_dir / "vulnix"), "--json"],
        check=True,
        capture_output=True,
        encoding="utf-8",
        env=env,
    )

    assert ret.stdout == "[]"
    assert ret.stderr == ""


def test_real_vulnix_wrapper_forwards_cache_dir_and_args(tmp_path):
    """Real mode wrapper should exec the underlying binary with the cache dir."""
    real_vulnix = tmp_path / "real-vulnix"
    real_vulnix.write_text(
        """#!/bin/sh
set -eu
printf '%s\\n' "$@"
""",
        encoding="utf-8",
    )
    real_vulnix.chmod(0o755)
    cache_dir = tmp_path / "cache"
    cache_dir.mkdir()
    (cache_dir / "Data.fs").write_text("ready", encoding="utf-8")

    config = vulnix_test_support.configure_vulnix_for_tests(
        tmp_root=tmp_path,
        effective_mode="real",
        cache_dir=cache_dir,
        real_vulnix=real_vulnix.as_posix(),
    )
    env = vulnix_test_support.build_vulnix_test_env({}, config=config)
    env = {"PATH": os.environ.get("PATH", os.defpath), **env}

    ret = subprocess.run(
        [str(config.wrapper_dir / "vulnix"), "target", "-C", "--json"],
        check=True,
        capture_output=True,
        encoding="utf-8",
        env=env,
    )

    assert ret.stdout.splitlines() == [
        "--cache-dir",
        cache_dir.as_posix(),
        "target",
        "-C",
        "--json",
    ]


def test_configure_vulnix_for_tests_rejects_unknown_mode(tmp_path):
    """configure_vulnix_for_tests should only accept dummy or real modes."""
    with pytest.raises(ValueError, match="invalid effective vulnix mode"):
        vulnix_test_support.configure_vulnix_for_tests(
            tmp_root=tmp_path,
            effective_mode="surprise",
            cache_dir=tmp_path / "cache",
            real_vulnix=None,
        )


def test_real_vulnix_wrapper_shows_clear_error_when_binary_missing(tmp_path):
    """Real mode wrapper should fail with a readable message if env is stale."""
    config = vulnix_test_support.configure_vulnix_for_tests(
        tmp_root=tmp_path,
        effective_mode="dummy",
        cache_dir=tmp_path / "cache",
        real_vulnix=None,
    )
    env = {
        "PATH": os.environ.get("PATH", os.defpath),
        "SBOMNIX_TEST_VULNIX_EFFECTIVE_MODE": "real",
    }

    ret = subprocess.run(
        [str(config.wrapper_dir / "vulnix"), "--json"],
        check=False,
        capture_output=True,
        encoding="utf-8",
        env=env,
    )

    assert ret.returncode != 0
    assert "SBOMNIX_TEST_REAL_VULNIX is empty" in ret.stderr
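
# Illustrative wiring sketch (hypothetical conftest glue, not part of this
# file): a fixture could configure the wrapper once and hand tests a ready
# environment, using only the helpers exercised above:
#
#     @pytest.fixture
#     def vulnix_env(tmp_path):
#         config = vulnix_test_support.configure_vulnix_for_tests(
#             tmp_root=tmp_path,
#             effective_mode="dummy",
#             cache_dir=tmp_path / "cache",
#         )
#         return vulnix_test_support.build_vulnix_test_env(
#             dict(os.environ), config=config
#         )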
""", encoding="utf-8", ) real_vulnix.chmod(0o755) result = tmp_path / "build" / "result" result.parent.mkdir(parents=True, exist_ok=True) result.write_text("placeholder", encoding="utf-8") with pytest.raises( RuntimeError, match="vulnix cache warm-up scan failed: vulnix boom" ): vulnix_test_support.ensure_real_vulnix_cache( tmp_path / "cache", build_root=tmp_path / "build", real_vulnix=real_vulnix.as_posix(), test_derivation=tmp_path / "derivation.nix", ) @pytest.mark.real_vulnix def test_real_vulnix_wrapper_executes_real_binary(tmp_path): """Opt-in smoke test that executes the real vulnix binary via the wrapper.""" real_vulnix = shutil.which("vulnix") if real_vulnix is None: pytest.skip("'vulnix' is not available in PATH") cache_dir = tmp_path / "real-cache" config = vulnix_test_support.configure_vulnix_for_tests( tmp_root=tmp_path, effective_mode="real", cache_dir=cache_dir, real_vulnix=real_vulnix, ) env = vulnix_test_support.build_vulnix_test_env({}, config=config) env = {"PATH": os.environ.get("PATH", os.defpath), **env} ret = subprocess.run( [str(config.wrapper_dir / "vulnix"), "--version"], check=True, capture_output=True, encoding="utf-8", env=env, ) assert "vulnix" in ret.stdout.lower() or "vulnix" in ret.stderr.lower() ================================================ FILE: tests/test_vulnxscan_engine.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Focused tests for vulnxscan parser and reporting helpers.""" from pathlib import Path from types import SimpleNamespace import pandas as pd import pytest from vulnxscan.parsers import parse_grype_json, parse_vulnix_json from vulnxscan.reporting import build_report_dataframe, write_reports from vulnxscan.vulnscan import VulnScan def test_parse_vulnix_json_updates_cvss_cache(): """Populate vulnerability rows and severity cache from vulnix JSON.""" cvss_cache = {} df = parse_vulnix_json( '[{"pname":"hello","version":"1.0","affected_by":["CVE-1"],' '"cvssv3_basescore":{"CVE-1":"7.5"}}]', cvss_cache=cvss_cache, ) assert df.to_dict("records") == [ { "package": "hello", "version": "1.0", "vuln_id": "CVE-1", "severity": "7.5", "scanner": "vulnix", } ] assert cvss_cache == {"CVE-1": "7.5"} def test_parse_grype_json_prefers_cvss_v3_scores(): """Select CVSS v3 severity when grype reports multiple CVSS entries.""" cvss_cache = {} json_str = """ { "matches": [ { "artifact": {"name": "hello", "version": "1.0"}, "vulnerability": { "id": "CVE-2", "cvss": [ {"version": "2.0", "metrics": {"baseScore": 4.0}}, {"version": "3.1", "metrics": {"baseScore": 9.8}} ] } } ] } """ df = parse_grype_json(json_str, cvss_cache=cvss_cache) assert df.to_dict("records") == [ { "package": "hello", "version": "1.0", "vuln_id": "CVE-2", "severity": 9.8, "scanner": "grype", } ] assert cvss_cache == {"CVE-2": 9.8} def test_build_report_dataframe_merges_scanner_counts(): """Aggregate scanner findings into the final report layout.""" df_report = build_report_dataframe( df_vulnix=pd.DataFrame( [ { "package": "hello", "version": "1.0", "vuln_id": "CVE-1", "severity": "7.5", "scanner": "vulnix", } ] ), df_grype=pd.DataFrame( [ { "package": "hello", "version": "1.0", "vuln_id": "CVE-1", "severity": "7.5", "scanner": "grype", } ] ), df_osv=pd.DataFrame(), ) assert df_report.to_dict("records") == [ { "vuln_id": "CVE-1", "url": "https://nvd.nist.gov/vuln/detail/CVE-1", "package": "hello", "version": "1.0", "severity": "7.5", "grype": "1", "osv": 
"0", "vulnix": "1", "sum": 2, "sortcol": df_report.iloc[0]["sortcol"], } ] def test_write_reports_writes_triage_report(tmp_path): """Write both the main report and the derived triage report files.""" main_out = tmp_path / "vulns.csv" df_report = pd.DataFrame([{"vuln_id": "CVE-1"}]) df_triaged = pd.DataFrame([{"vuln_id": "CVE-1", "classify": "triaged"}]) write_reports(df_report, main_out, df_triaged=df_triaged) assert main_out.exists() assert (tmp_path / "vulns.triage.csv").exists() assert Path(main_out).read_text(encoding="utf-8") @pytest.mark.parametrize( ("buildtime", "expected_cmd"), [ (False, ["vulnix", "/nix/store/my target", "-C", "--json"]), (True, ["vulnix", "/nix/store/my target", "--json"]), ], ) def test_scan_vulnix_uses_argv_lists(monkeypatch, buildtime, expected_cmd): """Build vulnix subprocess argv without splitting whitespace-containing paths.""" calls = [] parsed = [] def fake_exec_cmd(cmd, **kwargs): calls.append((cmd, kwargs)) return SimpleNamespace( stdout='[{"pname": "hello", "version": "1.0", "affected_by": []}]', stderr="", returncode=0, ) monkeypatch.setattr("vulnxscan.vulnscan.exec_cmd", fake_exec_cmd) monkeypatch.setattr( VulnScan, "_parse_vulnix", lambda self, stdout: parsed.append(stdout), ) VulnScan().scan_vulnix("/nix/store/my target", buildtime=buildtime) assert calls == [ ( expected_cmd, {"raise_on_error": False, "return_error": True, "log_error": False}, ) ] assert parsed == ['[{"pname": "hello", "version": "1.0", "affected_by": []}]'] ================================================ FILE: tests/test_vulnxscan_triage.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Unit tests for vulnxscan triage and lookup helpers.""" from types import SimpleNamespace import pandas as pd import pytest from vulnxscan.github_prs import GitHubPrLookup from vulnxscan.repology_lookup import RepologyVulnerabilityLookup from vulnxscan.triage import classify_vulnerability, triage_vulnerabilities class FakeRepologyLookup: def __init__(self): self.vulnerable_checks = [] self.query_inputs = [] def is_vulnerable(self, package, version, vuln_id=None): self.vulnerable_checks.append((package, str(version), vuln_id)) return str(version) == "1.0.0" def query_repology_versions(self, df_vuln_pkgs): self.query_inputs.append(df_vuln_pkgs.copy(deep=True)) return pd.DataFrame( [ { "vuln_id": "CVE-2024-1", "url": "https://nvd.nist.gov/vuln/detail/CVE-2024-1", "package": "openssl", "severity": "7.0", "version_local": "1.0.0", "version_nixpkgs": "1.1.0", "version_upstream": "1.2.0", "package_repology": "openssl", "sortcol": "2024A0000000001", } ] ) class FakeGitHubLookup: def __init__(self): self.rows = [] def find_nixpkgs_prs(self, row): self.rows.append(row) return "https://github.com/NixOS/nixpkgs/pull/1" class FakeAdapter: def __init__(self): self.queries = [] def query(self, repology_query): self.queries.append(repology_query) return pd.DataFrame( [ { "package": "tiff", "version": "4.5.0", "status": "newest", "newest_upstream_release": "4.5.1", }, { "package": "tiff-tools", "version": "4.4.0", "status": "newest", "newest_upstream_release": "4.4.2", }, ] ) def test_classify_vulnerability_marks_fixable_nixpkgs_update(): lookup = FakeRepologyLookup() row = SimpleNamespace( vuln_id="CVE-2024-1", package_repology="openssl", version_local="1.0.0", version_nixpkgs="1.1.0", version_upstream="1.2.0", ) classification = classify_vulnerability(row, 

def test_classify_vulnerability_marks_fixable_nixpkgs_update():
    lookup = FakeRepologyLookup()
    row = SimpleNamespace(
        vuln_id="CVE-2024-1",
        package_repology="openssl",
        version_local="1.0.0",
        version_nixpkgs="1.1.0",
        version_upstream="1.2.0",
    )

    classification = classify_vulnerability(row, repology_lookup=lookup)

    assert classification == "fix_update_to_version_nixpkgs"
    assert lookup.vulnerable_checks == [
        ("openssl", "1.0.0", "CVE-2024-1"),
        ("openssl", "1.1.0", "CVE-2024-1"),
    ]


def test_triage_vulnerabilities_groups_rows_and_adds_nixpkgs_prs():
    repology_lookup = FakeRepologyLookup()
    github_lookup = FakeGitHubLookup()
    df_report = pd.DataFrame(
        [
            {
                "vuln_id": "CVE-2024-1",
                "package": "openssl",
                "severity": "7.0",
                "version": "1.0.0",
                "url": "https://nvd.nist.gov/vuln/detail/CVE-2024-1",
                "sortcol": "2024A0000000001",
            },
            {
                "vuln_id": "CVE-2024-1",
                "package": "openssl",
                "severity": "7.0",
                "version": "1.0.0",
                "url": "https://nvd.nist.gov/vuln/detail/CVE-2024-1",
                "sortcol": "2024A0000000001",
            },
        ]
    )

    triaged = triage_vulnerabilities(
        df_report,
        True,
        repology_lookup=repology_lookup,
        github_lookup=github_lookup,
    )

    assert repology_lookup.query_inputs[0]["count"].tolist() == [2]
    assert triaged["classify"].tolist() == ["fix_update_to_version_nixpkgs"]
    assert triaged["nixpkgs_pr"].tolist() == ["https://github.com/NixOS/nixpkgs/pull/1"]


def test_github_pr_lookup_queries_vuln_and_version_matches():
    queries = []
    lookup = GitHubPrLookup(
        session=SimpleNamespace(get=None), sleeper=lambda _delay: None
    )

    def fake_query(query_str, delay=60):
        queries.append((query_str, delay))
        return {
            "total_count": 1,
            "items": [
                {"html_url": f"https://github.com/NixOS/nixpkgs/pull/{len(queries)}"}
            ],
        }

    lookup.query = fake_query
    row = SimpleNamespace(
        vuln_id="CVE-2024-1",
        classify="fix_update_to_version_nixpkgs",
        version_nixpkgs="1.2.3",
        version_upstream="",
        package="openssl",
        whitelist=False,
    )

    prs = lookup.find_nixpkgs_prs(row)

    assert queries == [
        ("repo:NixOS/nixpkgs is:pr is:unmerged is:open CVE-2024-1", 60),
        ("repo:NixOS/nixpkgs is:pr is:merged CVE-2024-1", 60),
        (
            "repo:NixOS/nixpkgs is:pr is:unmerged is:open openssl in:title 1.2.3 in:title",
            60,
        ),
        ("repo:NixOS/nixpkgs is:pr is:merged openssl in:title 1.2.3 in:title", 60),
    ]
    assert prs == (
        "https://github.com/NixOS/nixpkgs/pull/1 \n"
        "https://github.com/NixOS/nixpkgs/pull/2 \n"
        "https://github.com/NixOS/nixpkgs/pull/3 \n"
        "https://github.com/NixOS/nixpkgs/pull/4"
    )


def test_query_repology_versions_prefers_exact_version_match():
    adapter = FakeAdapter()
    lookup = RepologyVulnerabilityLookup(adapter=adapter, cve_query=lambda *_args: None)
    df_vuln_pkgs = pd.DataFrame(
        [
            {
                "vuln_id": "CVE-2024-2",
                "url": "https://nvd.nist.gov/vuln/detail/CVE-2024-2",
                "package": "libtiff",
                "severity": "5.0",
                "version": "4.5.0",
                "sortcol": "2024A0000000002",
                "count": 1,
            }
        ]
    )

    result = lookup.query_repology_versions(df_vuln_pkgs)

    assert len(adapter.queries) == 1
    assert result.to_dict("records") == [
        {
            "vuln_id": "CVE-2024-2",
            "url": "https://nvd.nist.gov/vuln/detail/CVE-2024-2",
            "package": "libtiff",
            "severity": "5.0",
            "version_local": "4.5.0",
            "version_nixpkgs": "4.5.0",
            "version_upstream": "4.5.1",
            "package_repology": "tiff",
            "sortcol": "2024A0000000002",
        }
    ]


def test_query_repology_rejects_unknown_match_type():
    lookup = RepologyVulnerabilityLookup(
        adapter=FakeAdapter(),
        cve_query=lambda *_args: None,
    )

    with pytest.raises(ValueError, match="Unknown match_type: 'bad'"):
        lookup.query_repology("openssl", match_type="bad")


================================================
FILE: tests/test_whitelist.py
================================================
#!/usr/bin/env python3

# SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII)
#
# SPDX-License-Identifier: Apache-2.0

"""Offline tests for whitelist handling."""

from common.df import df_from_csv_file
from tests.testpaths import RESOURCES_DIR
from tests.testutils import df_difference, df_to_string
from vulnxscan.whitelist import df_apply_whitelist, load_whitelist


def test_whitelist():
    """Test applying whitelist to vulnerability csv file."""
    whitelist_csv = RESOURCES_DIR / "whitelist.csv"
    assert whitelist_csv.exists()
    vulns_csv = RESOURCES_DIR / "vulns.csv"
    assert vulns_csv.exists()
    df_whitelist = load_whitelist(whitelist_csv)
    assert df_whitelist is not None
    df_vulns = df_from_csv_file(vulns_csv)
    assert df_vulns is not None
    df_vuln_id_copy = df_vulns.copy()[["vuln_id", "package"]]
    df_apply_whitelist(df_whitelist, df_vuln_id_copy)
    df_diff = df_difference(df_vulns.astype(str), df_vuln_id_copy.astype(str))
    assert df_diff.empty, df_to_string(df_diff)


================================================
FILE: tests/testpaths.py
================================================
#!/usr/bin/env python3

# SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII)
#
# SPDX-License-Identifier: Apache-2.0

"""Shared paths for the test suite."""

from pathlib import Path

TESTS_DIR = Path(__file__).resolve().parent
RESOURCES_DIR = TESTS_DIR / "resources"
REPOROOT = TESTS_DIR.parent
SRCDIR = REPOROOT / "src"

COMPARE_DEPS = TESTS_DIR / "compare_deps.py"
COMPARE_SBOMS = TESTS_DIR / "compare_sboms.py"
SAMPLE_CDX_SBOM = RESOURCES_DIR / "sample_cdx_sbom.json"

SBOMNIX = SRCDIR / "sbomnix" / "main.py"
NIXGRAPH = SRCDIR / "nixgraph" / "main.py"
NIXMETA = SRCDIR / "nixmeta" / "main.py"
PROVENANCE = SRCDIR / "provenance" / "main.py"
NIX_OUTDATED = SRCDIR / "nixupdate" / "nix_outdated.py"
VULNXSCAN = SRCDIR / "vulnxscan" / "vulnxscan_cli.py"
REPOLOGY_CLI = SRCDIR / "repology" / "repology_cli.py"


================================================
FILE: tests/testutils.py
================================================
#!/usr/bin/env python3

# SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII)
#
# SPDX-License-Identifier: Apache-2.0

"""Shared helper utilities for the test suite."""

import json
from pathlib import Path
from urllib.parse import urldefrag, urlparse

import jsonschema
import referencing
import referencing.retrieval

LOCAL_SCHEMA_ALIASES = {
    "spdx.schema.json": "spdx.schema.json",
    "http://cyclonedx.org/schema/spdx.schema.json": "spdx.schema.json",
    "jsf-0.82.schema.json": "jsf-0.82.schema.json",
    "http://cyclonedx.org/schema/jsf-0.82.schema.json": "jsf-0.82.schema.json",
}


def resolve_local_schema_path(uri, schema_dir):
    """Resolve a schema reference to a local file under ``schema_dir``."""
    schema_dir = Path(schema_dir)
    base_uri, _fragment = urldefrag(uri)
    if base_uri in LOCAL_SCHEMA_ALIASES:
        filename = LOCAL_SCHEMA_ALIASES[base_uri]
    else:
        parsed = urlparse(base_uri)
        filename = Path(parsed.path or base_uri).name
        filename = LOCAL_SCHEMA_ALIASES.get(filename, filename)
    path = schema_dir / filename
    if not path.exists():
        raise FileNotFoundError(f"Local schema not found for '{uri}': {path}")
    return path


def create_local_schema_retriever(schema_dir):
    """Create a cached local schema retriever for ``referencing``."""

    @referencing.retrieval.to_cached_resource()
    def _retrieve(uri):
        return resolve_local_schema_path(uri, schema_dir).read_text(encoding="utf-8")

    return _retrieve
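
# Hedged example of the alias table above: absolute CycloneDX schema URIs and
# bare filenames both resolve to the vendored copy, so validation never
# touches the network (fragments are stripped by urldefrag first):
#
#     resolve_local_schema_path(
#         "http://cyclonedx.org/schema/jsf-0.82.schema.json#/definitions/signature",
#         RESOURCES_DIR,
#     )  # -> RESOURCES_DIR / "jsf-0.82.schema.json"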

def validate_json(file_path, schema_path):
    """Validate json file matches schema."""
    schema_path = Path(schema_path)
    with (
        open(file_path, encoding="utf-8") as json_file,
        open(
            schema_path,
            encoding="utf-8",
        ) as schema_file,
    ):
        json_obj = json.load(json_file)
        schema_obj = json.load(schema_file)
    registry = referencing.Registry(
        retrieve=create_local_schema_retriever(schema_path.parent)
    )
    jsonschema.validate(json_obj, schema_obj, registry=registry)


def df_to_string(df):
    """Convert dataframe to string."""
    return (
        "\n"
        + df.to_string(max_rows=None, max_cols=None, index=False, justify="left")
        + "\n"
    )


def df_difference(df_left, df_right):
    """Return dataframe that represents diff of two dataframes."""
    df_right = df_right.astype(df_left.dtypes.to_dict())
    df = df_left.merge(
        df_right,
        how="outer",
        indicator=True,
    )
    df = df[df["_merge"] != "both"]
    cols = df.columns.tolist()
    cols = cols[-1:] + cols[:-1]
    return df[cols]


================================================
FILE: tests/vulnix_test_support.py
================================================
#!/usr/bin/env python3

# SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII)
#
# SPDX-License-Identifier: Apache-2.0

"""Helpers for choosing the real or dummy vulnix binary in tests."""

from __future__ import annotations

import fcntl
import os
import shutil
import stat
import subprocess
from dataclasses import dataclass
from pathlib import Path

_WRAPPER_BASENAME = "vulnix"


@dataclass(frozen=True)
class VulnixTestConfig:
    """Resolved vulnix test execution configuration."""

    wrapper_dir: Path
    effective_mode: str
    effective_cache_dir: Path | None
    real_vulnix: str | None


def default_vulnix_cache_dir(env: dict[str, str] | None = None) -> Path:
    """Return the real vulnix cache dir for this environment."""
    env = os.environ if env is None else env
    cache_dir = env.get("SBOMNIX_TEST_VULNIX_CACHE_DIR")
    if cache_dir:
        return Path(cache_dir).expanduser()
    return Path("~/.cache/vulnix").expanduser()


def vulnix_cache_ready(cache_dir: Path) -> bool:
    """Return True when `cache_dir` already contains a usable vulnix DB."""
    data_file = cache_dir / "Data.fs"
    return data_file.is_file() and data_file.stat().st_size > 0


def write_vulnix_wrapper(wrapper_dir: Path) -> Path:
    """Create the test-only vulnix wrapper and return its path."""
    wrapper_dir.mkdir(parents=True, exist_ok=True)
    wrapper_path = wrapper_dir / _WRAPPER_BASENAME
    wrapper_path.write_text(
        """#!/bin/sh
set -eu
mode="${SBOMNIX_TEST_VULNIX_EFFECTIVE_MODE:?}"
if [ "$mode" = "dummy" ]; then
    printf '[]'
    exit 0
fi
real_vulnix="${SBOMNIX_TEST_REAL_VULNIX:-}"
if [ -z "$real_vulnix" ]; then
    echo "SBOMNIX_TEST_REAL_VULNIX is empty while vulnix test mode is real" >&2
    exit 2
fi
cache_dir="${SBOMNIX_TEST_VULNIX_EFFECTIVE_CACHE_DIR:-}"
if [ -n "$cache_dir" ]; then
    exec "$real_vulnix" --cache-dir "$cache_dir" "$@"
fi
exec "$real_vulnix" "$@"
""",
        encoding="utf-8",
    )
    wrapper_path.chmod(
        wrapper_path.stat().st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH
    )
    return wrapper_path


def build_vulnix_test_env(
    env: dict[str, str],
    *,
    config: VulnixTestConfig,
) -> dict[str, str]:
    """Return environment variables needed by the vulnix test wrapper."""
    env = env.copy()
    path_entries = [str(config.wrapper_dir)]
    path_entries.append(env.get("PATH", os.defpath))
    env["PATH"] = os.pathsep.join(path_entries)
    env["SBOMNIX_TEST_VULNIX_EFFECTIVE_MODE"] = config.effective_mode
    env["SBOMNIX_TEST_REAL_VULNIX"] = config.real_vulnix or ""
    if config.effective_cache_dir is not None:
        env["SBOMNIX_TEST_VULNIX_EFFECTIVE_CACHE_DIR"] = str(config.effective_cache_dir)
    else:
        env.pop("SBOMNIX_TEST_VULNIX_EFFECTIVE_CACHE_DIR", None)
    return env
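
# Hedged usage note: build_vulnix_test_env prepends wrapper_dir to PATH, so
# subprocesses spawned with this environment resolve `vulnix` to the wrapper
# before any system binary, e.g.:
#
#     env = build_vulnix_test_env(dict(os.environ), config=config)
#     subprocess.run(["vulnix", "--json"], env=env, check=True)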
{"dummy", "real"}: raise ValueError( f"invalid effective vulnix mode {effective_mode!r}; expected 'dummy' or 'real'" ) if effective_mode == "real" and real_vulnix is None: real_vulnix = shutil.which("vulnix") if effective_mode == "real" and real_vulnix is None: raise RuntimeError( "real vulnix requested, but 'vulnix' is not available in PATH" ) wrapper_dir = tmp_root / "tool-wrappers" write_vulnix_wrapper(wrapper_dir) return VulnixTestConfig( wrapper_dir=wrapper_dir, effective_mode=effective_mode, effective_cache_dir=cache_dir if effective_mode == "real" else None, real_vulnix=real_vulnix, ) def ensure_real_vulnix_cache( cache_dir: Path, *, build_root: Path, real_vulnix: str, test_derivation: Path, ) -> Path: """Warm a shared vulnix cache once for opt-in/manual real-vulnix test runs. The default test harness uses dummy vulnix and does not call this helper. """ def _run_warmup_command(cmd: list[str], *, step: str) -> None: try: subprocess.run( cmd, check=True, capture_output=True, text=True, ) except subprocess.CalledProcessError as exc: stderr = (exc.stderr or "").strip() stdout = (exc.stdout or "").strip() details = stderr or stdout or "no output captured" raise RuntimeError(f"{step} failed: {details}") from exc cache_dir.mkdir(parents=True, exist_ok=True) build_root.mkdir(parents=True, exist_ok=True) lock_path = build_root / "vulnix-cache.lock" with lock_path.open("w", encoding="utf-8") as lock_file: fcntl.flock(lock_file.fileno(), fcntl.LOCK_EX) if vulnix_cache_ready(cache_dir): return cache_dir result_link = build_root / "result" if not result_link.exists(): _run_warmup_command( ["nix-build", test_derivation.as_posix(), "-o", result_link.as_posix()], step="nix-build for vulnix cache warm-up", ) _run_warmup_command( [ real_vulnix, "--cache-dir", cache_dir.as_posix(), result_link.as_posix(), "-C", "--json", ], step="vulnix cache warm-up scan", ) return cache_dir