Repository: tiiuae/sbomnix
Branch: main
Commit: 6fb9c6707ff3
Files: 186
Total size: 903.4 KB

Directory structure:
gitextract_rba_m7yr/
├── .envrc
├── .github/
│   ├── dependabot.yml
│   └── workflows/
│       ├── codeql.yml
│       ├── dependency-review.yml
│       ├── release_sbomnix.yml
│       ├── scorecards.yml
│       └── test_sbomnix.yml
├── .gitignore
├── .gitlint
├── LICENSES/
│   ├── Apache-2.0.txt
│   ├── BSD-3-Clause.txt
│   ├── CC-BY-3.0.txt
│   ├── CC-BY-SA-4.0.txt
│   └── MIT.txt
├── README.md
├── REUSE.toml
├── VERSION
├── default.nix
├── doc/
│   ├── nix_outdated.md
│   ├── nixgraph.md
│   ├── nixmeta.md
│   ├── provenance.md
│   ├── repology_cli.md
│   └── vulnxscan.md
├── flake.nix
├── nix/
│   ├── apps.nix
│   ├── default.nix
│   ├── formatter.nix
│   ├── git-hooks.nix
│   └── packages.nix
├── pyproject.toml
├── pyrightconfig.json
├── pytest.ini
├── scripts/
│   ├── check-fast.sh
│   ├── check-full.sh
│   ├── release-asset.sh
│   └── run-pytest-lane.sh
├── shell.nix
├── src/
│   ├── common/
│   │   ├── __init__.py
│   │   ├── cli_args.py
│   │   ├── columns.py
│   │   ├── df.py
│   │   ├── errors.py
│   │   ├── flakeref.py
│   │   ├── http.py
│   │   ├── log.py
│   │   ├── nix_utils.py
│   │   ├── package_names.py
│   │   ├── pkgmeta.py
│   │   ├── proc.py
│   │   ├── regex.py
│   │   ├── spdx.py
│   │   └── versioning.py
│   ├── nixgraph/
│   │   ├── __init__.py
│   │   ├── graph.py
│   │   ├── main.py
│   │   └── render.py
│   ├── nixmeta/
│   │   ├── __init__.py
│   │   ├── flake_metadata.py
│   │   ├── main.py
│   │   ├── metadata_json.py
│   │   └── scanner.py
│   ├── nixupdate/
│   │   ├── __init__.py
│   │   ├── nix_outdated.py
│   │   ├── nix_visualize.py
│   │   ├── pipeline.py
│   │   └── report.py
│   ├── provenance/
│   │   ├── __init__.py
│   │   ├── dependencies.py
│   │   ├── digests.py
│   │   ├── main.py
│   │   ├── nix_commands.py
│   │   ├── path_info.py
│   │   ├── schema.py
│   │   └── subjects.py
│   ├── repology/
│   │   ├── __init__.py
│   │   ├── adapter.py
│   │   ├── cves.py
│   │   ├── exceptions.py
│   │   ├── projects_parser.py
│   │   ├── repology_cli.py
│   │   ├── repology_cve.py
│   │   ├── reporting.py
│   │   ├── sbom.py
│   │   └── session.py
│   ├── sbomnix/
│   │   ├── __init__.py
│   │   ├── builder.py
│   │   ├── cdx.py
│   │   ├── cli_utils.py
│   │   ├── closure.py
│   │   ├── components.py
│   │   ├── cpe.py
│   │   ├── dependency_index.py
│   │   ├── derivation.py
│   │   ├── derivers.py
│   │   ├── dfcache.py
│   │   ├── exporters.py
│   │   ├── main.py
│   │   ├── meta.py
│   │   ├── meta_source.py
│   │   ├── runtime.py
│   │   └── vuln_enrichment.py
│   └── vulnxscan/
│       ├── __init__.py
│       ├── github_prs.py
│       ├── osv.py
│       ├── osv_client.py
│       ├── parsers.py
│       ├── repology_lookup.py
│       ├── reporting.py
│       ├── scanners.py
│       ├── triage.py
│       ├── utils.py
│       ├── vulnscan.py
│       ├── vulnxscan_cli.py
│       └── whitelist.py
└── tests/
    ├── __init__.py
    ├── compare_deps.py
    ├── compare_sboms.py
    ├── conftest.py
    ├── integration/
    │   ├── __init__.py
    │   ├── test_nixgraph_cli.py
    │   ├── test_nixmeta_cli.py
    │   ├── test_nixupdate_cli.py
    │   ├── test_provenance_cli.py
    │   ├── test_repology_cli.py
    │   ├── test_sbomnix_cli.py
    │   └── test_vulnxscan_cli.py
    ├── resources/
    │   ├── README.md
    │   ├── cdx_bom-1.3.schema.json
    │   ├── cdx_bom-1.4.schema.json
    │   ├── grype-test-db.tar.gz.license
    │   ├── jsf-0.82.schema.json
    │   ├── make_grype_test_db.py
    │   ├── nixmeta-package-set.nix
    │   ├── provenance-1.0.schema.json
    │   ├── repology/
    │   │   ├── cves_openssl.html
    │   │   ├── projects_empty.html
    │   │   └── projects_hello.html
    │   ├── sample_cdx_sbom.json
    │   ├── spdx.schema.json
    │   ├── spdx_bom-2.3.schema.json
    │   └── test-derivation-chain.nix
    ├── test_builder_runtime.py
    ├── test_buildtime_closure.py
    ├── test_cli_conventions.py
    ├── test_cli_error_boundaries.py
    ├── test_cli_smoke.py
    ├── test_common_log.py
    ├── test_common_versioning.py
    ├── test_compare_deps.py
    ├── test_components.py
    ├── test_cpe.py
    ├── test_dependency_index.py
    ├── test_derivation_hardening.py
    ├── test_flakeref_resolution.py
    ├── test_library_exceptions.py
    ├── test_nix_cli_argv.py
    ├── test_nix_outdated_pipeline.py
    ├── test_nix_target_resolution.py
    ├── test_nix_utils_parsing.py
    ├── test_nixgraph_graph.py
    ├── test_nixmeta_parsing.py
    ├── test_nixmeta_progress.py
    ├── test_nixmeta_source.py
    ├── test_nixmeta_source_export.py
    ├── test_osv_client.py
    ├── test_provenance_batching.py
    ├── test_provenance_path_info.py
    ├── test_provenance_subjects.py
    ├── test_repology_adapter.py
    ├── test_repology_cve.py
    ├── test_repology_projects_parser.py
    ├── test_repology_sbom.py
    ├── test_runtime_closure.py
    ├── test_sbom_closure.py
    ├── test_sbom_vuln_enrichment.py
    ├── test_schema_validation.py
    ├── test_store_batching.py
    ├── test_temp_sbom_generation.py
    ├── test_vulnix_test_support.py
    ├── test_vulnxscan_engine.py
    ├── test_vulnxscan_triage.py
    ├── test_whitelist.py
    ├── testpaths.py
    ├── testutils.py
    └── vulnix_test_support.py

================================================
FILE CONTENTS
================================================

================================================
FILE: .envrc
================================================
#! /usr/bin/env bash
# SPDX-FileCopyrightText: 2023 Technology Innovation Institute (TII)
#
# SPDX-License-Identifier: Apache-2.0

# try to use flake initially, fallback to non-flake use otherwise
if nix flake show &> /dev/null; then
  use flake
else
  use nix
fi

================================================
FILE: .github/dependabot.yml
================================================
version: 2
updates:
  - package-ecosystem: github-actions
    directory: /
    schedule:
      interval: daily

================================================
FILE: .github/workflows/codeql.yml
================================================
# For most projects, this workflow file will not need changing; you simply need
# to commit it to your repository.
#
# You may wish to alter this file to override the set of languages analyzed,
# or to provide custom queries or build logic.
#
# ******** NOTE ********
# We have attempted to detect the languages in your repository. Please check
# the `language` matrix defined below to confirm you have the correct set of
# supported CodeQL languages.
#
name: "CodeQL"

on:
  push:
    branches: ["main"]
  pull_request:
    # The branches below must be a subset of the branches above
    branches: ["main"]
  schedule:
    - cron: "0 0 * * 1"

permissions:
  contents: read

jobs:
  analyze:
    name: Analyze
    runs-on: ubuntu-latest
    permissions:
      actions: read
      contents: read
      security-events: write

    strategy:
      fail-fast: false
      matrix:
        language: ["python"]
        # CodeQL supports [ $supported-codeql-languages ]
        # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support

    steps:
      - name: Harden the runner (Audit all outbound calls)
        uses: step-security/harden-runner@8d3c67de8e2fe68ef647c8db1e6a09f647780f40 # v2.19.0
        with:
          egress-policy: audit

      - name: Checkout repository
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

      # Initializes the CodeQL tools for scanning.
      - name: Initialize CodeQL
        uses: github/codeql-action/init@95e58e9a2cdfd71adc6e0353d5c52f41a045d225 # v4.35.2
        with:
          languages: ${{ matrix.language }}
          # If you wish to specify custom queries, you can do so here or in a config file.
          # By default, queries listed here will override any specified in a config file.
          # Prefix the list here with "+" to use these queries and those in the config file.
      # Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
      # If this step fails, then you should remove it and run the build manually (see below)
      - name: Autobuild
        uses: github/codeql-action/autobuild@95e58e9a2cdfd71adc6e0353d5c52f41a045d225 # v4.35.2

      # ℹ️ Command-line programs to run using the OS shell.
      # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun

      # If the Autobuild fails above, remove it and uncomment the following three lines;
      # modify them (or add more) to build your code if your project needs a manual build,
      # referring to the example below for guidance.
      # - run: |
      #     echo "Run, Build Application using script"
      #     ./location_of_script_within_repo/buildscript.sh

      - name: Perform CodeQL Analysis
        uses: github/codeql-action/analyze@95e58e9a2cdfd71adc6e0353d5c52f41a045d225 # v4.35.2
        with:
          category: "/language:${{matrix.language}}"

================================================
FILE: .github/workflows/dependency-review.yml
================================================
# Dependency Review Action
#
# This Action will scan dependency manifest files that change as part of a Pull Request,
# surfacing known-vulnerable versions of the packages declared or updated in the PR.
# Once installed, if the workflow run is marked as required,
# PRs introducing known-vulnerable packages will be blocked from merging.
#
# Source repository: https://github.com/actions/dependency-review-action
name: 'Dependency Review'
on: [pull_request]

permissions:
  contents: read

jobs:
  dependency-review:
    runs-on: ubuntu-latest
    steps:
      - name: Harden the runner (Audit all outbound calls)
        uses: step-security/harden-runner@8d3c67de8e2fe68ef647c8db1e6a09f647780f40 # v2.19.0
        with:
          egress-policy: audit

      - name: 'Checkout Repository'
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
      - name: 'Dependency Review'
        uses: actions/dependency-review-action@2031cfc080254a8a887f58cffee85186f0e49e48 # v4.9.0

================================================
FILE: .github/workflows/release_sbomnix.yml
================================================
# SPDX-FileCopyrightText: 2023 Technology Innovation Institute (TII)
#
# SPDX-License-Identifier: Apache-2.0

name: Upload Release Asset

on:
  push:
    # Run on push events where tags match v*, e.g. v1.3.0
    tags:
      - 'v*'

permissions:
  contents: read

jobs:
  build:
    name: Upload Release Asset
    runs-on: ubuntu-latest
    permissions:
      contents: write
    steps:
      - name: Harden the runner (Audit all outbound calls)
        uses: step-security/harden-runner@8d3c67de8e2fe68ef647c8db1e6a09f647780f40 # v2.19.0
        with:
          egress-policy: audit

      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
      - uses: cachix/install-nix-action@ab739621df7a23f52766f9ccc97f38da6b7af14f # v31.10.5
        with:
          nix_path: nixpkgs=channel:nixpkgs-unstable
      - name: Build release asset
        run: ./scripts/release-asset.sh
      - name: Upload release asset
        uses: svenstaro/upload-release-action@29e53e917877a24fad85510ded594ab3c9ca12de # v2
        with:
          repo_token: ${{ secrets.GITHUB_TOKEN }}
          file: build/sbom*
          tag: ${{ github.ref }}
          overwrite: true
          file_glob: true

================================================
FILE: .github/workflows/scorecards.yml
================================================
# This workflow uses actions that are not certified by GitHub. They are provided
# by a third-party and are governed by separate terms of service, privacy
# policy, and support documentation.
name: Scorecard supply-chain security
on:
  # For Branch-Protection check.
  # Only the default branch is supported. See
  # https://github.com/ossf/scorecard/blob/main/docs/checks.md#branch-protection
  branch_protection_rule:
  # To guarantee Maintained check is occasionally updated. See
  # https://github.com/ossf/scorecard/blob/main/docs/checks.md#maintained
  schedule:
    - cron: '20 7 * * 2'
  push:
    branches: ["main"]

# Declare default permissions as read only.
permissions: read-all

jobs:
  analysis:
    name: Scorecard analysis
    runs-on: ubuntu-latest
    permissions:
      # Needed to upload the results to code-scanning dashboard.
      security-events: write
      # Needed to publish results and get a badge (see publish_results below).
      id-token: write
      contents: read
      actions: read
      # To allow GraphQL ListCommits to work
      issues: read
      pull-requests: read
      # To detect SAST tools
      checks: read

    steps:
      - name: Harden the runner (Audit all outbound calls)
        uses: step-security/harden-runner@8d3c67de8e2fe68ef647c8db1e6a09f647780f40 # v2.19.0
        with:
          egress-policy: audit

      - name: "Checkout code"
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          persist-credentials: false

      - name: "Run analysis"
        uses: ossf/scorecard-action@4eaacf0543bb3f2c246792bd56e8cdeffafb205a # v2.4.3
        with:
          results_file: results.sarif
          results_format: sarif
          # (Optional) "write" PAT token. Uncomment the `repo_token` line below if:
          # - you want to enable the Branch-Protection check on a *public* repository, or
          # - you are installing Scorecards on a *private* repository
          # To create the PAT, follow the steps in https://github.com/ossf/scorecard-action#authentication-with-pat.
          # repo_token: ${{ secrets.SCORECARD_TOKEN }}

          # Public repositories:
          # - Publish results to OpenSSF REST API for easy access by consumers
          # - Allows the repository to include the Scorecard badge.
          # - See https://github.com/ossf/scorecard-action#publishing-results.
          # For private repositories:
          # - `publish_results` will always be set to `false`, regardless
          #   of the value entered here.
          publish_results: true

      # Upload the results as artifacts (optional). Commenting out will disable uploads of run results in SARIF
      # format to the repository Actions tab.
      - name: "Upload artifact"
        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
        with:
          name: SARIF file
          path: results.sarif
          retention-days: 5

      # Upload the results to GitHub's code scanning dashboard.
      - name: "Upload to code-scanning"
        uses: github/codeql-action/upload-sarif@95e58e9a2cdfd71adc6e0353d5c52f41a045d225 # v4.35.2
        with:
          sarif_file: results.sarif

================================================
FILE: .github/workflows/test_sbomnix.yml
================================================
# SPDX-FileCopyrightText: 2022-2023 Technology Innovation Institute (TII)
#
# SPDX-License-Identifier: Apache-2.0

name: sbomnix checks

on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main
  workflow_dispatch:

concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

permissions:
  contents: read

jobs:
  full:
    name: full lane (${{ matrix.os }})
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest, macos-latest]
    runs-on: ${{ matrix.os }}
    steps:
      - name: Harden the runner (Audit all outbound calls)
        uses: step-security/harden-runner@8d3c67de8e2fe68ef647c8db1e6a09f647780f40 # v2.19.0
        with:
          egress-policy: audit

      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
      - uses: cachix/install-nix-action@ab739621df7a23f52766f9ccc97f38da6b7af14f # v31.10.5
        with:
          nix_path: nixpkgs=channel:nixpkgs-unstable
      - name: Print nix version
        run: nix --version
      - name: Run full checks
        run: ./scripts/check-full.sh

================================================
FILE: .gitignore
================================================
# SPDX-FileCopyrightText: 2023 Technology Innovation Institute (TII)
#
# SPDX-License-Identifier: Apache-2.0

temp/
venv/
build/
*.egg-info/
.eggs/
__pycache__/
.coverage
.coverage.*
.hypothesis/
coverage.xml
htmlcov/
sbomnix_test_data/
result
*.py[cod]
*.sqlite
*.csv
/*.log
/*.json
!/pyrightconfig.json
/*.png
/*.jpg
/*.pdf
/*.dot
/*.svg
.idea
.direnv
.pre-commit-config.yaml

================================================
FILE: .gitlint
================================================
# SPDX-FileCopyrightText: 2025 TII (SSRC) and the Ghaf contributors
# SPDX-License-Identifier: Apache-2.0

[general]
# Ignore rules, reference them by id or name (comma-separated)
# https://jorisroovers.com/gitlint/latest/rules/builtin_rules/
ignore=body-is-missing

# Enable specific community contributed rules
# https://jorisroovers.com/gitlint/latest/rules/contrib_rules/#available-contrib-rules
contrib=contrib-body-requires-signed-off-by

================================================
FILE: LICENSES/Apache-2.0.txt
================================================
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/

TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. 
You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability.

END OF TERMS AND CONDITIONS

APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives.

Copyright [yyyy] [name of copyright owner]

Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.

================================================
FILE: LICENSES/BSD-3-Clause.txt
================================================
Copyright (c) .

Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:

1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.

3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

================================================
FILE: LICENSES/CC-BY-3.0.txt
================================================
Creative Commons Attribution 3.0 Unported

CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE LEGAL SERVICES. DISTRIBUTION OF THIS LICENSE DOES NOT CREATE AN ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES REGARDING THE INFORMATION PROVIDED, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM ITS USE.

License

THE WORK (AS DEFINED BELOW) IS PROVIDED UNDER THE TERMS OF THIS CREATIVE COMMONS PUBLIC LICENSE ("CCPL" OR "LICENSE"). THE WORK IS PROTECTED BY COPYRIGHT AND/OR OTHER APPLICABLE LAW. ANY USE OF THE WORK OTHER THAN AS AUTHORIZED UNDER THIS LICENSE OR COPYRIGHT LAW IS PROHIBITED. BY EXERCISING ANY RIGHTS TO THE WORK PROVIDED HERE, YOU ACCEPT AND AGREE TO BE BOUND BY THE TERMS OF THIS LICENSE. TO THE EXTENT THIS LICENSE MAY BE CONSIDERED TO BE A CONTRACT, THE LICENSOR GRANTS YOU THE RIGHTS CONTAINED HERE IN CONSIDERATION OF YOUR ACCEPTANCE OF SUCH TERMS AND CONDITIONS.

1. Definitions a. "Adaptation" means a work based upon the Work, or upon the Work and other pre-existing works, such as a translation, adaptation, derivative work, arrangement of music or other alterations of a literary or artistic work, or phonogram or performance and includes cinematographic adaptations or any other form in which the Work may be recast, transformed, or adapted including in any form recognizably derived from the original, except that a work that constitutes a Collection will not be considered an Adaptation for the purpose of this License. For the avoidance of doubt, where the Work is a musical work, performance or phonogram, the synchronization of the Work in timed-relation with a moving image ("synching") will be considered an Adaptation for the purpose of this License. b. "Collection" means a collection of literary or artistic works, such as encyclopedias and anthologies, or performances, phonograms or broadcasts, or other works or subject matter other than works listed in Section 1(f) below, which, by reason of the selection and arrangement of their contents, constitute intellectual creations, in which the Work is included in its entirety in unmodified form along with one or more other contributions, each constituting separate and independent works in themselves, which together are assembled into a collective whole. A work that constitutes a Collection will not be considered an Adaptation (as defined above) for the purposes of this License. c. "Distribute" means to make available to the public the original and copies of the Work or Adaptation, as appropriate, through sale or other transfer of ownership. d. "Licensor" means the individual, individuals, entity or entities that offer(s) the Work under the terms of this License. e.
"Original Author" means, in the case of a literary or artistic work, the individual, individuals, entity or entities who created the Work or if no individual or entity can be identified, the publisher; and in addition (i) in the case of a performance the actors, singers, musicians, dancers, and other persons who act, sing, deliver, declaim, play in, interpret or otherwise perform literary or artistic works or expressions of folklore; (ii) in the case of a phonogram the producer being the person or legal entity who first fixes the sounds of a performance or other sounds; and, (iii) in the case of broadcasts, the organization that transmits the broadcast. f. "Work" means the literary and/or artistic work offered under the terms of this License including without limitation any production in the literary, scientific and artistic domain, whatever may be the mode or form of its expression including digital form, such as a book, pamphlet and other writing; a lecture, address, sermon or other work of the same nature; a dramatic or dramatico-musical work; a choreographic work or entertainment in dumb show; a musical composition with or without words; a cinematographic work to which are assimilated works expressed by a process analogous to cinematography; a work of drawing, painting, architecture, sculpture, engraving or lithography; a photographic work to which are assimilated works expressed by a process analogous to photography; a work of applied art; an illustration, map, plan, sketch or three-dimensional work relative to geography, topography, architecture or science; a performance; a broadcast; a phonogram; a compilation of data to the extent it is protected as a copyrightable work; or a work performed by a variety or circus performer to the extent it is not otherwise considered a literary or artistic work. g. "You" means an individual or entity exercising rights under this License who has not previously violated the terms of this License with respect to the Work, or who has received express permission from the Licensor to exercise rights under this License despite a previous violation. h. "Publicly Perform" means to perform public recitations of the Work and to communicate to the public those public recitations, by any means or process, including by wire or wireless means or public digital performances; to make available to the public Works in such a way that members of the public may access these Works from a place and at a place individually chosen by them; to perform the Work to the public by any means or process and the communication to the public of the performances of the Work, including by public digital performance; to broadcast and rebroadcast the Work by any means including signs, sounds or images. i. "Reproduce" means to make copies of the Work by any means including without limitation by sound or visual recordings and the right of fixation and reproducing fixations of the Work, including storage of a protected performance or phonogram in digital form or other electronic medium. 2. Fair Dealing Rights. Nothing in this License is intended to reduce, limit, or restrict any uses free from copyright or rights arising from limitations or exceptions that are provided for in connection with the copyright protection under copyright law or other applicable laws. 3. License Grant. 
Subject to the terms and conditions of this License, Licensor hereby grants You a worldwide, royalty-free, non-exclusive, perpetual (for the duration of the applicable copyright) license to exercise the rights in the Work as stated below: a. to Reproduce the Work, to incorporate the Work into one or more Collections, and to Reproduce the Work as incorporated in the Collections; b. to create and Reproduce Adaptations provided that any such Adaptation, including any translation in any medium, takes reasonable steps to clearly label, demarcate or otherwise identify that changes were made to the original Work. For example, a translation could be marked "The original work was translated from English to Spanish," or a modification could indicate "The original work has been modified."; c. to Distribute and Publicly Perform the Work including as incorporated in Collections; and, d. to Distribute and Publicly Perform Adaptations. e. For the avoidance of doubt: i. Non-waivable Compulsory License Schemes. In those jurisdictions in which the right to collect royalties through any statutory or compulsory licensing scheme cannot be waived, the Licensor reserves the exclusive right to collect such royalties for any exercise by You of the rights granted under this License; ii. Waivable Compulsory License Schemes. In those jurisdictions in which the right to collect royalties through any statutory or compulsory licensing scheme can be waived, the Licensor waives the exclusive right to collect such royalties for any exercise by You of the rights granted under this License; and, iii. Voluntary License Schemes. The Licensor waives the right to collect royalties, whether individually or, in the event that the Licensor is a member of a collecting society that administers voluntary licensing schemes, via that society, from any exercise by You of the rights granted under this License. The above rights may be exercised in all media and formats whether now known or hereafter devised. The above rights include the right to make such modifications as are technically necessary to exercise the rights in other media and formats. Subject to Section 8(f), all rights not expressly granted by Licensor are hereby reserved. 4. Restrictions. The license granted in Section 3 above is expressly made subject to and limited by the following restrictions: a. You may Distribute or Publicly Perform the Work only under the terms of this License. You must include a copy of, or the Uniform Resource Identifier (URI) for, this License with every copy of the Work You Distribute or Publicly Perform. You may not offer or impose any terms on the Work that restrict the terms of this License or the ability of the recipient of the Work to exercise the rights granted to that recipient under the terms of the License. You may not sublicense the Work. You must keep intact all notices that refer to this License and to the disclaimer of warranties with every copy of the Work You Distribute or Publicly Perform. When You Distribute or Publicly Perform the Work, You may not impose any effective technological measures on the Work that restrict the ability of a recipient of the Work from You to exercise the rights granted to that recipient under the terms of the License. This Section 4(a) applies to the Work as incorporated in a Collection, but this does not require the Collection apart from the Work itself to be made subject to the terms of this License. 
If You create a Collection, upon notice from any Licensor You must, to the extent practicable, remove from the Collection any credit as required by Section 4(b), as requested. If You create an Adaptation, upon notice from any Licensor You must, to the extent practicable, remove from the Adaptation any credit as required by Section 4(b), as requested. b. If You Distribute, or Publicly Perform the Work or any Adaptations or Collections, You must, unless a request has been made pursuant to Section 4(a), keep intact all copyright notices for the Work and provide, reasonable to the medium or means You are utilizing: (i) the name of the Original Author (or pseudonym, if applicable) if supplied, and/or if the Original Author and/or Licensor designate another party or parties (e.g., a sponsor institute, publishing entity, journal) for attribution ("Attribution Parties") in Licensor's copyright notice, terms of service or by other reasonable means, the name of such party or parties; (ii) the title of the Work if supplied; (iii) to the extent reasonably practicable, the URI, if any, that Licensor specifies to be associated with the Work, unless such URI does not refer to the copyright notice or licensing information for the Work; and (iv) , consistent with Section 3(b), in the case of an Adaptation, a credit identifying the use of the Work in the Adaptation (e.g., "French translation of the Work by Original Author," or "Screenplay based on original Work by Original Author"). The credit required by this Section 4 (b) may be implemented in any reasonable manner; provided, however, that in the case of a Adaptation or Collection, at a minimum such credit will appear, if a credit for all contributing authors of the Adaptation or Collection appears, then as part of these credits and in a manner at least as prominent as the credits for the other contributing authors. For the avoidance of doubt, You may only use the credit required by this Section for the purpose of attribution in the manner set out above and, by exercising Your rights under this License, You may not implicitly or explicitly assert or imply any connection with, sponsorship or endorsement by the Original Author, Licensor and/or Attribution Parties, as appropriate, of You or Your use of the Work, without the separate, express prior written permission of the Original Author, Licensor and/or Attribution Parties. c. Except as otherwise agreed in writing by the Licensor or as may be otherwise permitted by applicable law, if You Reproduce, Distribute or Publicly Perform the Work either by itself or as part of any Adaptations or Collections, You must not distort, mutilate, modify or take other derogatory action in relation to the Work which would be prejudicial to the Original Author's honor or reputation. Licensor agrees that in those jurisdictions (e.g. Japan), in which any exercise of the right granted in Section 3(b) of this License (the right to make Adaptations) would be deemed to be a distortion, mutilation, modification or other derogatory action prejudicial to the Original Author's honor and reputation, the Licensor will waive or not assert, as appropriate, this Section, to the fullest extent permitted by the applicable national law, to enable You to reasonably exercise Your right under Section 3(b) of this License (right to make Adaptations) but not otherwise. 5. 
Representations, Warranties and Disclaimer UNLESS OTHERWISE MUTUALLY AGREED TO BY THE PARTIES IN WRITING, LICENSOR OFFERS THE WORK AS-IS AND MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE WORK, EXPRESS, IMPLIED, STATUTORY OR OTHERWISE, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF TITLE, MERCHANTIBILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, ACCURACY, OR THE PRESENCE OF ABSENCE OF ERRORS, WHETHER OR NOT DISCOVERABLE. SOME JURISDICTIONS DO NOT ALLOW THE EXCLUSION OF IMPLIED WARRANTIES, SO SUCH EXCLUSION MAY NOT APPLY TO YOU. 6. Limitation on Liability. EXCEPT TO THE EXTENT REQUIRED BY APPLICABLE LAW, IN NO EVENT WILL LICENSOR BE LIABLE TO YOU ON ANY LEGAL THEORY FOR ANY SPECIAL, INCIDENTAL, CONSEQUENTIAL, PUNITIVE OR EXEMPLARY DAMAGES ARISING OUT OF THIS LICENSE OR THE USE OF THE WORK, EVEN IF LICENSOR HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 7. Termination a. This License and the rights granted hereunder will terminate automatically upon any breach by You of the terms of this License. Individuals or entities who have received Adaptations or Collections from You under this License, however, will not have their licenses terminated provided such individuals or entities remain in full compliance with those licenses. Sections 1, 2, 5, 6, 7, and 8 will survive any termination of this License. b. Subject to the above terms and conditions, the license granted here is perpetual (for the duration of the applicable copyright in the Work). Notwithstanding the above, Licensor reserves the right to release the Work under different license terms or to stop distributing the Work at any time; provided, however that any such election will not serve to withdraw this License (or any other license that has been, or is required to be, granted under the terms of this License), and this License will continue in full force and effect unless terminated as stated above. 8. Miscellaneous a. Each time You Distribute or Publicly Perform the Work or a Collection, the Licensor offers to the recipient a license to the Work on the same terms and conditions as the license granted to You under this License. b. Each time You Distribute or Publicly Perform an Adaptation, Licensor offers to the recipient a license to the original Work on the same terms and conditions as the license granted to You under this License. c. If any provision of this License is invalid or unenforceable under applicable law, it shall not affect the validity or enforceability of the remainder of the terms of this License, and without further action by the parties to this agreement, such provision shall be reformed to the minimum extent necessary to make such provision valid and enforceable. d. No term or provision of this License shall be deemed waived and no breach consented to unless such waiver or consent shall be in writing and signed by the party to be charged with such waiver or consent. This License constitutes the entire agreement between the parties with respect to the Work licensed here. There are no understandings, agreements or representations with respect to the Work not specified here. Licensor shall not be bound by any additional provisions that may appear in any communication from You. e. This License may not be modified without the mutual written agreement of the Licensor and You. f. 
The rights granted under, and the subject matter referenced, in this License were drafted utilizing the terminology of the Berne Convention for the Protection of Literary and Artistic Works (as amended on September 28, 1979), the Rome Convention of 1961, the WIPO Copyright Treaty of 1996, the WIPO Performances and Phonograms Treaty of 1996 and the Universal Copyright Convention (as revised on July 24, 1971). These rights and subject matter take effect in the relevant jurisdiction in which the License terms are sought to be enforced according to the corresponding provisions of the implementation of those treaty provisions in the applicable national law. If the standard suite of rights granted under applicable copyright law includes additional rights not granted under this License, such additional rights are deemed to be included in the License; this License is not intended to restrict the license of any rights under applicable law.

Creative Commons Notice

Creative Commons is not a party to this License, and makes no warranty whatsoever in connection with the Work. Creative Commons will not be liable to You or any party on any legal theory for any damages whatsoever, including without limitation any general, special, incidental or consequential damages arising in connection to this license. Notwithstanding the foregoing two (2) sentences, if Creative Commons has expressly identified itself as the Licensor hereunder, it shall have all rights and obligations of Licensor. Except for the limited purpose of indicating to the public that the Work is licensed under the CCPL, Creative Commons does not authorize the use by either party of the trademark "Creative Commons" or any related trademark or logo of Creative Commons without the prior written consent of Creative Commons. Any permitted use will be in compliance with Creative Commons' then-current trademark usage guidelines, as may be published on its website or otherwise made available upon request from time to time. For the avoidance of doubt, this trademark restriction does not form part of this License. Creative Commons may be contacted at http://creativecommons.org/.

================================================
FILE: LICENSES/CC-BY-SA-4.0.txt
================================================
Creative Commons Attribution-ShareAlike 4.0 International

Creative Commons Corporation (“Creative Commons”) is not a law firm and does not provide legal services or legal advice. Distribution of Creative Commons public licenses does not create a lawyer-client or other relationship. Creative Commons makes its licenses and related information available on an “as-is” basis. Creative Commons gives no warranties regarding its licenses, any material licensed under their terms and conditions, or any related information. Creative Commons disclaims all liability for damages resulting from their use to the fullest extent possible.

Using Creative Commons Public Licenses

Creative Commons public licenses provide a standard set of terms and conditions that creators and other rights holders may use to share original works of authorship and other material subject to copyright and certain other rights specified in the public license below. The following considerations are for informational purposes only, are not exhaustive, and do not form part of our licenses. Considerations for licensors: Our public licenses are intended for use by those authorized to give the public permission to use material in ways otherwise restricted by copyright and certain other rights.
Our licenses are irrevocable. Licensors should read and understand the terms and conditions of the license they choose before applying it. Licensors should also secure all rights necessary before applying our licenses so that the public can reuse the material as expected. Licensors should clearly mark any material not subject to the license. This includes other CC-licensed material, or material used under an exception or limitation to copyright. More considerations for licensors. Considerations for the public: By using one of our public licenses, a licensor grants the public permission to use the licensed material under specified terms and conditions. If the licensor’s permission is not necessary for any reason–for example, because of any applicable exception or limitation to copyright–then that use is not regulated by the license. Our licenses grant only permissions under copyright and certain other rights that a licensor has authority to grant. Use of the licensed material may still be restricted for other reasons, including because others have copyright or other rights in the material. A licensor may make special requests, such as asking that all changes be marked or described. Although not required by our licenses, you are encouraged to respect those requests where reasonable. More considerations for the public. Creative Commons Attribution-ShareAlike 4.0 International Public License By exercising the Licensed Rights (defined below), You accept and agree to be bound by the terms and conditions of this Creative Commons Attribution-ShareAlike 4.0 International Public License ("Public License"). To the extent this Public License may be interpreted as a contract, You are granted the Licensed Rights in consideration of Your acceptance of these terms and conditions, and the Licensor grants You such rights in consideration of benefits the Licensor receives from making the Licensed Material available under these terms and conditions. Section 1 – Definitions. a. Adapted Material means material subject to Copyright and Similar Rights that is derived from or based upon the Licensed Material and in which the Licensed Material is translated, altered, arranged, transformed, or otherwise modified in a manner requiring permission under the Copyright and Similar Rights held by the Licensor. For purposes of this Public License, where the Licensed Material is a musical work, performance, or sound recording, Adapted Material is always produced where the Licensed Material is synched in timed relation with a moving image. b. Adapter's License means the license You apply to Your Copyright and Similar Rights in Your contributions to Adapted Material in accordance with the terms and conditions of this Public License. c. BY-SA Compatible License means a license listed at creativecommons.org/compatiblelicenses, approved by Creative Commons as essentially the equivalent of this Public License. d. Copyright and Similar Rights means copyright and/or similar rights closely related to copyright including, without limitation, performance, broadcast, sound recording, and Sui Generis Database Rights, without regard to how the rights are labeled or categorized. For purposes of this Public License, the rights specified in Section 2(b)(1)-(2) are not Copyright and Similar Rights. e. 
Effective Technological Measures means those measures that, in the absence of proper authority, may not be circumvented under laws fulfilling obligations under Article 11 of the WIPO Copyright Treaty adopted on December 20, 1996, and/or similar international agreements. f. Exceptions and Limitations means fair use, fair dealing, and/or any other exception or limitation to Copyright and Similar Rights that applies to Your use of the Licensed Material. g. License Elements means the license attributes listed in the name of a Creative Commons Public License. The License Elements of this Public License are Attribution and ShareAlike. h. Licensed Material means the artistic or literary work, database, or other material to which the Licensor applied this Public License. i. Licensed Rights means the rights granted to You subject to the terms and conditions of this Public License, which are limited to all Copyright and Similar Rights that apply to Your use of the Licensed Material and that the Licensor has authority to license. j. Licensor means the individual(s) or entity(ies) granting rights under this Public License. k. Share means to provide material to the public by any means or process that requires permission under the Licensed Rights, such as reproduction, public display, public performance, distribution, dissemination, communication, or importation, and to make material available to the public including in ways that members of the public may access the material from a place and at a time individually chosen by them. l. Sui Generis Database Rights means rights other than copyright resulting from Directive 96/9/EC of the European Parliament and of the Council of 11 March 1996 on the legal protection of databases, as amended and/or succeeded, as well as other essentially equivalent rights anywhere in the world. m. You means the individual or entity exercising the Licensed Rights under this Public License. Your has a corresponding meaning. Section 2 – Scope. a. License grant. 1. Subject to the terms and conditions of this Public License, the Licensor hereby grants You a worldwide, royalty-free, non-sublicensable, non-exclusive, irrevocable license to exercise the Licensed Rights in the Licensed Material to: A. reproduce and Share the Licensed Material, in whole or in part; and B. produce, reproduce, and Share Adapted Material. 2. Exceptions and Limitations. For the avoidance of doubt, where Exceptions and Limitations apply to Your use, this Public License does not apply, and You do not need to comply with its terms and conditions. 3. Term. The term of this Public License is specified in Section 6(a). 4. Media and formats; technical modifications allowed. The Licensor authorizes You to exercise the Licensed Rights in all media and formats whether now known or hereafter created, and to make technical modifications necessary to do so. The Licensor waives and/or agrees not to assert any right or authority to forbid You from making technical modifications necessary to exercise the Licensed Rights, including technical modifications necessary to circumvent Effective Technological Measures. For purposes of this Public License, simply making modifications authorized by this Section 2(a)(4) never produces Adapted Material. 5. Downstream recipients. A. Offer from the Licensor – Licensed Material. Every recipient of the Licensed Material automatically receives an offer from the Licensor to exercise the Licensed Rights under the terms and conditions of this Public License. B. 
Additional offer from the Licensor – Adapted Material. Every recipient of Adapted Material from You automatically receives an offer from the Licensor to exercise the Licensed Rights in the Adapted Material under the conditions of the Adapter’s License You apply. C. No downstream restrictions. You may not offer or impose any additional or different terms or conditions on, or apply any Effective Technological Measures to, the Licensed Material if doing so restricts exercise of the Licensed Rights by any recipient of the Licensed Material. 6. No endorsement. Nothing in this Public License constitutes or may be construed as permission to assert or imply that You are, or that Your use of the Licensed Material is, connected with, or sponsored, endorsed, or granted official status by, the Licensor or others designated to receive attribution as provided in Section 3(a)(1)(A)(i). b. Other rights. 1. Moral rights, such as the right of integrity, are not licensed under this Public License, nor are publicity, privacy, and/or other similar personality rights; however, to the extent possible, the Licensor waives and/or agrees not to assert any such rights held by the Licensor to the limited extent necessary to allow You to exercise the Licensed Rights, but not otherwise. 2. Patent and trademark rights are not licensed under this Public License. 3. To the extent possible, the Licensor waives any right to collect royalties from You for the exercise of the Licensed Rights, whether directly or through a collecting society under any voluntary or waivable statutory or compulsory licensing scheme. In all other cases the Licensor expressly reserves any right to collect such royalties. Section 3 – License Conditions. Your exercise of the Licensed Rights is expressly made subject to the following conditions. a. Attribution. 1. If You Share the Licensed Material (including in modified form), You must: A. retain the following if it is supplied by the Licensor with the Licensed Material: i. identification of the creator(s) of the Licensed Material and any others designated to receive attribution, in any reasonable manner requested by the Licensor (including by pseudonym if designated); ii. a copyright notice; iii. a notice that refers to this Public License; iv. a notice that refers to the disclaimer of warranties; v. a URI or hyperlink to the Licensed Material to the extent reasonably practicable; B. indicate if You modified the Licensed Material and retain an indication of any previous modifications; and C. indicate the Licensed Material is licensed under this Public License, and include the text of, or the URI or hyperlink to, this Public License. 2. You may satisfy the conditions in Section 3(a)(1) in any reasonable manner based on the medium, means, and context in which You Share the Licensed Material. For example, it may be reasonable to satisfy the conditions by providing a URI or hyperlink to a resource that includes the required information. 3. If requested by the Licensor, You must remove any of the information required by Section 3(a)(1)(A) to the extent reasonably practicable. b. ShareAlike.In addition to the conditions in Section 3(a), if You Share Adapted Material You produce, the following conditions also apply. 1. The Adapter’s License You apply must be a Creative Commons license with the same License Elements, this version or later, or a BY-SA Compatible License. 2. You must include the text of, or the URI or hyperlink to, the Adapter's License You apply. 
You may satisfy this condition in any reasonable manner based on the medium, means, and context in which You Share Adapted Material. 3. You may not offer or impose any additional or different terms or conditions on, or apply any Effective Technological Measures to, Adapted Material that restrict exercise of the rights granted under the Adapter's License You apply. Section 4 – Sui Generis Database Rights. Where the Licensed Rights include Sui Generis Database Rights that apply to Your use of the Licensed Material: a. for the avoidance of doubt, Section 2(a)(1) grants You the right to extract, reuse, reproduce, and Share all or a substantial portion of the contents of the database; b. if You include all or a substantial portion of the database contents in a database in which You have Sui Generis Database Rights, then the database in which You have Sui Generis Database Rights (but not its individual contents) is Adapted Material, including for purposes of Section 3(b); and c. You must comply with the conditions in Section 3(a) if You Share all or a substantial portion of the contents of the database. For the avoidance of doubt, this Section 4 supplements and does not replace Your obligations under this Public License where the Licensed Rights include other Copyright and Similar Rights. Section 5 – Disclaimer of Warranties and Limitation of Liability. a. Unless otherwise separately undertaken by the Licensor, to the extent possible, the Licensor offers the Licensed Material as-is and as-available, and makes no representations or warranties of any kind concerning the Licensed Material, whether express, implied, statutory, or other. This includes, without limitation, warranties of title, merchantability, fitness for a particular purpose, non-infringement, absence of latent or other defects, accuracy, or the presence or absence of errors, whether or not known or discoverable. Where disclaimers of warranties are not allowed in full or in part, this disclaimer may not apply to You. b. To the extent possible, in no event will the Licensor be liable to You on any legal theory (including, without limitation, negligence) or otherwise for any direct, special, indirect, incidental, consequential, punitive, exemplary, or other losses, costs, expenses, or damages arising out of this Public License or use of the Licensed Material, even if the Licensor has been advised of the possibility of such losses, costs, expenses, or damages. Where a limitation of liability is not allowed in full or in part, this limitation may not apply to You. c. The disclaimer of warranties and limitation of liability provided above shall be interpreted in a manner that, to the extent possible, most closely approximates an absolute disclaimer and waiver of all liability. Section 6 – Term and Termination. a. This Public License applies for the term of the Copyright and Similar Rights licensed here. However, if You fail to comply with this Public License, then Your rights under this Public License terminate automatically. b. Where Your right to use the Licensed Material has terminated under Section 6(a), it reinstates: 1. automatically as of the date the violation is cured, provided it is cured within 30 days of Your discovery of the violation; or 2. upon express reinstatement by the Licensor. c. For the avoidance of doubt, this Section 6(b) does not affect any right the Licensor may have to seek remedies for Your violations of this Public License. d. 
For the avoidance of doubt, the Licensor may also offer the Licensed Material under separate terms or conditions or stop distributing the Licensed Material at any time; however, doing so will not terminate this Public License. e. Sections 1, 5, 6, 7, and 8 survive termination of this Public License. Section 7 – Other Terms and Conditions. a. The Licensor shall not be bound by any additional or different terms or conditions communicated by You unless expressly agreed. b. Any arrangements, understandings, or agreements regarding the Licensed Material not stated herein are separate from and independent of the terms and conditions of this Public License. Section 8 – Interpretation. a. For the avoidance of doubt, this Public License does not, and shall not be interpreted to, reduce, limit, restrict, or impose conditions on any use of the Licensed Material that could lawfully be made without permission under this Public License. b. To the extent possible, if any provision of this Public License is deemed unenforceable, it shall be automatically reformed to the minimum extent necessary to make it enforceable. If the provision cannot be reformed, it shall be severed from this Public License without affecting the enforceability of the remaining terms and conditions. c. No term or condition of this Public License will be waived and no failure to comply consented to unless expressly agreed to by the Licensor. d. Nothing in this Public License constitutes or may be interpreted as a limitation upon, or waiver of, any privileges and immunities that apply to the Licensor or You, including from the legal processes of any jurisdiction or authority. Creative Commons is not a party to its public licenses. Notwithstanding, Creative Commons may elect to apply one of its public licenses to material it publishes and in those instances will be considered the “Licensor.” Except for the limited purpose of indicating that material is shared under a Creative Commons public license or as otherwise permitted by the Creative Commons policies published at creativecommons.org/policies, Creative Commons does not authorize the use of the trademark “Creative Commons” or any other trademark or logo of Creative Commons without its prior written consent including, without limitation, in connection with any unauthorized modifications to any of its public licenses or any other arrangements, understandings, or agreements concerning use of licensed material. For the avoidance of doubt, this paragraph does not form part of the public licenses. Creative Commons may be contacted at creativecommons.org. ================================================ FILE: LICENSES/MIT.txt ================================================ MIT License Copyright (c) Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

================================================
FILE: README.md
================================================

# sbomnix

This repository is home to various command line tools and Python libraries that aim to help with software supply chain challenges:

- [`sbomnix`](#generate-sbom) is a utility that generates SBOMs given a [Nix](https://nixos.org/) flake reference or store path.
- [`nixgraph`](./doc/nixgraph.md) helps query and visualize dependency graphs for [Nix](https://nixos.org/) packages.
- [`nixmeta`](./doc/nixmeta.md) summarizes nixpkgs meta-attributes from the given nixpkgs version.
- [`vulnxscan`](./doc/vulnxscan.md) is a vulnerability scanner demonstrating the usage of SBOMs in running vulnerability scans.
- [`repology_cli`](./doc/repology_cli.md) and [`repology_cve`](./doc/repology_cli.md#repology-cve-search) are command line clients to [repology.org](https://repology.org/).
- [`nix_outdated`](./doc/nix_outdated.md) is a utility that finds outdated nix dependencies for a given out path, listing the outdated packages in priority order based on how many other packages depend on the given outdated package.
- [`provenance`](./doc/provenance.md) is a command line tool to generate SLSA v1.0 compliant [provenance](https://slsa.dev/spec/v1.0/provenance) attestation files in json format for any nix flake or derivation.

For an example of how to use the tooling provided in this repository to automate daily vulnerability scans for a nix flake project, see: [ghafscan](https://github.com/tiiuae/ghafscan).

The [CycloneDX](https://cyclonedx.org/) and [SPDX](https://spdx.github.io/spdx-spec/v2.3/) SBOMs for each release of the sbomnix tooling are available in the [release assets](https://github.com/tiiuae/sbomnix/releases/latest).

All the tools in this repository originate from [Ghaf Framework](https://github.com/tiiuae/ghaf).

Table of Contents
=================

* [Getting Started](#getting-started)
  * [Running as Nix Flake](#running-as-nix-flake)
  * [Running from Nix Development Shell](#running-from-nix-development-shell)
* [Buildtime vs Runtime Dependencies](#buildtime-vs-runtime-dependencies)
  * [Buildtime Dependencies](#buildtime-dependencies)
  * [Runtime Dependencies](#runtime-dependencies)
* [Usage Examples](#usage-examples)
  * [Generate SBOM Based on Flake Reference](#generate-sbom-based-on-flake-reference)
  * [Generate SBOM Based on Derivation File or Out-path](#generate-sbom-based-on-derivation-file-or-out-path)
  * [Generate SBOM Including Buildtime Dependencies](#generate-sbom-including-buildtime-dependencies)
  * [Generate SBOM Based on a Store Path or Result Symlink](#generate-sbom-based-on-a-store-path-or-result-symlink)
  * [Nixpkgs Metadata Source Selection](#nixpkgs-metadata-source-selection)
  * [Visualize Package Dependencies](#visualize-package-dependencies)
* [Contribute](#contribute)
* [License](#license)
* [Acknowledgements](#acknowledgements)

## Getting Started

`sbomnix` requires the [Nix](https://nixos.org/download.html) command line tool to be in `$PATH`. Direct, non-flake usage requires a modern `nix` supporting `nix-command` and `--json-format 1`.
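As a quick prerequisite check, you can verify that the `nix` CLI is reachable and that flake commands work before running any of the tools. A minimal sketch (here, `nix flake metadata` is used only as a cheap probe that the `nix-command` and `flakes` features are enabled; any public flakeref would do as the probe target):

```bash
# Verify the nix CLI is in PATH and print its version
$ command -v nix && nix --version

# Probe that flake commands work
$ nix flake metadata github:tiiuae/sbomnix >/dev/null && echo "flakes enabled"
```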
### Running as Nix Flake

`sbomnix` can be run as a [Nix flake](https://nixos.wiki/wiki/Flakes) from the `tiiuae/sbomnix` repository:

```bash
# '--' signifies the end of argument list for `nix`.
# '--help' is the first argument to `sbomnix`
$ nix run github:tiiuae/sbomnix#sbomnix -- --help
```

or from a local repository:

```bash
$ git clone https://github.com/tiiuae/sbomnix
$ cd sbomnix
$ nix run .#sbomnix -- --help
```

See the full list of supported flake targets by running `nix flake show`.

### Running from Nix Development Shell

If you have nix flakes [enabled](https://nixos.wiki/wiki/Flakes#Enable_flakes), start a development shell:

```bash
$ git clone https://github.com/tiiuae/sbomnix
$ cd sbomnix
$ nix develop
```

The devshell adds all CLI entry points (`sbomnix`, `nixgraph`, `nixmeta`, `vulnxscan`, `repology_cli`, `repology_cve`, `nix_outdated`, `provenance`) to `PATH`. They run against the local source tree, so any edits are picked up immediately without reinstalling.

All tools support a consistent verbosity flag: no flag or `--verbose=0` shows INFO output, `-v` or `--verbose=1` enables VERBOSE progress details, `-vv` or `--verbose=2` enables DEBUG details, and `-vvv` or `--verbose=3` enables SPAM output. Repeated short flags are counted, so `-v -v`, `-vv`, and `--verbose=2` are equivalent.

## Buildtime vs Runtime Dependencies

#### Buildtime Dependencies

The buildtime dependencies of a Nix package are the [closure](https://nixos.org/manual/nix/stable/glossary.html#gloss-closure) of its derivation (`.drv` file): all the store paths Nix must have available to reproduce the build, including compilers, build tools, standard libraries, and the infrastructure to bootstrap them. Even a simple hello-world C program typically pulls in over 150 packages, including gcc, stdenv, glibc, and bash. Computing the buildtime dependency closure only requires evaluating the derivation; the target does not need to be built.

For reference, below is a graph of the first two layers of buildtime dependencies of an example hello-world C program (direct dependencies and the first level of transitive dependencies): [C hello-world buildtime, depth=2](doc/img/c_hello_world_buildtime_d2.svg).

#### Runtime Dependencies

[Runtime dependencies](https://nixos.org/manual/nix/stable/command-ref/new-cli/nix3-why-depends.html#description) are a subset of buildtime dependencies. When Nix builds a package, it scans the build outputs for references to other store paths and records them. The runtime closure is the transitive set of those recorded references: the store paths the built output actually needs at runtime. Because this information is captured during the build, the target must be built before its runtime dependencies can be determined. For reference, below is the complete runtime dependency graph of the same hello-world C program:

By default, the tools in this repository work with runtime dependencies. Specifically, unless told otherwise, `sbomnix` generates an SBOM of runtime dependencies, `nixgraph` graphs runtime dependencies, and `vulnxscan` and `nix_outdated` scan runtime dependencies. Since the target must be built to determine runtime dependencies, all these tools will build (force-realise) the target as part of their invocation. All tools also accept a `--buildtime` argument to work with buildtime dependencies instead; as noted above, using `--buildtime` does not require building the target.
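To see the difference between the two dependency sets for yourself, you can query the Nix store directly with plain nix commands. The sketch below illustrates the concepts above; it is not a description of how the tools in this repository are implemented:

```bash
# Runtime closure: requires the target to be built first
$ nix build nixpkgs#hello -o ./result
$ nix-store --query --requisites ./result

# Buildtime closure: query the requisites of the .drv file itself;
# this only needs the derivation to be evaluated, not built
$ nix-store --query --requisites "$(nix path-info --derivation nixpkgs#hello)"
```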
## Usage Examples

In the below examples, we use Nix package `wget` as an example target, referred to by flakeref `github:NixOS/nixpkgs/nixos-unstable#wget`.

#### Generate SBOM Based on Flake Reference

`sbomnix` accepts [flake references](https://nixos.org/manual/nix/stable/command-ref/new-cli/nix3-flake.html#flake-references) as targets:

```bash
$ sbomnix github:NixOS/nixpkgs?ref=nixos-unstable#wget
```

#### Generate SBOM Based on Derivation File or Out-path

Flake references are the recommended target for `sbomnix`. When the target is a flake reference, `sbomnix` can resolve the nixpkgs version used to build the package and enrich the SBOM with metadata such as descriptions, licenses, maintainers, and homepage links. When the target is a store path, there is no information about which nixpkgs version produced it, so metadata enrichment is skipped by default; see [Nixpkgs Metadata Source Selection](#nixpkgs-metadata-source-selection).

By default `sbomnix` scans the given target and generates an SBOM including the runtime dependencies. Notice: determining the target runtime dependencies in Nix requires building the target.

```bash
# Target can be specified as a flakeref or a nix store path, e.g.:
#   sbomnix .
#   sbomnix github:tiiuae/sbomnix
#   sbomnix nixpkgs#wget
#   sbomnix /nix/store/... (note: nixpkgs metadata not available for store path targets)
# Ref: https://nixos.org/manual/nix/stable/command-ref/new-cli/nix3-flake.html#flake-references
$ sbomnix github:NixOS/nixpkgs/nixos-unstable#wget
...
INFO Wrote: sbom.cdx.json
INFO Wrote: sbom.spdx.json
INFO Wrote: sbom.csv
```

The main outputs are the SBOM json files `sbom.cdx.json` and `sbom.spdx.json` in [CycloneDX](https://cyclonedx.org/) and [SPDX](https://spdx.github.io/spdx-spec/v2.3/) formats.

#### Generate SBOM Including Buildtime Dependencies

By default `sbomnix` scans the given target for runtime dependencies. You can tell `sbomnix` to determine the buildtime dependencies using the `--buildtime` argument. The example below generates an SBOM including buildtime dependencies. Notice: as opposed to runtime dependencies, determining the buildtime dependencies does not require building the target.

```bash
$ sbomnix github:NixOS/nixpkgs/nixos-unstable#wget --buildtime
```

#### Generate SBOM Based on a Store Path or Result Symlink

`sbomnix` accepts Nix store paths and result symlinks as targets:

```bash
$ sbomnix /path/to/result
```

Note: store paths carry no record of which nixpkgs version produced them, so nixpkgs metadata enrichment is skipped by default. Pass `--meta-nixpkgs` to supply a nixpkgs source explicitly, or see [Nixpkgs Metadata Source Selection](#nixpkgs-metadata-source-selection).

#### Nixpkgs Metadata Source Selection

`sbomnix` enriches packages with nixpkgs metadata, such as descriptions, licenses, maintainers, and homepage links, when it can select a nixpkgs source that is tied to the target. For flakeref targets, `sbomnix` uses the target flake context. NixOS toplevel flakerefs are handled through the selected NixOS package set, so overlays, package overrides, nixpkgs config, and system-specific package-set changes can be represented. Store-path targets skip nixpkgs metadata by default; pass `--meta-nixpkgs` to choose the source explicitly. `--meta-nixpkgs <source>` scans an explicit nixpkgs source. `--meta-nixpkgs nix-path` scans the `nixpkgs=` entry from `NIX_PATH` as an explicit opt-in source. `--exclude-meta` disables this enrichment and cannot be combined with `--meta-nixpkgs`.
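For example, a store-path scan that explicitly opts in to nixpkgs metadata through the `NIX_PATH` entry (a usage sketch based on the flags described above; `/path/to/result` stands in for a real store path or result symlink):

```bash
# Store-path target: enrich with metadata from the 'nixpkgs=' entry in NIX_PATH
$ sbomnix /path/to/result --meta-nixpkgs nix-path
```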
CycloneDX and SPDX outputs record the selected metadata source in document metadata, including fields such as `nixpkgs:metadata_source_method`, `nixpkgs:path`, `nixpkgs:rev`, `nixpkgs:flakeref`, `nixpkgs:version`, and `nixpkgs:message`.

#### Visualize Package Dependencies

`sbomnix` uses structured Nix JSON to find package dependencies where available. `nixgraph` can also be used as a stand-alone tool for visualizing package dependencies. Below, we show an example of visualizing package `wget` runtime dependencies:

```bash
$ nixgraph github:NixOS/nixpkgs/nixos-unstable#wget --depth=2
```

This outputs the dependency graph as an image (with maxdepth 2):

For more examples on querying and visualizing the package dependencies, see: [nixgraph](./doc/nixgraph.md).

## Contribute

Any pull requests, questions and error reports are welcome. To start development, we recommend using the Nix flakes development shell:

```bash
$ git clone https://github.com/tiiuae/sbomnix
$ cd sbomnix/
$ nix develop
```

Before opening a pull request, run at minimum:

```bash
$ ./scripts/check-fast.sh
```

This runs the formatter, a fast flake eval, and the fast test lane. CI runs `./scripts/check-full.sh`, which validates the flake and runs the full test lane with coverage.

To deactivate the Nix devshell, run `exit` in your shell. To see other Nix flake targets, run `nix flake show`.

## License

This project is licensed under the Apache-2.0 license - see the [Apache-2.0.txt](LICENSES/Apache-2.0.txt) file for details.

## Acknowledgements

Parts of the Nix store derivation loading code in `sbomnix` ([derivation.py](src/sbomnix/derivation.py) and [derivers.py](src/sbomnix/derivers.py)) originate from [vulnix](https://github.com/nix-community/vulnix).

================================================
FILE: REUSE.toml
================================================

# SPDX-FileCopyrightText: 2022-2025 Technology Innovation Institute (TII)
# SPDX-License-Identifier: Apache-2.0

version = 1
SPDX-PackageName = "sbomnix"
SPDX-PackageSupplier = "Technology Innovation Institute "
SPDX-PackageDownloadLocation = "https://github.com/tiiuae/sbomnix"

[[annotations]]
SPDX-License-Identifier = "CC-BY-3.0"
SPDX-FileCopyrightText = "2022-2025 Technology Innovation Institute (TII)"
precedence = "closest"
path = [
  "doc/img/*",
]

[[annotations]]
SPDX-License-Identifier = "Apache-2.0"
SPDX-FileCopyrightText = "2022-2025 Technology Innovation Institute (TII)"
precedence = "closest"
path = [
  "**.yml",
  "**.toml",
  "flake.lock",
  "pyrightconfig.json",
  "VERSION",
  "tests/resources/**",
]

================================================
FILE: VERSION
================================================

1.7.6

================================================
FILE: default.nix
================================================

# SPDX-FileCopyrightText: 2023 Technology Innovation Institute (TII)
# SPDX-FileCopyrightText: 2020-2023 Eelco Dolstra and the flake-compat contributors
#
# SPDX-License-Identifier: MIT

# This file originates from:
#   https://github.com/nix-community/flake-compat

# This file provides backward compatibility to nix < 2.4 clients
{
  system ?
    builtins.currentSystem,
}:
let
  lock = builtins.fromJSON (builtins.readFile ./flake.lock);
  inherit (lock.nodes.flake-compat.locked)
    owner
    repo
    rev
    narHash
    ;
  flake-compat = fetchTarball {
    url = "https://github.com/${owner}/${repo}/archive/${rev}.tar.gz";
    sha256 = narHash;
  };
  flake = import flake-compat {
    inherit system;
    src = ./.;
  };
in
flake.defaultNix

================================================
FILE: doc/nix_outdated.md
================================================

# Getting Started

To get started, follow the [Getting Started](../README.md#getting-started) section from the main [README](../README.md).

As an example, to run [`nix_outdated`](../src/nixupdate/nix_outdated.py) from the `tiiuae/sbomnix` repository:

```bash
# '--' signifies the end of argument list for `nix`.
# '--help' is the first argument to `nix_outdated`
$ nix run github:tiiuae/sbomnix#nix_outdated -- --help
```

## Example Target

We use Nix package `git` as an example target, referred to by flakeref `github:NixOS/nixpkgs/nixos-unstable#git`.

# nix_outdated

[`nix_outdated`](../src/nixupdate/nix_outdated.py) is a command line tool to list outdated nix dependencies for a given target nix out path or flakeref. By default, the script outputs the runtime dependencies of the given target that appear outdated in the nixpkgs 'nix_unstable' channel; each listed package would potentially need a PR to update it in nixpkgs to the package's latest upstream release version, specified in the output table column 'version_upstream'. The list of output packages is in priority order based on how many other packages depend on the potentially outdated package.

The command below finds `git` runtime dependencies that would have an update in the package's upstream repository based on repology, where the latest release version is not available in nix unstable. The captured output is illustrative; exact versions and findings will differ depending on the package versions resolved at run time.

```bash
# Target can be specified as a flakeref or a nix store path, e.g.:
#   nix_outdated .
#   nix_outdated github:tiiuae/sbomnix
#   nix_outdated nixpkgs#git
#   nix_outdated /nix/store/...
# Ref: https://nixos.org/manual/nix/stable/command-ref/new-cli/nix3-flake.html#flake-references
$ nix_outdated github:NixOS/nixpkgs/nixos-unstable#git
INFO Generating SBOM for target '/nix/store/...-git-'
INFO Dependencies that need update in nixpkgs (in priority order based on how many other packages depend on the potentially outdated package):

|   priority | nix_package        | version_local | version_nixpkgs | version_upstream      |
|------------+--------------------+---------------+-----------------+-----------------------|
|          9 | libidn2            | 2.3.2         | 2.3.2           | 2.3.4                 |
|          8 | glibc              | 2.35-224      | 2.35-224        | 2.37                  |
|          5 | perl:uri           | 5.05          | 5.05            | 5.17                  |
|          4 | perl:http-message  | 6.26          | 6.26            | 6.44                  |
|          4 | openssl            | 3.0.8         | 3.0.8           | 3.1.0                 |
|          3 | perl:html-parser   | 3.75          | 3.75            | 3.81                  |
|          3 | perl:try-tiny      | 0.30          | 0.30            | 0.31                  |
|          3 | perl:mozilla-ca    | 20200520      | 20200520        | 20221114;20221114.0.0 |
|          2 | perl:digest-hmac   | 1.03          | 1.03            | 1.04                  |
|          2 | sqlite             | 3.40.1        | 3.41.0          | 3.41.1                |
|          2 | perl:fcgi          | 0.79          | 0.79            | 0.82                  |
|          2 | perl:net-http      | 6.19          | 6.19            | 6.22                  |
|          2 | perl:io-socket-ssl | 2.068         | 2.068           | 2.081;2.81.0          |
|          2 | perl:file-listing  | 6.14          | 6.14            | 6.15                  |
|          2 | perl:http-daemon   | 6.14          | 6.14            | 6.16                  |
|          2 | perl:http-cookies  | 6.09          | 6.09            | 6.10;6.10.0           |
|          2 | perl:cgi           | 4.51          | 4.51            | 4.56                  |
|          2 | nghttp2            | 1.51.0        | 1.51.0          | 1.52.0                |
|          2 | perl:test-fatal    | 0.016         | 0.016           | 0.017;0.17.0          |
|          2 | perl:test-needs    | 0.002006      | 0.002006        | 0.002010              |
|          1 | perl:libnet        | 3.12          | 3.12            | 3.14                  |
|          1 | git                | 2.39.2        | 2.39.2          | 2.40.0                |
|          1 | gettext            | 0.21          | 0.21            | 0.21.1                |
|          1 | perl:libwww-perl   | 6.67          | 6.67            | 6.68                  |

INFO Wrote: nix_outdated.csv
```

As an example, the first row in the above output table means that:

- `libidn2` in nix unstable is not up-to-date with what repology.org knows is the package's newest upstream version.
- `libidn2` is at the top of the table, as it has the highest priority among the listed outdated packages. The priority is based on how many other packages depend on the given outdated package. This datapoint is based on [nix-visualize](https://github.com/craigmbooth/nix-visualize). The value of the `priority` column is directly the `level` value determined by [nix-visualize](https://github.com/craigmbooth/nix-visualize). For a full description of the `level` values, see the nix-visualize documentation: https://github.com/craigmbooth/nix-visualize#vertical-positioning.
- `libidn2` local version is 2.3.2.
- `libidn2` newest version in nix unstable is 2.3.2 (based on repology.org).
- `libidn2` newest release version in the package's upstream repository is 2.3.4 (based on repology.org).
- `libidn2` is considered outdated, because the version string in `version_upstream` is later than the version string in `version_nixpkgs`.

================================================
FILE: doc/nixgraph.md
================================================

# nixgraph

[`nixgraph`](../src/nixgraph/main.py) is a Python library and command line utility for querying and visualizing dependency graphs for [Nix](https://nixos.org/) packages.
Table of Contents
=================

* [Getting Started](#getting-started)
* [Usage examples](#usage-examples)
  * [Example: package runtime dependencies](#example-package-runtime-dependencies)
  * [Example: depth](#example-depth)
  * [Example: colorize](#example-colorize)
  * [Example: inverse](#example-inverse)
  * [Example: package buildtime dependencies](#example-package-buildtime-dependencies)
  * [Example: output format](#example-output-format)
  * [Example: pathnames](#example-pathnames)

## Getting Started

To get started, follow the [Getting Started](../README.md#getting-started) section from the main [README](../README.md).

As an example, to run [`nixgraph`](../src/nixgraph/main.py) from your local clone of the `tiiuae/sbomnix` repository:

```bash
# '--' signifies the end of argument list for `nix`.
# '--help' is the first argument to `nixgraph`
$ nix run .#nixgraph -- --help
```

## Usage examples

In the below examples, we use nix package `wget` as an example target, referred to by flakeref `github:NixOS/nixpkgs/nixos-unstable#wget`. The example graphs below are illustrative; the actual graph generated will reflect the dependency versions resolved at run time.

#### Example: package runtime dependencies

```bash
# Target can be specified as a flakeref or a nix store path, e.g.:
#   nixgraph .
#   nixgraph github:tiiuae/sbomnix
#   nixgraph nixpkgs#wget
#   nixgraph /nix/store/...
# Ref: https://nixos.org/manual/nix/stable/command-ref/new-cli/nix3-flake.html#flake-references
$ nixgraph github:NixOS/nixpkgs/nixos-unstable#wget
INFO Wrote: graph.png
```

By default `nixgraph` scans the given target and generates a graph that shows the direct runtime dependencies. The default output is a png image `graph.png`:

#### Example: depth

```bash
$ nixgraph github:NixOS/nixpkgs/nixos-unstable#wget --depth=2
```

By default, when the `--depth` argument is not specified, `nixgraph` shows the direct dependencies. Increasing the `--depth` makes `nixgraph` walk the dependency chain deeper. For instance, with `--depth=2`, the output graph for `wget` becomes:

The value of `--depth` sets the maximum distance from the target node to any node included in the resulting graph. For instance, in the above example, `libunistring-1.0` gets included with `--depth=2` because the shortest path between `wget` and `libunistring` is two hops deep (`wget --> libidn2 --> libunistring`).

#### Example: colorize

```bash
$ nixgraph github:NixOS/nixpkgs/nixos-unstable#wget --depth=2 --colorize='openssl|libidn'
```

`--colorize` allows highlighting nodes that match the specified regular expression:

#### Example: inverse

```bash
$ nixgraph github:NixOS/nixpkgs/nixos-unstable#wget --depth=2 --inverse='glibc'
```

`--inverse` makes it possible to draw the graph backwards, starting from nodes that match the specified regular expression. For instance, the above command would show all the dependency paths from `wget` that lead to `glibc`:

`--inverse` is especially useful when working with larger graphs. As an example, consider the following graph for `git`:
(`nixgraph github:NixOS/nixpkgs/nixos-unstable#git --depth=3 --colorize="openssl-3|sqlite-3"`)

To find all the runtime dependency paths from `git` to the highlighted nodes `openssl` or `sqlite` in the above graph, run the following command:

```bash
# --depth=100: make sure the output graph includes "long enough" dependency chains
# --inverse="openssl-3|sqlite-3": draw the graph backwards starting from nodes that
#                                 match the specified regular expression
# --colorize="openssl-3|sqlite-3": colorize the matching nodes
nixgraph github:NixOS/nixpkgs/nixos-unstable#git --depth=100 --colorize="openssl-3|sqlite-3" --inverse="openssl-3|sqlite-3"
```

The output now becomes:

The output graph shows that there are three dependency paths from `git` to `openssl-3.0.7` and one dependency path that leads to `sqlite-3.39.4`.

#### Example: package buildtime dependencies

```bash
$ nixgraph github:NixOS/nixpkgs/nixos-unstable#wget --buildtime
```

Specifying `--buildtime` makes `nixgraph` visualize the buildtime dependencies instead of runtime dependencies:

#### Example: output format

```bash
$ nixgraph github:NixOS/nixpkgs/nixos-unstable#wget --out="graph.dot"
```

By default `nixgraph` outputs the graph as a png image `graph.png`. To change the output file name and format, use the `--out` argument. The output filename extension determines the output format. As an example, the above command would output the graph in `dot` format. For a full list of supported output formats, see: https://graphviz.org/doc/info/output.html. In addition to the graphviz-supported output formats, the tool supports csv output to allow post-processing of the output data; a short sketch follows at the end of this page.

#### Example: pathnames

```bash
$ nixgraph github:NixOS/nixpkgs/nixos-unstable#wget --depth=1 --pathnames
```

The `--pathnames` argument adds the store path to each node label in the output graph:

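As a closing note on the output-format example above, the csv output is handy for scripted post-processing. A sketch (the csv column layout is not documented here, so `csvlook` is used just to pretty-print whatever columns the tool emits):

```bash
# Write the graph as csv (the filename extension selects the format), then inspect it
$ nixgraph github:NixOS/nixpkgs/nixos-unstable#wget --depth=2 --out="graph.csv"
$ csvlook graph.csv | head
```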
================================================
FILE: doc/nixmeta.md
================================================

# Getting Started

To get started, follow the [Getting Started](../README.md#getting-started) section from the main [README](../README.md).

As an example, to run [`nixmeta`](../src/nixmeta/main.py) from the `tiiuae/sbomnix` repository:

```bash
# '--' signifies the end of argument list for `nix`.
# '--help' is the first argument to `nixmeta`
$ nix run github:tiiuae/sbomnix#nixmeta -- --help
```

# nixmeta

[`nixmeta`](../src/nixmeta/main.py) is a command line tool to summarize nixpkgs meta-attributes from the given nixpkgs version. The output is written to a csv file.

The nixpkgs version is specified with a [`flakeref`](https://nixos.org/manual/nix/stable/command-ref/new-cli/nix3-flake#flake-references). As an example, `--flakeref=github:NixOS/nixpkgs?ref=master` would make `nixmeta` output the meta-attributes from the nixpkgs version in the [master](https://github.com/NixOS/nixpkgs/tree/master) branch. Similarly, `--flakeref=github:NixOS/nixpkgs?ref=release-23.11` would output the meta-attributes from the nixpkgs version in the [release-23.11](https://github.com/NixOS/nixpkgs/tree/release-23.11) branch.

Note that `--flakeref` does not necessarily have to reference `github:NixOS/nixpkgs`: any flakeref, or even the `NIX_PATH` environment variable, can be used to specify the nixpkgs version. As an example, `--flakeref=github:tiiuae/sbomnix` would make `nixmeta` output the meta-attributes from the nixpkgs version [pinned by the sbomnix flake](https://github.com/tiiuae/sbomnix/blob/c243db5272fb01c4d97cbbb01a095ae514cd2dcb/flake.lock#L68) in its default branch.

As an example, the command below outputs nixpkgs meta-attributes from the nixpkgs version pinned by flake `github:NixOS/nixpkgs?ref=master`:

```bash
$ nixmeta --flakeref=github:NixOS/nixpkgs?ref=master
INFO Finding meta-info for nixpkgs pinned in flake: github:NixOS/nixpkgs?ref=master
INFO Wrote: /home/foo/sbomnix-fork/nixmeta.csv
```

The output summarizes the meta-attributes of all the target nixpkgs packages enumerated by `nix-env --query --available`. For each package, the output includes the following details:

```bash
$ head -n2 nixmeta.csv | csvlook
| name       | pname | version | meta_homepage        | meta_unfree | meta_license_short               | meta_license_spdxid                    | meta_maintainers_email |
| ---------- | ----- | ------- | -------------------- | ----------- | -------------------------------- | -------------------------------------- | ---------------------- |
| 0ad-0.0.26 | 0ad   | 0.0.26  | https://play0ad.com/ | False       | gpl2;lgpl21;mit;cc-by-sa-30;zlib | GPL-2.0;LGPL-2.1;MIT;CC-BY-SA-3.0;Zlib | nixpkgs@cvpetegem.be   |
```

================================================
FILE: doc/provenance.md
================================================

# Getting Started

To get started, follow the [Getting Started](../README.md#getting-started) section from the main [README](../README.md).

As an example, to run the [`provenance`](../src/provenance/main.py) tool from the `tiiuae/sbomnix` repository:

```bash
# '--' signifies the end of argument list for `nix`.
# '--help' is the first argument to `provenance`
$ nix run github:tiiuae/sbomnix#provenance -- --help
```

# provenance

[`provenance`](../src/provenance/main.py) is a command line tool to generate SLSA v1.0 compliant [provenance](https://slsa.dev/spec/v1.0/provenance) attestation files in json format for any nix flake or derivation.
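Once a provenance file has been generated (see the examples below), it can be inspected with standard json tooling. A minimal sketch, assuming the output follows the in-toto statement layout that SLSA v1.0 provenance builds on (top-level `predicateType` and `subject` fields):

```bash
# List the predicate type and the names of the attested subjects
$ jq -r '.predicateType, .subject[].name' provenance.json
```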
To generate a provenance file for `nixpkgs#hello`:

```bash
provenance nixpkgs#hello
```

To generate a provenance file for `curl-8.6.0` in your nix store:

```bash
provenance /nix/store/fh7vxc5xgiwl6z7vwq5c3lj84mpcs4br-curl-8.6.0-bin
```

By default the dependencies are resolved only at the top level, i.e. only direct dependencies. To get all dependencies recursively, use the `--recursive` option. Note that this will result in a very long provenance file. The dependencies listed are the nix buildtime dependencies of the derivation.

An example of a recursive provenance saved into a file:

```bash
provenance nixpkgs#hello --recursive --out ./provenance.json
```

## Build metadata

The build metadata to be used in the provenance is supplied through environment variables. These fields cannot be automatically derived from the nix derivation as they are build platform dependent.

Variable | Type | Explanation
--- | --- | ---
PROVENANCE_BUILD_TYPE | str | Corresponds to SLSA [buildDefinition.buildType](https://slsa.dev/spec/v1.0/provenance#builddefinition)
PROVENANCE_BUILDER_ID | str | Corresponds to SLSA [runDetails.builder.id](https://slsa.dev/spec/v1.0/provenance#builder)
PROVENANCE_INVOCATION_ID | str/int | Corresponds to SLSA [buildMetadata.invocationId](https://slsa.dev/spec/v1.0/provenance#buildmetadata)
PROVENANCE_TIMESTAMP_BEGIN | int (unix timestamp) | Is parsed into SLSA [buildMetadata.startedOn](https://slsa.dev/spec/v1.0/provenance#buildmetadata)
PROVENANCE_TIMESTAMP_FINISHED | int (unix timestamp) | Is parsed into SLSA [buildMetadata.finishedOn](https://slsa.dev/spec/v1.0/provenance#buildmetadata)
PROVENANCE_EXTERNAL_PARAMS | json | Corresponds to SLSA [buildDefinition.externalParameters](https://slsa.dev/spec/v1.0/provenance#builddefinition)
PROVENANCE_INTERNAL_PARAMS | json | Corresponds to SLSA [buildDefinition.internalParameters](https://slsa.dev/spec/v1.0/provenance#builddefinition)
PROVENANCE_OUTPUT_FILE | path | Has the same function as the `--out` argument.

Example usage in a simplified build script:

```bash
target="nixpkgs#hello"
PROVENANCE_TIMESTAMP_BEGIN="$(date +%s)"
nix build $target
PROVENANCE_TIMESTAMP_FINISHED="$(date +%s)"
PROVENANCE_EXTERNAL_PARAMS="$(jq -n --arg target "$target" '$ARGS.named')"
PROVENANCE_INTERNAL_PARAMS="$(jq -n --arg nixVersion "$(nix --version)" '$ARGS.named')"
export PROVENANCE_TIMESTAMP_BEGIN
export PROVENANCE_TIMESTAMP_FINISHED
export PROVENANCE_EXTERNAL_PARAMS
export PROVENANCE_INTERNAL_PARAMS
provenance $target --out ./provenance.json
```

================================================
FILE: doc/repology_cli.md
================================================

# repology_cli

[`repology_cli`](../src/repology/repology_cli.py) is a command line interface to [repology.org](https://repology.org/). It supports querying package information via package search terms in the same manner as https://repology.org/projects/?search. In addition, it supports querying package information for all packages in a CycloneDX SBOM and printing out some simple statistics based on the input.
Table of Contents
=================

* [Getting Started](#getting-started)
* [Usage Examples](#usage-examples)
  * [Search by Package Name Exact Match](#search-by-package-name-exact-match)
  * [Search by Package Name Search Term](#search-by-package-name-search-term)
  * [Search by Package Names in SBOM](#search-by-package-names-in-sbom)
  * [Statistics: SBOM Packages](#statistics-sbom-packages)
  * [Repology CVE search](#repology-cve-search)

## Getting Started

To get started, follow the [Getting Started](../README.md#getting-started) section from the main [README](../README.md).

As an example, to run [`repology_cli`](../src/repology/repology_cli.py) from your local clone of the `tiiuae/sbomnix` repository:

```bash
# '--' signifies the end of argument list for `nix`.
# '--help' is the first argument to `repology_cli`
$ nix run .#repology_cli -- --help
```

## Usage Examples

### Search by Package Name Exact Match

The following query finds package name 'firefox' versions in the 'nix_unstable' repository:

```bash
$ repology_cli --pkg_exact "firefox" --repository nix_unstable
INFO Repology package info, packages:5

| repo         | package | version               | status   | potentially_vulnerable | newest_upstream_release | repo_version_classify |
|--------------+---------+-----------------------+----------+------------------------+-------------------------+-----------------------|
| nix_unstable | firefox | 102-unwrapped-102.8.0 | legacy   | 1                      | 110.0.1                 |                       |
| nix_unstable | firefox | 102.8.0               | legacy   | 1                      | 110.0.1                 |                       |
| nix_unstable | firefox | 110.0.1               | newest   | 0                      | 110.0.1                 |                       |
| nix_unstable | firefox | 111.0b7               | outdated | 0                      | 110.0.1                 | repo_pkg_needs_update |
| nix_unstable | firefox | 111.0b8               | devel    | 0                      | 110.0.1                 |                       |

For more details, see: https://repology.org/projects/?search=firefox&inrepo=nix_unstable
INFO Wrote: repology_report.csv
```

The output table includes the datapoints available in repology.org, as stated by each column name. As an example, the first row in the above output table means:

- package information was fetched for repository 'nix_unstable'
- package name is 'firefox'
- latest 'nix_unstable' includes a version of firefox with version string '102-unwrapped-102.8.0'
- firefox '102-unwrapped-102.8.0' status is 'legacy'. The details of each classification status are available in https://repology.org/docs/about.
- firefox '102-unwrapped-102.8.0' is potentially vulnerable, meaning the package version is associated with at least one CVE. For details of which CVEs repology determined the package is associated with, see: https://repology.org/project/firefox/cves or https://repology.org/project/firefox/cves?version=102-unwrapped-102.8.0
- newest upstream release version of firefox known to repology is '110.0.1'

In addition to the above datapoints, `repology_cli` adds the column 'repo_version_classify', which simply states whether the specific package version appears updatable in the given repository. As an example, in the above output, the second to last row states 'repo_pkg_needs_update', which means that it appears 'nix_unstable' should update firefox '111.0b7' to the latest firefox upstream release version '110.0.1'.

A full list of the repositories available in repology is at https://repology.org/repositories/statistics.
As an example, to repeat the earlier query for Debian 12, you would run:

```bash
$ repology_cli --pkg_exact "firefox" --repository debian_12
INFO Repology package info, packages:1

| repo      | package | version | status   | potentially_vulnerable | newest_upstream_release | repo_version_classify |
|-----------+---------+---------+----------+------------------------+-------------------------+-----------------------|
| debian_12 | firefox | 102.8.0 | outdated | 1                      | 110.0.1                 | repo_pkg_needs_update |

For more details, see: https://repology.org/projects/?search=firefox&inrepo=debian_12
INFO Wrote: repology_report.csv
```

### Search by Package Name Search Term

The following query finds 'debian_12' packages that include 'firefox' anywhere in the name string:

```bash
$ repology_cli --pkg_search "firefox" --repository debian_12
INFO Repology package info, packages:5

| repo      | package                     | version | status   | potentially_vulnerable | newest_upstream_release | repo_version_classify |
|-----------+-----------------------------+---------+----------+------------------------+-------------------------+-----------------------|
| debian_12 | activity-aware-firefox      | 0.4.1   | unique   | 0                      |                         |                       |
| debian_12 | firefox                     | 102.8.0 | outdated | 1                      | 110.0.1                 | repo_pkg_needs_update |
| debian_12 | firefox-esr-mobile-config   | 3.2.0   | unique   | 0                      |                         |                       |
| debian_12 | foxyproxy-firefox-extension | 7.5.1   | unique   | 0                      |                         |                       |
| debian_12 | perl:firefox-marionette     | 1.35    | newest   | 0                      | 1.35                    |                       |
```

Notice: using short search strings with `--pkg_search` might result in a large number of matches and, thus, potentially a large number of queries to repology.org. To avoid spamming repology.org with such queries, `repology_cli` limits the number of requests sent to repology.org to at most one request per second. In addition, it caches all responses locally for two hours.

### Search by Package Names in SBOM

The following query finds 'nix_unstable' packages that match the packages in the CycloneDX sbom 'wget.runtime.sbom.cdx.json':

```bash
$ repology_cli --sbom_cdx wget.runtime.sbom.cdx.json --repository nix_unstable
INFO Repology package info, packages:9

| repo         | package      | version  | status   | potentially_vulnerable | newest_upstream_release | version_sbom | sbom_version_classify | repo_version_classify |
|--------------+--------------+----------+----------+------------------------+-------------------------+--------------+-----------------------+-----------------------|
| nix_unstable | glibc        | 2.35-224 | outdated | 0                      | 2.37                    | 2.35-224     | sbom_pkg_needs_update | repo_pkg_needs_update |
| nix_unstable | libidn2      | 2.3.2    | outdated | 0                      | 2.3.4                   | 2.3.2        | sbom_pkg_needs_update | repo_pkg_needs_update |
| nix_unstable | libunistring | 1.0      | outdated | 0                      | 1.1                     | 1.0          | sbom_pkg_needs_update | repo_pkg_needs_update |
| nix_unstable | openssl      | 1.1.1t   | legacy   | 0                      | 3.0.8                   | 3.0.8        |                       |                       |
| nix_unstable | openssl      | 3.0.8    | newest   | 0                      | 3.0.8                   | 3.0.8        |                       |                       |
| nix_unstable | pcre         | 8.45     | newest   | 0                      | 8.45                    | 8.45         |                       |                       |
| nix_unstable | wget         | 1.21.3   | legacy   | 0                      | 2.0.1                   | 1.21.3       |                       |                       |
| nix_unstable | wget         | 2.0.1    | newest   | 0                      | 2.0.1                   | 1.21.3       | sbom_pkg_needs_update |                       |
| nix_unstable | zlib         | 1.2.13   | newest   | 0                      | 1.2.13                  | 1.2.13       |                       |                       |
```

The output includes package details for the packages in the given SBOM that were also found in repology.org.
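The CycloneDX input can be produced with `sbomnix` first. A sketch chaining the two tools (the sbom filename follows the `sbomnix` default shown in the main README; the `--sbom_cdx` and `--repository` flags are as documented above):

```bash
# Generate the CycloneDX SBOM for wget, then look its packages up in repology
$ sbomnix github:NixOS/nixpkgs/nixos-unstable#wget
$ repology_cli --sbom_cdx sbom.cdx.json --repository nix_unstable
```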
In addition to the datapoints covered in section [Search by Package Name Exact Match](#search-by-package-name-exact-match), `repology_cli` adds the column 'sbom_version_classify', which states whether the package version in the SBOM appears outdated. As an example, in the above output, the 'wget' version in the sbom is '1.21.3'. Column 'sbom_version_classify' states 'sbom_pkg_needs_update' because 'nix_unstable' would have an update to the 'wget' package, to version '2.0.1'.

### Statistics: SBOM Packages

The following is the same query as above, but with the command-line argument `--stats` added to print out some simple statistics that might help explain the results.

```bash
$ repology_cli --sbom_cdx wget.runtime.sbom.cdx.json --repository nix_unstable --stats
INFO Repology package info, packages:9

| repo         | package      | version  | status   | potentially_vulnerable | newest_upstream_release | version_sbom | sbom_version_classify | repo_version_classify |
|--------------+--------------+----------+----------+------------------------+-------------------------+--------------+-----------------------+-----------------------|
| nix_unstable | glibc        | 2.35-224 | outdated | 0                      | 2.37                    | 2.35-224     | sbom_pkg_needs_update | repo_pkg_needs_update |
| nix_unstable | libidn2      | 2.3.2    | outdated | 0                      | 2.3.4                   | 2.3.2        | sbom_pkg_needs_update | repo_pkg_needs_update |
| nix_unstable | libunistring | 1.0      | outdated | 0                      | 1.1                     | 1.0          | sbom_pkg_needs_update | repo_pkg_needs_update |
| nix_unstable | openssl      | 1.1.1t   | legacy   | 0                      | 3.0.8                   | 3.0.8        |                       |                       |
| nix_unstable | openssl      | 3.0.8    | newest   | 0                      | 3.0.8                   | 3.0.8        |                       |                       |
| nix_unstable | pcre         | 8.45     | newest   | 0                      | 8.45                    | 8.45         |                       |                       |
| nix_unstable | wget         | 1.21.3   | legacy   | 0                      | 2.0.1                   | 1.21.3       |                       |                       |
| nix_unstable | wget         | 2.0.1    | newest   | 0                      | 2.0.1                   | 1.21.3       | sbom_pkg_needs_update |                       |
| nix_unstable | zlib         | 1.2.13   | newest   | 0                      | 1.2.13                  | 1.2.13       |                       |                       |

For more details, see: https://repology.org/projects/
INFO Repology package statistics: (see the status descriptions in: https://repology.org/docs/about)
Unique compared packages: 7 (100%) (status in: ['newest', 'devel', 'unique', 'outdated'])
 ==> newest: 4 (57%)
 ==> outdated: 3 (43%)
 ==> devel or unique: 0 (0%)
 ==> potentially vulnerable: 0 (0%)
INFO Repology SBOM package statistics:
Unique packages: 10 (100%)
 ==> sbom packages in repology: 9 (90%)
 ==> sbom packages not in repology: 1 (10%)
     - IGNORED (sbom component is not a package in repology): 0
     - NO_VERSION (sbom component is missing the version number): 0
     - NOT_FOUND (sbom component was not found in repology): 1
INFO Wrote: repology_report.csv
```

Section 'Repology package statistics' in the console output indicates that:

- There were seven packages whose status was one of `['newest', 'devel', 'unique', 'outdated']`. These are the package statuses `repology_cli` considers in the statistics output.
- Four out of the total of seven packages had the status 'newest'. This number indicates how many packages are up-to-date with their known latest upstream release versions.
- Three out of seven packages have the status 'outdated'. This number indicates how many packages in the 'nix_unstable' repository are not up-to-date with their known latest upstream release versions.
- There were no devel or unique packages. 'devel' packages indicate latest development or unstable package versions, whereas 'unique' packages are only present in a single repository family, meaning there are no other sources for repology.org to compare them against.
- There were no packages with known vulnerabilities associated with them.

Section 'Repology SBOM package statistics' in the console output indicates that:

- The baseline for the SBOM package comparison is ten unique packages. This number includes the unique components in the cdx SBOM (as identified by the component name and version), as well as other current package versions in 'nix_unstable' known to repology.
- Nine component names in the SBOM can be matched with package names in repology.
- One package was not included in the comparison by `repology_cli`. The reason is 'NOT_FOUND', meaning the package was not found in repology.org. Other possible reasons for `repology_cli` to skip SBOM packages are IGNORED and NO_VERSION. IGNORED means the sbom component name indicates the component is not a package in repology.org; typical examples of IGNORED packages would be archives (.tar.gz) or patches (.patch). NO_VERSION means the sbom component was missing the version information. Typically, such packages are service files, scripts, or configuration files that are not considered packages in repology.org but can be included as separate components in the SBOM.

In addition to the console output, `repology_cli` outputs the full data set in a csv file. As an example, you could query the `repology_report.csv` for more details of the skipped packages:

```bash
$ csvsql --query "select * from repology_report where status == 'NOT_FOUND'" repology_report.csv | csvlook
| repo         | package            | version | status    | ... | version_sbom |
| ------------ | ------------------ | ------- | --------- | --- | ------------ |
| nix_unstable | util-linux-minimal | 2.38.1  | NOT_FOUND | ... | 2.38.1       |
```

Above, we can see that the package 'util-linux-minimal', which is one of the components in the example sbom 'wget.runtime.sbom.cdx.json', is not available (with that exact same name) in repology.org.

### Repology CVE search

The following query shows an example of using the [`repology_cve`](../src/repology/repology_cve.py) client to query CVEs known to repology.org that impact package `openssl` version `3.1.1`.

```bash
$ repology_cve openssl 3.1.1
INFO Repology affected CVE(s)

| package | version | cve           |
|---------+---------+---------------|
| openssl | 3.1.1   | CVE-2023-2975 |
| openssl | 3.1.1   | CVE-2023-3446 |
| openssl | 3.1.1   | CVE-2023-3817 |
| openssl | 3.1.1   | CVE-2023-4807 |
| openssl | 3.1.1   | CVE-2023-5363 |
| openssl | 3.1.1   | CVE-2023-5678 |

INFO Wrote: repology_cves.csv
```

================================================
FILE: doc/vulnxscan.md
================================================

# vulnxscan

[`vulnxscan`](../src/vulnxscan/vulnxscan_cli.py) is a command line utility that demonstrates running vulnerability scans using SBOM as input. It mainly targets nix packages; however, it can be used with other targets too, as long as the target is expressed as a valid CycloneDX SBOM.
Table of Contents
=================

* [Getting Started](#getting-started)
* [Example Target](#example-target)
* [Supported Scanners](#supported-scanners)
  * [Nix and OSV Vulnerability Database](#nix-and-osv-vulnerability-database)
  * [Nix and Grype](#nix-and-grype)
  * [Vulnix](#vulnix)
* [Vulnxscan Usage Examples](#vulnxscan-usage-examples)
  * [Find Vulnerabilities Impacting Runtime Dependencies](#find-vulnerabilities-impacting-runtime-dependencies)
  * [Whitelisting Vulnerabilities](#whitelisting-vulnerabilities)
  * [Find Vulnerabilities Given SBOM as Input](#find-vulnerabilities-given-sbom-as-input)
  * [Find Vulnerabilities Impacting Buildtime and Runtime Dependencies](#find-vulnerabilities-impacting-buildtime-and-runtime-dependencies)
  * [Using Whitelist to Record Manual Analysis Results](#using-whitelist-to-record-manual-analysis-results)
  * [Triage to Help Manual Analysis](#triage-to-help-manual-analysis)
* [Footnotes and Future Work](#footnotes-and-future-work)

## Getting Started

To get started, follow the [Getting Started](../README.md#getting-started) section from the main [README](../README.md).

As an example, to run `vulnxscan` from your local clone of the `tiiuae/sbomnix` repository:

```bash
# '--' signifies the end of argument list for `nix`.
# '--help' is the first argument to `vulnxscan`
$ nix run .#vulnxscan -- --help
```

## Example Target

In the below examples, we use `git` as an example target for `vulnxscan`, referred to by flakeref `github:NixOS/nixpkgs/nixos-unstable#git`.

## Supported Scanners

### Nix and OSV Vulnerability Database

[OSV](https://osv.dev/) is a vulnerability database for open-source projects [initiated by Google](https://security.googleblog.com/2021/02/launching-osv-better-vulnerability.html). The [OSV database](https://osv.dev/list?ecosystem=) currently [does not support the Nix ecosystem](https://ossf.github.io/osv-schema/#affectedpackage-field), so queries that specify Nix as the ecosystem would not return any matches. For this reason `vulnxscan` currently does not use Google's official [OSV-Scanner](https://security.googleblog.com/2022/12/announcing-osv-scanner-vulnerability.html) front-end, but implements its own OSV client demo in [osv.py](../src/vulnxscan/osv.py).

`osv.py` sends queries to the [OSV API](https://osv.dev/docs/) without specifying the ecosystem, only the target package name and version. At the time of writing, such queries to the OSV API return vulnerabilities that match the given package and version across all ecosystems. As a result, the OSV vulnerabilities for the Nix ecosystem will include false positives. Also, it is worth mentioning that OSV queries without an ecosystem are currently undocumented in the [API specification](https://osv.dev/docs/#tag/api/operation/OSV_QueryAffected).

### Nix and Grype

[Grype](https://github.com/anchore/grype) is a vulnerability scanner targeted at container images. It uses the vulnerability data from [a variety of publicly available data sources](https://github.com/anchore/grype#grypes-database). Grype also [supports input from CycloneDX SBOM](https://github.com/anchore/grype#supported-sources), which makes it possible to use Grype with SBOM input from `sbomnix`, thus allowing Grype scans against Nix targets.

### Vulnix

[Vulnix](https://github.com/nix-community/vulnix) is a vulnerability scanner intended for Nix targets. It uses the [NIST NVD](https://nvd.nist.gov/vuln) vulnerability database.
Vulnix matches vulnerabilities based on a [heuristic](https://github.com/nix-community/vulnix/blob/f56f3ac857626171b95e51d98cb6874278f789d3/src/vulnix/derivation.py#L104), which might result in more false positives compared to a direct match. False positives due to the rough heuristic are an [intended feature](https://github.com/nix-community/vulnix#whitelisting) in vulnix. On the other hand, vulnix accounts for [CVE patches](https://github.com/nix-community/vulnix#cve-patch-auto-detection) applied to Nix packages when matching vulnerabilities, something currently not directly supported by other scanners.

## Vulnxscan Usage Examples

### Find Vulnerabilities Impacting Runtime Dependencies

This example shows how to use `vulnxscan` to summarize vulnerabilities impacting the given target or any of its runtime dependencies. The captured output is illustrative; exact versions and findings will differ depending on the package versions resolved at run time.

```bash
# Target can be specified as a flakeref or a nix store path, e.g.:
#   vulnxscan .
#   vulnxscan github:tiiuae/sbomnix
#   vulnxscan nixpkgs#git
#   vulnxscan /nix/store/...
# Ref: https://nixos.org/manual/nix/stable/command-ref/new-cli/nix3-flake.html#flake-references
$ vulnxscan github:NixOS/nixpkgs/nixos-unstable#git
INFO Generating SBOM for target '/nix/store/...-git-'
INFO CVE-2023-2975 for 'openssl' is patched with: ['/nix/store/7gz0nj14469r9dlh8p0j5w5wjj3b6hw4-CVE-2023-2975.patch']
INFO CVE-2023-2975 for 'openssl' is patched with: ['/nix/store/7gz0nj14469r9dlh8p0j5w5wjj3b6hw4-CVE-2023-2975.patch']
INFO Console report

Potential vulnerabilities impacting version_local:

| vuln_id          | url                                               | package   | version | severity | grype | osv | vulnix | sum |
|------------------+---------------------------------------------------+-----------+---------+----------+-------+-----+--------+-----|
| CVE-2023-3817    | https://nvd.nist.gov/vuln/detail/CVE-2023-3817    | openssl   | 3.0.9   | 5.3      | 1     | 0   | 1      | 2   |
| CVE-2022-38663   | https://nvd.nist.gov/vuln/detail/CVE-2022-38663   | git       | 2.41.0  | 6.5      | 0     | 0   | 1      | 1   |
| CVE-2022-36884   | https://nvd.nist.gov/vuln/detail/CVE-2022-36884   | git       | 2.41.0  | 5.3      | 0     | 0   | 1      | 1   |
| CVE-2022-36883   | https://nvd.nist.gov/vuln/detail/CVE-2022-36883   | git       | 2.41.0  | 7.5      | 0     | 0   | 1      | 1   |
| CVE-2022-36882   | https://nvd.nist.gov/vuln/detail/CVE-2022-36882   | git       | 2.41.0  | 8.8      | 0     | 0   | 1      | 1   |
| CVE-2022-30949   | https://nvd.nist.gov/vuln/detail/CVE-2022-30949   | git       | 2.41.0  | 5.3      | 0     | 0   | 1      | 1   |
| CVE-2022-30948   | https://nvd.nist.gov/vuln/detail/CVE-2022-30948   | git       | 2.41.0  | 7.5      | 0     | 0   | 1      | 1   |
| CVE-2022-30947   | https://nvd.nist.gov/vuln/detail/CVE-2022-30947   | git       | 2.41.0  | 7.5      | 0     | 0   | 1      | 1   |
| MAL-2022-4301    | https://osv.dev/MAL-2022-4301                     | libidn2   | 2.3.4   |          | 0     | 1   | 0      | 1   |
| CVE-2021-21684   | https://nvd.nist.gov/vuln/detail/CVE-2021-21684   | git       | 2.41.0  | 6.1      | 0     | 0   | 1      | 1   |
| CVE-2020-2136    | https://nvd.nist.gov/vuln/detail/CVE-2020-2136    | git       | 2.41.0  | 5.4      | 0     | 0   | 1      | 1   |
| CVE-2019-1003010 | https://nvd.nist.gov/vuln/detail/CVE-2019-1003010 | git       | 2.41.0  | 4.3      | 0     | 0   | 1      | 1   |
| CVE-2018-1000182 | https://nvd.nist.gov/vuln/detail/CVE-2018-1000182 | git       | 2.41.0  | 6.4      | 0     | 0   | 1      | 1   |
| CVE-2018-1000110 | https://nvd.nist.gov/vuln/detail/CVE-2018-1000110 | git       | 2.41.0  | 5.3      | 0     | 0   | 1      | 1   |
| CVE-2016-2781    | https://nvd.nist.gov/vuln/detail/CVE-2016-2781    | coreutils | 9.3     | 6.5      | 1     | 0   | 0      | 1   |

INFO Wrote: vulns.csv
```

`vulnxscan` first creates an SBOM, then feeds the SBOM (or target path) as input to different
`vulnxscan` first creates an SBOM, then feeds the SBOM (or target path) as input to the vulnerability scanners ([vulnix](https://github.com/nix-community/vulnix), [grype](https://github.com/anchore/grype), and [osv.py](../src/vulnxscan/osv.py)), and creates a summary report. The summary report lists the newest vulnerabilities on top, with the `sum` column indicating how many scanners agreed with the exact same finding. In addition to the console output, `vulnxscan` writes the report to the CSV file `vulns.csv` to allow easier post-processing of the output.

It is worth mentioning that `vulnxscan` filters out vulnerabilities it detects are patched, as printed in the console output on lines like '`CVE-2023-2975 for 'openssl' is patched with: ['/nix/store/7gz0nj14469r9dlh8p0j5w5wjj3b6hw4-CVE-2023-2975.patch']`'. This patch auto-detection works in the same way as the [patch auto-detection in vulnix](https://github.com/nix-community/vulnix#cve-patch-auto-detection), that is, it is based on detecting vulnerability identifiers in the patch filenames.
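As a rough illustration of the idea (not the project's actual implementation), detecting patched CVEs from a derivation's patch filenames could be sketched as follows:

```python
import re

# Vulnerability identifiers are detected from the patch filenames; the
# patch list passed in below is a made-up example of a derivation's patches.
CVE_RE = re.compile(r"CVE-\d{4}-\d{4,}", re.IGNORECASE)

def patched_cves(patch_paths: list[str]) -> dict[str, list[str]]:
    """Map CVE identifiers to the patch files whose names mention them."""
    found: dict[str, list[str]] = {}
    for path in patch_paths:
        for cve in CVE_RE.findall(path):
            found.setdefault(cve.upper(), []).append(path)
    return found

print(patched_cves(["/nix/store/7gz0nj14469r9dlh8p0j5w5wjj3b6hw4-CVE-2023-2975.patch"]))
```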
### Whitelisting Vulnerabilities
`vulnxscan` supports whitelisting vulnerabilities to exclude false positives, unfixable issues, or vulnerabilities known to be addressed. The whitelist is a CSV file that contains rules for the vulnerabilities to be excluded from the `vulnxscan` console report.

Consider the following example whitelist:

```
$ csvlook whitelist.csv

| vuln_id        | package   | comment                                                                  |
| -------------- | --------- | ------------------------------------------------------------------------ |
| MAL-2022-4301  |           | Incorrect package: Issue refers npm libidn2, not libidn2.                |
| CVE-2016-2781  | coreutils | NVD data issue: CPE entry does not correctly state the version numbers.  |
| CVE-20.*       | git       | Incorrect package: Impacts Jenkins git plugin, not git.                  |
```

`vuln_id` and `comment` are mandatory columns. `vuln_id` specifies a regular expression that is matched against the vulnerability identifier (`vuln_id`) in the `vulnxscan` output. Vulnerabilities that match the regular expression are excluded from the `vulnxscan` console output. If the whitelist includes a `package` column, then in addition to matching `vuln_id`, an exact match is required against the `package` field in the `vulnxscan` output. If multiple rules match a vulnerability, rules nearer the top of the whitelist take priority.

To make it possible to verify which vulnerabilities are whitelisted, the `vulnxscan` CSV output `vulns.csv` includes both whitelisted and non-whitelisted vulnerabilities, flagged by the boolean column `whitelist`. `vulns.csv` also carries over the `comment` field from the whitelist, so the reason for whitelisting each vulnerability can be verified. The example below applies the above whitelist to the `git` vulnxscan output from the earlier example.

```bash
# Given the whitelist.csv contents:
$ cat whitelist.csv
"vuln_id","package","comment"
"MAL-2022-4301",,"Incorrect package: Issue refers npm libidn2, not libidn2."
"CVE-2016-2781","coreutils","NVD data issue: CPE entry does not correctly state the version numbers."
"CVE-20.*","git","Incorrect package: Impacts Jenkins git plugin, not git."

# Apply the whitelist to git vulnxscan output
$ vulnxscan github:NixOS/nixpkgs/nixos-unstable#git --whitelist=whitelist.csv

INFO Generating SBOM for target '/nix/store/...-git-'
INFO CVE-2023-2975 for 'openssl' is patched with: ['/nix/store/7gz0nj14469r9dlh8p0j5w5wjj3b6hw4-CVE-2023-2975.patch']
INFO CVE-2023-2975 for 'openssl' is patched with: ['/nix/store/7gz0nj14469r9dlh8p0j5w5wjj3b6hw4-CVE-2023-2975.patch']
INFO Console report

Potential vulnerabilities impacting version_local:

# Note: the console output now includes only non-whitelisted entries:
| vuln_id       | url                                            | package | version | severity | grype | osv | vulnix | sum |
|---------------+------------------------------------------------+---------+---------+----------+-------+-----+--------+-----|
| CVE-2023-3817 | https://nvd.nist.gov/vuln/detail/CVE-2023-3817 | openssl | 3.0.9   | 5.3      |     1 |   0 |      1 |   2 |

INFO Wrote: vulns.csv

# In addition to the console report, vulnxscan writes a detailed report in a csv file,
# by default 'vulns.csv', which includes the full details also from the whitelisted vulnerabilities:
$ csvlook vulns.csv

| vuln_id | url | package | version | severity | grype | osv | vulnix | sum | sortcol | whitelist | whitelist_comment |
| ---------------- | ------------------------------------------------- | --------- | ------- | -------- | ----- | ----- | ------ | --- | --------------- | --------- | ------------------------------------------------------------------------ |
| CVE-2023-3817 | https://nvd.nist.gov/vuln/detail/CVE-2023-3817 | openssl | 3.0.9 | 5.3 | True | False | True | 2 | 2023A0000003817 | False | |
| CVE-2022-38663 | https://nvd.nist.gov/vuln/detail/CVE-2022-38663 | git | 2.41.0 | 6.5 | False | False | True | 1 | 2022A0000038663 | True | Incorrect package: Impacts Jenkins git plugin, not git. |
| CVE-2022-36884 | https://nvd.nist.gov/vuln/detail/CVE-2022-36884 | git | 2.41.0 | 5.3 | False | False | True | 1 | 2022A0000036884 | True | Incorrect package: Impacts Jenkins git plugin, not git. |
| CVE-2022-36883 | https://nvd.nist.gov/vuln/detail/CVE-2022-36883 | git | 2.41.0 | 7.5 | False | False | True | 1 | 2022A0000036883 | True | Incorrect package: Impacts Jenkins git plugin, not git. |
| CVE-2022-36882 | https://nvd.nist.gov/vuln/detail/CVE-2022-36882 | git | 2.41.0 | 8.8 | False | False | True | 1 | 2022A0000036882 | True | Incorrect package: Impacts Jenkins git plugin, not git. |
| CVE-2022-30949 | https://nvd.nist.gov/vuln/detail/CVE-2022-30949 | git | 2.41.0 | 5.3 | False | False | True | 1 | 2022A0000030949 | True | Incorrect package: Impacts Jenkins git plugin, not git. |
| CVE-2022-30948 | https://nvd.nist.gov/vuln/detail/CVE-2022-30948 | git | 2.41.0 | 7.5 | False | False | True | 1 | 2022A0000030948 | True | Incorrect package: Impacts Jenkins git plugin, not git. |
| CVE-2022-30947 | https://nvd.nist.gov/vuln/detail/CVE-2022-30947 | git | 2.41.0 | 7.5 | False | False | True | 1 | 2022A0000030947 | True | Incorrect package: Impacts Jenkins git plugin, not git. |
| MAL-2022-4301 | https://osv.dev/MAL-2022-4301 | libidn2 | 2.3.4 | | False | True | False | 1 | 2022A0000004301 | True | Incorrect package: Issue refers npm libidn2, not libidn2. |
| CVE-2021-21684 | https://nvd.nist.gov/vuln/detail/CVE-2021-21684 | git | 2.41.0 | 6.1 | False | False | True | 1 | 2021A0000021684 | True | Incorrect package: Impacts Jenkins git plugin, not git. |
| CVE-2020-2136 | https://nvd.nist.gov/vuln/detail/CVE-2020-2136 | git | 2.41.0 | 5.4 | False | False | True | 1 | 2020A0000002136 | True | Incorrect package: Impacts Jenkins git plugin, not git. |
| CVE-2019-1003010 | https://nvd.nist.gov/vuln/detail/CVE-2019-1003010 | git | 2.41.0 | 4.3 | False | False | True | 1 | 2019A0001003010 | True | Incorrect package: Impacts Jenkins git plugin, not git. |
| CVE-2018-1000182 | https://nvd.nist.gov/vuln/detail/CVE-2018-1000182 | git | 2.41.0 | 6.4 | False | False | True | 1 | 2018A0001000182 | True | Incorrect package: Impacts Jenkins git plugin, not git. |
| CVE-2018-1000110 | https://nvd.nist.gov/vuln/detail/CVE-2018-1000110 | git | 2.41.0 | 5.3 | False | False | True | 1 | 2018A0001000110 | True | Incorrect package: Impacts Jenkins git plugin, not git. |
| CVE-2016-2781 | https://nvd.nist.gov/vuln/detail/CVE-2016-2781 | coreutils | 9.3 | 6.5 | True | False | False | 1 | 2016A0000002781 | True | NVD data issue: CPE entry does not correctly state the version numbers. |
```
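The rule semantics described above can be sketched roughly as follows. This is a simplified illustration of the matching rules (regular-expression `vuln_id`, exact `package`, first match wins), not the project's actual implementation; whether the regular expression is anchored is an assumption here:

```python
import csv
import re

def first_matching_rule(rules, vuln_id, package):
    """Return the first whitelist rule matching the finding, or None."""
    for rule in rules:  # rules nearer the top take priority
        if not re.fullmatch(rule["vuln_id"], vuln_id):
            continue
        if rule.get("package") and rule["package"] != package:
            continue  # 'package', when given, must match exactly
        return rule
    return None

with open("whitelist.csv", newline="", encoding="utf-8") as f:
    rules = list(csv.DictReader(f))

# CVE-2022-38663 on 'git' hits the 'CVE-20.*' rule, so it is whitelisted:
print(first_matching_rule(rules, "CVE-2022-38663", "git"))
```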
See the ghafscan [manual_analysis.csv](https://github.com/tiiuae/ghafscan/blob/main/manual_analysis.csv) for a more complete example and usage of the vulnxscan whitelisting feature.

### Find Vulnerabilities Given SBOM as Input
This example shows how to use `vulnxscan` to summarize vulnerabilities impacting components in a given CycloneDX SBOM.

First, we use `sbomnix` to generate an SBOM for the example target:

```bash
$ nix run .#sbomnix -- github:NixOS/nixpkgs/nixos-unstable#git
..
INFO Wrote: sbom.cdx.json
```

Then, give the generated SBOM as input to `vulnxscan`:

```bash
$ vulnxscan --sbom sbom.cdx.json

INFO Console report

Potential vulnerabilities impacting version_local:

| vuln_id       | url                                            | package   | version | severity | grype | osv | sum |
|---------------+------------------------------------------------+-----------+---------+----------+-------+-----+-----|
| CVE-2023-3817 | https://nvd.nist.gov/vuln/detail/CVE-2023-3817 | openssl   | 3.0.9   | 5.3      |     1 |   0 |   1 |
| CVE-2023-2975 | https://nvd.nist.gov/vuln/detail/CVE-2023-2975 | openssl   | 3.0.9   | 5.3      |     1 |   0 |   1 |
| MAL-2022-4301 | https://osv.dev/MAL-2022-4301                  | libidn2   | 2.3.4   |          |     0 |   1 |   1 |
| CVE-2016-2781 | https://nvd.nist.gov/vuln/detail/CVE-2016-2781 | coreutils | 9.3     | 6.5      |     1 |   0 |   1 |

INFO Wrote: vulns.csv
```

Notice that `vulnxscan` drops the vulnix scan when the input is an SBOM, because vulnix does not support SBOM input at the time of writing. Also notice that `vulnxscan` drops the patch auto-detection when the input is an SBOM: `vulnxscan` reads the patch information from nix derivations, so the patch information is only available when the given input is a Nix store path (e.g. a derivation or out-path), not an SBOM.
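For context, the scanners only need the component names and versions from the SBOM. A minimal sketch of pulling those pairs out of a CycloneDX JSON document, assuming the standard `components` array:

```python
import json

def sbom_components(path: str) -> list[tuple[str, str]]:
    """Return (name, version) pairs from a CycloneDX JSON SBOM."""
    with open(path, encoding="utf-8") as f:
        bom = json.load(f)
    return [
        (comp.get("name", ""), comp.get("version", ""))
        for comp in bom.get("components", [])
    ]

print(sbom_components("sbom.cdx.json"))
```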
### Find Vulnerabilities Impacting Buildtime and Runtime Dependencies
By default, `vulnxscan` scans the given target for vulnerabilities that impact its runtime dependencies only. This example shows how to also include buildtime dependencies in the scan:

```bash
$ vulnxscan ./result --buildtime

# ... output not included in this snippet ...
```

### Using Whitelist to Record Manual Analysis Results
`vulnxscan` supports using the whitelist CSV file as a more general record of manual analysis results by allowing non-whitelisting rules. That is, the whitelist CSV file can include a boolean `whitelist` column to indicate whether the matching vulnerabilities should be whitelisted. The default value for `whitelist` is True: if the `whitelist` column is missing or its value is empty, `vulnxscan` interprets the rule as if the `whitelist` value were True.

As an example, consider the following manual analysis record (i.e. 'whitelist'):

```
csvlook manual_analysis.csv

| vuln_id        | whitelist | package   | comment                                                            |
| -------------- | --------- | --------- | ------------------------------------------------------------------ |
| CVE-2022-0856  | False     | libcaca   | Not fixed upstream: https://github.com/cacalabs/libcaca/issues/65. |
| CVE-2021-32490 | False     | djvulibre | Pending merge: https://github.com/NixOS/nixpkgs/pull/246773.       |
```

The above example `manual_analysis.csv` includes two rules: one for `CVE-2022-0856` and one for `CVE-2021-32490`. For both, the `whitelist` column value is '`False`', indicating a non-whitelisting rule. This means that, in both cases, we want to record the manual analysis results as detailed in the `comment` column, but we don't want to whitelist the matching vulnerabilities. Specifically, in the case of `CVE-2022-0856`, we don't want to whitelist the issue since it's not fixed upstream, but we still want to record the link to the upstream issue to make it easier to follow the upstream progress. In the case of `CVE-2021-32490`, we don't want to whitelist the issue since the nixpkgs PR is still pending merge. In this case too, we still want to record the nixpkgs PR to allow following the progress.

See the ghafscan [manual_analysis.csv](https://github.com/tiiuae/ghafscan/blob/main/manual_analysis.csv) for a more complete example and usage of non-whitelisting rules to help manual analysis.

### Triage to Help Manual Analysis
`vulnxscan` can help manual analysis with the `--triage` and `--nixprs` command line options.

With the command line option `--triage`, `vulnxscan` queries repology.org for the nix-unstable and package upstream version information, as well as for the versions each CVE impacts. With the additional information from repology.org, `vulnxscan` classifies each vulnerability accordingly.
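A rough sketch of the kind of repology.org query this relies on is shown below. The endpoint follows the public Repology API (https://repology.org/api/v1), and the `repo`/`status` field names and the `nix_unstable` repository identifier are assumptions based on that API; the actual mapping from nix package names to repology project names is more involved than shown here:

```python
import requests

def repology_versions(project: str) -> tuple[list[str], list[str]]:
    """Return (nix-unstable versions, newest known versions) for a project."""
    resp = requests.get(
        f"https://repology.org/api/v1/project/{project}",
        headers={"User-Agent": "vulnxscan-example"},
        timeout=30,
    )
    resp.raise_for_status()
    packages = resp.json()
    # Each entry describes the project's packaging in one repository
    nix_unstable = [p["version"] for p in packages if p.get("repo") == "nix_unstable"]
    newest = [p["version"] for p in packages if p.get("status") == "newest"]
    return nix_unstable, newest

print(repology_versions("qemu"))
```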
Consider the following example, using [ghaf](https://github.com/tiiuae/ghaf) as the target:

```bash
# Run vulnxscan:
# --buildtime: Scan buildtime dependencies. Scanning buildtime dependencies does not
#              require building the target, which allows a relatively quick scan also
#              for targets not built earlier. Notice: nix 'buildtime' dependencies are
#              a superset of runtime dependencies.
# --whitelist: Use 'manual_analysis.csv' as a whitelist file.
# --triage   : Help manual analysis by querying version info from repology.org.

$ vulnxscan github:tiiuae/ghaf?ref=main#packages.x86_64-linux.generic-x86_64-release --buildtime --whitelist=manual_analysis.csv --triage

INFO Generating SBOM for target '/nix/store/...-nixos-disk-image.drv'
INFO CVE-2023-27371 for 'libmicrohttpd' is patched with: ['/nix/store/l53sq07v6hghm7cchcjbrwyvjyjag06r-CVE-2023-27371.patch']
INFO CVE-2023-2975 for 'openssl' is patched with: ['/nix/store/7gz0nj14469r9dlh8p0j5w5wjj3b6hw4-CVE-2023-2975.patch']
INFO CVE-2023-2975 for 'openssl' is patched with: ['/nix/store/7gz0nj14469r9dlh8p0j5w5wjj3b6hw4-CVE-2023-2975.patch']
INFO CVE-2023-2975 for 'openssl' is patched with: ['/nix/store/7gz0nj14469r9dlh8p0j5w5wjj3b6hw4-CVE-2023-2975.patch']
INFO CVE-2023-2975 for 'openssl' is patched with: ['/nix/store/7gz0nj14469r9dlh8p0j5w5wjj3b6hw4-CVE-2023-2975.patch']
INFO CVE-2023-2617 for 'opencv' is patched with: ['/nix/store/vw29nr5nrfs10vv5p3m7rpkqscwrh4sp-CVE-2023-2617.patch']
...

Potential vulnerabilities impacting version_local:

| vuln_id             | package    | severity | version_local | version_nixpkgs | version_upstream | classify                             |
|---------------------+------------+----------+---------------+-----------------+------------------+--------------------------------------|
| CVE-2023-40360      | qemu       | 5.5      | 8.0.2         | 8.1.0           | 8.1.0            | fix_update_to_version_nixpkgs        |
| CVE-2023-40359      | xterm      | 9.8      | 379           | 384             | 384              | fix_update_to_version_nixpkgs        |
| CVE-2023-39742      | giflib     | 5.5      | 5.2.1         | 5.2.1           | 5.2.1            | fix_not_available                    |
| CVE-2023-39533      | go         | 7.5      | 1.20.6        | 1.21.1          | 1.21.1           | fix_update_to_version_nixpkgs        |
| CVE-2023-38858      | faad2      | 6.5      | 2.10.1        | 2.10.1          | 2.10.1           | fix_not_available                    |
| CVE-2023-38857      | faad2      | 5.5      | 2.10.1        | 2.10.1          | 2.10.1           | fix_not_available                    |
| CVE-2023-38633      | librsvg    | 5.5      | 2.55.1        | 2.56.3          | 2.56.3           | fix_update_to_version_nixpkgs        |
| CVE-2023-37769      | pixman     | 6.5      | 0.42.2        | 0.42.2          | 0.42.2           | err_not_vulnerable_based_on_repology |
| CVE-2023-31484      | perl       | 8.1      | 5.36.0-env    | 5.38.0          | 5.38.0           | fix_update_to_version_nixpkgs        |
| CVE-2023-31484      | perl       | 8.1      | 5.36.0        | 5.38.0          | 5.38.0           | fix_update_to_version_nixpkgs        |
| CVE-2023-30571      | libarchive | 5.3      | 3.6.2         | 3.6.2           | 3.7.1            | fix_update_to_version_upstream       |
| CVE-2023-29409      | go         | 5.3      | 1.20.6        | 1.21.1          | 1.21.1           | fix_update_to_version_nixpkgs        |
| CVE-2023-29383      | shadow     | 3.3      | 4.13          | 4.13            | 4.14.0           | fix_update_to_version_upstream       |

... (output truncated) ...

INFO Wrote: /home/hrosten/projects/sbomnix-fork/vulns.csv
INFO Wrote: /home/hrosten/projects/sbomnix-fork/vulns.triage.csv
```

As an example, the output table states the following (a rough sketch of the classification logic follows the list):

- Package `qemu` 8.0.2, which is a dependency of ghaf, is potentially vulnerable to CVE-2023-40360.
- Based on repology.org, the newest `qemu` version in nix-unstable is 8.1.0, which is also the latest version in the `qemu` upstream.
- Since `qemu` 8.0.2 is vulnerable to CVE-2023-40360 but the nix-unstable version 8.1.0 is not, `vulnxscan` classifies the issue as `fix_update_to_version_nixpkgs`.
- Package `xterm` version 379 is potentially vulnerable to CVE-2023-40359. The latest version of `xterm` in nix-unstable is 384, which is not vulnerable to CVE-2023-40359. Therefore, `vulnxscan` classifies the issue as `fix_update_to_version_nixpkgs`.
- Package `libarchive` version 3.6.2 is potentially vulnerable to CVE-2023-30571. Both the local and nix-unstable versions (3.6.2) are vulnerable, but the upstream version 3.7.1 is not. Therefore, `vulnxscan` classifies the issue as `fix_update_to_version_upstream`.
- Package `giflib` version 5.2.1 is potentially vulnerable to CVE-2023-39742. Since there's no known fixed version available in nix-unstable or the package upstream, `vulnxscan` classifies the issue as `fix_not_available`. Notice that the classification is based only on the version numbers: it's still possible that an upstream patch available in an unreleased version of `giflib` would fix the issue.
- Package `pixman` version 0.42.2 is potentially vulnerable to CVE-2023-37769. However, based on repology.org, the vulnerability [does not impact](https://repology.org/project/pixman/cves?version=0.42.2) the given version of `pixman`. Therefore, `vulnxscan` classifies the issue as `err_not_vulnerable_based_on_repology`.
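The classification implied by these rules can be summarized with the following sketch. Here, `vulnerable(version)` stands in for the repology.org "is this version affected by the CVE?" lookup, which is not shown; the real logic also has to handle missing or ambiguous version data:

```python
def classify(version_local, version_nixpkgs, version_upstream, vulnerable):
    """Sketch of the triage classification, based only on version numbers."""
    if not vulnerable(version_local):
        return "err_not_vulnerable_based_on_repology"
    if not vulnerable(version_nixpkgs):
        return "fix_update_to_version_nixpkgs"
    if not vulnerable(version_upstream):
        return "fix_update_to_version_upstream"
    return "fix_not_available"

# Example: qemu 8.0.2 is affected, 8.1.0 is not
print(classify("8.0.2", "8.1.0", "8.1.0", lambda v: v == "8.0.2"))
```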
#### Nixpkgs PR Search
With the command line option `--nixprs`, `vulnxscan` queries GitHub for nixpkgs PRs that might include more information concerning possible nixpkgs fixes for the found vulnerabilities. `--nixprs` adds URLs for (at most five) PRs that appear relevant to each vulnerability based on a heuristic. The PR search takes significant time due to GitHub API rate limits, which is why it is not enabled by default.

Consider the following example, using the same Ghaf target as earlier:

```bash
# Run vulnxscan with --triage and --nixprs
$ vulnxscan github:tiiuae/ghaf?ref=main#packages.x86_64-linux.generic-x86_64-release --buildtime --whitelist=manual_analysis.csv --triage --nixprs

INFO Generating SBOM for target '/nix/store/...-nixos-disk-image.drv'
...

Potential vulnerabilities impacting version_local:

| vuln_id        | package    | severity | version_local | version_nixpkgs | version_upstream | classify                             | nixpkgs_pr                                   |
|----------------+------------+----------+---------------+-----------------+------------------+--------------------------------------+----------------------------------------------|
| CVE-2023-40360 | qemu       | 5.5      | 8.0.2         | 8.1.0           | 8.1.0            | fix_update_to_version_nixpkgs        | https://github.com/NixOS/nixpkgs/pull/251154 |
| CVE-2023-40359 | xterm      | 9.8      | 379           | 384             | 384              | fix_update_to_version_nixpkgs        | https://github.com/NixOS/nixpkgs/pull/244141 |
| CVE-2023-39742 | giflib     | 5.5      | 5.2.1         | 5.2.1           | 5.2.1            | fix_not_available                    |                                              |
| CVE-2023-39533 | go         | 7.5      | 1.20.6        | 1.21.1          | 1.21.1           | fix_update_to_version_nixpkgs        | https://github.com/NixOS/nixpkgs/pull/253738 |
| CVE-2023-38858 | faad2      | 6.5      | 2.10.1        | 2.10.1          | 2.10.1           | fix_not_available                    |                                              |
| CVE-2023-38857 | faad2      | 5.5      | 2.10.1        | 2.10.1          | 2.10.1           | fix_not_available                    |                                              |
| CVE-2023-38633 | librsvg    | 5.5      | 2.55.1        | 2.56.3          | 2.56.3           | fix_update_to_version_nixpkgs        | https://github.com/NixOS/nixpkgs/pull/246763 |
|                |            |          |               |                 |                  |                                      | https://github.com/NixOS/nixpkgs/pull/246860 |
| CVE-2023-37769 | pixman     | 6.5      | 0.42.2        | 0.42.2          | 0.42.2           | err_not_vulnerable_based_on_repology |                                              |
| CVE-2023-31484 | perl       | 8.1      | 5.36.0-env    | 5.38.0          | 5.38.0           | fix_update_to_version_nixpkgs        | https://github.com/NixOS/nixpkgs/pull/241848 |
|                |            |          |               |                 |                  |                                      | https://github.com/NixOS/nixpkgs/pull/247547 |
| CVE-2023-31484 | perl       | 8.1      | 5.36.0        | 5.38.0          | 5.38.0           | fix_update_to_version_nixpkgs        | https://github.com/NixOS/nixpkgs/pull/241848 |
|                |            |          |               |                 |                  |                                      | https://github.com/NixOS/nixpkgs/pull/247547 |
| CVE-2023-30571 | libarchive | 5.3      | 3.6.2         | 3.6.2           | 3.7.1            | fix_update_to_version_upstream       |                                              |
| CVE-2023-29409 | go         | 5.3      | 1.20.6        | 1.21.1          | 1.21.1           | fix_update_to_version_nixpkgs        | https://github.com/NixOS/nixpkgs/pull/247034 |
|                |            |          |               |                 |                  |                                      | https://github.com/NixOS/nixpkgs/pull/253738 |
| CVE-2023-29383 | shadow     | 3.3      | 4.13          | 4.13            | 4.14.0           | fix_update_to_version_upstream       | https://github.com/NixOS/nixpkgs/pull/233924 |
|                |            |          |               |                 |                  |                                      | https://github.com/NixOS/nixpkgs/pull/254143 |
```

The `vulnxscan` option `--nixprs` adds the column `nixpkgs_pr` to the output, helping manual analysis by listing PRs that appear relevant to the given issue.

## Footnotes and Future Work
For now, consider `vulnxscan` a demonstration. Some improvement ideas are listed below:
- Consider adding patch information to the SBOM (e.g. via the [pedigree](https://cyclonedx.org/use-cases/#pedigree) attribute) to be able to auto-detect patched vulnerabilities also when the input is an SBOM.
- Vulnerability scanners lack support for parsing the patch data: even if `sbomnix` added the patch data to the output SBOM, we suspect not many vulnerability scanners would read the information. As an example, the following discussion touches on this topic for DependencyTrack: https://github.com/DependencyTrack/dependency-track/issues/919.
- Identifying packages is hard, as pointed out in https://discourse.nixos.org/t/the-future-of-the-vulnerability-roundups/22424/5.
  As an example, CPEs are inaccurate, which causes issues in matching vulnerabilities: https://github.com/DependencyTrack/dependency-track/discussions/2290.
- The Nix ecosystem is not supported in OSV: the way `osv.py` uses OSV data for Nix targets -- as explained in section [Nix and OSV vulnerability database](#nix-and-osv-vulnerability-database) -- means the reported OSV vulnerabilities include false positives.

### Other Future Work
- [vulnxscan](../src/vulnxscan/vulnxscan_cli.py) could include more scanners in addition to [vulnix](https://github.com/nix-community/vulnix), [grype](https://github.com/anchore/grype), and [osv.py](../src/vulnxscan/osv.py). Suggestions for other open-source scanners, especially those that can digest CycloneDX or SPDX SBOMs, are welcome. Consider e.g. [bombon](https://github.com/nikstur/bombon) and [cve-bin-tool](https://github.com/intel/cve-bin-tool). Adding cve-bin-tool to vulnxscan was [demonstrated](https://github.com/tiiuae/sbomnix/pull/75) earlier, but not merged for the reasons explained in the [PR](https://github.com/tiiuae/sbomnix/pull/75#issuecomment-1670958503).



================================================
FILE: flake.nix
================================================
# SPDX-FileCopyrightText: 2023 Technology Innovation Institute (TII)
#
# SPDX-License-Identifier: Apache-2.0
{
  description = "Flakes file for sbomnix";

  inputs = {
    nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
    flake-parts.url = "github:hercules-ci/flake-parts";
    flake-root.url = "github:srid/flake-root";

    # For preserving compatibility with non-Flake users
    flake-compat = {
      url = "github:nix-community/flake-compat";
      flake = false;
    };

    # pre-commit hooks
    git-hooks-nix = {
      url = "github:cachix/git-hooks.nix";
      inputs = {
        nixpkgs.follows = "nixpkgs";
        flake-compat.follows = "flake-compat";
      };
    };
  };

  outputs =
    inputs@{ flake-parts, ... }:
    flake-parts.lib.mkFlake { inherit inputs; } {
      systems = [
        "x86_64-linux"
        "aarch64-linux"
        "aarch64-darwin"
      ];
      imports = [ ./nix ];
    };
}


================================================
FILE: nix/apps.nix
================================================
# SPDX-FileCopyrightText: 2023 Technology Innovation Institute (TII)
#
# SPDX-License-Identifier: Apache-2.0
{
  perSystem =
    {
      self',
      ...
}: { apps = let inherit (self'.packages) sbomnix; mkApp = program: description: { type = "app"; inherit program; meta = { inherit description; }; }; in { # nix run .#repology_cli repology_cli = mkApp "${sbomnix}/bin/repology_cli" "Query Repology using an SBOM as input"; # nix run .#repology_cve repology_cve = mkApp "${sbomnix}/bin/repology_cve" "Find CVEs for packages known to Repology"; # nix run .#nix_outdated nix_outdated = mkApp "${sbomnix}/bin/nix_outdated" "List outdated nix dependencies in priority order"; # nix run .#nixgraph nixgraph = mkApp "${sbomnix}/bin/nixgraph" "Visualize nix package dependencies"; # nix run .#nixmeta nixmeta = mkApp "${sbomnix}/bin/nixmeta" "Summarize nixpkgs meta-attributes"; # nix run .#vulnxscan vulnxscan = mkApp "${sbomnix}/bin/vulnxscan" "Scan nix artifacts or SBOMs for vulnerabilities"; # nix run .#provenance provenance = mkApp "${sbomnix}/bin/provenance" "Generate SLSA provenance for a nix target"; }; }; } ================================================ FILE: nix/default.nix ================================================ # SPDX-FileCopyrightText: 2023 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 { imports = [ ./apps.nix ./formatter.nix ./packages.nix ./git-hooks.nix ]; } ================================================ FILE: nix/formatter.nix ================================================ # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 { ... }: { perSystem = { config, pkgs, ... }: { formatter = let inherit (config.pre-commit.settings) package configFile; in pkgs.writeShellScriptBin "pre-commit-run" '' exec ${pkgs.lib.getExe package} run --all-files --config ${configFile} ''; }; } ================================================ FILE: nix/git-hooks.nix ================================================ # SPDX-FileCopyrightText: 2025-2026 TII (SSRC) and the Ghaf contributors # SPDX-License-Identifier: Apache-2.0 { inputs, ... }: { imports = with inputs; [ git-hooks-nix.flakeModule ]; perSystem = { pkgs, ... }: let pyrightPythonEnv = pkgs.python3.withPackages ( pp: with pp; [ beautifulsoup4 colorlog dfdiskcache filelock graphviz pp."license-expression" numpy packageurl-python packaging pandas pyrate-limiter reuse requests requests-cache requests-ratelimiter tabulate ] ); pyrightWrapper = pkgs.writeShellScriptBin "pyright-sbomnix" '' exec ${pkgs.lib.getExe pkgs.pyright} --pythonpath ${pyrightPythonEnv}/bin/python "$@" ''; in { pre-commit = { settings.hooks = { gitlint.enable = true; typos = { enable = true; excludes = [ "^LICENSES/.*" "^tests/resources/.*" ]; }; end-of-file-fixer = { enable = true; excludes = [ "^LICENSES/.*" "^tests/resources/.*" ]; }; trim-trailing-whitespace = { enable = true; excludes = [ "^LICENSES/.*" "^tests/resources/.*" ]; }; actionlint.enable = true; deadnix.enable = true; nixfmt.enable = true; pyright = { enable = true; pass_filenames = false; settings.binPath = "${pyrightWrapper}/bin/pyright-sbomnix"; }; ruff.enable = true; ruff-format.enable = true; reuse.enable = true; shellcheck.enable = true; statix = { enable = true; args = [ "fix" ]; }; }; }; }; } ================================================ FILE: nix/packages.nix ================================================ # SPDX-FileCopyrightText: 2023 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 { self, ... }: { perSystem = { pkgs, lib, config, self', ... 
}: let pp = pkgs.python3.pkgs; baseVersion = pkgs.lib.removeSuffix "\n" (builtins.readFile ../VERSION); # Append git state so local builds are distinguishable from release # artifacts. shortRev is set on a clean tree; dirtyShortRev (Nix >= 2.14) # is set when the working tree has uncommitted changes. gitSuffix = if self ? shortRev then "+g${self.shortRev}" else if self ? dirtyShortRev then "+g${self.dirtyShortRev}" else ""; # Thin wrapper that calls a module entry point via the ambient python3. # PYTHONPATH (set in shellHook) resolves to the local src/, so edits are # picked up without reinstalling. mkDevEntry = name: module: pkgs.writeShellScriptBin name '' exec python3 -c "import sys; sys.argv[0]='${name}'; from ${module} import main; main()" "$@" ''; prefix_path = with pkgs; [ git graphviz grype nix nix-visualize vulnix ]; check_inputs = with pp; [ hypothesis jsonschema pytest pytest-cov pytest-xdist ]; build_system = with pp; [ setuptools ]; build_inputs = with pp; [ beautifulsoup4 colorlog dfdiskcache filelock graphviz pp."license-expression" numpy packageurl-python packaging pandas pyrate-limiter reuse requests requests-cache requests-ratelimiter tabulate ]; in { packages = rec { default = sbomnix; sbomnix = pp.buildPythonPackage { pname = "sbomnix"; version = "${baseVersion}${gitSuffix}"; pyproject = true; src = lib.cleanSource ../.; postPatch = '' printf '%s' "${baseVersion}${gitSuffix}" > VERSION ''; build-system = build_system; nativeCheckInputs = check_inputs; dependencies = build_inputs; pythonImportsCheck = [ "sbomnix" ]; makeWrapperArgs = [ "--prefix PATH : ${lib.makeBinPath prefix_path}" ]; }; }; checks = # Force a build of all packages during a `nix flake check`. with lib; mapAttrs' (n: nameValuePair "package-${n}") self'.packages; devShells.default = pkgs.mkShell { name = "sbomnix-devshell"; packages = [ pkgs.pyright # for running pyright manually in devshell pkgs.ruff # for running ruff manually in devshell ] ++ check_inputs ++ build_system ++ build_inputs ++ [ (mkDevEntry "sbomnix" "sbomnix.main") (mkDevEntry "nixgraph" "nixgraph.main") (mkDevEntry "nixmeta" "nixmeta.main") (mkDevEntry "nix_outdated" "nixupdate.nix_outdated") (mkDevEntry "vulnxscan" "vulnxscan.vulnxscan_cli") (mkDevEntry "repology_cli" "repology.repology_cli") (mkDevEntry "repology_cve" "repology.repology_cve") (mkDevEntry "provenance" "provenance.main") ]; # Add the repo root to PYTHONPATH, so invoking entrypoints (and them being # able to find the python packages in the repo) becomes possible. # `pytest.ini` already sets this for invoking `pytest` # (cascading down to the processes it spawns), but this is for the developer # invoking entrypoints from inside the devshell. shellHook = '' ${config.pre-commit.installationScript} echo 1>&2 "Welcome to the development shell!" 
export PATH=${lib.makeBinPath prefix_path}:$PATH export PYTHONPATH="$PYTHONPATH:$(pwd)/src" # https://github.com/NixOS/nix/issues/1009: export TMPDIR="/tmp" ''; }; }; } ================================================ FILE: pyproject.toml ================================================ # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # SPDX-License-Identifier: Apache-2.0 [build-system] requires = ["setuptools>=61"] build-backend = "setuptools.build_meta" [project] name = "sbomnix" dynamic = ["version"] description = "Utility that generates SBOMs from nix packages" readme = { file = "README.md", content-type = "text/markdown" } requires-python = ">=3.10" license = { text = "Apache-2.0" } authors = [{ name = "TII", email = "henri.rosten@unikie.com" }] classifiers = [ "Development Status :: 3 - Alpha", "License :: OSI Approved :: Apache Software License", "Operating System :: POSIX :: Linux", "Programming Language :: Python :: 3 :: Only", ] dependencies = [ "beautifulsoup4", "colorlog", "df-diskcache", "filelock", "graphviz", "license-expression", "numpy", "packageurl-python", "packaging", "pandas", "reuse", "requests", "requests-cache", "requests-ratelimiter", "tabulate", ] [project.urls] Homepage = "https://github.com/tiiuae/sbomnix" [project.scripts] sbomnix = "sbomnix.main:main" nixgraph = "nixgraph.main:main" nixmeta = "nixmeta.main:main" nix_outdated = "nixupdate.nix_outdated:main" vulnxscan = "vulnxscan.vulnxscan_cli:main" repology_cli = "repology.repology_cli:main" repology_cve = "repology.repology_cve:main" provenance = "provenance.main:main" [tool.setuptools] license-files = ["LICENSES/Apache-2.0.txt", "LICENSES/BSD-3-Clause.txt"] [tool.setuptools.dynamic] version = { file = ["VERSION"] } [tool.setuptools.packages.find] where = ["src"] [tool.ruff] line-length = 88 target-version = "py310" [tool.ruff.lint] preview = true select = [ "B", "E4", "E7", "E9", "F", "I", "PLE", "PLW", "PLR0911", "PLR0912", "PLR0913", "PLR0914", "PLR0915", "PLR0917", "RUF100", ] [tool.ruff.lint.isort] known-first-party = [ "common", "nixgraph", "nixmeta", "nixupdate", "provenance", "repology", "sbomnix", "vulnxscan", ] ================================================ FILE: pyrightconfig.json ================================================ { "include": ["src"], "extraPaths": ["src"], "pythonVersion": "3.10", "typeCheckingMode": "standard", "reportMissingTypeStubs": false, } ================================================ FILE: pytest.ini ================================================ # SPDX-FileCopyrightText: 2022-2023 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 [pytest] pythonpath = . addopts = --strict-markers markers = integration: indicates a CLI or cross-module integration test. network: indicates a test that relies on external network access. slow: indicates a slow test. grype: indicates a test that invokes grype (triggers grype DB pre-warm). real_vulnix: opt-in tests that execute the real vulnix binary. 
================================================ FILE: scripts/check-fast.sh ================================================ #!/usr/bin/env bash # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 set -euo pipefail nix fmt nix --extra-experimental-features 'flakes nix-command' flake check --no-build nix develop --command ./scripts/run-pytest-lane.sh fast ================================================ FILE: scripts/check-full.sh ================================================ #!/usr/bin/env bash # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 set -euo pipefail nix --extra-experimental-features 'flakes nix-command' flake check nix develop --command ./scripts/run-pytest-lane.sh full ================================================ FILE: scripts/release-asset.sh ================================================ #!/usr/bin/env bash # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 set -euo pipefail mkdir -p build/ release_target=".#sbomnix" nix run .#sbomnix -- "$release_target" \ --cdx=./build/sbom.runtime.cdx.json \ --spdx=./build/sbom.runtime.spdx.json \ --csv=./build/sbom.runtime.csv nix run .#sbomnix -- --buildtime "$release_target" \ --cdx=./build/sbom.buildtime.cdx.json \ --spdx=./build/sbom.buildtime.spdx.json \ --csv=./build/sbom.buildtime.csv echo echo "Built release asset:" ls -la build ================================================ FILE: scripts/run-pytest-lane.sh ================================================ #!/usr/bin/env bash # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 set -euo pipefail usage() { echo "usage: $0 {fast|full}" >&2 exit 2 } lane="${1:-}" marker_expr="" coverage=false pytest_args=( -n auto -x ) case "$lane" in fast) marker_expr="not slow and not network" pytest_args+=(-v --durations=10) ;; full) coverage=true pytest_args+=(-v --durations=20) ;; *) usage ;; esac if $coverage; then pytest_args+=( --cov=src --cov-report=term-missing --cov-report=xml ) fi if [ -n "$marker_expr" ]; then pytest_args+=(-m "$marker_expr") fi pytest "${pytest_args[@]}" tests/ ================================================ FILE: shell.nix ================================================ # SPDX-FileCopyrightText: 2023 Technology Innovation Institute (TII) # SPDX-FileCopyrightText: 2020-2023 Eelco Dolstra and the flake-compat contributors # # SPDX-License-Identifier: MIT # This file originates from: # https://github.com/nix-community/flake-compat # This file provides backward compatibility to nix < 2.4 clients { system ? 
builtins.currentSystem, }: let lock = builtins.fromJSON (builtins.readFile ./flake.lock); inherit (lock.nodes.flake-compat.locked) owner repo rev narHash ; flake-compat = fetchTarball { url = "https://github.com/${owner}/${repo}/archive/${rev}.tar.gz"; sha256 = narHash; }; flake = import flake-compat { inherit system; src = ./.; }; in flake.shellNix ================================================ FILE: src/common/__init__.py ================================================ # SPDX-FileCopyrightText: 2022 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 ================================================ FILE: src/common/cli_args.py ================================================ # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Common argparse helper functions.""" import argparse import sys from weakref import WeakSet from common.pkgmeta import get_py_pkg_version _VERBOSE_COUNT_DEST = "_verbose_count" _VERBOSE_WRAPPED_PARSERS = WeakSet() class _VerboseCountAction(argparse.Action): """Count repeated short verbose flags without using parser defaults.""" def __init__(self, option_strings, dest, nargs=0, **kwargs): if nargs != 0: raise ValueError("nargs must be 0") super().__init__(option_strings, dest, nargs=0, **kwargs) def __call__(self, _parser, namespace, _values, _option_string=None): count = getattr(namespace, _VERBOSE_COUNT_DEST, 0) + 1 setattr(namespace, _VERBOSE_COUNT_DEST, count) setattr(namespace, self.dest, count) def check_positive(val): """Raise ArgumentTypeError if val is not a positive integer.""" intval = int(val) if intval <= 0: raise argparse.ArgumentTypeError(f"{val} is not a positive integer") return intval def _is_integer(value): """Return True if value can be parsed as an integer.""" try: int(value) except ValueError: return False return True def _normalize_verbose_args(args): """Normalize compact short verbose values before argparse sees positionals.""" normalized = [] args = list(sys.argv[1:] if args is None else args) idx = 0 while idx < len(args): arg = args[idx] if arg == "-v" and idx + 1 < len(args) and _is_integer(args[idx + 1]): normalized.append(f"--verbose={args[idx + 1]}") idx += 2 continue if arg.startswith("-v") and arg != "-v": value = arg[2:] if value.startswith("="): value = value[1:] if value and _is_integer(value): normalized.append(f"--verbose={value}") idx += 1 continue normalized.append(arg) idx += 1 return normalized def _finalize_verbose_namespace(namespace): """Remove internal argparse bookkeeping from the parsed namespace.""" if hasattr(namespace, _VERBOSE_COUNT_DEST): delattr(namespace, _VERBOSE_COUNT_DEST) return namespace def _wrap_verbose_parser(parser): """Teach parse_known_args to normalize compact short verbose values.""" if parser in _VERBOSE_WRAPPED_PARSERS: return parse_known_args = parser.parse_known_args def parse_known_args_with_verbose(args=None, namespace=None): namespace, extras = parse_known_args( _normalize_verbose_args(args), namespace, ) return _finalize_verbose_namespace(namespace), extras parser.parse_known_args = parse_known_args_with_verbose _VERBOSE_WRAPPED_PARSERS.add(parser) def add_verbose_argument(parser, default=0, max_level=3, root_parser=None): """Add a standard verbose flag to an argparse parser.""" _wrap_verbose_parser(root_parser or parser) parser.set_defaults(verbose=default, **{_VERBOSE_COUNT_DEST: 0}) levels = ["0=INFO", "1=VERBOSE", "2=DEBUG", "3=SPAM"] level_help = ", ".join(levels[: max_level + 1]) short_help = ( 
f"Increase verbosity; repeat as -vv for DEBUG (default: --verbose={default})" ) long_help = ( f"Set verbosity level explicitly ({level_help}) (default: --verbose={default})" ) parser.add_argument( "-v", action=_VerboseCountAction, dest="verbose", help=short_help, ) parser.add_argument( "--verbose", type=int, dest="verbose", metavar="N", help=long_help, ) def add_version_argument(parser, package="sbomnix"): """Add a standard version flag to an argparse parser.""" parser.add_argument( "--version", action="version", version=get_py_pkg_version(package) ) ================================================ FILE: src/common/columns.py ================================================ # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Shared DataFrame column names used across package boundaries.""" COMMENT = "comment" COUNT = "count" CPE = "cpe" CLASSIFY = "classify" DEPENDENCY_UID = "dependency_uid" LEVEL = "level" MODIFIED = "modified" NAME = "name" NEWEST_UPSTREAM_RELEASE = "newest_upstream_release" NIXPKGS_PR = "nixpkgs_pr" OUTPUTS = "outputs" PACKAGE = "package" PACKAGE_REPOLOGY = "package_repology" PATCHED = "patched" PNAME = "pname" POTENTIALLY_VULNERABLE = "potentially_vulnerable" RAW_NAME = "raw_name" REPO = "repo" REPO_VERSION_CLASSIFY = "repo_version_classify" SBOM_VERSION_CLASSIFY = "sbom_version_classify" SCANNER = "scanner" SEVERITY = "severity" SIMILARITY = "similarity" SORTCOL = "sortcol" SRC_PATH = "src_path" STATUS = "status" STORE_PATH = "store_path" SUM = "sum" TARGET_PATH = "target_path" URL = "url" VERSION = "version" VERSION_CMP = "version_cmp" VERSION_LOCAL = "version_local" VERSION_NIXPKGS = "version_nixpkgs" VERSION_REPOLOGY = "version_repology" VERSION_SBOM = "version_sbom" VERSION_UPSTREAM = "version_upstream" VULN_ID = "vuln_id" WHITELIST = "whitelist" WHITELIST_COMMENT = "whitelist_comment" ================================================ FILE: src/common/df.py ================================================ # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Shared dataframe helpers.""" import csv import logging import urllib.error from typing import Literal, cast, overload import pandas as pd from tabulate import tabulate from common.errors import CsvLoadError from common.log import LOG def df_to_csv_file(df, name, loglevel=logging.INFO): """Write dataframe to csv file.""" df.to_csv( path_or_buf=name, quoting=csv.QUOTE_ALL, sep=",", index=False, encoding="utf-8" ) LOG.log(loglevel, "Wrote: %s", name) @overload def df_from_csv_file(name, exit_on_error: Literal[True] = True) -> pd.DataFrame: ... @overload def df_from_csv_file( name, exit_on_error: Literal[False], ) -> pd.DataFrame | None: ... 
def df_from_csv_file(name, exit_on_error=True): """Read csv file into dataframe.""" LOG.debug("Reading: %s", name) try: df = pd.read_csv(name, keep_default_na=False, dtype=str) df.reset_index(drop=True, inplace=True) return df except ( pd.errors.EmptyDataError, pd.errors.ParserError, urllib.error.HTTPError, urllib.error.URLError, ) as error: if exit_on_error: raise CsvLoadError(name, error) from error LOG.debug("Error reading csv file '%s':\n%s", name, error) return None def df_regex_filter(df: pd.DataFrame, column: str, regex: str) -> pd.DataFrame: """Return rows where column `column` values match the given regex.""" LOG.debug("column:'%s', regex:'%s'", column, regex) return cast(pd.DataFrame, df[df[column].str.contains(regex, regex=True, na=False)]) def df_log(df, loglevel, tablefmt="presto"): """Log dataframe with given loglevel and tablefmt.""" if LOG.isEnabledFor(loglevel): if df is None or df.empty: return df = df.fillna("") table = tabulate( df, headers="keys", tablefmt=tablefmt, stralign="left", showindex=False ) LOG.log(loglevel, "\n%s\n", table) ================================================ FILE: src/common/errors.py ================================================ # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Shared exception types for expected user-facing failures.""" import os import shlex class SbomnixError(RuntimeError): """Base class for expected user-facing errors.""" class FlakeRefResolutionError(SbomnixError): """Raised when an input looks like a flakeref but cannot be resolved.""" def __init__(self, flakeref, stderr="", action="evaluating"): self.flakeref = flakeref self.stderr = "" if stderr is None else str(stderr) message = f"Failed {action} flakeref '{flakeref}'" stderr_summary = self.stderr.strip() if stderr_summary: message += f": {stderr_summary}" super().__init__(message) class FlakeRefRealisationError(FlakeRefResolutionError): """Raised when a flakeref resolves but cannot be force-realised.""" def __init__(self, flakeref, stderr=""): super().__init__(flakeref, stderr=stderr, action="force-realising") class CsvLoadError(SbomnixError): """Raised when a CSV input cannot be read.""" def __init__(self, name, error): self.name = name self.error = error super().__init__(f"Error reading csv file '{name}':\n{error}") class CommandNotFoundError(SbomnixError): """Raised when a required executable is not available in PATH.""" def __init__(self, name): self.name = name super().__init__(f"command '{name}' is not in PATH") class NixCommandError(SbomnixError): """Raised when a required Nix command fails.""" def __init__(self, command, stderr="", stdout=""): self.command = _format_command(command) self.stderr = "" if stderr is None else str(stderr) self.stdout = "" if stdout is None else str(stdout) message = f"Failed running Nix command `{self.command}`" detail = self.stderr.strip() or self.stdout.strip() if detail: message += f": {detail}" super().__init__(message) class InvalidNixArtifactError(SbomnixError): """Raised when a CLI target is not a valid nix artifact.""" def __init__(self, path): self.path = path super().__init__(f"Specified target is not a nix artifact: '{path}'") class InvalidNixJsonError(SbomnixError): """Raised when a Nix JSON interface returns an unsupported shape.""" def __init__(self, command, detail): self.command = command self.detail = detail super().__init__( f"Unexpected JSON from `{command}`: {detail}. " "The pinned Nix output schema may have changed; refusing to continue." 
        )


class MissingNixDeriverError(SbomnixError):
    """Raised when a nix artifact cannot be mapped back to a derivation."""

    def __init__(self, path):
        self.path = path
        super().__init__(f"No deriver found for: '{path}'")


class MissingNixDerivationMetadataError(SbomnixError):
    """Raised when an artifact has no derivation metadata to model as a package."""

    def __init__(self, path):
        self.path = path
        super().__init__(f"No derivation metadata found for: '{path}'")


class MissingNixOutPathError(SbomnixError):
    """Raised when a derivation does not expose an out path."""

    def __init__(self, path):
        self.path = path
        super().__init__(f"No outpath found for: '{path}'")


class InvalidCpeDictionaryError(SbomnixError):
    """Raised when the downloaded CPE dictionary has invalid columns."""

    def __init__(self, required_cols):
        self.required_cols = tuple(sorted(required_cols))
        super().__init__(
            f"Missing required columns {list(self.required_cols)} from cpedict"
        )


class WhitelistApplicationError(SbomnixError):
    """Raised when vulnerability whitelist application cannot proceed."""

    def __init__(self, message):
        super().__init__(message)


class InvalidSbomError(SbomnixError):
    """Raised when a supplied SBOM path is invalid."""

    def __init__(self, path):
        self.path = path
        super().__init__(f"Specified sbom target is not a json file: '{path}'")


def _format_command(command):
    if isinstance(command, bytes):
        return command.decode(errors="replace")
    if isinstance(command, str):
        return command
    return shlex.join(os.fspath(part) for part in command)



================================================
FILE: src/common/flakeref.py
================================================
# SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII)
#
# SPDX-License-Identifier: Apache-2.0

"""Flakeref resolution helpers."""

import logging
import pathlib
import re

from common.errors import FlakeRefRealisationError, FlakeRefResolutionError
from common.log import LOG, LOG_VERBOSE
from common.nix_utils import parse_nix_derivation_show
from common.proc import ExecCmdFn, exec_cmd, nix_cmd

NIXOS_CONFIGURATION_TOPLEVEL_SUFFIX = ".config.system.build.toplevel"

_NIXOS_CONFIGURATION_PREFIX_RE = re.compile(
    r"^(?P<flake>.+)#nixosConfigurations\.(?P<rest>.+)$"
)
_UNQUOTED_ATTR_SEGMENT_RE = re.compile(r"^[A-Za-z0-9_'-]+$")
_NIX_STRING_ESCAPES = {
    '"': '"',
    "\\": "\\",
    "n": "\n",
    "r": "\r",
    "t": "\t",
}


def try_resolve_flakeref(  # noqa: PLR0913
    flakeref: str,
    force_realise: bool = False,
    impure: bool = False,
    derivation: bool = False,
    *,
    exec_cmd_fn: ExecCmdFn | None = None,
    log: logging.Logger | None = None,
) -> str | None:
    """
    Resolve flakeref to out-path, force-realising the output if
    ``force_realise`` is True.
    """
    exec_cmd_fn = exec_cmd if exec_cmd_fn is None else exec_cmd_fn
    log = LOG if log is None else log
    looks_like_flakeref = _looks_like_flakeref(flakeref)
    if derivation and not force_realise and looks_like_flakeref:
        log.info("Evaluating flakeref '%s'", flakeref)
        cmd = nix_cmd("derivation", "show", flakeref, impure=impure)
        ret = exec_cmd_fn(
            cmd, raise_on_error=False, return_error=True, log_error=False
        )
        if ret is None or ret.returncode != 0:
            raise FlakeRefResolutionError(flakeref, ret.stderr if ret else "")
        drv_paths = parse_nix_derivation_show(ret.stdout)
        drv_path = next(iter(drv_paths), "")
        if not drv_path:
            raise FlakeRefResolutionError(
                flakeref,
                "nix derivation show returned no derivation path",
            )
        log.debug("flakeref='%s' maps to derivation='%s'", flakeref, drv_path)
        return drv_path
    if force_realise and looks_like_flakeref:
        log.info("Realising flakeref '%s'", flakeref)
        cmd = nix_cmd(
            "build",
            "--no-link",
            "--print-out-paths",
            flakeref,
            impure=impure,
        )
        ret = exec_cmd_fn(
            cmd, raise_on_error=False, return_error=True, log_error=False
        )
        if ret is None or ret.returncode != 0:
            raise FlakeRefRealisationError(flakeref, ret.stderr if ret else "")
        nixpath = _first_output_path(ret.stdout)
        if not nixpath:
            raise FlakeRefRealisationError(
                flakeref,
                "nix build returned no output path",
            )
        log.debug("flakeref='%s' maps to path='%s'", flakeref, nixpath)
        return nixpath
    if looks_like_flakeref:
        log.info("Evaluating flakeref '%s'", flakeref)
    else:
        log.log(LOG_VERBOSE, "Evaluating '%s'", flakeref)
    cmd = nix_cmd("eval", "--raw", flakeref, impure=impure)
    ret = exec_cmd_fn(cmd, raise_on_error=False, return_error=True, log_error=False)
    if ret is None or ret.returncode != 0:
        if looks_like_flakeref:
            raise FlakeRefResolutionError(flakeref, ret.stderr if ret else "")
        log.debug("not a flakeref: '%s'", flakeref)
        return None
    nixpath = ret.stdout.strip()
    log.debug("flakeref='%s' maps to path='%s'", flakeref, nixpath)
    if not force_realise:
        return nixpath
    log.info("Realising flakeref '%s'", flakeref)
    cmd = nix_cmd("build", "--no-link", flakeref, impure=impure)
    ret = exec_cmd_fn(cmd, raise_on_error=False, return_error=True, log_error=False)
    if ret is None or ret.returncode != 0:
        raise FlakeRefRealisationError(flakeref, ret.stderr if ret else "")
    return nixpath


def _first_output_path(stdout: str) -> str:
    """Return the first output path printed by ``nix build --print-out-paths``."""
    return next((line.strip() for line in stdout.splitlines() if line.strip()), "")


def parse_nixos_configuration_ref(
    flakeref: str,
    *,
    suffix: str = "",
) -> tuple[str, str] | None:
    """
    Parse ``<flake>#nixosConfigurations.<name>``.

    ``name`` may be either an unquoted attr segment or a quoted segment
    such as ``"host.example.com"``. The returned name is decoded and safe
    to re-quote.
""" match = _NIXOS_CONFIGURATION_PREFIX_RE.match(flakeref or "") if not match: return None parsed = _consume_nix_attr_segment(match.group("rest")) if not parsed: return None name, tail = parsed if tail != suffix: return None return match.group("flake"), name def quote_nix_attr_segment(name: str) -> str: """Return a safely quoted Nix attr path segment.""" escaped = [] idx = 0 while idx < len(name): if name.startswith("${", idx): escaped.append(r"\${") idx += 2 continue char = name[idx] if char == '"': escaped.append('\\"') elif char == "\\": escaped.append("\\\\") elif char == "\n": escaped.append("\\n") elif char == "\r": escaped.append("\\r") elif char == "\t": escaped.append("\\t") else: escaped.append(char) idx += 1 return '"' + "".join(escaped) + '"' def _consume_nix_attr_segment(value: str) -> tuple[str, str] | None: if not value: return None if value.startswith('"'): end = _find_quoted_attr_end(value) if end is None: return None raw_segment = value[: end + 1] segment = _decode_nix_quoted_attr_segment(raw_segment) if segment is None: return None return segment, value[end + 1 :] segment, separator, tail = value.partition(".") if not segment or not _UNQUOTED_ATTR_SEGMENT_RE.match(segment): return None return segment, f"{separator}{tail}" if separator else "" def _decode_nix_quoted_attr_segment(value: str) -> str | None: end = len(value) - 1 if len(value) < 2 or value[0] != '"' or value[end] != '"': return None decoded = [] idx = 1 while idx < end: char = value[idx] if char == "$" and idx + 1 < end and value[idx + 1] == "{": return None if char != "\\": decoded.append(char) idx += 1 continue idx += 1 if idx >= end: return None escaped = value[idx] if escaped == "$" and idx + 1 < end and value[idx + 1] == "{": decoded.append("${") idx += 2 continue decoded.append(_NIX_STRING_ESCAPES.get(escaped, f"\\{escaped}")) idx += 1 return "".join(decoded) def _find_quoted_attr_end(value: str) -> int | None: escaped = False for idx, char in enumerate(value[1:], start=1): if escaped: escaped = False continue if char == "\\": escaped = True continue if char == '"': return idx return None def _looks_like_flakeref(flakeref: str) -> bool: """Return true if the input is likely intended as a flake reference.""" looks_like = False if flakeref: path = pathlib.Path(flakeref) if path.exists(): looks_like = path.is_dir() and (path / "flake.nix").exists() else: looks_like = ( flakeref.startswith("nixpkgs=") or "#" in flakeref or "?" in flakeref or re.match(r"^[A-Za-z][A-Za-z0-9+.-]*:", flakeref) is not None ) return looks_like ================================================ FILE: src/common/http.py ================================================ # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Shared HTTP session primitives.""" from collections.abc import Collection from typing import Any from requests import Session from requests.adapters import HTTPAdapter from requests_cache import CacheMixin from requests_ratelimiter import LimiterMixin from urllib3.util.retry import Retry DEFAULT_RETRY_STATUS_CODES = (429, 500, 502, 503, 504) class CachedLimiterSession(CacheMixin, LimiterMixin, Session): # pyright: ignore[reportIncompatibleMethodOverride] """ Session class with caching and rate-limiting. 
https://requests-cache.readthedocs.io/en/stable/user_guide/compatibility.html """ def mount_retries( session: Session, *, allowed_methods: Collection[str] = frozenset(("GET", "HEAD")), ) -> Session: """Attach a retrying adapter to a requests session.""" retry = Retry( total=3, connect=3, read=3, status=3, backoff_factor=1, status_forcelist=DEFAULT_RETRY_STATUS_CODES, allowed_methods=allowed_methods, raise_on_status=False, respect_retry_after_header=True, ) adapter = HTTPAdapter(max_retries=retry) session.mount("http://", adapter) session.mount("https://", adapter) return session def create_cached_limited_session( *, per_second: int | None = None, per_minute: int | None = None, expire_after: int | None = None, user_agent: str | None = None, allowed_methods: Collection[str] = frozenset(("GET", "HEAD")), ) -> Session: """Create a cached, rate-limited session with retry policy attached.""" kwargs: dict[str, Any] = {} if per_second is not None: kwargs["per_second"] = per_second if per_minute is not None: kwargs["per_minute"] = per_minute if expire_after is not None: kwargs["expire_after"] = expire_after session = CachedLimiterSession(**kwargs) mount_retries(session, allowed_methods=allowed_methods) if user_agent: session.headers.update({"User-Agent": user_agent}) return session ================================================ FILE: src/common/log.py ================================================ # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Shared logging configuration and logger access.""" import logging import os from typing import Any, cast from colorlog import ColoredFormatter, default_log_colors LOG_VERBOSE = 15 LOG_SPAM = logging.DEBUG - 1 LOG_TRACE = LOG_SPAM LOG_LEVELS = [logging.INFO, LOG_VERBOSE, logging.DEBUG, LOG_SPAM] class SbomnixLogger(logging.Logger): """Project logger with sbomnix-specific verbose levels.""" def verbose(self, msg: object, *args: object, **kwargs: Any) -> None: """Log at the project VERBOSE level.""" if self.isEnabledFor(LOG_VERBOSE): kwargs.setdefault("stacklevel", 2) self._log(LOG_VERBOSE, msg, args, **kwargs) def spam(self, msg: object, *args: object, **kwargs: Any) -> None: """Log at the project SPAM level.""" if self.isEnabledFor(LOG_SPAM): kwargs.setdefault("stacklevel", 2) self._log(LOG_SPAM, msg, args, **kwargs) def trace(self, msg: object, *args: object, **kwargs: Any) -> None: """Log at the project TRACE level alias.""" if self.isEnabledFor(LOG_TRACE): kwargs.setdefault("stacklevel", 2) self._log(LOG_TRACE, msg, args, **kwargs) __all__ = [ "LOG", "LOG_SPAM", "LOG_TRACE", "LOG_VERBOSE", "is_debug_enabled", "set_log_verbosity", ] logging.addLevelName(LOG_VERBOSE, "VERBOSE") logging.addLevelName(LOG_SPAM, "SPAM") logging.setLoggerClass(SbomnixLogger) LOG = cast(SbomnixLogger, logging.getLogger(os.path.abspath(__file__))) def set_log_verbosity(verbosity=0): """Set logging verbosity.""" verbosity = min(len(LOG_LEVELS) - 1, max(verbosity, 0)) _init_logging(verbosity) def _init_logging(verbosity=0): """Initialize logging.""" level = LOG_LEVELS[verbosity] if level <= logging.DEBUG: logformat = ( "%(log_color)s%(levelname)-8s%(reset)s " "%(filename)s:%(funcName)s():%(lineno)d " "%(message)s" ) else: logformat = "%(log_color)s%(levelname)-8s%(reset)s %(message)s" log_colors = { **default_log_colors, "INFO": "fg_bold_white", "VERBOSE": "fg_bold_cyan", "DEBUG": "fg_bold_white", "SPAM": "fg_bold_white", } if LOG.handlers: stream = LOG.handlers[0] else: stream = logging.StreamHandler() formatter = 
ColoredFormatter(
        logformat,
        log_colors=log_colors,
        stream=getattr(stream, "stream", None),
    )
    stream.setFormatter(formatter)
    if not LOG.handlers:
        LOG.addHandler(stream)
    LOG.setLevel(level)


def is_debug_enabled():
    """Return True when project logging is enabled for DEBUG details."""
    return LOG.isEnabledFor(logging.DEBUG)


set_log_verbosity(0)



================================================
FILE: src/common/nix_utils.py
================================================
# SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII)
#
# SPDX-License-Identifier: Apache-2.0

"""Helpers for normalizing nix store paths and derivation JSON."""

import json
import os
import re

from common.errors import InvalidNixJsonError

RE_NIX_STORE_PATH_BASENAME = re.compile(r"^[0-9a-z]{32}-.+")
RE_NIX_STORE_PATH = re.compile(r"(?P<store_path>/(?:[^/\s:]+/)+[0-9a-z]{32}-[^/\s:]+)")

NIX_DERIVATION_SHOW_JSON = "nix derivation show"
NIX_PATH_INFO_JSON = "nix path-info --json --json-format 1"


def get_nix_store_dir(path=None, default: str | None = "/nix/store") -> str | None:
    """Infer the nix store directory from an absolute store path-like string."""
    if path:
        match = RE_NIX_STORE_PATH.search(str(path))
        if match:
            return os.path.dirname(match.group("store_path"))
    return default


def normalize_nix_store_path(path, store_dir="/nix/store"):
    """Return an absolute store path for basename-only store path strings."""
    if not isinstance(path, str) or not path:
        return path
    if os.path.isabs(path) or not RE_NIX_STORE_PATH_BASENAME.match(path):
        return path
    return os.path.join(store_dir, path)


def _iter_nix_store_dir_candidates(value):
    """Yield strings that may reveal the nix store directory."""
    if isinstance(value, str):
        yield value
    elif isinstance(value, dict):
        for item in value.values():
            yield from _iter_nix_store_dir_candidates(item)
    elif isinstance(value, (list, tuple)):
        for item in value:
            yield from _iter_nix_store_dir_candidates(item)


def _infer_nix_store_dir(drv_info, default="/nix/store"):
    """Infer the nix store directory from derivation fields when keys are relative."""
    if not isinstance(drv_info, dict):
        return default
    for candidate in _iter_nix_store_dir_candidates(
        {
            "builder": drv_info.get("builder"),
            "outputs": drv_info.get("outputs"),
            "env": drv_info.get("env"),
        }
    ):
        store_dir = get_nix_store_dir(candidate, default=None)
        if store_dir:
            return store_dir
    return default


def _normalize_nix_derivation_info(drv_info, store_dir):
    """Normalize basename-only store paths within derivation info."""
    if not isinstance(drv_info, dict):
        return drv_info
    normalized = dict(drv_info)
    outputs = normalized.get("outputs")
    if isinstance(outputs, dict):
        normalized["outputs"] = {}
        for name, output in outputs.items():
            normalized_output = output
            if isinstance(output, dict):
                normalized_output = dict(output)
                if normalized_output.get("path"):
                    normalized_output["path"] = normalize_nix_store_path(
                        normalized_output["path"], store_dir
                    )
            normalized["outputs"][name] = normalized_output
    env = normalized.get("env")
    if isinstance(env, dict):
        normalized["env"] = {
            key: normalize_nix_store_path(value, store_dir)
            for key, value in env.items()
        }
    inputs = normalized.get("inputs")
    if isinstance(inputs, dict):
        normalized_inputs = dict(inputs)
        srcs = normalized_inputs.get("srcs")
        if isinstance(srcs, list):
            normalized_inputs["srcs"] = [
                normalize_nix_store_path(src, store_dir) for src in srcs
            ]
        drvs = normalized_inputs.get("drvs")
        if isinstance(drvs, dict):
            normalized_inputs["drvs"] = {
                normalize_nix_store_path(path, store_dir): outputs
                for path, outputs in drvs.items()
            }
normalized["inputs"] = normalized_inputs return normalized def load_nix_json(stdout, command): """Load JSON produced by a Nix command and raise a user-facing error on drift.""" try: return json.loads(stdout) except json.JSONDecodeError as error: raise InvalidNixJsonError(command, f"invalid JSON: {error.msg}") from error def parse_nix_derivation_show(stdout, store_path_hint=None): """Normalize `nix derivation show` JSON across direct and wrapped formats.""" payload = load_nix_json(stdout, NIX_DERIVATION_SHOW_JSON) if not isinstance(payload, dict): raise InvalidNixJsonError( NIX_DERIVATION_SHOW_JSON, f"expected top-level object, got {type(payload).__name__}", ) derivations = payload.get("derivations", payload) if not isinstance(derivations, dict): raise InvalidNixJsonError( NIX_DERIVATION_SHOW_JSON, f"expected `derivations` object, got {type(derivations).__name__}", ) normalized = {} default_store_dir = get_nix_store_dir(store_path_hint) or "/nix/store" for drv_path, drv_info in derivations.items(): _validate_derivation_entry(drv_path, drv_info) store_dir = get_nix_store_dir(drv_path, default=None) if not store_dir: store_dir = _infer_nix_store_dir(drv_info, default=default_store_dir) normalized_drv_path = normalize_nix_store_path(drv_path, store_dir) normalized[normalized_drv_path] = _normalize_nix_derivation_info( drv_info, store_dir ) return normalized def _validate_derivation_entry(drv_path, drv_info): """Validate the `nix derivation show` fields consumed by this project.""" if not isinstance(drv_path, str) or not drv_path: raise InvalidNixJsonError( NIX_DERIVATION_SHOW_JSON, "expected derivation keys to be non-empty strings", ) if not isinstance(drv_info, dict): raise InvalidNixJsonError( NIX_DERIVATION_SHOW_JSON, f"expected derivation `{drv_path}` to be an object", ) _validate_optional_mapping(drv_info, "env", f"derivation `{drv_path}`") _validate_derivation_outputs(drv_path, drv_info) _check_optional_derivation_inputs(drv_path, drv_info) def _validate_optional_mapping(record, field, owner): value = record.get(field) if value is not None and not isinstance(value, dict): raise InvalidNixJsonError( NIX_DERIVATION_SHOW_JSON, f"expected `{field}` in {owner} to be an object", ) def _validate_derivation_outputs(drv_path, drv_info): outputs = drv_info.get("outputs") if outputs is None: return if not isinstance(outputs, dict): raise InvalidNixJsonError( NIX_DERIVATION_SHOW_JSON, f"expected `outputs` in derivation `{drv_path}` to be an object", ) for output_name, output in outputs.items(): if not isinstance(output_name, str) or not output_name: raise InvalidNixJsonError( NIX_DERIVATION_SHOW_JSON, f"expected output names in derivation `{drv_path}` to be strings", ) if not isinstance(output, dict): raise InvalidNixJsonError( NIX_DERIVATION_SHOW_JSON, f"expected output `{output_name}` in derivation `{drv_path}` " "to be an object", ) output_path = output.get("path") if output_path is not None and not isinstance(output_path, str): raise InvalidNixJsonError( NIX_DERIVATION_SHOW_JSON, f"expected output `{output_name}` path in derivation `{drv_path}` " "to be a string", ) def _check_optional_derivation_inputs(drv_path, drv_info): """Validate input shape without requiring callers to consume dependencies.""" inputs = drv_info.get("inputs") if inputs is not None: if not isinstance(inputs, dict): raise InvalidNixJsonError( NIX_DERIVATION_SHOW_JSON, f"expected `inputs` in derivation `{drv_path}` to be an object", ) _validate_optional_mapping(inputs, "drvs", f"`inputs` for `{drv_path}`") srcs = 
inputs.get("srcs") if srcs is not None: if not isinstance(srcs, list): raise InvalidNixJsonError( NIX_DERIVATION_SHOW_JSON, f"expected `inputs.srcs` in derivation `{drv_path}` to be a list", ) _validated_path_values( srcs, f"`inputs.srcs` in derivation `{drv_path}`", NIX_DERIVATION_SHOW_JSON, ) _reject_legacy_derivation_inputs(drv_path, drv_info) def _reject_legacy_derivation_inputs(drv_path, drv_info): for field in ("inputDrvs", "inputSrcs"): if field in drv_info: raise InvalidNixJsonError( NIX_DERIVATION_SHOW_JSON, f"unsupported legacy `{field}` in derivation `{drv_path}`", ) def nix_derivation_input_drv_paths(drv_path, drv_info): """Return validated input derivation paths from normalized derivation JSON.""" inputs = _require_derivation_inputs(drv_path, drv_info) if "drvs" not in inputs: raise InvalidNixJsonError( NIX_DERIVATION_SHOW_JSON, f"missing `inputs.drvs` in derivation `{drv_path}`", ) drvs = inputs["drvs"] if not isinstance(drvs, dict): raise InvalidNixJsonError( NIX_DERIVATION_SHOW_JSON, f"expected `inputs.drvs` in derivation `{drv_path}` to be an object", ) return _validated_path_keys( drvs, f"`inputs.drvs` in derivation `{drv_path}`", NIX_DERIVATION_SHOW_JSON, ) def nix_derivation_input_src_paths(drv_path, drv_info): """Return validated direct source inputs from normalized derivation JSON.""" inputs = _require_derivation_inputs(drv_path, drv_info) if "srcs" not in inputs: raise InvalidNixJsonError( NIX_DERIVATION_SHOW_JSON, f"missing `inputs.srcs` in derivation `{drv_path}`", ) srcs = inputs["srcs"] if not isinstance(srcs, list): raise InvalidNixJsonError( NIX_DERIVATION_SHOW_JSON, f"expected `inputs.srcs` in derivation `{drv_path}` to be a list", ) return _validated_path_values( srcs, f"`inputs.srcs` in derivation `{drv_path}`", NIX_DERIVATION_SHOW_JSON, ) def _require_derivation_inputs(drv_path, drv_info): """Return the validated modern derivation input object. Parsing derivation metadata only validates an optional ``inputs`` object because some callers use unrelated fields. Graph construction depends on the modern dependency schema, so this accessor requires ``inputs`` and the field-specific accessors require both ``inputs.drvs`` and ``inputs.srcs``. Real leaf derivations still expose those fields as empty containers. 
""" if not isinstance(drv_info, dict): raise InvalidNixJsonError( NIX_DERIVATION_SHOW_JSON, f"expected derivation `{drv_path}` to be an object", ) _reject_legacy_derivation_inputs(drv_path, drv_info) if "inputs" not in drv_info: raise InvalidNixJsonError( NIX_DERIVATION_SHOW_JSON, f"missing derivation inputs in `{drv_path}`", ) inputs = drv_info["inputs"] if not isinstance(inputs, dict): raise InvalidNixJsonError( NIX_DERIVATION_SHOW_JSON, f"expected `inputs` in derivation `{drv_path}` to be an object", ) return inputs def normalize_nix_path_info(path_info, *, command=NIX_PATH_INFO_JSON): """Normalize and validate Nix path-info JSON to a path-indexed dictionary.""" if isinstance(path_info, dict): normalized = {} for path, info in path_info.items(): if not isinstance(path, str) or not path: raise InvalidNixJsonError( command, "expected path-info object keys to be non-empty strings", ) if not isinstance(info, dict): raise InvalidNixJsonError( command, f"expected path-info record for `{path}` to be an object", ) normalized[path] = info return normalized if isinstance(path_info, list): normalized = {} for index, info in enumerate(path_info): if not isinstance(info, dict): raise InvalidNixJsonError( command, f"expected path-info list item {index} to be an object", ) path = info.get("path") or info.get("storePath") if not isinstance(path, str) or not path: raise InvalidNixJsonError( command, f"missing path string in path-info list item {index}", ) normalized[path] = info return normalized raise InvalidNixJsonError( command, f"expected top-level object or list, got {type(path_info).__name__}", ) def nix_path_info_references(info, path, *, command=NIX_PATH_INFO_JSON): """Return validated path-info references for a store path.""" if "references" not in info: raise InvalidNixJsonError( command, f"missing `references` in path-info for `{path}`", ) references = info["references"] if not isinstance(references, list): raise InvalidNixJsonError( command, f"expected `references` in path-info for `{path}` to be a list", ) for index, reference in enumerate(references): if not isinstance(reference, str) or not reference: raise InvalidNixJsonError( command, f"expected `references[{index}]` in path-info for `{path}` " "to be a non-empty string", ) return references def nix_path_info_deriver(info, path, *, command=NIX_PATH_INFO_JSON): """Return a validated path-info deriver value, or None when absent.""" deriver = info.get("deriver") if deriver is None or deriver == "": return None if not isinstance(deriver, str): raise InvalidNixJsonError( command, f"expected `deriver` in path-info for `{path}` to be a string or null", ) return deriver def nix_path_info_nar_hash(info, path, *, command=NIX_PATH_INFO_JSON): """Return a validated path-info NAR hash.""" nar_hash = info.get("narHash") if not isinstance(nar_hash, str) or not nar_hash: raise InvalidNixJsonError( command, f"missing `narHash` string in path-info for `{path}`", ) return nar_hash def _validated_path_keys(paths, owner, command): validated = [] for path in paths: if not isinstance(path, str) or not path: raise InvalidNixJsonError( command, f"expected keys in {owner} to be non-empty strings", ) validated.append(path) return validated def _validated_path_values(paths, owner, command): validated = [] for index, path in enumerate(paths): if not isinstance(path, str) or not path: raise InvalidNixJsonError( command, f"expected paths in {owner} to be non-empty strings " f"(invalid index {index})", ) validated.append(path) return validated 
================================================ FILE: src/common/package_names.py ================================================ # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Package name normalization helpers.""" import re import pandas as pd def nix_to_repology_pkg_name(nix_pkg_name): """Convert nix package name to repology package name.""" if not nix_pkg_name or pd.isnull(nix_pkg_name): return nix_pkg_name nix_pkg_name = nix_pkg_name.lower() re_nix_to_repo = re.compile( r"^(?:" r"(python)|(perl)|(emacs)|(vim)plugin|(ocaml)|" r"(gnome)-shell-extension|(lisp)|(ruby)|(lua)|" r"(php)[0-9]*Packages|(go)|(coq)|(rust)" r")" r"[0-9.]*-(.+)" ) match = re.match(re_nix_to_repo, nix_pkg_name) if match: matches = list(filter(None, match.groups())) assert len(matches) == 2, f"Unexpected package name '{nix_pkg_name}'" nix_pkg_name = f"{matches[0]}:{matches[1]}" if nix_pkg_name == "python3": nix_pkg_name = "python" if nix_pkg_name == "libtiff": nix_pkg_name = "tiff" return nix_pkg_name ================================================ FILE: src/common/pkgmeta.py ================================================ # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Package metadata helpers.""" import importlib.metadata import subprocess from pathlib import Path _REPO_ROOT = Path(__file__).resolve().parents[2] def get_py_pkg_version(package="sbomnix"): """Return package version, including git state when run from source.""" try: return importlib.metadata.version(package) except importlib.metadata.PackageNotFoundError: return _dev_version() def _dev_version(): """Derive version from git when the package is not installed. Produces the same format as the Nix package version so that devshell and nix-built invocations report identical strings for the same checkout: <base>+g<shorthash> clean tree with commits beyond the release tag <base>+g<shorthash>.dirty tree has tracked modifications (untracked files ignored) pip normalises '-' to '.' in local version identifiers, so '.dirty' is used here to match what importlib.metadata returns from the installed package.
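    For example (hypothetical short hash): base version 1.2.0 with local commits would report 1.2.0+g1a2b3c4, or 1.2.0+g1a2b3c4.dirty with uncommitted tracked changes.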
""" try: base = (_REPO_ROOT / "VERSION").read_text().strip() short_hash = subprocess.run( ["git", "rev-parse", "--short", "HEAD"], capture_output=True, text=True, check=True, cwd=_REPO_ROOT, ).stdout.strip() dirty = subprocess.run( ["git", "status", "--porcelain", "--untracked-files=no"], capture_output=True, text=True, check=True, cwd=_REPO_ROOT, ).stdout.strip() return f"{base}+g{short_hash}{'.dirty' if dirty else ''}" except Exception: try: return (_REPO_ROOT / "VERSION").read_text().strip() + ".dev" except Exception: return "0.0.0" ================================================ FILE: src/common/proc.py ================================================ # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Shared subprocess and nix command helpers.""" import logging import os import shlex import subprocess from collections.abc import Callable, Sequence from shutil import which from typing import IO, Literal, overload from common.errors import CommandNotFoundError, InvalidNixArtifactError from common.log import LOG, LOG_VERBOSE CommandPart = str | os.PathLike[str] ExecCmdResult = subprocess.CompletedProcess[str] | subprocess.CalledProcessError | None ExecCmdFn = Callable[..., ExecCmdResult] @overload def exec_cmd( cmd: Sequence[CommandPart], raise_on_error: Literal[True] = True, return_error: bool = False, log_error: bool = True, stdout: IO[str] | None = None, ) -> subprocess.CompletedProcess[str]: ... @overload def exec_cmd( cmd: Sequence[CommandPart], raise_on_error: Literal[False], return_error: Literal[True], log_error: bool = True, stdout: IO[str] | None = None, ) -> subprocess.CompletedProcess[str] | subprocess.CalledProcessError | None: ... @overload def exec_cmd( cmd: Sequence[CommandPart], raise_on_error: Literal[False], return_error: Literal[False] = False, log_error: bool = True, stdout: IO[str] | None = None, ) -> subprocess.CompletedProcess[str] | None: ... def exec_cmd( cmd: Sequence[CommandPart], raise_on_error: bool = True, return_error: bool = False, log_error: bool = True, stdout: IO[str] | None = None, ) -> ExecCmdResult: """Run shell command `cmd`.""" if isinstance(cmd, (str, bytes, os.PathLike)): raise TypeError("cmd must be an argv sequence, not a string-like value") argv = [os.fspath(part) for part in cmd] command_str = shlex.join(argv) LOG.debug("Running: %s", command_str) try: if stdout: ret = subprocess.run(argv, encoding="utf-8", check=True, stdout=stdout) else: ret = subprocess.run( argv, capture_output=True, encoding="utf-8", check=True, ) return ret except subprocess.CalledProcessError as error: if log_error: LOG.error( "Error running shell command:\n cmd: '%s'\n stdout: %s\n stderr: %s", command_str, error.stdout, error.stderr, ) if raise_on_error: raise error if return_error: return error return None def exit_unless_command_exists( name: str, *, which_fn: Callable[[str], str | None] | None = None, ) -> None: """Raise if `name` is not an executable in PATH.""" which_fn = which if which_fn is None else which_fn name_is_in_path = which_fn(name) is not None if not name_is_in_path: raise CommandNotFoundError(name) def exit_unless_nix_artifact( path: str, force_realise: bool = False, *, exec_cmd_fn: ExecCmdFn | None = None, log: logging.Logger | None = None, ) -> None: """ Raise if `path` is not a nix artifact. If `force_realise` is True, build the installable before querying path information. 
""" exec_cmd_fn = exec_cmd if exec_cmd_fn is None else exec_cmd_fn log = LOG if log is None else log log.debug("force_realize: %s", force_realise) try: if force_realise: log.log(LOG_VERBOSE, "Try force-realising store-path '%s'", path) exec_cmd_fn(nix_cmd("build", "--no-link", path)) exec_cmd_fn(nix_cmd("path-info", path)) return except subprocess.CalledProcessError: raise InvalidNixArtifactError(path) from None def nix_cmd(*args: str, impure: bool = False) -> list[str]: """Build argv for nix commands that require flakes + nix-command support.""" cmd = [ "nix", *args, "--extra-experimental-features", "flakes", "--extra-experimental-features", "nix-command", ] if impure: cmd.append("--impure") return cmd ================================================ FILE: src/common/regex.py ================================================ # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Small regex helpers kept for compatibility with older call sites.""" import re def regex_match(regex, string): """Return true if ``regex`` matches ``string``.""" if not regex or not string: return False return re.match(regex, string) is not None ================================================ FILE: src/common/spdx.py ================================================ # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Helpers for validating SPDX license identifiers.""" from functools import lru_cache from license_expression import ExpressionError, get_spdx_licensing @lru_cache(maxsize=1) def _spdx_licensing(): return get_spdx_licensing() def canonicalize_spdx_license_id(identifier): """Return a canonical SPDX identifier for a single license key.""" if not identifier: return None try: parsed = _spdx_licensing().parse(str(identifier), validate=True) except ExpressionError: return None return getattr(parsed, "key", None) ================================================ FILE: src/common/versioning.py ================================================ # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Shared version parsing and comparison helpers.""" import re import packaging.version from common.log import LOG, LOG_SPAM def number_distance(n1: object, n2: object) -> float: """ Return float value between [0.0,1.0] indicating the distance between two non-negative numbers. Returns 1.0 if the two numbers are equal. Returns 0.0 if either argument is not a non-negative number. """ if ( not isinstance(n1, (float, int)) or not isinstance(n2, (float, int)) or n1 < 0 or n2 < 0 ): return 0.0 min_n = min(n1, n2) max_n = max(n1, n2) if max_n == 0: return 1.0 if min_n == 0: min_n += 1 max_n += 1 return min_n / max_n def version_distance(v1: object, v2: object) -> float: """ Return float value between [0.0,1.0] indicating the closeness of the given two version number strings. """ v1 = str(v1) v2 = str(v2) v1_clean = re.sub(r"[^0-9.]+", "", v1) v2_clean = re.sub(r"[^0-9.]+", "", v2) re_vsplit = re.compile(r".*?(?P[0-9][0-9]*)(?P.*)$") match = re.match(re_vsplit, v1_clean) if not match: LOG.debug("Unexpected v1 version '%s'", v1) return 0.0 v1_major = match.group("ver_beg") v1_minor = match.group("ver_end").replace(".", "") v1_float = float(v1_major + "." 
+ v1_minor) match = re.match(re_vsplit, v2_clean) if not match: LOG.debug("Unexpected v2 version '%s'", v2) return 0.0 v2_major = match.group("ver_beg") v2_minor = match.group("ver_end").replace(".", "") v2_float = float(v2_major + "." + v2_minor) return number_distance(v1_float, v2_float) def parse_version(ver_str: object) -> packaging.version.Version | None: """ Return comparable version object from the given version string. Returns None if the version string can not be converted to version object. """ ver_str = str(ver_str) if not ver_str: return None re_ver = re.compile(r".*?(?P<ver_beg>[0-9][0-9.]*)(?P<ver_end>.*)$") match = re_ver.match(ver_str) if not match: LOG.debug("Unable to parse version '%s'", ver_str) return None ver_beg = match.group("ver_beg").rstrip(".") ver_end = match.group("ver_end") ver_end = re.sub(r"[^0-9.]+", "", ver_end).lstrip(".") if ver_end: ver_end = f"+{ver_end}" else: ver_end = "" ver_end = ver_end.rstrip(".") ver = f"{ver_beg}{ver_end}" ver = re.sub(r"\.+", ".", ver) LOG.log(LOG_SPAM, "%s --> %s", ver_str, ver) if not ver: LOG.debug("Invalid version '%s'", ver_str) return None try: return packaging.version.parse(ver) except packaging.version.InvalidVersion: LOG.debug("Invalid version '%s'", ver_str) return None ================================================ FILE: src/nixgraph/__init__.py ================================================ # SPDX-FileCopyrightText: 2022 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 ================================================ FILE: src/nixgraph/graph.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2022-2023 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Python script to query and visualize nix package dependencies.""" from dataclasses import dataclass import pandas as pd from common.df import df_to_csv_file from common.log import LOG, is_debug_enabled from nixgraph.render import NixDependencyGraph from sbomnix.closure import derivation_dependencies_df from sbomnix.derivation import load_recursive from sbomnix.derivers import require_deriver from sbomnix.runtime import load_runtime_closure @dataclass(frozen=True) class LoadedDependencies: """Dependency dataframe and graph traversal start path.""" start_path: str df: pd.DataFrame dtype: str def load_dependencies(nix_path, buildtime=False): """Load nixgraph dependency rows from structured Nix data.""" LOG.debug("nix_path: %s", nix_path) dtype = "buildtime" if buildtime else "runtime" LOG.info("Loading %s dependencies referenced by '%s'", dtype, nix_path) if buildtime: drv_path = require_deriver(nix_path) _derivations, drv_infos = load_recursive(drv_path) loaded = LoadedDependencies( start_path=drv_path, df=derivation_dependencies_df(drv_infos), dtype=dtype, ) else: runtime_closure = load_runtime_closure(nix_path) loaded = LoadedDependencies( start_path=nix_path, df=runtime_closure.df_deps, dtype=dtype, ) if loaded.df.empty: LOG.info("No %s dependencies", dtype) return loaded def draw_dependencies(loaded, args): """Draw loaded dependencies as a directed graph.""" if is_debug_enabled(): df_to_csv_file(loaded.df, f"nixgraph_deps_{loaded.dtype}.csv") digraph = NixDependencyGraph(loaded.df) return digraph.draw(loaded.start_path, args) ================================================ FILE: src/nixgraph/main.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2022-2023 Technology Innovation Institute (TII) # # SPDX-License-Identifier: 
Apache-2.0 """Python script to query and visualize nix package dependencies""" import argparse from common.cli_args import add_verbose_argument, add_version_argument, check_positive from common.errors import SbomnixError from common.log import LOG, set_log_verbosity from nixgraph.graph import draw_dependencies, load_dependencies from sbomnix.cli_utils import resolve_nix_target ############################################################################### def getargs(args=None): """Parse command line arguments""" desc = "Visualize nix artifact dependencies" epil = "Example: nixgraph /path/to/derivation.drv " parser = argparse.ArgumentParser(description=desc, epilog=epil) helps = ( "Target nix store path (e.g. derivation file or nix output path) or flakeref" ) parser.add_argument("NIXREF", help=helps, type=str) add_version_argument(parser) helps = "Scan buildtime dependencies instead of runtime dependencies" parser.add_argument("--buildtime", help=helps, action="store_true") helps = "Set the graph maxdepth (default: --depth=1)" parser.add_argument("--depth", help=helps, type=check_positive, default=1) helps = ( "Draw inverse graph starting from node (path) names that match the " "specified regular expression" ) parser.add_argument("--inverse", help=helps) helps = ( "Set the output file name, default is 'graph.png'. " "The output filename extension determines the output format. " "Common supported formats include: png, jpg, pdf, and dot. " "For a full list of supported output formats, see: " "https://graphviz.org/doc/info/output.html. In addition to graphviz " "supported output formats, the tool supports output in csv to " "allow post-processing the output data. Specify output file with " ".csv extension to output the query result in textual csv format." ) parser.add_argument("-o", "--out", nargs="?", help=helps, default="graph.png") helps = "Colorize nodes that match the specified regular expression" parser.add_argument("--colorize", help=helps) helps = ( "Keep drawing the dependencies until package name matches " "the specified regular expression. This option works together with " "--depth so that drawing stops when the first of the two " "conditions match: when the package name matches the given regex " "or when the specified graph depth is reached." 
) parser.add_argument("--until", help=helps) helps = "Show nix store path in node label, together with package name" parser.add_argument("--pathnames", help=helps, action="store_true") add_verbose_argument(parser) return parser.parse_args(args) ################################################################################ def main(): """main entry point""" args = getargs() set_log_verbosity(args.verbose) try: _run(args) except SbomnixError as error: LOG.fatal("%s", error) raise SystemExit(1) from error def _run(args): target = resolve_nix_target(args.NIXREF, buildtime=args.buildtime) deps = load_dependencies(target.path, args.buildtime) draw_dependencies(deps, args) if __name__ == "__main__": main() ################################################################################ ================================================ FILE: src/nixgraph/render.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Helpers for traversing and rendering nix dependency graphs.""" import html import os from typing import Any import graphviz as gv import pandas as pd from common import columns as cols from common.df import df_regex_filter, df_to_csv_file from common.log import LOG, LOG_SPAM from common.regex import regex_match from sbomnix.closure import walk_dependency_rows DBG_INDENT = " " GRAPHVIZ_RENDER_WARN_EDGES = 2000 class NixDependencyGraph: """Draw nix package dependencies as graph.""" def __init__(self, df_dependencies): self.df = df_dependencies self.digraph = None self.df_out_csv = None self.maxdepth = 1 self.inverse_regex = None self.until_regex = None self.colorize_regex = None self.pathnames = False def draw(self, start_path, args): """Draw dependency graph.""" self._init_df_out(args) self.maxdepth = args.depth if hasattr(args, "depth") else 1 self.inverse_regex = args.inverse if hasattr(args, "inverse") else None self.until_regex = args.until if hasattr(args, "until") else None self.colorize_regex = args.colorize if hasattr(args, "colorize") else None self.pathnames = args.pathnames if hasattr(args, "pathnames") else False self.digraph = gv.Digraph() self.nodes_drawn = set() self.digraph.attr("graph", rankdir="LR") self.digraph.attr("node", shape="box") self.digraph.attr("node", style="rounded") self.digraph.attr("node", margin="0.3,0.1") self.digraph.attr("graph", concentrate="false") initlen = len(self.digraph.body) walked_rows = self._walk_rows(start_path) if self.df_out_csv is not None: self.df_out_csv = self._walked_rows_to_dataframe(walked_rows) else: for walked in walked_rows: self._draw_row(walked.row, walked.depth) if len(self.digraph.body) > initlen: self._warn_if_large_graphviz_render(args.out, len(walked_rows)) self._render(args.out) elif self.df_out_csv is not None and not self.df_out_csv.empty: if hasattr(args, "return_df") and args.return_df: LOG.debug("Returning graph as dataframe") return self.df_out_csv df_to_csv_file(self.df_out_csv, args.out) else: LOG.warning("Nothing to draw") return None def _walk_rows(self, start_path): if self.inverse_regex: df = df_regex_filter(self.df, cols.SRC_PATH, self.inverse_regex) start_paths = df[cols.SRC_PATH].tolist() if not df.empty else [] for inverse_path in dict.fromkeys(start_paths): LOG.debug("Start path inverse: %s", inverse_path) return walk_dependency_rows( self.df, start_paths, self.maxdepth, inverse=True, stop_at=self._matches_until, ) LOG.debug("Start path: %s", start_path) return 
walk_dependency_rows( self.df, start_path, self.maxdepth, stop_at=self._matches_until, ) def _walked_rows_to_dataframe(self, walked_rows): rows = [{"graph_depth": walked.depth, **walked.row} for walked in walked_rows] if rows: return pd.DataFrame.from_records(rows) return pd.DataFrame() def _draw_row(self, row, depth): self._dbg_print_row(row, depth) if self._matches_until(row): LOG.debug("%sReached until_function", (DBG_INDENT * (depth - 1))) return self._add_node(row[cols.SRC_PATH], row["src_pname"]) self._add_node(row[cols.TARGET_PATH], row["target_pname"]) self._add_edge(row) def _init_df_out(self, args): if hasattr(args, "out"): _fname, extension = os.path.splitext(args.out) fileformat = extension[1:] if fileformat == "csv": self.df_out_csv = pd.DataFrame() elif hasattr(args, "return_df") and args.return_df: self.df_out_csv = pd.DataFrame() else: self.df_out_csv = None def _render(self, filename): if self.df_out_csv is not None: return if self.digraph is None: return fname, extension = os.path.splitext(filename) gformat = extension[1:] if gformat == "dot": self.digraph.save(filename) LOG.info("Wrote: %s", filename) return self.digraph.render(filename=fname, format=gformat, cleanup=True) LOG.info("Wrote: %s", filename) def _warn_if_large_graphviz_render(self, filename, edge_count): if edge_count < GRAPHVIZ_RENDER_WARN_EDGES: return _fname, extension = os.path.splitext(filename) if extension[1:] in ("csv", "dot"): return LOG.warning( "Rendering %s dependency edges with Graphviz may be slow; " "use --out graph.csv or --out graph.dot for faster output.", edge_count, ) def _matches_until(self, row): return regex_match(self.until_regex, row["target_pname"]) def _add_edge(self, row): if self.df_out_csv is not None: return if self.digraph is None: return self.digraph.edge(row[cols.TARGET_PATH], row[cols.SRC_PATH], style=None) def _add_node(self, path, pname): if self.df_out_csv is not None: return if self.digraph is None: return if path in self.nodes_drawn: return self.nodes_drawn.add(path) node_id = path node_name = html.escape(str(pname)) if self.pathnames: beg = '<FONT POINT-SIZE="8">' end = "</FONT>" label = f"<{node_name}<BR/>{beg}{str(path)}{end}>" else: label = node_name fillcolor = "#EEEEEE" if regex_match(self.colorize_regex, pname): fillcolor = "#FFE6E6" self.digraph.node(node_id, label, style="rounded,filled", fillcolor=fillcolor) def _dbg_print_row(self, row: dict[str, Any], depth): LOG.log( LOG_SPAM, "%sFound: %s ==> %s", (DBG_INDENT * (depth - 1)), row[cols.TARGET_PATH], row[cols.SRC_PATH], ) ================================================ FILE: src/nixmeta/__init__.py ================================================ # SPDX-FileCopyrightText: 2023 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 ================================================ FILE: src/nixmeta/flake_metadata.py ================================================ # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Helpers for resolving nixpkgs metadata from flakerefs.""" import json import pathlib import re from common.log import LOG, LOG_SPAM from common.proc import exec_cmd, nix_cmd def get_flake_metadata(flakeref, *, exec_cmd_fn=exec_cmd, nix_cmd_fn=nix_cmd, log=LOG): """Return ``nix flake metadata`` JSON for the given flakeref.""" if flakeref.startswith("nixpkgs="): flakeref = flakeref.removeprefix("nixpkgs=") log.info("Reading flake metadata for '%s'", flakeref) cmd = nix_cmd_fn("flake", "metadata", flakeref, "--json") ret = exec_cmd_fn(cmd, raise_on_error=False, return_error=True, log_error=False) if ret is None or ret.returncode != 0: log.warning("Failed reading flake metadata: %s", flakeref) return None meta_json = json.loads(ret.stdout) log.log(LOG_SPAM, meta_json) return meta_json def is_nixpkgs_metadata(meta_json): """Return true if the given metadata describes nixpkgs.""" try: if ( "path" in meta_json and "description" in meta_json and meta_json["description"] == "A collection of packages for the Nix package manager" ): return True if ( "path" in meta_json and meta_json["locked"]["owner"] == "NixOS" and meta_json["locked"]["repo"] == "nixpkgs" ): return True except (KeyError, TypeError): return False return False def _locked_obj_is_nixpkgs(node_name, locked_obj): try: if locked_obj.get("repo") == "nixpkgs": return True if node_name.startswith("nixpkgs") and locked_obj.get("type") == "path": return True except AttributeError: return False return False def _input_node_names(value): if isinstance(value, str): return [value] if isinstance(value, list) and value and isinstance(value[-1], str): # Lock-file override chains store the resolved input node as the last item.
return [value[-1]] return [] def _get_flake_nixpkgs_obj(meta_json): try: nodes = meta_json["locks"]["nodes"] root_name = meta_json["locks"]["root"] root_inputs = nodes[root_name].get("inputs", {}) except (KeyError, TypeError, AttributeError): return None for node_name in _input_node_names(root_inputs.get("nixpkgs")): try: return nodes[node_name]["locked"] except (KeyError, TypeError): continue candidates = [] for node_name, node in nodes.items(): try: locked_obj = node["locked"] except (KeyError, TypeError): continue if _locked_obj_is_nixpkgs(node_name, locked_obj): candidates.append(locked_obj) if len(candidates) == 1: return candidates[0] return None def _get_flake_nixpkgs_val(meta_json, key): nixpkgs_obj = _get_flake_nixpkgs_obj(meta_json) if nixpkgs_obj is None: return None try: return nixpkgs_obj[key] except (KeyError, TypeError): return None def _get_nixpkgs_flakeref_github(meta_json, *, log=LOG): owner = _get_flake_nixpkgs_val(meta_json, "owner") repo = _get_flake_nixpkgs_val(meta_json, "repo") rev = _get_flake_nixpkgs_val(meta_json, "rev") if None in [owner, repo, rev]: log.debug( "owner, repo, or rev not found: %s", _get_flake_nixpkgs_obj(meta_json), ) return None return f"github:{owner}/{repo}?rev={rev}" def _get_nixpkgs_flakeref_git(meta_json, *, log=LOG): url = _get_flake_nixpkgs_val(meta_json, "url") rev = _get_flake_nixpkgs_val(meta_json, "rev") ref = _get_flake_nixpkgs_val(meta_json, "ref") if None in [url, rev, ref]: log.debug("url, rev, or ref not found: %s", _get_flake_nixpkgs_obj(meta_json)) return None return f"git+{url}?ref={ref}&rev={rev}" def _get_nixpkgs_flakeref_path(meta_json, *, log=LOG): path = _get_flake_nixpkgs_val(meta_json, "path") if path is None: log.debug("path not found: %s", _get_flake_nixpkgs_obj(meta_json)) return None return f"path:{path}" def _get_nixpkgs_flakeref_tarball(meta_json, *, log=LOG): url = _get_flake_nixpkgs_val(meta_json, "url") if url is None: log.debug("url not found: %s", _get_flake_nixpkgs_obj(meta_json)) return None return f"{url}" def get_nixpkgs_flakeref(meta_json, *, log=LOG): """Given flake metadata, return the locked nixpkgs flakeref.""" locked_type = _get_flake_nixpkgs_val(meta_json, "type") if locked_type == "github": return _get_nixpkgs_flakeref_github(meta_json, log=log) if locked_type == "git": return _get_nixpkgs_flakeref_git(meta_json, log=log) if locked_type == "path": return _get_nixpkgs_flakeref_path(meta_json, log=log) if locked_type == "tarball": return _get_nixpkgs_flakeref_tarball(meta_json, log=log) log.debug("Unsupported nixpkgs locked type: %s", locked_type) return None def nixref_to_nixpkgs_path( flakeref, *, get_flake_metadata_fn=get_flake_metadata, log=LOG, log_spam=LOG_SPAM, ): """Return the nix store path of the nixpkgs pinned by ``flakeref``.""" if not flakeref: return None log.info("Resolving nixpkgs path for '%s'", flakeref) log.debug("Finding meta-info for nixpkgs pinned in nixref: %s", flakeref) match = re.match(r"([^#]+)#", flakeref) if match: flakeref = match.group(1) log.debug("Stripped target specifier: %s", flakeref) meta_json = get_flake_metadata_fn(flakeref) if not is_nixpkgs_metadata(meta_json): log.debug("non-nixpkgs flakeref: %s", flakeref) nixpkgs_flakeref = get_nixpkgs_flakeref(meta_json, log=log) if not nixpkgs_flakeref: log.warning("Failed parsing locked nixpkgs: %s", flakeref) return None log.log(log_spam, "using nixpkgs_flakeref: %s", nixpkgs_flakeref) meta_json = get_flake_metadata_fn(nixpkgs_flakeref) if not is_nixpkgs_metadata(meta_json): log.warning("Failed reading nixpkgs 
metadata: %s", flakeref) return None return pathlib.Path(meta_json["path"]).absolute() ================================================ FILE: src/nixmeta/main.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2023 Technology Innovation Institute (TII) # SPDX-License-Identifier: Apache-2.0 """Python script for summarizing nixpkgs meta-attributes""" import argparse import pathlib from common.cli_args import add_verbose_argument, add_version_argument from common.errors import SbomnixError from common.log import LOG, set_log_verbosity from common.proc import exit_unless_command_exists from nixmeta.scanner import NixMetaScanner ################################################################################ def _getargs(args=None): """Parse command line arguments""" desc = ( "Summarize nixpkgs meta-attributes from the given nixpkgs version " "to a csv output file." ) epil = "Example: nixmeta --flakeref=github:NixOS/nixpkgs?ref=master" parser = argparse.ArgumentParser(description=desc, epilog=epil) helps = ( "Flake reference specifying the location of the flake " "from which the pinned nixpkgs target version is read. " "The default value is the " "current nixpkgs version in its 'nixos-unstable' branch. " "For more details, see: " "https://nixos.org/manual/nix/stable/command-ref/new-cli/nix3-flake" "#flake-references and " "https://nixos.wiki/wiki/Nix_channels " "(default: --flakeref=github:NixOS/nixpkgs?ref=nixos-unstable)." ) parser.add_argument( "-f", "--flakeref", help=helps, type=str, default="github:NixOS/nixpkgs?ref=nixos-unstable", ) helps = "Path to output file (default: --out=nixmeta.csv)." parser.add_argument( "-o", "--out", help=helps, type=pathlib.Path, default="nixmeta.csv", ) helps = ( "Append to output file - removing duplicate entries - instead of " "completely overwriting possible earlier output file." 
) parser.add_argument( "-a", "--append", help=helps, action="store_true", ) add_version_argument(parser) add_verbose_argument(parser) return parser.parse_args(args) ############################################################################### def main(): """main entry point""" args = _getargs() set_log_verbosity(args.verbose) try: _run(args) except SbomnixError as error: LOG.fatal("%s", error) raise SystemExit(1) from error def _run(args): # Fail early if the following commands are not in PATH exit_unless_command_exists("nix") exit_unless_command_exists("nix-env") # Scan metadata from the flakeref pinned nixpkgs LOG.info("Scanning nixpkgs metadata for '%s'", args.flakeref) scanner = NixMetaScanner() scanner.scan(args.flakeref) # Output to csv file scanner.to_csv(args.out, args.append) ################################################################################ if __name__ == "__main__": main() ################################################################################ ================================================ FILE: src/nixmeta/metadata_json.py ================================================ # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Helpers for flattening nix-env metadata JSON.""" import json import pandas as pd from common import columns as cols from common.log import LOG def parse_meta_entry(meta, key): """Flatten nested metadata values for a single key into a string.""" items = [] if isinstance(meta, dict): items.extend([parse_meta_entry(meta.get(key, ""), key)]) elif isinstance(meta, list): items.extend([parse_meta_entry(item, key) for item in meta]) else: return str(meta) return ";".join(list(filter(None, items))) def parse_json_metadata(json_filename, *, log=LOG): """Parse package metadata from a ``nix-env --json`` output file.""" with open(json_filename, "r", encoding="utf-8") as inf: log.debug('Loading meta-info from "%s"', json_filename) json_dict = json.loads(inf.read()) dict_selected = {} setcol = dict_selected.setdefault for pkg in json_dict.values(): setcol(cols.NAME, []).append(pkg.get("name", "")) setcol("pname", []).append(pkg.get("pname", "")) setcol(cols.VERSION, []).append(pkg.get("version", "")) meta = pkg.get("meta", {}) setcol("meta_homepage", []).append(parse_meta_entry(meta, key="homepage")) setcol("meta_unfree", []).append(meta.get("unfree", "")) setcol("meta_description", []).append(meta.get("description", "")) setcol("meta_position", []).append(meta.get("position", "")) meta_license = meta.get("license", {}) setcol("meta_license_short", []).append( parse_meta_entry(meta_license, key="shortName") ) setcol("meta_license_spdxid", []).append( parse_meta_entry(meta_license, key="spdxId") ) meta_maintainers = meta.get("maintainers", {}) setcol("meta_maintainers_email", []).append( parse_meta_entry(meta_maintainers, key="email") ) return pd.DataFrame(dict_selected).astype(str) ================================================ FILE: src/nixmeta/scanner.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2023 Technology Innovation Institute (TII) # SPDX-License-Identifier: Apache-2.0 """Summarize nixpkgs meta-attributes""" import pathlib import subprocess from tempfile import NamedTemporaryFile import pandas as pd from common.df import df_from_csv_file, df_to_csv_file from common.log import LOG, LOG_SPAM from common.proc import exec_cmd, nix_cmd from nixmeta.flake_metadata import get_flake_metadata, nixref_to_nixpkgs_path from 
nixmeta.metadata_json import parse_json_metadata ############################################################################### def _run_nix_env_metadata(cmd, stdout): """Run nix-env metadata scan while keeping successful eval warnings quiet.""" ret = subprocess.run( cmd, encoding="utf-8", check=True, stdout=stdout, stderr=subprocess.PIPE, ) if ret.stderr: LOG.debug("nix-env metadata stderr:\n%s", ret.stderr.strip()) class NixMetaScanner: """Scan nixpkgs meta-info""" def __init__(self): self.df_meta = None def scan(self, nixref): """ Scan nixpkgs meta-info using nixpkgs version pinned in nixref; nixref can be a nix store path, flakeref or dynamical attribute set. """ nixpkgs_path = nixref_to_nixpkgs_path( nixref, get_flake_metadata_fn=lambda flakeref: get_flake_metadata( flakeref, exec_cmd_fn=exec_cmd, nix_cmd_fn=nix_cmd, log=LOG, ), log=LOG, log_spam=LOG_SPAM, ) if not nixpkgs_path: # try format which is understood by nix-env: # https://ianthehenry.com/posts/how-to-learn-nix/chipping-away-at-flakes/ # ownpkgs-nix-env.nix: # { ... }: # (builtins.getFlake "/tmp/ownpkgs-special-unstable"). # outputs.packages.${builtins.currentSystem} # and execute # NIX_PATH="nixpkgs=/tmp/ownpkgs-special-unstable/ownpkgs-nix-env.nix" # sbomnix /nix/store/outputpath-for-ownpkgs-special-unstable-flake-output nixpkgs_path = pathlib.Path(nixref) self.scan_path(nixpkgs_path) def scan_path(self, nixpkgs_path): """Scan nixpkgs meta-info using an already resolved nixpkgs path.""" nixpkgs_path = pathlib.Path(nixpkgs_path) if not nixpkgs_path.exists(): LOG.warning("Nixpkgs not in nix store: %s", nixpkgs_path.as_posix()) return LOG.debug("nixpkgs: %s", nixpkgs_path) self._read_nixpkgs_meta(nixpkgs_path) def scan_expression(self, expression, *, impure=False): """Scan nixpkgs meta-info using an expression returning a package set.""" prefix = "nixmeta_expr_" suffix = ".nix" with NamedTemporaryFile( mode="w", delete=True, encoding="utf-8", prefix=prefix, suffix=suffix, ) as f: f.write(expression) f.flush() self._read_nixpkgs_meta( pathlib.Path(f.name), enable_flakes=True, impure=impure, ) def to_csv(self, csv_path, append=False): """Export meta-info to a csv file""" csv_path = pathlib.Path(csv_path) if append and csv_path.exists(): df = df_from_csv_file(csv_path) self.df_meta = pd.concat([self.df_meta, df], ignore_index=True) self._drop_duplicates() if self.df_meta is None or self.df_meta.empty: LOG.info("Nothing to output") return csv_path.parent.mkdir(parents=True, exist_ok=True) df_to_csv_file(self.df_meta, csv_path.absolute().as_posix()) def to_df(self): """Return meta-info as dataframe""" return self.df_meta def _read_nixpkgs_meta( self, nixpkgs_path, *, enable_flakes=False, impure=False, ): prefix = "nixmeta_" suffix = ".json" with NamedTemporaryFile(delete=True, prefix=prefix, suffix=suffix) as f: LOG.info("Reading nixpkgs metadata from '%s'", nixpkgs_path.as_posix()) cmd = [ "nix-env", "-qa", "--meta", "--json", "-f", f"{nixpkgs_path.as_posix()}", ] if enable_flakes: cmd.extend(["--option", "experimental-features", "nix-command flakes"]) if impure: cmd.append("--impure") cmd.extend(["--arg", "config", "{allowAliases=false;}"]) _run_nix_env_metadata(cmd, stdout=f) LOG.debug("Generated meta.json: %s", f.name) LOG.info("Parsing nixpkgs metadata") self.df_meta = parse_json_metadata(f.name, log=LOG) self._drop_duplicates() def _drop_duplicates(self): if self.df_meta is None or self.df_meta.empty: return self.df_meta = self.df_meta.astype(str) self.df_meta.fillna("", inplace=True) uids = [ "name", "version", 
"meta_license_short", "meta_license_spdxid", "meta_homepage", ] self.df_meta.sort_values(by=uids, inplace=True) self.df_meta.drop_duplicates(subset=uids, keep="last", inplace=True) ############################################################################### ================================================ FILE: src/nixupdate/__init__.py ================================================ # SPDX-FileCopyrightText: 2023 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 ================================================ FILE: src/nixupdate/nix_outdated.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2023 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Command-line tool to list outdated nix dependencies in priority order""" import os from argparse import ArgumentParser from tempfile import NamedTemporaryFile from common.cli_args import add_verbose_argument, add_version_argument from common.errors import SbomnixError from common.log import LOG, set_log_verbosity from common.proc import exec_cmd from nixupdate.nix_visualize import ( nix_visualize_csv_to_df as _nix_visualize_csv_to_df_impl, ) from nixupdate.nix_visualize import run_nix_visualize as _run_nix_visualize_impl from nixupdate.pipeline import OutdatedScanHooks, collect_outdated_scan_data from nixupdate.pipeline import query_repology as _query_repology_impl from nixupdate.report import console_out_table as _console_out_table_impl from nixupdate.report import drop_newest_duplicates as _drop_newest_dups_impl from nixupdate.report import generate_report_df as _generate_report_df_impl from nixupdate.report import write_report as _write_report_impl from sbomnix.cli_utils import generate_temp_sbom, resolve_nix_target ############################################################################### def getargs(args=None): """Parse command line arguments""" desc = ( "Command line tool to list outdated nix dependencies for NIXREF. " "By default, the script outputs runtime dependencies of " "NIXREF that appear outdated in nixpkgs 'nix_unstable' channel - the " "list of output packages would potentially need a PR to update the " "package in nixpkgs to the latest upstream release version specified " "in the output table column 'version_upstream'. " "The list of output packages is in priority " "order based on how many other packages depend on the potentially " "outdated package." ) epil = f"Example: ./{os.path.basename(__file__)} '/nix/path/or/flakeref'" parser = ArgumentParser(description=desc, epilog=epil) # Arguments that specify the target: helps = ( "Target nix store path (e.g. derivation file or nix output path) or flakeref" ) parser.add_argument("NIXREF", help=helps, type=str) # Other arguments: helps = ( "Include locally outdated dependencies to the output. " "By default, the script " "outputs dependencies outdated in nixpkgs. With this option " "the tool also includes to the output the dependencies that are " "outdated locally (i.e. would need nix flake update or similar). " "The output list includes runtime dependencies that are locally " "outdated and would have an update available in nixpkgs nix_unstable " "channel, as well as runtime " "dependencies that are outdated in nixpkgs nix_unstable channel " "that would have an update in the package's upstream repository." ) parser.add_argument("--local", help=helps, action="store_true") helps = "Scan target buildtime instead of runtime dependencies." 
parser.add_argument("--buildtime", help=helps, action="store_true") helps = "Path to output file (default: ./nix_outdated.csv)" parser.add_argument( "-o", "--out", nargs="?", help=helps, default="nix_outdated.csv" ) add_version_argument(parser) add_verbose_argument(parser) return parser.parse_args(args) ################################################################################ def _query_repology(sbompath): return _query_repology_impl(sbompath) def _run_nix_visualize(target_path): return _run_nix_visualize_impl( target_path, exec_cmd_fn=exec_cmd, tempfile_factory=NamedTemporaryFile, log=LOG, ) def _nix_visualize_csv_to_df(csvpath): LOG.debug("Transforming nix-visualize csv to dataframe") return _nix_visualize_csv_to_df_impl(csvpath) def _generate_report_df(df_nv, df_repo): return _generate_report_df_impl(df_nv, df_repo, log=LOG) def _drop_newest_dups(df_con, df_cmp): return _drop_newest_dups_impl(df_con, df_cmp, log=LOG) def _report(df, args): _write_report_impl(df, args, log=LOG) def _console_out_table(table, local=False, buildtime=False): _console_out_table_impl(table, local=local, buildtime=buildtime, log=LOG) ################################################################################ def main(): """main entry point""" args = getargs() set_log_verbosity(args.verbose) try: _run(args) except SbomnixError as error: LOG.fatal("%s", error) raise SystemExit(1) from error def _run(args): target = resolve_nix_target(args.NIXREF, buildtime=args.buildtime) scan_data = collect_outdated_scan_data( target.path, args.buildtime, hooks=OutdatedScanHooks( query_repology=_query_repology, generate_temp_sbom=generate_temp_sbom, run_nix_visualize=_run_nix_visualize, parse_nix_visualize=_nix_visualize_csv_to_df, ), ) df_report = _generate_report_df(scan_data.nix_visualize, scan_data.repology) _report(df_report, args) ################################################################################ if __name__ == "__main__": main() ################################################################################ ================================================ FILE: src/nixupdate/nix_visualize.py ================================================ # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Helpers for running and parsing ``nix-visualize`` output.""" import pathlib from tempfile import NamedTemporaryFile from common import columns as cols from common.df import df_from_csv_file from common.log import LOG, LOG_VERBOSE from common.package_names import nix_to_repology_pkg_name from common.proc import exec_cmd def run_nix_visualize( target_path, *, exec_cmd_fn=exec_cmd, tempfile_factory=NamedTemporaryFile, log=LOG, ): """Run ``nix-visualize`` and return the generated CSV path.""" log.log(LOG_VERBOSE, "Running nix-visualize") with tempfile_factory( delete=False, prefix="nix-visualize_", suffix=".csv", ) as outfile: cmd = ["nix-visualize", f"--output={outfile.name}", target_path] exec_cmd_fn(cmd) return pathlib.Path(outfile.name) def nix_visualize_csv_to_df(csvpath): """Convert ``nix-visualize`` CSV output into Repology-comparable rows.""" df = df_from_csv_file(csvpath) re_split = ( r"^[^-]+?-" r"(.+?)-" r"(\d[-_.0-9pf]*g?b?(?:pre[0-9])*(?:\+git[0-9]*)?)" r"(?:-lib|-bin|-env|-man|-su|-dev|-doc|-info|-nc|-host|-p[0-9]+|\.drv|)" r"$" ) df[[cols.PACKAGE, cols.VERSION]] = df[cols.RAW_NAME].str.extract( re_split, expand=True, ) df[cols.PACKAGE] = df.apply( lambda row: nix_to_repology_pkg_name(row.package), axis=1, ) return df 
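# ---------------------------------------------------------------------------
# Editorial sketch, not part of the original module: demonstrates what the
# `re_split` pattern above extracts from a nix-visualize row name, and how the
# extracted pname feeds nix_to_repology_pkg_name(). The store basename below
# is hypothetical.
if __name__ == "__main__":
    import re

    _re_split = re.compile(
        r"^[^-]+?-"
        r"(.+?)-"
        r"(\d[-_.0-9pf]*g?b?(?:pre[0-9])*(?:\+git[0-9]*)?)"
        r"(?:-lib|-bin|-env|-man|-su|-dev|-doc|-info|-nc|-host|-p[0-9]+|\.drv|)"
        r"$"
    )
    _m = _re_split.match("abcd1234efgh-python3.11-requests-2.31.0")
    assert _m is not None
    print(_m.group(1), "|", _m.group(2))  # python3.11-requests | 2.31.0
    print(nix_to_repology_pkg_name(_m.group(1)))  # python:requests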
================================================ FILE: src/nixupdate/pipeline.py ================================================ # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Execution pipeline helpers for ``nix_outdated``.""" import logging from dataclasses import dataclass from typing import Any, Callable import pandas as pd from common.df import df_log from common.log import LOG, LOG_SPAM, LOG_VERBOSE from nixupdate.nix_visualize import nix_visualize_csv_to_df, run_nix_visualize from repology.adapter import RepologyAdapter, RepologyQuery from sbomnix.cli_utils import generate_temp_sbom HookFn = Callable[..., Any] @dataclass class OutdatedScanData: """Collected intermediate dataframes used by ``nix_outdated``.""" repology: pd.DataFrame nix_visualize: pd.DataFrame | None = None def query_repology(sbompath, *, adapter=None, log=LOG): """Query Repology package/version data for a generated SBOM.""" log.log(LOG_VERBOSE, "Querying repology") if adapter is None: adapter = RepologyAdapter() return adapter.query( RepologyQuery( repository="nix_unstable", sbom_cdx=sbompath, ) ) @dataclass class OutdatedScanHooks: """Injectable helpers used by ``collect_outdated_scan_data``.""" query_repology: HookFn = query_repology generate_temp_sbom: HookFn = generate_temp_sbom run_nix_visualize: HookFn = run_nix_visualize parse_nix_visualize: HookFn = nix_visualize_csv_to_df def collect_outdated_scan_data( target_path, buildtime, hooks=None, ): """Collect Repology and ``nix-visualize`` inputs for reporting.""" hooks = OutdatedScanHooks() if hooks is None else hooks dtype = "buildtime" if buildtime else "runtime" LOG.verbose("Checking %s dependencies referenced by '%s'", dtype, target_path) df_nix_visualize = None sbom_artifact = hooks.generate_temp_sbom( target_path, buildtime, prefix="nixdeps_", cdx_suffix=".cdx.json", ) try: sbom_path = sbom_artifact.cdx_path LOG.debug("Using SBOM '%s'", sbom_path) df_repology = hooks.query_repology(sbom_path) finally: if not LOG.isEnabledFor(logging.DEBUG): sbom_artifact.cleanup() df_log(df_repology, LOG_SPAM) if buildtime: LOG.verbose("Not running nix-visualize due to '--buildtime' argument") else: nix_visualize_out = hooks.run_nix_visualize(target_path) LOG.debug("Using nix-visualize out: '%s'", nix_visualize_out) try: df_nix_visualize = hooks.parse_nix_visualize(nix_visualize_out) df_log(df_nix_visualize, LOG_SPAM) finally: if not LOG.isEnabledFor(logging.DEBUG): nix_visualize_out.unlink(missing_ok=True) df_log(df_repology, logging.DEBUG) df_log(df_nix_visualize, logging.DEBUG) return OutdatedScanData( repology=df_repology, nix_visualize=df_nix_visualize, ) ================================================ FILE: src/nixupdate/report.py ================================================ # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Report shaping helpers for ``nix_outdated``.""" import logging from tabulate import tabulate from common import columns as cols from common.df import df_log, df_to_csv_file from common.log import LOG, LOG_SPAM, LOG_VERBOSE def generate_report_df(df_nix_visualize, df_repology, *, log=LOG, log_spam=LOG_SPAM): """Merge Repology and ``nix-visualize`` data into a reporting dataframe.""" if df_nix_visualize is None: df_repology = df_repology.copy(deep=True) df_repology[cols.LEVEL] = "0" df_repology.rename( columns={cols.VERSION: cols.VERSION_REPOLOGY}, inplace=True, ) return df_repology df = df_nix_visualize.merge( 
df_repology, how="left", left_on=[cols.PACKAGE, cols.VERSION], right_on=[cols.PACKAGE, cols.VERSION_SBOM], suffixes=["", "_repology"], ) log.log(log_spam, "Merged nix-visualize and repology data:") df_log(df, log_spam) return df def drop_newest_duplicates(df_console, df_compare, *, log=LOG): """Drop outdated rows when a corresponding ``newest`` row also exists.""" df_ret = df_console.copy(deep=True) for row in df_console.itertuples(): df_pkgs = df_compare[df_compare[cols.PACKAGE] == row.nix_package] df_newest = df_pkgs[df_pkgs[cols.STATUS] == "newest"] if not df_newest.empty: log.debug( "Ignoring outdated package '%s' since newest version is also available", row.nix_package, ) df_ret = df_ret[df_ret.nix_package != row.nix_package] return df_ret def console_out_table(table, *, local=False, buildtime=False, log=LOG): """Write the formatted console table.""" update_target = "in nixpkgs" if local: update_target = "locally" priority = ":" if not buildtime: priority = ( " (in priority order based on how many other " "packages depend on the potentially outdated package):" ) log.info( "Dependencies that need update %s%s\n\n%s\n\n", update_target, priority, table, ) def write_report(df, args, *, log=LOG): """Write the nix-outdated console and CSV reports.""" if df is None or df.empty: log.info("No outdated dependencies found") return log.log(LOG_VERBOSE, "Writing console report") select_cols = { cols.LEVEL: "priority", cols.PACKAGE: "nix_package", cols.VERSION_SBOM: cols.VERSION_LOCAL, cols.VERSION_REPOLOGY: cols.VERSION_NIXPKGS, cols.NEWEST_UPSTREAM_RELEASE: cols.VERSION_UPSTREAM, } if args.local: df_console = df[df[cols.SBOM_VERSION_CLASSIFY] == "sbom_pkg_needs_update"] df_console = df_console.rename(columns=select_cols)[select_cols.values()] df_console.drop_duplicates( df_console.columns.difference(["priority"]), keep="first", inplace=True ) if args.buildtime: df_console = df_console.drop(["priority"], axis=1) table = tabulate( df_console, headers="keys", tablefmt="orgtbl", numalign="center", showindex=False, ) console_out_table(table, local=args.local, buildtime=args.buildtime, log=log) else: df_console = df[df[cols.REPO_VERSION_CLASSIFY] == "repo_pkg_needs_update"] df_console = df_console.rename(columns=select_cols)[select_cols.values()] df_console.drop_duplicates( df_console.columns.difference(["priority"]), keep="first", inplace=True ) df_console = drop_newest_duplicates(df_console, df, log=log) if args.buildtime: df_console = df_console.drop(["priority"], axis=1) table = tabulate( df_console, headers="keys", tablefmt="orgtbl", numalign="center", showindex=False, ) console_out_table(table, local=args.local, buildtime=args.buildtime, log=log) if log.isEnabledFor(logging.DEBUG): df_to_csv_file(df, "df_nixoutdated_merged.csv") df_to_csv_file(df_console, args.out) ================================================ FILE: src/provenance/__init__.py ================================================ # SPDX-FileCopyrightText: 2024 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 ================================================ FILE: src/provenance/dependencies.py ================================================ # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Helpers for provenance dependency resolution.""" import logging from dataclasses import dataclass, field from typing import Any, Callable from common.errors import InvalidNixJsonError from common.log import LOG, LOG_VERBOSE from common.nix_utils import ( 
NIX_PATH_INFO_JSON, nix_path_info_references, parse_nix_derivation_show, ) from common.proc import exec_cmd, nix_cmd from provenance.digests import normalize_digest, output_digest from provenance.nix_commands import exec_required_nix_command from provenance.path_info import query_path_hashes, query_path_info from provenance.subjects import output_path HookFn = Callable[..., Any] @dataclass class DependencyHooks: """Injectable helpers used by provenance dependency resolution.""" exec_cmd_fn: HookFn = exec_cmd query_path_hashes_fn: HookFn = field(default_factory=lambda: query_path_hashes) parse_nix_derivation_show_fn: HookFn = parse_nix_derivation_show normalize_digest_fn: HookFn = normalize_digest output_digest_fn: HookFn = output_digest output_path_fn: HookFn = output_path log: logging.Logger = LOG def derivation_outputs_by_path(infos, hooks=None): """Index derivation info by absolute output path.""" hooks = DependencyHooks() if hooks is None else hooks outputs_by_path = {} for info in infos.values(): if not isinstance(info, dict): continue outputs = info.get("outputs") if not isinstance(outputs, dict): continue env = info.get("env") for name, output in outputs.items(): resolved_output_path = hooks.output_path_fn(name, output, env) if resolved_output_path: outputs_by_path[resolved_output_path] = (info, output) return outputs_by_path def dependency_paths(drv_path, recursive=False, outputs_by_path=None, hooks=None): """Return dependency store paths from structured path-info data.""" hooks = DependencyHooks() if hooks is None else hooks path_infos = query_path_info( [drv_path], exec_cmd_fn=hooks.exec_cmd_fn, recursive=recursive, ) if path_infos is None: return [] if recursive: paths = list(path_infos) for path in outputs_by_path or (): if path not in path_infos: paths.append(path) return paths drv_info = path_infos.get(drv_path) if drv_info is None: raise InvalidNixJsonError( NIX_PATH_INFO_JSON, f"missing path-info record for `{drv_path}`", ) return list(nix_path_info_references(drv_info, drv_path)) def dependency_package(drv, output_hash, infos, outputs_by_path, hooks=None): """Create a dependency package entry with a normalized digest.""" hooks = DependencyHooks() if hooks is None else hooks info = infos.get(drv) output_info = outputs_by_path.get(drv) if output_info: info = output_info[0] digest = hooks.output_digest_fn(output_info[1]) if output_info else None if digest is None: digest = hooks.normalize_digest_fn(output_hash) if digest is None: hooks.log.warning("Cannot determine digest for dependency '%s'", drv) return None package = { "name": drv.split("-", 1)[-1].removesuffix(".drv"), "uri": drv, "digest": digest, } if info: package["name"] = info["name"] if version := info["env"].get("version"): package["annotations"] = {"version": version} return package def get_dependencies(drv_path, recursive=False, hooks=None): """Get dependencies of derivation and parse them into ResourceDescriptors.""" hooks = DependencyHooks() if hooks is None else hooks hooks.log.log( LOG_VERBOSE, "Querying derivation dependencies %s", "recursively" if recursive else "", ) cmd = nix_cmd("derivation", "show", "-r", drv_path) infos = hooks.parse_nix_derivation_show_fn( exec_required_nix_command(cmd, hooks.exec_cmd_fn).stdout, store_path_hint=drv_path, ) outputs_by_path = derivation_outputs_by_path(infos, hooks=hooks) references = dependency_paths( drv_path, recursive=recursive, outputs_by_path=outputs_by_path, hooks=hooks, ) hashes = hooks.query_path_hashes_fn(references, exec_cmd_fn=hooks.exec_cmd_fn) 
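    # Illustrative sketch (not part of the original module): the DependencyHooks
    # dataclass above is the seam that lets tests replace the nix subprocess
    # helpers, e.g. roughly:
    #
    #   from types import SimpleNamespace
    #   fake = DependencyHooks(
    #       exec_cmd_fn=lambda cmd, **kwargs: SimpleNamespace(stdout="{}"),
    #       query_path_hashes_fn=lambda paths, **kwargs: [None] * len(paths),
    #   )
    #   get_dependencies("/nix/store/...-example.drv", hooks=fake)
    #
    # The stub return values and the store path are hypothetical; the
    # repository's tests may wire these hooks differently.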
dependencies = []
    for drv, output_hash in zip(references, hashes, strict=True):
        hooks.log.debug("Creating dependency entry for %s", drv)
        package = dependency_package(
            drv,
            output_hash,
            infos,
            outputs_by_path,
            hooks=hooks,
        )
        if package is not None:
            dependencies.append(package)
    return dependencies


================================================ FILE: src/provenance/digests.py ================================================
# SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII)
#
# SPDX-License-Identifier: Apache-2.0

"""Digest normalization helpers for provenance generation."""

import base64
import binascii
import re

HASH_SIZE_BYTES = {
    "blake3": 32,
    "md5": 16,
    "sha1": 20,
    "sha256": 32,
    "sha512": 64,
}

NIX32_ALPHABET = "0123456789abcdfghijklmnpqrsvwxyz"
NIX32_INDEX = {char: index for index, char in enumerate(NIX32_ALPHABET)}


def canonical_hash_algo(hash_algo):
    """Normalize Nix hash algorithm labels to plain algorithm names."""
    if not hash_algo:
        return None
    return str(hash_algo).removeprefix("r:")


def hash_size_bytes(hash_algo):
    """Return expected digest size for the given algorithm."""
    hash_algo = canonical_hash_algo(hash_algo)
    if hash_algo is None:
        return None
    return HASH_SIZE_BYTES.get(hash_algo)


def decode_nix32(hash_value, size_bytes):
    """Decode nix base32 digest strings into raw bytes."""
    try:
        value = 0
        for char in hash_value:
            value = value * 32 + NIX32_INDEX[char]
    except KeyError:
        return None
    if value.bit_length() > size_bytes * 8:
        return None
    encoded_size = (len(hash_value) * 5 + 7) // 8
    raw = value.to_bytes(encoded_size, "little")
    return raw[:size_bytes].ljust(size_bytes, b"\0")


def decode_hash_bytes(hash_value, hash_algo):
    """Decode known Nix hash encodings into raw bytes."""
    size_bytes = hash_size_bytes(hash_algo)
    if size_bytes is None:
        return None
    if re.fullmatch(rf"[0-9a-f]{{{size_bytes * 2}}}", hash_value):
        return bytes.fromhex(hash_value)
    if len(hash_value) == (size_bytes * 8 + 4) // 5:
        decoded = decode_nix32(hash_value, size_bytes)
        if decoded is not None:
            return decoded
    padding = "=" * (-len(hash_value) % 4)
    try:
        decoded = base64.b64decode(hash_value + padding, validate=True)
    except (ValueError, binascii.Error):
        return None
    if len(decoded) != size_bytes:
        return None
    return decoded


def split_hash_value(hash_value, hash_algo=None):
    """Split a typed hash string into canonical algorithm and raw value."""
    hash_algo = canonical_hash_algo(hash_algo)
    hash_value = str(hash_value).strip()
    if hash_algo:
        for separator in (":", "-"):
            resource_prefix = f"r:{hash_algo}{separator}"
            if hash_value.startswith(resource_prefix):
                return hash_algo, hash_value.removeprefix(resource_prefix)
            prefix = f"{hash_algo}{separator}"
            if hash_value.startswith(prefix):
                return hash_algo, hash_value.removeprefix(prefix)
    match = re.match(
        r"^(?P<algo>(?:r:)?[A-Za-z0-9]+)(?P<sep>[:-])(?P<rest>.+)$",
        hash_value,
    )
    if match:
        return canonical_hash_algo(match.group("algo")), match.group("rest")
    return hash_algo, hash_value


def normalize_digest(hash_value, hash_algo=None):
    """Return digest in a canonical base16 representation."""
    if not hash_value:
        return None
    hash_value = str(hash_value).strip()
    if not hash_value:
        return None
    hash_algo, raw_hash_value = split_hash_value(hash_value, hash_algo=hash_algo)
    if not hash_algo:
        return None
    decoded = decode_hash_bytes(raw_hash_value, hash_algo)
    if decoded is None:
        return None
    return {hash_algo: decoded.hex()}


def output_digest(data, *, normalize_digest_fn=normalize_digest):
    """Return digest from derivation output metadata when available."""
    if not isinstance(data, dict):
        return
None hash_value = data.get("hash") if not hash_value: return None return normalize_digest_fn(hash_value, hash_algo=data.get("hashAlgo")) ================================================ FILE: src/provenance/main.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2024 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Python script that generates SLSA v1.0 provenance file for a nix target""" import argparse import json import os from dataclasses import dataclass from common.cli_args import add_verbose_argument, add_version_argument from common.errors import SbomnixError from common.log import LOG, set_log_verbosity from common.nix_utils import parse_nix_derivation_show from common.proc import exec_cmd, nix_cmd from provenance.dependencies import DependencyHooks, get_dependencies from provenance.digests import normalize_digest, output_digest from provenance.schema import ( SchemaHooks, get_external_parameters, get_internal_parameters, provenance_document, timestamp, ) from provenance.subjects import SubjectHooks, get_subjects, output_path @dataclass class BuildMeta: """Dataclass for build metadata""" build_type: str builder_id: str invocation_id: str build_begin_ts: str build_finished_ts: str external_parameters: str internal_parameters: str def get_env_metadata(): """Read build metadata from env variables""" # these need to be in the same order as the fields in BuildMeta definition env_vars = [ "PROVENANCE_BUILD_TYPE", "PROVENANCE_BUILDER_ID", "PROVENANCE_INVOCATION_ID", "PROVENANCE_TIMESTAMP_BEGIN", "PROVENANCE_TIMESTAMP_FINISHED", "PROVENANCE_EXTERNAL_PARAMS", "PROVENANCE_INTERNAL_PARAMS", ] values = [os.environ.get(name, "") for name in env_vars] LOG.verbose("Reading metadata from environment:") for name, value in zip(env_vars, values, strict=True): LOG.verbose("| %s = %s", name, value) return BuildMeta(*values) def provenance(target: str, metadata: BuildMeta, recursive: bool = False) -> dict: """Create the provenance file""" return provenance_document( target, metadata, recursive=recursive, hooks=SchemaHooks( exec_cmd_fn=exec_cmd, nix_cmd_fn=nix_cmd, parse_nix_derivation_show_fn=parse_nix_derivation_show, get_subjects_fn=lambda outputs, env=None: get_subjects( outputs, env=env, hooks=SubjectHooks( exec_cmd_fn=exec_cmd, normalize_digest_fn=normalize_digest, output_digest_fn=output_digest, output_path_fn=output_path, log=LOG, ), ), get_dependencies_fn=lambda drv_path, recursive=False: get_dependencies( drv_path, recursive=recursive, hooks=DependencyHooks( exec_cmd_fn=exec_cmd, parse_nix_derivation_show_fn=parse_nix_derivation_show, normalize_digest_fn=normalize_digest, output_digest_fn=output_digest, output_path_fn=output_path, log=LOG, ), ), get_external_parameters_fn=get_external_parameters, get_internal_parameters_fn=get_internal_parameters, timestamp_fn=timestamp, log=LOG, ), ) def getargs(args=None): """Parse command line arguments""" parser = argparse.ArgumentParser( prog="nix-provenance", description="Get SLSA v1.0 provenance file from nix flake or derivation", ) parser.add_argument( "target", help="Flake reference or derivation path", ) parser.add_argument( "--recursive", action="store_true", help="Resolve every dependency recursively", ) parser.add_argument( "-o", "--out", help="Path to file where provenance should be saved", default=os.environ.get("PROVENANCE_OUTPUT_FILE"), ) add_verbose_argument(parser) add_version_argument(parser) return parser.parse_args(args) def main(): """main entry point""" 
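    # Illustrative invocation (assumed, not taken from the sources): the build
    # metadata consumed by get_env_metadata() arrives via the PROVENANCE_*
    # environment variables listed above, e.g. roughly:
    #
    #   PROVENANCE_BUILDER_ID=https://builder.example.org \
    #   PROVENANCE_TIMESTAMP_BEGIN=1700000000 \
    #   nix-provenance nixpkgs#hello --out provenance.json
    #
    # The builder id and flake reference are hypothetical placeholders.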
args = getargs() set_log_verbosity(args.verbose) build_metadata = get_env_metadata() try: schema = provenance(args.target, build_metadata, recursive=args.recursive) except SbomnixError as error: LOG.fatal("%s", error) raise SystemExit(1) from error if args.out: with open(args.out, "w", encoding="utf-8") as filepath: LOG.info("Writing provenance file into '%s'", args.out) filepath.write(json.dumps(schema, indent=2)) else: print(json.dumps(schema, indent=2)) if __name__ == "__main__": main() ================================================ FILE: src/provenance/nix_commands.py ================================================ # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Nix command helpers for provenance generation.""" import subprocess from common.errors import NixCommandError def exec_required_nix_command(cmd, exec_cmd_fn): """Run a required Nix command and raise a user-facing error on failure.""" try: return exec_cmd_fn(cmd) except subprocess.CalledProcessError as error: raise NixCommandError( cmd, stderr=error.stderr, stdout=error.stdout, ) from None ================================================ FILE: src/provenance/path_info.py ================================================ # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Structured Nix path-info helpers for provenance generation.""" import errno import subprocess from common.errors import InvalidNixJsonError, NixCommandError from common.nix_utils import ( NIX_PATH_INFO_JSON, load_nix_json, nix_path_info_nar_hash, normalize_nix_path_info, ) from common.proc import exec_cmd, nix_cmd def query_path_info( paths, *, exec_cmd_fn=exec_cmd, recursive=False, raise_on_error=True, ): """Return structured path-info records indexed by store path.""" if not paths: return {} recursive_args = ["--recursive"] if recursive else [] cmd = nix_cmd( "path-info", "--json", "--json-format", "1", *recursive_args, *paths, ) try: ret = exec_cmd_fn(cmd, raise_on_error=raise_on_error) except subprocess.CalledProcessError as error: raise NixCommandError( cmd, stderr=error.stderr, stdout=error.stdout, ) from None if ret is None: return None return normalize_nix_path_info(load_nix_json(ret.stdout, NIX_PATH_INFO_JSON)) def query_path_hashes(paths, *, exec_cmd_fn=exec_cmd): """Query NAR hashes for paths, splitting requests that exceed argv limits.""" paths = list(paths) if not paths: return [] try: path_infos = query_path_info(paths, exec_cmd_fn=exec_cmd_fn) except OSError as error: if error.errno != errno.E2BIG or len(paths) == 1: raise midpoint = len(paths) // 2 return query_path_hashes( paths[:midpoint], exec_cmd_fn=exec_cmd_fn, ) + query_path_hashes( paths[midpoint:], exec_cmd_fn=exec_cmd_fn, ) if path_infos is None: return [] return [nar_hash_for_path(path_infos, path) for path in paths] def nar_hash_for_path(path_infos, path): """Return the NAR hash for one path-info record.""" info = path_infos.get(path) if info is None: raise InvalidNixJsonError( NIX_PATH_INFO_JSON, f"missing path-info record for `{path}`", ) return nix_path_info_nar_hash(info, path) ================================================ FILE: src/provenance/schema.py ================================================ # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Helpers for assembling provenance documents.""" import json import logging from dataclasses import dataclass from datetime import datetime, 
timezone from typing import Any, Callable, Protocol from common.errors import InvalidNixJsonError, MissingNixDerivationMetadataError from common.log import LOG, LOG_VERBOSE from common.nix_utils import NIX_DERIVATION_SHOW_JSON, parse_nix_derivation_show from common.proc import exec_cmd, nix_cmd from provenance.dependencies import get_dependencies from provenance.nix_commands import exec_required_nix_command from provenance.subjects import get_subjects JsonDict = dict[str, Any] HookFn = Callable[..., Any] class ProvenanceMetadata(Protocol): """Build metadata fields consumed by provenance schema assembly.""" build_type: str builder_id: str invocation_id: str build_begin_ts: str build_finished_ts: str external_parameters: str internal_parameters: str def get_external_parameters(metadata: ProvenanceMetadata) -> JsonDict: """Get externalParameters from env variable.""" params = json.loads(metadata.external_parameters or "{}") return {key: value for key, value in params.items() if value} def get_internal_parameters(metadata: ProvenanceMetadata) -> JsonDict: """Get internalParameters from env variable.""" params = json.loads(metadata.internal_parameters or "{}") return {key: value for key, value in params.items() if value} def timestamp(unix_time: str) -> str: """Turn unix timestamp into RFC 3339 format.""" if not unix_time: return "" dtime = datetime.fromtimestamp( int(unix_time), tz=timezone.utc, ) return dtime.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-4] + "Z" @dataclass class SchemaHooks: """Injectable helpers used by provenance schema assembly.""" exec_cmd_fn: HookFn = exec_cmd nix_cmd_fn: HookFn = nix_cmd parse_nix_derivation_show_fn: HookFn = parse_nix_derivation_show get_subjects_fn: HookFn = get_subjects get_dependencies_fn: HookFn = get_dependencies get_external_parameters_fn: HookFn = get_external_parameters get_internal_parameters_fn: HookFn = get_internal_parameters timestamp_fn: HookFn = timestamp log: logging.Logger = LOG def provenance_document( target: str, metadata: ProvenanceMetadata, recursive: bool = False, hooks: SchemaHooks | None = None, ) -> JsonDict: """Create the provenance file.""" hooks = SchemaHooks() if hooks is None else hooks hooks.log.info("Generating provenance file for '%s'", target) cmd = hooks.nix_cmd_fn("derivation", "show", target) drv_json = hooks.parse_nix_derivation_show_fn( exec_required_nix_command(cmd, hooks.exec_cmd_fn).stdout, store_path_hint=target, ) if not drv_json: raise MissingNixDerivationMetadataError(target) drv_path, drv_json = next(iter(drv_json.items())) outputs = drv_json.get("outputs") if outputs is None: raise InvalidNixJsonError( NIX_DERIVATION_SHOW_JSON, f"missing `outputs` in target derivation `{drv_path}`", ) hooks.log.log(LOG_VERBOSE, "Resolved derivation path is '%s'", drv_path) return { "_type": "https://in-toto.io/Statement/v1", "subject": hooks.get_subjects_fn(outputs, env=drv_json.get("env")), "predicateType": "https://slsa.dev/provenance/v1", "predicate": { "buildDefinition": { "buildType": metadata.build_type, "externalParameters": hooks.get_external_parameters_fn(metadata), "internalParameters": hooks.get_internal_parameters_fn(metadata), "resolvedDependencies": hooks.get_dependencies_fn(drv_path, recursive), }, "runDetails": { "builder": { "id": metadata.builder_id, "builderDependencies": [], "version": {}, }, "metadata": { "invocationId": metadata.invocation_id, "startedOn": hooks.timestamp_fn(metadata.build_begin_ts), "finishedOn": hooks.timestamp_fn(metadata.build_finished_ts), }, "byproducts": [], }, }, } 
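A minimal usage sketch of the helpers above (not part of the repository;
assumes src/ is importable and uses hypothetical metadata values):

import json
from types import SimpleNamespace

from provenance.schema import get_external_parameters, timestamp

meta = SimpleNamespace(
    external_parameters=json.dumps({"target": "hello", "unused": ""}),
)
# Falsy values are dropped from the parameter dictionary:
assert get_external_parameters(meta) == {"target": "hello"}
# Unix timestamps render as RFC 3339 with two fractional digits:
assert timestamp("1700000000") == "2023-11-14T22:13:20.00Z"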
================================================ FILE: src/provenance/subjects.py ================================================ # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Helpers for deriving in-toto subjects from nix outputs.""" import logging from collections.abc import Mapping from dataclasses import dataclass from typing import Any, Callable from common.log import LOG, LOG_VERBOSE from common.proc import ExecCmdFn, exec_cmd from provenance.digests import normalize_digest, output_digest from provenance.path_info import nar_hash_for_path, query_path_info Digest = dict[str, str] Subject = dict[str, Any] OutputPathFn = Callable[[str, Any, Mapping[str, str] | None], str | None] OutputDigestFn = Callable[[Any], Digest | None] NormalizeDigestFn = Callable[..., Digest | None] def output_path( name: str, output: Any, env: Mapping[str, str] | None = None, ) -> str | None: """Return the resolved absolute output path from outputs or env.""" if isinstance(output, dict) and output.get("path"): return str(output["path"]) env = env or {} return env.get(name) @dataclass class SubjectHooks: """Injectable helpers used by ``get_subjects``.""" exec_cmd_fn: ExecCmdFn = exec_cmd normalize_digest_fn: NormalizeDigestFn = normalize_digest output_digest_fn: OutputDigestFn = output_digest output_path_fn: OutputPathFn = output_path log: logging.Logger = LOG def get_subjects( outputs: Mapping[str, Any], env: Mapping[str, str] | None = None, hooks: SubjectHooks | None = None, ) -> list[Subject]: """Parse derivation outputs into in-toto subjects.""" hooks = SubjectHooks() if hooks is None else hooks hooks.log.log(LOG_VERBOSE, "Parsing derivation outputs") env = env or {} subjects: list[Subject] = [] for name, data in outputs.items(): resolved_output_path = hooks.output_path_fn(name, data, env) subject: Subject = {"name": name} resolved_output_digest = hooks.output_digest_fn(data) if resolved_output_path: subject["uri"] = resolved_output_path if resolved_output_digest is not None: subject["digest"] = resolved_output_digest hooks.log.log( LOG_VERBOSE, "Using derivation metadata hash for fixed-output output '%s'", name, ) elif resolved_output_path: path_infos = query_path_info( [resolved_output_path], exec_cmd_fn=hooks.exec_cmd_fn, raise_on_error=False, ) if path_infos is None or resolved_output_path not in path_infos: hooks.log.warning( "Derivation output '%s' was not found in the nix store, " "assuming it was not built.", name, ) continue digest = hooks.normalize_digest_fn( nar_hash_for_path(path_infos, resolved_output_path) ) if digest is None: hooks.log.warning( "Cannot normalize NAR hash for derivation output '%s'", name, ) continue subject["digest"] = digest else: hooks.log.warning( "Cannot determine path or digest for derivation output '%s'", name, ) continue subjects.append(subject) return subjects ================================================ FILE: src/repology/__init__.py ================================================ # SPDX-FileCopyrightText: 2023 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Repology package exports.""" from . 
import cves, reporting __all__ = ["cves", "reporting"] ================================================ FILE: src/repology/adapter.py ================================================ # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Repology query adapter.""" import pathlib import re import urllib.parse from dataclasses import dataclass from typing import Optional, cast import numpy as np import pandas as pd import repology.exceptions from common import columns as cols from common.df import df_regex_filter from common.log import LOG from repology.session import DEFAULT_REPOLOGY_SESSION, REPOLOGY_REQUEST_TIMEOUT from .cves import parse_cve_html from .projects_parser import parse_projects_search_html from .sbom import ( is_ignored_sbom_package, make_sbom_status_row, merge_sbom_fields, parse_cdx_sbom, sbom_row_classify, ) REPOLOGY_PROJECTS_URL = "https://repology.org/projects/" REPOLOGY_PROJECT_URL = "https://repology.org/project/" @dataclass class RepologyQuery: """Repology query parameters independent of the CLI parser.""" repository: str pkg_exact: Optional[str] = None pkg_search: Optional[str] = None sbom_cdx: Optional[pathlib.Path] = None re_package: Optional[str] = None re_version: Optional[str] = None re_status: Optional[str] = None re_vuln: Optional[str] = None def __post_init__(self): if self.sbom_cdx and not isinstance(self.sbom_cdx, pathlib.Path): self.sbom_cdx = pathlib.Path(self.sbom_cdx) query_modes = ( bool(self.pkg_exact), bool(self.pkg_search), self.sbom_cdx is not None, ) if sum(query_modes) != 1: raise ValueError( "RepologyQuery requires exactly one of pkg_exact, " "pkg_search, or sbom_cdx" ) if not self.repository: raise ValueError("RepologyQuery requires a repository name") def repo_row_classify(row): """Classify repository-side version status.""" if row.status == "outdated": return "repo_pkg_needs_update" return "" class RepologyAdapter: """Query and parse Repology package data.""" def __init__(self, session=None, request_timeout=REPOLOGY_REQUEST_TIMEOUT): self.session = DEFAULT_REPOLOGY_SESSION if session is None else session self.request_timeout = request_timeout self.url_projects = REPOLOGY_PROJECTS_URL self._reset_state() def _reset_state(self): self.processed = set() self.pkgs_dict = {} self.df = pd.DataFrame() self.urlq = None self.df_sbom = None def _packages_to_df(self, query, re_pkg_internal=None): if not self.pkgs_dict: return LOG.debug("packages in pkgs_dict: %s", len(self.pkgs_dict[cols.PACKAGE])) df: pd.DataFrame = pd.DataFrame.from_dict(self.pkgs_dict) df_cols = list(df.columns) if query.repository and cols.REPO in df_cols: df = df_regex_filter(df, cols.REPO, re.escape(query.repository)) if re_pkg_internal and cols.PACKAGE in df_cols: re_pkg_internal = f"^(?:[a-z0-9]+:)?{re.escape(re_pkg_internal)}$" df = df_regex_filter(df, cols.PACKAGE, re_pkg_internal) if query.re_package and cols.PACKAGE in df_cols: df = df_regex_filter(df, cols.PACKAGE, query.re_package) if query.re_version and cols.VERSION in df_cols: df = df_regex_filter(df, cols.VERSION, query.re_version) if query.re_status and cols.STATUS in df_cols: df = df_regex_filter(df, cols.STATUS, query.re_status) if query.re_vuln and cols.POTENTIALLY_VULNERABLE in df_cols: df = df_regex_filter(df, cols.POTENTIALLY_VULNERABLE, query.re_vuln) self.df = pd.concat([self.df, cast(pd.DataFrame, df)]) self.df.replace(np.nan, "", regex=True, inplace=True) self.df.drop_duplicates(keep="first", inplace=True) 
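        # Illustrative note (not from the original module): with
        # re_pkg_internal="openssl" (a hypothetical name), the anchored
        # pattern built above is r"^(?:[a-z0-9]+:)?openssl$", so it matches
        # "openssl" as well as prefixed project names like "python:openssl".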
self.df.sort_values(by=self.df.columns.values.tolist(), inplace=True) self.df.reset_index(drop=True, inplace=True) def _append_package_rows(self, package_rows): for package_row in package_rows: for key, value in package_row.items(): self.pkgs_dict.setdefault(key, []).append(value) def _get_resp(self, url): LOG.debug("GET: %s", url) resp = self.session.get(url, timeout=self.request_timeout) LOG.debug("resp.status_code: %s", resp.status_code) if resp.status_code == 404: LOG.fatal("No matching packages found") raise repology.exceptions.RepologyNoMatchingPackages resp.raise_for_status() return resp def query_cves(self, pkg_name, pkg_version): """Query vulnerabilities for a single package/version pair.""" pkg = urllib.parse.quote(pkg_name) ver = urllib.parse.quote(pkg_version) query = f"{REPOLOGY_PROJECT_URL}{pkg}/cves?version={ver}" LOG.debug("GET: %s", query) resp = self.session.get(query, timeout=self.request_timeout) LOG.debug("resp.status_code: %s", resp.status_code) if resp.status_code == 404: LOG.warning("Repology package '%s' not found", pkg_name) return None resp.raise_for_status() return parse_cve_html(resp.text, pkg_name, pkg_version) def _query_pkg_search(self, pkg_search, repository, stop_pkg=None): pkg = urllib.parse.quote(pkg_search) repo = urllib.parse.quote(repository) search_term = f"?search={pkg}&inrepo={repo}" url = f"{self.url_projects}{search_term}" self.urlq = url while True: resp = self._get_resp(url) url_last = url page = parse_projects_search_html( resp.text, repository, self.processed, pkg_stop=stop_pkg, ) self.processed = page.processed_ids self._append_package_rows(page.package_rows) next_query_project = page.next_query_project if not next_query_project: LOG.debug("stopping (no next_query_project)") break next_query_project = urllib.parse.quote(next_query_project) url = f"{self.url_projects}{next_query_project}/{search_term}" if url == url_last: LOG.debug("stopping ('%s'=='%s')", url_last, url) break def _query_pkg_exact(self, pkg_name, repository): self._query_pkg_search(pkg_name, repository, stop_pkg=pkg_name) def _query_sbom_cdx(self, query): self.df_sbom = parse_cdx_sbom(query.sbom_cdx) for component in self.df_sbom.to_dict("records"): LOG.debug("Package: %s", component) name = component[cols.NAME] version = component.get(cols.VERSION, "") if not name: LOG.fatal("Missing package name: %s", component) raise repology.exceptions.RepologyUnexpectedResponse pkg_id = f"{query.repository}:{name}" if pkg_id in self.processed: LOG.debug("Package '%s' in sbom already processed", name) self._packages_to_df(query, re_pkg_internal=name) continue if not version: self._append_package_rows( [ make_sbom_status_row( query.repository, name, "", "NO_VERSION", ) ] ) self._packages_to_df(query, re_pkg_internal=name) continue if is_ignored_sbom_package(name): self._append_package_rows( [ make_sbom_status_row( query.repository, name, version, "IGNORED", ) ] ) self._packages_to_df(query, re_pkg_internal=name) continue try: self._query_pkg_exact(name, query.repository) except repology.exceptions.RepologyNoMatchingPackages: LOG.debug("Package '%s' not found in repology", name) if pkg_id not in self.processed: self._append_package_rows( [ make_sbom_status_row( query.repository, name, version, "NOT_FOUND", ) ] ) self._packages_to_df(query, re_pkg_internal=name) self.urlq = self.url_projects def query(self, query): """Query package information from repology.org.""" self._reset_state() if query.pkg_search: self._query_pkg_search(query.pkg_search, query.repository) elif query.pkg_exact: 
query.pkg_exact:
            self._query_pkg_exact(query.pkg_exact, query.repository)
        elif query.sbom_cdx:
            self._query_sbom_cdx(query)
        self._packages_to_df(query, re_pkg_internal=query.pkg_exact)
        if self.df.empty:
            LOG.debug("No matching packages found")
            raise repology.exceptions.RepologyNoMatchingPackages
        if self.df_sbom is not None:
            self.df = merge_sbom_fields(self.df_sbom, self.df)
            self.df[cols.SBOM_VERSION_CLASSIFY] = self.df.apply(
                sbom_row_classify,
                axis=1,
            )
        self.df[cols.REPO_VERSION_CLASSIFY] = self.df.apply(repo_row_classify, axis=1)
        self.df.replace(np.nan, "", regex=True, inplace=True)
        self.df.drop_duplicates(keep="first", inplace=True)
        self.df.sort_values(by=self.df.columns.values.tolist(), inplace=True)
        self.df.reset_index(drop=True, inplace=True)
        return self.df.copy(deep=True)


================================================ FILE: src/repology/cves.py ================================================
# SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII)
#
# SPDX-License-Identifier: Apache-2.0

"""Helpers for parsing Repology CVE pages."""

import re

import numpy as np
import pandas as pd
from bs4 import BeautifulSoup

import repology.exceptions
from common import columns as cols
from common.log import LOG, LOG_SPAM
from common.versioning import parse_version


def is_affected(version, affected_ver_str, *, log=LOG, log_spam=LOG_SPAM):
    """
    Return True if the version number is included in the repology affected
    version string. Also returns True if parsing the affected version string
    fails, in order to avoid false negatives.
    """
    log.log(log_spam, "Affected version(s): %s", affected_ver_str)
    version_local = parse_version(version)
    if not version_local:
        log.fatal("Unexpected local version string: %s", version)
        raise repology.exceptions.RepologyError
    affected_ver_str = f" {affected_ver_str} "
    ver_group = re.compile(
        r"(?P<beg_ind>[(\[])(?P<beg_ver>[^,]*), *(?P<end_ver>[^)\]]*)(?P<end_ind>[\])])"
    )
    matches = re.findall(ver_group, affected_ver_str)
    if matches:
        log.log(log_spam, "Parsed group version(s): %s", matches)
        for impacted_group in matches:
            if len(impacted_group) != 4:
                log.fatal("Unexpected version group: %s", affected_ver_str)
                raise repology.exceptions.RepologyUnexpectedResponse
            beg_ind = impacted_group[0]
            beg_ver_parsed = parse_version(impacted_group[1])
            if not beg_ver_parsed:
                return True
            end_ind = impacted_group[3]
            end_ver_parsed = parse_version(impacted_group[2])
            if not end_ver_parsed:
                return True
            beg_affected = False
            end_affected = False
            if (version_local > beg_ver_parsed) or (
                version_local == beg_ver_parsed and beg_ind == "["
            ):
                beg_affected = True
            if (version_local < end_ver_parsed) or (
                version_local == end_ver_parsed and end_ind == "]"
            ):
                end_affected = True
            if beg_affected and end_affected:
                return True
    ver_one = r"(?<= )(?P<ver>\d[^ $)]+)(?= )"
    matches = re.findall(ver_one, affected_ver_str)
    log.log(log_spam, "Parsed single version(s): %s", matches)
    for impacted_version_text in matches:
        impacted_version = parse_version(impacted_version_text)
        if impacted_version == version_local:
            return True
    return False


def parse_cve_html(html_text, pkg_name, pkg_version, *, log=LOG, log_spam=LOG_SPAM):
    """Parse a Repology CVE page into a dataframe."""
    soup = BeautifulSoup(html_text, "html.parser")
    tables = soup.find_all("table")
    if not tables:
        log.debug("Unexpected response: CVE table missing")
        return pd.DataFrame()
    cve_table = tables[0]
    if cve_table.thead is None or cve_table.tbody is None:
        log.debug("Unexpected response: CVE table missing header or body")
        return pd.DataFrame()
    headers = {}
    for idx, header in enumerate(cve_table.thead.find_all("th")):
headers[header.text] = idx if not headers or "CVE ID" not in headers: log.fatal("Unexpected response") raise repology.exceptions.RepologyUnexpectedResponse log.log(log_spam, headers) cve_table_rows = cve_table.tbody.find_all("tr") cve_dict = {} for row in cve_table_rows: affected_versions = row.find_all("span", {"class": "version version-outdated"}) if not affected_versions: continue cells = row.find_all("td") if not cells: continue cve_row = cells[headers["CVE ID"]] log.log(log_spam, "CVE: %s", cve_row) ver_row = cells[headers["Affected version(s)"]] log.log(log_spam, "Versions: %s", ver_row) if not is_affected(pkg_version, ver_row.text, log=log, log_spam=log_spam): continue cve_info = cve_row.text.strip().split("\n") log.debug("CVE info: %s", cve_info) cve_dict.setdefault(cols.PACKAGE, []).append(pkg_name) cve_dict.setdefault(cols.VERSION, []).append(pkg_version) cve_dict.setdefault("cve", []).append(cve_info[0]) df = pd.DataFrame.from_dict(cve_dict) df.replace(np.nan, "", regex=True, inplace=True) df.drop_duplicates(keep="first", inplace=True) return df ================================================ FILE: src/repology/exceptions.py ================================================ # SPDX-FileCopyrightText: 2023 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Repology exceptions""" class RepologyError(Exception): """Base class for exceptions raised in the repology modules""" pass class RepologyNoMatchingPackages(RepologyError): """Raised when no matching repology packages found""" pass class RepologyUnexpectedResponse(RepologyError): """Raised when repology sends unexpected response""" pass ================================================ FILE: src/repology/projects_parser.py ================================================ # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """HTML parser helpers for Repology project search pages.""" import re from dataclasses import dataclass from bs4 import BeautifulSoup import repology.exceptions from common import columns as cols from common.log import LOG, LOG_SPAM @dataclass class ParsedProjectsPage: """Parsed data extracted from a Repology projects response.""" package_rows: list[dict[str, str]] next_query_project: str processed_ids: set[str] def parse_projects_search_html( # noqa: PLR0912, PLR0914, PLR0915 html, repository, processed_ids=None, pkg_stop=None ): """Parse a Repology package search response.""" processed_ids = set() if processed_ids is None else set(processed_ids) next_query_project = "" package_rows = [] soup = BeautifulSoup(html, "html.parser") tables = soup.find_all("table") if not tables: LOG.debug("Projects table missing: no matching packages") return ParsedProjectsPage(package_rows, next_query_project, processed_ids) projects_table = tables[0] if projects_table.thead is None or projects_table.tbody is None: LOG.fatal("Unexpected response, malformed projects table") raise repology.exceptions.RepologyUnexpectedResponse headers = {} for idx, header in enumerate(projects_table.thead.find_all("th")): headers[header.text] = idx if not headers: LOG.fatal("Unexpected response, missing headers") raise repology.exceptions.RepologyUnexpectedResponse LOG.log(LOG_SPAM, headers) rows = 0 stop_query = False for row in projects_table.tbody.find_all("tr"): cells = row.find_all("td") if not cells: LOG.log(LOG_SPAM, "No columns on row: %s", row) continue rows += 1 LOG.log(LOG_SPAM, "cols: %s", cells) pkg = cells[headers["Project"]] pkg_links = 
pkg.find_all("a") if not pkg_links: LOG.fatal("Unexpected response, missing project link") raise repology.exceptions.RepologyUnexpectedResponse pkg_name = pkg_links[0].string if not stop_query and pkg_stop and pkg_name == pkg_stop: stop_query = True LOG.debug("Stopping queries after parsing the current response") pkg_id = f"{repository}:{pkg_name}" if pkg_id in processed_ids: LOG.debug("Package '%s' in search resp already processed", pkg_name) continue LOG.debug("Adding package '%s' to processed_ids", pkg_name) processed_ids.add(pkg_id) newest = cells[headers["Newest"]] newest_releases = [] for nspan in newest.find_all("span", {"class": "version-newest"}): rel_version = re.sub(r"[^\x00-\x7f]+", "", nspan.text) newest_releases.append(rel_version) sel = cells[headers["Selected"]] statuses = re.findall(r'version-([^"]+)"', str(sel)) vspans = sel.find_all("span", {"class": "version"}) for idx, vspan in enumerate(vspans): ver = re.sub(r"[^\x00-\x7f]+", "", vspan.text) vulnerable = bool(vspan.find_all("span", {"class": "vulnerable"})) status = statuses[idx] package_rows.append( { cols.REPO: repository, cols.PACKAGE: pkg_name, cols.VERSION: ver, cols.STATUS: status, cols.POTENTIALLY_VULNERABLE: str(int(vulnerable)), cols.NEWEST_UPSTREAM_RELEASE: ";".join(newest_releases), } ) LOG.log(LOG_SPAM, "Added: %s:%s:%s", pkg_name, ver, status) if rows == 200 and not stop_query: next_query_project = pkg_name if rows > 200: LOG.warning( "Unexpected response: raising this warning to notify the " "possibility the repology API has changed and might no longer " "match what this client expects" ) return ParsedProjectsPage(package_rows, next_query_project, processed_ids) ================================================ FILE: src/repology/repology_cli.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2023 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Command-line interface to query repology.org for package information.""" import os import pathlib from argparse import SUPPRESS, ArgumentParser, ArgumentTypeError import repology.exceptions from common.cli_args import add_verbose_argument, add_version_argument from common.log import LOG, set_log_verbosity from repology.adapter import RepologyAdapter, RepologyQuery from repology.reporting import write_query_report ############################################################################### def _pkg_str(str_obj): if isinstance(str_obj, str) and len(str_obj) > 0: return str_obj raise ArgumentTypeError("Value must be a non-empty string") def getargs(args=None): """ Parse arguments: by default parses the sys.argv if `args` is not specified, otherwise, parses arguments from the `args` list of strings. This is simply a wrapper for function ArgumentParser.parse_args(), returning argument attributes in argparse.Namespace object. """ desc = "Command line client to query repology.org for package information." 
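    # Illustrative invocation (assumed, not from the sources): besides the
    # --pkg_search example in the epilog below, the SBOM mode reads package
    # names and versions from a CycloneDX document, e.g. roughly:
    #
    #   repology_cli --sbom_cdx ./sbom.cdx.json --repository nix_unstable
    #
    # The command name and SBOM path are hypothetical; results are written to
    # ./repology_report.csv unless --out is given.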
epil = ( f"Example: ./{os.path.basename(__file__)} --pkg_search 'firef' " " --repository 'nix_unstable'" ) parser = ArgumentParser(description=desc, epilog=epil, add_help=False) required = parser.add_argument_group( "Required arguments", "Following arguments are mutually exclusive:", ) exclusiveq = required.add_mutually_exclusive_group(required=True) requiredo = parser.add_argument_group("Required other arguments") filtergr = parser.add_argument_group( "Optional output filter arguments (regular expressions)" ) optional = parser.add_argument_group("Optional other arguments") helps = "Show this help message and exit" optional.add_argument("-h", "--help", action="help", default=SUPPRESS, help=helps) helps = "Package name exact match (see: https://repology.org/projects/)" exclusiveq.add_argument("--pkg_exact", help=helps, type=_pkg_str) helps = "Package name search term (see: https://repology.org/projects/)" exclusiveq.add_argument("--pkg_search", help=helps, type=_pkg_str) helps = "Read the package names and versions from the given cdx SBOM" exclusiveq.add_argument("--sbom_cdx", help=helps, type=pathlib.Path) helps = "Repository name exact match (see: https://repology.org/repositories)" requiredo.add_argument( "--repository", required=True, help=helps, type=str, default="" ) helps = "Filter reported results based on package name" filtergr.add_argument("-p", "--re_package", help=helps, type=str, default=None) helps = "Filter reported results based on version string" filtergr.add_argument("-V", "--re_version", help=helps, type=str, default=None) helps = "Filter reported results based on status string" filtergr.add_argument("-s", "--re_status", help=helps, type=str, default=None) helps = "Filter reported results based on vulnerability status" filtergr.add_argument("-c", "--re_vuln", help=helps, type=str, default=None) helps = "Summarize output result statistics" optional.add_argument("--stats", help=helps, action="store_true") add_verbose_argument(optional, root_parser=parser) helps = "Path to output report file (default: ./repology_report.csv)" optional.add_argument("-o", "--out", help=helps, default="repology_report.csv") add_version_argument(optional) if args: return parser.parse_args(args) return parser.parse_args() ################################################################################ def _query_from_args(args): return RepologyQuery( repository=args.repository, pkg_exact=args.pkg_exact, pkg_search=args.pkg_search, sbom_cdx=args.sbom_cdx, re_package=args.re_package, re_version=args.re_version, re_status=args.re_status, re_vuln=args.re_vuln, ) class Repology: """Compatibility wrapper that keeps CLI reporting separate from queries.""" def __init__(self, adapter=None): self.adapter = RepologyAdapter() if adapter is None else adapter self.df = None self.urlq = None self.df_sbom = None def query(self, args, stdout_report=True, file_report=True): """Query package information from repology.org.""" if not file_report: args.out = None self.df = self.adapter.query(_query_from_args(args)) self.urlq = self.adapter.urlq self.df_sbom = self.adapter.df_sbom if stdout_report or args.out is not None: write_query_report( self.df, args, query_url=self.urlq, df_sbom=self.df_sbom, console_report=stdout_report, ) return self.df.copy(deep=True) ################################################################################ def main(): """main entry point""" args = getargs() set_log_verbosity(args.verbose) repology_cli = Repology() try: repology_cli.query(args) except 
repology.exceptions.RepologyNoMatchingPackages:
        LOG.warning("No matching packages found")


################################################################################

if __name__ == "__main__":
    main()

################################################################################


================================================ FILE: src/repology/repology_cve.py ================================================
#!/usr/bin/env python3

# SPDX-FileCopyrightText: 2023 Technology Innovation Institute (TII)
#
# SPDX-License-Identifier: Apache-2.0

"""Command-line interface to query CVE info from repology.org."""

import os
from argparse import ArgumentParser, ArgumentTypeError

from common.cli_args import add_verbose_argument, add_version_argument
from common.df import df_to_csv_file
from common.log import set_log_verbosity
from repology.adapter import RepologyAdapter
from repology.reporting import report_cves
from repology.session import REPOLOGY_REQUEST_TIMEOUT

###############################################################################


def _pkg_str(str_obj):
    if isinstance(str_obj, str) and len(str_obj) > 0:
        return str_obj
    raise ArgumentTypeError("Value must be a non-empty string")


def getargs(args=None):
    """Parse command line arguments."""
    desc = (
        "Query repology.org for CVEs that impact package PKG_NAME version PKG_VERSION."
    )
    epil = f"Example: ./{os.path.basename(__file__)} openssl 3.1.0"
    parser = ArgumentParser(description=desc, epilog=epil)
    helps = "Target package name"
    parser.add_argument("PKG_NAME", help=helps, type=_pkg_str)
    helps = "Target package version"
    parser.add_argument("PKG_VERSION", help=helps, type=str)
    add_verbose_argument(parser, max_level=2)
    helps = "Path to output file (default: ./repology_cves.csv)"
    parser.add_argument(
        "-o", "--out", nargs="?", help=helps, default="repology_cves.csv"
    )
    add_version_argument(parser)
    return parser.parse_args(args)


################################################################################


def query_cve(
    pkg_name, pkg_version, session=None, request_timeout=REPOLOGY_REQUEST_TIMEOUT
):
    """
    Return vulnerabilities known to repology that impact the given package
    name and version. Results are returned in a pandas dataframe.
""" adapter = RepologyAdapter(session=session, request_timeout=request_timeout) return adapter.query_cves(pkg_name, pkg_version) ################################################################################ def main(): """main entry point.""" args = getargs() set_log_verbosity(args.verbose) df = query_cve(args.PKG_NAME, args.PKG_VERSION) if not report_cves(df): return df_to_csv_file(df, args.out) ################################################################################ if __name__ == "__main__": main() ################################################################################ ================================================ FILE: src/repology/reporting.py ================================================ # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Console and CSV reporting helpers for Repology commands.""" from tabulate import tabulate from common import columns as cols from common.df import df_to_csv_file from common.log import LOG def _stats_sbom(df, *, log=LOG): # noqa: PLR0914 df = df.copy() df = df.drop_duplicates(keep="first", subset=[cols.PACKAGE, cols.VERSION]) repo_rows_n = df.shape[0] repo_skipped_cols = ["NO_VERSION", "IGNORED", "NOT_FOUND"] df_skipped = df[df.status.isin(repo_skipped_cols)] repo_skipped_n = df_skipped.shape[0] repo_skipped_pct = f"{repo_skipped_n / repo_rows_n:.0%}" df_ignored = df[df.status.isin(["IGNORED"])] ignored_n = df_ignored.shape[0] df_no_version = df[df.status.isin(["NO_VERSION"])] no_version_n = df_no_version.shape[0] df_not_found = df[df.status.isin(["NOT_FOUND"])] not_found_n = df_not_found.shape[0] df_repology = df[~df.status.isin(repo_skipped_cols)] repology_rows_n = df_repology.shape[0] sbom_in_repo = f"{repology_rows_n / repo_rows_n:.0%}" sbom_rows = f"Unique packages: {repo_rows_n} ({1:.0%})" sbom_skipped = ( f"sbom packages not in repology: {repo_skipped_n} ({repo_skipped_pct})" ) ignored = f"IGNORED (sbom component is not a package in repology): {ignored_n}" no_version = ( f"NO_VERSION (sbom component is missing the version number): {no_version_n}" ) not_found = f"NOT_FOUND (sbom component was not found in repology): {not_found_n}" sbom_pkgs_in_repo = f"sbom packages in repology: {repology_rows_n} ({sbom_in_repo})" log.info( "\n\tRepology SBOM package statistics:\n" "\t %s\n" "\t ==> %s\n" "\t ==> %s\n" "\t - %s\n" "\t - %s\n" "\t - %s\n", sbom_rows, sbom_pkgs_in_repo, sbom_skipped, ignored, no_version, not_found, ) def _stats_repology(df, *, log=LOG): # noqa: PLR0914 df = df.copy(deep=True) base_cols = ["newest", "devel", "unique", "outdated"] df = df[df.status.isin(base_cols)] df = df.drop_duplicates(keep="first", subset=[cols.PACKAGE, cols.VERSION]) base_rows_n = df.shape[0] if base_rows_n <= 0: log.debug("No base packages, skipping stats") return df_newest = df[df.status.isin(["newest"])] newest_rows_n = df_newest.shape[0] newest_pct = f"{newest_rows_n / base_rows_n:.0%}" df_outdated = df[df.status.isin(["outdated"])] outdated_rows_n = df_outdated.shape[0] outdated_pct = f"{outdated_rows_n / base_rows_n:.0%}" df_dev_uniq = df[df.status.isin(["devel", "unique"])] dev_uniq_rows_n = df_dev_uniq.shape[0] dev_uniq_pct = f"{dev_uniq_rows_n / base_rows_n:.0%}" df_vuln = df[df.potentially_vulnerable.isin(["1"])] vuln_rows_n = df_vuln.shape[0] vuln_pct = f"{vuln_rows_n / base_rows_n:.0%}" base_rows = ( f"Unique compared packages: {base_rows_n} ({1:.0%})\t(status in: {base_cols})" ) new_rows = f"newest: {newest_rows_n} ({newest_pct})" outdated_rows = f"outdated: 
{outdated_rows_n} ({outdated_pct})" dev_uniq_rows = f"devel or unique: {dev_uniq_rows_n} ({dev_uniq_pct})" vuln_rows = f"potentially vulnerable: {vuln_rows_n} ({vuln_pct})" about = "https://repology.org/docs/about" log.info( "\n\tRepology package statistics:\n" "\t (see the status descriptions in: %s)\n" "\t %s\n" "\t ==> %s\n" "\t ==> %s\n" "\t ==> %s\n" "\t ==> %s\n", about, base_rows, new_rows, outdated_rows, dev_uniq_rows, vuln_rows, ) def report_cves(df, *, log=LOG): """Render a CVE table to the console when rows exist.""" if df is None or df.empty: log.warning("No matching vulnerabilities found") return False table = tabulate( df, headers="keys", tablefmt="orgtbl", numalign="center", showindex=False, ) log.info("Repology affected CVE(s)\n\n%s\n\n", table) return True def write_query_report( # noqa: PLR0913 df, args, *, query_url, df_sbom, console_report=True, log=LOG ): """Generate result report to console and to csv file.""" report_df = df.copy(deep=True) console_df = report_df.copy(deep=True) col = cols.NEWEST_UPSTREAM_RELEASE console_df[col] = console_df[col].str.slice(0, 26) console_df = console_df[~console_df.status.isin(["IGNORED", "NO_VERSION"])] console_df = console_df.drop_duplicates(keep="first") if console_report: table = tabulate( console_df, headers="keys", tablefmt="orgtbl", numalign="center", showindex=False, ) log.info( "Repology package info, packages:%s\n\n%s\n\nFor more details, see: %s\n", console_df.shape[0], table, query_url, ) if args.stats: _stats_repology(report_df, log=log) if df_sbom is not None: _stats_sbom(report_df, log=log) if args.out is not None: df_to_csv_file(report_df, args.out) ================================================ FILE: src/repology/sbom.py ================================================ # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """CycloneDX SBOM helpers used by Repology queries.""" import json import re import pandas as pd from common import columns as cols from common.log import LOG from common.package_names import nix_to_repology_pkg_name from common.versioning import parse_version IGNORE_SBOM_PACKAGE_PATTERNS = ( r".*\.gz", r".*\.patch", r".*\.xz", r".*\.bz2", r".*\.zip", r".*\.gem", r".*\.tgz", r".*\.h", r".*\.c", r".*\.diff", r".*\?.*", r".*\&.*", ) IGNORE_SBOM_REGEX = re.compile(f"(?:{'|'.join(IGNORE_SBOM_PACKAGE_PATTERNS)})") def parse_cdx_sbom(path): """Parse CycloneDX SBOM components into a normalized dataframe.""" LOG.debug("Parsing cdx sbom: %s", path) with open(path, encoding="utf-8") as inf: json_dict = json.loads(inf.read()) metadata = json_dict.get("metadata", {}) components = list(json_dict.get("components", [])) if "component" in metadata: components.append(metadata["component"]) components_dict = {} for component in components: name = nix_to_repology_pkg_name(component["name"]) components_dict.setdefault(cols.NAME, []).append(name) components_dict.setdefault(cols.VERSION, []).append(component["version"]) if not components_dict: return pd.DataFrame({cols.NAME: [], cols.VERSION: []}) df_components = pd.DataFrame(components_dict) df_components.fillna("", inplace=True) df_components = df_components.astype(str) df_components.sort_values(cols.NAME, inplace=True) df_components.reset_index(drop=True, inplace=True) return df_components def is_ignored_sbom_package(package_name): """Return true if a SBOM component should be ignored for Repology lookup.""" return re.match(IGNORE_SBOM_REGEX, package_name) is not None def make_sbom_status_row(repository, package, 
version, status): """Build a synthetic Repology result row for a SBOM component.""" return { cols.REPO: repository, cols.PACKAGE: package, cols.VERSION: version, cols.STATUS: status, cols.POTENTIALLY_VULNERABLE: "", cols.NEWEST_UPSTREAM_RELEASE: "", } def merge_sbom_fields(df_sbom, df_repo): """Join SBOM package/version fields into Repology query results.""" df = pd.merge( left=df_sbom, right=df_repo, how="left", left_on=[cols.NAME], right_on=[cols.PACKAGE], suffixes=("_sbom", ""), ) df[cols.VERSION_SBOM] = df.pop(cols.VERSION_SBOM) df.drop(cols.NAME, axis=1, inplace=True) return df def sbom_row_classify(row): """Classify whether the SBOM version appears outdated.""" if row.status == "outdated": return "sbom_pkg_needs_update" if row.status in ["devel", "unique", "newest"]: ver_sbom = parse_version(row.version_sbom) ver_repo = parse_version(row.version) if not ver_sbom or not ver_repo or ver_sbom < ver_repo: return "sbom_pkg_needs_update" return "" ================================================ FILE: src/repology/session.py ================================================ # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Shared HTTP helpers for repology.org clients.""" from common.http import create_cached_limited_session REPOLOGY_CACHE_SECONDS = 6 * 60 * 60 REPOLOGY_REQUEST_TIMEOUT = 60 REPOLOGY_USER_AGENT = "repology_cli/0 (https://github.com/tiiuae/sbomnix/)" def create_repology_session(): """Return a cached, rate-limited, retrying HTTP session.""" return create_cached_limited_session( per_second=1, expire_after=REPOLOGY_CACHE_SECONDS, user_agent=REPOLOGY_USER_AGENT, ) DEFAULT_REPOLOGY_SESSION = create_repology_session() ================================================ FILE: src/sbomnix/__init__.py ================================================ # SPDX-FileCopyrightText: 2022 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 ================================================ FILE: src/sbomnix/builder.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2022-2023 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """SBOM builder orchestration.""" import logging import uuid from dataclasses import dataclass from typing import Any import numpy as np import pandas as pd from common import columns as cols from common.df import df_to_csv_file from common.errors import ( MissingNixDerivationMetadataError, MissingNixDeriverError, SbomnixError, ) from common.log import LOG, is_debug_enabled from sbomnix.closure import ( DEPENDENCY_COLUMNS, dependencies_to_depth, dependency_paths, derivation_dependencies_df, ) from sbomnix.components import ( recursive_derivations_to_dataframe, runtime_derivations_to_dataframe, ) from sbomnix.dependency_index import build_dependency_index from sbomnix.derivation import load_recursive from sbomnix.derivers import find_deriver, is_loadable_deriver_path, require_deriver from sbomnix.exporters import build_cdx_document, build_spdx_document, write_json from sbomnix.meta import Meta, NixpkgsMetaSource from sbomnix.runtime import ( load_runtime_closure, ) from sbomnix.vuln_enrichment import enrich_cdx_with_vulnerabilities ############################################################################### # Namespace UUID (a UUIDv4) for stable UUIDv5 identifiers. # See RFC9562, *6.6. Namespace ID Usage and Allocation*. 
SBOMNIX_UUID_NAMESPACE = uuid.UUID("136af32e-0d0e-48bc-912c-31b26af294b9") @dataclass(frozen=True) class StructuredClosure: """Structured dependency data used to assemble an SBOM.""" df_deps: pd.DataFrame recursive_buildtime_derivations: dict[str, Any] | None = None runtime_output_paths_by_load_path: dict[str, set[str]] | None = None def _runtime_output_paths_by_load_path(output_paths_by_drv): output_paths_by_load_path = {} for drv_path, output_paths in output_paths_by_drv.items(): if is_loadable_deriver_path(drv_path): output_paths_by_load_path.setdefault(drv_path, set()).update(output_paths) continue for output_path in output_paths: output_paths_by_load_path.setdefault(output_path, set()).add(output_path) return output_paths_by_load_path def _mapped_runtime_output_paths(output_paths_by_load_path): if not output_paths_by_load_path: return set() return set().union(*output_paths_by_load_path.values()) class SbomBuilder: """Generate SBOMs in various formats.""" def __init__( # noqa: PLR0913, PLR0917 self, nix_path, buildtime=False, depth=None, flakeref=None, original_ref=None, meta_nixpkgs=None, impure=False, include_meta=True, include_vulns=False, include_cpe=True, ): # self.uid specifies the attribute that identifies SBOM components. # See the column names in # self.df_sbomdb (sbom.csv) for a list of all components' attributes. self.uid = cols.STORE_PATH self.nix_path = nix_path self.buildtime = buildtime self.target_deriver = self._resolve_target_deriver(nix_path) self.target_component_ref = None self._recursive_buildtime_derivations = None self._runtime_output_paths_by_load_path = None self.df_deps = None self.depth = depth self._structured_closure = self._load_structured_closure(nix_path) self._init_dependencies(self._structured_closure) self.df_sbomdb = None self.df_sbomdb_outputs_exploded = None self.dependency_index = None self.flakeref = flakeref self.original_ref = original_ref self.meta_nixpkgs = meta_nixpkgs self.impure = impure self.meta = None # "disabled" records explicit opt-out; "none" means auto-selection # found no source. self.nixpkgs_meta_source = NixpkgsMetaSource(method="disabled") self.include_cpe = include_cpe self._init_components(include_meta) target_component_ref = self._resolve_target_component_ref() self.target_component_ref = target_component_ref self.include_vulns = include_vulns # Use a random UUID as the serial number when any data source that is # not strictly coming from the resolved target component is used. if include_vulns or include_meta or include_cpe: LOG.verbose("Using random UUIDv4") self.uuid = uuid.uuid4() else: LOG.verbose("Using stable UUIDv5 for '%s'", target_component_ref) # This uses a UUIDv5, resulting in a stable UUID across runs for # the same SBOM subject. 
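            # Hypothetical illustration (not from the original file): with a
            # fixed ref such as "/nix/store/...-hello-2.12",
            # uuid.uuid5(SBOMNIX_UUID_NAMESPACE, ref) evaluates to the same
            # UUID on every run, whereas uuid.uuid4() above yields a fresh
            # value each time.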
self.uuid = uuid.uuid5(SBOMNIX_UUID_NAMESPACE, target_component_ref) self.sbom_type = "runtime_and_buildtime" if not self.buildtime: self.sbom_type = "runtime_only" def _resolve_target_deriver(self, nix_path): if self.buildtime: return require_deriver(nix_path) try: return find_deriver(nix_path) except SbomnixError: raise except RuntimeError: LOG.debug( "Runtime target has no loadable deriver: %s", nix_path, exc_info=True, ) return None def _load_structured_closure(self, nix_path): """Load structured dependency data for the configured SBOM type.""" if self.buildtime: if self.target_deriver is None: raise MissingNixDeriverError(nix_path) return self._load_recursive_buildtime_closure() return self._load_runtime_path_info_closure(nix_path) def _init_dependencies(self, closure): """Initialize dependency attributes from loaded structured data.""" self.df_deps = closure.df_deps self._recursive_buildtime_derivations = closure.recursive_buildtime_derivations self._runtime_output_paths_by_load_path = ( closure.runtime_output_paths_by_load_path ) def _load_recursive_buildtime_closure(self): """Load build-time dependencies from recursive derivation JSON.""" if self.target_deriver is None: raise MissingNixDeriverError(self.nix_path) derivations, drv_infos = load_recursive(self.target_deriver) df_deps = derivation_dependencies_df(drv_infos) if self.depth: df_deps = self._filter_dependencies_to_depth( df_deps, self.target_deriver, self.depth, ) return StructuredClosure( df_deps=df_deps, recursive_buildtime_derivations=derivations, ) def _load_runtime_path_info_closure(self, nix_path): """Load runtime dependencies from structured path-info JSON.""" runtime_closure = load_runtime_closure(nix_path) output_paths_by_load_path = _runtime_output_paths_by_load_path( runtime_closure.output_paths_by_drv ) mapped_paths = _mapped_runtime_output_paths(output_paths_by_load_path) if nix_path not in mapped_paths: load_path = self.target_deriver or nix_path output_paths_by_load_path.setdefault(load_path, set()).add(nix_path) mapped_paths.add(nix_path) graph_only_paths = dependency_paths(runtime_closure.df_deps) - mapped_paths if graph_only_paths: LOG.debug( "Runtime path-info references graph-only paths: %s", sorted(graph_only_paths), ) df_deps = runtime_closure.df_deps if self.depth: df_deps = self._filter_dependencies_to_depth( df_deps, nix_path, self.depth, ) return StructuredClosure( df_deps=df_deps, runtime_output_paths_by_load_path=output_paths_by_load_path, ) def _init_runtime_components(self, paths): if self._runtime_output_paths_by_load_path is None: raise AssertionError("Runtime output metadata was not initialized") df_components = runtime_derivations_to_dataframe( paths, self._runtime_output_paths_by_load_path, include_cpe=self.include_cpe, ) if df_components.empty: raise MissingNixDerivationMetadataError(self.nix_path) return df_components def _filter_dependencies_to_depth( self, df_deps, start_path, depth, columns=DEPENDENCY_COLUMNS, ): """Return dependency rows reachable from ``start_path`` up to ``depth``.""" LOG.debug("Reading dependencies until depth=%s", depth) return dependencies_to_depth(df_deps, start_path, depth, columns=columns) def _init_components(self, include_meta): """Initialize the SBOM component dataframe.""" paths = self._sbom_component_paths() # Populate store based on the dependencies if self._recursive_buildtime_derivations is not None: self.df_sbomdb = recursive_derivations_to_dataframe( paths, self._recursive_buildtime_derivations, include_cpe=self.include_cpe, ) elif 
self._runtime_output_paths_by_load_path is not None: self.df_sbomdb = self._init_runtime_components(paths) else: # _load_structured_closure always selects exactly one metadata source. raise AssertionError("Structured dependency metadata was not initialized") # Join with meta information if include_meta: self._join_meta() # Clean, drop duplicates, sort self.df_sbomdb.replace(np.nan, "", regex=True, inplace=True) self.df_sbomdb.drop_duplicates(subset=[self.uid], keep="first", inplace=True) self.df_sbomdb.sort_values(by=[cols.NAME, self.uid], inplace=True) self.df_sbomdb_outputs_exploded = self.df_sbomdb.explode(cols.OUTPUTS) self._init_dependency_index() def _sbom_component_paths(self): if self.df_deps is None or self.df_deps.empty: if self._runtime_output_paths_by_load_path is not None: return set().union(*self._runtime_output_paths_by_load_path.values()) # No dependencies, so the only component in the sbom # will be the target itself. if self.target_deriver: return {self.target_deriver} return {self.nix_path} return dependency_paths(self.df_deps) def _resolve_target_component_ref(self) -> str: """Return the component reference that represents the SBOM subject.""" if self.df_sbomdb is None: raise AssertionError("SBOM component metadata was not initialized") if self.target_deriver: df_target = self.df_sbomdb[ self.df_sbomdb[cols.STORE_PATH] == self.target_deriver ] if not df_target.empty: return self.target_deriver for component in self.df_sbomdb.to_dict("records"): store_path = component.get(cols.STORE_PATH) if not isinstance(store_path, str): continue outputs = component.get(cols.OUTPUTS, []) if isinstance(outputs, str): outputs = [outputs] elif not isinstance(outputs, (list, tuple, set)): continue if self.nix_path in outputs: return store_path if self.target_deriver: return self.target_deriver raise MissingNixDerivationMetadataError(self.nix_path) def _init_dependency_index(self): """Build indexed dependency lookups used during export.""" self.dependency_index = build_dependency_index( self.df_deps, self.df_sbomdb, self.df_sbomdb_outputs_exploded, uid=self.uid, ) def _join_meta(self): """Join component rows with nixpkgs metadata.""" if self.df_sbomdb is None: raise AssertionError("SBOM component metadata was not initialized") self.meta = Meta() df_meta, source = self.meta.get_nixpkgs_meta_with_source( target_path=self.nix_path, flakeref=self.flakeref, original_ref=self.original_ref, explicit_nixpkgs=self.meta_nixpkgs, impure=self.impure, ) self.nixpkgs_meta_source = source if df_meta is None or df_meta.empty: if source.message: LOG.info("%s", source.message) if source.path: LOG.warning( "Failed reading nix meta information: " "SBOM will include only minimum set of attributes" ) else: LOG.info( "Skipping nix meta information: " "SBOM will include only minimum set of attributes" ) return if is_debug_enabled(): df_to_csv_file(df_meta, "meta.csv") # Join based on package name including the version number self.df_sbomdb = self.df_sbomdb.merge( df_meta, how="left", left_on=[cols.NAME], right_on=[cols.NAME], suffixes=("", "_meta"), ) def lookup_dependencies(self, drv, uid=cols.STORE_PATH): """Return indexed dependency values for one SBOM component.""" dependency_index = getattr(self, "dependency_index", None) if dependency_index is None: return None return dependency_index.lookup(drv, uid=uid) def to_cdx_data(self): """Return the SBOM as a CycloneDX document.""" return build_cdx_document(self) def enrich_cdx_with_vulnerabilities(self, cdx): """Add vulnerability scan results to an existing 
CycloneDX document.""" return enrich_cdx_with_vulnerabilities(self, cdx) def to_spdx_data(self): """Return the SBOM as an SPDX document.""" return build_spdx_document(self) def write_json(self, pathname, data, printinfo=False): """Write a JSON document to a file.""" write_json(pathname, data, printinfo=printinfo) def to_cdx(self, cdx_path, printinfo=True): """Export SBOM components to a CycloneDX JSON file.""" cdx = self.to_cdx_data() self.write_json(cdx_path, cdx, printinfo) def to_spdx(self, spdx_path, printinfo=True): """Export SBOM components to an SPDX JSON file.""" spdx = self.to_spdx_data() self.write_json(spdx_path, spdx, printinfo) def to_csv(self, csv_path, loglevel=logging.INFO): """Export SBOM components to a CSV file.""" df_to_csv_file(self.df_sbomdb, csv_path, loglevel) ================================================ FILE: src/sbomnix/cdx.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2022-2023 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """CycloneDX utils""" import re from common import columns as cols from common.log import LOG, LOG_SPAM from common.spdx import canonicalize_spdx_license_id from vulnxscan.utils import _vuln_source, _vuln_url def _drv_to_cdx_licenses_entry(drv, column_name, cdx_license_type): """Parse license entries of type cdx_license_type from column_name""" licenses = [] if column_name not in drv._asdict(): # Return empty list if column name is not in drv return licenses license_str = getattr(drv, column_name) if not license_str: # Return empty list if license string is empty return licenses # Parse the ";" separated licenses to cdx license format license_strings = license_str.split(";") for license_string in license_strings: license_value = license_string # Give up generating the 'licenses' entry if license id should be # spdx but it's not: if "spdxid" in column_name: canonical = canonicalize_spdx_license_id(license_value) if not canonical: LOG.debug("Invalid spdxid license '%s':'%s'", drv.name, license_string) return [] license_value = canonical license_dict = {"license": {cdx_license_type: license_value}} licenses.append(license_dict) return licenses def _cdx_component_add_licenses(component, drv): """Add licenses array to cdx component (if any)""" licenses = [] # First, try reading the license in spdxid-format licenses = _drv_to_cdx_licenses_entry(drv, "meta_license_spdxid", "id") # If it fails, try reading the license short name if not licenses: licenses = _drv_to_cdx_licenses_entry(drv, "meta_license_short", "name") # Give up if package does not have license information associated if not licenses: LOG.log(LOG_SPAM, "No license info found for '%s'", drv.name) return # Otherwise, add the licenses entry component["licenses"] = licenses def _cdx_component_add_patches(component, drv): """Add security patch information to cdx component (if any)""" if drv.patches: security_patches = [] for p in drv.patches.split(" "): ids = re.findall(r"CVE-\d{4}-\d+", p, re.IGNORECASE) if ids: resolves = [] for i in ids: resolves.append( { "type": "security", "id": i.upper(), "references": [f"file://{p}"], } ) security_patches.append( { "type": "unofficial", "resolves": resolves, } ) if security_patches: pedigree = {} pedigree["patches"] = security_patches component["pedigree"] = pedigree def _drv_to_cdx_component(drv, uid=cols.STORE_PATH): """Convert one SBOM component row to a CycloneDX component.""" component = {} # Set the cdx component type based on the following heuristic: # - 
Set the default component type to 'library' # - Set the component type to 'file' if the drv version string is missing # and out-path matches the below pattern component["type"] = "library" if not drv.version: if drv.out and re.search(r"(\.tar\.|\?|\.[a-z]+$)", drv.out): component["type"] = "file" component["bom-ref"] = getattr(drv, uid) component["name"] = drv.pname component["version"] = drv.version if drv.purl: component["purl"] = drv.purl if drv.cpe: component["cpe"] = drv.cpe if "meta_description" in drv._asdict() and drv.meta_description: component["description"] = drv.meta_description _cdx_component_add_licenses(component, drv) _cdx_component_add_patches(component, drv) properties = [] for output_path in drv.outputs: prop = {} prop["name"] = "nix:output_path" prop["value"] = output_path properties.append(prop) if drv.store_path: prop = {} prop["name"] = "nix:drv_path" prop["value"] = drv.store_path properties.append(prop) # To externalReferences? if drv.urls: prop = {} prop["name"] = "nix:fetch_url" prop["value"] = drv.urls properties.append(prop) if "meta_homepage" in drv._asdict() and drv.meta_homepage: prop = {} prop["name"] = "homepage" prop["value"] = drv.meta_homepage properties.append(prop) if "meta_position" in drv._asdict() and drv.meta_position: prop = {} prop["name"] = "nix:position" prop["value"] = drv.meta_position properties.append(prop) if properties: component["properties"] = properties return component def _drv_to_cdx_dependency(drv, deps_list, uid=cols.STORE_PATH): """Return CycloneDX dependency structure for one component row.""" dependency = {} dependency["ref"] = getattr(drv, uid) if deps_list: dependency["dependsOn"] = deps_list return dependency def _vuln_to_cdx_vuln(vuln): """Return cdx vulnerability entry from vulnix row""" vulnerability = {} vulnerability["bom-ref"] = vuln.store_path vulnerability["id"] = vuln.vuln_id source = {} source["url"] = _vuln_url(vuln) source["name"] = _vuln_source(vuln) vulnerability["source"] = source vulnerability["ratings"] = [] # If the vulnerability is still being assessed, it may not yet have a valid severity score if vuln.severity != "": rating = {} rating["source"] = source rating["score"] = vuln.severity vulnerability["ratings"].append(rating) vulnerability["tools"] = [] for scanner in vuln.scanner: tool = {} tool["name"] = scanner vulnerability["tools"].append(tool) return vulnerability ================================================ FILE: src/sbomnix/cli_utils.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Shared CLI orchestration helpers.""" import logging import pathlib import subprocess from dataclasses import dataclass from tempfile import NamedTemporaryFile from common.errors import InvalidNixArtifactError, MissingNixOutPathError from common.flakeref import ( NIXOS_CONFIGURATION_TOPLEVEL_SUFFIX, parse_nixos_configuration_ref, quote_nix_attr_segment, try_resolve_flakeref, ) from common.log import LOG from common.proc import exec_cmd, exit_unless_nix_artifact, nix_cmd from sbomnix.builder import SbomBuilder @dataclass(frozen=True) class ResolvedNixTarget: """Resolved nix CLI target.""" path: str flakeref: str | None = None original_ref: str | None = None @dataclass(frozen=True) class GeneratedSbom: """Paths of generated temporary SBOM artifacts.""" cdx_path: pathlib.Path csv_path: pathlib.Path | None = None def cleanup(self): """Remove generated artifacts if they exist."""
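# unlink(missing_ok=True) makes cleanup idempotent: repeated calls, or cleanup after the artifacts were already removed externally, do not raise FileNotFoundError.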
self.cdx_path.unlink(missing_ok=True) if self.csv_path is not None: self.csv_path.unlink(missing_ok=True) def resolve_nix_target(nixref, buildtime=False, impure=False): """Resolve a CLI target to a nix path, preserving flakeref context.""" runtime = not buildtime resolved_ref = _normalize_nixos_configuration_ref(nixref) target_path = try_resolve_flakeref( resolved_ref, force_realise=runtime, impure=impure, derivation=buildtime, ) if target_path: return ResolvedNixTarget( path=target_path, flakeref=resolved_ref, original_ref=nixref, ) target_path = pathlib.Path(nixref).resolve().as_posix() if runtime and target_path.endswith(".drv"): target_path = _realise_derivation_output(target_path) else: exit_unless_nix_artifact(nixref, force_realise=runtime) return ResolvedNixTarget(path=target_path, original_ref=nixref) def _realise_derivation_output(path): try: ret = exec_cmd( nix_cmd( "build", "--no-link", "--print-out-paths", f"{path}^*", ) ) except subprocess.CalledProcessError: raise InvalidNixArtifactError(path) from None out_path = next( (line.strip() for line in ret.stdout.splitlines() if line.strip()), "" ) if not out_path: raise MissingNixOutPathError(path) LOG.debug("runtime derivation target '%s' maps to output '%s'", path, out_path) return out_path def _normalize_nixos_configuration_ref(nixref): parsed = parse_nixos_configuration_ref(nixref) if not parsed: return nixref flake, name = parsed attr = quote_nix_attr_segment(name) return f"{flake}#nixosConfigurations.{attr}{NIXOS_CONFIGURATION_TOPLEVEL_SUFFIX}" def generate_temp_sbom( target_path, buildtime=False, prefix="sbomnix_", cdx_suffix=".cdx.json", include_csv=False, ): """Generate temporary SBOM artifact files for downstream CLI workflows.""" LOG.info("Generating SBOM for target '%s'", target_path) sbom = SbomBuilder(target_path, buildtime, include_meta=False) cdx_path = None csv_path = None try: with NamedTemporaryFile(delete=False, prefix=prefix, suffix=cdx_suffix) as fcdx: cdx_path = pathlib.Path(fcdx.name) if not include_csv: sbom.to_cdx(cdx_path, printinfo=False) return GeneratedSbom(cdx_path=cdx_path) with NamedTemporaryFile(delete=False, prefix=prefix, suffix=".csv") as fcsv: csv_path = pathlib.Path(fcsv.name) sbom.to_cdx(cdx_path, printinfo=False) sbom.to_csv(csv_path, loglevel=logging.DEBUG) return GeneratedSbom(cdx_path=cdx_path, csv_path=csv_path) except Exception: if cdx_path is not None: cdx_path.unlink(missing_ok=True) if csv_path is not None: csv_path.unlink(missing_ok=True) raise ================================================ FILE: src/sbomnix/closure.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Dependency closure helpers shared by SBOM generation paths.""" from dataclasses import dataclass from typing import Any, Callable, Iterable import pandas as pd from common import columns as cols from common.nix_utils import ( nix_derivation_input_drv_paths, nix_derivation_input_src_paths, ) DEPENDENCY_COLUMNS = [ cols.SRC_PATH, "src_pname", cols.TARGET_PATH, "target_pname", ] @dataclass(frozen=True) class DependencyWalkRow: """One dependency row reached during graph traversal.""" row: dict[str, Any] depth: int def dependency_paths(df_deps): """Return all source and target paths referenced by dependency rows.""" if df_deps is None or df_deps.empty: return set() src_paths = df_deps[cols.SRC_PATH].unique().tolist() target_paths = df_deps[cols.TARGET_PATH].unique().tolist() return 
set(src_paths + target_paths) def dependencies_to_depth(df_deps, start_path, depth, columns=DEPENDENCY_COLUMNS): """Return dependency rows reachable from ``start_path`` up to ``depth``.""" rows = [walked.row for walked in walk_dependency_rows(df_deps, start_path, depth)] if not rows: return pd.DataFrame(columns=pd.Index(columns)) return pd.DataFrame.from_records(rows, columns=pd.Index(columns)) def walk_dependency_rows( df_deps, start_paths: str | Iterable[str], depth, *, inverse=False, stop_at: Callable[[dict[str, Any]], bool] | None = None, ): """Return dependency rows reached by a depth-limited graph walk.""" if df_deps is None or df_deps.empty: return [] if isinstance(start_paths, str): normalized_start_paths = [start_paths] else: normalized_start_paths = list(start_paths) match_column = cols.SRC_PATH if inverse else cols.TARGET_PATH next_column = cols.TARGET_PATH if inverse else cols.SRC_PATH rows_by_path = _dependency_rows_by_path(df_deps, match_column) rows = [] visited_edges = set() def walk(current_path, curr_depth=0): curr_depth += 1 if curr_depth > depth: return for row in rows_by_path.get(current_path, ()): edge_key = (row[cols.TARGET_PATH], row[cols.SRC_PATH]) if edge_key in visited_edges: continue visited_edges.add(edge_key) rows.append(DependencyWalkRow(row=row, depth=curr_depth)) if stop_at is not None and stop_at(row): continue walk(row[next_column], curr_depth) for start_path in dict.fromkeys(normalized_start_paths): walk(start_path) return rows def _dependency_rows_by_path(df_deps, match_column): """Return dependency row records indexed by the path column used for walking.""" rows_by_path = {} for row in df_deps.to_dict("records"): rows_by_path.setdefault(row[match_column], []).append(row) return rows_by_path def derivation_dependencies_df(drv_infos): """Return build-time dependency edges from recursive derivation JSON.""" rows = [] for target_path, drv_info in drv_infos.items(): for src_path in _iter_input_paths(drv_info, target_path): rows.append( { cols.SRC_PATH: src_path, "src_pname": store_path_label(src_path), cols.TARGET_PATH: target_path, "target_pname": store_path_label(target_path), } ) return dependency_rows_to_dataframe(rows) def dependency_rows_to_dataframe(rows, columns=DEPENDENCY_COLUMNS): """Return sorted dependency dataframe from row dictionaries.""" df_deps = pd.DataFrame.from_records(rows, columns=pd.Index(columns)) if not df_deps.empty: df_deps.drop_duplicates(inplace=True) df_deps.sort_values( by=["src_pname", cols.SRC_PATH, "target_pname", cols.TARGET_PATH], inplace=True, ) return df_deps def store_path_label(path): """Return the Nix store graph-style label for a store path.""" basename = str(path).rstrip("/").rsplit("/", maxsplit=1)[-1] _hash, separator, name = basename.partition("-") return name if separator else basename def _iter_input_paths(drv_info, target_path=None): """Yield validated input derivation and source paths from derivation JSON.""" yield from nix_derivation_input_drv_paths(target_path, drv_info) yield from nix_derivation_input_src_paths(target_path, drv_info) ================================================ FILE: src/sbomnix/components.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """SBOM component dataframe helpers.""" import pandas as pd from common.log import LOG from sbomnix.cpe import CPE from sbomnix.derivation import load_many def recursive_derivations_to_dataframe(paths, derivations, 
include_cpe=True): """Return component rows from an already-loaded derivation closure.""" drvs = [] for path in sorted(paths): drv = derivations.get(path) if not drv: LOG.debug("Recursive buildtime closure missing path: %s", path) continue drvs.append(drv) return derivations_to_dataframe(drvs, include_cpe=include_cpe) def runtime_derivations_to_dataframe( paths, output_paths_by_load_path, include_cpe=True ): """Return component rows from runtime output-to-load-path mappings.""" filtered_outputs_by_load_path = filter_runtime_outputs_by_load_path( paths, output_paths_by_load_path, ) derivations = load_many( sorted(filtered_outputs_by_load_path), output_paths_by_drv=filtered_outputs_by_load_path, ignore_missing=True, ).values() return derivations_to_dataframe(derivations, include_cpe=include_cpe) def derivations_to_dataframe(derivations, include_cpe=True): """Return component rows for loaded derivations.""" cpe_generator = CPE(include_cpe=include_cpe) drv_dicts = [] for drv in derivations: drv.set_cpe(cpe_generator) drv_dicts.append(drv.to_dict()) return pd.DataFrame.from_records(drv_dicts) def filter_runtime_outputs_by_load_path(paths, output_paths_by_load_path): """Filter runtime output mappings to the selected component paths.""" selected_paths = set(paths) filtered_outputs_by_load_path = {} for load_path, output_paths in output_paths_by_load_path.items(): filtered_output_paths = set(output_paths) & selected_paths if filtered_output_paths: filtered_outputs_by_load_path[load_path] = filtered_output_paths return filtered_outputs_by_load_path ================================================ FILE: src/sbomnix/cpe.py ================================================ # SPDX-FileCopyrightText: 2022-2023 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Generate CPE (Common Platform Enumeration) identifiers""" import string from common.df import df_from_csv_file, df_log from common.errors import InvalidCpeDictionaryError from common.log import LOG, LOG_SPAM from sbomnix.dfcache import LockedDfCache ############################################################################### _CPE_CSV_URL = "https://github.com/tiiuae/cpedict/raw/main/data/cpes.csv" # Update local cached version of _CPE_CSV_URL once a day or when local cache # is cleaned: _CPE_CSV_CACHE_TTL = 60 * 60 * 24 ############################################################################### class CPE: """Generate Common Platform Enumeration identifiers""" def __init__( self, include_cpe=True, ): self._product_vendor = {} self._ambiguous_products = set() # Initialize the lookup fields above even when CPE matching is disabled below.
if not include_cpe: self.df_cpedict = None return self.cache = LockedDfCache() self.df_cpedict = self.cache.get(_CPE_CSV_URL) if self.df_cpedict is not None and not self.df_cpedict.empty: LOG.debug("read CPE dictionary from cache") else: LOG.debug("CPE cache miss, downloading: %s", _CPE_CSV_URL) self.df_cpedict = df_from_csv_file(_CPE_CSV_URL, exit_on_error=False) if self.df_cpedict is None or self.df_cpedict.empty: LOG.warning( "Failed downloading cpedict: CPE information might not be accurate" ) else: self.cache.set(_CPE_CSV_URL, self.df_cpedict, ttl=_CPE_CSV_CACHE_TTL) if self.df_cpedict is not None: # Verify the loaded cpedict contains at least the following columns required_cols = {"vendor", "product"} if not required_cols.issubset(self.df_cpedict): raise InvalidCpeDictionaryError(required_cols) self._init_product_vendor_index() def _init_product_vendor_index(self): df_cpedict = self.df_cpedict if df_cpedict is None: return product_counts = df_cpedict.groupby("product", sort=False).size() unique_products = [ product for product, count in product_counts.items() if count == 1 ] self._ambiguous_products = { product for product, count in product_counts.items() if count != 1 } df_unique = df_cpedict[df_cpedict["product"].isin(unique_products)] self._product_vendor = dict( zip(df_unique["product"], df_unique["vendor"], strict=False) ) def _cpedict_vendor(self, product): if not product or len(product) == 1: LOG.debug("invalid product name '%s'", product) return None if self.df_cpedict is None: LOG.log(LOG_SPAM, "missing cpedict") return None vendor = self._product_vendor.get(product) if vendor: LOG.log(LOG_SPAM, "found vendor for product '%s': '%s'", product, vendor) return vendor if product not in self._ambiguous_products: LOG.log(LOG_SPAM, "no matches for product '%s'", product) return None # If there is more than one product with the same name, we cannot # determine which vendor name should be used for the CPE. Therefore, # treat it the same way as no matches. LOG.log(LOG_SPAM, "more than one match for product '%s':", product) if LOG.isEnabledFor(LOG_SPAM): df = self.df_cpedict[self.df_cpedict["product"] == product] df_log(df, LOG_SPAM) return None def _candidate_vendor(self, product): """ Return vendor name based on the product name: - Try finding exact match from the CPE dictionary - Try finding exact match based on variations of the product name - Use product name as vendor name if other attempts failed """ vendor = self._cpedict_vendor(product) if not vendor: # No exact match found from cpe dictionary based on product name: # try finding vendor for the product name we get by removing # possible trailing digits from the original product name product_mod = product.rstrip(string.digits) if product != product_mod: LOG.log(LOG_SPAM, "re-trying with product name '%s'", product_mod) vendor = self._cpedict_vendor(product_mod) if not vendor: # Use the product name when no CPE dictionary vendor matches. 
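# (Hypothetical example: product 'foo2' is first retried as 'foo'; if neither is in the dictionary, the vendor falls back to 'foo2', so the resulting CPE becomes cpe:2.3:a:foo2:foo2:<version>:...)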
vendor = product LOG.log(LOG_SPAM, "using product name as vendor '%s'", vendor) return vendor def generate(self, name, version): """Generate CPE identifier, given the product name and version""" cpe_vendor = self._candidate_vendor(name.strip()) cpe_product = name.strip() cpe_version = version.strip() cpe_end = "*:*:*:*:*:*:*" ret = f"cpe:2.3:a:{cpe_vendor}:{cpe_product}:{cpe_version}:{cpe_end}" LOG.log(LOG_SPAM, "CPE: '%s'", ret) return ret ############################################################################### ================================================ FILE: src/sbomnix/dependency_index.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Indexed dependency lookups for SBOM export.""" from dataclasses import dataclass, field import pandas as pd from common import columns as cols def _sorted_unique(values): return sorted({value for value in values if value}) def _normalize_outputs(outputs): if isinstance(outputs, (list, tuple)): return [output for output in outputs if output] if isinstance(outputs, str) and outputs: return [outputs] return [] def _group_dependency_rows(df, dep_col): if df.empty: return {} return { target_path: _sorted_unique(group[dep_col].tolist()) for target_path, group in df.groupby(cols.TARGET_PATH) } @dataclass class DependencyIndex: """Lookup dependency identifiers without repeated dataframe merges.""" by_store_path: dict[str, list[str]] component_frame: pd.DataFrame _uid_maps: dict[str, dict[str, str]] = field(default_factory=dict) def lookup(self, drv, uid=cols.STORE_PATH): """Return dependency identifiers for ``drv`` using the requested column.""" dep_store_paths = self.by_store_path.get(drv.store_path, []) if not dep_store_paths: return None if uid == cols.STORE_PATH: return dep_store_paths uid_map = self._get_uid_map(uid) if uid_map is None: return None self_uid = getattr(drv, uid, None) dep_uids = sorted( { uid_map[dep_store_path] for dep_store_path in dep_store_paths if dep_store_path in uid_map and uid_map[dep_store_path] } ) if self_uid is not None: dep_uids = [dep_uid for dep_uid in dep_uids if dep_uid != self_uid] return dep_uids or None def _get_uid_map(self, uid): if uid in self._uid_maps: return self._uid_maps[uid] if uid not in self.component_frame.columns: return None uid_map = dict( self.component_frame.loc[:, [cols.STORE_PATH, uid]].itertuples( index=False, name=None, ) ) self._uid_maps[uid] = uid_map return uid_map def build_dependency_index(df_deps, df_sbomdb, df_sbomdb_outputs_exploded, uid): """Build an indexed dependency map for all SBOM components.""" if df_sbomdb is None or df_sbomdb.empty: return DependencyIndex(by_store_path={}, component_frame=pd.DataFrame()) by_store_path = {drv.store_path: [] for drv in df_sbomdb.itertuples()} if df_deps is None or df_deps.empty: return DependencyIndex(by_store_path=by_store_path, component_frame=df_sbomdb) runtime_sources = df_sbomdb_outputs_exploded.loc[:, [cols.OUTPUTS, uid]].rename( columns={uid: cols.DEPENDENCY_UID} ) runtime_edges = df_deps.merge( runtime_sources, how="inner", left_on=[cols.SRC_PATH], right_on=[cols.OUTPUTS], ) runtime_by_target = _group_dependency_rows(runtime_edges, cols.DEPENDENCY_UID) buildtime_sources = df_sbomdb.loc[:, [cols.STORE_PATH]].copy() buildtime_sources[cols.DEPENDENCY_UID] = df_sbomdb[uid] buildtime_edges = df_deps.merge( buildtime_sources, how="inner", left_on=[cols.SRC_PATH], right_on=[cols.STORE_PATH], ) 
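# Grouping the buildtime edges by target path (below) mirrors the runtime grouping above, so the per-component loop can assemble dependency sets with plain dict lookups instead of repeated dataframe merges.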
buildtime_by_target = _group_dependency_rows(buildtime_edges, cols.DEPENDENCY_UID) for drv in df_sbomdb.itertuples(): deps: set[str] = set(buildtime_by_target.get(drv.store_path, ())) for output in _normalize_outputs(drv.outputs): deps.update(runtime_by_target.get(output, ())) self_uid = getattr(drv, uid, None) if self_uid is not None: deps.discard(self_uid) by_store_path[drv.store_path] = sorted(deps) return DependencyIndex( by_store_path=by_store_path, component_frame=df_sbomdb, ) ================================================ FILE: src/sbomnix/derivation.py ================================================ # From: https://github.com/flyingcircusio/vulnix/blob/1.10.1/LICENSE: # SPDX-License-Identifier: BSD-3-Clause # SPDX-FileCopyrightText: Flying Circus Internet Operations GmbH # SPDX-FileCopyrightText: 2022-2023 Technology Innovation Institute (TII) """Nix derivation, originally from https://github.com/flyingcircusio/vulnix""" import bisect import json import subprocess from itertools import islice from packageurl import PackageURL from common.errors import NixCommandError from common.log import LOG, LOG_SPAM from common.nix_utils import parse_nix_derivation_show from common.proc import exec_cmd, nix_cmd ############################################################################### def _batched(iterable, size): iterator = iter(iterable) while batch := list(islice(iterator, size)): yield batch def load(path, outpath): """Load derivation from path""" cmd = nix_cmd("derivation", "show", path) drv_infos = parse_nix_derivation_show( _exec_required_nix_command(cmd).stdout, store_path_hint=path, ) drv_path = path drv_info = drv_infos.get(path) if drv_info is None and drv_infos: drv_path, drv_info = next(iter(drv_infos.items())) if drv_info is None: raise NixCommandError( cmd, stderr=f"No derivation metadata returned for '{path}'", ) if outpath is None and path != drv_path and not path.endswith(".drv"): outpath = path d_obj = Derive.from_nix_derivation_info(drv_path, drv_info, outpath) LOG.log(LOG_SPAM, "load derivation: %s", d_obj) LOG.log(LOG_SPAM, "derivation attrs: %s", d_obj.to_dict()) return d_obj def load_many(paths, output_paths_by_drv=None, batch_size=200, ignore_missing=False): """Load many derivations with batched `nix derivation show` calls.""" if not paths: return {} output_paths_by_drv = {} if output_paths_by_drv is None else output_paths_by_drv loaded = {} for batch in _batched(dict.fromkeys(paths), batch_size): drv_infos = _load_derivation_infos( batch, store_path_hint=batch[0], ignore_missing=ignore_missing, ) query_to_drv_path = _query_paths_to_derivations(batch, drv_infos) output_paths_by_loaded_drv = {} missing_paths = [] for query_path in batch: drv_path = query_to_drv_path.get(query_path) if drv_path is None: missing_paths.append(query_path) continue output_paths = output_paths_by_loaded_drv.setdefault(drv_path, set()) output_paths.update(output_paths_by_drv.get(drv_path, ())) output_paths.update(output_paths_by_drv.get(query_path, ())) if query_path != drv_path and not query_path.endswith(".drv"): output_paths.add(query_path) for drv_path, output_paths in output_paths_by_loaded_drv.items(): drv_info = drv_infos[drv_path] sorted_output_paths = sorted(output_paths) drv = Derive.from_nix_derivation_info( drv_path, drv_info, sorted_output_paths[0] if sorted_output_paths else None, ) for outpath in sorted_output_paths[1:]: drv.add_output_path(outpath) LOG.log(LOG_SPAM, "load derivation: %s", drv) LOG.log(LOG_SPAM, "derivation attrs: %s", drv.to_dict()) loaded[drv_path] = 
drv for path in missing_paths: if ignore_missing: LOG.debug("Skipping path without derivation metadata: %s", path) continue loaded[path] = load( path, next(iter(output_paths_by_drv.get(path, ())), None), ) return loaded def _load_derivation_infos(paths, store_path_hint=None, ignore_missing=False): if ignore_missing: ret = exec_cmd( nix_cmd("derivation", "show", *paths), raise_on_error=False, log_error=False, ) else: ret = _exec_required_nix_command(nix_cmd("derivation", "show", *paths)) if ret is not None: return parse_nix_derivation_show(ret.stdout, store_path_hint=store_path_hint) if len(paths) == 1: return {} midpoint = len(paths) // 2 left = _load_derivation_infos( paths[:midpoint], store_path_hint=paths[0], ignore_missing=ignore_missing, ) right = _load_derivation_infos( paths[midpoint:], store_path_hint=paths[midpoint], ignore_missing=ignore_missing, ) return {**left, **right} def _query_paths_to_derivations(query_paths, drv_infos): output_to_drv_path = {} for drv_path, drv_info in drv_infos.items(): for output_path in _derivation_output_paths(drv_info): output_to_drv_path.setdefault(output_path, drv_path) query_to_drv_path = {} for query_path in query_paths: if query_path in drv_infos: query_to_drv_path[query_path] = query_path continue drv_path = output_to_drv_path.get(query_path) if drv_path: query_to_drv_path[query_path] = drv_path return query_to_drv_path def _derivation_output_paths(drv_info): outputs = drv_info.get("outputs", {}) env_vars = drv_info.get("env", {}) if not isinstance(outputs, dict): outputs = {} if not isinstance(env_vars, dict): env_vars = {} output_paths = [] def add_output_path(path): if path and path not in output_paths: output_paths.append(path) for output_name, output in outputs.items(): path = _derivation_output_path(outputs, output_name) if path: add_output_path(path) elif isinstance(output, str): add_output_path(output) else: add_output_path(env_vars.get(output_name)) for output_name in str(env_vars.get("outputs", "")).split(): add_output_path(env_vars.get(output_name)) return output_paths def load_recursive(path): """Load a derivation and its recursive build-time closure.""" cmd = nix_cmd("derivation", "show", "--recursive", path) drv_infos = parse_nix_derivation_show( _exec_required_nix_command(cmd).stdout, store_path_hint=path, ) if not drv_infos: raise NixCommandError( cmd, stderr=f"No derivation metadata returned for '{path}'", ) loaded = {} for drv_path, drv_info in drv_infos.items(): drv = Derive.from_nix_derivation_info(drv_path, drv_info) LOG.log(LOG_SPAM, "load derivation: %s", drv) LOG.log(LOG_SPAM, "derivation attrs: %s", drv.to_dict()) loaded[drv_path] = drv return loaded, drv_infos def _exec_required_nix_command(cmd): try: return exec_cmd(cmd) except subprocess.CalledProcessError as error: raise NixCommandError( cmd, stderr=error.stderr, stdout=error.stdout, ) from None def destructure(env): """Decodes Nix 2.0 __structuredAttrs.""" if "__json" in env: return json.loads(env["__json"]) return {} class Derive: """Nix derivation as found as .drv files in the Nix store.""" def __init__( self, _outputs=None, _system=None, _builder=None, _args=None, envVars=None, _derivations=None, name=None, patches=None, ): """Create a derivation from a .drv file. The derivation files are just accidentally Python-syntax, but hey! 
:-) """ if envVars is None: envVars = {} envVars = dict(envVars) LOG.log(LOG_SPAM, envVars) self.name = name or envVars.get("name") if not self.name: self.name = destructure(envVars)["name"] pname = envVars.get("pname", self.name) # pname read from envVars might not match the pname in nixpkgs. # As an example 'Authen-SASL' full pname is 'perl5.36.0-Authen-SASL' # Below, we reconstruct the full pname based on self.name which # contains the full pname: self.pname = self.name.partition(pname)[0] + pname self.version = envVars.get("version", "") self.patches = patches or envVars.get("patches", "") self.system = envVars.get("system", "") self.out = envVars.get("out", "") self.outputs = [] self.store_path = None outputs = envVars.get("outputs", "").split() for output in outputs: path = envVars.get(output, None) self.add_output_path(path) LOG.log(LOG_SPAM, "%s outputs: %s", self, self.outputs) # pname 'source' in Nix has special meaning - it is the default name # for all fetchFromGitHub derivations. As such, it should not be used # to construct cpe or purl, rather, cpe and purl should be empty # for such packages. self.cpe = "" self.purl = "" self._refresh_purl() self.urls = envVars.get("urls", "") @classmethod def from_nix_derivation_info(cls, path, drv_info, outpath=None): """Create a derivation from normalized `nix derivation show` JSON.""" env_vars = dict(drv_info.get("env", {})) name = _coerce_derivation_string(drv_info.get("name")) or env_vars.get("name") if not name: name = destructure(env_vars).get("name") outputs = drv_info.get("outputs", {}) if not isinstance(outputs, dict): outputs = {} drv = cls( envVars=env_vars, name=name, patches=env_vars.get("patches", ""), ) drv.system = _coerce_derivation_string(drv_info.get("system")) or drv.system drv.version = env_vars.get("version", "") if not drv.version: drv.version = _coerce_derivation_string(drv_info.get("version")) drv.out = drv.out or _derivation_output_path(outputs, "out") drv._refresh_purl() drv.outputs = [] _set_derivation_output_paths(drv, outputs, env_vars) drv.init(path, outpath) return drv def init(self, path, outpath): """Initialize self.store_path and self.outputs""" if self.store_path is not None: raise AssertionError("Derivation is already initialized") LOG.log(LOG_SPAM, "path:%s, outpath:%s", path, outpath) self.store_path = path outpath = outpath if outpath and outpath != path else self.out self.add_output_path(outpath) def __repr__(self): return f"" def set_cpe(self, cpe_generator): """Generate cpe identifier""" if self.pname != "source" and cpe_generator is not None: self.cpe = cpe_generator.generate(self.pname, self.version) def add_output_path(self, path): """Add an output path to derivation""" if path and path not in self.outputs and path != self.store_path: LOG.log(LOG_SPAM, "adding outpath to %s:%s", self, path) bisect.insort(self.outputs, path) def _refresh_purl(self): self.purl = "" if self.pname != "source": self.purl = str( PackageURL(type="nix", name=self.pname, version=self.version) ) def to_dict(self): """Return derivation as dictionary""" ret = {} for attr in vars(self): ret[attr] = getattr(self, attr) return ret def _derivation_output_path(outputs, output_name): output = outputs.get(output_name) if isinstance(output, dict): return output.get("path", "") if isinstance(output, str): return output return "" def _coerce_derivation_string(value): if isinstance(value, str): return value return "" def _set_derivation_output_paths(drv, outputs, env_vars): for output in outputs.values(): if isinstance(output, dict): 
drv.add_output_path(output.get("path")) else: drv.add_output_path(output) if drv.outputs: return for output_name in str(env_vars.get("outputs", "")).split(): drv.add_output_path(env_vars.get(output_name)) ================================================ FILE: src/sbomnix/derivers.py ================================================ # From: https://github.com/flyingcircusio/vulnix/blob/1.10.1/LICENSE: # SPDX-License-Identifier: BSD-3-Clause # SPDX-FileCopyrightText: Flying Circus Internet Operations GmbH # SPDX-FileCopyrightText: 2022-2023 Technology Innovation Institute (TII) """Deriver lookup helpers for Nix store paths.""" import os from common.errors import MissingNixDeriverError, SbomnixError from common.log import LOG, LOG_SPAM from common.nix_utils import parse_nix_derivation_show from common.proc import exec_cmd, nix_cmd def is_loadable_deriver_path(path): """Return whether path names an existing Nix derivation file.""" return ( isinstance(path, str) and path != "unknown-deriver" and path.endswith(".drv") and os.path.exists(path) ) def find_deriver(path): """Return drv path for the given nix store artifact path.""" LOG.log(LOG_SPAM, path) if path.endswith(".drv"): return path cmd = nix_cmd("derivation", "show", path) ret = exec_cmd(cmd, raise_on_error=False, log_error=False) if not ret: LOG.log(LOG_SPAM, "Deriver not found for '%s'", path) return None qvd_json_keys = list( parse_nix_derivation_show(ret.stdout, store_path_hint=path).keys() ) if not qvd_json_keys: LOG.log(LOG_SPAM, "No qvd_deriver found for '%s'", path) return None qvd_deriver = qvd_json_keys[0] LOG.log(LOG_SPAM, "qvd_deriver: %s", qvd_deriver) if is_loadable_deriver_path(qvd_deriver): return qvd_deriver if qvd_deriver and qvd_deriver != "unknown-deriver": raise RuntimeError( f"Deriver `{qvd_deriver}` does not exist. " f"Couldn't find deriver for path `{path}`" ) raise RuntimeError( "Cannot determine deriver. Is this really a path into the nix store?", path, ) def require_deriver(path, *, find_deriver_fn=find_deriver, log=LOG): """Return the deriver for ``path`` or raise a typed error.""" try: drv_path = find_deriver_fn(path) except SbomnixError: raise except RuntimeError as error: raise MissingNixDeriverError(path) from error if not drv_path: raise MissingNixDeriverError(path) log.debug("nix_drv: %s", drv_path) return drv_path ================================================ FILE: src/sbomnix/dfcache.py ================================================ # SPDX-FileCopyrightText: 2022-2024 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Thread-safe DataFrameDiskCache""" import pathlib import tempfile from getpass import getuser from dfdiskcache import DataFrameDiskCache from filelock import FileLock ############################################################################### # DataFrameDiskCache cache local path and lock file DFCACHE_PATH = pathlib.Path(tempfile.gettempdir()) / f"{getuser()}_sbomnix_df_cache" DFCACHE_LOCK = DFCACHE_PATH / "dfcache.lock" ################################################################################ class LockedDfCache: """Thread-safe (and process-safe) wrapper for DataFrameDiskCache""" def __init__(self): self.dflock = FileLock(DFCACHE_LOCK) def __getattr__(self, name): def wrap(*a, **k): with self.dflock: # We intentionally do not store the dfcache as an object variable # but re-instantiate it every time any LockedDfCache method # is called. DataFrameDiskCache internally makes use of sqlite # which does not allow concurrent connections to the database.
# Having the dfcache initiated once in __init__() and then # re-used here would mean the connection would remain reserved # for the first thread making other threads throw with # 'database locked' etc. even if we otherwise protect # concurrent writes. dfcache = DataFrameDiskCache(cache_dir_path=DFCACHE_PATH) return getattr(dfcache, name)(*a, **k) return wrap ############################################################################### ================================================ FILE: src/sbomnix/exporters.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """SBOM document exporters.""" import json import re from datetime import datetime, timezone from common import columns as cols from common.log import LOG from common.pkgmeta import get_py_pkg_version from common.spdx import canonicalize_spdx_license_id from sbomnix.cdx import _drv_to_cdx_component, _drv_to_cdx_dependency _NIXPKGS_META_SOURCE_FIELDS = ( ("nixpkgs:metadata_source_method", "method"), ("nixpkgs:path", "path"), ("nixpkgs:rev", "rev"), ("nixpkgs:flakeref", "flakeref"), ("nixpkgs:version", "version"), ("nixpkgs:message", "message"), ) def write_json(pathname, data, printinfo=False): """Write JSON data to a file.""" with open(pathname, "w", encoding="utf-8") as outfile: json_string = json.dumps(data, indent=2) outfile.write(json_string) if printinfo: LOG.info("Wrote: %s", outfile.name) def _nixpkgs_meta_source_properties(sbomdb): """Return non-empty document properties for nixpkgs metadata source.""" source = getattr(sbomdb, "nixpkgs_meta_source", None) if source is None: return [] properties = [] for property_name, attr_name in _NIXPKGS_META_SOURCE_FIELDS: value = getattr(source, attr_name) if value: properties.append({"name": property_name, "value": str(value)}) return properties def _spdx_nixpkgs_meta_source_comment(sbomdb): """Return a compact SPDX comment line for nixpkgs metadata source.""" source = getattr(sbomdb, "nixpkgs_meta_source", None) if source is None: return None fields = [] for property_name, attr_name in _NIXPKGS_META_SOURCE_FIELDS: value = getattr(source, attr_name) if value: fields.append(f"{property_name.removeprefix('nixpkgs:')}={value}") if not fields: return None return "nixpkgs metadata source: " + "; ".join(fields) def build_cdx_document(sbomdb): """Build a CycloneDX document from an SBOM builder.""" cdx = {} cdx["bomFormat"] = "CycloneDX" cdx["specVersion"] = "1.4" cdx["version"] = 1 cdx["serialNumber"] = f"urn:uuid:{sbomdb.uuid}" cdx["metadata"] = {} cdx["metadata"]["timestamp"] = datetime.now(timezone.utc).astimezone().isoformat() cdx["metadata"]["properties"] = [] prop = {} prop["name"] = "sbom_type" prop["value"] = sbomdb.sbom_type cdx["metadata"]["properties"].append(prop) if sbomdb.depth: prop = {} prop["name"] = "sbom_dependencies_depth" prop["value"] = str(sbomdb.depth) cdx["metadata"]["properties"].append(prop) cdx["metadata"]["properties"].extend(_nixpkgs_meta_source_properties(sbomdb)) tool = {} tool["vendor"] = "TII" tool["name"] = "sbomnix" tool["version"] = get_py_pkg_version() cdx["metadata"]["tools"] = [] cdx["metadata"]["tools"].append(tool) cdx["components"] = [] cdx["dependencies"] = [] for drv in sbomdb.df_sbomdb.itertuples(): component = _drv_to_cdx_component(drv, uid=sbomdb.uid) if drv.store_path == sbomdb.target_component_ref: cdx["metadata"]["component"] = component else: cdx["components"].append(component) deps = 
sbomdb.lookup_dependencies(drv, uid=sbomdb.uid) dependency = _drv_to_cdx_dependency(drv, deps, uid=sbomdb.uid) cdx["dependencies"].append(dependency) return cdx def _str_to_spdxid(strval): # Only letters, numbers, '.', and '-' are allowed in spdx idstring, # replace all other characters with '-' idstring = re.sub(r"[^\-.a-zA-Z0-9]", "-", strval) # Return idstring with prefix "SPDXRef-" if idstring.startswith("-"): return f"SPDXRef{idstring}" return f"SPDXRef-{idstring}" def _drv_to_spdx_license_list(drv): license_attr_name = "meta_license_spdxid" if license_attr_name not in drv._asdict(): return [] license_str = getattr(drv, license_attr_name) if not license_str: return [] license_strings = license_str.split(";") licenses = [] for license_string in license_strings: canonical = canonicalize_spdx_license_id(license_string) if not canonical: continue licenses.append(canonical) return licenses def _drv_to_spdx_extrefs(drv): extrefs = [] if drv.cpe: cpe_ref = {} cpe_ref["referenceCategory"] = "SECURITY" cpe_ref["referenceType"] = "cpe23Type" cpe_ref["referenceLocator"] = drv.cpe extrefs.append(cpe_ref) if drv.purl: purl_ref = {} purl_ref["referenceCategory"] = "PACKAGE-MANAGER" purl_ref["referenceType"] = "purl" purl_ref["referenceLocator"] = drv.purl extrefs.append(purl_ref) return extrefs def _drv_to_spdx_package(drv, uid=cols.STORE_PATH): """Convert one entry from sbomdb (drv) to an SPDX package.""" pkg = {} pkg["name"] = drv.pname pkg["SPDXID"] = _str_to_spdxid(getattr(drv, uid)) pkg["versionInfo"] = drv.version pkg["downloadLocation"] = "NOASSERTION" if drv.urls: pkg["downloadLocation"] = drv.urls if "meta_homepage" in drv._asdict() and drv.meta_homepage: pkg["homepage"] = drv.meta_homepage if "meta_description" in drv._asdict() and drv.meta_description: pkg["summary"] = drv.meta_description licenses = _drv_to_spdx_license_list(drv) if licenses: pkg["licenseInfoFromFiles"] = licenses licence_entry = licenses[0] if len(licenses) == 1 else "NOASSERTION" pkg["licenseConcluded"] = licence_entry pkg["licenseDeclared"] = licence_entry pkg["copyrightText"] = "NOASSERTION" extrefs = _drv_to_spdx_extrefs(drv) if extrefs: pkg["externalRefs"] = extrefs return pkg def _drv_to_spdx_relationships(drv, deps_list, uid=cols.STORE_PATH): """Return list of SPDX relationships for one sbomdb row.""" relationships = [] if not deps_list: return relationships drv_spdxid = _str_to_spdxid(getattr(drv, uid)) relationship_type = "DEPENDS_ON" for dep in deps_list: relationship = {} relationship["spdxElementId"] = drv_spdxid relationship["relationshipType"] = relationship_type relationship["relatedSpdxElement"] = _str_to_spdxid(dep) relationships.append(relationship) return relationships def build_spdx_document(sbomdb): """Build an SPDX document from an SBOM builder.""" spdx = {} spdx["spdxVersion"] = "SPDX-2.3" spdx["dataLicense"] = "CC0-1.0" spdx["SPDXID"] = "SPDXRef-DOCUMENT" spdx["name"] = "" spdx["documentNamespace"] = f"sbomnix://{sbomdb.uuid}" creation_info = {} creation_info["created"] = datetime.now(timezone.utc).astimezone().isoformat() creation_info["creators"] = [] creation_info["creators"].append(f"Tool: sbomnix-{get_py_pkg_version()}") spdx["creationInfo"] = creation_info comments = [f"included dependencies: '{sbomdb.sbom_type}'"] source_comment = _spdx_nixpkgs_meta_source_comment(sbomdb) if source_comment: comments.append(source_comment) spdx["comment"] = "\n".join(comments) spdx["packages"] = [] spdx["relationships"] = [] for drv in sbomdb.df_sbomdb.itertuples(): package = _drv_to_spdx_package(drv, 
uid=sbomdb.uid) spdx["packages"].append(package) if drv.store_path == sbomdb.target_component_ref: spdx["name"] = _str_to_spdxid(getattr(drv, sbomdb.uid)) deps = sbomdb.lookup_dependencies(drv, uid=sbomdb.uid) relationships = _drv_to_spdx_relationships(drv, deps, uid=sbomdb.uid) for relation in relationships: spdx["relationships"].append(relation) return spdx ================================================ FILE: src/sbomnix/main.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2022-2023 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Python script that generates SBOMs from nix packages""" import argparse from common.cli_args import add_verbose_argument, add_version_argument, check_positive from common.errors import SbomnixError from common.log import LOG, set_log_verbosity from sbomnix.builder import SbomBuilder from sbomnix.cli_utils import resolve_nix_target ############################################################################### def getargs(args=None): """Parse command line arguments""" desc = ( "This tool finds dependencies of the specified nix store path " "or flake reference NIXREF and " "writes SBOM file(s) as specified in output arguments." ) epil = "Example: sbomnix /nix/store/path/or/flakeref" parser = argparse.ArgumentParser(description=desc, epilog=epil) helps = ( "Target nix store path (e.g. derivation file or nix output path) or flakeref" ) parser.add_argument("NIXREF", help=helps, type=str) helps = "Scan buildtime dependencies instead of runtime dependencies" parser.add_argument("--buildtime", help=helps, action="store_true") helps = ( "Set the depth of the included dependencies. As an example, --depth=1 " "indicates the SBOM should include only the NIXREF direct dependencies. " "With --depth=2, the output SBOM includes the direct dependencies and the " "first level of transitive dependencies. " "By default, when --depth is not specified, the output SBOM includes " "all dependencies all the way to the root of the dependency tree." ) parser.add_argument("--depth", help=helps, type=check_positive) add_version_argument(parser) add_verbose_argument(parser) helps = "Include vulnerabilities in the output of CyloneDX SBOM" parser.add_argument("--include-vulns", help=helps, action="store_true") helps = "Exclude Nixpkgs metadata information in the output" parser.add_argument( "--exclude-meta", help=helps, action="store_true", default=False ) helps = ( "Nixpkgs source used for metadata enrichment. Accepts a nixpkgs " "flakeref, a nixpkgs source path, or nix-path. Overrides automatic " "metadata-source detection." 
) parser.add_argument("--meta-nixpkgs", help=helps, metavar="META_NIXPKGS") helps = "Exclude using heuristics-based CPE matches in the output" parser.add_argument( "--exclude-cpe-matching", help=helps, action="store_true", default=False ) group = parser.add_argument_group("output arguments") helps = "Path to csv output file (default: ./sbom.csv)" group.add_argument("--csv", nargs="?", help=helps, default="sbom.csv") helps = "Path to cyclonedx json output file (default: ./sbom.cdx.json)" group.add_argument("--cdx", nargs="?", help=helps, default="sbom.cdx.json") helps = "Path to spdx json output file (default: ./sbom.spdx.json)" group.add_argument("--spdx", nargs="?", help=helps, default="sbom.spdx.json") helps = "Run nix command with --impure" parser.add_argument("--impure", help=helps, action="store_true") return parser.parse_args(args) ################################################################################ def main(): """main entry point""" args = getargs() set_log_verbosity(args.verbose) try: _run(args) except SbomnixError as error: LOG.fatal("%s", error) raise SystemExit(1) from error def _run(args): if args.exclude_meta and args.meta_nixpkgs: raise SbomnixError("--exclude-meta cannot be used with --meta-nixpkgs") target = resolve_nix_target( args.NIXREF, buildtime=args.buildtime, impure=args.impure ) LOG.info("Generating SBOM for target '%s'", target.path) sbom = SbomBuilder( nix_path=target.path, buildtime=args.buildtime, depth=args.depth, flakeref=target.flakeref, original_ref=target.original_ref, meta_nixpkgs=args.meta_nixpkgs, impure=args.impure, include_meta=not args.exclude_meta, include_vulns=args.include_vulns, include_cpe=not args.exclude_cpe_matching, ) if args.cdx: cdx = sbom.to_cdx_data() if args.include_vulns: sbom.enrich_cdx_with_vulnerabilities(cdx) sbom.write_json(args.cdx, cdx, printinfo=True) if args.spdx: sbom.to_spdx(args.spdx) if args.csv: sbom.to_csv(args.csv) ################################################################################ if __name__ == "__main__": main() ################################################################################ ================================================ FILE: src/sbomnix/meta.py ================================================ # SPDX-FileCopyrightText: 2022-2023 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Cache and scan nixpkgs meta information.""" import pathlib import tempfile from dataclasses import replace from getpass import getuser from filelock import FileLock from common.log import LOG from nixmeta.scanner import NixMetaScanner from sbomnix.dfcache import LockedDfCache from sbomnix.meta_source import ( META_NIXPKGS_NIX_PATH, SCAN_EXCEPTIONS, NixpkgsMetaSource, NixpkgsMetaSourceResolver, classify_meta_nixpkgs, ) ############################################################################### # Update locally generated nixpkgs meta-info every 30 days or when local cache # is cleaned. _NIXMETA_NIXPKGS_TTL = 60 * 60 * 24 * 30 # FileLock lock path _FLOCK = pathlib.Path(tempfile.gettempdir()) / f"{getuser()}_sbomnix_meta.lock" ############################################################################### __all__ = [ "META_NIXPKGS_NIX_PATH", "Meta", "NixpkgsMetaSource", "classify_meta_nixpkgs", ] class Meta: """Cache nixpkgs meta information.""" def __init__(self): self.lock = FileLock(_FLOCK) self.cache = LockedDfCache() self.source_resolver = NixpkgsMetaSourceResolver() def get_nixpkgs_meta(self, nixref=None): """ Return nixpkgs meta pinned in `nixref`. 
`nixref` can point to a nix store path or flake reference. If nixref is None, attempt to read the nixpkgs store path from NIX_PATH environment variable. """ source = self.source_resolver.resolve_default_source(nixref) return self._scan_source(source) def get_nixpkgs_meta_with_source( self, *, target_path=None, flakeref=None, original_ref=None, explicit_nixpkgs=None, impure=False, ): """Return nixpkgs metadata and selected metadata source.""" source = self._resolve_source( target_path=target_path, flakeref=flakeref, original_ref=original_ref, explicit_nixpkgs=explicit_nixpkgs, impure=impure, ) return self._scan_source_with_source(source) def _resolve_source( self, *, target_path=None, flakeref=None, original_ref=None, explicit_nixpkgs=None, impure=False, ): if explicit_nixpkgs: return self.source_resolver.resolve_meta_nixpkgs_option( explicit_nixpkgs, target_path=target_path, ) if flakeref: source = self.source_resolver.resolve_flakeref_target_source( flakeref, impure=impure, ) if source is not None: return source return self.source_resolver.resolve_flakeref_lock_source(flakeref) return self.source_resolver.path_target_without_source( target_path=target_path, original_ref=original_ref, ) def _scan_source(self, source): df, _source = self._scan_source_with_source(source) return df def _scan_source_with_source(self, source): if not source.path: return None, source if source.expression: LOG.debug("Scanning meta-info using nix expression for: %s", source.path) df = self._scan_expression( source.expression, cache_key=source.expression_cache_key, impure=source.expression_impure, ) if df is not None and not df.empty: return df, source LOG.warning( "Failed scanning evaluated package set: %s", source.path, ) return None, replace( source, message=( "Evaluated package-set metadata scan failed. " "Skipping nixpkgs metadata." ), ) LOG.debug("Scanning meta-info using nixpkgs path: %s", source.path) return self._scan(source.path), source def _scan_expression(self, expression, *, cache_key=None, impure=False): if cache_key is None: with self.lock: LOG.debug("cache disabled, scanning expression") df = self._try_scan_expression(expression, impure=impure) if df is None or df.empty: LOG.warning("Failed scanning uncached nixmeta expression") return None return df cache_key = f"expr:{cache_key}" with self.lock: df = self.cache.get(cache_key) if df is not None and not df.empty: LOG.debug("found from cache: %s", cache_key) return df LOG.debug("cache miss, scanning expression: %s", cache_key) df = self._try_scan_expression(expression, impure=impure) if df is None or df.empty: LOG.warning("Failed scanning nixmeta expression: %s", cache_key) return None self.cache.set(key=cache_key, value=df, ttl=_NIXMETA_NIXPKGS_TTL) return df @staticmethod def _try_scan_expression(expression, *, impure=False): try: scanner = NixMetaScanner() scanner.scan_expression(expression, impure=impure) return scanner.to_df() except SCAN_EXCEPTIONS: LOG.debug("Failed scanning nixmeta expression", exc_info=True) return None def _scan(self, nixpkgs_path): # In case sbomnix is run concurrently, we want to make sure there's # only one instance of NixMetaScanner.scan_path() running at a time. # The reason is, NixMetaScanner.scan_path() potentially invokes # `nix-env -qa --meta --json -f /path/to/nixpkgs` which is very # memory intensive. The locking needs to happen here (and not in # NixMetaScanner) because this object caches the nixmeta info. 
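        # (FileLock is an inter-process lock backed by a lock file in the
        # system temp directory, so concurrent sbomnix invocations serialize
        # here too, not only threads within this process.)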
# First scan generates the cache, after which the consecutive scans # will read the scan results from the cache, not having to run # the nix-env command again, making the consecutive scans relatively # fast and light-weight. with self.lock: df = self.cache.get(nixpkgs_path) if df is not None and not df.empty: LOG.debug("found from cache: %s", nixpkgs_path) return df LOG.debug("cache miss, scanning: %s", nixpkgs_path) scanner = NixMetaScanner() scanner.scan_path(nixpkgs_path) df = scanner.to_df() if df is None or df.empty: LOG.warning("Failed scanning nixmeta: %s", nixpkgs_path) return None # Cache requires some TTL, so we set it to some value here. # Although, we could as well store it indefinitely as it should # not change given the same key (nixpkgs store path). self.cache.set(key=nixpkgs_path, value=df, ttl=_NIXMETA_NIXPKGS_TTL) return df ############################################################################### ================================================ FILE: src/sbomnix/meta_source.py ================================================ # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Resolve nixpkgs metadata sources from target context and CLI options.""" import json import os import pathlib import re from dataclasses import dataclass, replace from subprocess import CalledProcessError from urllib.parse import urlencode from common.errors import SbomnixError from common.flakeref import ( NIXOS_CONFIGURATION_TOPLEVEL_SUFFIX, parse_nixos_configuration_ref, quote_nix_attr_segment, ) from common.log import LOG from common.proc import exec_cmd, nix_cmd from nixmeta.scanner import nixref_to_nixpkgs_path META_NIXPKGS_NIX_PATH = "nix-path" RESERVED_META_NIXPKGS_MODES = frozenset({META_NIXPKGS_NIX_PATH}) SCAN_EXCEPTIONS = (KeyError, OSError, CalledProcessError, TypeError, ValueError) _NIXREF_RESOLUTION_EXCEPTIONS = (AttributeError, *SCAN_EXCEPTIONS) @dataclass(frozen=True) class NixpkgsMetaSource: """Description of the nixpkgs source used for metadata enrichment.""" method: str path: str | None = None flakeref: str | None = None rev: str | None = None version: str | None = None message: str | None = None expression: str | None = None expression_cache_key: str | None = None expression_impure: bool = False def classify_meta_nixpkgs(value): """Classify a --meta-nixpkgs value as a reserved mode or explicit source.""" if value in RESERVED_META_NIXPKGS_MODES: return value return "explicit" def read_nixpkgs_version(nixpkgs_path): """Read nixpkgs version from a source path if available.""" try: return ( (pathlib.Path(nixpkgs_path) / "lib" / ".version") .read_text(encoding="utf-8") .strip() ) except OSError: return None def is_nix_store_path(path): """Return true when path syntactically points into /nix/store.""" return pathlib.Path(path).as_posix().startswith("/nix/store/") def nixpkgs_meta_source_with_path(source): """Attach path-local nixpkgs version to a metadata source.""" if not source.path: return source return replace(source, version=read_nixpkgs_version(source.path)) class NixpkgsMetaSourceResolver: """Resolve a nixpkgs metadata source without scanning metadata.""" @staticmethod def path_target_without_source(target_path=None, original_ref=None): """Return the no-source result for store-path targets.""" LOG.debug( "No automatic nixpkgs metadata source for target path=%s original_ref=%s", target_path, original_ref, ) return NixpkgsMetaSource( method="none", message=( "No nixpkgs metadata source was provided for store-path 
target. " "Skipping nixpkgs metadata. Re-run with " "--meta-nixpkgs to include metadata." ), ) def resolve_meta_nixpkgs_option(self, meta_nixpkgs, *, target_path=None): """Resolve an explicit --meta-nixpkgs source or reserved mode.""" LOG.debug( "Resolving explicit nixpkgs metadata source for target path=%s", target_path, ) mode = classify_meta_nixpkgs(meta_nixpkgs) if mode == META_NIXPKGS_NIX_PATH: return self.resolve_nix_path_source( message="NIX_PATH metadata source may not match the target", required=True, ) return self.resolve_explicit_source(meta_nixpkgs) def resolve_flakeref_target_source(self, flakeref, *, impure=False): """Resolve target-specific nixpkgs metadata for known flakeref outputs.""" parsed = self._parse_nixos_toplevel_flakeref(flakeref) if not parsed: return None flake, name = parsed name_attr = quote_nix_attr_segment(name) pkgs_path_ref = f"{flake}#nixosConfigurations.{name_attr}.pkgs.path" pkgs_path = self._nix_eval_raw(pkgs_path_ref, impure=impure) if pkgs_path: expression_flake = self._flake_ref_for_expression( flake, impure=impure, ) return nixpkgs_meta_source_with_path( NixpkgsMetaSource( method="flakeref-target", path=pkgs_path, flakeref=pkgs_path_ref, message="Scanning evaluated NixOS package set from flakeref", expression=self._nixos_pkgs_expression(expression_flake, name), expression_cache_key=self._nixos_pkgs_expression_cache_key( expression_flake, name, impure=impure, ), expression_impure=impure, ), ) return self._nixos_toplevel_without_source() @staticmethod def _nixos_toplevel_without_source(): return NixpkgsMetaSource( method="none", message=( "Failed resolving target-specific nixpkgs metadata source from " "NixOS configuration flakeref. Skipping nixpkgs metadata. Re-run " "with --meta-nixpkgs to include metadata." 
), ) @staticmethod def _parse_nixos_toplevel_flakeref(flakeref): return parse_nixos_configuration_ref( flakeref, suffix=NIXOS_CONFIGURATION_TOPLEVEL_SUFFIX, ) @staticmethod def _nixos_pkgs_expression(flake, name): flake_json = json.dumps(flake) name_attr = quote_nix_attr_segment(name) return ( "let\n" f" flake = builtins.getFlake {flake_json};\n" "in\n" f" flake.nixosConfigurations.{name_attr}.pkgs\n" ) def _flake_ref_for_expression(self, flake, *, impure=False): if self._flake_ref_has_stable_lock(flake): return flake if self._should_lock_flake_ref_for_expression(flake): locked_ref = self._locked_flake_ref_from_metadata(flake, impure=impure) if locked_ref: return locked_ref return self._normalize_local_flake_ref_for_expression(flake) @staticmethod def _flake_ref_has_stable_lock(flake): return re.search(r"(?:[?&])(?:narHash|rev)=", flake) is not None @classmethod def _should_lock_flake_ref_for_expression(cls, flake): if cls._flake_ref_has_stable_lock(flake): return False if cls._is_existing_local_flake_ref(flake): return True return re.match(r"^[A-Za-z][A-Za-z0-9+.-]*:", flake or "") is not None @staticmethod def _is_existing_local_flake_ref(flake): path_text = flake if flake.startswith("path:"): path_text = flake.removeprefix("path:").partition("?")[0] elif re.match(r"^[A-Za-z][A-Za-z0-9+.-]*:", flake or ""): return False return pathlib.Path(path_text).expanduser().exists() @staticmethod def _locked_flake_ref_from_metadata(flake, *, impure=False): meta_json = NixpkgsMetaSourceResolver._nix_flake_metadata( flake, impure=impure, ) if meta_json is None: return None try: source_path = meta_json["path"] locked = meta_json["locked"] nar_hash = locked["narHash"] except (KeyError, TypeError): return None if not source_path or not nar_hash or not is_nix_store_path(source_path): return None query = {"narHash": nar_hash} locked_dir = locked.get("dir") if locked_dir: query["dir"] = locked_dir return f"path:{source_path}?{urlencode(query, safe='/')}" @staticmethod def _nix_flake_metadata(flake, *, impure=False): LOG.debug("Reading flake metadata for nixpkgs metadata expression: %s", flake) ret = exec_cmd( nix_cmd("flake", "metadata", flake, "--json", impure=impure), raise_on_error=False, return_error=True, log_error=False, ) if ret is None or ret.returncode != 0: LOG.debug("Failed reading flake metadata for expression: %s", flake) return None try: return json.loads(ret.stdout) except ValueError: LOG.debug("Failed parsing flake metadata for expression: %s", flake) return None @staticmethod def _normalize_local_flake_ref_for_expression(flake): if flake.startswith("path:"): path_text, separator, query = flake.removeprefix("path:").partition("?") path = pathlib.Path(path_text).expanduser() if not path.is_absolute(): path_text = path.resolve().as_posix() return f"path:{path_text}{separator}{query}" if re.match(r"^[A-Za-z][A-Za-z0-9+.-]*:", flake or ""): return flake path = pathlib.Path(flake).expanduser() if path.exists() or flake.startswith((".", "/", "~")): return path.resolve().as_posix() return flake @classmethod def _nixos_pkgs_expression_cache_key(cls, flake, name, *, impure=False): if impure: return None stable_ref = cls._stable_flake_ref_for_expression_cache(flake) if not stable_ref: return None cache_parts = json.dumps([stable_ref, name], separators=(",", ":")) return f"nixos-pkgs:{cache_parts}" @staticmethod def _stable_flake_ref_for_expression_cache(flake): if flake.startswith("path:/nix/store/"): return flake if flake.startswith("/nix/store/"): return flake if re.search(r"(?:[?&])rev=", flake): 
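            # A flakeref pinned to an exact rev evaluates reproducibly, so
            # the ref itself can serve as a stable cache key.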
return flake return None @staticmethod def _nix_eval_raw(flakeref, *, impure=False): LOG.debug("Evaluating nixpkgs metadata helper flakeref '%s'", flakeref) ret = exec_cmd( nix_cmd("eval", "--raw", flakeref, impure=impure), raise_on_error=False, return_error=True, log_error=False, ) if ret is None or ret.returncode != 0: LOG.debug( "Failed evaluating nixpkgs metadata helper flakeref: %s", flakeref ) return None return ret.stdout.strip() or None def resolve_explicit_source(self, meta_nixpkgs): """Resolve an explicit --meta-nixpkgs path or flakeref.""" path = pathlib.Path(meta_nixpkgs) if path.exists(): resolved_path = path.resolve() if is_nix_store_path(resolved_path): return nixpkgs_meta_source_with_path( NixpkgsMetaSource( method="explicit", path=resolved_path.as_posix(), ), ) nixpath = self._try_normalize_mutable_path(resolved_path) if nixpath: return nixpkgs_meta_source_with_path( NixpkgsMetaSource( method="explicit", path=nixpath.as_posix(), flakeref=resolved_path.as_posix(), ), ) raise SbomnixError( "Explicit --meta-nixpkgs path must resolve to an immutable " f"/nix/store source before scanning: '{meta_nixpkgs}'" ) try: nixpath = nixref_to_nixpkgs_path(meta_nixpkgs) except _NIXREF_RESOLUTION_EXCEPTIONS as error: raise SbomnixError( f"Failed resolving --meta-nixpkgs source: '{meta_nixpkgs}'" ) from error if not nixpath: raise SbomnixError( f"Failed resolving --meta-nixpkgs source: '{meta_nixpkgs}'" ) return nixpkgs_meta_source_with_path( NixpkgsMetaSource( method="explicit", path=nixpath.as_posix(), flakeref=meta_nixpkgs, ), ) @staticmethod def _try_normalize_mutable_path(path): try: nixpath = nixref_to_nixpkgs_path(path.as_posix()) except _NIXREF_RESOLUTION_EXCEPTIONS: LOG.debug( "Failed normalizing mutable nixpkgs path: %s", path.as_posix(), exc_info=True, ) return None if nixpath and is_nix_store_path(nixpath): return nixpath return None def resolve_flakeref_lock_source(self, nixref): """Return the nixpkgs source selected by a flakeref lock graph.""" if nixref: LOG.debug("Reading nixpkgs path from nixref: %s", nixref) nixpath = nixref_to_nixpkgs_path(nixref) if nixpath: return nixpkgs_meta_source_with_path( NixpkgsMetaSource( method="flakeref-lock", path=nixpath.as_posix(), flakeref=nixref, ), ) return NixpkgsMetaSource(method="none") def resolve_default_source(self, nixref=None): """Return the metadata source for the older direct Meta API.""" if nixref: return self.resolve_flakeref_lock_source(nixref) if "NIX_PATH" in os.environ: return self.resolve_nix_path_source() return NixpkgsMetaSource(method="none") def resolve_nix_path_source(self, *, message=None, required=False): """Return the nixpkgs source referenced by NIX_PATH.""" LOG.debug("Reading nixpkgs path from NIX_PATH environment") nix_path = os.environ.get("NIX_PATH", "") m_nixpkgs = re.search(r"(?:^|:)nixpkgs=([^:]+)", nix_path) if m_nixpkgs: return nixpkgs_meta_source_with_path( NixpkgsMetaSource( method="nix-path", path=m_nixpkgs.group(1), message=message, ), ) if required: raise SbomnixError( "NIX_PATH does not contain a nixpkgs= entry required by " "--meta-nixpkgs nix-path" ) return NixpkgsMetaSource(method="none") ================================================ FILE: src/sbomnix/runtime.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Runtime closure helpers based on structured Nix path-info JSON.""" import subprocess from dataclasses import dataclass import pandas as pd from common 
import columns as cols from common.errors import NixCommandError from common.nix_utils import ( NIX_PATH_INFO_JSON, load_nix_json, nix_path_info_deriver, nix_path_info_references, normalize_nix_path_info, ) from common.proc import exec_cmd, nix_cmd from sbomnix.closure import ( dependency_rows_to_dataframe, store_path_label, ) @dataclass(frozen=True) class RuntimeClosure: """Runtime dependency edges and output-to-deriver mapping.""" df_deps: pd.DataFrame output_paths_by_drv: dict[str, set[str]] def load_runtime_closure(path): """Load runtime closure information using ``nix path-info`` JSON.""" cmd = nix_cmd( "path-info", "--json", "--json-format", "1", "--recursive", path, ) try: ret = exec_cmd(cmd) except subprocess.CalledProcessError as error: raise NixCommandError( cmd, stderr=error.stderr, stdout=error.stdout, ) from None return runtime_closure_from_path_info(load_nix_json(ret.stdout, NIX_PATH_INFO_JSON)) def runtime_closure_from_path_info(path_info): """Return runtime closure data from parsed ``nix path-info`` JSON.""" rows = [] output_paths_by_drv = {} for target_path, info in normalize_nix_path_info(path_info).items(): deriver = nix_path_info_deriver(info, target_path) if deriver: output_paths_by_drv.setdefault(deriver, set()).add(target_path) for src_path in nix_path_info_references(info, target_path): if src_path == target_path: continue rows.append( { cols.SRC_PATH: src_path, "src_pname": store_path_label(src_path), cols.TARGET_PATH: target_path, "target_pname": store_path_label(target_path), } ) return RuntimeClosure( df_deps=dependency_rows_to_dataframe(rows), output_paths_by_drv=output_paths_by_drv, ) ================================================ FILE: src/sbomnix/vuln_enrichment.py ================================================ # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """CycloneDX vulnerability enrichment helpers.""" import pathlib from tempfile import NamedTemporaryFile from typing import cast import pandas as pd from common import columns as cols from sbomnix.cdx import _vuln_to_cdx_vuln from vulnxscan.vulnscan import VulnScan def enrich_cdx_with_vulnerabilities(sbomdb, cdx): """Add vulnerability scan results to an existing CycloneDX document.""" scanner = VulnScan() scanner.scan_vulnix(_vulnix_target_path(sbomdb), sbomdb.buildtime) temp_cdx_path = None try: with NamedTemporaryFile( delete=False, prefix="vulnxscan_", suffix=".json", ) as outfile: temp_cdx_path = outfile.name sbomdb.write_json(temp_cdx_path, cdx, printinfo=False) scanner.scan_grype(temp_cdx_path) scanner.scan_osv(temp_cdx_path) finally: if temp_cdx_path is not None: pathlib.Path(temp_cdx_path).unlink(missing_ok=True) cdx["vulnerabilities"] = [] vuln_frames = [ df for df in [scanner.df_grype, scanner.df_osv, scanner.df_vulnix] if df is not None ] if not vuln_frames: return cdx df_vulns = pd.concat(vuln_frames, ignore_index=True) if df_vulns.empty: return cdx if cols.MODIFIED in df_vulns.columns: df_vulns = df_vulns.drop(cols.MODIFIED, axis=1) vuln_grouped = cast( pd.DataFrame, df_vulns.groupby( [cols.PACKAGE, cols.VERSION, cols.SEVERITY, cols.VULN_ID], as_index=False, ).agg({cols.SCANNER: pd.Series.unique}), ) vuln_components = pd.merge( left=vuln_grouped, right=sbomdb.df_sbomdb, how="inner", left_on=[cols.PACKAGE, cols.VERSION], right_on=[cols.PNAME, cols.VERSION], ) for vuln in vuln_components.itertuples(): cdx["vulnerabilities"].append(_vuln_to_cdx_vuln(vuln)) return cdx def _vulnix_target_path(sbomdb): """Return the target path to 
use for vulnix scans.""" if sbomdb.buildtime: return sbomdb.target_deriver return sbomdb.nix_path ================================================ FILE: src/vulnxscan/__init__.py ================================================ # SPDX-FileCopyrightText: 2022 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 ================================================ FILE: src/vulnxscan/github_prs.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """GitHub PR search helpers for vulnerability triage.""" import json import time import urllib.parse from common.http import create_cached_limited_session from common.log import LOG, LOG_SPAM GITHUB_API_CACHE_SECONDS = 6 * 60 * 60 GITHUB_API_REQUEST_TIMEOUT = 60 GITHUB_API_USER_AGENT = "sbomnix-github-prs/0 (https://github.com/tiiuae/sbomnix/)" def append_search_results(prs, result, max_results=5): """Append GitHub issue search result URLs to ``result``.""" for item in prs["items"]: if len(result) >= max_results: LOG.log( LOG_SPAM, "More than %s PRs, skipping: %s", max_results, item["html_url"], ) continue result.add(item["html_url"]) return result class GitHubPrLookup: """Search likely nixpkgs PRs related to a vulnerability.""" def __init__( self, session=None, sleeper=None, request_timeout=GITHUB_API_REQUEST_TIMEOUT, ): self.session = ( create_cached_limited_session( per_minute=9, per_second=1, expire_after=GITHUB_API_CACHE_SECONDS, user_agent=GITHUB_API_USER_AGENT, ) if session is None else session ) self.sleeper = time.sleep if sleeper is None else sleeper self.request_timeout = request_timeout def query(self, query_str, delay=60): """Query the GitHub issues search API.""" query_str_quoted = urllib.parse.quote(query_str, safe=":/") query = f"https://api.github.com/search/issues?q={query_str_quoted}" LOG.debug("GET: %s", query) resp = self.session.get(query, timeout=self.request_timeout) if not resp.ok and "rate limit exceeded" in resp.text: max_delay = 60 if delay > max_delay: LOG.warning("Rate limit exceeded requesting %s", query) return {"items": []} LOG.debug("Sleeping %s seconds before re-requesting", delay) self.sleeper(delay) LOG.debug("Re-requesting") return self.query(query_str, delay * 2) resp.raise_for_status() resp_json = json.loads(resp.text) LOG.log(LOG_SPAM, "total_count=%s", resp_json["total_count"]) return resp_json def find_nixpkgs_prs(self, row): """Return likely nixpkgs PR URLs for a vulnerable package row.""" if hasattr(row, "whitelist") and row.whitelist: LOG.log(LOG_SPAM, "Whitelisted, skipping PR query: %s", row) return "" nixpr = "repo:NixOS/nixpkgs is:pr" unmerged = "is:unmerged is:open" merged = "is:merged" version = None result = set() append_search_results(self.query(f"{nixpr} {unmerged} {row.vuln_id}"), result) append_search_results(self.query(f"{nixpr} {merged} {row.vuln_id}"), result) if row.classify == "fix_update_to_version_nixpkgs": version = row.version_nixpkgs elif row.classify == "fix_update_to_version_upstream": version = row.version_upstream if version: pkg = row.package append_search_results( self.query(f"{nixpr} {unmerged} {pkg} in:title {version} in:title"), result, ) append_search_results( self.query(f"{nixpr} {merged} {pkg} in:title {version} in:title"), result, ) return " \n".join(sorted(result)) ================================================ FILE: src/vulnxscan/osv.py ================================================ #!/usr/bin/env python3 # 
SPDX-FileCopyrightText: 2023 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Demonstrate querying OSV db for vulnerabilities based on cdx SBOM""" import argparse import os import pathlib from common.cli_args import add_verbose_argument, add_version_argument from common.df import df_to_csv_file from common.errors import InvalidSbomError, SbomnixError from common.log import LOG, set_log_verbosity from vulnxscan.osv_client import OSV ############################################################################### def getargs(args=None): """Parse command line arguments""" desc = "Scan CycloneDX SBOM components for OSV vulnerabilities" epil = f"Example: ./{os.path.basename(__file__)} /path/to/sbom.json" parser = argparse.ArgumentParser(description=desc, epilog=epil) add_verbose_argument(parser) helps = "Path to CycloneDX SBOM json file" parser.add_argument("SBOM", help=helps, type=pathlib.Path) helps = "Path to output file (default: ./osv.csv)" parser.add_argument("-o", "--out", nargs="?", help=helps, default="osv.csv") helps = ( 'List of ecosystems to query (default: "GIT,OSS-Fuzz"). ' "For more details, see https://osv.dev" ) parser.add_argument("--ecosystems", type=str, help=helps, default="GIT,OSS-Fuzz") add_version_argument(parser) return parser.parse_args(args) def _run(args): if not args.SBOM.exists(): raise InvalidSbomError(args.SBOM) osv = OSV() ecosystems = [str(x).strip() for x in args.ecosystems.split(",")] osv.query_vulns(args.SBOM.as_posix(), ecosystems) df_vulns = osv.to_dataframe() df_to_csv_file(df_vulns, args.out) def main(): """main entry point""" args = getargs() set_log_verbosity(args.verbose) try: _run(args) except SbomnixError as error: LOG.fatal("%s", error) raise SystemExit(1) from error ################################################################################ if __name__ == "__main__": main() ################################################################################ ================================================ FILE: src/vulnxscan/osv_client.py ================================================ # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Reusable OSV querying helpers.""" import json import pandas as pd from common import columns as cols from common.http import create_cached_limited_session from common.log import LOG, LOG_SPAM OSV_CACHE_SECONDS = 6 * 60 * 60 OSV_QUERY_URL = "https://api.osv.dev/v1/querybatch" OSV_REQUEST_TIMEOUT = 60 OSV_USER_AGENT = "sbomnix-osv/0 (https://github.com/tiiuae/sbomnix/)" def create_osv_session(): """Return a retrying HTTP session for OSV requests.""" return create_cached_limited_session( per_second=1, expire_after=OSV_CACHE_SECONDS, user_agent=OSV_USER_AGENT, allowed_methods=frozenset(("GET", "HEAD", "POST")), ) class OSV: """Query and parse OSV vulnerability data.""" def __init__(self, session=None, request_timeout=OSV_REQUEST_TIMEOUT): self.session = create_osv_session() if session is None else session self.request_timeout = request_timeout self.vulns_dict = {} def _parse_vulns(self, package, vulns): setcol = self.vulns_dict.setdefault for vuln in vulns["vulns"]: setcol(cols.VULN_ID, []).append(vuln["id"]) setcol(cols.MODIFIED, []).append(vuln["modified"]) setcol(cols.PACKAGE, []).append(package["package"]["name"]) setcol(cols.VERSION, []).append(package["version"]) def _parse_batch_response(self, query, results): # Preserve the previous tolerant behavior if the API returns fewer results. 
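        # zip(..., strict=False) stops at the shorter sequence, so queries
        # that got no corresponding result entry are silently skipped.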
for package, vulns in zip(query["queries"], results, strict=False): if not package or not vulns: continue LOG.debug("package: %s", package) LOG.debug("vulns: %s", vulns) if "vulns" not in vulns: continue self._parse_vulns(package, vulns) def _post_batch_query(self, query): LOG.log(LOG_SPAM, "query: %s", query) LOG.log(LOG_SPAM, "sending request to '%s'", OSV_QUERY_URL) resp = self.session.post( OSV_QUERY_URL, json=query, timeout=self.request_timeout, ) LOG.debug("resp.status_code: %s", resp.status_code) LOG.log(LOG_SPAM, "resp.json: %s", resp.json()) resp.raise_for_status() payload = resp.json() self._parse_batch_response(query, payload.get("results", [])) def _parse_sbom(self, path): LOG.debug("Parsing sbom: %s", path) with open(path, encoding="utf-8") as inf: json_dict = json.loads(inf.read()) components = json_dict["components"] + [json_dict["metadata"]["component"]] components_dict = {} setcol = components_dict.setdefault for component in components: setcol(cols.NAME, []).append(component["name"]) setcol(cols.VERSION, []).append(component["version"]) df_components = pd.DataFrame(components_dict) df_components.fillna("", inplace=True) df_components = df_components.astype(str) df_components.sort_values( cols.NAME, inplace=True, key=lambda col: col.str.lower(), ) df_components.reset_index(drop=True, inplace=True) return df_components def query_vulns(self, sbom_path, ecosystems=None): """Query each package in an SBOM for OSV vulnerabilities.""" LOG.verbose("Querying vulnerabilities") df_sbom = self._parse_sbom(sbom_path) max_queries = 1000 batchquery = {"queries": []} if ecosystems is None: ecosystems = ["GIT", "OSS-Fuzz"] for component in df_sbom.to_dict("records"): name = component[cols.NAME] version = component.get(cols.VERSION, "") if not version: LOG.debug("skipping osv query (unknown version): %s", name) continue for ecosystem in ecosystems: query = { "version": version, "package": { "name": name, "ecosystem": ecosystem, }, } batchquery["queries"].append(query) if len(batchquery["queries"]) >= max_queries: self._post_batch_query(batchquery) batchquery["queries"] = [] if batchquery["queries"]: self._post_batch_query(batchquery) def to_dataframe(self): """Return found vulnerabilities as a pandas dataframe.""" return pd.DataFrame.from_dict(self.vulns_dict) ================================================ FILE: src/vulnxscan/parsers.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Parsing helpers for scanner output formats.""" import json import numpy as np import pandas as pd from common import columns as cols from common.log import LOG, LOG_SPAM def _severity_from_cache(cvss_cache, vuln_id): if cvss_cache is None: return "" return cvss_cache.get(vuln_id, "") def parse_vulnix_json(json_str, *, cvss_cache=None, log=LOG): """Parse vulnix JSON output into a normalized dataframe.""" vulnerable_packages = json.loads(json_str) vulnix_vulns_dict = {} setcol = vulnix_vulns_dict.setdefault for package in vulnerable_packages: cvss = package["cvssv3_basescore"] for cve in package["affected_by"]: severity = _severity_from_cache(cvss_cache, cve) if not severity and cve in cvss: severity = cvss[cve] if cvss_cache is not None: cvss_cache[cve] = severity setcol(cols.PACKAGE, []).append(package["pname"]) setcol(cols.VERSION, []).append(package["version"]) setcol(cols.VULN_ID, []).append(cve) setcol(cols.SEVERITY, []).append(severity) setcol(cols.SCANNER, 
[]).append("vulnix") df_vulnix = pd.DataFrame.from_dict(vulnix_vulns_dict) if not df_vulnix.empty: log.debug("Vulnix found vulnerabilities") df_vulnix.replace(np.nan, "", regex=True, inplace=True) df_vulnix.drop_duplicates(keep="first", inplace=True) return df_vulnix def parse_grype_json(json_str, *, cvss_cache=None, log=LOG, log_spam=LOG_SPAM): """Parse grype JSON output into a normalized dataframe.""" vulnerabilities = json.loads(json_str) log.log(log_spam, json.dumps(vulnerabilities, indent=2)) grype_vulns_dict = {} setcol = grype_vulns_dict.setdefault for vuln in vulnerabilities["matches"]: if not vuln["artifact"]["version"]: log.log( log_spam, "'%s' missing version information: skipping", vuln["artifact"]["name"], ) continue vid = vuln["vulnerability"]["id"] severity = _severity_from_cache(cvss_cache, vid) if not severity and vuln["vulnerability"]["cvss"]: for cvss in vuln["vulnerability"]["cvss"]: if float(cvss["version"]) >= 3: log.log(log_spam, "selected cvss: %s", cvss) severity = cvss["metrics"]["baseScore"] if cvss_cache is not None: cvss_cache[vid] = severity break setcol(cols.PACKAGE, []).append(vuln["artifact"]["name"]) setcol(cols.VERSION, []).append(vuln["artifact"]["version"]) setcol(cols.VULN_ID, []).append(vuln["vulnerability"]["id"]) setcol(cols.SEVERITY, []).append(severity) setcol(cols.SCANNER, []).append("grype") df_grype = pd.DataFrame.from_dict(grype_vulns_dict) if not df_grype.empty: log.debug("Grype found vulnerabilities") df_grype.replace(np.nan, "", regex=True, inplace=True) df_grype.drop_duplicates(keep="first", inplace=True) return df_grype def normalize_osv_dataframe(df_osv, *, cvss_cache=None, log=LOG, log_spam=LOG_SPAM): """Normalize OSV query results into vulnxscan's dataframe shape.""" if df_osv is None: return pd.DataFrame() df_osv = df_osv.copy(deep=True) if not df_osv.empty: df_osv[cols.SCANNER] = "osv" df_osv.replace(np.nan, "", regex=True, inplace=True) df_osv.drop_duplicates(keep="first", inplace=True) df_osv[cols.MODIFIED] = pd.to_datetime( df_osv[cols.MODIFIED], format="%Y-%m-%d", exact=False, ) df_osv[cols.SEVERITY] = df_osv[cols.VULN_ID].apply( lambda vuln_id: _severity_from_cache(cvss_cache, vuln_id) ) log.log(log_spam, "osv data:\n%s", df_osv.to_markdown()) log.debug("OSV scan found vulnerabilities") return df_osv ================================================ FILE: src/vulnxscan/repology_lookup.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Repology-backed lookup helpers for vulnerability triage.""" from pathlib import Path import pandas as pd from common import columns as cols from common.df import df_log from common.log import LOG, LOG_SPAM from common.package_names import nix_to_repology_pkg_name from common.versioning import version_distance from repology.adapter import RepologyAdapter, RepologyQuery from repology.exceptions import RepologyNoMatchingPackages from repology.repology_cve import query_cve def select_newest(df): """Return the newest rows per package.""" selected = [] for pkg_name in df[cols.PACKAGE].unique(): df_pkg = df[df[cols.PACKAGE] == str(pkg_name)] df_newest = df_pkg[df_pkg[cols.STATUS] == "newest"] if df_newest.empty: df_newest = df_pkg.sort_values(by=[cols.VERSION]).iloc[[-1]] selected.append(df_newest) if not selected: return pd.DataFrame() return pd.concat(selected, ignore_index=True) def _add_triage_item(out_dict, vuln, whitelist_cols, df_repo=None): if df_repo is None: 
out_dict.setdefault(cols.VULN_ID, []).append(vuln.vuln_id) out_dict.setdefault(cols.URL, []).append(vuln.url) out_dict.setdefault(cols.PACKAGE, []).append(vuln.package) out_dict.setdefault(cols.SEVERITY, []).append(vuln.severity) out_dict.setdefault(cols.VERSION_LOCAL, []).append(vuln.version) out_dict.setdefault(cols.VERSION_NIXPKGS, []).append("") out_dict.setdefault(cols.VERSION_UPSTREAM, []).append("") out_dict.setdefault(cols.PACKAGE_REPOLOGY, []).append("") out_dict.setdefault(cols.SORTCOL, []).append(vuln.sortcol) if whitelist_cols: out_dict.setdefault(cols.WHITELIST, []).append(vuln.whitelist) out_dict.setdefault(cols.WHITELIST_COMMENT, []).append( vuln.whitelist_comment ) return for item in df_repo.itertuples(): out_dict.setdefault(cols.VULN_ID, []).append(vuln.vuln_id) out_dict.setdefault(cols.URL, []).append(vuln.url) out_dict.setdefault(cols.PACKAGE, []).append(vuln.package) out_dict.setdefault(cols.SEVERITY, []).append(vuln.severity) out_dict.setdefault(cols.VERSION_LOCAL, []).append(vuln.version) out_dict.setdefault(cols.VERSION_NIXPKGS, []).append(item.version) if item.newest_upstream_release and ";" in item.newest_upstream_release: version_upstream_str = item.newest_upstream_release.split(";")[0] else: version_upstream_str = item.newest_upstream_release out_dict.setdefault(cols.VERSION_UPSTREAM, []).append(version_upstream_str) out_dict.setdefault(cols.PACKAGE_REPOLOGY, []).append(item.package) out_dict.setdefault(cols.SORTCOL, []).append(vuln.sortcol) if whitelist_cols: out_dict.setdefault(cols.WHITELIST, []).append(vuln.whitelist) out_dict.setdefault(cols.WHITELIST_COMMENT, []).append( vuln.whitelist_comment ) def _version_similarity(row): ratio = version_distance(row.version, row.version_cmp) LOG.log( LOG_SPAM, "Version similarity ('%s' vs '%s' ==> %s)", row.version, row.version_cmp, ratio, ) return ratio class RepologyVulnerabilityLookup: """Cache and query Repology/CVE data used by triage.""" def __init__(self, adapter=None, cve_query=None): self.adapter = RepologyAdapter() if adapter is None else adapter self.cve_query = query_cve if cve_query is None else cve_query self._repology_cve_dfs = {} self._repology_dfs = {} def is_vulnerable(self, repo_pkg_name, pkg_version, cve_id=None): """ Return true if given pkg version is vulnerable. If ``cve_id`` is specified, return true only if pkg is affected by the given cve id. 
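
        Illustrative call (the package and version are hypothetical):
            lookup.is_vulnerable("openssl", "3.0.7", cve_id="CVE-2022-3602")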
""" LOG.debug("Finding vulnerability status for %s:%s", repo_pkg_name, pkg_version) key = f"{repo_pkg_name}:{pkg_version}" if key in self._repology_cve_dfs: LOG.log(LOG_SPAM, "Using cached repology_cve results") df = self._repology_cve_dfs[key] else: df = self.cve_query(str(repo_pkg_name), str(pkg_version)) if df is None: df = pd.DataFrame() df_log(df, LOG_SPAM) self._repology_cve_dfs[key] = df if cve_id and not df.empty: df = df[df["cve"] == cve_id] return not df.empty def query_repology(self, pname, match_type="pkg_exact"): """Return cached Repology results for a package name.""" LOG.log(LOG_SPAM, "Querying repology for '%s'", pname) cache_key = f"{match_type}:{pname}" if cache_key in self._repology_dfs: LOG.log(LOG_SPAM, "Using cached repology results") return self._repology_dfs[cache_key].copy(deep=True) if match_type == "pkg_search": query = RepologyQuery( repository="nix_unstable", pkg_search=pname, re_status="outdated|newest|devel|unique", ) elif match_type == "sbom_cdx": query = RepologyQuery( repository="nix_unstable", sbom_cdx=Path(pname), re_status="outdated|newest|devel|unique", ) elif match_type == "pkg_exact": query = RepologyQuery( repository="nix_unstable", pkg_exact=pname, re_status="outdated|newest|devel|unique", ) else: raise ValueError(f"Unknown match_type: {match_type!r}") try: df_repology = self.adapter.query(query) except RepologyNoMatchingPackages: df_repology = None if df_repology is None or df_repology.empty: LOG.debug("No results from repology") return None df_repology = select_newest(df_repology) self._repology_dfs[cache_key] = df_repology.copy(deep=True) df_log(df_repology, LOG_SPAM) return df_repology def query_repology_versions(self, df_vuln_pkgs): """Augment vulnerable package rows with Repology version data.""" LOG.verbose("Querying repology") result_dict = {} whitelist_cols = cols.WHITELIST in df_vuln_pkgs.columns for vuln in df_vuln_pkgs.itertuples(): if whitelist_cols and vuln.whitelist: LOG.log(LOG_SPAM, "Whitelisted, skipping repology query: %s", vuln) _add_triage_item(result_dict, vuln, whitelist_cols) continue repo_pkg = nix_to_repology_pkg_name(vuln.package) LOG.log(LOG_SPAM, "Package '%s' ==> '%s'", vuln.package, repo_pkg) df_repology = self.query_repology(repo_pkg) if df_repology is None or df_repology.empty: _add_triage_item(result_dict, vuln, whitelist_cols) continue if df_repology.shape[0] == 1: LOG.log(LOG_SPAM, "One repology package matches") _add_triage_item(result_dict, vuln, whitelist_cols, df_repology) continue df_exact = df_repology[df_repology[cols.VERSION] == vuln.version] if not df_exact.empty: LOG.log(LOG_SPAM, "Exact version match '%s'", vuln.version) _add_triage_item(result_dict, vuln, whitelist_cols, df_exact) continue df_repology = df_repology.copy(deep=True) df_repology[cols.VERSION_CMP] = vuln.version df_repology[cols.SIMILARITY] = df_repology.apply( _version_similarity, axis=1, ) df_similar = df_repology[df_repology[cols.SIMILARITY] >= 0.7] if not df_similar.empty: LOG.log(LOG_SPAM, "Version similarity match:\n%s", df_similar) best_match = df_similar[cols.SIMILARITY].max() df_similar = df_similar[df_similar[cols.SIMILARITY] == best_match] LOG.log( LOG_SPAM, "Selecting best match based on version:\n%s", df_similar, ) _add_triage_item(result_dict, vuln, whitelist_cols, df_similar) continue LOG.log(LOG_SPAM, "Vague match in repology pkg, adding vuln only") _add_triage_item(result_dict, vuln, whitelist_cols) df_result = pd.DataFrame(result_dict) df_result.fillna("", inplace=True) df_result.reset_index(drop=True, inplace=True) 
return df_result

================================================
FILE: src/vulnxscan/reporting.py
================================================
#!/usr/bin/env python3

# SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII)
#
# SPDX-License-Identifier: Apache-2.0

"""Reporting helpers for vulnxscan findings."""

import pathlib
from typing import cast

import pandas as pd
from tabulate import tabulate

from common import columns as cols
from common.df import df_from_csv_file, df_to_csv_file
from common.log import LOG, LOG_VERBOSE
from vulnxscan.utils import _is_patched, _reformat_scanner, _vuln_sortcol, _vuln_url
from vulnxscan.whitelist import df_apply_whitelist, df_drop_whitelisted, load_whitelist


def build_report_dataframe(df_vulnix, df_grype, df_osv, *, log=LOG):
    """Combine scanner findings into the final report dataframe."""
    scanner_dfs = [df for df in [df_vulnix, df_grype, df_osv] if df is not None]
    if not scanner_dfs:
        log.debug("No scanners reported any findings")
        return pd.DataFrame()
    df = pd.concat(scanner_dfs, ignore_index=True)
    if df.empty:
        log.debug("No scanners reported any findings")
        return pd.DataFrame()
    if cols.MODIFIED not in df.columns:
        df[cols.MODIFIED] = pd.NaT
    df[cols.SORTCOL] = df.apply(_vuln_sortcol, axis=1)
    df[cols.COUNT] = 1
    group_cols = [
        cols.VULN_ID,
        cols.PACKAGE,
        cols.SEVERITY,
        cols.VERSION,
        cols.SORTCOL,
    ]
    df = df.pivot_table(index=group_cols, columns=cols.SCANNER, values=cols.COUNT)
    df.reset_index(drop=False, inplace=True)
    scanners = ["grype", "osv"]
    if df_vulnix is not None:
        scanners.append("vulnix")
    df = df.reindex(group_cols + scanners, axis=1)
    for scanner_col in scanners:
        if scanner_col not in df:
            df[scanner_col] = 0
    df[cols.SUM] = df[scanners].sum(axis=1).astype(int)
    df["grype"] = df.apply(lambda row: _reformat_scanner(row.grype), axis=1)
    df["osv"] = df.apply(lambda row: _reformat_scanner(row.osv), axis=1)
    if "vulnix" in scanners:
        df["vulnix"] = df.apply(lambda row: _reformat_scanner(row.vulnix), axis=1)
    df[cols.URL] = df.apply(_vuln_url, axis=1)
    sort_cols = [cols.SORTCOL, cols.PACKAGE, cols.SEVERITY, cols.VERSION]
    df.sort_values(by=sort_cols, ascending=False, inplace=True)
    report_cols = (
        [cols.VULN_ID, cols.URL, cols.PACKAGE, cols.VERSION, cols.SEVERITY]
        + scanners
        + [cols.SUM, cols.SORTCOL]
    )
    return df[report_cols]


def filter_patched_report(df_report, sbom_csv, *, log=LOG):
    """Filter out vulnerabilities that are marked as patched in the SBOM CSV."""
    log.log(LOG_VERBOSE, "Filtering patched vulnerabilities")
    df_sbom_csv = df_from_csv_file(sbom_csv)
    df = pd.merge(
        left=df_report,
        right=df_sbom_csv,
        how="left",
        left_on=[cols.PACKAGE, cols.VERSION],
        right_on=[cols.PNAME, cols.VERSION],
        suffixes=("", "_sbom_csv"),
    )
    df[cols.PATCHED] = df.apply(_is_patched, axis=1)
    df = df[~df[cols.PATCHED]]
    df = cast(pd.DataFrame, df[list(df_report.columns)])
    return df.drop_duplicates(keep="first")


def apply_whitelist_annotations(df_report, whitelist_csv):
    """Apply whitelist annotations in-place when a whitelist is provided."""
    if whitelist_csv is None:
        return
    df_whitelist = load_whitelist(whitelist_csv)
    if df_whitelist is None:
        return
    df_apply_whitelist(df_whitelist, df_report)


def render_console_report(df_report, *, df_triaged=None, log=LOG):
    """Render the console report for the final vulnerability dataframe."""
    log.debug("")
    if df_triaged is not None:
        df = df_triaged.copy()
        if cols.PACKAGE_REPOLOGY in df:
            df = df.drop(cols.PACKAGE_REPOLOGY, axis=1)
    else:
        df = df_report.copy()
        df = df.drop(cols.SORTCOL, axis=1)
    df = df_drop_whitelisted(df)
    if df.empty:
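        # Every remaining finding was whitelisted; nothing to print.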
log.info("Whitelisted all vulnerabilities") return version_cols = [col for col in df.columns if "version" in col] for col in version_cols: df[col] = df[col].str.slice(0, 16) table = tabulate( df, headers="keys", tablefmt="orgtbl", numalign="center", showindex=False, ) log.info( "Console report\n\n" "Potential vulnerabilities impacting version_local: " "\n\n%s\n\n", table, ) def write_reports(df_report, out_path, *, df_triaged=None): """Write the main CSV report and optional triage report.""" out_path = pathlib.Path(out_path) df_to_csv_file(df_report, out_path.resolve().as_posix()) if df_triaged is not None: parents = out_path.parents[0].resolve().as_posix() triage_out = f"{parents}/{out_path.stem}.triage{out_path.suffix}" df_to_csv_file(df_triaged, triage_out) ================================================ FILE: src/vulnxscan/scanners.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Scanner command execution helpers for vulnxscan.""" from common.log import LOG, LOG_VERBOSE from common.proc import exec_cmd from vulnxscan.osv_client import OSV def run_vulnix_scan(target_path, buildtime=False, *, exec_cmd_fn=exec_cmd, log=LOG): """Run vulnix and return its process result.""" log.log(LOG_VERBOSE, "Running vulnix scan") extra_opts = ["-C", "--json"] if buildtime: extra_opts = ["--json"] cmd = ["vulnix", target_path, *extra_opts] return exec_cmd_fn( cmd, raise_on_error=False, return_error=True, log_error=False, ) def run_grype_scan(sbom_path, *, exec_cmd_fn=exec_cmd, log=LOG): """Run grype against the given CycloneDX SBOM path.""" log.log(LOG_VERBOSE, "Running grype scan") cmd = ["grype", f"sbom:{sbom_path}", "--add-cpes-if-none", "--output", "json"] return exec_cmd_fn(cmd) def run_osv_scan(sbom_path, *, osv_factory=OSV, log=LOG): """Run OSV queries for the given CycloneDX SBOM path.""" log.log(LOG_VERBOSE, "Running OSV scan") osv = osv_factory() osv.query_vulns(sbom_path) return osv.to_dataframe() ================================================ FILE: src/vulnxscan/triage.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Vulnerability triage helpers.""" from common import columns as cols from common.df import df_log from common.log import LOG, LOG_SPAM from common.versioning import parse_version from vulnxscan.github_prs import GitHubPrLookup from vulnxscan.repology_lookup import RepologyVulnerabilityLookup _DEFAULT_REPOLOGY_LOOKUP = None _DEFAULT_GITHUB_PR_LOOKUP = None def _get_default_repology_lookup(): global _DEFAULT_REPOLOGY_LOOKUP # noqa: PLW0603 if _DEFAULT_REPOLOGY_LOOKUP is None: _DEFAULT_REPOLOGY_LOOKUP = RepologyVulnerabilityLookup() return _DEFAULT_REPOLOGY_LOOKUP def _get_default_github_lookup(): global _DEFAULT_GITHUB_PR_LOOKUP # noqa: PLW0603 if _DEFAULT_GITHUB_PR_LOOKUP is None: _DEFAULT_GITHUB_PR_LOOKUP = GitHubPrLookup() return _DEFAULT_GITHUB_PR_LOOKUP def classify_vulnerability(row, repology_lookup=None): # noqa: PLR0911 """Classify a vulnerable package row using Repology/CVE data.""" repology_lookup = ( _get_default_repology_lookup() if repology_lookup is None else repology_lookup ) if not row.version_nixpkgs and not row.version_upstream: return "err_missing_repology_version" if row.version_local and not repology_lookup.is_vulnerable( row.package_repology, row.version_local, row.vuln_id ): return 
"err_not_vulnerable_based_on_repology" version_local = parse_version(row.version_local) version_nixpkgs = parse_version(row.version_nixpkgs) if not version_local or not version_nixpkgs: return "err_invalid_version" if row.version_nixpkgs and version_local < version_nixpkgs: if not repology_lookup.is_vulnerable( row.package_repology, row.version_nixpkgs, row.vuln_id ): return "fix_update_to_version_nixpkgs" version_upstream = parse_version(row.version_upstream) if not version_upstream: return "err_invalid_version" if row.version_upstream and version_local < version_upstream: if not repology_lookup.is_vulnerable( row.package_repology, version_upstream, row.vuln_id ): return "fix_update_to_version_upstream" return "fix_not_available" def triage_vulnerabilities( df_report, search_nix_prs, repology_lookup=None, github_lookup=None, ): """Enrich and classify a vulnerability report.""" repology_lookup = ( _get_default_repology_lookup() if repology_lookup is None else repology_lookup ) github_lookup = ( _get_default_github_lookup() if github_lookup is None else github_lookup ) LOG.debug("") df = df_report.copy() uids = [ cols.VULN_ID, cols.PACKAGE, cols.SEVERITY, cols.VERSION, cols.URL, cols.SORTCOL, ] if cols.WHITELIST in df.columns: uids.append(cols.WHITELIST) uids.append(cols.WHITELIST_COMMENT) df_vuln_pkgs = df.groupby(by=uids).size().reset_index(name=cols.COUNT) LOG.debug("Number of vulnerable packages: %s", df_vuln_pkgs.shape[0]) if df_vuln_pkgs.empty: return df_vuln_pkgs df_log(df_vuln_pkgs, LOG_SPAM) df_vuln_pkgs = repology_lookup.query_repology_versions(df_vuln_pkgs) LOG.debug("Vulnerable pkgs with repology version info: %s", df_vuln_pkgs.shape[0]) df_log(df_vuln_pkgs, LOG_SPAM) df_vuln_pkgs[cols.CLASSIFY] = df_vuln_pkgs.apply( lambda row: classify_vulnerability(row, repology_lookup=repology_lookup), axis=1, ) if search_nix_prs: LOG.verbose("Querying nixpkgs github PRs") df_vuln_pkgs[cols.NIXPKGS_PR] = df_vuln_pkgs.apply( github_lookup.find_nixpkgs_prs, axis=1, ) sort_cols = [cols.SORTCOL, cols.PACKAGE, cols.SEVERITY, cols.VERSION_LOCAL] df_vuln_pkgs.sort_values(by=sort_cols, ascending=False, inplace=True) return df_vuln_pkgs ================================================ FILE: src/vulnxscan/utils.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2023 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Shared report and file helpers for vulnxscan.""" import json import re import pandas as pd from common.log import LOG ################################################################################ def _reformat_scanner(val): if val and not pd.isnull(val): return "1" return "0" def _vuln_sortcol(row): # Return a string that should make the vulns we want to see high # on the report list to bubble up when sorted in ascending order based # on the returned string match = re.match(r".*[A-Za-z][-_]([1-2][0-9]{3})[-_]([0-9]+).*", row.vuln_id) if match: year = match.group(1) number = str(match.group(2)).zfill(10) return f"{year}A{number}" if row.modified and not pd.isnull(row.modified): return f"{row.modified.year}A{int(row.modified.timestamp())}" return str(row.vuln_id) def _vuln_url(row): osv_url = "https://osv.dev/" nvd_url = "https://nvd.nist.gov/vuln/detail/" if row.vuln_id.lower().startswith("cve"): return f"{nvd_url}{row.vuln_id}" if getattr(row, "osv", False) or ("osv" in getattr(row, "scanner", [])): return f"{osv_url}{row.vuln_id}" return "" def _vuln_source(row): if row.vuln_id.lower().startswith("cve"): 
return "NVD" if getattr(row, "osv", False) or ("osv" in getattr(row, "scanner", [])): return "OSV" return "" def _is_patched(row): if row.vuln_id and str(row.vuln_id).lower() in str(row.patches).lower(): patches = row.patches.split() patch = [p for p in patches if str(row.vuln_id).lower() in str(p).lower()] LOG.info("%s for '%s' is patched with: %s", row.vuln_id, row.package, patch) return True return False def _is_json(path): try: with open(path, encoding="utf-8") as f: json_obj = json.load(f) if json_obj: return True return False except (json.JSONDecodeError, OSError, UnicodeError): return False ================================================ FILE: src/vulnxscan/vulnscan.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2023 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """ VulnScan abstracts over querying and collecting vulnerability information from grype, vulnix, and osv databases """ import pandas as pd from common import columns as cols from common.df import df_to_csv_file from common.log import LOG, LOG_SPAM, is_debug_enabled from common.proc import exec_cmd from vulnxscan import parsers as vulnxscan_parsers from vulnxscan import reporting as vulnxscan_reporting from vulnxscan import scanners as vulnxscan_scanners from vulnxscan.triage import triage_vulnerabilities from vulnxscan.utils import _vuln_sortcol class VulnScan: """Run vulnerability scans, generate reports""" def __init__(self): self.df_vulnix = None self.df_grype = None self.df_osv = None self.df_report = None self.df_triaged = None # Key:vuln_id, value:severity self.cvss = {} def _parse_vulnix(self, json_str): self.df_vulnix = vulnxscan_parsers.parse_vulnix_json( json_str, cvss_cache=self.cvss, log=LOG, ) if not self.df_vulnix.empty: if is_debug_enabled(): df_to_csv_file(self.df_vulnix, "df_vulnix.csv") def scan_vulnix(self, target_path, buildtime=False): """Run vulnix scan for nix artifact at target_path""" self.df_vulnix = pd.DataFrame() ret = vulnxscan_scanners.run_vulnix_scan( target_path, buildtime=buildtime, exec_cmd_fn=exec_cmd, log=LOG, ) if ret and hasattr(ret, "stderr") and ret.stderr: LOG.warning(ret) LOG.warning(ret.stderr) self.df_vulnix = None if ret and hasattr(ret, "stdout") and ret.stdout: self._parse_vulnix(ret.stdout) def _parse_grype(self, json_str): self.df_grype = vulnxscan_parsers.parse_grype_json( json_str, cvss_cache=self.cvss, log=LOG, log_spam=LOG_SPAM, ) if not self.df_grype.empty: if is_debug_enabled(): df_to_csv_file(self.df_grype, "df_grype.csv") def scan_grype(self, sbom_path): """Run grype scan using the SBOM at sbom_path as input""" ret = vulnxscan_scanners.run_grype_scan( sbom_path, exec_cmd_fn=exec_cmd, log=LOG, ) if ret.stdout: self._parse_grype(ret.stdout) def _parse_osv(self, df_osv): self.df_osv = vulnxscan_parsers.normalize_osv_dataframe( df_osv, cvss_cache=self.cvss, log=LOG, log_spam=LOG_SPAM, ) if not self.df_osv.empty: if is_debug_enabled(): df_to_csv_file(self.df_osv, "df_osv.csv") def scan_osv(self, sbom_path): """Run osv scan using the SBOM at sbom_path as input""" df_osv = vulnxscan_scanners.run_osv_scan(sbom_path, log=LOG) self._parse_osv(df_osv) def _generate_report(self): self.df_report = vulnxscan_reporting.build_report_dataframe( self.df_vulnix, self.df_grype, self.df_osv, log=LOG, ) if self.df_report.empty: self.df_report = None return if is_debug_enabled(): df_report_raw = pd.concat( [ df for df in [self.df_vulnix, self.df_grype, self.df_osv] if df is not None ], ignore_index=True, 
            )
            if not df_report_raw.empty:
                df_report_raw[cols.SORTCOL] = df_report_raw.apply(
                    _vuln_sortcol,
                    axis=1,
                )
                df_to_csv_file(df_report_raw, "df_report_raw.csv")

    def _filter_patched(self, sbom_csv):
        self.df_report = vulnxscan_reporting.filter_patched_report(
            self.df_report,
            sbom_csv,
            log=LOG,
        )

    def _apply_whitelist(self, whitelist_csv):
        vulnxscan_reporting.apply_whitelist_annotations(self.df_report, whitelist_csv)

    def _console_report(self):
        vulnxscan_reporting.render_console_report(
            self.df_report,
            df_triaged=self.df_triaged,
            log=LOG,
        )

    def report(self, args, sbom_csv):
        """Generate the vulnerability reports: csv file and a table to console"""
        self._generate_report()
        if self.df_report is None or self.df_report.empty:
            LOG.info("No vulnerabilities found")
            return
        if sbom_csv:
            self._filter_patched(sbom_csv)
        if args.whitelist:
            LOG.verbose("Applying whitelist '%s'", args.whitelist)
            self._apply_whitelist(args.whitelist)
        if args.triage:
            LOG.verbose("Running vulnerability triage")
            self.df_triaged = triage_vulnerabilities(self.df_report, args.nixprs)
        # Rename 'version' to 'version_local'
        self.df_report.columns = [
            cols.VERSION_LOCAL if col == cols.VERSION else col
            for col in self.df_report.columns
        ]
        LOG.debug("Writing reports")
        # Console report
        self._console_report()
        # File report
        vulnxscan_reporting.write_reports(
            self.df_report,
            args.out,
            df_triaged=self.df_triaged if args.triage else None,
        )

================================================
FILE: src/vulnxscan/vulnxscan_cli.py
================================================
#!/usr/bin/env python3

# SPDX-FileCopyrightText: 2023 Technology Innovation Institute (TII)
#
# SPDX-License-Identifier: Apache-2.0

"""
Scan nix artifact or CycloneDX SBOM for vulnerabilities with various
open-source vulnerability scanners.
"""

import argparse
import logging
import pathlib

from common.cli_args import add_verbose_argument, add_version_argument
from common.errors import InvalidSbomError, SbomnixError
from common.log import LOG, set_log_verbosity
from common.proc import exit_unless_command_exists
from sbomnix.cli_utils import generate_temp_sbom, resolve_nix_target
from vulnxscan.utils import _is_json
from vulnxscan.vulnscan import VulnScan

###############################################################################


def getargs(args=None):
    """Parse command line arguments"""
    desc = (
        "Scan nix artifact or CycloneDX SBOM for vulnerabilities with "
        "various open-source vulnerability scanners."
    )
    epil = "Example: ./vulnxscan.py /path/to/nix/out/or/drv/or/flakeref"
    parser = argparse.ArgumentParser(description=desc, epilog=epil)
    helps = (
        "Target nix store path (e.g. derivation file or nix output path) or flakeref"
    )
    parser.add_argument("TARGET", help=helps, type=str)
    add_verbose_argument(parser)
    helps = "Path to output file (default: ./vulns.csv)"
    parser.add_argument("-o", "--out", nargs="?", help=helps, default="vulns.csv")
    helps = (
        "Scan target buildtime instead of runtime dependencies. This option "
        "has no impact if the scan target is SBOM (ref: --sbom)."
    )
    parser.add_argument("--buildtime", help=helps, action="store_true")
    helps = (
        "Indicate that TARGET is a cdx SBOM instead of a path to a nix artifact. "
        "This allows running vulnxscan using input SBOMs from any tool "
        "capable of generating cdx SBOMs. This option makes it possible to run "
        "vulnxscan postmortem against any (potentially earlier) release of "
        "the TARGET. "
        "Moreover, this option allows using vulnxscan against non-nix targets "
        "as long as the SBOM includes valid CPE identifiers and purls. "
        "If this option is specified, vulnix scan will not run, since vulnix "
        "is nix-only and requires components' nix store paths. "
        "Also, if this option is specified, option '--buildtime' will be "
        "ignored since target packages will be read from the given SBOM."
    )
    parser.add_argument("--sbom", help=helps, action="store_true")
    helps = (
        "Path to whitelist file. Vulnerabilities that match any whitelisted "
        "entries will not be included in the console output and are annotated "
        "accordingly in the output csv. See more details in the vulnxscan "
        "README.md."
    )
    parser.add_argument("--whitelist", help=helps, type=pathlib.Path)
    helps = (
        "Add more information to vulnxscan output by querying "
        "repology.org for available package versions in nix-unstable and "
        "package upstream. This option is intended to help manual analysis. "
        "Output is written to a separate OUT file with 'triage' infix, "
        "by default: 'vulns.triage.csv'."
    )
    parser.add_argument("--triage", help=helps, action="store_true")
    triagegr = parser.add_argument_group("Other arguments")
    helps = (
        "Search nixpkgs github for PRs that might include more information "
        "concerning possible nixpkgs fixes for the found vulnerabilities. "
        "This option adds URLs to (at most five) PRs that appear valid "
        "for each vulnerability based on heuristics. "
        "The PR search takes significant "
        "time due to github API rate limits, which is why this feature is "
        "not enabled by default. This option has no impact unless '--triage' "
        "is also specified."
    )
    triagegr.add_argument("--nixprs", help=helps, action="store_true")
    add_version_argument(parser)
    return parser.parse_args(args)


################################################################################


def main():
    """main entry point"""
    args = getargs()
    set_log_verbosity(args.verbose)
    try:
        _run(args)
    except SbomnixError as error:
        LOG.fatal("%s", error)
        raise SystemExit(1) from error


def _run(args):
    # Fail early if the following commands are not in PATH
    exit_unless_command_exists("grype")
    exit_unless_command_exists("vulnix")
    scanner = VulnScan()
    sbom_artifact = None
    if args.sbom:
        target_path = pathlib.Path(args.TARGET).resolve().as_posix()
        if not _is_json(target_path):
            raise InvalidSbomError(args.TARGET)
        sbom_cdx_path = target_path
        sbom_csv_path = None
    else:
        target = resolve_nix_target(args.TARGET, buildtime=args.buildtime)
        target_path = target.path
        sbom_artifact = generate_temp_sbom(
            target_path,
            args.buildtime,
            prefix="vulnxscan_",
            cdx_suffix=".json",
            include_csv=True,
        )
        sbom_cdx_path = sbom_artifact.cdx_path
        sbom_csv_path = sbom_artifact.csv_path
    LOG.debug("Using cdx SBOM '%s'", sbom_cdx_path)
    LOG.debug("Using csv SBOM '%s'", sbom_csv_path)
    try:
        if not args.sbom:
            scanner.scan_vulnix(target_path, args.buildtime)
        scanner.scan_grype(sbom_cdx_path)
        scanner.scan_osv(sbom_cdx_path)
        scanner.report(args, sbom_csv_path)
    finally:
        if (
            not args.sbom
            and not LOG.isEnabledFor(logging.DEBUG)
            and sbom_artifact is not None
        ):
            # Remove generated temp files unless verbosity is DEBUG or more verbose
            sbom_artifact.cleanup()


if __name__ == "__main__":
    main()

################################################################################

================================================
FILE: src/vulnxscan/whitelist.py
================================================
#!/usr/bin/env python3

# SPDX-FileCopyrightText: 2023 Technology Innovation Institute (TII)
#
# SPDX-License-Identifier: Apache-2.0

"""
Utility functions when dealing with whitelists
"""
################################################################################ # Whitelist from common import columns as cols from common.df import df_from_csv_file, df_log from common.errors import WhitelistApplicationError from common.log import LOG, LOG_SPAM def load_whitelist(whitelist_csv_path): """ Load vulnerability whitelist from the given path. Returns None if the whitelist is not a valid vulnerability whitelist. Otherwise returns whitelist_csv_path as dataframe. """ df = df_from_csv_file(whitelist_csv_path, exit_on_error=False) if df is None: return None # Whitelist must have the following columns if not set([cols.VULN_ID, cols.COMMENT]).issubset(df.columns): LOG.warning("Whitelist csv missing required columns") return None if cols.WHITELIST in df.columns: # Interpret possible string values in "whitelist" column # to boolean as follows: df[cols.WHITELIST] = df[cols.WHITELIST].replace({"": True}) df[cols.WHITELIST] = ( df[cols.WHITELIST].astype(str).replace({"False": False, "0": False}) ) df[cols.WHITELIST] = df[cols.WHITELIST].astype("bool") return df def df_apply_whitelist(df_whitelist, df_vulns): """ Apply df_whitelist to vulnerabilities in df_vulns, changing df_vulns in-place. Adds columns "whitelist" and "whitelist_comment" to df_vulns based on whitelisting regular expressions in column df_whitelist["vuln_id"]. If df_whitelist["package"] exists and is not empty, require strict match in df_whitelist["package"] and df_vulns["package"]. If df_whitelist["whitelist"] exists and is False, do *not* whitelist the entry even if the rule matches, but only apply the column "whitelist_comment" to matching entries. """ # Add default values to whitelist columns df_vulns[cols.WHITELIST] = False df_vulns[cols.WHITELIST_COMMENT] = "" if cols.VULN_ID not in df_vulns: raise WhitelistApplicationError("Missing 'vuln_id' column from df_vulns") if cols.VULN_ID not in df_whitelist: LOG.warning("Whitelist ignored: missing 'vuln_id' column from whitelist") return check_pkg_name = False if cols.PACKAGE in df_whitelist.columns and cols.PACKAGE in df_vulns.columns: check_pkg_name = True check_whitelist = False if cols.WHITELIST in df_whitelist.columns: check_whitelist = True # Iterate rows in df_whitelist in reverse order so the whitelist rules # on top of the file get higher priority df_whitelist_rev = df_whitelist[::-1] for whitelist_entry in df_whitelist_rev.itertuples(): LOG.log(LOG_SPAM, "whitelist_entry: %s", whitelist_entry) regex = str(whitelist_entry.vuln_id).strip() LOG.log(LOG_SPAM, "whitelist regex: %s", regex) df_matches = df_vulns[cols.VULN_ID].str.fullmatch(regex) if check_pkg_name and whitelist_entry.package: LOG.log(LOG_SPAM, "filtering by package name: %s", whitelist_entry.package) df_matches = df_matches & ( df_vulns[cols.PACKAGE] == whitelist_entry.package ) df_vulns.loc[df_matches, cols.WHITELIST] = True if check_whitelist: LOG.log(LOG_SPAM, "entry[whitelist]=%s", bool(whitelist_entry.whitelist)) df_vulns.loc[df_matches, cols.WHITELIST] = bool(whitelist_entry.whitelist) df_vulns.loc[df_matches, cols.WHITELIST_COMMENT] = whitelist_entry.comment LOG.log(LOG_SPAM, "matches %s vulns", len(df_vulns[df_matches])) df_log(df_vulns[df_matches], LOG_SPAM) def df_drop_whitelisted(df): """ Drop whitelisted vulnerabilities from `df` as well as the related columns. 
""" if cols.WHITELIST in df.columns: # Convert possible string to boolean df = df[~df[cols.WHITELIST]] df = df.drop(cols.WHITELIST, axis=1) if cols.WHITELIST_COMMENT in df.columns: df = df.drop(cols.WHITELIST_COMMENT, axis=1) return df ================================================ FILE: tests/__init__.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 ================================================ FILE: tests/compare_deps.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2023 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Python script that compares dependencies between sbomnix and nixgraph""" import argparse import json import os import pathlib import sys import pandas as pd from common.cli_args import add_verbose_argument from common.df import df_from_csv_file, df_to_csv_file from common.log import LOG, LOG_SPAM, is_debug_enabled, set_log_verbosity from common.regex import regex_match ############################################################################### def getargs(): """Parse command line arguments""" desc = "Compare nixgraph and sbomnix output to cross-validate" epil = ( f"Example: ./{os.path.basename(__file__)} " "--sbom /path/to/sbom.json --graph /path/to/graph.csv" ) parser = argparse.ArgumentParser(description=desc, epilog=epil) add_verbose_argument(parser) helps = "Path to sbom in csv format" parser.add_argument("--sbom", help=helps, type=pathlib.Path, required=True) helps = "Path to graph in csv format" parser.add_argument("--graph", help=helps, type=pathlib.Path, required=True) return parser.parse_args() ################################################################################ def _parse_sbom(path): LOG.info("Loading sbom data from '%s'", path) with path.open(encoding="utf-8") as inf: json_dict = json.loads(inf.read()) # Parse sbom type sbom_type = "" for prop_dict in json_dict["metadata"]["properties"]: if "sbom_type" in prop_dict["name"]: sbom_type = prop_dict["value"] if not sbom_type: LOG.fatal("Failed to find sbom_type") sys.exit(1) LOG.debug(sbom_type) # Parse components components = json_dict["components"] + [json_dict["metadata"]["component"]] comp_parsed_dict = {} setcol = comp_parsed_dict.setdefault for cmp in components: # setcol("bom_ref", []).append(cmp["bom-ref"]) outpaths = [] for prop_dict in cmp["properties"]: if "output_path" in prop_dict["name"]: outpaths.append(prop_dict["value"]) elif "drv_path" in prop_dict["name"]: setcol("drv_path", []).append(prop_dict["value"]) setcol("output_path", []).append(outpaths) df_components = pd.DataFrame(comp_parsed_dict) # Parse dependencies deps = json_dict["dependencies"] deps_parsed_dict = {} setcol = deps_parsed_dict.setdefault for dep in deps: if "dependsOn" not in dep: setcol("ref", []).append(dep["ref"]) setcol("depends_on", []).append("") continue for dependson in dep["dependsOn"]: setcol("ref", []).append(dep["ref"]) setcol("depends_on", []).append(dependson) df_dependencies = pd.DataFrame(deps_parsed_dict) # Join df_components with df_dependencies df_parsed = df_components.merge( df_dependencies, how="outer", left_on=["drv_path"], right_on=["ref"], ) df_parsed.fillna("", inplace=True) if is_debug_enabled(): df_to_csv_file(df_parsed, "df_sbom_parsed.csv") return df_parsed, sbom_type def _parse_graph(path): LOG.info("Loading graph data from '%s'", path) df_graph = 
df_from_csv_file(path) df_graph.fillna("", inplace=True) df_graph = df_graph.astype(str) src_path = df_graph["src_path"].iloc[0] graph_type = "buildtime" if regex_match(r".*\.[a-z]+$", src_path) else "runtime" return df_graph, graph_type def _filter_set(re_filter_out_list, target_set): matching_set = set() for target in target_set: for regex in re_filter_out_list: if regex_match(regex, target): matching_set.add(target) break return target_set - matching_set ################################################################################ def sbom_internal_checks(df_sbom): """Cross-check sbom components vs dependencies""" passed = True # Empty "output_path" indicates component is referenced in the # sbom "dependency" section, but missing from the "components" section df = df_sbom[df_sbom["output_path"].isna()] if not df.empty: missing = df["ref"].to_list() LOG.fatal("sbom component missing: %s", missing) passed = False # Empty "ref" indicates component is listed in the sbom # "components" section, but missing from the "dependencies" df = df_sbom[df_sbom["ref"].isna()] if not df.empty: missing = df["drv_path"].to_list() LOG.fatal("sbom dependency missing for component: %s", missing) passed = False return passed def compare_dependencies(df_sbom, df_graph, sbom_type, graph_type): """Compare dependencies in df_sbom and df_braph""" LOG.debug("sbom_type=%s", sbom_type) LOG.debug("graph_type=%s", graph_type) deps_sbom_all = set() deps_graph_all = set() df_sbom = df_sbom.explode("output_path") df_sbom = df_sbom.astype(str) if (graph_type == "runtime" and sbom_type != "runtime_only") or ( graph_type == "buildtime" and sbom_type == "runtime_only" ): LOG.fatal("Unable to compare: graph='%s' vs sbom='%s'", graph_type, sbom_type) return False if graph_type == "runtime": LOG.info("Comparing runtime dependencies") for out_path in df_sbom["output_path"].unique().tolist(): LOG.log(LOG_SPAM, "target: %s", out_path) df_sbom_deps = df_sbom[df_sbom["output_path"] == out_path] sbom_deps = list(filter(None, df_sbom_deps["depends_on"].unique().tolist())) LOG.log(LOG_SPAM, "sbom depends-ons: %s", sbom_deps) deps_sbom_all.update(set(sbom_deps)) df_graph_deps = df_graph[df_graph["target_path"] == out_path] # Map graph src_path to sbom paths dfr = df_sbom.merge( df_graph_deps, how="inner", left_on="output_path", right_on="src_path" ).loc[:, ["drv_path"]] graph_deps = list(filter(None, dfr["drv_path"].unique().tolist())) LOG.log(LOG_SPAM, "graph depends-ons: %s", graph_deps) deps_graph_all.update(set(graph_deps)) if graph_type == "buildtime": LOG.info("Comparing buildtime dependencies") for drv_path in df_sbom["drv_path"].unique().tolist(): LOG.log(LOG_SPAM, "target: %s", drv_path) df_sbom_deps = df_sbom[df_sbom["drv_path"] == drv_path] sbom_deps = list(filter(None, df_sbom_deps["depends_on"].unique().tolist())) LOG.log(LOG_SPAM, "sbom depends-ons: %s", sbom_deps) deps_sbom_all.update(set(sbom_deps)) dfr = df_graph[df_graph["target_path"] == drv_path] graph_deps = list(filter(None, dfr["src_path"].unique().tolist())) LOG.log(LOG_SPAM, "graph depends-ons: %s", graph_deps) deps_graph_all.update(set(graph_deps)) deps_only_in_sbom = set() deps_only_in_graph = set() deps_only_in_sbom.update(deps_sbom_all - deps_graph_all) deps_only_in_graph.update(deps_graph_all - deps_sbom_all) # Filter out the following dependencies from the "deps_only_in_graph": # Store paths that match these regular expressions have no known derivers. # As such, they are not included in the sbom, but they are still drawn in # the graph. 
Not including such paths in the sbom is not an error, so # we filter them out here: re_no_known_drvs = [ r".*\.patch$", r".*\.patch.gz$", r".*\.sh$", r".*\.bash$", r".*\.diff$", r".*\.c$", r".*\.h$", r".*\.py$", r".*\.pl$", r".*\.xsl$", r".*\.lock$", r".*\.cnf$", r".*\.conf$", r".*\.crt$", r".*\.nix$", r".*\.in$", r".*\.plist$", r".*\.options$", r".*\.build$", r".*\.xcspec$", r".*\.toml$", r".*\.tmac$", r".*\.ds$", r".*\.key$", r".*\-source$", r".*\-builder$", r".*\-prefetch-git$", r".*\-inputrc$", r".*\-patch-registry-deps$", r".*\-make-initrd-ng$", r".*\.kaem$", r".*\.mk$", r".*\-nuke-refs$", r".*\-setup-hook$", r".*\-remove-references-to$", ] deps_only_in_graph = _filter_set(re_no_known_drvs, deps_only_in_graph) passed = True if deps_only_in_sbom: passed = False LOG.fatal("Dependencies only in sbom:") for dep in sorted(deps_only_in_sbom): LOG.fatal(" %s", dep) if deps_only_in_graph: passed = False LOG.fatal("Dependencies only in graph:") for dep in sorted(deps_only_in_graph): LOG.fatal(" %s", dep) return passed ################################################################################ def main(): """main entry point""" args = getargs() set_log_verbosity(args.verbose) if not args.sbom.exists(): LOG.fatal("Invalid path: '%s'", args.sbom) sys.exit(1) if not args.graph.exists(): LOG.fatal("Invalid path: '%s'", args.graph) sys.exit(1) df_sbom, sbom_type = _parse_sbom(args.sbom) df_graph, graph_type = _parse_graph(args.graph) # Checks sbom_check = sbom_internal_checks(df_sbom) deps_check = compare_dependencies(df_sbom, df_graph, sbom_type, graph_type) if sbom_check and deps_check: sys.exit(0) else: sys.exit(1) ################################################################################ if __name__ == "__main__": main() ################################################################################ ================================================ FILE: tests/compare_sboms.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2023 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Python script that compares two sboms""" import argparse import json import os import pathlib import sys import pandas as pd from common.cli_args import add_verbose_argument from common.df import df_to_csv_file from common.log import LOG, is_debug_enabled, set_log_verbosity ############################################################################### def getargs(): """Parse command line arguments""" desc = "Compare CycloneDX or SPDX sbom json files" epil = ( f"Example: ./{os.path.basename(__file__)} " "/path/to/sbom.cdx.json /path/to/sbom.cdx.json" ) parser = argparse.ArgumentParser(description=desc, epilog=epil) add_verbose_argument(parser) helps = "Path to first sbom json file" parser.add_argument("FILE1", help=helps, type=pathlib.Path) helps = "Path to second sbom json file" parser.add_argument("FILE2", help=helps, type=pathlib.Path) helps = ( "Set the SBOM component attribute(s) used as unique identifier" "(default: --uid='name,version')" ) parser.add_argument("--uid", help=helps, type=str, default="name,version") return parser.parse_args() ################################################################################ def _sbom_df_from_dict(dict_obj): df_ret = pd.DataFrame(dict_obj) df_ret.fillna("", inplace=True) df_ret = df_ret.astype(str) df_ret.sort_values("name", inplace=True, key=lambda col: col.str.lower()) df_ret.reset_index(drop=True, inplace=True) return df_ret def _parse_sbom_cdx(json_dict): components = 
json_dict["components"] + [json_dict["metadata"]["component"]] components_dict = {} setcol = components_dict.setdefault for cmp in components: setcol("uid", []).append(cmp["bom-ref"]) setcol("name", []).append(cmp["name"]) setcol("version", []).append(cmp["version"]) return _sbom_df_from_dict(components_dict) def _parse_sbom_spdx(json_dict): packages = json_dict["packages"] packages_dict = {} setcol = packages_dict.setdefault for cmp in packages: setcol("uid", []).append(cmp["SPDXID"]) setcol("name", []).append(cmp["name"]) setcol("version", []).append(cmp["versionInfo"]) return _sbom_df_from_dict(packages_dict) def _parse_sbom(path): with path.open(encoding="utf-8") as inf: json_dict = json.loads(inf.read()) sbom_format = "" if "SPDXID" in json_dict: sbom_format = "SPDX" return _parse_sbom_spdx(json_dict) if "bomFormat" in json_dict: sbom_format = json_dict["bomFormat"] return _parse_sbom_cdx(json_dict) LOG.fatal("Unsupported SBOM format: '%s'", sbom_format) sys.exit(1) def _log_rows(df, name): for row in df.itertuples(index=False, name=name): LOG.info(row) def _compare_sboms(args, df1, df2): """Describe diff of sboms df1 and df2, return True if they are equal""" if is_debug_enabled(): df_to_csv_file(df1, "df_sbom_file1.csv") df_to_csv_file(df2, "df_sbom_file2.csv") uid_list = [str(uid) for uid in args.uid.split(",")] df1_uidg = df1.groupby(by=uid_list).size().reset_index(name="count") df1_non_uniq = df1_uidg[df1_uidg["count"] > 1] df2_uidg = df2.groupby(by=uid_list).size().reset_index(name="count") df2_non_uniq = df2_uidg[df2_uidg["count"] > 1] df_common = pd.merge(left=df1, right=df2, how="inner", on=uid_list) df_common.drop_duplicates(subset=uid_list, inplace=True) df1_only = pd.merge(left=df1, right=df2, how="left", on=uid_list) df1_only = df1_only[df1_only["uid_y"].isna()] df1_only.drop_duplicates(subset=uid_list, inplace=True) df2_only = pd.merge(left=df2, right=df1, how="left", on=uid_list) df2_only = df2_only[df2_only["uid_y"].isna()] df2_only.drop_duplicates(subset=uid_list, inplace=True) LOG.info("Using uid: '%s'", uid_list) LOG.info("") LOG.info("FILE1 path '%s'", args.FILE1) LOG.info("FILE1 number of unique entries: %s", len(df1_uidg.index)) if not df1_non_uniq.empty: LOG.info("FILE1 number of non-unique entries: %s", len(df1_non_uniq)) _log_rows(df1_non_uniq, "non_unique") LOG.info("") LOG.info("FILE2 path '%s'", args.FILE2) LOG.info("FILE2 number of unique components: %s", len(df2_uidg.index)) if not df2_non_uniq.empty: LOG.info("FILE2 number of non-unique entries: %s", len(df2_non_uniq)) _log_rows(df2_non_uniq, "non_unique") LOG.info("") LOG.info("FILE1 and FILE2 common entries: %s", len(df_common)) if not df_common.empty: _log_rows(df_common[uid_list], "common") LOG.info("") LOG.info("FILE1 only entries: %s", len(df1_only)) if not df1_only.empty: _log_rows(df1_only[uid_list], "file1_only") LOG.info("") LOG.info("FILE2 only entries: %s", len(df2_only)) if not df2_only.empty: _log_rows(df2_only[uid_list], "file2_only") LOG.info("") return len(df1_only) == 0 and len(df2_only) == 0 ################################################################################ def main(): """main entry point""" args = getargs() set_log_verbosity(args.verbose) if not args.FILE1.exists(): LOG.fatal("Invalid path: '%s'", args.sbom) sys.exit(1) if not args.FILE2.exists(): LOG.fatal("Invalid path: '%s'", args.graph) sys.exit(1) df_sbom_f1 = _parse_sbom(args.FILE1) df_sbom_f2 = _parse_sbom(args.FILE2) equal = _compare_sboms(args, df_sbom_f1, df_sbom_f2) if equal: sys.exit(0) else: sys.exit(1) 
################################################################################ if __name__ == "__main__": main() ################################################################################ ================================================ FILE: tests/conftest.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Shared pytest fixtures for the test suite.""" import os import re import shutil import subprocess import time from pathlib import Path import pytest from tests import vulnix_test_support REPOROOT = Path(__file__).resolve().parent.parent INTEGRATION_DIR = REPOROOT / "tests" / "integration" RESOURCES_DIR = REPOROOT / "tests" / "resources" _GRYPE_TEST_DB = RESOURCES_DIR / "grype-test-db.tar.gz" _GRYPE_CACHE_SUBDIR = "grype-cache" def _output_mentions_repology_host(output): patterns = ( r"https://repology\.org(?:/|$)", r"host=['\"]repology\.org['\"]", r"HTTPSConnectionPool\(host=['\"]repology\.org['\"]", ) return any(re.search(pattern, output) for pattern in patterns) def _pythonpath_with_repo_root(env): repo_root = REPOROOT.as_posix() pythonpath = env.get("PYTHONPATH", "") if not pythonpath: env["PYTHONPATH"] = repo_root return env paths = pythonpath.split(os.pathsep) if repo_root not in paths: env["PYTHONPATH"] = f"{pythonpath}{os.pathsep}{repo_root}" return env @pytest.fixture(scope="session", autouse=True) def _warm_grype_db(request, tmp_path_factory): """Import the committed minimal grype DB into a session-scoped temp dir. Returns the cache Path so _run_python_script can point GRYPE_DB_CACHE_DIR at it. Returns None when no grype-marked tests are collected (non-grype sessions are unaffected and no DB import runs). Each pytest-xdist worker imports into its own getbasetemp() subdir, so there is no shared state and no locking is needed. 
""" has_grype_test = any( item.get_closest_marker("grype") for item in request.session.items ) if not has_grype_test: return None cache_dir = tmp_path_factory.getbasetemp() / _GRYPE_CACHE_SUBDIR cache_dir.mkdir(exist_ok=True) subprocess.run( ["grype", "db", "import", str(_GRYPE_TEST_DB)], env={ **os.environ, "GRYPE_DB_CACHE_DIR": str(cache_dir), "GRYPE_DB_VALIDATE_AGE": "false", }, check=True, ) return cache_dir @pytest.fixture(scope="session") def _configure_test_vulnix(request, tmp_path_factory): """Prepare the vulnix test wrapper with a deterministic default mode.""" requested_mode = os.environ.get("SBOMNIX_TEST_VULNIX_MODE", "dummy") if requested_mode not in {"dummy", "real"}: raise ValueError( "invalid SBOMNIX_TEST_VULNIX_MODE " f"{requested_mode!r}; expected one of: dummy, real" ) tmp_root = tmp_path_factory.getbasetemp().parent real_vulnix = shutil.which("vulnix") if requested_mode == "real" and real_vulnix is None: raise RuntimeError( "real vulnix selected for tests, but 'vulnix' is not available in PATH" ) cache_dir = vulnix_test_support.default_vulnix_cache_dir() reporter = request.config.pluginmanager.get_plugin("terminalreporter") if reporter is not None: reporter.write_sep( "=", f"vulnix test mode: {requested_mode}", bold=True, ) if requested_mode == "real": reporter.write_line(f"real vulnix binary: {real_vulnix}") reporter.write_line(f"real vulnix cache dir: {cache_dir}") else: reporter.write_line("using dummy vulnix wrapper for deterministic tests") return vulnix_test_support.configure_vulnix_for_tests( tmp_root=tmp_root, effective_mode=requested_mode, cache_dir=cache_dir, real_vulnix=real_vulnix, ) @pytest.fixture(name="test_work_dir") def fixture_test_work_dir(tmp_path): """Return a per-test working directory.""" return Path(tmp_path) @pytest.fixture(name="test_nix_drv", scope="session") def fixture_test_nix_drv(): """Instantiate a small test derivation chain once per test session.""" test_derivation = RESOURCES_DIR / "test-derivation-chain.nix" ret = subprocess.run( ["nix-instantiate", test_derivation.as_posix()], capture_output=True, encoding="utf-8", check=True, ) drv = Path(ret.stdout.strip()) assert drv.exists() return drv @pytest.fixture(name="test_nix_result", scope="session") def fixture_test_nix_result(test_nix_drv, tmp_path_factory): """Build nixpkgs.hello once per test session.""" build_dir = tmp_path_factory.mktemp("nix-build") result = build_dir / "result" cmd = ["nix-build", test_nix_drv.as_posix(), "-o", result.as_posix()] subprocess.run(cmd, check=True) assert result.exists() return result @pytest.fixture(name="test_cdx_sbom", scope="session") def fixture_test_cdx_sbom(): """Return a static CycloneDX SBOM fixture for offline SBOM-input tests.""" sbom = RESOURCES_DIR / "sample_cdx_sbom.json" assert sbom.exists() return sbom @pytest.fixture(name="_run_python_script") def fixture_run_python_script(test_work_dir, _warm_grype_db, _configure_test_vulnix): """Invoke a Python entrypoint from an isolated test workdir.""" def _run(args, **kwargs): env = _pythonpath_with_repo_root(os.environ.copy()) env.setdefault("GRYPE_DB_AUTO_UPDATE", "false") env.setdefault("GRYPE_DB_VALIDATE_AGE", "false") if _warm_grype_db is not None: env["GRYPE_DB_CACHE_DIR"] = str(_warm_grype_db) env = vulnix_test_support.build_vulnix_test_env( env, config=_configure_test_vulnix, ) kwargs.setdefault("cwd", test_work_dir) check = kwargs.pop("check", True) return subprocess.run(args, check=check, env=env, **kwargs) return _run 
@pytest.fixture(name="_run_python_script_retry_on_repology_network_error") def fixture_run_python_script_retry_on_repology_network_error(_run_python_script): """Retry transient repology.org connectivity failures before failing.""" def _run(args): markers = [ "requests.exceptions.ConnectTimeout", "requests.exceptions.ConnectionError", "requests.exceptions.ReadTimeout", "urllib3.exceptions.ConnectTimeoutError", "urllib3.exceptions.ReadTimeoutError", "Max retries exceeded", "Connection timed out", "Temporary failure in name resolution", "Name or service not known", ] retry_delays = [15, 45] last_ret = None for attempt in range(len(retry_delays) + 1): ret = _run_python_script(args, check=False, capture_output=True, text=True) if ret.returncode == 0: return ret last_ret = ret output = "\n".join(filter(None, [ret.stdout, ret.stderr])) is_repology_network_error = _output_mentions_repology_host(output) and any( marker in output for marker in markers ) if not is_repology_network_error or attempt >= len(retry_delays): ret.check_returncode() delay = retry_delays[attempt] print( f"repology.org request failed with a transient network error; " f"retrying in {delay}s (attempt {attempt + 2}/{len(retry_delays) + 1})" ) time.sleep(delay) last_ret.check_returncode() return last_ret return _run def pytest_collection_modifyitems(items): """Mark integration tests based on their path.""" run_real_vulnix = os.environ.get("SBOMNIX_RUN_REAL_VULNIX_TESTS") == "1" skip_real_vulnix = pytest.mark.skip( reason="real vulnix tests are opt-in; set SBOMNIX_RUN_REAL_VULNIX_TESTS=1" ) for item in items: path = Path(str(item.fspath)).resolve() if INTEGRATION_DIR in path.parents: item.add_marker(pytest.mark.integration) if item.get_closest_marker("real_vulnix") and not run_real_vulnix: item.add_marker(skip_real_vulnix) ================================================ FILE: tests/integration/__init__.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 ================================================ FILE: tests/integration/test_nixgraph_cli.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """CLI integration tests for nixgraph and dependency comparisons.""" from textwrap import dedent import pandas as pd import pytest from tests.testpaths import COMPARE_DEPS, NIXGRAPH, SBOMNIX from tests.testutils import df_difference, df_to_string def _write_nixgraph_test_flake(flake_dir): flake_dir.mkdir() (flake_dir / "flake.nix").write_text( dedent( """ { outputs = { self }: let mkPackage = system: let mkTestDerivation = { name, pname, version, command }: builtins.derivation { inherit name pname system version; builder = "/bin/sh"; args = [ "-c" command ]; }; first = mkTestDerivation { name = "sbomnix-flake-first-1.0"; pname = "sbomnix-flake-first"; version = "1.0"; command = "echo first > $out"; }; second = mkTestDerivation { name = "sbomnix-flake-second-1.0"; pname = "sbomnix-flake-second"; version = "1.0"; command = "echo ${first} > $out"; }; in mkTestDerivation { name = "sbomnix-flake-third-1.0"; pname = "sbomnix-flake-third"; version = "1.0"; command = "echo ${second} > $out"; }; in { packages.x86_64-linux.default = mkPackage "x86_64-linux"; packages.aarch64-linux.default = mkPackage "aarch64-linux"; packages.x86_64-darwin.default = mkPackage "x86_64-darwin"; 
packages.aarch64-darwin.default = mkPackage "aarch64-darwin"; }; } """ ), encoding="utf-8", ) return f"{flake_dir.as_posix()}#" def test_nixgraph_help(_run_python_script): """Test nixgraph command line argument: '-h'.""" _run_python_script([NIXGRAPH, "-h"]) def test_nixgraph_png(_run_python_script, test_nix_result, test_work_dir): """Test nixgraph with png output generates valid png image.""" png_out = test_work_dir / "graph.png" _run_python_script([NIXGRAPH, test_nix_result, "--out", png_out, "--depth", "3"]) assert png_out.exists() def test_nixgraph_csv(_run_python_script, test_nix_result, test_work_dir): """Test nixgraph with csv output generates valid csv.""" csv_out = test_work_dir / "graph.csv" _run_python_script([NIXGRAPH, test_nix_result, "--out", csv_out, "--depth", "3"]) assert csv_out.exists() df_out = pd.read_csv(csv_out) assert not df_out.empty def test_nixgraph_csv_runtime_drv(_run_python_script, test_nix_drv, test_work_dir): """Test nixgraph runtime graph generation from a direct derivation path.""" csv_out = test_work_dir / "graph_runtime_drv.csv" _run_python_script([NIXGRAPH, test_nix_drv, "--out", csv_out, "--depth", "3"]) assert csv_out.exists() df_out = pd.read_csv(csv_out) assert not df_out.empty assert set(df_out["target_pname"]) >= { "sbomnix-test-third-1.0", "sbomnix-test-second-1.0", } def test_nixgraph_csv_buildtime(_run_python_script, test_nix_drv, test_work_dir): """Test nixgraph with buildtime csv output generates valid csv.""" csv_out = test_work_dir / "graph_buildtime.csv" _run_python_script([NIXGRAPH, test_nix_drv, "--out", csv_out, "--buildtime"]) assert csv_out.exists() df_out = pd.read_csv(csv_out) assert not df_out.empty def test_nixgraph_dot_includes_edges_labels_and_style( _run_python_script, test_nix_result, test_work_dir, ): """Test DOT output for graph shape, labels, pathnames, and colorized nodes.""" dot_out = test_work_dir / "graph.dot" _run_python_script( [ NIXGRAPH, test_nix_result, "--out", dot_out, "--depth=3", "--pathnames", "--colorize=.*second.*", ] ) dot = dot_out.read_text(encoding="utf-8") assert "->" in dot assert "sbomnix-test-third-1.0" in dot assert "sbomnix-test-second-1.0" in dot assert "
" in dot assert 'fillcolor="#FFE6E6"' in dot def test_nixgraph_depth_and_until_limit_traversal( _run_python_script, test_nix_result, test_work_dir, ): """Test traversal limiting with --depth and --until.""" depth_one_csv = test_work_dir / "graph_depth_one.csv" _run_python_script( [ NIXGRAPH, test_nix_result, "--out", depth_one_csv, "--depth=1", ] ) df_depth_one = pd.read_csv(depth_one_csv) assert df_depth_one["graph_depth"].max() == 1 until_dot = test_work_dir / "graph_until.dot" _run_python_script( [ NIXGRAPH, test_nix_result, "--out", until_dot, "--depth=100", "--until=.*second.*", ] ) dot = until_dot.read_text(encoding="utf-8") assert "sbomnix-test-second-1.0" in dot assert "sbomnix-test-first-1.0" not in dot def test_nixgraph_csv_runtime_flakeref(_run_python_script, test_work_dir): """Test nixgraph runtime graph generation from a flakeref.""" flakeref = _write_nixgraph_test_flake(test_work_dir / "runtime-flake") csv_out = test_work_dir / "graph_runtime_flake.csv" _run_python_script([NIXGRAPH, flakeref, "--out", csv_out, "--depth=3"]) assert csv_out.exists() df_out = pd.read_csv(csv_out) assert set(df_out["target_pname"]) >= { "sbomnix-flake-third-1.0", "sbomnix-flake-second-1.0", } def test_nixgraph_csv_buildtime_flakeref(_run_python_script, test_work_dir): """Test nixgraph buildtime graph generation from a flakeref.""" flakeref = _write_nixgraph_test_flake(test_work_dir / "buildtime-flake") csv_out = test_work_dir / "graph_buildtime_flake.csv" _run_python_script( [NIXGRAPH, flakeref, "--out", csv_out, "--buildtime", "--depth=3"] ) assert csv_out.exists() df_out = pd.read_csv(csv_out) assert set(df_out["target_pname"]) >= { "sbomnix-flake-third-1.0.drv", "sbomnix-flake-second-1.0.drv", } def test_nixgraph_csv_graph_inverse(_run_python_script, test_nix_result, test_work_dir): """Test nixgraph with '--inverse' argument.""" csv_out = test_work_dir / "graph.csv" _run_python_script( [ NIXGRAPH, test_nix_result, "--out", csv_out, "--depth=100", ] ) assert csv_out.exists() df_out = pd.read_csv(csv_out) assert not df_out.empty csv_out_inv = test_work_dir / "graph_inverse.csv" _run_python_script( [ NIXGRAPH, test_nix_result, "--out", csv_out_inv, "--depth=100", "--inverse=.*", ] ) assert csv_out_inv.exists() df_out_inv = pd.read_csv(csv_out_inv) assert not df_out_inv.empty df_out = df_out.drop("graph_depth", axis=1).sort_values(by=["src_path"]) df_out_inv = df_out_inv.drop("graph_depth", axis=1).sort_values(by=["src_path"]) df_diff = df_difference(df_out, df_out_inv) assert df_diff.empty, df_to_string(df_diff) def test_compare_deps_runtime(_run_python_script, test_nix_result, test_work_dir): """Compare nixgraph vs sbom runtime dependencies.""" graph_csv_out = test_work_dir / "graph.csv" _run_python_script( [ NIXGRAPH, test_nix_result, "--out", graph_csv_out, "--depth=100", ] ) assert graph_csv_out.exists() out_path_cdx = test_work_dir / "sbom_cdx_test.json" _run_python_script( [ SBOMNIX, test_nix_result, "--cdx", out_path_cdx.as_posix(), ] ) assert out_path_cdx.exists() _run_python_script( [ COMPARE_DEPS, "--sbom", out_path_cdx, "--graph", graph_csv_out, ] ) @pytest.mark.slow def test_compare_deps_buildtime(_run_python_script, test_nix_drv, test_work_dir): """Compare nixgraph vs sbom buildtime dependencies.""" graph_csv_out = test_work_dir / "graph.csv" _run_python_script( [ NIXGRAPH, test_nix_drv, "--out", graph_csv_out, "--depth=100", "--buildtime", ] ) assert graph_csv_out.exists() out_path_cdx = test_work_dir / "sbom_cdx_test.json" _run_python_script( [ SBOMNIX, test_nix_drv, "--cdx", 
out_path_cdx.as_posix(), "--buildtime", ] ) assert out_path_cdx.exists() _run_python_script( [ COMPARE_DEPS, "--sbom", out_path_cdx, "--graph", graph_csv_out, ] ) ================================================ FILE: tests/integration/test_nixmeta_cli.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """CLI integration tests for nixmeta.""" import pytest from common.df import df_from_csv_file from tests.testpaths import NIXMETA, RESOURCES_DIR def test_nixmeta_help(_run_python_script): """Test nixmeta command line argument: '-h'.""" _run_python_script([NIXMETA, "-h"]) @pytest.mark.slow def test_nixmeta_sbomnix_flakeref(_run_python_script, test_work_dir): """Test nixmeta with a small package-set path.""" out_path = test_work_dir / "nixmeta.csv" package_set = RESOURCES_DIR / "nixmeta-package-set.nix" _run_python_script( [ NIXMETA, "--out", out_path.as_posix(), "--flakeref", package_set, ] ) assert out_path.exists() df_meta = df_from_csv_file(out_path) assert df_meta is not None assert set(df_meta["name"]) == { "sbomnix-meta-first-1.0", "sbomnix-meta-second-2.0", } ================================================ FILE: tests/integration/test_nixupdate_cli.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """CLI integration tests for nix_outdated.""" import pytest from tests.testpaths import NIX_OUTDATED def test_nix_outdated_help(_run_python_script): """Test nix_outdated command line argument: '-h'.""" _run_python_script([NIX_OUTDATED, "-h"]) @pytest.mark.network @pytest.mark.slow def test_nix_outdated_result( _run_python_script_retry_on_repology_network_error, test_nix_result, test_work_dir ): """Test nix_outdated with the nix result as input.""" out_path_nix_outdated = test_work_dir / "nix_outdated.csv" _run_python_script_retry_on_repology_network_error( [ NIX_OUTDATED, "--out", out_path_nix_outdated.as_posix(), test_nix_result, ] ) assert out_path_nix_outdated.exists() ================================================ FILE: tests/integration/test_provenance_cli.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """CLI integration tests for provenance.""" from tests.testpaths import PROVENANCE, RESOURCES_DIR from tests.testutils import validate_json def test_provenance_help(_run_python_script): """Test provenance command line argument: '-h'.""" _run_python_script([PROVENANCE, "-h"]) def test_provenance_schema(_run_python_script, test_nix_drv, test_work_dir): """Test provenance generates valid schema.""" out_path = test_work_dir / "provenance_test.json" _run_python_script( [ PROVENANCE, test_nix_drv, "--out", out_path.as_posix(), ] ) assert out_path.exists() schema_path = RESOURCES_DIR / "provenance-1.0.schema.json" assert schema_path.exists() validate_json(out_path.as_posix(), schema_path) def test_provenance_schema_recursive(_run_python_script, test_nix_drv, test_work_dir): """Test provenance generates valid schema with recursive option.""" out_path = test_work_dir / "recursive_provenance_test.json" _run_python_script( [ PROVENANCE, test_nix_drv, "--recursive", "--out", out_path.as_posix(), ] ) assert out_path.exists() schema_path = RESOURCES_DIR / "provenance-1.0.schema.json" assert 
schema_path.exists() validate_json(out_path.as_posix(), schema_path) ================================================ FILE: tests/integration/test_repology_cli.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """CLI integration tests for repology.""" import pytest from tests.testpaths import REPOLOGY_CLI def test_repology_cli_help(_run_python_script): """Test repology_cli command line argument: '-h'.""" _run_python_script([REPOLOGY_CLI, "-h"]) @pytest.mark.network @pytest.mark.slow def test_repology_cli_sbom( _run_python_script_retry_on_repology_network_error, test_cdx_sbom, test_work_dir, ): """Test repology_cli with SBOM as input.""" out_path_repology = test_work_dir / "repology.csv" _run_python_script_retry_on_repology_network_error( [ REPOLOGY_CLI, "--sbom_cdx", test_cdx_sbom.as_posix(), "--repository", "nix_unstable", "--out", out_path_repology.as_posix(), ] ) assert out_path_repology.exists() ================================================ FILE: tests/integration/test_sbomnix_cli.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """CLI integration tests for sbomnix.""" import pandas as pd import pytest from tests.testpaths import COMPARE_SBOMS, RESOURCES_DIR, SBOMNIX from tests.testutils import df_difference, df_to_string, validate_json def test_sbomnix_help(_run_python_script): """Test sbomnix command line argument: '-h'.""" _run_python_script([SBOMNIX, "-h"]) def test_sbomnix_type_runtime(_run_python_script, test_nix_result, test_work_dir): """Test sbomnix generates valid CycloneDX json with runtime dependencies.""" out_path_cdx = test_work_dir / "sbom_cdx_test.json" out_path_spdx = test_work_dir / "sbom_spdx_test.json" _run_python_script( [ SBOMNIX, test_nix_result, "--cdx", out_path_cdx.as_posix(), "--spdx", out_path_spdx.as_posix(), ] ) assert out_path_cdx.exists() assert out_path_spdx.exists() cdx_schema_path = RESOURCES_DIR / "cdx_bom-1.4.schema.json" assert cdx_schema_path.exists() validate_json(out_path_cdx.as_posix(), cdx_schema_path) spdx_schema_path = RESOURCES_DIR / "spdx_bom-2.3.schema.json" assert spdx_schema_path.exists() validate_json(out_path_spdx.as_posix(), spdx_schema_path) @pytest.mark.slow def test_sbomnix_type_buildtime(_run_python_script, test_nix_drv, test_work_dir): """Test sbomnix generates valid CycloneDX json with buildtime dependencies.""" out_path_cdx = test_work_dir / "sbom_cdx_test.json" out_path_spdx = test_work_dir / "sbom_spdx_test.json" _run_python_script( [ SBOMNIX, test_nix_drv, "--cdx", out_path_cdx.as_posix(), "--spdx", out_path_spdx.as_posix(), "--buildtime", ] ) assert out_path_cdx.exists() assert out_path_spdx.exists() cdx_schema_path = RESOURCES_DIR / "cdx_bom-1.4.schema.json" assert cdx_schema_path.exists() validate_json(out_path_cdx.as_posix(), cdx_schema_path) spdx_schema_path = RESOURCES_DIR / "spdx_bom-2.3.schema.json" assert spdx_schema_path.exists() validate_json(out_path_spdx.as_posix(), spdx_schema_path) @pytest.mark.slow def test_sbomnix_depth(_run_python_script, test_nix_drv, test_work_dir): """Test sbomnix '--depth' option.""" out_path_csv_1 = test_work_dir / "sbom_csv_test_1.csv" out_path_csv_2 = test_work_dir / "sbom_csv_test_2.csv" _run_python_script( [ SBOMNIX, test_nix_drv, "--buildtime", "--csv", out_path_csv_1.as_posix(), "--depth=2", ] ) assert 
out_path_csv_1.exists() df_out_1 = pd.read_csv(out_path_csv_1) assert not df_out_1.empty _run_python_script( [ SBOMNIX, test_nix_drv, "--buildtime", "--csv", out_path_csv_2.as_posix(), "--depth=1", ] ) assert out_path_csv_2.exists() df_out_2 = pd.read_csv(out_path_csv_2) assert not df_out_2.empty df_diff = df_difference(df_out_1, df_out_2) assert not df_diff.empty, df_to_string(df_diff) df_right_only = df_diff[df_diff["_merge"] == "right_only"] assert df_right_only.empty, df_to_string(df_diff) @pytest.mark.slow def test_compare_subsequent_cdx_sboms(_run_python_script, test_nix_drv, test_work_dir): """Compare two sbomnix runs with same target produce the same cdx sbom.""" out_path_cdx_1 = test_work_dir / "sbom_cdx_test_1.json" _run_python_script( [ SBOMNIX, test_nix_drv, "--cdx", out_path_cdx_1.as_posix(), "--buildtime", ] ) assert out_path_cdx_1.exists() out_path_cdx_2 = test_work_dir / "sbom_cdx_test_2.json" _run_python_script( [ SBOMNIX, test_nix_drv, "--cdx", out_path_cdx_2.as_posix(), "--buildtime", ] ) assert out_path_cdx_2.exists() _run_python_script([COMPARE_SBOMS, out_path_cdx_1, out_path_cdx_2]) @pytest.mark.slow def test_compare_subsequent_spdx_sboms(_run_python_script, test_nix_drv, test_work_dir): """Compare two sbomnix runs with same target produce the same spdx sbom.""" out_path_spdx_1 = test_work_dir / "sbom_spdx_test_1.json" _run_python_script( [ SBOMNIX, test_nix_drv, "--spdx", out_path_spdx_1.as_posix(), "--buildtime", ] ) assert out_path_spdx_1.exists() out_path_spdx_2 = test_work_dir / "sbom_spdx_test_2.json" _run_python_script( [ SBOMNIX, test_nix_drv, "--spdx", out_path_spdx_2.as_posix(), "--buildtime", ] ) assert out_path_spdx_2.exists() _run_python_script([COMPARE_SBOMS, out_path_spdx_1, out_path_spdx_2]) @pytest.mark.slow def test_compare_spdx_and_cdx_sboms(_run_python_script, test_nix_drv, test_work_dir): """Compare spdx and cdx sboms from the same sbomnix invocation.""" out_path_spdx = test_work_dir / "sbom_spdx_test.json" out_path_cdx = test_work_dir / "sbom_cdx_test.json" _run_python_script( [ SBOMNIX, test_nix_drv, "--cdx", out_path_cdx.as_posix(), "--spdx", out_path_spdx.as_posix(), "--buildtime", ] ) assert out_path_cdx.exists() assert out_path_spdx.exists() _run_python_script([COMPARE_SBOMS, out_path_cdx, out_path_spdx]) ================================================ FILE: tests/integration/test_vulnxscan_cli.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """CLI integration tests for vulnxscan.""" import pandas as pd import pytest from tests.testpaths import RESOURCES_DIR, VULNXSCAN # Synthetic CVE committed in tests/resources/grype-test-db.tar.gz. # It targets sbomnix-test-first==1.0, which is in the test derivation chain. 
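# The test DB is imported once per test session (and per pytest-xdist worker)
# by the session-scoped _warm_grype_db fixture in tests/conftest.py, which
# points GRYPE_DB_CACHE_DIR at a temp directory for grype-marked tests.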
_SYNTHETIC_CVE = "CVE-TEST-2026-00001" def test_vulnxscan_help(_run_python_script): """Test vulnxscan command line argument: '-h'.""" _run_python_script([VULNXSCAN, "--help"]) @pytest.mark.network @pytest.mark.grype def test_vulnxscan_scan_nix_result(_run_python_script, test_nix_result, test_work_dir): """Test vulnxscan scan with the nix result as input.""" out_path_vulns = test_work_dir / "vulnxscan_test.csv" _run_python_script( [ VULNXSCAN, test_nix_result.as_posix(), "--out", out_path_vulns.as_posix(), ] ) df = pd.read_csv(out_path_vulns) assert _SYNTHETIC_CVE in df["vuln_id"].values, ( f"{_SYNTHETIC_CVE} not found in scan output — " "check grype-test-db.tar.gz matches the test fixture packages" ) @pytest.mark.network @pytest.mark.grype def test_vulnxscan_scan_sbom(_run_python_script, test_cdx_sbom, test_work_dir): """Test vulnxscan scan with SBOM as input.""" out_path_vulns = test_work_dir / "vulnxscan_test.csv" _run_python_script( [ VULNXSCAN, "--sbom", test_cdx_sbom.as_posix(), "--out", out_path_vulns.as_posix(), ] ) @pytest.mark.network @pytest.mark.grype def test_vulnxscan_triage(_run_python_script, test_nix_result, test_work_dir): """Test vulnxscan scan with --triage.""" out_path_vulns = test_work_dir / "vulnxscan_test.csv" _run_python_script( [ VULNXSCAN, "--triage", "--out", out_path_vulns.as_posix(), test_nix_result.as_posix(), ] ) df = pd.read_csv(out_path_vulns) assert _SYNTHETIC_CVE in df["vuln_id"].values, ( f"{_SYNTHETIC_CVE} not found in triage output" ) @pytest.mark.network @pytest.mark.grype def test_vulnxscan_triage_whitelist(_run_python_script, test_nix_result, test_work_dir): """Test vulnxscan scan with --triage and --whitelist.""" # Positive case: CVE is present without --whitelist out_no_whitelist = test_work_dir / "vulnxscan_no_whitelist.csv" ret_no_wl = _run_python_script( [ VULNXSCAN, "--triage", "--out", out_no_whitelist.as_posix(), test_nix_result.as_posix(), ], capture_output=True, text=True, ) assert "Potential vulnerabilities impacting version_local" in ret_no_wl.stderr df_no_wl = pd.read_csv(out_no_whitelist) assert _SYNTHETIC_CVE in df_no_wl["vuln_id"].values # Suppressed case: CVE is whitelisted away out_path_vulns = test_work_dir / "vulnxscan_test.csv" whitelist_csv = RESOURCES_DIR / "whitelist_all.csv" assert whitelist_csv.exists() ret = _run_python_script( [ VULNXSCAN, "--triage", "--whitelist", whitelist_csv.as_posix(), "--out", out_path_vulns.as_posix(), test_nix_result.as_posix(), ], capture_output=True, text=True, ) assert "Potential vulnerabilities impacting version_local" not in ret.stderr ================================================ FILE: tests/resources/README.md ================================================ # Test resources ## CycloneDX 1.4 json schema - cdx_bom-1.4.schema.json - ## CycloneDX 1.3 json schema - cdx_bom-1.3.schema.json - ## SPDX 2.3 json schema - spdx_bom-2.3.schema.json - ## CycloneDX SPDX companion schema - spdx.schema.json - Local minimal schema shim used by offline CycloneDX jsonschema validation ## JSON Signature Format 0.82 schema - jsf-0.82.schema.json - Local minimal schema shim used by offline CycloneDX schema validation ## SLSA v1.0 provenance schema - provenance-1.0.schema.json - translated and rewritten into jsonschema format. 
## Sample CycloneDX SBOM - sample_cdx_sbom.json - Small static SBOM fixture for offline SBOM-input tests ================================================ FILE: tests/resources/cdx_bom-1.3.schema.json ================================================ { "$schema": "http://json-schema.org/draft-07/schema#", "$id": "http://cyclonedx.org/schema/bom-1.3a.schema.json", "type": "object", "title": "CycloneDX Software Bill-of-Material Specification", "$comment" : "CycloneDX JSON schema is published under the terms of the Apache License 2.0.", "required": [ "bomFormat", "specVersion", "version" ], "properties": { "bomFormat": { "$id": "#/properties/bomFormat", "type": "string", "title": "BOM Format", "description": "Specifies the format of the BOM. This helps to identify the file as CycloneDX since BOMs do not have a filename convention nor does JSON schema support namespaces.", "enum": [ "CycloneDX" ] }, "specVersion": { "$id": "#/properties/specVersion", "type": "string", "title": "CycloneDX Specification Version", "description": "The version of the CycloneDX specification a BOM is written to (starting at version 1.2)", "examples": ["1.3"] }, "serialNumber": { "$id": "#/properties/serialNumber", "type": "string", "title": "BOM Serial Number", "description": "Every BOM generated should have a unique serial number, even if the contents of the BOM being generated have not changed over time. The process or tool responsible for creating the BOM should create random UUID's for every BOM generated.", "examples": ["urn:uuid:3e671687-395b-41f5-a30f-a58921a69b79"], "pattern": "^urn:uuid:[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$" }, "version": { "$id": "#/properties/version", "type": "integer", "title": "BOM Version", "description": "The version allows component publishers/authors to make changes to existing BOMs to update various aspects of the document such as description or licenses. When a system is presented with multiple BOMs for the same component, the system should use the most recent version of the BOM. The default version is '1' and should be incremented for each version of the BOM that is published. Each version of a component should have a unique BOM and if no changes are made to the BOMs, then each BOM will have a version of '1'.", "default": 1, "examples": [1] }, "metadata": { "$id": "#/properties/metadata", "$ref": "#/definitions/metadata", "title": "BOM Metadata", "description": "Provides additional information about a BOM." }, "components": { "$id": "#/properties/components", "type": "array", "items": {"$ref": "#/definitions/component"}, "uniqueItems": true, "title": "Components" }, "services": { "$id": "#/properties/services", "type": "array", "items": {"$ref": "#/definitions/service"}, "uniqueItems": true, "title": "Services" }, "externalReferences": { "$id": "#/properties/externalReferences", "type": "array", "items": {"$ref": "#/definitions/externalReference"}, "title": "External References", "description": "External references provide a way to document systems, sites, and information that may be relevant but which are not included with the BOM." }, "dependencies": { "$id": "#/properties/dependencies", "type": "array", "items": {"$ref": "#/definitions/dependency"}, "uniqueItems": true, "title": "Dependencies", "description": "Provides the ability to document dependency relationships." 
}, "compositions": { "$id": "#/properties/compositions", "type": "array", "items": {"$ref": "#/definitions/compositions"}, "uniqueItems": true, "title": "Compositions", "description": "Compositions describe constituent parts (including components, services, and dependency relationships) and their completeness." } }, "definitions": { "metadata": { "type": "object", "title": "BOM Metadata Object", "properties": { "timestamp": { "type": "string", "format": "date-time", "title": "Timestamp", "description": "The date and time (timestamp) when the document was created." }, "tools": { "type": "array", "title": "Creation Tools", "description": "The tool(s) used in the creation of the BOM.", "items": {"$ref": "#/definitions/tool"} }, "authors" :{ "type": "array", "title": "Authors", "description": "The person(s) who created the BOM. Authors are common in BOMs created through manual processes. BOMs created through automated means may not have authors.", "items": {"$ref": "#/definitions/organizationalContact"} }, "component": { "title": "Component", "description": "The component that the BOM describes.", "$ref": "#/definitions/component" }, "manufacture": { "title": "Manufacture", "description": "The organization that manufactured the component that the BOM describes.", "$ref": "#/definitions/organizationalEntity" }, "supplier": { "title": "Supplier", "description": " The organization that supplied the component that the BOM describes. The supplier may often be the manufacturer, but may also be a distributor or repackager.", "$ref": "#/definitions/organizationalEntity" }, "licenses": { "type": "array", "title": "BOM License(s)", "items": {"$ref": "#/definitions/licenseChoice"} }, "properties": { "type": "array", "title": "Properties", "description": "Provides the ability to document properties in a name-value store. This provides flexibility to include data not officially supported in the standard without having to use additional namespaces or create extensions. Unlike key-value stores, properties support duplicate names, each potentially having different values.", "items": {"$ref": "#/definitions/property"} } } }, "tool": { "type": "object", "title": "Tool", "description": "The tool used to create the BOM.", "properties": { "vendor": { "type": "string", "title": "Tool Vendor", "description": "The date and time (timestamp) when the document was created." }, "name": { "type": "string", "title": "Tool Name", "description": "The date and time (timestamp) when the document was created." }, "version": { "type": "string", "title": "Tool Version", "description": "The date and time (timestamp) when the document was created." }, "hashes": { "$id": "#/definitions/tool/properties/hashes", "type": "array", "items": {"$ref": "#/definitions/hash"}, "title": "Hashes", "description": "The hashes of the tool (if applicable)." } } }, "organizationalEntity": { "type": "object", "title": "Organizational Entity Object", "description": "", "properties": { "name": { "type": "string", "title": "Name", "description": "The name of the organization", "examples": [ "Example Inc." ] }, "url": { "type": "array", "items": { "type": "string", "format": "iri-reference" }, "title": "URL", "description": "The URL of the organization. Multiple URLs are allowed.", "examples": ["https://example.com"] }, "contact": { "type": "array", "title": "Contact", "description": "A contact at the organization. 
Multiple contacts are allowed.", "items": {"$ref": "#/definitions/organizationalContact"} } } }, "organizationalContact": { "type": "object", "title": "Organizational Contact Object", "description": "", "properties": { "name": { "type": "string", "title": "Name", "description": "The name of a contact", "examples": ["Contact name"] }, "email": { "type": "string", "title": "Email Address", "description": "The email address of the contact.", "examples": ["firstname.lastname@example.com"] }, "phone": { "type": "string", "title": "Phone", "description": "The phone number of the contact.", "examples": ["800-555-1212"] } } }, "component": { "type": "object", "title": "Component Object", "required": [ "type", "name", "version" ], "properties": { "type": { "type": "string", "enum": [ "application", "framework", "library", "container", "operating-system", "device", "firmware", "file" ], "title": "Component Type", "description": "Specifies the type of component. For software components, classify as application if no more specific appropriate classification is available or cannot be determined for the component.", "examples": ["library"] }, "mime-type": { "type": "string", "title": "Mime-Type", "description": "The optional mime-type of the component. When used on file components, the mime-type can provide additional context about the kind of file being represented such as an image, font, or executable. Some library or framework components may also have an associated mime-type.", "examples": ["image/jpeg"], "pattern": "^[-+a-z0-9.]+/[-+a-z0-9.]+$" }, "bom-ref": { "type": "string", "title": "BOM Reference", "description": "An optional identifier which can be used to reference the component elsewhere in the BOM. Every bom-ref should be unique." }, "supplier": { "title": "Component Supplier", "description": " The organization that supplied the component. The supplier may often be the manufacturer, but may also be a distributor or repackager.", "$ref": "#/definitions/organizationalEntity" }, "author": { "type": "string", "title": "Component Author", "description": "The person(s) or organization(s) that authored the component", "examples": ["Acme Inc"] }, "publisher": { "type": "string", "title": "Component Publisher", "description": "The person(s) or organization(s) that published the component", "examples": ["Acme Inc"] }, "group": { "type": "string", "title": "Component Group", "description": "The grouping name or identifier. This will often be a shortened, single name of the company or project that produced the component, or the source package or domain name. Whitespace and special characters should be avoided. Examples include: apache, org.apache.commons, and apache.org.", "examples": ["com.acme"] }, "name": { "type": "string", "title": "Component Name", "description": "The name of the component. This will often be a shortened, single name of the component. Examples: commons-lang3 and jquery", "examples": ["tomcat-catalina"] }, "version": { "type": "string", "title": "Component Version", "description": "The component version. The version should ideally comply with semantic versioning but is not enforced.", "examples": ["9.0.14"] }, "description": { "type": "string", "title": "Component Description", "description": "Specifies a description for the component" }, "scope": { "type": "string", "enum": [ "required", "optional", "excluded" ], "title": "Component Scope", "description": "Specifies the scope of the component. 
If scope is not specified, 'required' scope should be assumed by the consumer of the BOM", "default": "required" }, "hashes": { "type": "array", "title": "Component Hashes", "items": {"$ref": "#/definitions/hash"} }, "licenses": { "type": "array", "items": {"$ref": "#/definitions/licenseChoice"}, "title": "Component License(s)" }, "copyright": { "type": "string", "title": "Component Copyright", "description": "An optional copyright notice informing users of the underlying claims to copyright ownership in a published work.", "examples": ["Acme Inc"] }, "cpe": { "type": "string", "title": "Component Common Platform Enumeration (CPE)", "description": "DEPRECATED - DO NOT USE. This will be removed in a future version. Specifies a well-formed CPE name. See https://nvd.nist.gov/products/cpe", "examples": ["cpe:2.3:a:acme:component_framework:-:*:*:*:*:*:*:*"] }, "purl": { "type": "string", "title": "Component Package URL (purl)", "examples": ["pkg:maven/com.acme/tomcat-catalina@9.0.14?packaging=jar"] }, "swid": { "$ref": "#/definitions/swid", "title": "SWID Tag", "description": "Specifies metadata and content for ISO-IEC 19770-2 Software Identification (SWID) Tags." }, "modified": { "type": "boolean", "title": "Component Modified From Original", "description": "DEPRECATED - DO NOT USE. This will be removed in a future version. Use the pedigree element instead to supply information on exactly how the component was modified. A boolean value indicating is the component has been modified from the original. A value of true indicates the component is a derivative of the original. A value of false indicates the component has not been modified from the original." }, "pedigree": { "type": "object", "title": "Component Pedigree", "description": "Component pedigree is a way to document complex supply chain scenarios where components are created, distributed, modified, redistributed, combined with other components, etc. Pedigree supports viewing this complex chain from the beginning, the end, or anywhere in the middle. It also provides a way to document variants where the exact relation may not be known.", "properties": { "ancestors": { "type": "array", "title": "Ancestors", "description": "Describes zero or more components in which a component is derived from. This is commonly used to describe forks from existing projects where the forked version contains a ancestor node containing the original component it was forked from. For example, Component A is the original component. Component B is the component being used and documented in the BOM. However, Component B contains a pedigree node with a single ancestor documenting Component A - the original component from which Component B is derived from.", "items": {"$ref": "#/definitions/component"} }, "descendants": { "type": "array", "title": "Descendants", "description": "Descendants are the exact opposite of ancestors. This provides a way to document all forks (and their forks) of an original or root component.", "items": {"$ref": "#/definitions/component"} }, "variants": { "type": "array", "title": "Variants", "description": "Variants describe relations where the relationship between the components are not known. For example, if Component A contains nearly identical code to Component B. 
They are both related, but it is unclear if one is derived from the other, or if they share a common ancestor.", "items": {"$ref": "#/definitions/component"} }, "commits": { "type": "array", "title": "Commits", "description": "A list of zero or more commits which provide a trail describing how the component deviates from an ancestor, descendant, or variant.", "items": {"$ref": "#/definitions/commit"} }, "patches": { "type": "array", "title": "Patches", "description": ">A list of zero or more patches describing how the component deviates from an ancestor, descendant, or variant. Patches may be complimentary to commits or may be used in place of commits.", "items": {"$ref": "#/definitions/patch"} }, "notes": { "type": "string", "title": "Notes", "description": "Notes, observations, and other non-structured commentary describing the components pedigree." } } }, "externalReferences": { "type": "array", "items": {"$ref": "#/definitions/externalReference"}, "title": "External References" }, "components": { "$id": "#/definitions/component/properties/components", "type": "array", "items": {"$ref": "#/definitions/component"}, "uniqueItems": true, "title": "Components" }, "evidence": { "$ref": "#/definitions/componentEvidence", "title": "Evidence", "description": "Provides the ability to document evidence collected through various forms of extraction or analysis." }, "properties": { "type": "array", "title": "Properties", "description": "Provides the ability to document properties in a name-value store. This provides flexibility to include data not officially supported in the standard without having to use additional namespaces or create extensions. Unlike key-value stores, properties support duplicate names, each potentially having different values.", "items": {"$ref": "#/definitions/property"} } } }, "swid": { "type": "object", "title": "SWID Tag", "description": "Specifies metadata and content for ISO-IEC 19770-2 Software Identification (SWID) Tags.", "required": [ "tagId", "name" ], "properties": { "tagId": { "type": "string", "title": "Tag ID", "description": "Maps to the tagId of a SoftwareIdentity." }, "name": { "type": "string", "title": "Name", "description": "Maps to the name of a SoftwareIdentity." }, "version": { "type": "string", "title": "Version", "default": "0.0", "description": "Maps to the version of a SoftwareIdentity." }, "tagVersion": { "type": "integer", "title": "Tag Version", "default": 0, "description": "Maps to the tagVersion of a SoftwareIdentity." }, "patch": { "type": "boolean", "title": "Patch", "default": false, "description": "Maps to the patch of a SoftwareIdentity." }, "text": { "title": "Attachment text", "description": "Specifies the metadata and content of the SWID tag.", "$ref": "#/definitions/attachment" }, "url": { "type": "string", "title": "URL", "description": "The URL to the SWID file.", "format": "iri-reference" } } }, "attachment": { "type": "object", "title": "Attachment", "description": "Specifies the metadata and content for an attachment.", "required": [ "content" ], "properties": { "contentType": { "type": "string", "title": "Content-Type", "description": "Specifies the content type of the text. 
Defaults to text/plain if not specified.", "default": "text/plain" }, "encoding": { "type": "string", "title": "Encoding", "description": "Specifies the optional encoding the text is represented in.", "enum": [ "base64" ] }, "content": { "type": "string", "title": "Attachment Text", "description": "The attachment data" } } }, "hash": { "type": "object", "title": "Hash Objects", "required": [ "alg", "content" ], "properties": { "alg": { "$ref": "#/definitions/hash-alg" }, "content": { "$ref": "#/definitions/hash-content" } } }, "hash-alg": { "type": "string", "enum": [ "MD5", "SHA-1", "SHA-256", "SHA-384", "SHA-512", "SHA3-256", "SHA3-384", "SHA3-512", "BLAKE2b-256", "BLAKE2b-384", "BLAKE2b-512", "BLAKE3" ], "title": "Hash Algorithm" }, "hash-content": { "type": "string", "title": "Hash Content (value)", "examples": ["3942447fac867ae5cdb3229b658f4d48"], "pattern": "^([a-fA-F0-9]{32}|[a-fA-F0-9]{40}|[a-fA-F0-9]{64}|[a-fA-F0-9]{96}|[a-fA-F0-9]{128})$" }, "license": { "type": "object", "title": "License Object", "oneOf": [ { "required": ["id"] }, { "required": ["name"] } ], "properties": { "id": { "$ref": "spdx.schema.json", "title": "License ID (SPDX)", "description": "A valid SPDX license ID", "examples": ["Apache-2.0"] }, "name": { "type": "string", "title": "License Name", "description": "If SPDX does not define the license used, this field may be used to provide the license name", "examples": ["Acme Software License"] }, "text": { "title": "License text", "description": "An optional way to include the textual content of a license.", "$ref": "#/definitions/attachment" }, "url": { "type": "string", "title": "License URL", "description": "The URL to the license file. If specified, a 'license' externalReference should also be specified for completeness", "examples": ["https://www.apache.org/licenses/LICENSE-2.0.txt"], "format": "iri-reference" } } }, "licenseChoice": { "type": "object", "title": "License(s)", "properties": { "license": { "$ref": "#/definitions/license" }, "expression": { "type": "string", "title": "SPDX License Expression", "examples": [ "Apache-2.0 AND (MIT OR GPL-2.0-only)", "GPL-3.0-only WITH Classpath-exception-2.0" ] } }, "oneOf":[ { "required": ["license"] }, { "required": ["expression"] } ] }, "commit": { "type": "object", "title": "Commit", "description": "Specifies an individual commit", "properties": { "uid": { "type": "string", "title": "UID", "description": "A unique identifier of the commit. This may be version control specific. For example, Subversion uses revision numbers whereas git uses commit hashes." }, "url": { "type": "string", "title": "URL", "description": "The URL to the commit. 
This URL will typically point to a commit in a version control system.", "format": "iri-reference" }, "author": { "title": "Author", "description": "The author who created the changes in the commit", "$ref": "#/definitions/identifiableAction" }, "committer": { "title": "Committer", "description": "The person who committed or pushed the commit", "$ref": "#/definitions/identifiableAction" }, "message": { "type": "string", "title": "Message", "description": "The text description of the contents of the commit" } } }, "patch": { "type": "object", "title": "Patch", "description": "Specifies an individual patch", "required": [ "type" ], "properties": { "type": { "type": "string", "enum": [ "unofficial", "monkey", "backport", "cherry-pick" ], "title": "Type", "description": "Specifies the purpose for the patch including the resolution of defects, security issues, or new behavior or functionality" }, "diff": { "title": "Diff", "description": "The patch file (or diff) that show changes. Refer to https://en.wikipedia.org/wiki/Diff", "$ref": "#/definitions/diff" }, "resolves": { "type": "array", "items": {"$ref": "#/definitions/issue"}, "title": "Resolves", "description": "A collection of issues the patch resolves" } } }, "diff": { "type": "object", "title": "Diff", "description": "The patch file (or diff) that show changes. Refer to https://en.wikipedia.org/wiki/Diff", "properties": { "text": { "title": "Diff text", "description": "Specifies the optional text of the diff", "$ref": "#/definitions/attachment" }, "url": { "type": "string", "title": "URL", "description": "Specifies the URL to the diff", "format": "iri-reference" } } }, "issue": { "type": "object", "title": "Issue", "description": "An individual issue that has been resolved.", "required": [ "type" ], "properties": { "type": { "type": "string", "enum": [ "defect", "enhancement", "security" ], "title": "Type", "description": "Specifies the type of issue" }, "id": { "type": "string", "title": "ID", "description": "The identifier of the issue assigned by the source of the issue" }, "name": { "type": "string", "title": "Name", "description": "The name of the issue" }, "description": { "type": "string", "title": "Description", "description": "A description of the issue" }, "source": { "type": "object", "title": "Source", "description": "The source of the issue where it is documented", "properties": { "name": { "type": "string", "title": "Name", "description": "The name of the source. For example 'National Vulnerability Database', 'NVD', and 'Apache'" }, "url": { "type": "string", "title": "URL", "description": "The url of the issue documentation as provided by the source", "format": "iri-reference" } } }, "references": { "type": "array", "items": { "type": "string", "format": "iri-reference" }, "title": "References", "description": "A collection of URL's for reference. 
Multiple URLs are allowed.", "examples": ["https://example.com"] } } }, "identifiableAction": { "type": "object", "title": "Identifiable Action", "description": "Specifies an individual commit", "properties": { "timestamp": { "type": "string", "format": "date-time", "title": "Timestamp", "description": "The timestamp in which the action occurred" }, "name": { "type": "string", "title": "Name", "description": "The name of the individual who performed the action" }, "email": { "type": "string", "format": "idn-email", "title": "E-mail", "description": "The email address of the individual who performed the action" } } }, "externalReference": { "type": "object", "title": "External Reference", "description": "Specifies an individual external reference", "required": [ "url", "type" ], "properties": { "url": { "type": "string", "title": "URL", "description": "The URL to the external reference", "format": "iri-reference" }, "comment": { "type": "string", "title": "Comment", "description": "An optional comment describing the external reference" }, "type": { "type": "string", "title": "Type", "description": "Specifies the type of external reference. There are built-in types to describe common references. If a type does not exist for the reference being referred to, use the \"other\" type.", "enum": [ "vcs", "issue-tracker", "website", "advisories", "bom", "mailing-list", "social", "chat", "documentation", "support", "distribution", "license", "build-meta", "build-system", "other" ] }, "hashes": { "$id": "#/definitions/externalReference/properties/hashes", "type": "array", "items": {"$ref": "#/definitions/hash"}, "title": "Hashes", "description": "The hashes of the external reference (if applicable)." } } }, "dependency": { "type": "object", "title": "Dependency", "description": "Defines the direct dependencies of a component. Components that do not have their own dependencies MUST be declared as empty elements within the graph. Components that are not represented in the dependency graph MAY have unknown dependencies. It is RECOMMENDED that implementations assume this to be opaque and not an indicator of a component being dependency-free.", "required": [ "ref" ], "properties": { "ref": { "type": "string", "title": "Reference", "description": "References a component by the components bom-ref attribute" }, "dependsOn": { "type": "array", "uniqueItems": true, "items": { "type": "string" }, "title": "Depends On", "description": "The bom-ref identifiers of the components that are dependencies of this dependency object." } } }, "service": { "type": "object", "title": "Service Object", "required": [ "name" ], "properties": { "bom-ref": { "type": "string", "title": "BOM Reference", "description": "An optional identifier which can be used to reference the service elsewhere in the BOM. Every bom-ref should be unique." }, "provider": { "title": "Provider", "description": "The organization that provides the service.", "$ref": "#/definitions/organizationalEntity" }, "group": { "type": "string", "title": "Service Group", "description": "The grouping name, namespace, or identifier. This will often be a shortened, single name of the company or project that produced the service or domain name. Whitespace and special characters should be avoided.", "examples": ["com.acme"] }, "name": { "type": "string", "title": "Service Name", "description": "The name of the service. 
This will often be a shortened, single name of the service.", "examples": ["ticker-service"] }, "version": { "type": "string", "title": "Service Version", "description": "The service version.", "examples": ["1.0.0"] }, "description": { "type": "string", "title": "Service Description", "description": "Specifies a description for the service" }, "endpoints": { "type": "array", "items": { "type": "string", "format": "iri-reference" }, "title": "Endpoints", "description": "The endpoint URIs of the service. Multiple endpoints are allowed.", "examples": ["https://example.com/api/v1/ticker"] }, "authenticated": { "type": "boolean", "title": "Authentication Required", "description": "A boolean value indicating if the service requires authentication. A value of true indicates the service requires authentication prior to use. A value of false indicates the service does not require authentication." }, "x-trust-boundary": { "type": "boolean", "title": "Crosses Trust Boundary", "description": "A boolean value indicating if use of the service crosses a trust zone or boundary. A value of true indicates that by using the service, a trust boundary is crossed. A value of false indicates that by using the service, a trust boundary is not crossed." }, "data": { "type": "array", "items": {"$ref": "#/definitions/dataClassification"}, "title": "Data Classification", "description": "Specifies the data classification." }, "licenses": { "type": "array", "items": {"$ref": "#/definitions/licenseChoice"}, "title": "Component License(s)" }, "externalReferences": { "type": "array", "items": {"$ref": "#/definitions/externalReference"}, "title": "External References" }, "services": { "$id": "#/definitions/service/properties/services", "type": "array", "items": {"$ref": "#/definitions/service"}, "uniqueItems": true, "title": "Services" }, "properties": { "type": "array", "title": "Properties", "description": "Provides the ability to document properties in a name-value store. This provides flexibility to include data not officially supported in the standard without having to use additional namespaces or create extensions. Unlike key-value stores, properties support duplicate names, each potentially having different values.", "items": {"$ref": "#/definitions/property"} } } }, "dataClassification": { "type": "object", "title": "Hash Objects", "required": [ "flow", "classification" ], "properties": { "flow": { "$ref": "#/definitions/dataFlow" }, "classification": { "type": "string" } } }, "dataFlow": { "type": "string", "enum": [ "inbound", "outbound", "bi-directional", "unknown" ], "title": "Data flow direction" }, "copyright": { "type": "object", "title": "Copyright", "required": [ "text" ], "properties": { "text": { "type": "string", "title": "Copyright Text" } } }, "componentEvidence": { "type": "object", "title": "Evidence", "description": "Provides the ability to document evidence collected through various forms of extraction or analysis.", "properties": { "licenses": { "type": "array", "items": {"$ref": "#/definitions/licenseChoice"}, "title": "Component License(s)" }, "copyright": { "type": "array", "items": {"$ref": "#/definitions/copyright"}, "title": "Copyright" } } }, "compositions": { "type": "object", "title": "Compositions", "required": [ "aggregate" ], "properties": { "aggregate": { "$ref": "#/definitions/aggregateType", "title": "Aggregate", "description": "Specifies an aggregate type that describe how complete a relationship is." 
}, "assemblies": { "type": "array", "uniqueItems": true, "items": { "type": "string" }, "title": "BOM references", "description": "The bom-ref identifiers of the components or services being described. Assemblies refer to nested relationships whereby a constituent part may include other constituent parts. References do not cascade to child parts. References are explicit for the specified constituent part only." }, "dependencies": { "type": "array", "uniqueItems": true, "items": { "type": "string" }, "title": "BOM references", "description": "The bom-ref identifiers of the components or services being described. Dependencies refer to a relationship whereby an independent constituent part requires another independent constituent part. References do not cascade to transitive dependencies. References are explicit for the specified dependency only." } } }, "aggregateType": { "type": "string", "default": "not_specified", "enum": [ "complete", "incomplete", "incomplete_first_party_only", "incomplete_third_party_only", "unknown", "not_specified" ] }, "property": { "type": "object", "title": "Lightweight name-value pair", "properties": { "name": { "type": "string", "title": "Name", "description": "The name of the property. Duplicate names are allowed, each potentially having a different value." }, "value": { "type": "string", "title": "Value", "description": "The value of the property." } } } } } ================================================ FILE: tests/resources/cdx_bom-1.4.schema.json ================================================ { "$schema": "http://json-schema.org/draft-07/schema#", "$id": "http://cyclonedx.org/schema/bom-1.4.schema.json", "type": "object", "title": "CycloneDX Software Bill of Materials Standard", "$comment" : "CycloneDX JSON schema is published under the terms of the Apache License 2.0.", "required": [ "bomFormat", "specVersion", "version" ], "additionalProperties": false, "properties": { "$schema": { "type": "string", "enum": [ "http://cyclonedx.org/schema/bom-1.4.schema.json" ] }, "bomFormat": { "type": "string", "title": "BOM Format", "description": "Specifies the format of the BOM. This helps to identify the file as CycloneDX since BOMs do not have a filename convention nor does JSON schema support namespaces. This value MUST be \"CycloneDX\".", "enum": [ "CycloneDX" ] }, "specVersion": { "type": "string", "title": "CycloneDX Specification Version", "description": "The version of the CycloneDX specification a BOM conforms to (starting at version 1.2).", "examples": ["1.4"] }, "serialNumber": { "type": "string", "title": "BOM Serial Number", "description": "Every BOM generated SHOULD have a unique serial number, even if the contents of the BOM have not changed over time. If specified, the serial number MUST conform to RFC-4122. Use of serial numbers are RECOMMENDED.", "examples": ["urn:uuid:3e671687-395b-41f5-a30f-a58921a69b79"], "pattern": "^urn:uuid:[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$" }, "version": { "type": "integer", "title": "BOM Version", "description": "Whenever an existing BOM is modified, either manually or through automated processes, the version of the BOM SHOULD be incremented by 1. When a system is presented with multiple BOMs with identical serial numbers, the system SHOULD use the most recent version of the BOM. The default version is '1'.", "default": 1, "examples": [1] }, "metadata": { "$ref": "#/definitions/metadata", "title": "BOM Metadata", "description": "Provides additional information about a BOM." 
}, "components": { "type": "array", "additionalItems": false, "items": {"$ref": "#/definitions/component"}, "uniqueItems": true, "title": "Components", "description": "A list of software and hardware components." }, "services": { "type": "array", "additionalItems": false, "items": {"$ref": "#/definitions/service"}, "uniqueItems": true, "title": "Services", "description": "A list of services. This may include microservices, function-as-a-service, and other types of network or intra-process services." }, "externalReferences": { "type": "array", "additionalItems": false, "items": {"$ref": "#/definitions/externalReference"}, "title": "External References", "description": "External references provide a way to document systems, sites, and information that may be relevant but which are not included with the BOM." }, "dependencies": { "type": "array", "additionalItems": false, "items": {"$ref": "#/definitions/dependency"}, "uniqueItems": true, "title": "Dependencies", "description": "Provides the ability to document dependency relationships." }, "compositions": { "type": "array", "additionalItems": false, "items": {"$ref": "#/definitions/compositions"}, "uniqueItems": true, "title": "Compositions", "description": "Compositions describe constituent parts (including components, services, and dependency relationships) and their completeness." }, "vulnerabilities": { "type": "array", "additionalItems": false, "items": {"$ref": "#/definitions/vulnerability"}, "uniqueItems": true, "title": "Vulnerabilities", "description": "Vulnerabilities identified in components or services." }, "signature": { "$ref": "#/definitions/signature", "title": "Signature", "description": "Enveloped signature in [JSON Signature Format (JSF)](https://cyberphone.github.io/doc/security/jsf.html)." } }, "definitions": { "refType": { "$comment": "Identifier-DataType for interlinked elements.", "type": "string" }, "metadata": { "type": "object", "title": "BOM Metadata Object", "additionalProperties": false, "properties": { "timestamp": { "type": "string", "format": "date-time", "title": "Timestamp", "description": "The date and time (timestamp) when the BOM was created." }, "tools": { "type": "array", "title": "Creation Tools", "description": "The tool(s) used in the creation of the BOM.", "additionalItems": false, "items": {"$ref": "#/definitions/tool"} }, "authors" :{ "type": "array", "title": "Authors", "description": "The person(s) who created the BOM. Authors are common in BOMs created through manual processes. BOMs created through automated means may not have authors.", "additionalItems": false, "items": {"$ref": "#/definitions/organizationalContact"} }, "component": { "title": "Component", "description": "The component that the BOM describes.", "$ref": "#/definitions/component" }, "manufacture": { "title": "Manufacture", "description": "The organization that manufactured the component that the BOM describes.", "$ref": "#/definitions/organizationalEntity" }, "supplier": { "title": "Supplier", "description": " The organization that supplied the component that the BOM describes. The supplier may often be the manufacturer, but may also be a distributor or repackager.", "$ref": "#/definitions/organizationalEntity" }, "licenses": { "type": "array", "title": "BOM License(s)", "additionalItems": false, "items": {"$ref": "#/definitions/licenseChoice"} }, "properties": { "type": "array", "title": "Properties", "description": "Provides the ability to document properties in a name-value store. 
This provides flexibility to include data not officially supported in the standard without having to use additional namespaces or create extensions. Unlike key-value stores, properties support duplicate names, each potentially having different values. Property names of interest to the general public are encouraged to be registered in the [CycloneDX Property Taxonomy](https://github.com/CycloneDX/cyclonedx-property-taxonomy). Formal registration is OPTIONAL.", "additionalItems": false, "items": {"$ref": "#/definitions/property"} } } }, "tool": { "type": "object", "title": "Tool", "description": "Information about the automated or manual tool used", "additionalProperties": false, "properties": { "vendor": { "type": "string", "title": "Tool Vendor", "description": "The name of the vendor who created the tool" }, "name": { "type": "string", "title": "Tool Name", "description": "The name of the tool" }, "version": { "type": "string", "title": "Tool Version", "description": "The version of the tool" }, "hashes": { "type": "array", "additionalItems": false, "items": {"$ref": "#/definitions/hash"}, "title": "Hashes", "description": "The hashes of the tool (if applicable)." }, "externalReferences": { "type": "array", "additionalItems": false, "items": {"$ref": "#/definitions/externalReference"}, "title": "External References", "description": "External references provide a way to document systems, sites, and information that may be relevant but which are not included with the BOM." } } }, "organizationalEntity": { "type": "object", "title": "Organizational Entity Object", "description": "", "additionalProperties": false, "properties": { "name": { "type": "string", "title": "Name", "description": "The name of the organization", "examples": [ "Example Inc." ] }, "url": { "type": "array", "items": { "type": "string", "format": "iri-reference" }, "title": "URL", "description": "The URL of the organization. Multiple URLs are allowed.", "examples": ["https://example.com"] }, "contact": { "type": "array", "title": "Contact", "description": "A contact at the organization. Multiple contacts are allowed.", "additionalItems": false, "items": {"$ref": "#/definitions/organizationalContact"} } } }, "organizationalContact": { "type": "object", "title": "Organizational Contact Object", "description": "", "additionalProperties": false, "properties": { "name": { "type": "string", "title": "Name", "description": "The name of a contact", "examples": ["Contact name"] }, "email": { "type": "string", "format": "idn-email", "title": "Email Address", "description": "The email address of the contact.", "examples": ["firstname.lastname@example.com"] }, "phone": { "type": "string", "title": "Phone", "description": "The phone number of the contact.", "examples": ["800-555-1212"] } } }, "component": { "type": "object", "title": "Component Object", "required": [ "type", "name" ], "additionalProperties": false, "properties": { "type": { "type": "string", "enum": [ "application", "framework", "library", "container", "operating-system", "device", "firmware", "file" ], "title": "Component Type", "description": "Specifies the type of component. For software components, classify as application if no more specific appropriate classification is available or cannot be determined for the component. Types include:\n\n* __application__ = A software application. 
Refer to [https://en.wikipedia.org/wiki/Application_software](https://en.wikipedia.org/wiki/Application_software) for information about applications.\n* __framework__ = A software framework. Refer to [https://en.wikipedia.org/wiki/Software_framework](https://en.wikipedia.org/wiki/Software_framework) for information on how frameworks vary slightly from libraries.\n* __library__ = A software library. Refer to [https://en.wikipedia.org/wiki/Library_(computing)](https://en.wikipedia.org/wiki/Library_(computing))\n for information about libraries. All third-party and open source reusable components will likely be a library. If the library also has key features of a framework, then it should be classified as a framework. If not, or is unknown, then specifying library is RECOMMENDED.\n* __container__ = A packaging and/or runtime format, not specific to any particular technology, which isolates software inside the container from software outside of a container through virtualization technology. Refer to [https://en.wikipedia.org/wiki/OS-level_virtualization](https://en.wikipedia.org/wiki/OS-level_virtualization)\n* __operating-system__ = A software operating system without regard to deployment model (i.e. installed on physical hardware, virtual machine, image, etc) Refer to [https://en.wikipedia.org/wiki/Operating_system](https://en.wikipedia.org/wiki/Operating_system)\n* __device__ = A hardware device such as a processor, or chip-set. A hardware device containing firmware SHOULD include a component for the physical hardware itself, and another component of type 'firmware' or 'operating-system' (whichever is relevant), describing information about the software running on the device.\n See also the list of [known device properties](https://github.com/CycloneDX/cyclonedx-property-taxonomy/blob/main/cdx/device.md).\n* __firmware__ = A special type of software that provides low-level control over a devices hardware. Refer to [https://en.wikipedia.org/wiki/Firmware](https://en.wikipedia.org/wiki/Firmware)\n* __file__ = A computer file. Refer to [https://en.wikipedia.org/wiki/Computer_file](https://en.wikipedia.org/wiki/Computer_file) for information about files.", "examples": ["library"] }, "mime-type": { "type": "string", "title": "Mime-Type", "description": "The optional mime-type of the component. When used on file components, the mime-type can provide additional context about the kind of file being represented such as an image, font, or executable. Some library or framework components may also have an associated mime-type.", "examples": ["image/jpeg"], "pattern": "^[-+a-z0-9.]+/[-+a-z0-9.]+$" }, "bom-ref": { "$ref": "#/definitions/refType", "title": "BOM Reference", "description": "An optional identifier which can be used to reference the component elsewhere in the BOM. Every bom-ref MUST be unique within the BOM." }, "supplier": { "title": "Component Supplier", "description": " The organization that supplied the component. The supplier may often be the manufacturer, but may also be a distributor or repackager.", "$ref": "#/definitions/organizationalEntity" }, "author": { "type": "string", "title": "Component Author", "description": "The person(s) or organization(s) that authored the component", "examples": ["Acme Inc"] }, "publisher": { "type": "string", "title": "Component Publisher", "description": "The person(s) or organization(s) that published the component", "examples": ["Acme Inc"] }, "group": { "type": "string", "title": "Component Group", "description": "The grouping name or identifier. 
This will often be a shortened, single name of the company or project that produced the component, or the source package or domain name. Whitespace and special characters should be avoided. Examples include: apache, org.apache.commons, and apache.org.", "examples": ["com.acme"] }, "name": { "type": "string", "title": "Component Name", "description": "The name of the component. This will often be a shortened, single name of the component. Examples: commons-lang3 and jquery", "examples": ["tomcat-catalina"] }, "version": { "type": "string", "title": "Component Version", "description": "The component version. The version should ideally comply with semantic versioning but is not enforced.", "examples": ["9.0.14"] }, "description": { "type": "string", "title": "Component Description", "description": "Specifies a description for the component" }, "scope": { "type": "string", "enum": [ "required", "optional", "excluded" ], "title": "Component Scope", "description": "Specifies the scope of the component. If scope is not specified, 'required' scope SHOULD be assumed by the consumer of the BOM.", "default": "required" }, "hashes": { "type": "array", "title": "Component Hashes", "additionalItems": false, "items": {"$ref": "#/definitions/hash"} }, "licenses": { "type": "array", "additionalItems": false, "items": {"$ref": "#/definitions/licenseChoice"}, "title": "Component License(s)" }, "copyright": { "type": "string", "title": "Component Copyright", "description": "A copyright notice informing users of the underlying claims to copyright ownership in a published work.", "examples": ["Acme Inc"] }, "cpe": { "type": "string", "title": "Component Common Platform Enumeration (CPE)", "description": "Specifies a well-formed CPE name that conforms to the CPE 2.2 or 2.3 specification. See [https://nvd.nist.gov/products/cpe](https://nvd.nist.gov/products/cpe)", "examples": ["cpe:2.3:a:acme:component_framework:-:*:*:*:*:*:*:*"] }, "purl": { "type": "string", "title": "Component Package URL (purl)", "description": "Specifies the package-url (purl). The purl, if specified, MUST be valid and conform to the specification defined at: [https://github.com/package-url/purl-spec](https://github.com/package-url/purl-spec)", "examples": ["pkg:maven/com.acme/tomcat-catalina@9.0.14?packaging=jar"] }, "swid": { "$ref": "#/definitions/swid", "title": "SWID Tag", "description": "Specifies metadata and content for [ISO-IEC 19770-2 Software Identification (SWID) Tags](https://www.iso.org/standard/65666.html)." }, "modified": { "type": "boolean", "title": "Component Modified From Original", "description": "[Deprecated] - DO NOT USE. This will be removed in a future version. Use the pedigree element instead to supply information on exactly how the component was modified. A boolean value indicating if the component has been modified from the original. A value of true indicates the component is a derivative of the original. A value of false indicates the component has not been modified from the original." }, "pedigree": { "type": "object", "title": "Component Pedigree", "description": "Component pedigree is a way to document complex supply chain scenarios where components are created, distributed, modified, redistributed, combined with other components, etc. Pedigree supports viewing this complex chain from the beginning, the end, or anywhere in the middle. 
It also provides a way to document variants where the exact relation may not be known.", "additionalProperties": false, "properties": { "ancestors": { "type": "array", "title": "Ancestors", "description": "Describes zero or more components in which a component is derived from. This is commonly used to describe forks from existing projects where the forked version contains a ancestor node containing the original component it was forked from. For example, Component A is the original component. Component B is the component being used and documented in the BOM. However, Component B contains a pedigree node with a single ancestor documenting Component A - the original component from which Component B is derived from.", "additionalItems": false, "items": {"$ref": "#/definitions/component"} }, "descendants": { "type": "array", "title": "Descendants", "description": "Descendants are the exact opposite of ancestors. This provides a way to document all forks (and their forks) of an original or root component.", "additionalItems": false, "items": {"$ref": "#/definitions/component"} }, "variants": { "type": "array", "title": "Variants", "description": "Variants describe relations where the relationship between the components are not known. For example, if Component A contains nearly identical code to Component B. They are both related, but it is unclear if one is derived from the other, or if they share a common ancestor.", "additionalItems": false, "items": {"$ref": "#/definitions/component"} }, "commits": { "type": "array", "title": "Commits", "description": "A list of zero or more commits which provide a trail describing how the component deviates from an ancestor, descendant, or variant.", "additionalItems": false, "items": {"$ref": "#/definitions/commit"} }, "patches": { "type": "array", "title": "Patches", "description": ">A list of zero or more patches describing how the component deviates from an ancestor, descendant, or variant. Patches may be complimentary to commits or may be used in place of commits.", "additionalItems": false, "items": {"$ref": "#/definitions/patch"} }, "notes": { "type": "string", "title": "Notes", "description": "Notes, observations, and other non-structured commentary describing the components pedigree." } } }, "externalReferences": { "type": "array", "additionalItems": false, "items": {"$ref": "#/definitions/externalReference"}, "title": "External References", "description": "External references provide a way to document systems, sites, and information that may be relevant but which are not included with the BOM." }, "components": { "type": "array", "additionalItems": false, "items": {"$ref": "#/definitions/component"}, "uniqueItems": true, "title": "Components", "description": "A list of software and hardware components included in the parent component. This is not a dependency tree. It provides a way to specify a hierarchical representation of component assemblies, similar to system → subsystem → parts assembly in physical supply chains." }, "evidence": { "$ref": "#/definitions/componentEvidence", "title": "Evidence", "description": "Provides the ability to document evidence collected through various forms of extraction or analysis." }, "releaseNotes": { "$ref": "#/definitions/releaseNotes", "title": "Release notes", "description": "Specifies optional release notes." }, "properties": { "type": "array", "title": "Properties", "description": "Provides the ability to document properties in a name-value store. 
This provides flexibility to include data not officially supported in the standard without having to use additional namespaces or create extensions. Unlike key-value stores, properties support duplicate names, each potentially having different values. Property names of interest to the general public are encouraged to be registered in the [CycloneDX Property Taxonomy](https://github.com/CycloneDX/cyclonedx-property-taxonomy). Formal registration is OPTIONAL.", "additionalItems": false, "items": {"$ref": "#/definitions/property"} }, "signature": { "$ref": "#/definitions/signature", "title": "Signature", "description": "Enveloped signature in [JSON Signature Format (JSF)](https://cyberphone.github.io/doc/security/jsf.html)." } } }, "swid": { "type": "object", "title": "SWID Tag", "description": "Specifies metadata and content for ISO-IEC 19770-2 Software Identification (SWID) Tags.", "required": [ "tagId", "name" ], "additionalProperties": false, "properties": { "tagId": { "type": "string", "title": "Tag ID", "description": "Maps to the tagId of a SoftwareIdentity." }, "name": { "type": "string", "title": "Name", "description": "Maps to the name of a SoftwareIdentity." }, "version": { "type": "string", "title": "Version", "default": "0.0", "description": "Maps to the version of a SoftwareIdentity." }, "tagVersion": { "type": "integer", "title": "Tag Version", "default": 0, "description": "Maps to the tagVersion of a SoftwareIdentity." }, "patch": { "type": "boolean", "title": "Patch", "default": false, "description": "Maps to the patch of a SoftwareIdentity." }, "text": { "title": "Attachment text", "description": "Specifies the metadata and content of the SWID tag.", "$ref": "#/definitions/attachment" }, "url": { "type": "string", "title": "URL", "description": "The URL to the SWID file.", "format": "iri-reference" } } }, "attachment": { "type": "object", "title": "Attachment", "description": "Specifies the metadata and content for an attachment.", "required": [ "content" ], "additionalProperties": false, "properties": { "contentType": { "type": "string", "title": "Content-Type", "description": "Specifies the content type of the text. Defaults to text/plain if not specified.", "default": "text/plain" }, "encoding": { "type": "string", "title": "Encoding", "description": "Specifies the optional encoding the text is represented in.", "enum": [ "base64" ] }, "content": { "type": "string", "title": "Attachment Text", "description": "The attachment data. Proactive controls such as input validation and sanitization should be employed to prevent misuse of attachment text." 
} } }, "hash": { "type": "object", "title": "Hash Objects", "required": [ "alg", "content" ], "additionalProperties": false, "properties": { "alg": { "$ref": "#/definitions/hash-alg" }, "content": { "$ref": "#/definitions/hash-content" } } }, "hash-alg": { "type": "string", "enum": [ "MD5", "SHA-1", "SHA-256", "SHA-384", "SHA-512", "SHA3-256", "SHA3-384", "SHA3-512", "BLAKE2b-256", "BLAKE2b-384", "BLAKE2b-512", "BLAKE3" ], "title": "Hash Algorithm" }, "hash-content": { "type": "string", "title": "Hash Content (value)", "examples": ["3942447fac867ae5cdb3229b658f4d48"], "pattern": "^([a-fA-F0-9]{32}|[a-fA-F0-9]{40}|[a-fA-F0-9]{64}|[a-fA-F0-9]{96}|[a-fA-F0-9]{128})$" }, "license": { "type": "object", "title": "License Object", "oneOf": [ { "required": ["id"] }, { "required": ["name"] } ], "additionalProperties": false, "properties": { "id": { "$ref": "spdx.schema.json", "title": "License ID (SPDX)", "description": "A valid SPDX license ID", "examples": ["Apache-2.0"] }, "name": { "type": "string", "title": "License Name", "description": "If SPDX does not define the license used, this field may be used to provide the license name", "examples": ["Acme Software License"] }, "text": { "title": "License text", "description": "An optional way to include the textual content of a license.", "$ref": "#/definitions/attachment" }, "url": { "type": "string", "title": "License URL", "description": "The URL to the license file. If specified, a 'license' externalReference should also be specified for completeness", "examples": ["https://www.apache.org/licenses/LICENSE-2.0.txt"], "format": "iri-reference" } } }, "licenseChoice": { "type": "object", "title": "License(s)", "additionalProperties": false, "properties": { "license": { "$ref": "#/definitions/license" }, "expression": { "type": "string", "title": "SPDX License Expression", "examples": [ "Apache-2.0 AND (MIT OR GPL-2.0-only)", "GPL-3.0-only WITH Classpath-exception-2.0" ] } }, "oneOf":[ { "required": ["license"] }, { "required": ["expression"] } ] }, "commit": { "type": "object", "title": "Commit", "description": "Specifies an individual commit", "additionalProperties": false, "properties": { "uid": { "type": "string", "title": "UID", "description": "A unique identifier of the commit. This may be version control specific. For example, Subversion uses revision numbers whereas git uses commit hashes." }, "url": { "type": "string", "title": "URL", "description": "The URL to the commit. This URL will typically point to a commit in a version control system.", "format": "iri-reference" }, "author": { "title": "Author", "description": "The author who created the changes in the commit", "$ref": "#/definitions/identifiableAction" }, "committer": { "title": "Committer", "description": "The person who committed or pushed the commit", "$ref": "#/definitions/identifiableAction" }, "message": { "type": "string", "title": "Message", "description": "The text description of the contents of the commit" } } }, "patch": { "type": "object", "title": "Patch", "description": "Specifies an individual patch", "required": [ "type" ], "additionalProperties": false, "properties": { "type": { "type": "string", "enum": [ "unofficial", "monkey", "backport", "cherry-pick" ], "title": "Type", "description": "Specifies the purpose for the patch including the resolution of defects, security issues, or new behavior or functionality.\n\n* __unofficial__ = A patch which is not developed by the creators or maintainers of the software being patched. 
Refer to [https://en.wikipedia.org/wiki/Unofficial_patch](https://en.wikipedia.org/wiki/Unofficial_patch)\n* __monkey__ = A patch which dynamically modifies runtime behavior. Refer to [https://en.wikipedia.org/wiki/Monkey_patch](https://en.wikipedia.org/wiki/Monkey_patch)\n* __backport__ = A patch which takes code from a newer version of software and applies it to older versions of the same software. Refer to [https://en.wikipedia.org/wiki/Backporting](https://en.wikipedia.org/wiki/Backporting)\n* __cherry-pick__ = A patch created by selectively applying commits from other versions or branches of the same software." }, "diff": { "title": "Diff", "description": "The patch file (or diff) that show changes. Refer to [https://en.wikipedia.org/wiki/Diff](https://en.wikipedia.org/wiki/Diff)", "$ref": "#/definitions/diff" }, "resolves": { "type": "array", "additionalItems": false, "items": {"$ref": "#/definitions/issue"}, "title": "Resolves", "description": "A collection of issues the patch resolves" } } }, "diff": { "type": "object", "title": "Diff", "description": "The patch file (or diff) that show changes. Refer to https://en.wikipedia.org/wiki/Diff", "additionalProperties": false, "properties": { "text": { "title": "Diff text", "description": "Specifies the optional text of the diff", "$ref": "#/definitions/attachment" }, "url": { "type": "string", "title": "URL", "description": "Specifies the URL to the diff", "format": "iri-reference" } } }, "issue": { "type": "object", "title": "Diff", "description": "An individual issue that has been resolved.", "required": [ "type" ], "additionalProperties": false, "properties": { "type": { "type": "string", "enum": [ "defect", "enhancement", "security" ], "title": "Type", "description": "Specifies the type of issue" }, "id": { "type": "string", "title": "ID", "description": "The identifier of the issue assigned by the source of the issue" }, "name": { "type": "string", "title": "Name", "description": "The name of the issue" }, "description": { "type": "string", "title": "Description", "description": "A description of the issue" }, "source": { "type": "object", "title": "Source", "description": "The source of the issue where it is documented", "additionalProperties": false, "properties": { "name": { "type": "string", "title": "Name", "description": "The name of the source. For example 'National Vulnerability Database', 'NVD', and 'Apache'" }, "url": { "type": "string", "title": "URL", "description": "The url of the issue documentation as provided by the source", "format": "iri-reference" } } }, "references": { "type": "array", "items": { "type": "string", "format": "iri-reference" }, "title": "References", "description": "A collection of URL's for reference. 
Multiple URLs are allowed.", "examples": ["https://example.com"] } } }, "identifiableAction": { "type": "object", "title": "Identifiable Action", "description": "Specifies an individual commit", "additionalProperties": false, "properties": { "timestamp": { "type": "string", "format": "date-time", "title": "Timestamp", "description": "The timestamp in which the action occurred" }, "name": { "type": "string", "title": "Name", "description": "The name of the individual who performed the action" }, "email": { "type": "string", "format": "idn-email", "title": "E-mail", "description": "The email address of the individual who performed the action" } } }, "externalReference": { "type": "object", "title": "External Reference", "description": "Specifies an individual external reference", "required": [ "url", "type" ], "additionalProperties": false, "properties": { "url": { "type": "string", "title": "URL", "description": "The URL to the external reference", "format": "iri-reference" }, "comment": { "type": "string", "title": "Comment", "description": "An optional comment describing the external reference" }, "type": { "type": "string", "title": "Type", "description": "Specifies the type of external reference. There are built-in types to describe common references. If a type does not exist for the reference being referred to, use the \"other\" type.", "enum": [ "vcs", "issue-tracker", "website", "advisories", "bom", "mailing-list", "social", "chat", "documentation", "support", "distribution", "license", "build-meta", "build-system", "release-notes", "other" ] }, "hashes": { "type": "array", "additionalItems": false, "items": {"$ref": "#/definitions/hash"}, "title": "Hashes", "description": "The hashes of the external reference (if applicable)." } } }, "dependency": { "type": "object", "title": "Dependency", "description": "Defines the direct dependencies of a component. Components that do not have their own dependencies MUST be declared as empty elements within the graph. Components that are not represented in the dependency graph MAY have unknown dependencies. It is RECOMMENDED that implementations assume this to be opaque and not an indicator of a component being dependency-free.", "required": [ "ref" ], "additionalProperties": false, "properties": { "ref": { "$ref": "#/definitions/refType", "title": "Reference", "description": "References a component by the components bom-ref attribute" }, "dependsOn": { "type": "array", "uniqueItems": true, "additionalItems": false, "items": { "$ref": "#/definitions/refType" }, "title": "Depends On", "description": "The bom-ref identifiers of the components that are dependencies of this dependency object." } } }, "service": { "type": "object", "title": "Service Object", "required": [ "name" ], "additionalProperties": false, "properties": { "bom-ref": { "$ref": "#/definitions/refType", "title": "BOM Reference", "description": "An optional identifier which can be used to reference the service elsewhere in the BOM. Every bom-ref MUST be unique within the BOM." }, "provider": { "title": "Provider", "description": "The organization that provides the service.", "$ref": "#/definitions/organizationalEntity" }, "group": { "type": "string", "title": "Service Group", "description": "The grouping name, namespace, or identifier. This will often be a shortened, single name of the company or project that produced the service or domain name. 
Whitespace and special characters should be avoided.", "examples": ["com.acme"] }, "name": { "type": "string", "title": "Service Name", "description": "The name of the service. This will often be a shortened, single name of the service.", "examples": ["ticker-service"] }, "version": { "type": "string", "title": "Service Version", "description": "The service version.", "examples": ["1.0.0"] }, "description": { "type": "string", "title": "Service Description", "description": "Specifies a description for the service" }, "endpoints": { "type": "array", "items": { "type": "string", "format": "iri-reference" }, "title": "Endpoints", "description": "The endpoint URIs of the service. Multiple endpoints are allowed.", "examples": ["https://example.com/api/v1/ticker"] }, "authenticated": { "type": "boolean", "title": "Authentication Required", "description": "A boolean value indicating if the service requires authentication. A value of true indicates the service requires authentication prior to use. A value of false indicates the service does not require authentication." }, "x-trust-boundary": { "type": "boolean", "title": "Crosses Trust Boundary", "description": "A boolean value indicating if use of the service crosses a trust zone or boundary. A value of true indicates that by using the service, a trust boundary is crossed. A value of false indicates that by using the service, a trust boundary is not crossed." }, "data": { "type": "array", "additionalItems": false, "items": {"$ref": "#/definitions/dataClassification"}, "title": "Data Classification", "description": "Specifies the data classification." }, "licenses": { "type": "array", "additionalItems": false, "items": {"$ref": "#/definitions/licenseChoice"}, "title": "Component License(s)" }, "externalReferences": { "type": "array", "additionalItems": false, "items": {"$ref": "#/definitions/externalReference"}, "title": "External References", "description": "External references provide a way to document systems, sites, and information that may be relevant but which are not included with the BOM." }, "services": { "type": "array", "additionalItems": false, "items": {"$ref": "#/definitions/service"}, "uniqueItems": true, "title": "Services", "description": "A list of services included or deployed behind the parent service. This is not a dependency tree. It provides a way to specify a hierarchical representation of service assemblies." }, "releaseNotes": { "$ref": "#/definitions/releaseNotes", "title": "Release notes", "description": "Specifies optional release notes." }, "properties": { "type": "array", "title": "Properties", "description": "Provides the ability to document properties in a name-value store. This provides flexibility to include data not officially supported in the standard without having to use additional namespaces or create extensions. Unlike key-value stores, properties support duplicate names, each potentially having different values. Property names of interest to the general public are encouraged to be registered in the [CycloneDX Property Taxonomy](https://github.com/CycloneDX/cyclonedx-property-taxonomy). Formal registration is OPTIONAL.", "additionalItems": false, "items": {"$ref": "#/definitions/property"} }, "signature": { "$ref": "#/definitions/signature", "title": "Signature", "description": "Enveloped signature in [JSON Signature Format (JSF)](https://cyberphone.github.io/doc/security/jsf.html)." 
} } }, "dataClassification": { "type": "object", "title": "Hash Objects", "required": [ "flow", "classification" ], "additionalProperties": false, "properties": { "flow": { "$ref": "#/definitions/dataFlow", "title": "Directional Flow", "description": "Specifies the flow direction of the data. Direction is relative to the service. Inbound flow states that data enters the service. Outbound flow states that data leaves the service. Bi-directional states that data flows both ways, and unknown states that the direction is not known." }, "classification": { "type": "string", "title": "Classification", "description": "Data classification tags data according to its type, sensitivity, and value if altered, stolen, or destroyed." } } }, "dataFlow": { "type": "string", "enum": [ "inbound", "outbound", "bi-directional", "unknown" ], "title": "Data flow direction", "description": "Specifies the flow direction of the data. Direction is relative to the service. Inbound flow states that data enters the service. Outbound flow states that data leaves the service. Bi-directional states that data flows both ways, and unknown states that the direction is not known." }, "copyright": { "type": "object", "title": "Copyright", "required": [ "text" ], "additionalProperties": false, "properties": { "text": { "type": "string", "title": "Copyright Text" } } }, "componentEvidence": { "type": "object", "title": "Evidence", "description": "Provides the ability to document evidence collected through various forms of extraction or analysis.", "additionalProperties": false, "properties": { "licenses": { "type": "array", "additionalItems": false, "items": {"$ref": "#/definitions/licenseChoice"}, "title": "Component License(s)" }, "copyright": { "type": "array", "additionalItems": false, "items": {"$ref": "#/definitions/copyright"}, "title": "Copyright" } } }, "compositions": { "type": "object", "title": "Compositions", "required": [ "aggregate" ], "additionalProperties": false, "properties": { "aggregate": { "$ref": "#/definitions/aggregateType", "title": "Aggregate", "description": "Specifies an aggregate type that describe how complete a relationship is." }, "assemblies": { "type": "array", "uniqueItems": true, "items": { "type": "string" }, "title": "BOM references", "description": "The bom-ref identifiers of the components or services being described. Assemblies refer to nested relationships whereby a constituent part may include other constituent parts. References do not cascade to child parts. References are explicit for the specified constituent part only." }, "dependencies": { "type": "array", "uniqueItems": true, "items": { "type": "string" }, "title": "BOM references", "description": "The bom-ref identifiers of the components or services being described. Dependencies refer to a relationship whereby an independent constituent part requires another independent constituent part. References do not cascade to transitive dependencies. References are explicit for the specified dependency only." }, "signature": { "$ref": "#/definitions/signature", "title": "Signature", "description": "Enveloped signature in [JSON Signature Format (JSF)](https://cyberphone.github.io/doc/security/jsf.html)." 
} } }, "aggregateType": { "type": "string", "default": "not_specified", "enum": [ "complete", "incomplete", "incomplete_first_party_only", "incomplete_third_party_only", "unknown", "not_specified" ] }, "property": { "type": "object", "title": "Lightweight name-value pair", "properties": { "name": { "type": "string", "title": "Name", "description": "The name of the property. Duplicate names are allowed, each potentially having a different value." }, "value": { "type": "string", "title": "Value", "description": "The value of the property." } } }, "localeType": { "type": "string", "pattern": "^([a-z]{2})(-[A-Z]{2})?$", "title": "Locale", "description": "Defines a syntax for representing two character language code (ISO-639) followed by an optional two character country code. The language code MUST be lower case. If the country code is specified, the country code MUST be upper case. The language code and country code MUST be separated by a minus sign. Examples: en, en-US, fr, fr-CA" }, "releaseType": { "type": "string", "examples": [ "major", "minor", "patch", "pre-release", "internal" ], "description": "The software versioning type. It is RECOMMENDED that the release type use one of 'major', 'minor', 'patch', 'pre-release', or 'internal'. Representing all possible software release types is not practical, so standardizing on the recommended values, whenever possible, is strongly encouraged.\n\n* __major__ = A major release may contain significant changes or may introduce breaking changes.\n* __minor__ = A minor release, also known as an update, may contain a smaller number of changes than major releases.\n* __patch__ = Patch releases are typically unplanned and may resolve defects or important security issues.\n* __pre-release__ = A pre-release may include alpha, beta, or release candidates and typically have limited support. They provide the ability to preview a release prior to its general availability.\n* __internal__ = Internal releases are not for public consumption and are intended to be used exclusively by the project or manufacturer that produced it." }, "note": { "type": "object", "title": "Note", "description": "A note containing the locale and content.", "required": [ "text" ], "additionalProperties": false, "properties": { "locale": { "$ref": "#/definitions/localeType", "title": "Locale", "description": "The ISO-639 (or higher) language code and optional ISO-3166 (or higher) country code. Examples include: \"en\", \"en-US\", \"fr\" and \"fr-CA\"" }, "text": { "title": "Release note content", "description": "Specifies the full content of the release note.", "$ref": "#/definitions/attachment" } } }, "releaseNotes": { "type": "object", "title": "Release notes", "required": [ "type" ], "additionalProperties": false, "properties": { "type": { "$ref": "#/definitions/releaseType", "title": "Type", "description": "The software versioning type the release note describes." }, "title": { "type": "string", "title": "Title", "description": "The title of the release." }, "featuredImage": { "type": "string", "format": "iri-reference", "title": "Featured image", "description": "The URL to an image that may be prominently displayed with the release note." }, "socialImage": { "type": "string", "format": "iri-reference", "title": "Social image", "description": "The URL to an image that may be used in messaging on social media platforms." }, "description": { "type": "string", "title": "Description", "description": "A short description of the release." 
}, "timestamp": { "type": "string", "format": "date-time", "title": "Timestamp", "description": "The date and time (timestamp) when the release note was created." }, "aliases": { "type": "array", "items": { "type": "string" }, "title": "Aliases", "description": "One or more alternate names the release may be referred to. This may include unofficial terms used by development and marketing teams (e.g. code names)." }, "tags": { "type": "array", "items": { "type": "string" }, "title": "Tags", "description": "One or more tags that may aid in search or retrieval of the release note." }, "resolves": { "type": "array", "additionalItems": false, "items": {"$ref": "#/definitions/issue"}, "title": "Resolves", "description": "A collection of issues that have been resolved." }, "notes": { "type": "array", "additionalItems": false, "items": {"$ref": "#/definitions/note"}, "title": "Notes", "description": "Zero or more release notes containing the locale and content. Multiple note objects may be specified to support release notes in a wide variety of languages." }, "properties": { "type": "array", "title": "Properties", "description": "Provides the ability to document properties in a name-value store. This provides flexibility to include data not officially supported in the standard without having to use additional namespaces or create extensions. Unlike key-value stores, properties support duplicate names, each potentially having different values. Property names of interest to the general public are encouraged to be registered in the [CycloneDX Property Taxonomy](https://github.com/CycloneDX/cyclonedx-property-taxonomy). Formal registration is OPTIONAL.", "additionalItems": false, "items": {"$ref": "#/definitions/property"} } } }, "advisory": { "type": "object", "title": "Advisory", "description": "Title and location where advisory information can be obtained. An advisory is a notification of a threat to a component, service, or system.", "required": ["url"], "additionalProperties": false, "properties": { "title": { "type": "string", "title": "Title", "description": "An optional name of the advisory." }, "url": { "type": "string", "title": "URL", "format": "iri-reference", "description": "Location where the advisory can be obtained." } } }, "cwe": { "type": "integer", "minimum": 1, "title": "CWE", "description": "Integer representation of a Common Weaknesses Enumerations (CWE). For example 399 (of https://cwe.mitre.org/data/definitions/399.html)" }, "severity": { "type": "string", "title": "Severity", "description": "Textual representation of the severity of the vulnerability adopted by the analysis method. 
If the analysis method uses values other than what is provided, the user is expected to translate appropriately.", "enum": [ "critical", "high", "medium", "low", "info", "none", "unknown" ] }, "scoreMethod": { "type": "string", "title": "Method", "description": "Specifies the severity or risk scoring methodology or standard used.\n\n* CVSSv2 - [Common Vulnerability Scoring System v2](https://www.first.org/cvss/v2/)\n* CVSSv3 - [Common Vulnerability Scoring System v3](https://www.first.org/cvss/v3-0/)\n* CVSSv31 - [Common Vulnerability Scoring System v3.1](https://www.first.org/cvss/v3-1/)\n* OWASP - [OWASP Risk Rating Methodology](https://owasp.org/www-community/OWASP_Risk_Rating_Methodology)", "enum": [ "CVSSv2", "CVSSv3", "CVSSv31", "OWASP", "other" ] }, "impactAnalysisState": { "type": "string", "title": "Impact Analysis State", "description": "Declares the current state of an occurrence of a vulnerability, after automated or manual analysis. \n\n* __resolved__ = the vulnerability has been remediated. \n* __resolved\\_with\\_pedigree__ = the vulnerability has been remediated and evidence of the changes are provided in the affected components pedigree containing verifiable commit history and/or diff(s). \n* __exploitable__ = the vulnerability may be directly or indirectly exploitable. \n* __in\\_triage__ = the vulnerability is being investigated. \n* __false\\_positive__ = the vulnerability is not specific to the component or service and was falsely identified or associated. \n* __not\\_affected__ = the component or service is not affected by the vulnerability. Justification should be specified for all not_affected cases.", "enum": [ "resolved", "resolved_with_pedigree", "exploitable", "in_triage", "false_positive", "not_affected" ] }, "impactAnalysisJustification": { "type": "string", "title": "Impact Analysis Justification", "description": "The rationale of why the impact analysis state was asserted. \n\n* __code\\_not\\_present__ = the code has been removed or tree-shaked. \n* __code\\_not\\_reachable__ = the vulnerable code is not invoked at runtime. \n* __requires\\_configuration__ = exploitability requires a configurable option to be set/unset. \n* __requires\\_dependency__ = exploitability requires a dependency that is not present. \n* __requires\\_environment__ = exploitability requires a certain environment which is not present. \n* __protected\\_by\\_compiler__ = exploitability requires a compiler flag to be set/unset. \n* __protected\\_at\\_runtime__ = exploits are prevented at runtime. \n* __protected\\_at\\_perimeter__ = attacks are blocked at physical, logical, or network perimeter. \n* __protected\\_by\\_mitigating\\_control__ = preventative measures have been implemented that reduce the likelihood and/or impact of the vulnerability.", "enum": [ "code_not_present", "code_not_reachable", "requires_configuration", "requires_dependency", "requires_environment", "protected_by_compiler", "protected_at_runtime", "protected_at_perimeter", "protected_by_mitigating_control" ] }, "rating": { "type": "object", "title": "Rating", "description": "Defines the severity or risk ratings of a vulnerability.", "additionalProperties": false, "properties": { "source": { "$ref": "#/definitions/vulnerabilitySource", "description": "The source that calculated the severity or risk rating of the vulnerability." }, "score": { "type": "number", "title": "Score", "description": "The numerical score of the rating." 
}, "severity": { "$ref": "#/definitions/severity", "description": "Textual representation of the severity that corresponds to the numerical score of the rating." }, "method": { "$ref": "#/definitions/scoreMethod" }, "vector": { "type": "string", "title": "Vector", "description": "Textual representation of the metric values used to score the vulnerability" }, "justification": { "type": "string", "title": "Justification", "description": "An optional reason for rating the vulnerability as it was" } } }, "vulnerabilitySource": { "type": "object", "title": "Source", "description": "The source of vulnerability information. This is often the organization that published the vulnerability.", "additionalProperties": false, "properties": { "url": { "type": "string", "title": "URL", "description": "The url of the vulnerability documentation as provided by the source.", "examples": [ "https://nvd.nist.gov/vuln/detail/CVE-2021-39182" ] }, "name": { "type": "string", "title": "Name", "description": "The name of the source.", "examples": [ "NVD", "National Vulnerability Database", "OSS Index", "VulnDB", "GitHub Advisories" ] } } }, "vulnerability": { "type": "object", "title": "Vulnerability", "description": "Defines a weakness in an component or service that could be exploited or triggered by a threat source.", "additionalProperties": false, "properties": { "bom-ref": { "$ref": "#/definitions/refType", "title": "BOM Reference", "description": "An optional identifier which can be used to reference the vulnerability elsewhere in the BOM. Every bom-ref MUST be unique within the BOM." }, "id": { "type": "string", "title": "ID", "description": "The identifier that uniquely identifies the vulnerability.", "examples": [ "CVE-2021-39182", "GHSA-35m5-8cvj-8783", "SNYK-PYTHON-ENROCRYPT-1912876" ] }, "source": { "$ref": "#/definitions/vulnerabilitySource", "description": "The source that published the vulnerability." }, "references": { "type": "array", "title": "References", "description": "Zero or more pointers to vulnerabilities that are the equivalent of the vulnerability specified. Often times, the same vulnerability may exist in multiple sources of vulnerability intelligence, but have different identifiers. References provide a way to correlate vulnerabilities across multiple sources of vulnerability intelligence.", "additionalItems": false, "items": { "required": [ "id", "source" ], "additionalProperties": false, "properties": { "id": { "type": "string", "title": "ID", "description": "An identifier that uniquely identifies the vulnerability.", "examples": [ "CVE-2021-39182", "GHSA-35m5-8cvj-8783", "SNYK-PYTHON-ENROCRYPT-1912876" ] }, "source": { "$ref": "#/definitions/vulnerabilitySource", "description": "The source that published the vulnerability." } } } }, "ratings": { "type": "array", "title": "Ratings", "description": "List of vulnerability ratings", "additionalItems": false, "items": { "$ref": "#/definitions/rating" } }, "cwes": { "type": "array", "title": "CWEs", "description": "List of Common Weaknesses Enumerations (CWEs) codes that describes this vulnerability. For example 399 (of https://cwe.mitre.org/data/definitions/399.html)", "examples": [399], "additionalItems": false, "items": { "$ref": "#/definitions/cwe" } }, "description": { "type": "string", "title": "Description", "description": "A description of the vulnerability as provided by the source." 
}, "detail": { "type": "string", "title": "Details", "description": "If available, an in-depth description of the vulnerability as provided by the source organization. Details often include examples, proof-of-concepts, and other information useful in understanding root cause." }, "recommendation": { "type": "string", "title": "Details", "description": "Recommendations of how the vulnerability can be remediated or mitigated." }, "advisories": { "type": "array", "title": "Advisories", "description": "Published advisories of the vulnerability if provided.", "additionalItems": false, "items": { "$ref": "#/definitions/advisory" } }, "created": { "type": "string", "format": "date-time", "title": "Created", "description": "The date and time (timestamp) when the vulnerability record was created in the vulnerability database." }, "published": { "type": "string", "format": "date-time", "title": "Published", "description": "The date and time (timestamp) when the vulnerability record was first published." }, "updated": { "type": "string", "format": "date-time", "title": "Updated", "description": "The date and time (timestamp) when the vulnerability record was last updated." }, "credits": { "type": "object", "title": "Credits", "description": "Individuals or organizations credited with the discovery of the vulnerability.", "additionalProperties": false, "properties": { "organizations": { "type": "array", "title": "Organizations", "description": "The organizations credited with vulnerability discovery.", "additionalItems": false, "items": { "$ref": "#/definitions/organizationalEntity" } }, "individuals": { "type": "array", "title": "Individuals", "description": "The individuals, not associated with organizations, that are credited with vulnerability discovery.", "additionalItems": false, "items": { "$ref": "#/definitions/organizationalContact" } } } }, "tools": { "type": "array", "title": "Creation Tools", "description": "The tool(s) used to identify, confirm, or score the vulnerability.", "additionalItems": false, "items": {"$ref": "#/definitions/tool"} }, "analysis": { "type": "object", "title": "Impact Analysis", "description": "An assessment of the impact and exploitability of the vulnerability.", "additionalProperties": false, "properties": { "state": { "$ref": "#/definitions/impactAnalysisState" }, "justification": { "$ref": "#/definitions/impactAnalysisJustification" }, "response": { "type": "array", "title": "Response", "description": "A response to the vulnerability by the manufacturer, supplier, or project responsible for the affected component or service. More than one response is allowed. Responses are strongly encouraged for vulnerabilities where the analysis state is exploitable.", "additionalItems": false, "items": { "type": "string", "enum": [ "can_not_fix", "will_not_fix", "update", "rollback", "workaround_available" ] } }, "detail": { "type": "string", "title": "Detail", "description": "Detailed description of the impact including methods used during assessment. If a vulnerability is not exploitable, this field should include specific details on why the component or service is not impacted by this vulnerability." 
} } }, "affects": { "type": "array", "uniqueItems": true, "additionalItems": false, "items": { "required": [ "ref" ], "additionalProperties": false, "properties": { "ref": { "$ref": "#/definitions/refType", "title": "Reference", "description": "References a component or service by the objects bom-ref" }, "versions": { "type": "array", "title": "Versions", "description": "Zero or more individual versions or range of versions.", "additionalItems": false, "items": { "oneOf": [ { "required": ["version"] }, { "required": ["range"] } ], "additionalProperties": false, "properties": { "version": { "description": "A single version of a component or service.", "$ref": "#/definitions/version" }, "range": { "description": "A version range specified in Package URL Version Range syntax (vers) which is defined at https://github.com/package-url/purl-spec/VERSION-RANGE-SPEC.rst", "$ref": "#/definitions/range" }, "status": { "description": "The vulnerability status for the version or range of versions.", "$ref": "#/definitions/affectedStatus", "default": "affected" } } } } } }, "title": "Affects", "description": "The components or services that are affected by the vulnerability." }, "properties": { "type": "array", "title": "Properties", "description": "Provides the ability to document properties in a name-value store. This provides flexibility to include data not officially supported in the standard without having to use additional namespaces or create extensions. Unlike key-value stores, properties support duplicate names, each potentially having different values. Property names of interest to the general public are encouraged to be registered in the [CycloneDX Property Taxonomy](https://github.com/CycloneDX/cyclonedx-property-taxonomy). Formal registration is OPTIONAL.", "additionalItems": false, "items": { "$ref": "#/definitions/property" } } } }, "affectedStatus": { "description": "The vulnerability status of a given version or range of versions of a product. The statuses 'affected' and 'unaffected' indicate that the version is affected or unaffected by the vulnerability. The status 'unknown' indicates that it is unknown or unspecified whether the given version is affected. There can be many reasons for an 'unknown' status, including that an investigation has not been undertaken or that a vendor has not disclosed the status.", "type": "string", "enum": [ "affected", "unaffected", "unknown" ] }, "version": { "description": "A single version of a component or service.", "type": "string", "minLength": 1, "maxLength": 1024 }, "range": { "description": "A version range specified in Package URL Version Range syntax (vers) which is defined at https://github.com/package-url/purl-spec/VERSION-RANGE-SPEC.rst", "type": "string", "minLength": 1, "maxLength": 1024 }, "signature": { "$ref": "jsf-0.82.schema.json#/definitions/signature", "title": "Signature", "description": "Enveloped signature in [JSON Signature Format (JSF)](https://cyberphone.github.io/doc/security/jsf.html)." 
} } } ================================================ FILE: tests/resources/grype-test-db.tar.gz.license ================================================ SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) SPDX-License-Identifier: Apache-2.0 ================================================ FILE: tests/resources/jsf-0.82.schema.json ================================================ { "$schema": "http://json-schema.org/draft-07/schema#", "$id": "http://cyclonedx.org/schema/jsf-0.82.schema.json", "type": "object", "title": "JSON Signature Format (JSF) standard", "definitions": { "signature": { "type": "object" } } } ================================================ FILE: tests/resources/make_grype_test_db.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Generate the minimal grype vulnerability DB used by the integration tests. The output is tests/resources/grype-test-db.tar.gz — a ~2 KB archive containing a minimal SQLite database seeded with a single synthetic CVE entry. Committing this artifact avoids the ~50 s cold-cache grype DB download during test runs. Schema target: grype DB model version 6 (grype v0.79+). Find the current model: grype db status | grep Schema or: SELECT model FROM db_metadata; in a freshly downloaded DB. When grype bumps the model integer: 1. Update MODEL below to match. 2. Verify the vulnerability and affected-CPE blob JSON schemas against grype's source (db/v6/models/) or a live DB row. 3. Re-run this script and commit the updated grype-test-db.tar.gz. Synthetic CVE: CVE-TEST-2026-00001 affects sbomnix-test-first == 1.0. Grype matches pkg:nix packages via auto-generated CPEs (--add-cpes-if-none). The generated CPE for 'sbomnix-test-first' 1.0 is: cpe:2.3:a:sbomnix-test-first:sbomnix-test-first:1.0:*:*:*:*:*:*:* so the DB entry uses vendor=product='sbomnix-test-first'. """ import json import sqlite3 import tarfile from pathlib import Path HERE = Path(__file__).resolve().parent OUT_ARCHIVE = HERE / "grype-test-db.tar.gz" MODEL = 6 REVISION = 1 ADDITION = 4 SYNTHETIC_CVE = "CVE-TEST-2026-00001" TEST_PACKAGE = "sbomnix-test-first" TEST_VERSION = "1.0" VULN_BLOB = json.dumps( { "id": SYNTHETIC_CVE, "assigner": ["test"], "description": ( "Synthetic vulnerability for sbomnix grype integration tests. " "Not a real CVE." ), "refs": [], "severities": [ { "scheme": "CVSS", "value": {"vector": "AV:N/AC:L/Au:N/C:N/I:N/A:P", "version": "2.0"}, "source": "test", "rank": 1, } ], } ) # Blob linked from affected_cpe_handles; constraint targets the test version. AFFECTED_CPE_BLOB = json.dumps( { "cves": [SYNTHETIC_CVE], "ranges": [{"version": {"constraint": f"= {TEST_VERSION}"}}], } ) # Exact DDL as created by grype — constraint names must match for migration.
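# The tables below follow grype's handle/blob indirection: *_handles rows
# hold identity and foreign keys, while the JSON payloads live in `blobs`
# and are referenced by blob_id. build() wires up a single match chain,
# vulnerability_handles -> affected_cpe_handles -> cpes, with the version
# constraint carried in AFFECTED_CPE_BLOB above.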
DDL = """ CREATE TABLE `affected_cpe_handles` (`id` integer PRIMARY KEY AUTOINCREMENT,`vulnerability_id` integer NOT NULL,`cpe_id` integer,`blob_id` integer,CONSTRAINT `fk_affected_cpe_handles_cpe` FOREIGN KEY (`cpe_id`) REFERENCES `cpes`(`id`),CONSTRAINT `fk_affected_cpe_handles_vulnerability` FOREIGN KEY (`vulnerability_id`) REFERENCES `vulnerability_handles`(`id`)); CREATE TABLE `affected_package_handles` (`id` integer PRIMARY KEY AUTOINCREMENT,`vulnerability_id` integer NOT NULL,`operating_system_id` integer,`package_id` integer,`blob_id` integer,CONSTRAINT `fk_affected_package_handles_vulnerability` FOREIGN KEY (`vulnerability_id`) REFERENCES `vulnerability_handles`(`id`),CONSTRAINT `fk_affected_package_handles_operating_system` FOREIGN KEY (`operating_system_id`) REFERENCES `operating_systems`(`id`),CONSTRAINT `fk_affected_package_handles_package` FOREIGN KEY (`package_id`) REFERENCES `packages`(`id`)); CREATE TABLE `blobs` (`id` integer PRIMARY KEY AUTOINCREMENT,`value` text NOT NULL); CREATE TABLE `cpes` (`id` integer PRIMARY KEY AUTOINCREMENT,`part` text NOT NULL,`vendor` text,`product` text NOT NULL,`edition` text,`language` text,`software_edition` text,`target_hardware` text,`target_software` text,`other` text); CREATE TABLE `cwe_handles` (`id` integer PRIMARY KEY AUTOINCREMENT,`cve` text NOT NULL,`cwe` text NOT NULL,`source` text,`type` text); CREATE TABLE `db_metadata` (`build_timestamp` datetime NOT NULL,`model` integer NOT NULL,`revision` integer NOT NULL,`addition` integer NOT NULL); CREATE TABLE `epss_handles` (`id` integer PRIMARY KEY AUTOINCREMENT,`cve` text NOT NULL,`epss` real NOT NULL,`percentile` real NOT NULL); CREATE TABLE `epss_metadata` (`date` datetime NOT NULL); CREATE TABLE `known_exploited_vulnerability_handles` (`id` integer PRIMARY KEY AUTOINCREMENT,`cve` text NOT NULL,`blob_id` integer); CREATE TABLE `operating_system_specifier_overrides` (`alias` text,`version` text,`version_pattern` text,`codename` text,`channel` text,`replacement` text,`replacement_major_version` text,`replacement_minor_version` text,`replacement_label_version` text,`replacement_channel` text,`rolling` numeric,`applicable_client_db_schemas` text,PRIMARY KEY (`alias`,`version`,`version_pattern`,`replacement`,`replacement_major_version`,`replacement_minor_version`,`replacement_label_version`,`replacement_channel`,`rolling`)); CREATE TABLE `operating_systems` (`id` integer PRIMARY KEY AUTOINCREMENT,`name` text,`release_id` text,`major_version` text,`minor_version` text,`label_version` text,`codename` text,`channel` text,`eol_date` datetime,`eoas_date` datetime); CREATE TABLE `package_cpes` (`cpe_id` integer,`package_id` integer,PRIMARY KEY (`cpe_id`,`package_id`),CONSTRAINT `fk_package_cpes_cpe` FOREIGN KEY (`cpe_id`) REFERENCES `cpes`(`id`),CONSTRAINT `fk_package_cpes_package` FOREIGN KEY (`package_id`) REFERENCES `packages`(`id`)); CREATE TABLE `package_specifier_overrides` (`ecosystem` text,`replacement_ecosystem` text,PRIMARY KEY (`ecosystem`,`replacement_ecosystem`)); CREATE TABLE `packages` (`id` integer PRIMARY KEY AUTOINCREMENT,`ecosystem` text,`name` text); CREATE TABLE `providers` (`id` text,`version` text,`processor` text,`date_captured` datetime,`input_digest` text,PRIMARY KEY (`id`)); CREATE TABLE `unaffected_cpe_handles` (`id` integer PRIMARY KEY AUTOINCREMENT,`vulnerability_id` integer NOT NULL,`cpe_id` integer,`blob_id` integer,CONSTRAINT `fk_unaffected_cpe_handles_vulnerability` FOREIGN KEY (`vulnerability_id`) REFERENCES `vulnerability_handles`(`id`),CONSTRAINT 
`fk_unaffected_cpe_handles_cpe` FOREIGN KEY (`cpe_id`) REFERENCES `cpes`(`id`)); CREATE TABLE `unaffected_package_handles` (`id` integer PRIMARY KEY AUTOINCREMENT,`vulnerability_id` integer NOT NULL,`operating_system_id` integer,`package_id` integer,`blob_id` integer,CONSTRAINT `fk_unaffected_package_handles_vulnerability` FOREIGN KEY (`vulnerability_id`) REFERENCES `vulnerability_handles`(`id`),CONSTRAINT `fk_unaffected_package_handles_operating_system` FOREIGN KEY (`operating_system_id`) REFERENCES `operating_systems`(`id`),CONSTRAINT `fk_unaffected_package_handles_package` FOREIGN KEY (`package_id`) REFERENCES `packages`(`id`)); CREATE TABLE `vulnerability_aliases` (`name` text,`alias` text NOT NULL,PRIMARY KEY (`name`,`alias`)); CREATE TABLE `vulnerability_handles` (`id` integer PRIMARY KEY AUTOINCREMENT,`name` text NOT NULL,`status` text NOT NULL,`published_date` datetime,`modified_date` datetime,`withdrawn_date` datetime,`provider_id` text NOT NULL,`blob_id` integer,CONSTRAINT `fk_vulnerability_handles_provider` FOREIGN KEY (`provider_id`) REFERENCES `providers`(`id`)); """ def build(db_path: Path) -> None: db_path.unlink(missing_ok=True) con = sqlite3.connect(db_path) con.executescript(DDL) con.execute( "INSERT INTO db_metadata VALUES (datetime('now'), ?, ?, ?)", (MODEL, REVISION, ADDITION), ) con.execute( "INSERT INTO providers VALUES ('test', '1', 'test', datetime('now'), " "'xxh64:0000000000000000')" ) # Vulnerability detail blob (blob_id=1) con.execute("INSERT INTO blobs(value) VALUES (?)", (VULN_BLOB,)) # Affected-CPE constraint blob (blob_id=2) con.execute("INSERT INTO blobs(value) VALUES (?)", (AFFECTED_CPE_BLOB,)) con.execute( "INSERT INTO vulnerability_handles" "(name, status, published_date, modified_date, provider_id, blob_id)" " VALUES (?, 'active', datetime('now'), datetime('now'), 'test', 1)", (SYNTHETIC_CVE,), ) # CPE: cpe:2.3:a:sbomnix-test-first:sbomnix-test-first:*:*:*:*:*:*:*:* con.execute( "INSERT INTO cpes(part, vendor, product) VALUES ('a', ?, ?)", (TEST_PACKAGE, TEST_PACKAGE), ) con.execute( "INSERT INTO affected_cpe_handles(vulnerability_id, cpe_id, blob_id)" " VALUES (1, 1, 2)" ) con.commit() con.close() def main() -> None: db_path = HERE / "vulnerability.db" build(db_path) print(f"DB: {db_path.stat().st_size // 1024} KB") with tarfile.open(OUT_ARCHIVE, "w:gz") as tf: tf.add(db_path, arcname="vulnerability.db") db_path.unlink() print(f"Archive: {OUT_ARCHIVE.stat().st_size // 1024} KB → {OUT_ARCHIVE}") if __name__ == "__main__": main() ================================================ FILE: tests/resources/nixmeta-package-set.nix ================================================ # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 { ... 
}: let mkPackage = { name, pname, version, description, homepage, }: builtins.derivation { inherit name pname version; system = builtins.currentSystem; builder = "/bin/sh"; args = [ "-c" "echo ${name} > $out" ]; meta = { inherit description homepage; license = { shortName = "Apache-2.0"; spdxId = "Apache-2.0"; }; }; }; in { first = mkPackage { name = "sbomnix-meta-first-1.0"; pname = "sbomnix-meta-first"; version = "1.0"; description = "First sbomnix metadata fixture package"; homepage = "https://example.test/sbomnix-meta-first"; }; second = mkPackage { name = "sbomnix-meta-second-2.0"; pname = "sbomnix-meta-second"; version = "2.0"; description = "Second sbomnix metadata fixture package"; homepage = "https://example.test/sbomnix-meta-second"; }; } ================================================ FILE: tests/resources/provenance-1.0.schema.json ================================================ { "$schema": "http://json-schema.org/draft-07/schema#", "$id": "https://in-toto.io/Statement/v1", "title": "SLSA Provenance v1.0", "type": "object", "additionalProperties": false, "required": [ "_type", "subject", "predicateType", "predicate" ], "properties": { "_type": { "description": "Identifier for the schema of the Statement. Always https://in-toto.io/Statement/v1 for this version of the spec.", "type": "string" }, "subject": { "description": "Set of software artifacts that the attestation applies to. Each element represents a single software artifact.", "type": "array", "items": { "type": "object", "properties": { "name": { "description": "Identifier to distinguish this artifact from others within the subject.", "type": "string" }, "digest": { "description": "Collection of cryptographic digests for the contents of this artifact.", "type": "object" } } } }, "predicateType": { "description": "URI identifying the type of the Predicate.", "type": "string" }, "predicate": { "type": "object", "additionalProperties": false, "required": [ "buildDefinition", "runDetails" ], "properties": { "buildDefinition": { "type": "object", "additionalProperties": false, "minProperties": 4, "properties": { "buildType": { "description": "Identifies the template for how to perform the build and interpret the parameters and dependencies.", "type": "string" }, "externalParameters": { "description": "The parameters that are under external control, such as those set by a user or tenant of the build platform.", "type": "object" }, "internalParameters": { "description": "The parameters that are under the control of the entity represented by builder.id.", "type": "object" }, "resolvedDependencies": { "description": "Unordered collection of artifacts needed at build time.", "type": "array", "items": { "$ref": "#/$defs/ResourceDescriptor" } } } }, "runDetails": { "type": "object", "additionalProperties": false, "required": [ "builder", "metadata", "byproducts" ], "properties": { "builder": { "description": "Identifies the build platform that executed the invocation.", "type": "object", "properties": { "id": { "description": "URI indicating the transitive closure of the trusted build platform.", "type": "string" }, "builderDependencies": { "description": "Dependencies used by the orchestrator that are not run within the workload and that do not affect the build", "type": "array", "items": { "$ref": "#/$defs/ResourceDescriptor" } }, "version": { "description": "Map of names of components of the build platform to their version.", "type": "object" } } }, "metadata": { "description": "Metadata about this particular execution of the
build.", "type": "object", "properties": { "invocationId": { "description": "Identifies this particular build invocation", "type": "string" }, "startedOn": { "description": "The timestamp of when the build started.", "type": "string" }, "finishedOn": { "description": "The timestamp of when the build completed.", "type": "string" } } }, "byproducts": { "description": "Additional artifacts generated during the build that are not considered the “output” of the build", "type": "array", "items": { "$ref": "#/$defs/ResourceDescriptor" } } } } } } }, "$defs": { "ResourceDescriptor": { "$id": "/schema/ResourceDescriptor", "$schema": "http://json-schema.org/draft-07/schema#", "type": "object", "properties": { "name": { "description": "Machine-readable identifier for distinguishing between descriptors.", "type": "string" }, "uri": { "description": "A URI used to identify the resource or artifact globally.", "type": "string" }, "digest": { "description": "A set of cryptographic digests of the contents of the resource or artifact.", "type": "object" }, "content": { "description": "The contents of the resource or artifact.", "type": "string" }, "downloadLocation": { "description": "The location of the described resource or artifact, if different from the uri.", "type": "string" }, "mediaType": { "description": "The MIME Type (i.e., media type) of the described resource or artifact.", "type": "string" }, "annotations": { "description": "This field MAY be used to provide additional information or metadata about the resource or artifact that may be useful to the consumer when evaluating the attestation against a policy.", "type": "object" } } } } } ================================================ FILE: tests/resources/repology/cves_openssl.html ================================================
CVE ID Affected version(s)
CVE-2024-1111 [3.0.0, 3.1.0]
CVE-2024-2222 [1.0.0, 2.0.0]
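The two rows above pair a CVE id with a bracketed list of affected versions. A minimal sketch, not the repo's actual parser, of how such a row splits into its CVE id and version tokens (the helper name and the plain-text row format are assumptions taken from this fixture alone):

import re

def parse_cve_row(row: str) -> tuple[str, list[str]]:
    """Split 'CVE-2024-1111 [3.0.0, 3.1.0]' into a CVE id and version strings."""
    cve, _, versions = row.partition(" ")
    return cve, re.findall(r"[0-9][\w.]*", versions)

assert parse_cve_row("CVE-2024-2222 [1.0.0, 2.0.0]") == ("CVE-2024-2222", ["1.0.0", "2.0.0"])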
================================================ FILE: tests/resources/repology/projects_empty.html ================================================

No matches

================================================ FILE: tests/resources/repology/projects_hello.html ================================================
Project Newest Selected
hello 2.11 2.12-rc1 2.10 2.11
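These three HTML files act as canned repology responses for the parser tests. A small illustrative loader, assuming only the fixture paths visible in the tree above (the load_fixture helper is hypothetical, not the repo's test code):

from pathlib import Path

REPOLOGY_DIR = Path("tests/resources/repology")

def load_fixture(name: str) -> str:
    """Return fixture HTML, e.g. load_fixture('projects_hello.html')."""
    return (REPOLOGY_DIR / name).read_text(encoding="utf-8")

assert "No matches" in load_fixture("projects_empty.html")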
================================================ FILE: tests/resources/sample_cdx_sbom.json ================================================ { "bomFormat": "CycloneDX", "specVersion": "1.4", "serialNumber": "urn:uuid:11111111-1111-4111-8111-111111111111", "version": 1, "metadata": { "component": { "type": "library", "name": "openssl", "version": "3.1.0", "licenses": [ { "license": { "id": "Apache-2.0" } } ], "purl": "pkg:generic/openssl@3.1.0" } }, "components": [] } ================================================ FILE: tests/resources/spdx.schema.json ================================================ { "$schema": "http://json-schema.org/draft-07/schema#", "$id": "http://cyclonedx.org/schema/spdx.schema.json", "type": "string", "title": "SPDX license identifier", "description": "Local offline companion schema for CycloneDX license id validation." } ================================================ FILE: tests/resources/spdx_bom-2.3.schema.json ================================================ { "$schema" : "http://json-schema.org/draft-07/schema#", "$id" : "http://spdx.org/rdf/terms/2.3", "title" : "SPDX 2.3", "type" : "object", "properties" : { "SPDXID" : { "type" : "string", "description" : "Uniquely identify any element in an SPDX document which may be referenced by other elements." }, "annotations" : { "description" : "Provide additional information about an SpdxElement.", "type" : "array", "items" : { "type" : "object", "properties" : { "annotationDate" : { "description" : "Identify when the comment was made. This is to be specified according to the combined date and time in the UTC format, as specified in the ISO 8601 standard.", "type" : "string" }, "annotationType" : { "description" : "Type of the annotation.", "type" : "string", "enum" : [ "OTHER", "REVIEW" ] }, "annotator" : { "description" : "This field identifies the person, organization, or tool that has commented on a file, package, snippet, or the entire document.", "type" : "string" }, "comment" : { "type" : "string" } }, "required" : [ "annotationDate", "annotationType", "annotator", "comment" ], "additionalProperties" : false, "description" : "An Annotation is a comment on an SpdxItem by an agent." } }, "comment" : { "type" : "string" }, "creationInfo" : { "type" : "object", "properties" : { "comment" : { "type" : "string" }, "created" : { "description" : "Identify when the SPDX document was originally created. The date is to be specified according to combined date and time in UTC format as specified in ISO 8601 standard.", "type" : "string" }, "creators" : { "description" : "Identify who (or what, in the case of a tool) created the SPDX document. If the SPDX document was created by an individual, indicate the person's name. If the SPDX document was created on behalf of a company or organization, indicate the entity name. If the SPDX document was created using a software tool, indicate the name and version for that tool. If multiple participants or tools were involved, use multiple instances of this field. Person name or organization name may be designated as “anonymous” if appropriate.", "minItems" : 1, "type" : "array", "items" : { "description" : "Identify who (or what, in the case of a tool) created the SPDX document. If the SPDX document was created by an individual, indicate the person's name. If the SPDX document was created on behalf of a company or organization, indicate the entity name. If the SPDX document was created using a software tool, indicate the name and version for that tool. 
If multiple participants or tools were involved, use multiple instances of this field. Person name or organization name may be designated as “anonymous” if appropriate.", "type" : "string" } }, "licenseListVersion" : { "description" : "An optional field for creators of the SPDX file to provide the version of the SPDX License List used when the SPDX file was created.", "type" : "string" } }, "required" : [ "created", "creators" ], "additionalProperties" : false, "description" : "One instance is required for each SPDX file produced. It provides the necessary information for forward and backward compatibility for processing tools." }, "dataLicense" : { "description" : "License expression for dataLicense. See SPDX Annex D for the license expression syntax. Compliance with the SPDX specification includes populating the SPDX fields therein with data related to such fields (\"SPDX-Metadata\"). The SPDX specification contains numerous fields where an SPDX document creator may provide relevant explanatory text in SPDX-Metadata. Without opining on the lawfulness of \"database rights\" (in jurisdictions where applicable), such explanatory text is copyrightable subject matter in most Berne Convention countries. By using the SPDX specification, or any portion hereof, you hereby agree that any copyright rights (as determined by your jurisdiction) in any SPDX-Metadata, including without limitation explanatory text, shall be subject to the terms of the Creative Commons CC0 1.0 Universal license. For SPDX-Metadata not containing any copyright rights, you hereby agree and acknowledge that the SPDX-Metadata is provided to you \"as-is\" and without any representations or warranties of any kind concerning the SPDX-Metadata, express, implied, statutory or otherwise, including without limitation warranties of title, merchantability, fitness for a particular purpose, non-infringement, or the absence of latent or other defects, accuracy, or the presence or absence of errors, whether or not discoverable, all to the greatest extent permissible under applicable law.", "type" : "string" }, "externalDocumentRefs" : { "description" : "Identify any external SPDX documents referenced within this SPDX document.", "type" : "array", "items" : { "type" : "object", "properties" : { "checksum" : { "type" : "object", "properties" : { "algorithm" : { "description" : "Identifies the algorithm used to produce the subject Checksum. Currently, SHA-1 is the only supported algorithm. It is anticipated that other algorithms will be supported at a later time.", "type" : "string", "enum" : [ "SHA1", "BLAKE3", "SHA3-384", "SHA256", "SHA384", "BLAKE2b-512", "BLAKE2b-256", "SHA3-512", "MD2", "ADLER32", "MD4", "SHA3-256", "BLAKE2b-384", "SHA512", "MD6", "MD5", "SHA224" ] }, "checksumValue" : { "description" : "The checksumValue property provides a lower case hexadecimal encoded digest value produced using a specific algorithm.", "type" : "string" } }, "required" : [ "algorithm", "checksumValue" ], "additionalProperties" : false, "description" : "A Checksum is a value that allows the contents of a file to be authenticated. Even small changes to the content of the file will change its checksum. This class allows the results of a variety of checksum and cryptographic message digest algorithms to be represented."
}, "externalDocumentId" : { "description" : "externalDocumentId is a string containing letters, numbers, ., - and/or + which uniquely identifies an external document within this document.", "type" : "string" }, "spdxDocument" : { "description" : "SPDX ID for SpdxDocument. A property containing an SPDX document.", "type" : "string" } }, "required" : [ "checksum", "externalDocumentId", "spdxDocument" ], "additionalProperties" : false, "description" : "Information about an external SPDX document reference including the checksum. This allows for verification of the external references." } }, "hasExtractedLicensingInfos" : { "description" : "Indicates that a particular ExtractedLicensingInfo was defined in the subject SpdxDocument.", "type" : "array", "items" : { "type" : "object", "properties" : { "comment" : { "type" : "string" }, "crossRefs" : { "description" : "Cross Reference Detail for a license SeeAlso URL", "type" : "array", "items" : { "type" : "object", "properties" : { "isLive" : { "description" : "Indicate a URL is still a live accessible location on the public internet", "type" : "boolean" }, "isValid" : { "description" : "True if the URL is a valid well formed URL", "type" : "boolean" }, "isWayBackLink" : { "description" : "True if the License SeeAlso URL points to a Wayback archive", "type" : "boolean" }, "match" : { "description" : "Status of a License List SeeAlso URL reference if it refers to a website that matches the license text.", "type" : "string" }, "order" : { "description" : "The ordinal order of this element within a list", "type" : "integer" }, "timestamp" : { "description" : "Timestamp", "type" : "string" }, "url" : { "description" : "URL Reference", "type" : "string" } }, "required" : [ "url" ], "additionalProperties" : false, "description" : "Cross reference details for the a URL reference" } }, "extractedText" : { "description" : "Provide a copy of the actual text of the license reference extracted from the package, file or snippet that is associated with the License Identifier to aid in future analysis.", "type" : "string" }, "licenseId" : { "description" : "A human readable short form license identifier for a license. The license ID is either on the standard license list or the form \"LicenseRef-[idString]\" where [idString] is a unique string containing letters, numbers, \".\" or \"-\". When used within a license expression, the license ID can optionally include a reference to an external document in the form \"DocumentRef-[docrefIdString]:LicenseRef-[idString]\" where docRefIdString is an ID for an external document reference.", "type" : "string" }, "name" : { "description" : "Identify name of this SpdxElement.", "type" : "string" }, "seeAlsos" : { "type" : "array", "items" : { "type" : "string" } } }, "required" : [ "extractedText", "licenseId" ], "additionalProperties" : false, "description" : "An ExtractedLicensingInfo represents a license or licensing notice that was found in a package, file or snippet. Any license text that is recognized as a license may be represented as a License rather than an ExtractedLicensingInfo." } }, "name" : { "description" : "Identify name of this SpdxElement.", "type" : "string" }, "revieweds" : { "description" : "Reviewed", "type" : "array", "items" : { "type" : "object", "properties" : { "comment" : { "type" : "string" }, "reviewDate" : { "description" : "The date and time at which the SpdxDocument was reviewed. 
This value must be in UTC and have 'Z' as its timezone indicator.", "type" : "string" }, "reviewer" : { "description" : "The name and, optionally, contact information of the person who performed the review. Values of this property must conform to the agent and tool syntax. The reviewer property is deprecated in favor of Annotation with an annotationType review.", "type" : "string" } }, "required" : [ "reviewDate" ], "additionalProperties" : false, "description" : "This class has been deprecated in favor of an Annotation with an Annotation type of review." } }, "spdxVersion" : { "description" : "Provide a reference number that can be used to understand how to parse and interpret the rest of the file. It will enable both future changes to the specification and to support backward compatibility. The version number consists of a major and minor version indicator. The major field will be incremented when incompatible changes between versions are made (one or more sections are created, modified or deleted). The minor field will be incremented when backwards compatible changes are made.", "type" : "string" }, "documentNamespace" : { "type" : "string", "description" : "The URI provides an unambiguous mechanism for other SPDX documents to reference SPDX elements within this SPDX document." }, "documentDescribes" : { "description" : "Packages, files and/or Snippets described by this SPDX document", "type" : "array", "items" : { "type" : "string", "description" : "SPDX ID for each Package, File, or Snippet." } }, "packages" : { "description" : "Packages referenced in the SPDX document", "type" : "array", "items" : { "type" : "object", "properties" : { "SPDXID" : { "type" : "string", "description" : "Uniquely identify any element in an SPDX document which may be referenced by other elements." }, "annotations" : { "description" : "Provide additional information about an SpdxElement.", "type" : "array", "items" : { "type" : "object", "properties" : { "annotationDate" : { "description" : "Identify when the comment was made. This is to be specified according to the combined date and time in the UTC format, as specified in the ISO 8601 standard.", "type" : "string" }, "annotationType" : { "description" : "Type of the annotation.", "type" : "string", "enum" : [ "OTHER", "REVIEW" ] }, "annotator" : { "description" : "This field identifies the person, organization, or tool that has commented on a file, package, snippet, or the entire document.", "type" : "string" }, "comment" : { "type" : "string" } }, "required" : [ "annotationDate", "annotationType", "annotator", "comment" ], "additionalProperties" : false, "description" : "An Annotation is a comment on an SpdxItem by an agent." } }, "attributionTexts" : { "description" : "This field provides a place for the SPDX data creator to record acknowledgements that may be required to be communicated in some contexts. This is not meant to include the actual complete license text (see licenseConcluded and licenseDeclared), and may or may not include copyright notices (see also copyrightText). The SPDX data creator may use this field to record other acknowledgements, such as particular clauses from license texts, which may be necessary or desirable to reproduce.", "type" : "array", "items" : { "description" : "This field provides a place for the SPDX data creator to record acknowledgements that may be required to be communicated in some contexts.
This is not meant to include the actual complete license text (see licenseConcluded and licenseDeclared), and may or may not include copyright notices (see also copyrightText). The SPDX data creator may use this field to record other acknowledgements, such as particular clauses from license texts, which may be necessary or desirable to reproduce.", "type" : "string" } }, "builtDate" : { "description" : "This field provides a place for recording the actual date the package was built.", "type" : "string" }, "checksums" : { "description" : "The checksum property provides a mechanism that can be used to verify that the contents of a File or Package have not changed.", "type" : "array", "items" : { "type" : "object", "properties" : { "algorithm" : { "description" : "Identifies the algorithm used to produce the subject Checksum. Currently, SHA-1 is the only supported algorithm. It is anticipated that other algorithms will be supported at a later time.", "type" : "string", "enum" : [ "SHA1", "BLAKE3", "SHA3-384", "SHA256", "SHA384", "BLAKE2b-512", "BLAKE2b-256", "SHA3-512", "MD2", "ADLER32", "MD4", "SHA3-256", "BLAKE2b-384", "SHA512", "MD6", "MD5", "SHA224" ] }, "checksumValue" : { "description" : "The checksumValue property provides a lower case hexadecimal encoded digest value produced using a specific algorithm.", "type" : "string" } }, "required" : [ "algorithm", "checksumValue" ], "additionalProperties" : false, "description" : "A Checksum is a value that allows the contents of a file to be authenticated. Even small changes to the content of the file will change its checksum. This class allows the results of a variety of checksum and cryptographic message digest algorithms to be represented." } }, "comment" : { "type" : "string" }, "copyrightText" : { "description" : "The text of copyright declarations recited in the package, file or snippet.\n\nIf the copyrightText field is not present, it implies an equivalent meaning to NOASSERTION.", "type" : "string" }, "description" : { "description" : "Provides a detailed description of the package.", "type" : "string" }, "downloadLocation" : { "description" : "The URI at which this package is available for download. Private (i.e., not publicly reachable) URIs are acceptable as values of this property. The values http://spdx.org/rdf/terms#none and http://spdx.org/rdf/terms#noassertion may be used to specify that the package is not downloadable or that no attempt was made to determine its download location, respectively.", "type" : "string" }, "externalRefs" : { "description" : "An External Reference allows a Package to reference an external source of additional information, metadata, enumerations, asset identifiers, or downloadable content believed to be relevant to the Package.", "type" : "array", "items" : { "type" : "object", "properties" : { "comment" : { "type" : "string" }, "referenceCategory" : { "description" : "Category for the external reference", "type" : "string", "enum" : [ "OTHER", "PERSISTENT-ID", "SECURITY", "PACKAGE-MANAGER" ] }, "referenceLocator" : { "description" : "The unique string with no spaces necessary to access the package-specific information, metadata, or content within the target location. The format of the locator is subject to constraints defined by the referenceType.", "type" : "string" }, "referenceType" : { "description" : "Type of the external reference.
These are defined in an appendix in the SPDX specification.", "type" : "string" } }, "required" : [ "referenceCategory", "referenceLocator", "referenceType" ], "additionalProperties" : false, "description" : "An External Reference allows a Package to reference an external source of additional information, metadata, enumerations, asset identifiers, or downloadable content believed to be relevant to the Package." } }, "filesAnalyzed" : { "description" : "Indicates whether the file content of this package has been available for or subjected to analysis when creating the SPDX document. If false indicates packages that represent metadata or URI references to a project, product, artifact, distribution or a component. If set to false, the package must not contain any files.", "type" : "boolean" }, "hasFiles" : { "description" : "Indicates that a particular file belongs to a package.", "type" : "array", "items" : { "description" : "SPDX ID for File. Indicates that a particular file belongs to a package.", "type" : "string" } }, "homepage" : { "type" : "string" }, "licenseComments" : { "description" : "The licenseComments property allows the preparer of the SPDX document to describe why the licensing in spdx:licenseConcluded was chosen.", "type" : "string" }, "licenseConcluded" : { "description" : "License expression for licenseConcluded. See SPDX Annex D for the license expression syntax. The licensing that the preparer of this SPDX document has concluded, based on the evidence, actually applies to the SPDX Item.\n\nIf the licenseConcluded field is not present for an SPDX Item, it implies an equivalent meaning to NOASSERTION.", "type" : "string" }, "licenseDeclared" : { "description" : "License expression for licenseDeclared. See SPDX Annex D for the license expression syntax. The licensing that the creators of the software in the package, or the packager, have declared. Declarations by the original software creator should be preferred, if they exist.", "type" : "string" }, "licenseInfoFromFiles" : { "description" : "The licensing information that was discovered directly within the package. There will be an instance of this property for each distinct value of all licenseInfoInFile properties of all files contained in the package.\n\nIf the licenseInfoFromFiles field is not present for a package and filesAnalyzed property for that same package is true or omitted, it implies an equivalent meaning to NOASSERTION.", "type" : "array", "items" : { "description" : "License expression for licenseInfoFromFiles. See SPDX Annex D for the license expression syntax. The licensing information that was discovered directly within the package. There will be an instance of this property for each distinct value of all licenseInfoInFile properties of all files contained in the package.\n\nIf the licenseInfoFromFiles field is not present for a package and filesAnalyzed property for that same package is true or omitted, it implies an equivalent meaning to NOASSERTION.", "type" : "string" } }, "name" : { "description" : "Identify name of this SpdxElement.", "type" : "string" }, "originator" : { "description" : "The name and, optionally, contact information of the person or organization that originally created the package. Values of this property must conform to the agent and tool syntax.", "type" : "string" }, "packageFileName" : { "description" : "The base name of the package file name.
For example, zlib-1.2.5.tar.gz.", "type" : "string" }, "packageVerificationCode" : { "type" : "object", "properties" : { "packageVerificationCodeExcludedFiles" : { "description" : "A file that was excluded when calculating the package verification code. This is usually a file containing SPDX data regarding the package. If a package contains more than one SPDX file all SPDX files must be excluded from the package verification code. If this is not done it would be impossible to correctly calculate the verification codes in both files.", "type" : "array", "items" : { "description" : "A file that was excluded when calculating the package verification code. This is usually a file containing SPDX data regarding the package. If a package contains more than one SPDX file all SPDX files must be excluded from the package verification code. If this is not done it would be impossible to correctly calculate the verification codes in both files.", "type" : "string" } }, "packageVerificationCodeValue" : { "description" : "The actual package verification code as a hex encoded value.", "type" : "string" } }, "required" : [ "packageVerificationCodeValue" ], "additionalProperties" : false, "description" : "A manifest based verification code (the algorithm is defined in section 4.7 of the full specification) of the SPDX Item. This allows consumers of this data and/or database to determine if an SPDX item they have in hand is identical to the SPDX item from which the data was produced. This algorithm works even if the SPDX document is included in the SPDX item." }, "primaryPackagePurpose" : { "description" : "This field provides information about the primary purpose of the identified package. Package Purpose is intrinsic to how the package is being used rather than the content of the package.", "type" : "string", "enum" : [ "OTHER", "INSTALL", "ARCHIVE", "FIRMWARE", "APPLICATION", "FRAMEWORK", "LIBRARY", "CONTAINER", "SOURCE", "DEVICE", "OPERATING_SYSTEM", "FILE" ] }, "releaseDate" : { "description" : "This field provides a place for recording the date the package was released.", "type" : "string" }, "sourceInfo" : { "description" : "Allows the producer(s) of the SPDX document to describe how the package was acquired and/or changed from the original source.", "type" : "string" }, "summary" : { "description" : "Provides a short description of the package.", "type" : "string" }, "supplier" : { "description" : "The name and, optionally, contact information of the person or organization who was the immediate supplier of this package to the recipient. The supplier may be different than originator when the software has been repackaged. Values of this property must conform to the agent and tool syntax.", "type" : "string" }, "validUntilDate" : { "description" : "This field provides a place for recording the end of the support period for a package from the supplier.", "type" : "string" }, "versionInfo" : { "description" : "Provides an indication of the version of the package that is described by this SpdxDocument.", "type" : "string" } }, "required" : [ "SPDXID", "downloadLocation", "name" ], "additionalProperties" : false } }, "files" : { "description" : "Files referenced in the SPDX document", "type" : "array", "items" : { "type" : "object", "properties" : { "SPDXID" : { "type" : "string", "description" : "Uniquely identify any element in an SPDX document which may be referenced by other elements." 
}, "annotations" : { "description" : "Provide additional information about an SpdxElement.", "type" : "array", "items" : { "type" : "object", "properties" : { "annotationDate" : { "description" : "Identify when the comment was made. This is to be specified according to the combined date and time in the UTC format, as specified in the ISO 8601 standard.", "type" : "string" }, "annotationType" : { "description" : "Type of the annotation.", "type" : "string", "enum" : [ "OTHER", "REVIEW" ] }, "annotator" : { "description" : "This field identifies the person, organization, or tool that has commented on a file, package, snippet, or the entire document.", "type" : "string" }, "comment" : { "type" : "string" } }, "required" : [ "annotationDate", "annotationType", "annotator", "comment" ], "additionalProperties" : false, "description" : "An Annotation is a comment on an SpdxItem by an agent." } }, "artifactOfs" : { "description" : "Indicates the project in which the SpdxElement originated. Tools must preserve doap:homepage and doap:name properties and the URI (if one is known) of doap:Project resources that are values of this property. All other properties of doap:Projects are not directly supported by SPDX and may be dropped when translating to or from some SPDX formats.", "type" : "array", "items" : { "type" : "object" } }, "attributionTexts" : { "description" : "This field provides a place for the SPDX data creator to record acknowledgements that may be required to be communicated in some contexts. This is not meant to include the actual complete license text (see licenseConculded and licenseDeclared), and may or may not include copyright notices (see also copyrightText). The SPDX data creator may use this field to record other acknowledgements, such as particular clauses from license texts, which may be necessary or desirable to reproduce.", "type" : "array", "items" : { "description" : "This field provides a place for the SPDX data creator to record acknowledgements that may be required to be communicated in some contexts. This is not meant to include the actual complete license text (see licenseConculded and licenseDeclared), and may or may not include copyright notices (see also copyrightText). The SPDX data creator may use this field to record other acknowledgements, such as particular clauses from license texts, which may be necessary or desirable to reproduce.", "type" : "string" } }, "checksums" : { "description" : "The checksum property provides a mechanism that can be used to verify that the contents of a File or Package have not changed.", "minItems" : 1, "type" : "array", "items" : { "type" : "object", "properties" : { "algorithm" : { "description" : "Identifies the algorithm used to produce the subject Checksum. Currently, SHA-1 is the only supported algorithm. It is anticipated that other algorithms will be supported at a later time.", "type" : "string", "enum" : [ "SHA1", "BLAKE3", "SHA3-384", "SHA256", "SHA384", "BLAKE2b-512", "BLAKE2b-256", "SHA3-512", "MD2", "ADLER32", "MD4", "SHA3-256", "BLAKE2b-384", "SHA512", "MD6", "MD5", "SHA224" ] }, "checksumValue" : { "description" : "The checksumValue property provides a lower case hexidecimal encoded digest value produced using a specific algorithm.", "type" : "string" } }, "required" : [ "algorithm", "checksumValue" ], "additionalProperties" : false, "description" : "A Checksum is value that allows the contents of a file to be authenticated. Even small changes to the content of the file will change its checksum. 
This class allows the results of a variety of checksum and cryptographic message digest algorithms to be represented." } }, "comment" : { "type" : "string" }, "copyrightText" : { "description" : "The text of copyright declarations recited in the package, file or snippet.\n\nIf the copyrightText field is not present, it implies an equivalent meaning to NOASSERTION.", "type" : "string" }, "fileContributors" : { "description" : "This field provides a place for the SPDX file creator to record file contributors. Contributors could include names of copyright holders and/or authors who may not be copyright holders yet contributed to the file content.", "type" : "array", "items" : { "description" : "This field provides a place for the SPDX file creator to record file contributors. Contributors could include names of copyright holders and/or authors who may not be copyright holders yet contributed to the file content.", "type" : "string" } }, "fileDependencies" : { "description" : "This field is deprecated since SPDX 2.0 in favor of using Section 7 which provides more granularity about relationships.", "type" : "array", "items" : { "description" : "SPDX ID for File. This field is deprecated since SPDX 2.0 in favor of using Section 7 which provides more granularity about relationships.", "type" : "string" } }, "fileName" : { "description" : "The name of the file relative to the root of the package.", "type" : "string" }, "fileTypes" : { "description" : "The type of the file.", "type" : "array", "items" : { "description" : "The type of the file.", "type" : "string", "enum" : [ "OTHER", "DOCUMENTATION", "IMAGE", "VIDEO", "ARCHIVE", "SPDX", "APPLICATION", "SOURCE", "BINARY", "TEXT", "AUDIO" ] } }, "licenseComments" : { "description" : "The licenseComments property allows the preparer of the SPDX document to describe why the licensing in spdx:licenseConcluded was chosen.", "type" : "string" }, "licenseConcluded" : { "description" : "License expression for licenseConcluded. See SPDX Annex D for the license expression syntax. The licensing that the preparer of this SPDX document has concluded, based on the evidence, actually applies to the SPDX Item.\n\nIf the licenseConcluded field is not present for an SPDX Item, it implies an equivalent meaning to NOASSERTION.", "type" : "string" }, "licenseInfoInFiles" : { "description" : "Licensing information that was discovered directly in the subject file. This is also considered a declared license for the file.\n\nIf the licenseInfoInFile field is not present for a file, it implies an equivalent meaning to NOASSERTION.", "type" : "array", "items" : { "description" : "License expression for licenseInfoInFile. See SPDX Annex D for the license expression syntax. Licensing information that was discovered directly in the subject file. This is also considered a declared license for the file.\n\nIf the licenseInfoInFile field is not present for a file, it implies an equivalent meaning to NOASSERTION.", "type" : "string" } }, "noticeText" : { "description" : "This field provides a place for the SPDX file creator to record potential legal notices found in the file. 
This may or may not include copyright statements.", "type" : "string" } }, "required" : [ "SPDXID", "checksums", "fileName" ], "additionalProperties" : false } }, "snippets" : { "description" : "Snippets referenced in the SPDX document", "type" : "array", "items" : { "type" : "object", "properties" : { "SPDXID" : { "type" : "string", "description" : "Uniquely identify any element in an SPDX document which may be referenced by other elements." }, "annotations" : { "description" : "Provide additional information about an SpdxElement.", "type" : "array", "items" : { "type" : "object", "properties" : { "annotationDate" : { "description" : "Identify when the comment was made. This is to be specified according to the combined date and time in the UTC format, as specified in the ISO 8601 standard.", "type" : "string" }, "annotationType" : { "description" : "Type of the annotation.", "type" : "string", "enum" : [ "OTHER", "REVIEW" ] }, "annotator" : { "description" : "This field identifies the person, organization, or tool that has commented on a file, package, snippet, or the entire document.", "type" : "string" }, "comment" : { "type" : "string" } }, "required" : [ "annotationDate", "annotationType", "annotator", "comment" ], "additionalProperties" : false, "description" : "An Annotation is a comment on an SpdxItem by an agent." } }, "attributionTexts" : { "description" : "This field provides a place for the SPDX data creator to record acknowledgements that may be required to be communicated in some contexts. This is not meant to include the actual complete license text (see licenseConcluded and licenseDeclared), and may or may not include copyright notices (see also copyrightText). The SPDX data creator may use this field to record other acknowledgements, such as particular clauses from license texts, which may be necessary or desirable to reproduce.", "type" : "array", "items" : { "description" : "This field provides a place for the SPDX data creator to record acknowledgements that may be required to be communicated in some contexts. This is not meant to include the actual complete license text (see licenseConcluded and licenseDeclared), and may or may not include copyright notices (see also copyrightText). The SPDX data creator may use this field to record other acknowledgements, such as particular clauses from license texts, which may be necessary or desirable to reproduce.", "type" : "string" } }, "comment" : { "type" : "string" }, "copyrightText" : { "description" : "The text of copyright declarations recited in the package, file or snippet.\n\nIf the copyrightText field is not present, it implies an equivalent meaning to NOASSERTION.", "type" : "string" }, "licenseComments" : { "description" : "The licenseComments property allows the preparer of the SPDX document to describe why the licensing in spdx:licenseConcluded was chosen.", "type" : "string" }, "licenseConcluded" : { "description" : "License expression for licenseConcluded. See SPDX Annex D for the license expression syntax. The licensing that the preparer of this SPDX document has concluded, based on the evidence, actually applies to the SPDX Item.\n\nIf the licenseConcluded field is not present for an SPDX Item, it implies an equivalent meaning to NOASSERTION.", "type" : "string" }, "licenseInfoInSnippets" : { "description" : "Licensing information that was discovered directly in the subject snippet.
This is also considered a declared license for the snippet.\n\nIf the licenseInfoInSnippet field is not present for a snippet, it implies an equivalent meaning to NOASSERTION.", "type" : "array", "items" : { "description" : "License expression for licenseInfoInSnippet. See SPDX Annex D for the license expression syntax. Licensing information that was discovered directly in the subject snippet. This is also considered a declared license for the snippet.\n\nIf the licenseInfoInSnippet field is not present for a snippet, it implies an equivalent meaning to NOASSERTION.", "type" : "string" } }, "name" : { "description" : "Identify name of this SpdxElement.", "type" : "string" }, "ranges" : { "description" : "This field defines the byte range in the original host file (in X.2) that the snippet information applies to", "minItems" : 1, "type" : "array", "items" : { "type" : "object", "properties" : { "endPointer" : { "type" : "object", "properties" : { "reference" : { "description" : "SPDX ID for File", "type" : "string" }, "offset" : { "type" : "integer", "description" : "Byte offset in the file" }, "lineNumber" : { "type" : "integer", "description" : "line number offset in the file" } }, "required" : [ "reference" ], "additionalProperties" : false }, "startPointer" : { "type" : "object", "properties" : { "reference" : { "description" : "SPDX ID for File", "type" : "string" }, "offset" : { "type" : "integer", "description" : "Byte offset in the file" }, "lineNumber" : { "type" : "integer", "description" : "line number offset in the file" } }, "required" : [ "reference" ], "additionalProperties" : false } }, "required" : [ "endPointer", "startPointer" ], "additionalProperties" : false } }, "snippetFromFile" : { "description" : "SPDX ID for File. File containing the SPDX element (e.g. the file containing a snippet).", "type" : "string" } }, "required" : [ "SPDXID", "name", "ranges", "snippetFromFile" ], "additionalProperties" : false } }, "relationships" : { "description" : "Relationships referenced in the SPDX document", "type" : "array", "items" : { "type" : "object", "properties" : { "spdxElementId" : { "type" : "string", "description" : "Id to which the SPDX element is related" }, "comment" : { "type" : "string" }, "relatedSpdxElement" : { "description" : "SPDX ID for SpdxElement.
A related SpdxElement.", "type" : "string" }, "relationshipType" : { "description" : "Describes the type of relationship between two SPDX elements.", "type" : "string", "enum" : [ "VARIANT_OF", "COPY_OF", "PATCH_FOR", "TEST_DEPENDENCY_OF", "CONTAINED_BY", "DATA_FILE_OF", "OPTIONAL_COMPONENT_OF", "ANCESTOR_OF", "GENERATES", "CONTAINS", "OPTIONAL_DEPENDENCY_OF", "FILE_ADDED", "REQUIREMENT_DESCRIPTION_FOR", "DEV_DEPENDENCY_OF", "DEPENDENCY_OF", "BUILD_DEPENDENCY_OF", "DESCRIBES", "PREREQUISITE_FOR", "HAS_PREREQUISITE", "PROVIDED_DEPENDENCY_OF", "DYNAMIC_LINK", "DESCRIBED_BY", "METAFILE_OF", "DEPENDENCY_MANIFEST_OF", "PATCH_APPLIED", "RUNTIME_DEPENDENCY_OF", "TEST_OF", "TEST_TOOL_OF", "DEPENDS_ON", "SPECIFICATION_FOR", "FILE_MODIFIED", "DISTRIBUTION_ARTIFACT", "AMENDS", "DOCUMENTATION_OF", "GENERATED_FROM", "STATIC_LINK", "OTHER", "BUILD_TOOL_OF", "TEST_CASE_OF", "PACKAGE_OF", "DESCENDANT_OF", "FILE_DELETED", "EXPANDED_FROM_ARCHIVE", "DEV_TOOL_OF", "EXAMPLE_OF" ] } }, "required" : [ "spdxElementId", "relatedSpdxElement", "relationshipType" ], "additionalProperties" : false } } }, "required" : [ "SPDXID", "creationInfo", "dataLicense", "name", "spdxVersion" ], "additionalProperties" : false } ================================================ FILE: tests/resources/test-derivation-chain.nix ================================================ # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 { system ? builtins.currentSystem, }: let mkTestDerivation = { name, pname, version, command, }: builtins.derivation { inherit name pname system version ; builder = "/bin/sh"; args = [ "-c" command ]; }; first = mkTestDerivation { name = "sbomnix-test-first-1.0"; pname = "sbomnix-test-first"; version = "1.0"; command = "echo first > $out"; }; second = mkTestDerivation { name = "sbomnix-test-second-1.0"; pname = "sbomnix-test-second"; version = "1.0"; command = "echo ${first} > $out"; }; in mkTestDerivation { name = "sbomnix-test-third-1.0"; pname = "sbomnix-test-third"; version = "1.0"; command = "echo ${second} > $out"; } ================================================ FILE: tests/test_builder_runtime.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Focused tests for SBOM builder runtime closure selection.""" import pandas as pd import pytest from common import columns as cols from common.errors import MissingNixDerivationMetadataError, SbomnixError from sbomnix import builder as sbomnix_builder from sbomnix.builder import SbomBuilder from sbomnix.closure import dependency_rows_to_dataframe from sbomnix.runtime import RuntimeClosure TARGET_PATH = "/nix/store/11111111111111111111111111111111-target-1.0" TARGET_DERIVER = "/nix/store/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-target-1.0.drv" GRAPH_ONLY_PATH = "/nix/store/bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb-source" def _builder_double(): builder = object.__new__(SbomBuilder) builder.nix_path = TARGET_PATH builder.buildtime = False builder.target_deriver = TARGET_DERIVER builder.target_component_ref = None builder.include_cpe = False builder.depth = None builder.df_deps = None builder._runtime_output_paths_by_load_path = None return builder def _runtime_closure(output_paths_by_drv, rows=None): return RuntimeClosure( df_deps=dependency_rows_to_dataframe([] if rows is None else rows), output_paths_by_drv=output_paths_by_drv, ) def 
test_runtime_path_info_dependencies_accepts_existing_derivers(monkeypatch): closure = _runtime_closure({TARGET_DERIVER: {TARGET_PATH}}) monkeypatch.setattr( sbomnix_builder, "load_runtime_closure", lambda _path: closure, ) monkeypatch.setattr( sbomnix_builder, "is_loadable_deriver_path", lambda path: path == TARGET_DERIVER, ) builder = _builder_double() loaded = builder._load_runtime_path_info_closure(TARGET_PATH) builder._init_dependencies(loaded) assert loaded.runtime_output_paths_by_load_path == {TARGET_DERIVER: {TARGET_PATH}} assert builder._runtime_output_paths_by_load_path == {TARGET_DERIVER: {TARGET_PATH}} assert builder.df_deps.equals(closure.df_deps) def test_runtime_components_propagate_derivation_loading_failures(monkeypatch): def fail_runtime_components(*_args, **_kwargs): raise ValueError("broken derivation metadata") monkeypatch.setattr( sbomnix_builder, "runtime_derivations_to_dataframe", fail_runtime_components, ) builder = _builder_double() builder._runtime_output_paths_by_load_path = {TARGET_DERIVER: {TARGET_PATH}} with pytest.raises(ValueError, match="broken derivation metadata"): builder._init_runtime_components({TARGET_PATH}) assert builder._runtime_output_paths_by_load_path == {TARGET_DERIVER: {TARGET_PATH}} def test_runtime_components_reject_missing_derivation_metadata(monkeypatch): monkeypatch.setattr( sbomnix_builder, "runtime_derivations_to_dataframe", lambda *_args, **_kwargs: pd.DataFrame(), ) builder = _builder_double() builder._runtime_output_paths_by_load_path = {TARGET_PATH: {TARGET_PATH}} with pytest.raises(MissingNixDerivationMetadataError, match=TARGET_PATH): builder._init_runtime_components({TARGET_PATH}) def test_runtime_deriver_lookup_preserves_typed_errors(monkeypatch): def fail_find_deriver(_path): raise SbomnixError("schema drift") monkeypatch.setattr(sbomnix_builder, "find_deriver", fail_find_deriver) builder = _builder_double() with pytest.raises(SbomnixError, match="schema drift"): builder._resolve_target_deriver(TARGET_PATH) @pytest.mark.parametrize( "deriver", [ "unknown-deriver", "/nix/store/bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb-target-1.0", "/nix/store/cccccccccccccccccccccccccccccccc-missing-1.0.drv", ], ) def test_runtime_path_info_dependencies_uses_output_queries_for_unloadable_derivers( monkeypatch, deriver, ): closure = _runtime_closure({deriver: {TARGET_PATH}}) monkeypatch.setattr( sbomnix_builder, "load_runtime_closure", lambda _path: closure, ) monkeypatch.setattr( sbomnix_builder, "is_loadable_deriver_path", lambda _path: False, ) builder = _builder_double() loaded = builder._load_runtime_path_info_closure(TARGET_PATH) builder._init_dependencies(loaded) assert loaded.runtime_output_paths_by_load_path == {TARGET_PATH: {TARGET_PATH}} assert builder._runtime_output_paths_by_load_path == {TARGET_PATH: {TARGET_PATH}} assert builder.df_deps.equals(closure.df_deps) def test_runtime_path_info_dependencies_accepts_graph_only_references(monkeypatch): rows = [ { "src_path": GRAPH_ONLY_PATH, "src_pname": "source", "target_path": TARGET_PATH, "target_pname": "target-1.0", } ] closure = _runtime_closure({TARGET_DERIVER: {TARGET_PATH}}, rows=rows) monkeypatch.setattr( sbomnix_builder, "load_runtime_closure", lambda _path: closure, ) monkeypatch.setattr( sbomnix_builder, "is_loadable_deriver_path", lambda path: path == TARGET_DERIVER, ) builder = _builder_double() loaded = builder._load_runtime_path_info_closure(TARGET_PATH) builder._init_dependencies(loaded) assert loaded.runtime_output_paths_by_load_path == {TARGET_DERIVER: {TARGET_PATH}} assert 
builder._runtime_output_paths_by_load_path == {TARGET_DERIVER: {TARGET_PATH}} assert builder.df_deps.equals(closure.df_deps) def test_runtime_path_info_dependencies_supports_targets_without_derivers( monkeypatch, ): closure = _runtime_closure({}) monkeypatch.setattr( sbomnix_builder, "load_runtime_closure", lambda _path: closure, ) builder = _builder_double() builder.target_deriver = None loaded = builder._load_runtime_path_info_closure(TARGET_PATH) builder._init_dependencies(loaded) assert loaded.runtime_output_paths_by_load_path == {TARGET_PATH: {TARGET_PATH}} assert builder._runtime_output_paths_by_load_path == {TARGET_PATH: {TARGET_PATH}} def test_target_component_ref_uses_runtime_output_when_deriver_is_unavailable(): builder = _builder_double() builder.target_deriver = None builder.df_sbomdb = pd.DataFrame( [ { cols.STORE_PATH: "/nix/store/runtime-load-path", cols.OUTPUTS: [TARGET_PATH], } ] ) assert builder._resolve_target_component_ref() == "/nix/store/runtime-load-path" def test_target_component_ref_skips_missing_outputs_when_deriver_is_unavailable(): builder = _builder_double() builder.target_deriver = None builder.df_sbomdb = pd.DataFrame( [ { cols.STORE_PATH: "/nix/store/no-outputs", cols.OUTPUTS: float("nan"), }, { cols.STORE_PATH: "/nix/store/runtime-load-path", cols.OUTPUTS: [TARGET_PATH], }, ] ) assert builder._resolve_target_component_ref() == "/nix/store/runtime-load-path" def test_target_component_ref_handles_non_identifier_output_column(monkeypatch): monkeypatch.setattr(cols, "OUTPUTS", "store-outputs") builder = _builder_double() builder.target_deriver = None builder.df_sbomdb = pd.DataFrame( [ { cols.STORE_PATH: "/nix/store/runtime-load-path", cols.OUTPUTS: [TARGET_PATH], } ] ) assert builder._resolve_target_component_ref() == "/nix/store/runtime-load-path" def test_target_component_ref_rejects_missing_runtime_target_metadata(): builder = _builder_double() builder.target_deriver = None builder.df_sbomdb = pd.DataFrame( [ { cols.STORE_PATH: "/nix/store/runtime-load-path", cols.OUTPUTS: ["/nix/store/other-output"], } ] ) with pytest.raises(MissingNixDerivationMetadataError, match=TARGET_PATH): builder._resolve_target_component_ref() ================================================ FILE: tests/test_buildtime_closure.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Focused tests for recursive build-time derivation parsing.""" import pytest from common.errors import InvalidNixJsonError from sbomnix.closure import derivation_dependencies_df def test_recursive_buildtime_dependencies_df_reads_new_derivation_inputs(): drv_infos = { "/nix/store/11111111111111111111111111111111-target-1.0.drv": { "inputs": { "drvs": { "/nix/store/22222222222222222222222222222222-dep-a-1.0.drv": [ "out" ], "/nix/store/33333333333333333333333333333333-dep-b-1.0.drv": [ "out" ], }, "srcs": [ "/nix/store/44444444444444444444444444444444-builder.sh", ], } } } df = derivation_dependencies_df(drv_infos) rows = df.sort_values("src_path").to_dict("records") assert rows == [ { "src_path": "/nix/store/22222222222222222222222222222222-dep-a-1.0.drv", "src_pname": "dep-a-1.0.drv", "target_path": "/nix/store/11111111111111111111111111111111-target-1.0.drv", "target_pname": "target-1.0.drv", }, { "src_path": "/nix/store/33333333333333333333333333333333-dep-b-1.0.drv", "src_pname": "dep-b-1.0.drv", "target_path": "/nix/store/11111111111111111111111111111111-target-1.0.drv", 
"target_pname": "target-1.0.drv", }, { "src_path": "/nix/store/44444444444444444444444444444444-builder.sh", "src_pname": "builder.sh", "target_path": "/nix/store/11111111111111111111111111111111-target-1.0.drv", "target_pname": "target-1.0.drv", }, ] def test_recursive_buildtime_dependencies_df_rejects_legacy_input_drvs(): drv_infos = { "/nix/store/11111111111111111111111111111111-target-1.0.drv": { "inputDrvs": { "/nix/store/22222222222222222222222222222222-dep-a-1.0.drv": ["out"], } } } with pytest.raises(InvalidNixJsonError, match="unsupported legacy `inputDrvs`"): derivation_dependencies_df(drv_infos) def test_recursive_buildtime_dependencies_df_rejects_missing_input_schema(): drv_infos = { "/nix/store/11111111111111111111111111111111-target-1.0.drv": { "name": "target-1.0", } } with pytest.raises(InvalidNixJsonError, match="missing derivation inputs"): derivation_dependencies_df(drv_infos) def test_recursive_buildtime_dependencies_df_accepts_empty_modern_inputs(): drv_infos = { "/nix/store/11111111111111111111111111111111-leaf-1.0.drv": { "inputs": { "drvs": {}, "srcs": [], } } } df = derivation_dependencies_df(drv_infos) assert df.empty assert list(df.columns) == [ "src_path", "src_pname", "target_path", "target_pname", ] def test_recursive_buildtime_dependencies_df_rejects_missing_source_inputs(): drv_infos = { "/nix/store/11111111111111111111111111111111-target-1.0.drv": { "inputs": { "drvs": {}, } } } with pytest.raises(InvalidNixJsonError, match="missing `inputs.srcs`"): derivation_dependencies_df(drv_infos) def test_recursive_buildtime_dependencies_df_rejects_missing_derivation_inputs(): drv_infos = { "/nix/store/11111111111111111111111111111111-target-1.0.drv": { "inputs": { "srcs": [], } } } with pytest.raises(InvalidNixJsonError, match="missing `inputs.drvs`"): derivation_dependencies_df(drv_infos) ================================================ FILE: tests/test_cli_conventions.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Focused tests for shared CLI conventions.""" import re import subprocess from pathlib import Path import pytest from common.pkgmeta import _dev_version, get_py_pkg_version from nixgraph import main as nixgraph_main from nixmeta import main as nixmeta_main from nixupdate import nix_outdated from provenance import main as provenance_main from repology import repology_cli, repology_cve from sbomnix import main as sbomnix_main from vulnxscan import osv as osv_cli from vulnxscan import vulnxscan_cli def _stringify(value): if isinstance(value, Path): return value.as_posix() return str(value) CLI_ARG_CASES = [ (sbomnix_main.getargs, [".#pkg"]), (nixgraph_main.getargs, [".#pkg"]), (nixmeta_main._getargs, []), (nix_outdated.getargs, [".#pkg"]), (vulnxscan_cli.getargs, [".#pkg"]), (osv_cli.getargs, ["sbom.json"]), ( repology_cli.getargs, ["--pkg_exact", "openssl", "--repository", "nix_unstable"], ), (repology_cve.getargs, ["openssl", "3.1.0"]), (provenance_main.getargs, [".#pkg"]), ] @pytest.mark.parametrize( "getargs", [ sbomnix_main.getargs, nixgraph_main.getargs, nixmeta_main._getargs, nix_outdated.getargs, vulnxscan_cli.getargs, osv_cli.getargs, repology_cli.getargs, repology_cve.getargs, provenance_main.getargs, ], ) def test_cli_version_flags_exit_zero(getargs, capsys): with pytest.raises(SystemExit) as excinfo: getargs(["--version"]) assert excinfo.value.code == 0 assert capsys.readouterr().out.strip() == 
get_py_pkg_version() @pytest.mark.parametrize( ("getargs", "base_argv"), CLI_ARG_CASES, ) def test_cli_verbose_default_is_normal_info(getargs, base_argv): assert getargs(base_argv).verbose == 0 @pytest.mark.parametrize( ("getargs", "base_argv"), CLI_ARG_CASES, ) @pytest.mark.parametrize( "verbose_argv", [ ["-v"], ["--verbose=1"], ["--verbose", "1"], ], ) def test_cli_verbose_level_one_forms_match(getargs, base_argv, verbose_argv): assert getargs([*verbose_argv, *base_argv]).verbose == 1 @pytest.mark.parametrize( ("getargs", "base_argv"), CLI_ARG_CASES, ) @pytest.mark.parametrize( "verbose_argv", [ ["-v", "-v"], ["-vv"], ["-v", "2"], ["--verbose=2"], ["--verbose", "2"], ], ) def test_cli_verbose_level_two_forms_match(getargs, base_argv, verbose_argv): assert getargs([*verbose_argv, *base_argv]).verbose == 2 @pytest.mark.parametrize( ("getargs", "argv", "expected_out"), [ (nixgraph_main.getargs, ["-o", "graph.dot", ".#pkg"], "graph.dot"), (nixmeta_main._getargs, ["-o", "meta.csv"], "meta.csv"), (nix_outdated.getargs, ["-o", "nix_outdated.csv", ".#pkg"], "nix_outdated.csv"), (vulnxscan_cli.getargs, ["-o", "vulns.csv", ".#pkg"], "vulns.csv"), (osv_cli.getargs, ["-o", "osv.csv", "sbom.json"], "osv.csv"), ( repology_cli.getargs, [ "-o", "repology.csv", "--pkg_exact", "openssl", "--repository", "nix_unstable", ], "repology.csv", ), ( repology_cve.getargs, ["-o", "repology_cves.csv", "openssl", "3.1.0"], "repology_cves.csv", ), ( provenance_main.getargs, ["-o", "provenance.json", ".#pkg"], "provenance.json", ), ], ) def test_single_output_clis_accept_short_o_alias(getargs, argv, expected_out): assert _stringify(getargs(argv).out) == expected_out _REPO_ROOT = Path(__file__).resolve().parents[1] _DEV_VERSION_RE = re.compile( r"^(?P<base>\d+\.\d+\.\d+)\+g(?P<hash>[0-9a-f]+)(?P<dirty>\.dirty)?$" ) def test_dev_version_format_matches_nix_package_format(): """_dev_version() must produce the same PEP 440 local-version format as the Nix postPatch hook so that devshell and packaged invocations report identical strings for the same checkout. Expected format: <base>+g<hash>[.dirty] The '.dirty' suffix matches what pip writes to METADATA after normalising the '-dirty' suffix that the Nix dirtyShortRev attribute appends. """ version = _dev_version() m = _DEV_VERSION_RE.match(version) assert m, f"_dev_version() returned {version!r}; expected <base>+g<hash>[.dirty]" expected_base = (_REPO_ROOT / "VERSION").read_text().strip() assert m.group("base") == expected_base, ( f"base {m.group('base')!r} does not match VERSION file {expected_base!r}" ) expected_hash = subprocess.run( ["git", "rev-parse", "--short", "HEAD"], capture_output=True, text=True, check=True, cwd=_REPO_ROOT, ).stdout.strip() assert m.group("hash") == expected_hash, ( f"hash {m.group('hash')!r} does not match HEAD {expected_hash!r}" ) is_dirty = bool( subprocess.run( ["git", "status", "--porcelain", "--untracked-files=no"], capture_output=True, text=True, check=True, cwd=_REPO_ROOT, ).stdout.strip() ) assert bool(m.group("dirty")) == is_dirty, ( f"dirty flag in {version!r} does not match working-tree state (dirty={is_dirty})" ) @pytest.mark.slow def test_dev_version_parity_with_nix_package_version(): """_dev_version() must equal the version written into the Nix-built package's dist-info METADATA for the same checkout.
This exercises the full packaging pipeline — gitSuffix in nix/packages.nix, the postPatch VERSION rewrite, setuptools wheel build, and pip normalisation — and compares the result with _dev_version(), so any drift between the Nix packaging path and the Python fallback is caught. Evaluating the Nix version attribute alone is not sufficient because postPatch could write a different string than the attribute implies. """ system = subprocess.run( ["nix", "eval", "--impure", "--raw", "--expr", "builtins.currentSystem"], capture_output=True, text=True, check=True, cwd=_REPO_ROOT, ).stdout.strip() out_path = subprocess.run( [ "nix", "build", f".#packages.{system}.sbomnix", "--print-out-paths", "--no-link", ], capture_output=True, text=True, check=True, cwd=_REPO_ROOT, ).stdout.strip() metadata_files = list( Path(out_path).glob("lib/python*/site-packages/sbomnix-*.dist-info/METADATA") ) assert metadata_files, f"no sbomnix dist-info METADATA found under {out_path}" version_line = next( line for line in metadata_files[0].read_text().splitlines() if line.startswith("Version:") ) installed_version = version_line.split(":", 1)[1].strip() assert _dev_version() == installed_version, ( f"devshell version {_dev_version()!r} != " f"installed METADATA version {installed_version!r}" ) def test_repology_cli_uses_uppercase_v_for_version_filter(): args = repology_cli.getargs( [ "-V", "^3\\.1\\.", "--pkg_exact", "openssl", "--repository", "nix_unstable", ] ) assert args.re_version == "^3\\.1\\." ================================================ FILE: tests/test_cli_error_boundaries.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Focused tests for CLI error-to-exit-code boundaries.""" from types import SimpleNamespace import pytest from common.errors import SbomnixError from nixgraph import main as nixgraph_main from nixmeta import main as nixmeta_main from nixupdate import nix_outdated from provenance import main as provenance_main from vulnxscan import osv as osv_cli from vulnxscan import vulnxscan_cli def test_vulnxscan_invalid_sbom_exits_nonzero(tmp_path, monkeypatch): invalid_sbom = tmp_path / "invalid.json" invalid_sbom.write_text("not json", encoding="utf-8") args = SimpleNamespace( TARGET=invalid_sbom.as_posix(), verbose=0, out="vulns.csv", buildtime=False, sbom=True, whitelist=None, triage=False, nixprs=False, ) monkeypatch.setattr(vulnxscan_cli, "getargs", lambda: args) monkeypatch.setattr(vulnxscan_cli, "set_log_verbosity", lambda _verbosity: None) monkeypatch.setattr( vulnxscan_cli, "exit_unless_command_exists", lambda _command: None ) with pytest.raises(SystemExit) as excinfo: vulnxscan_cli.main() assert excinfo.value.code == 1 def test_osv_invalid_sbom_exits_nonzero(tmp_path, monkeypatch): missing_sbom = tmp_path / "missing.json" args = SimpleNamespace( SBOM=missing_sbom, verbose=0, out="osv.csv", ecosystems="GIT", ) monkeypatch.setattr(osv_cli, "getargs", lambda: args) monkeypatch.setattr(osv_cli, "set_log_verbosity", lambda _verbosity: None) with pytest.raises(SystemExit) as excinfo: osv_cli.main() assert excinfo.value.code == 1 @pytest.mark.parametrize( ("module", "args", "prep", "patched_name"), [ ( nix_outdated, SimpleNamespace( NIXREF=".#broken", buildtime=False, local=False, out="nix_outdated.csv", verbose=0, ), lambda monkeypatch: None, "resolve_nix_target", ), ( nixgraph_main, SimpleNamespace( NIXREF=".#broken", buildtime=False, depth=1, inverse=None, 
out="graph.png", colorize=None, until=None, pathnames=False, verbose=0, ), lambda monkeypatch: None, "resolve_nix_target", ), ( vulnxscan_cli, SimpleNamespace( TARGET=".#broken", verbose=0, out="vulns.csv", buildtime=False, sbom=False, whitelist=None, triage=False, nixprs=False, ), lambda monkeypatch: monkeypatch.setattr( vulnxscan_cli, "exit_unless_command_exists", lambda _command: None ), "resolve_nix_target", ), ( nixmeta_main, SimpleNamespace( flakeref="github:NixOS/nixpkgs?ref=nixos-unstable", out="nixmeta.csv", append=False, verbose=0, ), lambda monkeypatch: None, "exit_unless_command_exists", ), ( provenance_main, SimpleNamespace( target="/nix/store/broken.drv", recursive=False, out=None, verbose=0, ), lambda monkeypatch: monkeypatch.setattr( provenance_main, "get_env_metadata", lambda: provenance_main.BuildMeta("", "", "", "", "", "{}", "{}"), ), "provenance", ), ], ) def test_cli_translates_sbomnix_errors_to_exit_code_1( monkeypatch, module, args, prep, patched_name ): prep(monkeypatch) monkeypatch.setattr(module, "getargs", lambda: args, raising=False) monkeypatch.setattr(module, "_getargs", lambda: args, raising=False) monkeypatch.setattr(module, "set_log_verbosity", lambda _verbosity: None) monkeypatch.setattr( module, patched_name, lambda *_args, **_kwargs: (_ for _ in ()).throw( SbomnixError("expected failure") ), ) with pytest.raises(SystemExit) as excinfo: module.main() assert excinfo.value.code == 1 ================================================ FILE: tests/test_cli_smoke.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Offline smoke tests for lightweight CLI entrypoint boundaries.""" from types import SimpleNamespace import pandas as pd from common.df import df_from_csv_file from repology import repology_cve from vulnxscan import osv as osv_cli def test_repology_cve_main_writes_output_csv(tmp_path, monkeypatch): out_path = tmp_path / "repology_cves.csv" reported = [] monkeypatch.setattr( repology_cve, "getargs", lambda: SimpleNamespace( PKG_NAME="openssl", PKG_VERSION="3.1.0", out=out_path.as_posix(), verbose=0, ), ) monkeypatch.setattr(repology_cve, "set_log_verbosity", lambda _verbosity: None) monkeypatch.setattr( repology_cve, "query_cve", lambda pkg_name, pkg_version: pd.DataFrame( [ { "package": pkg_name, "version": pkg_version, "cve": "CVE-2024-1111", } ] ), ) monkeypatch.setattr( repology_cve, "report_cves", lambda df: reported.append(df.copy(deep=True)) or True, ) repology_cve.main() assert len(reported) == 1 assert df_from_csv_file(out_path).to_dict(orient="records") == [ { "package": "openssl", "version": "3.1.0", "cve": "CVE-2024-1111", } ] def test_osv_main_writes_output_csv_with_requested_ecosystems(tmp_path, monkeypatch): sbom_path = tmp_path / "sbom.cdx.json" out_path = tmp_path / "osv.csv" sbom_path.write_text( '{"metadata":{"component":{"name":"hello","version":"1.0"}},"components":[]}', encoding="utf-8", ) class FakeOSV: def __init__(self): self.calls = [] def query_vulns(self, sbom, ecosystems): self.calls.append((sbom, ecosystems)) def to_dataframe(self): return pd.DataFrame( [ { "vuln_id": "OSV-1", "modified": "2024-01-01", "package": "hello", "version": "1.0", } ] ) fake_osv = FakeOSV() monkeypatch.setattr( osv_cli, "getargs", lambda: SimpleNamespace( SBOM=sbom_path, ecosystems="GIT, OSS-Fuzz", out=out_path.as_posix(), verbose=0, ), ) monkeypatch.setattr(osv_cli, "set_log_verbosity", lambda _verbosity: None) 
monkeypatch.setattr(osv_cli, "OSV", lambda: fake_osv) osv_cli.main() assert fake_osv.calls == [(sbom_path.as_posix(), ["GIT", "OSS-Fuzz"])] assert df_from_csv_file(out_path).to_dict(orient="records") == [ { "vuln_id": "OSV-1", "modified": "2024-01-01", "package": "hello", "version": "1.0", } ] ================================================ FILE: tests/test_common_log.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Focused tests for shared logging levels.""" import logging import pytest from common.log import LOG, LOG_SPAM, LOG_TRACE, LOG_VERBOSE, set_log_verbosity @pytest.mark.parametrize( ("verbosity", "level"), [ (0, logging.INFO), (1, LOG_VERBOSE), (2, logging.DEBUG), (3, LOG_SPAM), (99, LOG_SPAM), (-1, logging.INFO), ], ) def test_set_log_verbosity_maps_cli_levels_to_logging_levels(verbosity, level): try: set_log_verbosity(verbosity) assert LOG.level == level finally: set_log_verbosity(0) def test_custom_log_level_names_are_registered(): assert logging.getLevelName(LOG_VERBOSE) == "VERBOSE" assert logging.getLevelName(LOG_SPAM) == "SPAM" assert LOG_TRACE == LOG_SPAM def test_verbose_level_is_between_info_and_debug(): try: set_log_verbosity(1) assert LOG.isEnabledFor(logging.INFO) assert LOG.isEnabledFor(LOG_VERBOSE) assert not LOG.isEnabledFor(logging.DEBUG) finally: set_log_verbosity(0) ================================================ FILE: tests/test_common_versioning.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Unit tests for shared version and package-name helpers.""" import pytest from hypothesis import example, given from hypothesis import strategies as st from packaging.version import Version from common.package_names import nix_to_repology_pkg_name from common.versioning import number_distance, parse_version, version_distance NON_NEGATIVE_NUMBERS = st.one_of( st.integers(min_value=0, max_value=10**18), st.floats(min_value=0.0, allow_nan=False, allow_infinity=False), ) NEGATIVE_NUMBERS = st.one_of( st.integers(max_value=-1), st.floats(max_value=-0.001, allow_nan=False, allow_infinity=False), ) VERSION_TEXT = st.text(max_size=120) @pytest.mark.parametrize( ("left", "right", "expected"), [ (0, 0, 1.0), (0, 1, 0.5), (-1, 1, 0.0), ], ) def test_number_distance_documents_edge_cases(left, right, expected): assert number_distance(left, right) == expected @given(NON_NEGATIVE_NUMBERS, NON_NEGATIVE_NUMBERS) @example(0, 0) @example(0, 1) def test_number_distance_is_symmetric_for_non_negative_numbers(left, right): assert number_distance(left, right) == number_distance(right, left) @given(NON_NEGATIVE_NUMBERS, NON_NEGATIVE_NUMBERS) @example(0, 0) @example(0, 1) def test_number_distance_is_bounded_for_non_negative_numbers(left, right): result = number_distance(left, right) assert 0.0 <= result <= 1.0 @given(NON_NEGATIVE_NUMBERS) @example(0) def test_number_distance_identity_for_non_negative_numbers(value): assert number_distance(value, value) == 1.0 @given(NEGATIVE_NUMBERS, NON_NEGATIVE_NUMBERS) def test_number_distance_returns_zero_for_negative_arguments(negative, value): assert number_distance(negative, value) == 0.0 assert number_distance(value, negative) == 0.0 def test_parse_version_normalizes_suffixes(): parsed = parse_version("openssl-3.0p1") assert parsed == Version("3.0+1") @given(VERSION_TEXT) def 
test_parse_version_never_raises_for_text(value): parse_version(value) @given(VERSION_TEXT) def test_parse_version_is_idempotent_after_string_roundtrip(value): parsed = parse_version(value) if parsed is not None: assert parse_version(str(parsed)) == parsed def test_version_distance_handles_identical_and_invalid_versions(): assert version_distance("1.2.3", "1.2.3") == 1.0 assert version_distance("release", "1.2.3") == 0.0 @given(VERSION_TEXT, VERSION_TEXT) def test_version_distance_is_bounded_for_text(left, right): result = version_distance(left, right) assert 0.0 <= result <= 1.0 def test_nix_to_repology_pkg_name_handles_prefixes_and_special_cases(): assert nix_to_repology_pkg_name("python311-requests") == "python:requests" assert nix_to_repology_pkg_name("ruby-rake") == "ruby:rake" assert nix_to_repology_pkg_name("python3") == "python" assert nix_to_repology_pkg_name("libtiff") == "tiff" ================================================ FILE: tests/test_compare_deps.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Tests for the dependency comparison test harness.""" import pandas as pd from tests.compare_deps import compare_dependencies def test_compare_dependencies_filters_darwin_buildtime_source_paths(): """Darwin nixgraph output may include graph-only source paths without derivers.""" target_drv = "/nix/store/hash-hello-2.12.3.drv" dependency_drv = "/nix/store/hash-dependency.drv" graph_only_paths = [ "/nix/store/hash-meson.build.in", "/nix/store/hash-Info.plist", "/nix/store/hash-meson.options", "/nix/store/hash-meson.build", "/nix/store/hash-lua-setup-hook", "/nix/store/hash-remove-references-to", "/nix/store/hash-Architectures.xcspec", "/nix/store/hash-ToolchainInfo.plist", "/nix/store/hash-ProductTypes.xcspec", "/nix/store/hash-PackageTypes.xcspec", ] df_sbom = pd.DataFrame( { "drv_path": [target_drv, dependency_drv], "output_path": ["/nix/store/hash-hello", "/nix/store/hash-dependency"], "ref": [target_drv, dependency_drv], "depends_on": [dependency_drv, ""], } ) df_graph = pd.DataFrame( { "target_path": [target_drv] * (1 + len(graph_only_paths)), "src_path": [dependency_drv, *graph_only_paths], } ) assert compare_dependencies( df_sbom, df_graph, sbom_type="runtime_and_buildtime", graph_type="buildtime", ) ================================================ FILE: tests/test_components.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Focused tests for SBOM component dataframe helpers.""" from sbomnix import components as sbomnix_components class FakeDrv: """Minimal derivation double for component dataframe tests.""" def __init__(self, store_path, name): self.store_path = store_path self.name = name self.outputs = [] self.cpe_set = False def set_cpe(self, _generator): self.cpe_set = True def to_dict(self): return { "store_path": self.store_path, "name": self.name, "outputs": self.outputs, "cpe_set": self.cpe_set, } def test_recursive_derivations_to_dataframe_skips_missing_paths(): derivations = { "/nix/store/first.drv": FakeDrv("/nix/store/first.drv", "first"), "/nix/store/second.drv": FakeDrv("/nix/store/second.drv", "second"), } # Keep the test focused on component assembly without loading CPE data. 
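# The "missing.drv" path below has no derivation double in the mapping, so it must be dropped from the result.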
df_components = sbomnix_components.recursive_derivations_to_dataframe( [ "/nix/store/missing.drv", "/nix/store/second.drv", "/nix/store/first.drv", ], derivations, include_cpe=False, ) assert df_components.to_dict("records") == [ { "store_path": "/nix/store/first.drv", "name": "first", "outputs": [], "cpe_set": True, }, { "store_path": "/nix/store/second.drv", "name": "second", "outputs": [], "cpe_set": True, }, ] def test_runtime_derivations_to_dataframe_filters_outputs_before_loading(monkeypatch): load_calls = [] def fake_load_many(paths, output_paths_by_drv=None, ignore_missing=False): load_calls.append((paths, output_paths_by_drv, ignore_missing)) return { "/nix/store/first.drv": FakeDrv("/nix/store/first.drv", "first"), "/nix/store/second.drv": FakeDrv("/nix/store/second.drv", "second"), } monkeypatch.setattr(sbomnix_components, "load_many", fake_load_many) df_components = sbomnix_components.runtime_derivations_to_dataframe( { "/nix/store/first-out", "/nix/store/second-out", }, { "/nix/store/first.drv": { "/nix/store/first-out", "/nix/store/ignored-first-out", }, "/nix/store/second.drv": { "/nix/store/second-out", }, "/nix/store/ignored.drv": { "/nix/store/ignored-out", }, }, include_cpe=False, ) assert load_calls == [ ( ["/nix/store/first.drv", "/nix/store/second.drv"], { "/nix/store/first.drv": {"/nix/store/first-out"}, "/nix/store/second.drv": {"/nix/store/second-out"}, }, True, ) ] assert df_components["store_path"].to_list() == [ "/nix/store/first.drv", "/nix/store/second.drv", ] ================================================ FILE: tests/test_cpe.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Focused tests for CPE generation.""" import pandas as pd from sbomnix import cpe class FakeCache: def __init__(self, df): self.df = df def get(self, _url): return self.df def set(self, *_args, **_kwargs): raise AssertionError("cache set should not be called for populated data") def test_cpe_uses_indexed_unique_product_vendor(monkeypatch): monkeypatch.setattr( cpe, "LockedDfCache", lambda: FakeCache( pd.DataFrame( { "product": ["openssl", "curl"], "vendor": ["openssl_project", "curl_project"], } ) ), ) generated = cpe.CPE().generate("openssl", "3.0.0") assert generated == "cpe:2.3:a:openssl_project:openssl:3.0.0:*:*:*:*:*:*:*" def test_cpe_ambiguous_product_falls_back_to_product_name(monkeypatch): monkeypatch.setattr( cpe, "LockedDfCache", lambda: FakeCache( pd.DataFrame( { "product": ["openssl", "openssl"], "vendor": ["first_vendor", "second_vendor"], } ) ), ) generated = cpe.CPE().generate("openssl", "3.0.0") assert generated == "cpe:2.3:a:openssl:openssl:3.0.0:*:*:*:*:*:*:*" ================================================ FILE: tests/test_dependency_index.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Focused tests for indexed SBOM dependency lookups.""" import pandas as pd from sbomnix.dependency_index import build_dependency_index def test_build_dependency_index_combines_runtime_and_buildtime_edges(): """Index both output-path and derivation-path dependencies for one component.""" df_sbomdb = pd.DataFrame( [ { "store_path": "/nix/store/target.drv", "purl": "pkg:nix/target@1.0", "outputs": ["/nix/store/target-out"], }, { "store_path": "/nix/store/runtime-dep.drv", "purl": "pkg:nix/runtime-dep@1.0", "outputs": 
["/nix/store/runtime-dep-out"], }, { "store_path": "/nix/store/build-dep.drv", "purl": "pkg:nix/build-dep@1.0", "outputs": ["/nix/store/build-dep-out"], }, ] ) df_outputs = df_sbomdb.explode("outputs") df_deps = pd.DataFrame( [ { "src_path": "/nix/store/runtime-dep-out", "target_path": "/nix/store/target-out", }, { "src_path": "/nix/store/build-dep.drv", "target_path": "/nix/store/target.drv", }, ] ) index = build_dependency_index(df_deps, df_sbomdb, df_outputs, uid="store_path") target_drv = next(df_sbomdb.itertuples()) assert index.lookup(target_drv) == [ "/nix/store/build-dep.drv", "/nix/store/runtime-dep.drv", ] assert index.lookup(target_drv, uid="purl") == [ "pkg:nix/build-dep@1.0", "pkg:nix/runtime-dep@1.0", ] def test_build_dependency_index_returns_none_without_dependencies(): """Return no lookup entries when the component has no indexed dependencies.""" df_sbomdb = pd.DataFrame( [ { "store_path": "/nix/store/target.drv", "purl": "pkg:nix/target@1.0", "outputs": ["/nix/store/target-out"], } ] ) index = build_dependency_index( pd.DataFrame(), df_sbomdb, df_sbomdb.explode("outputs"), uid="store_path", ) assert index.lookup(next(df_sbomdb.itertuples())) is None ================================================ FILE: tests/test_derivation_hardening.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Unit tests for derivation loading and SPDX hardening.""" import json from collections import namedtuple from types import SimpleNamespace from common import spdx as common_spdx from sbomnix import cdx as sbomnix_cdx from sbomnix import derivation as sbomnix_derivation from sbomnix import exporters as sbomnix_exporters def test_load_derivation_uses_nix_derivation_show(monkeypatch): drv_path = "/nix/store/0aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-hello-2.12.3.drv" out_path = "/nix/store/1bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb-hello-2.12.3" doc_path = "/nix/store/2ccccccccccccccccccccccccccccccc-hello-2.12.3-doc" calls = [] def fake_nix_cmd(*args): return ["nix", *args] def fake_exec_cmd(cmd, **_kwargs): calls.append(cmd) return SimpleNamespace( stdout=json.dumps( { "version": 4, "derivations": { drv_path: { "name": "hello-2.12.3", "version": 4, "system": "x86_64-linux", "outputs": { "doc": {"path": doc_path}, "out": {"path": out_path}, }, "env": { "name": "hello-2.12.3", "pname": "hello", "out": out_path, "outputs": "out doc", "version": "2.12.3", "urls": "https://example.test/hello.tar.gz", }, } }, } ) ) monkeypatch.setattr(sbomnix_derivation, "nix_cmd", fake_nix_cmd) monkeypatch.setattr(sbomnix_derivation, "exec_cmd", fake_exec_cmd) drv = sbomnix_derivation.load(drv_path, None) assert calls == [["nix", "derivation", "show", drv_path]] assert drv.store_path == drv_path assert drv.name == "hello-2.12.3" assert drv.pname == "hello" assert drv.version == "2.12.3" assert drv.system == "x86_64-linux" assert drv.out == out_path assert drv.outputs == [out_path, doc_path] assert drv.urls == "https://example.test/hello.tar.gz" assert drv.purl == "pkg:nix/hello@2.12.3" def test_canonicalize_spdx_license_id_canonicalizes_aliases(): expected_canonical_ids = { "GPL-2.0+": "GPL-2.0-or-later", "GPL-3.0": "GPL-3.0-only", "GPL-3.0+": "GPL-3.0-or-later", "LGPL-2.1": "LGPL-2.1-only", "LGPL-2.1+": "LGPL-2.1-or-later", } for license_id, canonical_id in expected_canonical_ids.items(): assert common_spdx.canonicalize_spdx_license_id(license_id) == canonical_id assert ( 
common_spdx.canonicalize_spdx_license_id("LicenseRef-scancode-free-unknown") == "LicenseRef-scancode-free-unknown" ) assert common_spdx.canonicalize_spdx_license_id("MIT AND Apache-2.0") is None assert common_spdx.canonicalize_spdx_license_id("not-a-license") is None def test_cdx_and_spdx_license_exporters_use_canonical_spdx_ids(): drv_type = namedtuple( "Drv", [ "name", "pname", "version", "purl", "cpe", "meta_description", "meta_license_spdxid", "meta_license_short", "patches", "outputs", "store_path", "out", "urls", "meta_homepage", "meta_position", ], ) drv = drv_type( name="hello-2.12.3", pname="hello", version="2.12.3", purl="pkg:nix/hello@2.12.3", cpe="", meta_description="Hello", meta_license_spdxid=( "GPL-3.0;GPL-3.0+;LGPL-2.1;LGPL-2.1+;LicenseRef-scancode-free-unknown" ), meta_license_short="GPL2+", patches="", outputs=["/nix/store/out"], store_path="/nix/store/0aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-hello-2.12.3.drv", out="/nix/store/out", urls="", meta_homepage="", meta_position="", ) component = sbomnix_cdx._drv_to_cdx_component(drv) package = sbomnix_exporters._drv_to_spdx_package(drv) assert component["licenses"] == [ {"license": {"id": "GPL-3.0-only"}}, {"license": {"id": "GPL-3.0-or-later"}}, {"license": {"id": "LGPL-2.1-only"}}, {"license": {"id": "LGPL-2.1-or-later"}}, {"license": {"id": "LicenseRef-scancode-free-unknown"}}, ] assert package["licenseInfoFromFiles"] == [ "GPL-3.0-only", "GPL-3.0-or-later", "LGPL-2.1-only", "LGPL-2.1-or-later", "LicenseRef-scancode-free-unknown", ] def test_cdx_falls_back_to_license_short_name_when_spdx_id_is_invalid(): drv_type = namedtuple( "Drv", [ "name", "pname", "version", "purl", "cpe", "meta_description", "meta_license_spdxid", "meta_license_short", "patches", "outputs", "store_path", "out", "urls", "meta_homepage", "meta_position", ], ) drv = drv_type( name="hello-2.12.3", pname="hello", version="2.12.3", purl="pkg:nix/hello@2.12.3", cpe="", meta_description="Hello", meta_license_spdxid="not-a-license", meta_license_short="Custom Short Name", patches="", outputs=["/nix/store/out"], store_path="/nix/store/0aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-hello-2.12.3.drv", out="/nix/store/out", urls="", meta_homepage="", meta_position="", ) component = sbomnix_cdx._drv_to_cdx_component(drv) package = sbomnix_exporters._drv_to_spdx_package(drv) assert component["licenses"] == [{"license": {"name": "Custom Short Name"}}] assert "licenseInfoFromFiles" not in package assert package["licenseConcluded"] == "NOASSERTION" ================================================ FILE: tests/test_flakeref_resolution.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Focused tests for flakeref resolution helpers.""" import string from types import SimpleNamespace import pytest from hypothesis import given from hypothesis import strategies as st from common.errors import FlakeRefRealisationError, FlakeRefResolutionError from common.flakeref import ( parse_nixos_configuration_ref, quote_nix_attr_segment, try_resolve_flakeref, ) from common.log import LOG_VERBOSE class CapturingLogger: def __init__(self): self.records = [] def info(self, msg, *args): self.records.append(("info", msg, args)) def log(self, level, msg, *args): self.records.append(("log", level, msg, args)) def debug(self, msg, *args): self.records.append(("debug", msg, args)) SAFE_PATH_CHARS = string.ascii_letters + string.digits + "._-" PATH_SEGMENTS = st.text(SAFE_PATH_CHARS, 
min_size=1, max_size=16).filter( lambda segment: segment not in {".", ".."} ) PLAIN_MISSING_PATHS = st.lists(PATH_SEGMENTS, min_size=1, max_size=3).map( lambda parts: "hypothesis-missing/" + "/".join(parts) ) FLAKE_ATTRS = st.text(SAFE_PATH_CHARS, min_size=1, max_size=24) FLAKE_REFS = st.one_of( FLAKE_ATTRS.map(lambda attr: f".#{attr}"), FLAKE_ATTRS.map(lambda attr: f"nixpkgs?ref=nixos-unstable#{attr}"), st.builds( lambda owner, repo, attr: f"github:{owner}/{repo}#{attr}", PATH_SEGMENTS, PATH_SEGMENTS, FLAKE_ATTRS, ), ) @pytest.mark.parametrize( ("name", "quoted"), [ ("host${foo}.é", r'"host\${foo}.é"'), ('quote"slash\\tab\tnewline\n', r'"quote\"slash\\tab\tnewline\n"'), ], ) def test_nixos_configuration_attr_segments_use_nix_string_escaping(name, quoted): assert quote_nix_attr_segment(name) == quoted assert parse_nixos_configuration_ref(f"/flake#nixosConfigurations.{quoted}") == ( "/flake", name, ) def test_nixos_configuration_parser_rejects_unescaped_interpolation(): assert parse_nixos_configuration_ref('/flake#nixosConfigurations."${foo}"') is None def test_try_resolve_flakeref_uses_argv_lists(): calls = [] def fake_exec_cmd(cmd, **kwargs): calls.append((cmd, kwargs)) return SimpleNamespace(stdout="/nix/store/resolved\n", stderr="", returncode=0) resolved = try_resolve_flakeref( "/tmp/my flake#pkg", force_realise=True, impure=True, exec_cmd_fn=fake_exec_cmd, ) assert resolved == "/nix/store/resolved" assert calls == [ ( [ "nix", "build", "--no-link", "--print-out-paths", "/tmp/my flake#pkg", "--extra-experimental-features", "flakes", "--extra-experimental-features", "nix-command", "--impure", ], {"raise_on_error": False, "return_error": True, "log_error": False}, ), ] def test_try_resolve_flakeref_can_return_derivation_path(): calls = [] def fake_exec_cmd(cmd, **kwargs): calls.append((cmd, kwargs)) return SimpleNamespace( stdout=( '{"derivations": {' '"11111111111111111111111111111111-package-1.0.drv":' '{"name": "package-1.0"}}, "version": 4}' ), stderr="", returncode=0, ) resolved = try_resolve_flakeref( "nixpkgs#package", derivation=True, impure=True, exec_cmd_fn=fake_exec_cmd, ) assert resolved == "/nix/store/11111111111111111111111111111111-package-1.0.drv" assert calls == [ ( [ "nix", "derivation", "show", "nixpkgs#package", "--extra-experimental-features", "flakes", "--extra-experimental-features", "nix-command", "--impure", ], {"raise_on_error": False, "return_error": True, "log_error": False}, ) ] def test_try_resolve_flakeref_logs_flake_progress_at_info(): logger = CapturingLogger() def fake_exec_cmd(_cmd, **_kwargs): return SimpleNamespace( stdout="/nix/store/resolved\n", stderr="", returncode=0, ) resolved = try_resolve_flakeref( ".#hello", force_realise=True, exec_cmd_fn=fake_exec_cmd, log=logger, ) assert resolved == "/nix/store/resolved" assert ( "info", "Realising flakeref '%s'", (".#hello",), ) in logger.records def test_try_resolve_flakeref_keeps_plain_path_probe_verbose(): logger = CapturingLogger() def fake_exec_cmd(_cmd, **_kwargs): return SimpleNamespace(stdout="", stderr="dummy eval failure", returncode=1) resolved = try_resolve_flakeref( "/nix/store/not-a-flake-output", exec_cmd_fn=fake_exec_cmd, log=logger, ) assert resolved is None assert ( "log", LOG_VERBOSE, "Evaluating '%s'", ("/nix/store/not-a-flake-output",), ) in (logger.records) assert not [record for record in logger.records if record[0] == "info"] def test_try_resolve_flakeref_raises_on_failed_force_realise(): def fake_exec_cmd(_cmd, **_kwargs): return SimpleNamespace(stdout="", stderr="build failed", 
returncode=1) with pytest.raises(FlakeRefRealisationError, match="build failed"): try_resolve_flakeref( "/tmp/my flake#pkg", force_realise=True, exec_cmd_fn=fake_exec_cmd, ) def test_try_resolve_flakeref_raises_when_force_realise_prints_no_path(): def fake_exec_cmd(_cmd, **_kwargs): return SimpleNamespace(stdout="\n", stderr="", returncode=0) with pytest.raises(FlakeRefRealisationError, match="returned no output path"): try_resolve_flakeref( "/tmp/my flake#pkg", force_realise=True, exec_cmd_fn=fake_exec_cmd, ) def test_try_resolve_flakeref_raises_on_failed_eval_for_flakeref(): def fake_exec_cmd(_cmd, **_kwargs): return SimpleNamespace(stdout="", stderr="attribute missing", returncode=1) with pytest.raises(FlakeRefResolutionError, match="attribute missing"): try_resolve_flakeref(".#missing", exec_cmd_fn=fake_exec_cmd) def test_try_resolve_flakeref_returns_none_for_non_flake_path(): def fake_exec_cmd(_cmd, **_kwargs): return SimpleNamespace( stdout="", stderr="does not contain a 'flake.nix'", returncode=1, ) resolved = try_resolve_flakeref( "/nix/store/not-a-flake-output", exec_cmd_fn=fake_exec_cmd, ) assert resolved is None @given(PLAIN_MISSING_PATHS) def test_try_resolve_flakeref_returns_none_for_generated_plain_paths(path): def fake_exec_cmd(_cmd, **_kwargs): return SimpleNamespace(stdout="", stderr="dummy eval failure", returncode=1) resolved = try_resolve_flakeref(path, exec_cmd_fn=fake_exec_cmd) assert resolved is None @pytest.mark.parametrize("path", ["missing", "./missing", "foo/bar"]) def test_try_resolve_flakeref_returns_none_for_missing_relative_paths(path): def fake_exec_cmd(_cmd, **_kwargs): return SimpleNamespace(stdout="", stderr="dummy eval failure", returncode=1) resolved = try_resolve_flakeref(path, exec_cmd_fn=fake_exec_cmd) assert resolved is None def test_try_resolve_flakeref_returns_none_for_existing_fragment_path_when_eval_fails( tmp_path, ): existing_path = tmp_path / "contains#hash" existing_path.mkdir() calls = [] def fake_exec_cmd(cmd, **kwargs): calls.append((cmd, kwargs)) return SimpleNamespace( stdout="", stderr="does not contain a 'flake.nix'", returncode=1, ) resolved = try_resolve_flakeref(existing_path.as_posix(), exec_cmd_fn=fake_exec_cmd) assert resolved is None assert calls @given(FLAKE_REFS) def test_try_resolve_flakeref_raises_for_generated_flakeref_failures(flakeref): calls = [] def fake_exec_cmd(cmd, **kwargs): calls.append((cmd, kwargs)) return SimpleNamespace(stdout="", stderr="attribute missing", returncode=1) with pytest.raises(FlakeRefResolutionError, match="attribute missing"): try_resolve_flakeref(flakeref, exec_cmd_fn=fake_exec_cmd) assert flakeref in calls[0][0] assert calls[0][1] == { "raise_on_error": False, "return_error": True, "log_error": False, } @given(FLAKE_REFS) def test_try_resolve_flakeref_strips_generated_eval_output(flakeref): resolved_path = "/nix/store/00000000000000000000000000000000-package" def fake_exec_cmd(cmd, **_kwargs): assert flakeref in cmd return SimpleNamespace(stdout=f"{resolved_path}\n", stderr="", returncode=0) resolved = try_resolve_flakeref(flakeref, exec_cmd_fn=fake_exec_cmd) assert resolved == resolved_path def test_flakeref_realisation_error_accepts_none_stderr(): error = FlakeRefRealisationError(".#pkg", stderr=None) assert error.stderr == "" assert str(error) == "Failed force-realising flakeref '.#pkg'" def test_flake_ref_resolution_error_preserves_stderr_verbatim(): error = FlakeRefResolutionError(".#pkg", stderr="stderr details\n") assert error.stderr == "stderr details\n" assert str(error) == "Failed 
evaluating flakeref '.#pkg': stderr details" ================================================ FILE: tests/test_library_exceptions.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Unit tests for typed library exceptions.""" import subprocess from types import SimpleNamespace import pandas as pd import pytest from common import df as common_df from common.errors import ( CommandNotFoundError, CsvLoadError, InvalidCpeDictionaryError, InvalidNixArtifactError, InvalidNixJsonError, MissingNixDeriverError, WhitelistApplicationError, ) from common.proc import exit_unless_command_exists, exit_unless_nix_artifact from repology.reporting import report_cves from sbomnix import cpe from sbomnix.derivers import require_deriver from vulnxscan import whitelist def test_df_from_csv_file_raises_csv_load_error(monkeypatch): def fail_read_csv(*_args, **_kwargs): raise pd.errors.ParserError("bad csv") monkeypatch.setattr(common_df.pd, "read_csv", fail_read_csv) with pytest.raises(CsvLoadError, match="Error reading csv file 'broken.csv'"): common_df.df_from_csv_file("broken.csv") def test_df_log_ignores_none(): common_df.df_log(None, 0) def test_exit_unless_command_exists_raises_typed_error(): with pytest.raises(CommandNotFoundError, match="command 'nix' is not in PATH"): exit_unless_command_exists("nix", which_fn=lambda _name: None) def test_exit_unless_nix_artifact_raises_typed_error(): def fail_exec_cmd(*_args, **_kwargs): raise subprocess.CalledProcessError(1, ["nix", "path-info", "missing"]) with pytest.raises( InvalidNixArtifactError, match="Specified target is not a nix artifact: 'missing'", ): exit_unless_nix_artifact("missing", exec_cmd_fn=fail_exec_cmd) def test_exit_unless_nix_artifact_uses_modern_nix_commands(): calls = [] def fake_exec_cmd(cmd, **_kwargs): calls.append(cmd) return SimpleNamespace(stdout="/nix/store/target\n") exit_unless_nix_artifact( "/nix/store/target", force_realise=True, exec_cmd_fn=fake_exec_cmd, ) assert calls == [ [ "nix", "build", "--no-link", "/nix/store/target", "--extra-experimental-features", "flakes", "--extra-experimental-features", "nix-command", ], [ "nix", "path-info", "/nix/store/target", "--extra-experimental-features", "flakes", "--extra-experimental-features", "nix-command", ], ] def test_find_deriver_raises_typed_error(): with pytest.raises(MissingNixDeriverError, match="No deriver found for: 'missing'"): require_deriver("missing", find_deriver_fn=lambda _path: None) def test_require_deriver_wraps_lookup_runtime_errors(): def fail_find_deriver(_path): raise RuntimeError("deriver metadata exists but is not loadable") with pytest.raises( MissingNixDeriverError, match="No deriver found for: 'missing'", ): require_deriver("missing", find_deriver_fn=fail_find_deriver) def test_require_deriver_preserves_typed_lookup_errors(): def fail_find_deriver(_path): raise InvalidNixJsonError("nix derivation show", "bad schema") with pytest.raises(InvalidNixJsonError, match="bad schema"): require_deriver("missing", find_deriver_fn=fail_find_deriver) def test_cpe_raises_typed_error_when_required_columns_are_missing(monkeypatch): class FakeCache: def get(self, _url): return pd.DataFrame({"product": ["openssl"]}) def set(self, *_args, **_kwargs): raise AssertionError("cache set should not be called for populated data") monkeypatch.setattr(cpe, "LockedDfCache", FakeCache) with pytest.raises(InvalidCpeDictionaryError, match="cpedict"): cpe.CPE() 
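

# Illustrative sketch, not part of the original suite: one way a library
# consumer might recover from the typed CsvLoadError instead of letting it
# propagate. The helper `_load_optional_csv` is hypothetical and exists only
# for this example; it relies solely on names already imported above.


def _load_optional_csv(path):
    """Return a dataframe for `path`, or None when the csv cannot be parsed."""
    try:
        return common_df.df_from_csv_file(path)
    except CsvLoadError:
        return None


def test_sketch_caller_recovers_from_csv_load_error(monkeypatch):
    def fail_read_csv(*_args, **_kwargs):
        raise pd.errors.ParserError("bad csv")

    monkeypatch.setattr(common_df.pd, "read_csv", fail_read_csv)
    assert _load_optional_csv("broken.csv") is None
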
def test_df_apply_whitelist_raises_typed_error_without_vuln_id_column(): df_whitelist = pd.DataFrame({"vuln_id": ["CVE-.*"], "comment": ["reason"]}) df_vulns = pd.DataFrame({"package": ["openssl"]}) with pytest.raises( WhitelistApplicationError, match="Missing 'vuln_id' column from df_vulns", ): whitelist.df_apply_whitelist(df_whitelist, df_vulns) def test_repology_cve_report_returns_false_on_empty_results(): assert report_cves(None) is False assert report_cves(pd.DataFrame()) is False ================================================ FILE: tests/test_nix_cli_argv.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Whitespace-safe argv construction tests for nix-facing helpers.""" import json from types import SimpleNamespace import pytest from common.proc import exec_cmd, nix_cmd from nixmeta import flake_metadata from nixupdate import nix_outdated from sbomnix import derivers as sbomnix_derivers from sbomnix.meta import Meta def test_exec_cmd_rejects_string_commands(): with pytest.raises( TypeError, match="cmd must be an argv sequence, not a string-like value", ): exec_cmd("echo hello") def test_find_deriver_uses_argv_list(monkeypatch): calls = [] drv_basename = "my target.drv" def fake_exec_cmd(cmd, **kwargs): calls.append((cmd, kwargs)) if cmd[:3] == ["nix", "derivation", "show"]: return SimpleNamespace( stdout=json.dumps( { "derivations": { drv_basename: {"name": "target"}, }, "version": 4, } ), returncode=0, stderr="", ) raise AssertionError(f"unexpected command: {cmd} kwargs={kwargs}") monkeypatch.setattr(sbomnix_derivers, "exec_cmd", fake_exec_cmd) monkeypatch.setattr("os.path.exists", lambda path: path.endswith(".drv")) drv_path = sbomnix_derivers.find_deriver("/nix/store/my target") assert drv_path == "my target.drv" assert calls == [ ( [ "nix", "derivation", "show", "/nix/store/my target", "--extra-experimental-features", "flakes", "--extra-experimental-features", "nix-command", ], {"raise_on_error": False, "log_error": False}, ), ] def test_find_deriver_supports_nix_2_33_wrapped_json(monkeypatch): target_path = "/custom/store/0aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-root" drv_basename = "1bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb-root.drv" def fake_exec_cmd(cmd, **kwargs): if cmd[:3] == ["nix", "derivation", "show"]: return SimpleNamespace( stdout=json.dumps( { "derivations": {drv_basename: {"name": "root"}}, "version": 4, } ), returncode=0, stderr="", ) raise AssertionError(f"unexpected command: {cmd} kwargs={kwargs}") monkeypatch.setattr(sbomnix_derivers, "exec_cmd", fake_exec_cmd) monkeypatch.setattr("os.path.exists", lambda path: path.endswith(".drv")) drv_path = sbomnix_derivers.find_deriver(target_path) assert drv_path == f"/custom/store/{drv_basename}" def test_find_deriver_rejects_unloadable_structured_deriver(monkeypatch): calls = [] target_path = "/nix/store/target" drv_path = "/nix/store/missing-target.drv" def fake_exec_cmd(cmd, **kwargs): calls.append((cmd, kwargs)) if cmd[:3] == ["nix", "derivation", "show"]: return SimpleNamespace( stdout=json.dumps( { "derivations": {drv_path: {"name": "target"}}, "version": 4, } ), returncode=0, stderr="", ) raise AssertionError(f"unexpected command: {cmd} kwargs={kwargs}") monkeypatch.setattr(sbomnix_derivers, "exec_cmd", fake_exec_cmd) monkeypatch.setattr("os.path.exists", lambda _path: False) with pytest.raises(RuntimeError, match="missing-target.drv"): sbomnix_derivers.find_deriver(target_path) assert calls == 
[ ( [ "nix", "derivation", "show", target_path, "--extra-experimental-features", "flakes", "--extra-experimental-features", "nix-command", ], {"raise_on_error": False, "log_error": False}, ) ] def test_get_flake_metadata_uses_argv_list(): calls = [] def fake_exec_cmd(cmd, **kwargs): calls.append((cmd, kwargs)) return SimpleNamespace(stdout='{"path": "/nix/store/nixpkgs"}', returncode=0) meta = flake_metadata.get_flake_metadata( "/tmp/my flake", exec_cmd_fn=fake_exec_cmd, nix_cmd_fn=nix_cmd, ) assert meta == {"path": "/nix/store/nixpkgs"} assert calls == [ ( [ "nix", "flake", "metadata", "/tmp/my flake", "--json", "--extra-experimental-features", "flakes", "--extra-experimental-features", "nix-command", ], {"raise_on_error": False, "return_error": True, "log_error": False}, ) ] def test_get_flake_metadata_strips_nixpkgs_prefix_without_splitting_spaces(): calls = [] def fake_exec_cmd(cmd, **kwargs): calls.append((cmd, kwargs)) return SimpleNamespace(stdout='{"path": "/nix/store/nixpkgs"}', returncode=0) flake_metadata.get_flake_metadata( "nixpkgs=/tmp/my flake", exec_cmd_fn=fake_exec_cmd, nix_cmd_fn=nix_cmd, ) assert calls[0][0][3] == "/tmp/my flake" def test_run_nix_visualize_uses_argv_list(tmp_path, monkeypatch): calls = [] output_path = tmp_path / "graph output.csv" class FakeTempFile: """Minimal context manager compatible with NamedTemporaryFile.""" def __init__(self, path): self.name = path.as_posix() def __enter__(self): return self def __exit__(self, exc_type, exc, traceback): return False def fake_exec_cmd(cmd, **kwargs): calls.append((cmd, kwargs)) return SimpleNamespace(stdout="", returncode=0) monkeypatch.setattr( nix_outdated, "NamedTemporaryFile", lambda **_kwargs: FakeTempFile(output_path), ) monkeypatch.setattr(nix_outdated, "exec_cmd", fake_exec_cmd) returned_path = nix_outdated._run_nix_visualize("/nix/store/my target") assert returned_path == output_path assert calls == [ ( [ "nix-visualize", f"--output={output_path.as_posix()}", "/nix/store/my target", ], {}, ) ] def test_meta_reads_nix_path_entry_with_spaces(monkeypatch): scanned = [] monkeypatch.setenv("NIX_PATH", "foo=/tmp/other:nixpkgs=/tmp/my flake") monkeypatch.setattr(Meta, "_scan", lambda self, path: scanned.append(path) or path) resolved = Meta().get_nixpkgs_meta() assert resolved == "/tmp/my flake" assert scanned == ["/tmp/my flake"] ================================================ FILE: tests/test_nix_outdated_pipeline.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Offline tests for nix_outdated pipeline and reporting behavior.""" from contextlib import contextmanager from types import SimpleNamespace import pandas as pd from common.df import df_from_csv_file from common.log import LOG, LOG_LEVELS, set_log_verbosity from nixupdate import pipeline from nixupdate.report import generate_report_df, write_report class FakeSbomArtifact: def __init__(self, cdx_path): self.cdx_path = cdx_path self.cleaned = False def cleanup(self): self.cleaned = True def _repology_df(): return pd.DataFrame( [ { "package": "hello", "version": "2.11", "version_sbom": "2.10", "newest_upstream_release": "2.12", "status": "outdated", "repo_version_classify": "repo_pkg_needs_update", "sbom_version_classify": "sbom_pkg_needs_update", } ] ) @contextmanager def _log_verbosity(verbosity): previous_level = LOG.level set_log_verbosity(verbosity) try: yield finally: if previous_level in LOG_LEVELS: 
set_log_verbosity(LOG_LEVELS.index(previous_level)) else: LOG.setLevel(previous_level) def test_collect_outdated_scan_data_runtime_uses_hooks_and_cleans_outputs(tmp_path): artifact = FakeSbomArtifact(tmp_path / "deps.cdx.json") graph_csv = tmp_path / "graph.csv" calls = [] def generate_temp_sbom(target_path, buildtime, prefix, cdx_suffix): calls.append(("generate_temp_sbom", target_path, buildtime, prefix, cdx_suffix)) return artifact def query_repology(sbom_path): calls.append(("query_repology", sbom_path)) return _repology_df() def run_nix_visualize(target_path): calls.append(("run_nix_visualize", target_path)) graph_csv.write_text("package,version,level\nhello,2.10,1\n", encoding="utf-8") return graph_csv def parse_nix_visualize(csv_path): calls.append(("parse_nix_visualize", csv_path, csv_path.exists())) return pd.DataFrame( [ { "package": "hello", "version": "2.10", "level": "1", } ] ) with _log_verbosity(0): data = pipeline.collect_outdated_scan_data( "/nix/store/root", buildtime=False, hooks=pipeline.OutdatedScanHooks( query_repology=query_repology, generate_temp_sbom=generate_temp_sbom, run_nix_visualize=run_nix_visualize, parse_nix_visualize=parse_nix_visualize, ), ) assert calls == [ ("generate_temp_sbom", "/nix/store/root", False, "nixdeps_", ".cdx.json"), ("query_repology", artifact.cdx_path), ("run_nix_visualize", "/nix/store/root"), ("parse_nix_visualize", graph_csv, True), ] assert artifact.cleaned assert not graph_csv.exists() assert data.repology.to_dict(orient="records") == _repology_df().to_dict( orient="records" ) assert data.nix_visualize is not None assert data.nix_visualize.to_dict(orient="records") == [ { "package": "hello", "version": "2.10", "level": "1", } ] def test_collect_outdated_scan_data_buildtime_skips_nix_visualize(tmp_path): artifact = FakeSbomArtifact(tmp_path / "deps.cdx.json") with _log_verbosity(0): data = pipeline.collect_outdated_scan_data( "/nix/store/root.drv", buildtime=True, hooks=pipeline.OutdatedScanHooks( query_repology=lambda _sbom_path: _repology_df(), generate_temp_sbom=lambda *_args, **_kwargs: artifact, run_nix_visualize=lambda _target_path: (_ for _ in ()).throw( AssertionError("nix-visualize should not run for buildtime scans") ), parse_nix_visualize=lambda _csv_path: (_ for _ in ()).throw( AssertionError("nix-visualize output should not be parsed") ), ), ) assert artifact.cleaned assert data.nix_visualize is None assert data.repology.to_dict(orient="records") == _repology_df().to_dict( orient="records" ) def test_collect_outdated_scan_data_buildtime_debug_keeps_nix_visualize_optional( tmp_path, ): artifact = FakeSbomArtifact(tmp_path / "deps.cdx.json") with _log_verbosity(2): data = pipeline.collect_outdated_scan_data( "/nix/store/root.drv", buildtime=True, hooks=pipeline.OutdatedScanHooks( query_repology=lambda _sbom_path: _repology_df(), generate_temp_sbom=lambda *_args, **_kwargs: artifact, run_nix_visualize=lambda _target_path: (_ for _ in ()).throw( AssertionError("nix-visualize should not run for buildtime scans") ), parse_nix_visualize=lambda _csv_path: (_ for _ in ()).throw( AssertionError("nix-visualize output should not be parsed") ), ), ) assert not artifact.cleaned assert data.nix_visualize is None assert data.repology.to_dict(orient="records") == _repology_df().to_dict( orient="records" ) def test_generate_report_df_buildtime_adds_default_priority_and_renames_version(): df_report = generate_report_df(None, _repology_df()) assert list(df_report["level"]) == ["0"] assert list(df_report["version_repology"]) == ["2.11"] 
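    # The buildtime report renames repology's `version` column to
    # `version_repology`; the original column name must no longer be present.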
assert "version" not in df_report.columns def test_write_report_defaults_to_nixpkgs_updates_and_drops_newest_duplicates(tmp_path): out_path = tmp_path / "nix_outdated.csv" df = pd.DataFrame( [ { "level": "1", "package": "openssl", "version": "3.0", "version_sbom": "3.0", "version_repology": "3.1", "newest_upstream_release": "3.2", "status": "outdated", "repo_version_classify": "repo_pkg_needs_update", "sbom_version_classify": "", }, { "level": "2", "package": "hello", "version": "2.10", "version_sbom": "2.10", "version_repology": "2.11", "newest_upstream_release": "2.12", "status": "outdated", "repo_version_classify": "repo_pkg_needs_update", "sbom_version_classify": "", }, { "level": "3", "package": "hello", "version": "2.12", "version_sbom": "2.12", "version_repology": "2.12", "newest_upstream_release": "2.12", "status": "newest", "repo_version_classify": "", "sbom_version_classify": "", }, { "level": "4", "package": "local-only", "version": "1.0", "version_sbom": "1.0", "version_repology": "1.1", "newest_upstream_release": "1.1", "status": "outdated", "repo_version_classify": "", "sbom_version_classify": "sbom_pkg_needs_update", }, ] ) write_report( df, SimpleNamespace(local=False, buildtime=False, out=out_path.as_posix()), ) report = df_from_csv_file(out_path) assert report.to_dict(orient="records") == [ { "priority": "1", "nix_package": "openssl", "version_local": "3.0", "version_nixpkgs": "3.1", "version_upstream": "3.2", } ] def test_write_report_local_buildtime_outputs_local_updates_without_priority(tmp_path): out_path = tmp_path / "nix_outdated_local.csv" df = pd.DataFrame( [ { "level": "0", "package": "local-only", "version": "1.0", "version_sbom": "1.0", "version_repology": "1.1", "newest_upstream_release": "1.2", "status": "outdated", "repo_version_classify": "", "sbom_version_classify": "sbom_pkg_needs_update", }, { "level": "0", "package": "repo-only", "version": "2.0", "version_sbom": "2.0", "version_repology": "2.1", "newest_upstream_release": "2.2", "status": "outdated", "repo_version_classify": "repo_pkg_needs_update", "sbom_version_classify": "", }, ] ) write_report( df, SimpleNamespace(local=True, buildtime=True, out=out_path.as_posix()), ) report = df_from_csv_file(out_path) assert list(report.columns) == [ "nix_package", "version_local", "version_nixpkgs", "version_upstream", ] assert report.to_dict(orient="records") == [ { "nix_package": "local-only", "version_local": "1.0", "version_nixpkgs": "1.1", "version_upstream": "1.2", } ] ================================================ FILE: tests/test_nix_target_resolution.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Focused tests for shared nix target resolution helpers.""" import subprocess from types import SimpleNamespace import pytest from common.errors import ( FlakeRefRealisationError, FlakeRefResolutionError, InvalidNixArtifactError, MissingNixOutPathError, ) from sbomnix import cli_utils as sbomnix_cli_utils def test_resolve_nix_target_preserves_flakeref_on_success(monkeypatch): monkeypatch.setattr( sbomnix_cli_utils, "try_resolve_flakeref", lambda *_args, **_kwargs: "/nix/store/resolved", ) resolved = sbomnix_cli_utils.resolve_nix_target(".#hello", buildtime=False) assert resolved == sbomnix_cli_utils.ResolvedNixTarget( path="/nix/store/resolved", flakeref=".#hello", original_ref=".#hello", ) def 
test_resolve_nix_target_requests_derivation_for_buildtime_flakeref(monkeypatch): calls = [] def fake_resolve(flakeref, **kwargs): calls.append((flakeref, kwargs)) return "/nix/store/resolved.drv" monkeypatch.setattr( sbomnix_cli_utils, "try_resolve_flakeref", fake_resolve, ) resolved = sbomnix_cli_utils.resolve_nix_target(".#hello", buildtime=True) assert resolved.path == "/nix/store/resolved.drv" assert calls == [ ( ".#hello", { "force_realise": False, "impure": False, "derivation": True, }, ) ] def test_resolve_nix_target_normalizes_plain_nixos_configuration(monkeypatch): calls = [] def fake_resolve(flakeref, **_kwargs): calls.append(flakeref) return "/nix/store/resolved" monkeypatch.setattr( sbomnix_cli_utils, "try_resolve_flakeref", fake_resolve, ) resolved = sbomnix_cli_utils.resolve_nix_target( "/flake#nixosConfigurations.host", buildtime=False, ) assert calls == ['/flake#nixosConfigurations."host".config.system.build.toplevel'] assert resolved == sbomnix_cli_utils.ResolvedNixTarget( path="/nix/store/resolved", flakeref='/flake#nixosConfigurations."host".config.system.build.toplevel', original_ref="/flake#nixosConfigurations.host", ) def test_resolve_nix_target_normalizes_quoted_nixos_configuration(monkeypatch): calls = [] def fake_resolve(flakeref, **_kwargs): calls.append(flakeref) return "/nix/store/resolved" monkeypatch.setattr( sbomnix_cli_utils, "try_resolve_flakeref", fake_resolve, ) resolved = sbomnix_cli_utils.resolve_nix_target( '/flake#nixosConfigurations."host.example.com"', buildtime=False, ) assert calls == [ '/flake#nixosConfigurations."host.example.com".config.system.build.toplevel' ] assert resolved == sbomnix_cli_utils.ResolvedNixTarget( path="/nix/store/resolved", flakeref=( '/flake#nixosConfigurations."host.example.com".config.system.build.toplevel' ), original_ref='/flake#nixosConfigurations."host.example.com"', ) @pytest.mark.parametrize( "nixref", [ "/flake#nixosConfigurations.", '/flake#nixosConfigurations."unterminated', '/flake#nixosConfigurations."trailing\\', ], ) def test_resolve_nix_target_leaves_malformed_nixos_configuration_refs( nixref, monkeypatch, ): calls = [] def fake_resolve(flakeref, **_kwargs): calls.append(flakeref) return "/nix/store/resolved" monkeypatch.setattr( sbomnix_cli_utils, "try_resolve_flakeref", fake_resolve, ) resolved = sbomnix_cli_utils.resolve_nix_target(nixref, buildtime=False) assert calls == [nixref] assert resolved == sbomnix_cli_utils.ResolvedNixTarget( path="/nix/store/resolved", flakeref=nixref, original_ref=nixref, ) def test_resolve_nix_target_propagates_flakeref_realisation_failure_without_path_probe( monkeypatch, ): artifact_checks = [] def raise_realisation_error(*_args, **_kwargs): raise FlakeRefRealisationError(".#broken", "build failed") monkeypatch.setattr( sbomnix_cli_utils, "try_resolve_flakeref", raise_realisation_error, ) monkeypatch.setattr( sbomnix_cli_utils, "exit_unless_nix_artifact", lambda path, force_realise=False: artifact_checks.append((path, force_realise)), ) with pytest.raises(FlakeRefRealisationError) as excinfo: sbomnix_cli_utils.resolve_nix_target(".#broken", buildtime=False) assert ( str(excinfo.value) == "Failed force-realising flakeref '.#broken': build failed" ) assert not artifact_checks def test_resolve_nix_target_propagates_flakeref_eval_failure_without_path_probe( monkeypatch, ): artifact_checks = [] def raise_resolution_error(*_args, **_kwargs): raise FlakeRefResolutionError(".#broken", "attribute missing") monkeypatch.setattr( sbomnix_cli_utils, "try_resolve_flakeref", 
raise_resolution_error, ) monkeypatch.setattr( sbomnix_cli_utils, "exit_unless_nix_artifact", lambda path, force_realise=False: artifact_checks.append((path, force_realise)), ) with pytest.raises(FlakeRefResolutionError) as excinfo: sbomnix_cli_utils.resolve_nix_target(".#broken", buildtime=False) assert ( str(excinfo.value) == "Failed evaluating flakeref '.#broken': attribute missing" ) assert not artifact_checks def test_resolve_nix_target_uses_plain_path_validation(monkeypatch): artifact_checks = [] monkeypatch.setattr( sbomnix_cli_utils, "try_resolve_flakeref", lambda *_args, **_kwargs: None, ) monkeypatch.setattr( sbomnix_cli_utils, "exit_unless_nix_artifact", lambda path, force_realise=False: artifact_checks.append((path, force_realise)), ) resolved = sbomnix_cli_utils.resolve_nix_target("/nix/store/not-a-flake") assert resolved == sbomnix_cli_utils.ResolvedNixTarget( path="/nix/store/not-a-flake", flakeref=None, original_ref="/nix/store/not-a-flake", ) assert artifact_checks == [("/nix/store/not-a-flake", True)] def test_resolve_nix_target_realises_runtime_drv_target(monkeypatch): calls = [] monkeypatch.setattr( sbomnix_cli_utils, "try_resolve_flakeref", lambda *_args, **_kwargs: None, ) def fake_exec_cmd(cmd, **kwargs): calls.append((cmd, kwargs)) return SimpleNamespace(stdout="/nix/store/resolved-output\n") monkeypatch.setattr(sbomnix_cli_utils, "exec_cmd", fake_exec_cmd) resolved = sbomnix_cli_utils.resolve_nix_target( "/nix/store/target.drv", buildtime=False, ) assert resolved == sbomnix_cli_utils.ResolvedNixTarget( path="/nix/store/resolved-output", flakeref=None, original_ref="/nix/store/target.drv", ) assert calls == [ ( [ "nix", "build", "--no-link", "--print-out-paths", "/nix/store/target.drv^*", "--extra-experimental-features", "flakes", "--extra-experimental-features", "nix-command", ], {}, ) ] def test_resolve_nix_target_uses_first_runtime_drv_output(monkeypatch): monkeypatch.setattr( sbomnix_cli_utils, "try_resolve_flakeref", lambda *_args, **_kwargs: None, ) monkeypatch.setattr( sbomnix_cli_utils, "exec_cmd", lambda *_args, **_kwargs: SimpleNamespace( stdout="\n/nix/store/first-output\n/nix/store/second-output\n" ), ) resolved = sbomnix_cli_utils.resolve_nix_target( "/nix/store/target.drv", buildtime=False, ) assert resolved.path == "/nix/store/first-output" def test_resolve_nix_target_rejects_empty_runtime_drv_output(monkeypatch): monkeypatch.setattr( sbomnix_cli_utils, "try_resolve_flakeref", lambda *_args, **_kwargs: None, ) monkeypatch.setattr( sbomnix_cli_utils, "exec_cmd", lambda *_args, **_kwargs: SimpleNamespace(stdout="\n"), ) with pytest.raises(MissingNixOutPathError): sbomnix_cli_utils.resolve_nix_target( "/nix/store/target.drv", buildtime=False, ) def test_resolve_nix_target_rejects_failed_runtime_drv_realisation(monkeypatch): monkeypatch.setattr( sbomnix_cli_utils, "try_resolve_flakeref", lambda *_args, **_kwargs: None, ) def fake_exec_cmd(*_args, **_kwargs): raise subprocess.CalledProcessError(1, ["nix", "build"]) monkeypatch.setattr(sbomnix_cli_utils, "exec_cmd", fake_exec_cmd) with pytest.raises(InvalidNixArtifactError): sbomnix_cli_utils.resolve_nix_target( "/nix/store/target.drv", buildtime=False, ) ================================================ FILE: tests/test_nix_utils_parsing.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Focused tests for nix derivation JSON normalization helpers.""" import json import 
pytest from common.errors import InvalidNixJsonError from common.nix_utils import get_nix_store_dir, parse_nix_derivation_show def test_parse_nix_derivation_show_normalizes_nix_2_33_store_paths(): parsed = parse_nix_derivation_show( json.dumps( { "version": 4, "derivations": { "0aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-root.drv": { "name": "root", "outputs": { "out": { "path": "1bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb-root", } }, "inputs": { "drvs": { "2ccccccccccccccccccccccccccccccc-dep.drv": ["out"], }, "srcs": [ "3ddddddddddddddddddddddddddddddd-source", ], }, } }, } ), store_path_hint="/custom/store/0aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-root.drv", ) assert parsed == { "/custom/store/0aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-root.drv": { "name": "root", "outputs": { "out": { "path": "/custom/store/1bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb-root", } }, "inputs": { "drvs": { "/custom/store/2ccccccccccccccccccccccccccccccc-dep.drv": ["out"], }, "srcs": [ "/custom/store/3ddddddddddddddddddddddddddddddd-source", ], }, } } def test_get_nix_store_dir_ignores_colon_separated_env_paths(): assert ( get_nix_store_dir( "/custom/store/1bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb-bin:" "/custom/store/2ccccccccccccccccccccccccccccccc-sbin" ) == "/custom/store" ) def test_parse_nix_derivation_show_infers_store_dir_from_path_like_env_values(): drv_basename = "0aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-root.drv" out_basename = "1bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb-root" parsed = parse_nix_derivation_show( json.dumps( { "version": 4, "derivations": { drv_basename: { "name": "root", "outputs": {"out": {"method": "nar"}}, "env": { "out": out_basename, "PATH": ( "/custom/store/3ddddddddddddddddddddddddddddddd-coreutils/bin:" "/custom/store/4eeeeeeeeeeeeeeeeeeeeeeeeeeeeeee-git/bin:" "/custom/store/5fffffffffffffffffffffffffffffff-graphviz/bin" ), }, } }, } ) ) drv_path = f"/custom/store/{drv_basename}" assert list(parsed) == [drv_path] assert parsed[drv_path]["env"]["out"] == f"/custom/store/{out_basename}" def test_parse_nix_derivation_show_rejects_changed_wrapper_shape(): with pytest.raises(InvalidNixJsonError, match="expected `derivations` object"): parse_nix_derivation_show(json.dumps({"version": 4, "derivations": []})) def test_parse_nix_derivation_show_rejects_changed_output_shape(): with pytest.raises(InvalidNixJsonError, match="expected `outputs`"): parse_nix_derivation_show( json.dumps( { "version": 4, "derivations": { "/nix/store/0aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-root.drv": { "outputs": [], } }, } ) ) def test_parse_nix_derivation_show_rejects_invalid_json(): with pytest.raises(InvalidNixJsonError, match="invalid JSON"): parse_nix_derivation_show("not-json") ================================================ FILE: tests/test_nixgraph_graph.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Focused tests for nixgraph loading and traversal.""" from types import SimpleNamespace import pandas as pd from nixgraph import graph as nixgraph_graph from nixgraph import render as nixgraph_render from nixgraph.render import NixDependencyGraph from sbomnix.closure import dependency_rows_to_dataframe from sbomnix.runtime import RuntimeClosure class CapturingLogger: def __init__(self): self.records = [] def debug(self, msg, *args): self.records.append(("debug", msg, args)) def info(self, msg, *args): self.records.append(("info", msg, args)) def warning(self, msg, *args): self.records.append(("warning", msg, args)) def log(self, level, msg, *args): 
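        # Capture LOG.log(level, ...) calls together with their numeric level
        # so tests can assert on records emitted at custom verbosity levels.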
self.records.append(("log", level, msg, args)) def test_dependency_graph_returns_dataframe_for_csv_output(): """Return the traversed graph rows directly when CSV mode is requested.""" df_dependencies = pd.DataFrame.from_records( [ { "src_path": "/nix/store/bash", "src_pname": "bash", "target_path": "/nix/store/hello", "target_pname": "hello", }, { "src_path": "/nix/store/glibc", "src_pname": "glibc", "target_path": "/nix/store/bash", "target_pname": "bash", }, ] ) args = SimpleNamespace( out="graph.csv", depth=3, inverse=None, until=None, colorize=None, pathnames=False, return_df=True, ) df_out = NixDependencyGraph(df_dependencies).draw("/nix/store/hello", args) df_out = df_out.sort_values(["graph_depth", "src_path"]).reset_index(drop=True) assert list(df_out["graph_depth"]) == [1, 2] assert list(df_out["target_path"]) == ["/nix/store/hello", "/nix/store/bash"] assert list(df_out["src_path"]) == ["/nix/store/bash", "/nix/store/glibc"] def test_dependency_graph_inverse_returns_dataframe_for_csv_output(): """Return inverse traversal rows through the shared dependency walker.""" df_dependencies = pd.DataFrame.from_records( [ { "src_path": "/nix/store/bash", "src_pname": "bash", "target_path": "/nix/store/hello", "target_pname": "hello", }, { "src_path": "/nix/store/glibc", "src_pname": "glibc", "target_path": "/nix/store/bash", "target_pname": "bash", }, ] ) args = SimpleNamespace( out="graph.csv", depth=3, inverse="glibc", until=None, colorize=None, pathnames=False, return_df=True, ) df_out = NixDependencyGraph(df_dependencies).draw("/nix/store/hello", args) df_out = df_out.sort_values(["graph_depth", "target_path"]).reset_index(drop=True) assert list(df_out["graph_depth"]) == [1, 2] assert list(df_out["target_path"]) == ["/nix/store/bash", "/nix/store/hello"] assert list(df_out["src_path"]) == ["/nix/store/glibc", "/nix/store/bash"] def test_dependency_graph_writes_raw_dot_without_graphviz_render(tmp_path): class FakeDigraph: def __init__(self): self.saved = [] self.rendered = [] def save(self, filename): self.saved.append(filename) def render(self, **kwargs): self.rendered.append(kwargs) fake = FakeDigraph() graph = NixDependencyGraph(pd.DataFrame()) graph.digraph = fake dot_path = tmp_path / "graph.dot" graph._render(dot_path.as_posix()) assert fake.saved == [dot_path.as_posix()] assert fake.rendered == [] def test_dependency_graph_deduplicates_rendered_nodes(): node_calls = [] class FakeDigraph: def node(self, *args, **kwargs): node_calls.append((args, kwargs)) graph = NixDependencyGraph(pd.DataFrame()) graph.digraph = FakeDigraph() graph.nodes_drawn = set() graph._add_node("/nix/store/bash", "bash") graph._add_node("/nix/store/bash", "bash") assert len(node_calls) == 1 def test_dependency_graph_warns_before_large_graphviz_render(monkeypatch): logger = CapturingLogger() monkeypatch.setattr(nixgraph_render, "LOG", logger) monkeypatch.setattr(nixgraph_render, "GRAPHVIZ_RENDER_WARN_EDGES", 1) monkeypatch.setattr(NixDependencyGraph, "_render", lambda self, filename: None) df_dependencies = pd.DataFrame.from_records( [ { "src_path": "/nix/store/bash", "src_pname": "bash", "target_path": "/nix/store/hello", "target_pname": "hello", }, ] ) args = SimpleNamespace( out="graph.png", depth=1, inverse=None, until=None, colorize=None, pathnames=False, ) NixDependencyGraph(df_dependencies).draw("/nix/store/hello", args) assert ( "warning", "Rendering %s dependency edges with Graphviz may be slow; " "use --out graph.csv or --out graph.dot for faster output.", (1,), ) in logger.records def 
test_load_dependencies_logs_dependency_loading_at_info(monkeypatch): logger = CapturingLogger() monkeypatch.setattr(nixgraph_graph, "LOG", logger) monkeypatch.setattr( nixgraph_graph, "load_runtime_closure", lambda *_args, **_kwargs: RuntimeClosure( df_deps=dependency_rows_to_dataframe([]), output_paths_by_drv={}, ), ) nixgraph_graph.load_dependencies("/nix/store/target") assert ( "info", "Loading %s dependencies referenced by '%s'", ("runtime", "/nix/store/target"), ) in logger.records def test_load_dependencies_buildtime_uses_derivation_json(monkeypatch): drv_infos = { "/nix/store/11111111111111111111111111111111-target.drv": { "inputs": { "drvs": { "/nix/store/22222222222222222222222222222222-dep.drv": ["out"], }, "srcs": [ "/nix/store/33333333333333333333333333333333-source", ], } } } monkeypatch.setattr( nixgraph_graph, "require_deriver", lambda path: path, ) monkeypatch.setattr( nixgraph_graph, "load_recursive", lambda path: ({path: object()}, drv_infos), ) deps = nixgraph_graph.load_dependencies( "/nix/store/target.drv", buildtime=True, ) assert deps.start_path == "/nix/store/target.drv" assert deps.df.to_dict("records") == [ { "src_path": "/nix/store/22222222222222222222222222222222-dep.drv", "src_pname": "dep.drv", "target_path": "/nix/store/11111111111111111111111111111111-target.drv", "target_pname": "target.drv", }, { "src_path": "/nix/store/33333333333333333333333333333333-source", "src_pname": "source", "target_path": "/nix/store/11111111111111111111111111111111-target.drv", "target_pname": "target.drv", }, ] def test_load_dependencies_runtime_uses_resolved_output_path(monkeypatch): monkeypatch.setattr( nixgraph_graph, "load_runtime_closure", lambda *_args, **_kwargs: RuntimeClosure( df_deps=dependency_rows_to_dataframe( [ { "src_path": "/nix/store/dep", "src_pname": "dep", "target_path": "/nix/store/target", "target_pname": "target", } ] ), output_paths_by_drv={}, ), ) deps = nixgraph_graph.load_dependencies("/nix/store/target") assert deps.start_path == "/nix/store/target" assert deps.df.to_dict("records") == [ { "src_path": "/nix/store/dep", "src_pname": "dep", "target_path": "/nix/store/target", "target_pname": "target", } ] def test_nixgraph_no_longer_exposes_removed_graph_helpers(): assert not hasattr(nixgraph_graph, "NixDependencies") assert not hasattr(nixgraph_graph, "parse_nix_query_out") assert not hasattr(nixgraph_graph, "runtime_query_output") assert not hasattr(nixgraph_graph, "buildtime_query_output") assert not hasattr(nixgraph_graph, "find_output_path") ================================================ FILE: tests/test_nixmeta_parsing.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Focused tests for nixmeta parsing helpers.""" import json from nixmeta import metadata_json def test_parse_json_metadata_flattens_nested_fields(tmp_path): json_path = tmp_path / "meta.json" json_path.write_text( json.dumps( { "hello": { "name": "hello-2.12.1", "pname": "hello", "version": "2.12.1", "meta": { "homepage": ["https://example.invalid/hello"], "unfree": False, "description": "GNU hello", "position": "pkgs/tools/misc/hello/default.nix:1", "license": [ {"shortName": "GPLv3+", "spdxId": "GPL-3.0-or-later"} ], "maintainers": [ {"email": "maintainer@example.invalid"}, ], }, } } ), encoding="utf-8", ) df = metadata_json.parse_json_metadata(json_path) assert df.to_dict(orient="records") == [ { "name": "hello-2.12.1", "pname": "hello", 
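            # Nested `meta` fields are flattened into meta_-prefixed string
            # columns in the parsed dataframe, as asserted below.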
"version": "2.12.1", "meta_homepage": "https://example.invalid/hello", "meta_unfree": "False", "meta_description": "GNU hello", "meta_position": "pkgs/tools/misc/hello/default.nix:1", "meta_license_short": "GPLv3+", "meta_license_spdxid": "GPL-3.0-or-later", "meta_maintainers_email": "maintainer@example.invalid", } ] ================================================ FILE: tests/test_nixmeta_progress.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Focused tests for nixmeta progress logging.""" import json import subprocess from types import SimpleNamespace from nixmeta import flake_metadata from nixmeta import main as nixmeta_main from nixmeta import scanner as nixmeta_scanner class CapturingLogger: def __init__(self): self.records = [] def debug(self, msg, *args): self.records.append(("debug", msg, args)) def info(self, msg, *args): self.records.append(("info", msg, args)) def warning(self, msg, *args): self.records.append(("warning", msg, args)) def fatal(self, msg, *args): self.records.append(("fatal", msg, args)) def log(self, level, msg, *args): self.records.append(("log", level, msg, args)) def test_nixmeta_main_logs_scan_start(monkeypatch): args = SimpleNamespace( flakeref="github:NixOS/nixpkgs?ref=nixos-unstable", out="nixmeta.csv", append=False, ) logger = CapturingLogger() events = [] class FakeScanner: def scan(self, flakeref): events.append(("scan", flakeref)) def to_csv(self, out, append): events.append(("to_csv", out, append)) monkeypatch.setattr(nixmeta_main, "LOG", logger) monkeypatch.setattr( nixmeta_main, "exit_unless_command_exists", lambda command: events.append(("command", command)), ) monkeypatch.setattr(nixmeta_main, "NixMetaScanner", FakeScanner) nixmeta_main._run(args) assert ( "info", "Scanning nixpkgs metadata for '%s'", ("github:NixOS/nixpkgs?ref=nixos-unstable",), ) in logger.records assert events == [ ("command", "nix"), ("command", "nix-env"), ("scan", "github:NixOS/nixpkgs?ref=nixos-unstable"), ("to_csv", "nixmeta.csv", False), ] def test_get_flake_metadata_logs_metadata_read(): logger = CapturingLogger() def fake_exec_cmd(_cmd, **_kwargs): return SimpleNamespace(stdout='{"path": "/nix/store/nixpkgs"}', returncode=0) meta = flake_metadata.get_flake_metadata( "nixpkgs=/tmp/my flake", exec_cmd_fn=fake_exec_cmd, log=logger, ) assert meta == {"path": "/nix/store/nixpkgs"} assert ( "info", "Reading flake metadata for '%s'", ("/tmp/my flake",), ) in logger.records def test_get_nixpkgs_flakeref_uses_root_nixpkgs_input_with_renamed_node(): meta_json = { "locks": { "root": "root", "nodes": { "root": {"inputs": {"nixpkgs": "nixpkgs_3"}}, "nixpkgs": { "locked": { "type": "github", "owner": "NixOS", "repo": "nixpkgs", "rev": "wrong", } }, "nixpkgs_3": { "locked": { "type": "github", "owner": "NixOS", "repo": "nixpkgs", "rev": "right", } }, }, } } assert ( flake_metadata.get_nixpkgs_flakeref(meta_json) == "github:NixOS/nixpkgs?rev=right" ) def test_nixmeta_scanner_logs_nix_env_progress(tmp_path, monkeypatch): nixpkgs_path = tmp_path / "nixpkgs" nixpkgs_path.mkdir() logger = CapturingLogger() commands = [] def fake_run_nix_env_metadata(_cmd, stdout): commands.append(_cmd) stdout.write( json.dumps( { "hello": { "name": "hello-2.12.1", "pname": "hello", "version": "2.12.1", "meta": { "homepage": "https://example.invalid/hello", "unfree": False, "description": "GNU hello", "position": "pkgs/tools/misc/hello/default.nix:1", "license": { 
"shortName": "GPLv3+", "spdxId": "GPL-3.0-or-later", }, "maintainers": { "email": "maintainer@example.invalid", }, }, } } ).encode("utf-8") ) stdout.flush() monkeypatch.setattr(nixmeta_scanner, "LOG", logger) monkeypatch.setattr( nixmeta_scanner, "nixref_to_nixpkgs_path", lambda *_args, **_kwargs: nixpkgs_path, ) monkeypatch.setattr( nixmeta_scanner, "_run_nix_env_metadata", fake_run_nix_env_metadata, ) scanner = nixmeta_scanner.NixMetaScanner() scanner.scan("github:NixOS/nixpkgs?ref=nixos-unstable") assert ( "info", "Reading nixpkgs metadata from '%s'", (nixpkgs_path.as_posix(),), ) in logger.records assert ("info", "Parsing nixpkgs metadata", ()) in logger.records assert commands assert scanner.to_df() is not None def test_run_nix_env_metadata_captures_successful_stderr(monkeypatch, tmp_path): calls = [] logger = CapturingLogger() def fake_run(cmd, **kwargs): calls.append((cmd, kwargs)) return SimpleNamespace(stderr="warning: noisy eval\n") monkeypatch.setattr(nixmeta_scanner.subprocess, "run", fake_run) monkeypatch.setattr(nixmeta_scanner, "LOG", logger) out_path = tmp_path / "meta.json" with out_path.open("w", encoding="utf-8") as out: nixmeta_scanner._run_nix_env_metadata(["nix-env"], stdout=out) assert calls assert calls[0][1]["stderr"] is subprocess.PIPE assert calls[0][1]["stdout"].name == out_path.as_posix() assert ( "debug", "nix-env metadata stderr:\n%s", ("warning: noisy eval",), ) in logger.records def test_nixmeta_scanner_tolerates_empty_metadata_json(tmp_path, monkeypatch): nixpkgs_path = tmp_path / "nixpkgs" nixpkgs_path.mkdir() def fake_run_nix_env_metadata(_cmd, stdout): stdout.write(b"{}") stdout.flush() monkeypatch.setattr( nixmeta_scanner, "_run_nix_env_metadata", fake_run_nix_env_metadata, ) scanner = nixmeta_scanner.NixMetaScanner() scanner.scan_path(nixpkgs_path) assert scanner.to_df() is not None assert scanner.to_df().empty def test_nixmeta_expression_scan_enables_flakes(monkeypatch): commands = [] def fake_run_nix_env_metadata(cmd, stdout): commands.append(cmd) stdout.write(b"{}") stdout.flush() monkeypatch.setattr( nixmeta_scanner, "_run_nix_env_metadata", fake_run_nix_env_metadata, ) scanner = nixmeta_scanner.NixMetaScanner() scanner.scan_expression('builtins.getFlake "github:NixOS/nixpkgs"') assert commands assert [ "--option", "experimental-features", "nix-command flakes", ] in [commands[0][idx : idx + 3] for idx in range(len(commands[0]) - 2)] def test_nixmeta_expression_scan_honors_impure(monkeypatch): commands = [] def fake_run_nix_env_metadata(cmd, stdout): commands.append(cmd) stdout.write(b"{}") stdout.flush() monkeypatch.setattr( nixmeta_scanner, "_run_nix_env_metadata", fake_run_nix_env_metadata, ) scanner = nixmeta_scanner.NixMetaScanner() scanner.scan_expression( 'builtins.getFlake "path:/tmp/local-flake"', impure=True, ) assert commands assert "--impure" in commands[0] ================================================ FILE: tests/test_nixmeta_source.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Focused tests for nixpkgs metadata source selection.""" import json import pathlib from types import SimpleNamespace import pytest from common.errors import SbomnixError from sbomnix import meta as sbomnix_meta from sbomnix import meta_source as sbomnix_meta_source def test_classify_meta_nixpkgs_reserved_modes_before_explicit_source(): assert ( sbomnix_meta.classify_meta_nixpkgs(sbomnix_meta.META_NIXPKGS_NIX_PATH) == 
sbomnix_meta.META_NIXPKGS_NIX_PATH ) assert sbomnix_meta.classify_meta_nixpkgs("/nix/store/source") == "explicit" def test_get_nixpkgs_meta_with_source_records_flakeref_lock(monkeypatch, tmp_path): nixpkgs_path = tmp_path / "nixpkgs" (nixpkgs_path / "lib").mkdir(parents=True) (nixpkgs_path / "lib" / ".version").write_text("25.11\n", encoding="utf-8") scanned = [] monkeypatch.setattr( sbomnix_meta_source, "nixref_to_nixpkgs_path", lambda _nixref: nixpkgs_path, ) monkeypatch.setattr( sbomnix_meta.Meta, "_scan", lambda self, path: scanned.append(path) or "df", ) df_meta, source = sbomnix_meta.Meta().get_nixpkgs_meta_with_source( target_path="/nix/store/target", flakeref=".#target", original_ref=".#target", ) assert df_meta == "df" assert scanned == [nixpkgs_path.as_posix()] assert source == sbomnix_meta.NixpkgsMetaSource( method="flakeref-lock", path=nixpkgs_path.as_posix(), flakeref=".#target", version="25.11", ) def test_get_nixpkgs_meta_with_source_records_opt_in_nix_path(monkeypatch): scanned = [] monkeypatch.setenv("NIX_PATH", "foo=/tmp/other:nixpkgs=/tmp/my flake") monkeypatch.setattr( sbomnix_meta.Meta, "_scan", lambda self, path: scanned.append(path) or "df", ) df_meta, source = sbomnix_meta.Meta().get_nixpkgs_meta_with_source( target_path="/nix/store/target", flakeref=None, original_ref="/nix/store/target", explicit_nixpkgs=sbomnix_meta.META_NIXPKGS_NIX_PATH, ) assert df_meta == "df" assert scanned == ["/tmp/my flake"] assert source == sbomnix_meta.NixpkgsMetaSource( method="nix-path", path="/tmp/my flake", message="NIX_PATH metadata source may not match the target", ) def test_explicit_nix_path_source_requires_nixpkgs_entry(monkeypatch): def fail_if_scanned(self, path): raise AssertionError(f"nix-path scan should not run: {path}") monkeypatch.setenv("NIX_PATH", "foo=/tmp/other") monkeypatch.setattr(sbomnix_meta.Meta, "_scan", fail_if_scanned) with pytest.raises(SbomnixError, match="NIX_PATH.*nixpkgs="): sbomnix_meta.Meta().get_nixpkgs_meta_with_source( target_path="/nix/store/target", flakeref=None, original_ref="/nix/store/target", explicit_nixpkgs=sbomnix_meta.META_NIXPKGS_NIX_PATH, ) def test_path_target_without_source_skips_nix_path_metadata(monkeypatch): def fail_if_scanned(self, path): raise AssertionError(f"path-target scan should be skipped: {path}") monkeypatch.setenv("NIX_PATH", "nixpkgs=/tmp/nixpkgs") monkeypatch.setattr(sbomnix_meta.Meta, "_scan", fail_if_scanned) df_meta, source = sbomnix_meta.Meta().get_nixpkgs_meta_with_source( target_path="/nix/store/resolved-target", flakeref=None, original_ref="./result", ) assert df_meta is None assert source.method == "none" assert source.path is None assert "store-path target" in source.message assert "./result" not in source.message assert "--meta-nixpkgs" in source.message def test_explicit_store_path_source_records_explicit_method(monkeypatch, tmp_path): nixpkgs_path = tmp_path / "nixpkgs" (nixpkgs_path / "lib").mkdir(parents=True) (nixpkgs_path / "lib" / ".version").write_text("25.11\n", encoding="utf-8") scanned = [] monkeypatch.setattr( sbomnix_meta.Meta, "_scan", lambda self, path: scanned.append(path) or "df", ) monkeypatch.setattr( sbomnix_meta_source, "is_nix_store_path", lambda path: path.as_posix() == nixpkgs_path.as_posix(), ) df_meta, source = sbomnix_meta.Meta().get_nixpkgs_meta_with_source( target_path="/nix/store/target", explicit_nixpkgs=nixpkgs_path.as_posix(), ) assert df_meta == "df" assert scanned == [nixpkgs_path.as_posix()] assert source == sbomnix_meta.NixpkgsMetaSource( method="explicit", 
path=nixpkgs_path.as_posix(), version="25.11", ) def test_explicit_flakeref_source_resolves_nixpkgs_path(monkeypatch): nixpkgs_path = "/nix/store/abc-source" scanned = [] monkeypatch.setattr( sbomnix_meta_source, "nixref_to_nixpkgs_path", lambda _nixref: pathlib.Path(nixpkgs_path), ) monkeypatch.setattr( sbomnix_meta.Meta, "_scan", lambda self, path: scanned.append(path) or "df", ) df_meta, source = sbomnix_meta.Meta().get_nixpkgs_meta_with_source( target_path="/nix/store/target", explicit_nixpkgs="github:NixOS/nixpkgs?rev=abc", ) assert df_meta == "df" assert scanned == [nixpkgs_path] assert source == sbomnix_meta.NixpkgsMetaSource( method="explicit", path=nixpkgs_path, flakeref="github:NixOS/nixpkgs?rev=abc", ) def test_mutable_explicit_path_is_normalized_before_scanning(monkeypatch, tmp_path): mutable_path = tmp_path / "nixpkgs-checkout" mutable_path.mkdir() store_path = pathlib.Path("/nix/store/normalized-source") scanned = [] monkeypatch.setattr( sbomnix_meta_source, "nixref_to_nixpkgs_path", lambda nixref: store_path if nixref == mutable_path.as_posix() else None, ) monkeypatch.setattr( sbomnix_meta.Meta, "_scan", lambda self, path: scanned.append(path) or "df", ) df_meta, source = sbomnix_meta.Meta().get_nixpkgs_meta_with_source( target_path="/nix/store/target", explicit_nixpkgs=mutable_path.as_posix(), ) assert df_meta == "df" assert scanned == [store_path.as_posix()] assert source == sbomnix_meta.NixpkgsMetaSource( method="explicit", path=store_path.as_posix(), flakeref=mutable_path.as_posix(), ) def test_mutable_explicit_path_is_rejected_if_not_cache_safe(monkeypatch, tmp_path): mutable_path = tmp_path / "nixpkgs-checkout" mutable_path.mkdir() monkeypatch.setattr( sbomnix_meta_source, "nixref_to_nixpkgs_path", lambda _nixref: None ) with pytest.raises(SbomnixError, match="immutable /nix/store source"): sbomnix_meta.Meta().get_nixpkgs_meta_with_source( target_path="/nix/store/target", explicit_nixpkgs=mutable_path.as_posix(), ) def test_nixos_toplevel_flakeref_prefers_configuration_pkgs_path( monkeypatch, tmp_path, ): nixpkgs_path = tmp_path / "nixpkgs" nixpkgs_path.mkdir() calls = [] expressions = [] fake_df = SimpleNamespace(empty=False) def fake_exec_cmd(cmd, **_kwargs): calls.append(cmd) if cmd == [ "nix", "eval", "--raw", '/flake#nixosConfigurations."host".pkgs.path', ]: return SimpleNamespace(stdout=f"{nixpkgs_path}\n", returncode=0) raise AssertionError(f"unexpected command: {cmd}") monkeypatch.setattr( sbomnix_meta_source, "nix_cmd", lambda *args, impure=False: ["nix", *args] + (["--impure"] if impure else []), ) monkeypatch.setattr(sbomnix_meta_source, "exec_cmd", fake_exec_cmd) monkeypatch.setattr( sbomnix_meta_source, "nixref_to_nixpkgs_path", lambda _nixref: (_ for _ in ()).throw( AssertionError("lock-node lookup should not run") ), ) monkeypatch.setattr( sbomnix_meta.Meta, "_scan_expression", lambda self, expression, *, cache_key=None, impure=False: ( expressions.append((expression, cache_key, impure)) or fake_df ), ) df_meta, source = sbomnix_meta.Meta().get_nixpkgs_meta_with_source( target_path="/nix/store/target", flakeref="/flake#nixosConfigurations.host.config.system.build.toplevel", original_ref="/flake#nixosConfigurations.host.config.system.build.toplevel", ) assert df_meta is fake_df assert calls == [ ["nix", "eval", "--raw", '/flake#nixosConfigurations."host".pkgs.path'] ] assert source.method == "flakeref-target" assert source.path == nixpkgs_path.as_posix() assert source.flakeref == '/flake#nixosConfigurations."host".pkgs.path' assert source.message == 
"Scanning evaluated NixOS package set from flakeref" assert expressions == [ ( 'let\n flake = builtins.getFlake "/flake";\nin\n' ' flake.nixosConfigurations."host".pkgs\n', None, False, ) ] def test_nixos_toplevel_expression_locks_relative_flake_refs( monkeypatch, tmp_path, ): nixpkgs_path = tmp_path / "nixpkgs" nixpkgs_path.mkdir() source_path = "/nix/store/root-source" calls = [] expressions = [] fake_df = SimpleNamespace(empty=False) def fake_exec_cmd(cmd, **_kwargs): calls.append(cmd) if cmd == [ "nix", "eval", "--raw", '.#nixosConfigurations."host".pkgs.path', ]: return SimpleNamespace(stdout=f"{nixpkgs_path}\n", returncode=0) if cmd == ["nix", "flake", "metadata", ".", "--json"]: return SimpleNamespace( stdout=json.dumps( { "path": source_path, "locked": {"narHash": "sha256-abc"}, } ), returncode=0, ) raise AssertionError(f"unexpected command: {cmd}") monkeypatch.setattr( sbomnix_meta_source, "nix_cmd", lambda *args, impure=False: ["nix", *args] + (["--impure"] if impure else []), ) monkeypatch.setattr(sbomnix_meta_source, "exec_cmd", fake_exec_cmd) monkeypatch.setattr( sbomnix_meta.Meta, "_scan_expression", lambda self, expression, *, cache_key=None, impure=False: ( expressions.append((expression, cache_key, impure)) or fake_df ), ) df_meta, source = sbomnix_meta.Meta().get_nixpkgs_meta_with_source( target_path="/nix/store/target", flakeref=".#nixosConfigurations.host.config.system.build.toplevel", original_ref=".#nixosConfigurations.host.config.system.build.toplevel", ) locked_ref = f"path:{source_path}?narHash=sha256-abc" assert df_meta is fake_df assert source.method == "flakeref-target" assert source.flakeref == '.#nixosConfigurations."host".pkgs.path' assert calls == [ ["nix", "eval", "--raw", '.#nixosConfigurations."host".pkgs.path'], ["nix", "flake", "metadata", ".", "--json"], ] cache_key = "nixos-pkgs:" + json.dumps( [locked_ref, "host"], separators=(",", ":"), ) assert expressions == [ ( f'let\n flake = builtins.getFlake "{locked_ref}";\nin\n' ' flake.nixosConfigurations."host".pkgs\n', cache_key, False, ) ] def test_nixos_toplevel_expression_preserves_locked_subflake_dir( monkeypatch, tmp_path, ): nixpkgs_path = tmp_path / "nixpkgs" nixpkgs_path.mkdir() source_path = "/nix/store/root-source" calls = [] expressions = [] fake_df = SimpleNamespace(empty=False) def fake_exec_cmd(cmd, **_kwargs): calls.append(cmd) if cmd == [ "nix", "eval", "--raw", 'path:.?dir=sub/flake#nixosConfigurations."host".pkgs.path', ]: return SimpleNamespace(stdout=f"{nixpkgs_path}\n", returncode=0) if cmd == ["nix", "flake", "metadata", "path:.?dir=sub/flake", "--json"]: return SimpleNamespace( stdout=json.dumps( { "path": source_path, "locked": { "narHash": "sha256-abc", "dir": "sub/flake", }, } ), returncode=0, ) raise AssertionError(f"unexpected command: {cmd}") monkeypatch.setattr( sbomnix_meta_source, "nix_cmd", lambda *args, impure=False: ["nix", *args] + (["--impure"] if impure else []), ) monkeypatch.setattr(sbomnix_meta_source, "exec_cmd", fake_exec_cmd) monkeypatch.setattr( sbomnix_meta.Meta, "_scan_expression", lambda self, expression, *, cache_key=None, impure=False: ( expressions.append((expression, cache_key, impure)) or fake_df ), ) df_meta, source = sbomnix_meta.Meta().get_nixpkgs_meta_with_source( target_path="/nix/store/target", flakeref=( "path:.?dir=sub/flake#nixosConfigurations.host.config.system.build.toplevel" ), original_ref=( "path:.?dir=sub/flake#nixosConfigurations.host.config.system.build.toplevel" ), ) locked_ref = f"path:{source_path}?narHash=sha256-abc&dir=sub/flake" 
assert df_meta is fake_df assert source.method == "flakeref-target" assert calls == [ [ "nix", "eval", "--raw", 'path:.?dir=sub/flake#nixosConfigurations."host".pkgs.path', ], ["nix", "flake", "metadata", "path:.?dir=sub/flake", "--json"], ] cache_key = "nixos-pkgs:" + json.dumps( [locked_ref, "host"], separators=(",", ":"), ) assert expressions == [ ( f'let\n flake = builtins.getFlake "{locked_ref}";\nin\n' ' flake.nixosConfigurations."host".pkgs\n', cache_key, False, ) ] def test_nixos_toplevel_flakeref_handles_quoted_configuration_names( monkeypatch, tmp_path, ): nixpkgs_path = tmp_path / "nixpkgs" nixpkgs_path.mkdir() expressions = [] fake_df = SimpleNamespace(empty=False) def fake_exec_cmd(cmd, **_kwargs): if cmd == [ "nix", "eval", "--raw", '/flake#nixosConfigurations."host.example.com".pkgs.path', ]: return SimpleNamespace(stdout=f"{nixpkgs_path}\n", returncode=0) raise AssertionError(f"unexpected command: {cmd}") monkeypatch.setattr( sbomnix_meta_source, "nix_cmd", lambda *args, impure=False: ["nix", *args] + (["--impure"] if impure else []), ) monkeypatch.setattr(sbomnix_meta_source, "exec_cmd", fake_exec_cmd) monkeypatch.setattr( sbomnix_meta.Meta, "_scan_expression", lambda self, expression, *, cache_key=None, impure=False: ( expressions.append((expression, cache_key, impure)) or fake_df ), ) df_meta, source = sbomnix_meta.Meta().get_nixpkgs_meta_with_source( target_path="/nix/store/target", flakeref=( '/flake#nixosConfigurations."host.example.com".config.system.build.toplevel' ), original_ref=( '/flake#nixosConfigurations."host.example.com".config.system.build.toplevel' ), ) assert df_meta is fake_df assert source.method == "flakeref-target" assert source.flakeref == ( '/flake#nixosConfigurations."host.example.com".pkgs.path' ) assert expressions == [ ( 'let\n flake = builtins.getFlake "/flake";\nin\n' ' flake.nixosConfigurations."host.example.com".pkgs\n', None, False, ) ] def test_nixos_toplevel_flakeref_metadata_eval_honors_impure(monkeypatch, tmp_path): nixpkgs_path = tmp_path / "nixpkgs" nixpkgs_path.mkdir() calls = [] expressions = [] fake_df = SimpleNamespace(empty=False) def fake_exec_cmd(cmd, **_kwargs): calls.append(cmd) return SimpleNamespace(stdout=f"{nixpkgs_path}\n", returncode=0) monkeypatch.setattr( sbomnix_meta_source, "nix_cmd", lambda *args, impure=False: ["nix", *args] + (["--impure"] if impure else []), ) monkeypatch.setattr(sbomnix_meta_source, "exec_cmd", fake_exec_cmd) monkeypatch.setattr( sbomnix_meta.Meta, "_scan_expression", lambda self, expression, *, cache_key=None, impure=False: ( expressions.append((expression, cache_key, impure)) or fake_df ), ) df_meta, source = sbomnix_meta.Meta().get_nixpkgs_meta_with_source( target_path="/nix/store/target", flakeref="/flake#nixosConfigurations.host.config.system.build.toplevel", original_ref="/flake#nixosConfigurations.host.config.system.build.toplevel", impure=True, ) assert df_meta is fake_df assert source.method == "flakeref-target" assert calls == [ [ "nix", "eval", "--raw", '/flake#nixosConfigurations."host".pkgs.path', "--impure", ] ] assert expressions == [ ( 'let\n flake = builtins.getFlake "/flake";\nin\n' ' flake.nixosConfigurations."host".pkgs\n', None, True, ) ] assert source.expression_cache_key is None assert source.expression_impure is True def test_nixos_toplevel_expression_cache_uses_only_stable_refs(monkeypatch, tmp_path): nixpkgs_path = tmp_path / "nixpkgs" nixpkgs_path.mkdir() expressions = [] fake_df = SimpleNamespace(empty=False) def fake_exec_cmd(_cmd, **_kwargs): return 
SimpleNamespace(stdout=f"{nixpkgs_path}\n", returncode=0) monkeypatch.setattr( sbomnix_meta_source, "nix_cmd", lambda *args, impure=False: ["nix", *args] + (["--impure"] if impure else []), ) monkeypatch.setattr(sbomnix_meta_source, "exec_cmd", fake_exec_cmd) monkeypatch.setattr( sbomnix_meta.Meta, "_scan_expression", lambda self, expression, *, cache_key=None, impure=False: ( expressions.append((expression, cache_key, impure)) or fake_df ), ) df_meta, source = sbomnix_meta.Meta().get_nixpkgs_meta_with_source( target_path="/nix/store/target", flakeref=( "github:example/flake?rev=abc" '#nixosConfigurations."host:8080".config.system.build.toplevel' ), original_ref=( "github:example/flake?rev=abc" '#nixosConfigurations."host:8080".config.system.build.toplevel' ), ) cache_key = "nixos-pkgs:" + json.dumps( ["github:example/flake?rev=abc", "host:8080"], separators=(",", ":"), ) assert df_meta is fake_df assert source.method == "flakeref-target" assert expressions == [ ( 'let\n flake = builtins.getFlake "github:example/flake?rev=abc";\n' "in\n" ' flake.nixosConfigurations."host:8080".pkgs\n', cache_key, False, ) ] def test_nixos_toplevel_expression_scan_failure_skips_metadata( monkeypatch, tmp_path, ): nixpkgs_path = tmp_path / "nixpkgs" nixpkgs_path.mkdir() scanned = [] def fake_exec_cmd(_cmd, **_kwargs): return SimpleNamespace(stdout=f"{nixpkgs_path}\n", returncode=0) monkeypatch.setattr( sbomnix_meta_source, "nix_cmd", lambda *args, impure=False: ["nix", *args] + (["--impure"] if impure else []), ) monkeypatch.setattr(sbomnix_meta_source, "exec_cmd", fake_exec_cmd) monkeypatch.setattr( sbomnix_meta.Meta, "_scan_expression", lambda *_args, **_kwargs: None, ) monkeypatch.setattr( sbomnix_meta.Meta, "_scan", lambda self, path: scanned.append(path), ) df_meta, source = sbomnix_meta.Meta().get_nixpkgs_meta_with_source( target_path="/nix/store/target", flakeref="/flake#nixosConfigurations.host.config.system.build.toplevel", original_ref="/flake#nixosConfigurations.host.config.system.build.toplevel", ) assert df_meta is None assert scanned == [] assert source.method == "flakeref-target" assert source.expression is not None assert source.path == nixpkgs_path.as_posix() assert source.message == ( "Evaluated package-set metadata scan failed. Skipping nixpkgs metadata." 
) def test_nixos_toplevel_flakeref_without_pkgs_path_returns_message(monkeypatch): calls = [] def fake_exec_cmd(cmd, **_kwargs): calls.append(cmd) if cmd == [ "nix", "eval", "--raw", '/flake#nixosConfigurations."host".pkgs.path', ]: return SimpleNamespace(stdout="", stderr="missing", returncode=1) raise AssertionError(f"unexpected command: {cmd}") monkeypatch.setattr( sbomnix_meta_source, "nix_cmd", lambda *args, impure=False: ["nix", *args] + (["--impure"] if impure else []), ) monkeypatch.setattr(sbomnix_meta_source, "exec_cmd", fake_exec_cmd) df_meta, source = sbomnix_meta.Meta().get_nixpkgs_meta_with_source( target_path="/nix/store/target", flakeref="/flake#nixosConfigurations.host.config.system.build.toplevel", original_ref="/flake#nixosConfigurations.host.config.system.build.toplevel", ) assert df_meta is None assert calls == [ ["nix", "eval", "--raw", '/flake#nixosConfigurations."host".pkgs.path'], ] assert source.method == "none" assert source.path is None assert "NixOS configuration flakeref" in source.message assert "--meta-nixpkgs" in source.message def test_nixos_toplevel_flakeref_without_pkgs_returns_message( monkeypatch, ): calls = [] def fake_exec_cmd(cmd, **_kwargs): calls.append(cmd) return SimpleNamespace(stdout="", stderr="missing", returncode=1) monkeypatch.setattr( sbomnix_meta_source, "nix_cmd", lambda *args, impure=False: ["nix", *args] + (["--impure"] if impure else []), ) monkeypatch.setattr(sbomnix_meta_source, "exec_cmd", fake_exec_cmd) monkeypatch.setattr( sbomnix_meta_source, "nixref_to_nixpkgs_path", lambda _nixref: None, ) df_meta, source = sbomnix_meta.Meta().get_nixpkgs_meta_with_source( target_path="/nix/store/target", flakeref="/flake#nixosConfigurations.host.config.system.build.toplevel", original_ref="/flake#nixosConfigurations.host.config.system.build.toplevel", ) assert df_meta is None assert calls == [ ["nix", "eval", "--raw", '/flake#nixosConfigurations."host".pkgs.path'], ] assert source.method == "none" assert source.path is None assert "NixOS configuration flakeref" in source.message assert "--meta-nixpkgs" in source.message def test_plain_nixos_configuration_attrset_is_not_target_inferred( monkeypatch, tmp_path, ): nixpkgs_path = tmp_path / "lock-source" nixpkgs_path.mkdir() scanned = [] monkeypatch.setattr( sbomnix_meta.Meta, "_scan", lambda self, path: scanned.append(path) or "df", ) monkeypatch.setattr( sbomnix_meta_source, "nixref_to_nixpkgs_path", lambda _nixref: nixpkgs_path, ) df_meta, source = sbomnix_meta.Meta().get_nixpkgs_meta_with_source( target_path="/nix/store/target", flakeref="/flake#nixosConfigurations.host", original_ref="/flake#nixosConfigurations.host", ) assert df_meta == "df" assert scanned == [nixpkgs_path.as_posix()] assert source.method == "flakeref-lock" def test_meta_scan_uses_already_resolved_scanner_path(monkeypatch): calls = [] fake_df = SimpleNamespace(empty=False) class FakeScanner: """Scanner stand-in that records normalized scan paths.""" def scan(self, path): raise AssertionError(f"scan should not resolve path again: {path}") def scan_path(self, path): calls.append(path) def to_df(self): return fake_df meta = sbomnix_meta.Meta() monkeypatch.setattr(meta.cache, "get", lambda _key: None) monkeypatch.setattr(meta.cache, "set", lambda **_kwargs: None) monkeypatch.setattr(sbomnix_meta, "NixMetaScanner", FakeScanner) assert meta._scan("/nix/store/source") is fake_df assert calls == ["/nix/store/source"] ================================================ FILE: tests/test_nixmeta_source_export.py 
================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Tests for SBOM-level nixpkgs metadata source export.""" import uuid import pandas as pd from sbomnix.builder import SbomBuilder from sbomnix.meta import NixpkgsMetaSource def _make_minimal_sbom(): sbomdb = object.__new__(SbomBuilder) sbomdb.uid = "store_path" sbomdb.nix_path = "/nix/store/target" sbomdb.buildtime = False sbomdb.target_deriver = "/nix/store/target.drv" sbomdb.target_component_ref = "/nix/store/target.drv" sbomdb.depth = None sbomdb.uuid = uuid.uuid4() sbomdb.sbom_type = "runtime_only" sbomdb.nixpkgs_meta_source = NixpkgsMetaSource( method="flakeref-target", path="/nix/store/source", rev="1234", flakeref=".#target", version="25.11", message="base nixpkgs source metadata", ) sbomdb.df_sbomdb = pd.DataFrame( [ { "store_path": "/nix/store/target.drv", "pname": "target", "name": "target", "version": "1.0", "outputs": ["/nix/store/target"], "out": "/nix/store/target", "purl": "", "cpe": "", "urls": "", "patches": "", } ] ) return sbomdb def test_cdx_document_records_nixpkgs_metadata_source(monkeypatch): sbomdb = _make_minimal_sbom() monkeypatch.setattr( SbomBuilder, "lookup_dependencies", lambda *_args, **_kwargs: None ) cdx = sbomdb.to_cdx_data() properties = {prop["name"]: prop["value"] for prop in cdx["metadata"]["properties"]} assert properties["nixpkgs:metadata_source_method"] == "flakeref-target" assert properties["nixpkgs:path"] == "/nix/store/source" assert properties["nixpkgs:rev"] == "1234" assert properties["nixpkgs:flakeref"] == ".#target" assert properties["nixpkgs:version"] == "25.11" assert properties["nixpkgs:message"] == "base nixpkgs source metadata" def test_spdx_document_records_nixpkgs_metadata_source(monkeypatch): sbomdb = _make_minimal_sbom() monkeypatch.setattr( SbomBuilder, "lookup_dependencies", lambda *_args, **_kwargs: None ) spdx = sbomdb.to_spdx_data() assert "included dependencies: 'runtime_only'" in spdx["comment"] assert ( "nixpkgs metadata source: metadata_source_method=flakeref-target" in spdx["comment"] ) assert "path=/nix/store/source" in spdx["comment"] assert "rev=1234" in spdx["comment"] assert "message=base nixpkgs source metadata" in spdx["comment"] assert "warning=" not in spdx["comment"] ================================================ FILE: tests/test_osv_client.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Focused tests for the reusable OSV client.""" from vulnxscan.osv_client import OSV class FakeResponse: def __init__(self, payload): self._payload = payload self.status_code = 200 def json(self): return self._payload def raise_for_status(self): return None class FakeSession: def __init__(self): self.calls = [] def post(self, url, json=None, timeout=None): self.calls.append((url, json, timeout)) return FakeResponse( { "results": [ { "vulns": [ { "id": "OSV-1", "modified": "2024-01-01", } ] } ] } ) def test_osv_client_posts_with_timeout_and_parses_results(tmp_path): sbom_path = tmp_path / "sbom.json" sbom_path.write_text( ('{"metadata":{"component":{"name":"hello","version":"1.0"}},"components":[]}'), encoding="utf-8", ) session = FakeSession() osv = OSV(session=session, request_timeout=17) osv.query_vulns(sbom_path.as_posix(), ecosystems=["GIT"]) assert session.calls == [ ( "https://api.osv.dev/v1/querybatch", { 
"queries": [ { "version": "1.0", "package": { "name": "hello", "ecosystem": "GIT", }, } ] }, 17, ) ] assert osv.to_dataframe().to_dict(orient="records") == [ { "vuln_id": "OSV-1", "modified": "2024-01-01", "package": "hello", "version": "1.0", } ] ================================================ FILE: tests/test_provenance_batching.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Offline provenance tests that do not require CLI execution.""" import errno import json import subprocess from provenance.path_info import query_path_hashes def _path_info_paths(cmd): assert cmd[:5] == ["nix", "path-info", "--json", "--json-format", "1"] args = cmd[5:] if "--extra-experimental-features" in args: args = args[: args.index("--extra-experimental-features")] return args def test_provenance_hash_query_batches_on_e2big(): """Test provenance splits oversized path-info hash queries and preserves order.""" references = [f"/nix/store/hash-{idx}" for idx in range(5)] calls = [] def fake_exec_cmd(cmd, **_kwargs): if cmd[:5] == ["nix", "path-info", "--json", "--json-format", "1"]: batch = _path_info_paths(cmd) calls.append(batch) if len(batch) > 2: raise OSError(errno.E2BIG, "Argument list too long") path_info = { path: {"narHash": f"sha256:hash-{path.rsplit('-', 1)[-1]}"} for path in batch } return subprocess.CompletedProcess( cmd, 0, stdout=json.dumps(path_info), stderr="", ) raise AssertionError(f"unexpected command: {cmd}") hashes = query_path_hashes( references, exec_cmd_fn=fake_exec_cmd, ) assert hashes == [f"sha256:hash-{idx}" for idx in range(5)] assert calls == [ references, references[:2], references[2:], references[2:3], references[3:], ] ================================================ FILE: tests/test_provenance_path_info.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Focused tests for strict provenance path-info handling.""" import json import subprocess from types import SimpleNamespace import pytest from common.errors import InvalidNixJsonError, NixCommandError from common.nix_utils import normalize_nix_path_info from provenance.dependencies import DependencyHooks, dependency_paths from provenance.path_info import nar_hash_for_path, query_path_info def test_normalize_path_info_rejects_malformed_list_records(): with pytest.raises(InvalidNixJsonError, match="missing path string"): normalize_nix_path_info([{"narHash": "sha256-test"}]) def test_normalize_path_info_rejects_malformed_object_records(): with pytest.raises(InvalidNixJsonError, match="expected path-info record"): normalize_nix_path_info({"/nix/store/target": None}) def test_normalize_path_info_supports_list_records(): first = "/nix/store/11111111111111111111111111111111-first" second = "/nix/store/22222222222222222222222222222222-second" assert normalize_nix_path_info( [ {"path": first, "references": []}, {"storePath": second, "references": [first]}, ] ) == { first: {"path": first, "references": []}, second: {"storePath": second, "references": [first]}, } def test_nar_hash_for_path_rejects_missing_hash(): with pytest.raises(InvalidNixJsonError, match="missing `narHash`"): nar_hash_for_path({"/nix/store/target": {}}, "/nix/store/target") def test_nar_hash_for_path_rejects_missing_record(): with pytest.raises(InvalidNixJsonError, match="missing path-info record"): 
nar_hash_for_path({}, "/nix/store/target") def test_dependency_paths_rejects_mismatched_path_info_record(): requested = "/nix/store/11111111111111111111111111111111-requested.drv" returned = "/nix/store/22222222222222222222222222222222-other.drv" def fake_exec_cmd(cmd, **_kwargs): return SimpleNamespace(stdout=json.dumps({returned: {"references": []}})) with pytest.raises(InvalidNixJsonError, match="missing path-info record"): dependency_paths( requested, hooks=DependencyHooks(exec_cmd_fn=fake_exec_cmd), ) def test_dependency_paths_recursive_includes_derivation_outputs(): root_drv = "/nix/store/11111111111111111111111111111111-root.drv" dep_drv = "/nix/store/22222222222222222222222222222222-dependency.drv" root_out = "/nix/store/33333333333333333333333333333333-root" dep_out = "/nix/store/44444444444444444444444444444444-dependency" def fake_exec_cmd(cmd, **_kwargs): assert "--recursive" in cmd return SimpleNamespace( stdout=json.dumps( { root_drv: {"references": [dep_drv]}, dep_drv: {"references": []}, } ) ) assert dependency_paths( root_drv, recursive=True, outputs_by_path={ root_out: ({}, {}), dep_out: ({}, {}), }, hooks=DependencyHooks(exec_cmd_fn=fake_exec_cmd), ) == [ root_drv, dep_drv, root_out, dep_out, ] def test_query_path_info_wraps_nix_command_failures(): def fail_exec_cmd(cmd, **_kwargs): raise subprocess.CalledProcessError( returncode=1, cmd=cmd, stderr="unsupported path-info json format", ) with pytest.raises(NixCommandError, match="unsupported path-info json format"): query_path_info( ["/nix/store/11111111111111111111111111111111-target-1.0"], exec_cmd_fn=fail_exec_cmd, ) ================================================ FILE: tests/test_provenance_subjects.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Focused tests for provenance digest and subject handling.""" import json import logging import subprocess from types import SimpleNamespace import pytest from common.errors import ( InvalidNixJsonError, MissingNixDerivationMetadataError, NixCommandError, ) from common.log import LOG from common.nix_utils import parse_nix_derivation_show from provenance import main as provenance_main from provenance.dependencies import ( DependencyHooks, dependency_package, get_dependencies, ) from provenance.digests import normalize_digest, output_digest from provenance.subjects import SubjectHooks, get_subjects, output_path def _dependency_hooks(*, exec_cmd_fn, query_path_hashes_fn=None): if query_path_hashes_fn is None: return DependencyHooks( exec_cmd_fn=exec_cmd_fn, parse_nix_derivation_show_fn=parse_nix_derivation_show, normalize_digest_fn=normalize_digest, output_digest_fn=output_digest, output_path_fn=output_path, log=LOG, ) return DependencyHooks( exec_cmd_fn=exec_cmd_fn, query_path_hashes_fn=query_path_hashes_fn, parse_nix_derivation_show_fn=parse_nix_derivation_show, normalize_digest_fn=normalize_digest, output_digest_fn=output_digest, output_path_fn=output_path, log=LOG, ) def _subject_hooks(exec_cmd_fn): return SubjectHooks( exec_cmd_fn=exec_cmd_fn, normalize_digest_fn=normalize_digest, output_digest_fn=output_digest, output_path_fn=output_path, log=LOG, ) def _path_info_paths(cmd): if cmd[:5] != ["nix", "path-info", "--json", "--json-format", "1"]: return None args = cmd[5:] if "--extra-experimental-features" in args: args = args[: args.index("--extra-experimental-features")] return args def 
test_get_dependencies_supports_nix_2_33_wrapped_json(): drv_path = "/nix/store/0aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-root.drv" dep_basename = "1bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb-dependency.drv" dep_path = f"/nix/store/{dep_basename}" digest = "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad" def fake_exec_cmd(cmd, **kwargs): if _path_info_paths(cmd) == [drv_path]: return SimpleNamespace( stdout=json.dumps({drv_path: {"references": [dep_path]}}) ) if cmd[:4] == ["nix", "derivation", "show", "-r"]: return SimpleNamespace( stdout=json.dumps( { "derivations": { dep_basename: { "name": "dependency", "env": {"version": "1.2.3"}, } }, "version": 4, } ) ) raise AssertionError(f"unexpected command: {cmd} kwargs={kwargs}") assert get_dependencies( drv_path, hooks=_dependency_hooks( exec_cmd_fn=fake_exec_cmd, query_path_hashes_fn=lambda _paths, **_kwargs: [ "sha256:1b8m03r63zqhnjf7l5wnldhh7c134ap5vpj0850ymkq1iyzicy5s" ], ), ) == [ { "name": "dependency", "uri": dep_path, "digest": {"sha256": digest}, "annotations": {"version": "1.2.3"}, } ] def test_normalize_digest_does_not_shell_out(): assert normalize_digest( "sha256:1b8m03r63zqhnjf7l5wnldhh7c134ap5vpj0850ymkq1iyzicy5s" ) == {"sha256": "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad"} assert normalize_digest("sha256-ungWv48Bz+pBQUDeXa4iI7ADYaOWF3qctBD/YfIAFa0=") == { "sha256": "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad" } assert normalize_digest( "77a94a83ccab42a68278ac5d3e340dcefecd736dd4feff1de71dec137b6b44ce", "r:sha256", ) == {"sha256": "77a94a83ccab42a68278ac5d3e340dcefecd736dd4feff1de71dec137b6b44ce"} def test_normalize_digest_rejects_overflowing_nix32_values(): assert normalize_digest("sha256:" + ("z" * 52)) is None def test_dependency_package_skips_non_normalized_digest(caplog): drv_path = "/nix/store/1bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb-dependency.drv" with caplog.at_level(logging.WARNING, logger=LOG.name): package = dependency_package( drv_path, "sha999:abc", {}, {}, hooks=DependencyHooks( normalize_digest_fn=normalize_digest, output_digest_fn=output_digest, log=LOG, ), ) assert package is None assert "Cannot determine digest" in caplog.text def test_get_dependencies_prefers_fixed_output_digest_for_output_paths(): drv_path = "/nix/store/0aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-root.drv" dep_drv_basename = "1bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb-source.drv" dep_out_basename = "2ccccccccccccccccccccccccccccccc-source" dep_out_path = f"/nix/store/{dep_out_basename}" metadata_digest = "77a94a83ccab42a68278ac5d3e340dcefecd736dd4feff1de71dec137b6b44ce" def fake_exec_cmd(cmd, **kwargs): if _path_info_paths(cmd) == [drv_path]: return SimpleNamespace( stdout=json.dumps({drv_path: {"references": [dep_out_path]}}) ) if cmd[:4] == ["nix", "derivation", "show", "-r"]: return SimpleNamespace( stdout=json.dumps( { "derivations": { dep_drv_basename: { "name": "source", "outputs": { "out": { "path": dep_out_basename, "hash": metadata_digest, "hashAlgo": "r:sha256", } }, "env": {"version": "1.2.3"}, } }, "version": 4, } ) ) raise AssertionError(f"unexpected command: {cmd} kwargs={kwargs}") assert get_dependencies( drv_path, hooks=_dependency_hooks( exec_cmd_fn=fake_exec_cmd, query_path_hashes_fn=lambda _paths, **_kwargs: [ "sha256:09i0w2qz3i5yp7m3yziq4z2n2r2v9s6d3n8j4x1q8k0m5a6b7c8d" ], ), ) == [ { "name": "source", "uri": dep_out_path, "digest": {"sha256": metadata_digest}, "annotations": {"version": "1.2.3"}, } ] def test_get_dependencies_maps_env_only_output_paths_back_to_derivations(): drv_path = 
"/nix/store/0aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-root.drv" dep_out_basename = "2ccccccccccccccccccccccccccccccc-source" dep_out_path = f"/nix/store/{dep_out_basename}" digest = "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad" def fake_exec_cmd(cmd, **kwargs): if _path_info_paths(cmd) == [drv_path]: return SimpleNamespace( stdout=json.dumps({drv_path: {"references": [dep_out_path]}}) ) if cmd[:4] == ["nix", "derivation", "show", "-r"]: return SimpleNamespace( stdout=json.dumps( { "derivations": { "1bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb-source.drv": { "name": "special-source", "outputs": {"out": {"method": "nar"}}, "env": { "out": dep_out_basename, "version": "1.2.3", }, } }, "version": 4, } ) ) raise AssertionError(f"unexpected command: {cmd} kwargs={kwargs}") assert get_dependencies( drv_path, hooks=_dependency_hooks( exec_cmd_fn=fake_exec_cmd, query_path_hashes_fn=lambda _paths, **_kwargs: [ "sha256:1b8m03r63zqhnjf7l5wnldhh7c134ap5vpj0850ymkq1iyzicy5s" ], ), ) == [ { "name": "special-source", "uri": dep_out_path, "digest": {"sha256": digest}, "annotations": {"version": "1.2.3"}, } ] def test_get_dependencies_wraps_derivation_show_failures(): drv_path = "/nix/store/0aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-root.drv" def fake_exec_cmd(cmd, **_kwargs): raise subprocess.CalledProcessError( returncode=1, cmd=cmd, stderr="derivation show failed", ) with pytest.raises(NixCommandError, match="derivation show failed"): get_dependencies( drv_path, hooks=_dependency_hooks(exec_cmd_fn=fake_exec_cmd), ) def test_get_subjects_falls_back_to_env_output_paths(): output_path_value = "/custom/store/1bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb-nghttp2-1.68.1" output_hash = "1b8m03r63zqhnjf7l5wnldhh7c134ap5vpj0850ymkq1iyzicy5s" digest = "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad" def fake_exec_cmd(cmd, **kwargs): if _path_info_paths(cmd) == [output_path_value]: return SimpleNamespace( stdout=json.dumps( {output_path_value: {"narHash": f"sha256:{output_hash}"}} ) ) raise AssertionError(f"unexpected command: {cmd} kwargs={kwargs}") assert get_subjects( {"out": {"method": "nar"}}, env={"out": output_path_value}, hooks=_subject_hooks(fake_exec_cmd), ) == [ { "name": "out", "uri": output_path_value, "digest": {"sha256": digest}, } ] def test_get_subjects_prefers_derivation_hash_for_realized_flat_outputs(): output_path_value = "/custom/store/1bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb-nghttp2-1.68.1" output_hash = "sha256-ungWv48Bz+pBQUDeXa4iI7ADYaOWF3qctBD/YfIAFa0=" digest = "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad" def fail_exec_cmd(cmd, **kwargs): raise AssertionError(f"unexpected command: {cmd} kwargs={kwargs}") assert get_subjects( {"out": {"method": "flat", "hash": output_hash}}, env={"out": output_path_value}, hooks=_subject_hooks(fail_exec_cmd), ) == [ { "name": "out", "uri": output_path_value, "digest": {"sha256": digest}, } ] def test_get_subjects_uses_derivation_hash_when_output_is_not_realized(): output_path_value = "/custom/store/1bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb-nghttp2-1.68.1" output_hash = "sha256-ungWv48Bz+pBQUDeXa4iI7ADYaOWF3qctBD/YfIAFa0=" digest = "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad" def fail_exec_cmd(cmd, **kwargs): raise AssertionError(f"unexpected command: {cmd} kwargs={kwargs}") assert get_subjects( {"out": {"method": "nar", "hash": output_hash}}, env={"out": output_path_value}, hooks=_subject_hooks(fail_exec_cmd), ) == [ { "name": "out", "uri": output_path_value, "digest": {"sha256": digest}, } ] def 
test_get_subjects_supports_resource_sha256_metadata(): output_path_value = "/custom/store/1bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb-nghttp2-1.68.1" digest = "77a94a83ccab42a68278ac5d3e340dcefecd736dd4feff1de71dec137b6b44ce" def fail_exec_cmd(cmd, **kwargs): raise AssertionError(f"unexpected command: {cmd} kwargs={kwargs}") assert get_subjects( { "out": { "hash": digest, "hashAlgo": "r:sha256", } }, env={"out": output_path_value}, hooks=_subject_hooks(fail_exec_cmd), ) == [ { "name": "out", "uri": output_path_value, "digest": {"sha256": digest}, } ] def test_get_subjects_skips_unrealized_outputs_without_digest(): output_path_value = "/custom/store/2ccccccccccccccccccccccccccccccc-nghttp2-doc" def fake_exec_cmd(cmd, **_kwargs): assert _path_info_paths(cmd) == [output_path_value] assert not get_subjects( {"out": {"method": "nar"}}, env={"out": output_path_value}, hooks=_subject_hooks(fake_exec_cmd), ) def test_get_subjects_skip_only_missing_unrealized_outputs(): output_path_value = "/custom/store/1bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb-nghttp2-1.68.1" missing_path = "/custom/store/2ccccccccccccccccccccccccccccccc-nghttp2-doc" output_hash = "1b8m03r63zqhnjf7l5wnldhh7c134ap5vpj0850ymkq1iyzicy5s" digest = "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad" def fake_exec_cmd(cmd, **kwargs): if _path_info_paths(cmd) == [output_path_value]: return SimpleNamespace( stdout=json.dumps( {output_path_value: {"narHash": f"sha256:{output_hash}"}} ) ) if _path_info_paths(cmd) == [missing_path]: return None raise AssertionError(f"unexpected command: {cmd} kwargs={kwargs}") assert get_subjects( {"out": {"path": output_path_value}, "doc": {"path": missing_path}}, hooks=_subject_hooks(fake_exec_cmd), ) == [ { "name": "out", "uri": output_path_value, "digest": {"sha256": digest}, } ] def test_provenance_uses_store_path_hint_for_nix_2_33_outputs_without_path(monkeypatch): target = "/custom/store/0aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-root.drv" drv_basename = "0aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-root.drv" out_basename = "1bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb-root" output_hash = "sha256-ungWv48Bz+pBQUDeXa4iI7ADYaOWF3qctBD/YfIAFa0=" digest = "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad" def fake_exec_cmd(cmd, **kwargs): if cmd[:3] == ["nix", "derivation", "show"]: assert cmd[3] == target return SimpleNamespace( stdout=json.dumps( { "version": 4, "derivations": { drv_basename: { "name": "root", "outputs": { "out": { "method": "nar", "hash": output_hash, } }, "env": {"out": out_basename}, } }, } ) ) raise AssertionError(f"unexpected command: {cmd} kwargs={kwargs}") monkeypatch.setattr(provenance_main, "exec_cmd", fake_exec_cmd) monkeypatch.setattr( provenance_main, "get_dependencies", lambda *_args, **_kwargs: [] ) metadata = provenance_main.BuildMeta("", "", "", "", "", "{}", "{}") provenance = provenance_main.provenance(target, metadata) assert provenance["subject"] == [ { "name": "out", "uri": f"/custom/store/{out_basename}", "digest": {"sha256": digest}, } ] def test_provenance_wraps_target_derivation_show_failures(monkeypatch): target = "/custom/store/0aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-root.drv" def fake_exec_cmd(cmd, **_kwargs): raise subprocess.CalledProcessError( returncode=1, cmd=cmd, stderr="target derivation show failed", ) monkeypatch.setattr(provenance_main, "exec_cmd", fake_exec_cmd) metadata = provenance_main.BuildMeta("", "", "", "", "", "{}", "{}") with pytest.raises(NixCommandError, match="target derivation show failed"): provenance_main.provenance(target, metadata) def 
test_provenance_rejects_empty_target_derivation_metadata(monkeypatch): target = "/custom/store/0aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-root.drv" def fake_exec_cmd(cmd, **_kwargs): if cmd[:3] == ["nix", "derivation", "show"]: return SimpleNamespace(stdout=json.dumps({"version": 4, "derivations": {}})) raise AssertionError(f"unexpected command: {cmd}") monkeypatch.setattr(provenance_main, "exec_cmd", fake_exec_cmd) metadata = provenance_main.BuildMeta("", "", "", "", "", "{}", "{}") with pytest.raises( MissingNixDerivationMetadataError, match="No derivation metadata found", ): provenance_main.provenance(target, metadata) def test_provenance_rejects_target_derivation_without_outputs(monkeypatch): target = "/custom/store/0aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-root.drv" drv_basename = "0aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-root.drv" def fake_exec_cmd(cmd, **_kwargs): if cmd[:3] == ["nix", "derivation", "show"]: return SimpleNamespace( stdout=json.dumps( { "version": 4, "derivations": { drv_basename: { "name": "root", "env": {"name": "root"}, } }, } ) ) raise AssertionError(f"unexpected command: {cmd}") monkeypatch.setattr(provenance_main, "exec_cmd", fake_exec_cmd) metadata = provenance_main.BuildMeta("", "", "", "", "", "{}", "{}") with pytest.raises( InvalidNixJsonError, match=r"missing `outputs` in target derivation", ): provenance_main.provenance(target, metadata) def test_provenance_keeps_fixed_output_subjects_when_output_is_not_realized( monkeypatch, ): target = "/custom/store/0aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-root.drv" drv_basename = "0aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-root.drv" out_basename = "1bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb-root" output_hash = "sha256-ungWv48Bz+pBQUDeXa4iI7ADYaOWF3qctBD/YfIAFa0=" digest = "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad" def fake_exec_cmd(cmd, **kwargs): if cmd[:3] == ["nix", "derivation", "show"]: assert cmd[3] == target return SimpleNamespace( stdout=json.dumps( { "version": 4, "derivations": { drv_basename: { "name": "root", "outputs": { "out": {"method": "nar", "hash": output_hash} }, "env": {"out": out_basename}, } }, } ) ) raise AssertionError(f"unexpected command: {cmd} kwargs={kwargs}") monkeypatch.setattr(provenance_main, "exec_cmd", fake_exec_cmd) monkeypatch.setattr( provenance_main, "get_dependencies", lambda *_args, **_kwargs: [] ) metadata = provenance_main.BuildMeta("", "", "", "", "", "{}", "{}") provenance = provenance_main.provenance(target, metadata) assert provenance["subject"] == [ { "name": "out", "uri": f"/custom/store/{out_basename}", "digest": {"sha256": digest}, } ] ================================================ FILE: tests/test_repology_adapter.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Offline tests for the Repology adapter.""" import json import pytest from repology.adapter import RepologyAdapter, RepologyQuery from repology.exceptions import RepologyNoMatchingPackages from repology.session import REPOLOGY_REQUEST_TIMEOUT from tests.testpaths import RESOURCES_DIR REPOLOGY_FIXTURES_DIR = RESOURCES_DIR / "repology" class FakeResponse: def __init__(self, text, status_code=200): self.text = text self.status_code = status_code def raise_for_status(self): if self.status_code >= 400: raise RuntimeError(f"unexpected status code: {self.status_code}") class MappingSession: def __init__(self, responses): self.responses = responses self.calls = [] def get(self, url, timeout=None): 
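        # Record each (url, timeout) pair so tests can assert the exact
        # request order and the timeout passed to the session.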
self.calls.append((url, timeout)) if url not in self.responses: raise AssertionError(f"unexpected URL requested: {url}") return self.responses[url] def _fixture_text(name): return (REPOLOGY_FIXTURES_DIR / name).read_text(encoding="utf-8") def test_repology_adapter_pkg_exact_parses_fixture_and_uses_timeout(): url = "https://repology.org/projects/?search=hello&inrepo=nix_unstable" session = MappingSession( { url: FakeResponse(_fixture_text("projects_hello.html")), } ) df = RepologyAdapter(session=session).query( RepologyQuery( repository="nix_unstable", pkg_exact="hello", ) ) assert session.calls == [(url, REPOLOGY_REQUEST_TIMEOUT)] assert list(df["package"].unique()) == ["hello"] assert set(df["status"]) == {"newest", "outdated"} outdated = df[df["status"] == "outdated"].iloc[0] assert outdated["version"] == "2.10" assert outdated["potentially_vulnerable"] == "1" assert outdated["newest_upstream_release"] == "2.11;2.12-rc1" assert outdated["repo_version_classify"] == "repo_pkg_needs_update" def test_repology_adapter_pkg_exact_raises_for_empty_results(): url = "https://repology.org/projects/?search=missing&inrepo=nix_unstable" session = MappingSession( { url: FakeResponse(_fixture_text("projects_empty.html")), } ) with pytest.raises(RepologyNoMatchingPackages): RepologyAdapter(session=session).query( RepologyQuery( repository="nix_unstable", pkg_exact="missing", ) ) assert session.calls == [(url, REPOLOGY_REQUEST_TIMEOUT)] def test_repology_adapter_sbom_query_marks_special_statuses(tmp_path): sbom_path = tmp_path / "sbom.cdx.json" sbom_path.write_text( json.dumps( { "metadata": {}, "components": [ {"name": "hello", "version": "2.10"}, {"name": "archive.tar.gz", "version": "1.0"}, {"name": "missingver", "version": ""}, {"name": "missingpkg", "version": "9.9"}, ], } ), encoding="utf-8", ) hello_url = "https://repology.org/projects/?search=hello&inrepo=nix_unstable" missing_url = "https://repology.org/projects/?search=missingpkg&inrepo=nix_unstable" session = MappingSession( { hello_url: FakeResponse(_fixture_text("projects_hello.html")), missing_url: FakeResponse(_fixture_text("projects_empty.html")), } ) df = RepologyAdapter(session=session).query( RepologyQuery( repository="nix_unstable", sbom_cdx=sbom_path, ) ) assert session.calls == [ (hello_url, REPOLOGY_REQUEST_TIMEOUT), (missing_url, REPOLOGY_REQUEST_TIMEOUT), ] assert set(df["status"]) == { "IGNORED", "NOT_FOUND", "NO_VERSION", "newest", "outdated", } hello_rows = df[df["package"] == "hello"] assert set(hello_rows["sbom_version_classify"]) == {"sbom_pkg_needs_update"} assert set(hello_rows["repo_version_classify"]) == { "", "repo_pkg_needs_update", } assert df[df["package"] == "archive.tar.gz"].iloc[0]["status"] == "IGNORED" assert df[df["package"] == "missingver"].iloc[0]["status"] == "NO_VERSION" assert df[df["package"] == "missingpkg"].iloc[0]["status"] == "NOT_FOUND" def test_repology_adapter_query_cves_parses_fixture_and_uses_timeout(): url = "https://repology.org/project/openssl/cves?version=3.1.0" session = MappingSession( { url: FakeResponse(_fixture_text("cves_openssl.html")), } ) df = RepologyAdapter(session=session).query_cves("openssl", "3.1.0") assert session.calls == [(url, REPOLOGY_REQUEST_TIMEOUT)] assert list(df["package"]) == ["openssl"] assert list(df["version"]) == ["3.1.0"] assert list(df["cve"]) == ["CVE-2024-1111"] ================================================ FILE: tests/test_repology_cve.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology 
Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Offline tests for Repology CVE queries.""" from repology.repology_cve import query_cve from repology.session import REPOLOGY_REQUEST_TIMEOUT from tests.testpaths import RESOURCES_DIR REPOLOGY_FIXTURES_DIR = RESOURCES_DIR / "repology" class FakeResponse: def __init__(self, text, status_code=200): self.text = text self.status_code = status_code def raise_for_status(self): if self.status_code >= 400: raise RuntimeError(f"unexpected status code: {self.status_code}") class MappingSession: def __init__(self, responses): self.responses = responses self.calls = [] def get(self, url, timeout=None): self.calls.append((url, timeout)) if url not in self.responses: raise AssertionError(f"unexpected URL requested: {url}") return self.responses[url] def test_query_cve_parses_fixture_and_uses_timeout(): url = "https://repology.org/project/openssl/cves?version=3.1.0" session = MappingSession( { url: FakeResponse( (REPOLOGY_FIXTURES_DIR / "cves_openssl.html").read_text( encoding="utf-8" ) ), } ) df = query_cve("openssl", "3.1.0", session=session) assert session.calls == [(url, REPOLOGY_REQUEST_TIMEOUT)] assert list(df["package"]) == ["openssl"] assert list(df["version"]) == ["3.1.0"] assert list(df["cve"]) == ["CVE-2024-1111"] ================================================ FILE: tests/test_repology_projects_parser.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Offline tests for the Repology projects-page parser.""" import pytest from repology.exceptions import RepologyUnexpectedResponse from repology.projects_parser import parse_projects_search_html from tests.testpaths import RESOURCES_DIR REPOLOGY_FIXTURES_DIR = RESOURCES_DIR / "repology" def _fixture_text(name): return (REPOLOGY_FIXTURES_DIR / name).read_text(encoding="utf-8") def test_parse_projects_search_html_parses_fixture_rows(): parsed = parse_projects_search_html( _fixture_text("projects_hello.html"), "nix_unstable", ) assert parsed.next_query_project == "" assert parsed.processed_ids == {"nix_unstable:hello"} assert parsed.package_rows == [ { "repo": "nix_unstable", "package": "hello", "version": "2.10", "status": "outdated", "potentially_vulnerable": "1", "newest_upstream_release": "2.11;2.12-rc1", }, { "repo": "nix_unstable", "package": "hello", "version": "2.11", "status": "newest", "potentially_vulnerable": "0", "newest_upstream_release": "2.11;2.12-rc1", }, ] def test_parse_projects_search_html_respects_already_processed_packages(): parsed = parse_projects_search_html( _fixture_text("projects_hello.html"), "nix_unstable", processed_ids={"nix_unstable:hello"}, ) assert parsed.next_query_project == "" assert parsed.processed_ids == {"nix_unstable:hello"} assert not parsed.package_rows def test_parse_projects_search_html_raises_for_malformed_table(): malformed = """
<table><tr><th>Project</th></tr></table>
""" with pytest.raises(RepologyUnexpectedResponse): parse_projects_search_html(malformed, "nix_unstable") ================================================ FILE: tests/test_repology_sbom.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Unit tests for Repology SBOM helpers.""" import json import pandas as pd from repology.sbom import ( is_ignored_sbom_package, make_sbom_status_row, merge_sbom_fields, parse_cdx_sbom, sbom_row_classify, ) def test_parse_cdx_sbom_normalizes_names_and_includes_metadata_component(tmp_path): sbom_path = tmp_path / "sbom.cdx.json" sbom_path.write_text( json.dumps( { "metadata": { "component": {"name": "libtiff", "version": "4.6.0"}, }, "components": [ {"name": "python311-requests", "version": "2.32.0"}, ], } ), encoding="utf-8", ) df = parse_cdx_sbom(sbom_path) assert df.to_dict("records") == [ {"name": "python:requests", "version": "2.32.0"}, {"name": "tiff", "version": "4.6.0"}, ] def test_merge_sbom_fields_and_classify_outdated_versions(): df_sbom = pd.DataFrame([{"name": "hello", "version": "2.10"}]) df_repo = pd.DataFrame( [ { "repo": "nix_unstable", "package": "hello", "version": "2.11", "status": "newest", "potentially_vulnerable": "0", "newest_upstream_release": "2.12", } ] ) df = merge_sbom_fields(df_sbom, df_repo) df["sbom_version_classify"] = df.apply(sbom_row_classify, axis=1) assert df["version_sbom"].tolist() == ["2.10"] assert df["sbom_version_classify"].tolist() == ["sbom_pkg_needs_update"] def test_sbom_status_helpers_cover_ignored_rows(): assert is_ignored_sbom_package("archive.tar.gz") is True assert is_ignored_sbom_package("openssl") is False assert make_sbom_status_row("nix_unstable", "archive.tar.gz", "1.0", "IGNORED") == { "repo": "nix_unstable", "package": "archive.tar.gz", "version": "1.0", "status": "IGNORED", "potentially_vulnerable": "", "newest_upstream_release": "", } ================================================ FILE: tests/test_runtime_closure.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Focused tests for structured runtime closure parsing.""" import subprocess import pytest from common.errors import InvalidNixJsonError, NixCommandError from sbomnix import runtime as sbomnix_runtime from sbomnix.runtime import runtime_closure_from_path_info def test_runtime_closure_from_path_info_extracts_edges_and_derivers(): closure = runtime_closure_from_path_info( { "/nix/store/11111111111111111111111111111111-target-1.0": { "deriver": "/nix/store/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-target-1.0.drv", "references": [ "/nix/store/11111111111111111111111111111111-target-1.0", "/nix/store/22222222222222222222222222222222-dep-1.0", ], }, "/nix/store/22222222222222222222222222222222-dep-1.0": { "deriver": "/nix/store/bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb-dep-1.0.drv", "references": ["/nix/store/22222222222222222222222222222222-dep-1.0"], }, } ) assert closure.df_deps.to_dict("records") == [ { "src_path": "/nix/store/22222222222222222222222222222222-dep-1.0", "src_pname": "dep-1.0", "target_path": "/nix/store/11111111111111111111111111111111-target-1.0", "target_pname": "target-1.0", } ] assert closure.output_paths_by_drv == { "/nix/store/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-target-1.0.drv": { "/nix/store/11111111111111111111111111111111-target-1.0" }, 
"/nix/store/bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb-dep-1.0.drv": { "/nix/store/22222222222222222222222222222222-dep-1.0" }, } def test_runtime_closure_from_path_info_supports_list_payloads(): closure = runtime_closure_from_path_info( [ { "path": "/nix/store/11111111111111111111111111111111-target-1.0", "deriver": "/nix/store/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-target-1.0.drv", "references": [], } ] ) assert closure.df_deps.empty assert closure.output_paths_by_drv == { "/nix/store/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-target-1.0.drv": { "/nix/store/11111111111111111111111111111111-target-1.0" } } def test_runtime_closure_from_path_info_rejects_missing_references(): with pytest.raises(InvalidNixJsonError, match="missing `references`"): runtime_closure_from_path_info( { "/nix/store/11111111111111111111111111111111-target-1.0": { "deriver": None, } } ) def test_runtime_closure_from_path_info_rejects_malformed_reference_items(): with pytest.raises(InvalidNixJsonError, match=r"references\[0\]"): runtime_closure_from_path_info( { "/nix/store/11111111111111111111111111111111-target-1.0": { "references": [None], } } ) def test_load_runtime_closure_wraps_nix_command_failures(monkeypatch): def fail_exec_cmd(cmd): raise subprocess.CalledProcessError( returncode=1, cmd=cmd, stderr="unsupported path-info json format", ) monkeypatch.setattr(sbomnix_runtime, "exec_cmd", fail_exec_cmd) with pytest.raises(NixCommandError, match="unsupported path-info json format"): sbomnix_runtime.load_runtime_closure( "/nix/store/11111111111111111111111111111111-target-1.0" ) ================================================ FILE: tests/test_sbom_closure.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Focused tests for SBOM dependency closure helpers.""" import pandas as pd from sbomnix.closure import ( dependencies_to_depth, dependency_paths, walk_dependency_rows, ) def _dependency_df(): return pd.DataFrame.from_records( [ { "src_path": "/nix/store/bash", "src_pname": "bash", "target_path": "/nix/store/hello", "target_pname": "hello", }, { "src_path": "/nix/store/glibc", "src_pname": "glibc", "target_path": "/nix/store/bash", "target_pname": "bash", }, { "src_path": "/nix/store/zlib", "src_pname": "zlib", "target_path": "/nix/store/glibc", "target_pname": "glibc", }, ] ) def test_dependencies_to_depth_returns_reachable_dependency_rows(): df_depth = dependencies_to_depth(_dependency_df(), "/nix/store/hello", depth=2) assert df_depth.to_dict("records") == [ { "src_path": "/nix/store/bash", "src_pname": "bash", "target_path": "/nix/store/hello", "target_pname": "hello", }, { "src_path": "/nix/store/glibc", "src_pname": "glibc", "target_path": "/nix/store/bash", "target_pname": "bash", }, ] def test_walk_dependency_rows_supports_inverse_traversal(): walked = walk_dependency_rows( _dependency_df(), "/nix/store/zlib", depth=2, inverse=True, ) assert [row.depth for row in walked] == [1, 2] assert [row.row["target_path"] for row in walked] == [ "/nix/store/glibc", "/nix/store/bash", ] assert [row.row["src_path"] for row in walked] == [ "/nix/store/zlib", "/nix/store/glibc", ] def test_walk_dependency_rows_stops_after_matching_boundary_row(): walked = walk_dependency_rows( _dependency_df(), "/nix/store/hello", depth=3, stop_at=lambda row: row["target_pname"] == "bash", ) assert [row.depth for row in walked] == [1, 2] assert [row.row["target_path"] for row in walked] == [ "/nix/store/hello", "/nix/store/bash", 
] assert [row.row["src_path"] for row in walked] == [ "/nix/store/bash", "/nix/store/glibc", ] def test_dependencies_to_depth_returns_empty_dataframe_for_missing_start(): df_depth = dependencies_to_depth(_dependency_df(), "/nix/store/missing", depth=2) assert df_depth.empty assert list(df_depth.columns) == [ "src_path", "src_pname", "target_path", "target_pname", ] def test_dependencies_to_depth_deduplicates_shared_diamond_edges(): df_deps = pd.DataFrame.from_records( [ { "src_path": "/nix/store/left", "src_pname": "left", "target_path": "/nix/store/root", "target_pname": "root", }, { "src_path": "/nix/store/right", "src_pname": "right", "target_path": "/nix/store/root", "target_pname": "root", }, { "src_path": "/nix/store/shared", "src_pname": "shared", "target_path": "/nix/store/left", "target_pname": "left", }, { "src_path": "/nix/store/shared", "src_pname": "shared", "target_path": "/nix/store/right", "target_pname": "right", }, { "src_path": "/nix/store/leaf", "src_pname": "leaf", "target_path": "/nix/store/shared", "target_pname": "shared", }, ] ) df_depth = dependencies_to_depth(df_deps, "/nix/store/root", depth=3) assert df_depth.to_dict("records") == [ { "src_path": "/nix/store/left", "src_pname": "left", "target_path": "/nix/store/root", "target_pname": "root", }, { "src_path": "/nix/store/shared", "src_pname": "shared", "target_path": "/nix/store/left", "target_pname": "left", }, { "src_path": "/nix/store/leaf", "src_pname": "leaf", "target_path": "/nix/store/shared", "target_pname": "shared", }, { "src_path": "/nix/store/right", "src_pname": "right", "target_path": "/nix/store/root", "target_pname": "root", }, { "src_path": "/nix/store/shared", "src_pname": "shared", "target_path": "/nix/store/right", "target_pname": "right", }, ] def test_dependency_paths_returns_all_source_and_target_paths(): assert dependency_paths(_dependency_df()) == { "/nix/store/bash", "/nix/store/glibc", "/nix/store/hello", "/nix/store/zlib", } ================================================ FILE: tests/test_sbom_vuln_enrichment.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Focused tests for SBOM vulnerability enrichment boundaries.""" import uuid from pathlib import Path from types import SimpleNamespace import pandas as pd import pytest from common.errors import SbomnixError from sbomnix import cli_utils as sbomnix_cli_utils from sbomnix import main as sbomnix_main from sbomnix import vuln_enrichment as sbomnix_vuln_enrichment from sbomnix.builder import SbomBuilder class CapturingLogger: def __init__(self): self.records = [] def info(self, msg, *args): self.records.append(("info", msg, args)) def fatal(self, msg, *args): self.records.append(("fatal", msg, args)) def test_sbomnix_getargs_accepts_meta_nixpkgs(): args = sbomnix_main.getargs( [ "/nix/store/target", "--meta-nixpkgs", "nix-path", ] ) assert args.meta_nixpkgs == "nix-path" def test_sbomnix_run_rejects_exclude_meta_with_meta_nixpkgs(): args = SimpleNamespace( NIXREF="/nix/store/target", buildtime=False, depth=None, verbose=0, include_vulns=False, exclude_meta=True, meta_nixpkgs="nix-path", exclude_cpe_matching=False, csv=None, cdx=None, spdx=None, impure=True, ) with pytest.raises(SbomnixError, match="--exclude-meta"): sbomnix_main._run(args) def test_sbomnix_main_enriches_cdx_explicitly_when_include_vulns_is_set(monkeypatch): args = SimpleNamespace( NIXREF=".#target", buildtime=False, depth=None, 
verbose=0, include_vulns=True, exclude_meta=False, meta_nixpkgs=None, exclude_cpe_matching=False, csv=None, cdx="sbom.cdx.json", spdx=None, impure=True, ) events = [] class FakeSbomBuilder: def __init__(self, **kwargs): events.append(("init", kwargs)) def to_cdx_data(self): events.append(("to_cdx_data",)) return {"bomFormat": "CycloneDX"} def enrich_cdx_with_vulnerabilities(self, cdx): events.append(("enrich", dict(cdx))) cdx["vulnerabilities"] = [] def write_json(self, path, data, printinfo=False): events.append(("write_json", path, dict(data), printinfo)) def to_spdx(self, _path): raise AssertionError("to_spdx should not run in this test") def to_csv(self, _path): raise AssertionError("to_csv should not run in this test") monkeypatch.setattr(sbomnix_main, "getargs", lambda: args) monkeypatch.setattr(sbomnix_main, "set_log_verbosity", lambda _verbosity: None) monkeypatch.setattr( sbomnix_main, "resolve_nix_target", lambda *_args, **_kwargs: sbomnix_cli_utils.ResolvedNixTarget( path="/nix/store/target", flakeref=".#target", ), ) monkeypatch.setattr(sbomnix_main, "SbomBuilder", FakeSbomBuilder) sbomnix_main.main() assert events == [ ( "init", { "nix_path": "/nix/store/target", "buildtime": False, "depth": None, "flakeref": ".#target", "original_ref": None, "meta_nixpkgs": None, "impure": True, "include_meta": True, "include_vulns": True, "include_cpe": True, }, ), ("to_cdx_data",), ("enrich", {"bomFormat": "CycloneDX"}), ( "write_json", "sbom.cdx.json", {"bomFormat": "CycloneDX", "vulnerabilities": []}, True, ), ] def test_sbomnix_main_logs_generation_before_initializing_builder(monkeypatch): args = SimpleNamespace( NIXREF=".#target", buildtime=False, depth=None, verbose=0, include_vulns=False, exclude_meta=False, meta_nixpkgs=None, exclude_cpe_matching=False, csv=None, cdx=None, spdx=None, impure=False, ) logger = CapturingLogger() events = [] class FakeSbomBuilder: def __init__(self, **kwargs): events.append(("init", kwargs)) monkeypatch.setattr(sbomnix_main, "LOG", logger) monkeypatch.setattr( sbomnix_main, "resolve_nix_target", lambda *_args, **_kwargs: sbomnix_cli_utils.ResolvedNixTarget( path="/nix/store/target", flakeref=".#target", ), ) monkeypatch.setattr(sbomnix_main, "SbomBuilder", FakeSbomBuilder) sbomnix_main._run(args) assert logger.records == [ ("info", "Generating SBOM for target '%s'", ("/nix/store/target",)) ] assert events == [ ( "init", { "nix_path": "/nix/store/target", "buildtime": False, "depth": None, "flakeref": ".#target", "original_ref": None, "meta_nixpkgs": None, "impure": False, "include_meta": True, "include_vulns": False, "include_cpe": True, }, ) ] def test_to_cdx_no_longer_triggers_vulnerability_scans(tmp_path, monkeypatch): seen_calls = [] def no_dependencies(_self, _drv, **_kwargs): return None class FailIfCalledScanner: def __init__(self): seen_calls.append("init") def scan_vulnix(self, _target_path, _buildtime): raise AssertionError("scan_vulnix should not run during plain export") def scan_grype(self, _sbom_path): raise AssertionError("scan_grype should not run during plain export") def scan_osv(self, _sbom_path): raise AssertionError("scan_osv should not run during plain export") # Bypass __init__ to keep the test focused on export behavior without Nix IO. 
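    # object.__new__ allocates the builder without running SbomBuilder.__init__;
    # the attributes the export path needs are filled in by hand below.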
sbomdb = object.__new__(SbomBuilder) sbomdb.uid = "store_path" sbomdb.nix_path = "/nix/store/target" sbomdb.buildtime = False sbomdb.target_deriver = "/nix/store/target.drv" sbomdb.target_component_ref = "/nix/store/target.drv" sbomdb.depth = None sbomdb.uuid = uuid.uuid4() sbomdb.include_vulns = True sbomdb.sbom_type = "runtime_only" sbomdb.df_sbomdb = pd.DataFrame( [ { "store_path": "/nix/store/target.drv", "pname": "target", "name": "target", "version": "1.0", "outputs": ["/nix/store/target"], "out": "/nix/store/target", "purl": "", "cpe": "", "urls": "", "patches": "", } ] ) monkeypatch.setattr("sbomnix.vuln_enrichment.VulnScan", FailIfCalledScanner) monkeypatch.setattr(SbomBuilder, "lookup_dependencies", no_dependencies) out_path = tmp_path / "out.cdx.json" sbomdb.to_cdx(out_path, printinfo=False) assert out_path.exists() assert not seen_calls @pytest.mark.parametrize( ("buildtime", "expected_target"), [ (False, "/nix/store/target-output"), (True, "/nix/store/target.drv"), ], ) def test_sbom_vuln_enrichment_scans_expected_nix_target( buildtime, expected_target, monkeypatch, ): seen_vulnix_calls = [] class CapturingScanner: def __init__(self): self.df_grype = pd.DataFrame() self.df_osv = pd.DataFrame() self.df_vulnix = pd.DataFrame() def scan_vulnix(self, target_path, scan_buildtime): seen_vulnix_calls.append((target_path, scan_buildtime)) def scan_grype(self, _sbom_path): return None def scan_osv(self, _sbom_path): return None # Bypass __init__ to keep the test focused on enrichment target selection. sbomdb = object.__new__(SbomBuilder) sbomdb.nix_path = "/nix/store/target-output" sbomdb.buildtime = buildtime sbomdb.target_deriver = "/nix/store/target.drv" sbomdb.target_component_ref = "/nix/store/target.drv" sbomdb.df_sbomdb = pd.DataFrame() monkeypatch.setattr(sbomnix_vuln_enrichment, "VulnScan", CapturingScanner) cdx = {"bomFormat": "CycloneDX"} sbomdb.enrich_cdx_with_vulnerabilities(cdx) assert seen_vulnix_calls == [(expected_target, buildtime)] assert cdx["vulnerabilities"] == [] def test_sbom_vuln_tempfile_is_removed_on_scan_failure(tmp_path, monkeypatch): temp_cdx_path = tmp_path / "vulnscan_temp.json" seen_paths = [] def no_dependencies(_self, _drv, **_kwargs): return None class FakeTempFile: def __init__(self, path): self.name = path.as_posix() def __enter__(self): Path(self.name).touch() return self def __exit__(self, exc_type, exc, traceback): return False class FailingScanner: def __init__(self): self.df_grype = pd.DataFrame() self.df_osv = pd.DataFrame() self.df_vulnix = pd.DataFrame() def scan_vulnix(self, _target_path, _buildtime): return None def scan_grype(self, sbom_path): sbom_path = Path(sbom_path) seen_paths.append(sbom_path) assert sbom_path.exists() def scan_osv(self, sbom_path): sbom_path = Path(sbom_path) seen_paths.append(sbom_path) raise RuntimeError("osv scan failed") # Bypass __init__ to keep the test focused on enrichment tempfile cleanup. 
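    # The NamedTemporaryFile replacement below always resolves to
    # temp_cdx_path, letting the final assertions verify the tempfile is
    # removed even when the OSV scan raises.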
sbomdb = object.__new__(SbomBuilder) sbomdb.uid = "store_path" sbomdb.nix_path = "/nix/store/target" sbomdb.buildtime = False sbomdb.target_deriver = "/nix/store/target.drv" sbomdb.target_component_ref = "/nix/store/target.drv" sbomdb.depth = None sbomdb.uuid = uuid.uuid4() sbomdb.include_vulns = True sbomdb.sbom_type = "runtime_only" sbomdb.df_sbomdb = pd.DataFrame( [ { "store_path": "/nix/store/target.drv", "pname": "target", "name": "target", "version": "1.0", "outputs": ["/nix/store/target"], "out": "/nix/store/target", "purl": "", "cpe": "", "urls": "", "patches": "", } ] ) monkeypatch.setattr( sbomnix_vuln_enrichment, "NamedTemporaryFile", lambda **_kwargs: FakeTempFile(temp_cdx_path), ) monkeypatch.setattr(sbomnix_vuln_enrichment, "VulnScan", FailingScanner) monkeypatch.setattr(SbomBuilder, "lookup_dependencies", no_dependencies) cdx = sbomdb.to_cdx_data() with pytest.raises(RuntimeError, match="osv scan failed"): sbomdb.enrich_cdx_with_vulnerabilities(cdx) assert seen_paths == [temp_cdx_path, temp_cdx_path] assert not temp_cdx_path.exists() ================================================ FILE: tests/test_schema_validation.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Focused tests for offline schema validation helpers.""" from tests.testpaths import RESOURCES_DIR, SAMPLE_CDX_SBOM from tests.testutils import resolve_local_schema_path, validate_json def test_local_schema_aliases_resolve_to_vendored_resources(): """Resolve the vendored schema aliases used by local validation.""" assert resolve_local_schema_path("spdx.schema.json", RESOURCES_DIR).name == ( "spdx.schema.json" ) assert ( resolve_local_schema_path( "http://cyclonedx.org/schema/spdx.schema.json", RESOURCES_DIR, ).name == "spdx.schema.json" ) assert ( resolve_local_schema_path( "jsf-0.82.schema.json#/definitions/signature", RESOURCES_DIR, ).name == "jsf-0.82.schema.json" ) def test_validate_json_uses_only_local_schema_resources(): """Validate a sample CycloneDX SBOM without network access.""" validate_json(SAMPLE_CDX_SBOM, RESOURCES_DIR / "cdx_bom-1.4.schema.json") ================================================ FILE: tests/test_store_batching.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Focused tests for batched store and derivation loading.""" import json import subprocess from types import SimpleNamespace import pytest from common.errors import NixCommandError from sbomnix import derivation as sbomnix_derivation def test_load_many_batches_nix_derivation_show_and_preserves_outputs(monkeypatch): calls = [] def fake_exec_cmd(cmd, **kwargs): calls.append((cmd, kwargs)) return SimpleNamespace( stdout=json.dumps( { "derivations": { "/nix/store/first.drv": { "name": "first", "env": { "name": "first", "pname": "first", "version": "1.0", }, "outputs": { "out": {"path": "/nix/store/first-out"}, }, }, "/nix/store/second.drv": { "name": "second", "env": { "name": "second", "pname": "second", "version": "2.0", }, "outputs": { "out": {"path": "/nix/store/second-out"}, }, }, }, "version": 4, } ), returncode=0, stderr="", ) monkeypatch.setattr(sbomnix_derivation, "exec_cmd", fake_exec_cmd) loaded = sbomnix_derivation.load_many( ["/nix/store/first.drv", "/nix/store/second.drv"], output_paths_by_drv={ "/nix/store/first.drv": {"/nix/store/first-extra-out"}, 
"/nix/store/second.drv": {"/nix/store/second-extra-out"}, }, batch_size=50, ) assert calls == [ ( [ "nix", "derivation", "show", "/nix/store/first.drv", "/nix/store/second.drv", "--extra-experimental-features", "flakes", "--extra-experimental-features", "nix-command", ], {}, ) ] assert loaded["/nix/store/first.drv"].outputs == [ "/nix/store/first-extra-out", "/nix/store/first-out", ] assert loaded["/nix/store/second.drv"].outputs == [ "/nix/store/second-extra-out", "/nix/store/second-out", ] def test_load_many_supports_output_path_queries(monkeypatch): calls = [] def fake_exec_cmd(cmd, **kwargs): calls.append((cmd, kwargs)) return SimpleNamespace( stdout=json.dumps( { "derivations": { "/nix/store/canonical.drv": { "name": "first", "env": { "name": "first", "pname": "first", "version": "1.0", }, "outputs": { "out": {"path": "/nix/store/first-out"}, "dev": {"path": "/nix/store/first-dev"}, }, }, }, "version": 4, } ), returncode=0, stderr="", ) monkeypatch.setattr(sbomnix_derivation, "exec_cmd", fake_exec_cmd) loaded = sbomnix_derivation.load_many( ["/nix/store/first-out"], output_paths_by_drv={ "/nix/store/first-out": {"/nix/store/first-out"}, }, batch_size=50, ) assert calls == [ ( [ "nix", "derivation", "show", "/nix/store/first-out", "--extra-experimental-features", "flakes", "--extra-experimental-features", "nix-command", ], {}, ) ] assert list(loaded) == ["/nix/store/canonical.drv"] assert loaded["/nix/store/canonical.drv"].store_path == "/nix/store/canonical.drv" assert loaded["/nix/store/canonical.drv"].outputs == [ "/nix/store/first-dev", "/nix/store/first-out", ] def test_load_many_maps_output_queries_from_derivation_env(monkeypatch): def fake_exec_cmd(cmd, **kwargs): return SimpleNamespace( stdout=json.dumps( { "derivations": { "/nix/store/fixed.drv": { "name": "fixed", "env": { "name": "fixed", "out": "/nix/store/fixed-out", "outputs": "out", "pname": "fixed", "version": "1.0", }, "outputs": { "out": { "hash": "sha256-test", "method": "flat", }, }, }, }, "version": 4, } ), returncode=0, stderr="", ) monkeypatch.setattr(sbomnix_derivation, "exec_cmd", fake_exec_cmd) loaded = sbomnix_derivation.load_many( ["/nix/store/fixed-out"], output_paths_by_drv={ "/nix/store/fixed-out": {"/nix/store/fixed-out"}, }, batch_size=50, ) assert list(loaded) == ["/nix/store/fixed.drv"] assert loaded["/nix/store/fixed.drv"].outputs == ["/nix/store/fixed-out"] def test_load_many_can_ignore_missing_output_derivations(monkeypatch): calls = [] def fake_exec_cmd(cmd, **kwargs): calls.append((cmd, kwargs)) query_paths = cmd[3:-4] if "/nix/store/missing-out" in query_paths: assert kwargs == {"raise_on_error": False, "log_error": False} return None return SimpleNamespace( stdout=json.dumps( { "derivations": { "/nix/store/good.drv": { "name": "good", "env": { "name": "good", "pname": "good", "version": "1.0", }, "outputs": { "out": {"path": "/nix/store/good-out"}, }, }, }, "version": 4, } ), returncode=0, stderr="", ) monkeypatch.setattr(sbomnix_derivation, "exec_cmd", fake_exec_cmd) loaded = sbomnix_derivation.load_many( ["/nix/store/good-out", "/nix/store/missing-out"], output_paths_by_drv={ "/nix/store/good-out": {"/nix/store/good-out"}, "/nix/store/missing-out": {"/nix/store/missing-out"}, }, batch_size=50, ignore_missing=True, ) assert list(loaded) == ["/nix/store/good.drv"] assert [call[0][3:-4] for call in calls] == [ ["/nix/store/good-out", "/nix/store/missing-out"], ["/nix/store/good-out"], ["/nix/store/missing-out"], ] def test_load_recursive_wraps_nix_command_failures(monkeypatch): def 

def test_load_recursive_wraps_nix_command_failures(monkeypatch):
    def fail_exec_cmd(cmd):
        raise subprocess.CalledProcessError(
            returncode=1,
            cmd=cmd,
            stderr="recursive derivation show failed",
        )

    monkeypatch.setattr(sbomnix_derivation, "exec_cmd", fail_exec_cmd)

    with pytest.raises(NixCommandError, match="recursive derivation show failed"):
        sbomnix_derivation.load_recursive(
            "/nix/store/11111111111111111111111111111111-target-1.0.drv"
        )


def test_load_rejects_empty_derivation_metadata(monkeypatch):
    monkeypatch.setattr(
        sbomnix_derivation,
        "exec_cmd",
        lambda _cmd: SimpleNamespace(stdout="{}", stderr="", returncode=0),
    )

    with pytest.raises(NixCommandError, match="No derivation metadata returned"):
        sbomnix_derivation.load(
            "/nix/store/11111111111111111111111111111111-target-1.0",
            None,
        )


def test_load_recursive_rejects_empty_derivation_metadata(monkeypatch):
    monkeypatch.setattr(
        sbomnix_derivation,
        "exec_cmd",
        lambda _cmd: SimpleNamespace(stdout="{}", stderr="", returncode=0),
    )

    with pytest.raises(NixCommandError, match="No derivation metadata returned"):
        sbomnix_derivation.load_recursive(
            "/nix/store/11111111111111111111111111111111-target-1.0.drv"
        )


================================================
FILE: tests/test_temp_sbom_generation.py
================================================
#!/usr/bin/env python3

# SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII)
#
# SPDX-License-Identifier: Apache-2.0

"""Focused tests for temporary SBOM generation and cleanup."""

from pathlib import Path
from types import SimpleNamespace

import pytest

from sbomnix import cli_utils as sbomnix_cli_utils
from vulnxscan import vulnxscan_cli


def test_vulnxscan_cleans_generated_tempfiles_on_failure(tmp_path, monkeypatch):
    sbom_cdx_path = tmp_path / "generated.cdx.json"
    sbom_csv_path = tmp_path / "generated.csv"
    sbom_cdx_path.write_text("{}", encoding="utf-8")
    sbom_csv_path.write_text("", encoding="utf-8")

    args = SimpleNamespace(
        TARGET="target",
        verbose=0,
        out="vulns.csv",
        buildtime=False,
        sbom=False,
        whitelist=None,
        triage=False,
        nixprs=False,
    )

    class FailingScanner:
        def scan_vulnix(self, _target_path, _buildtime):
            return None

        def scan_grype(self, _sbom_path):
            raise RuntimeError("scan failed")

        def scan_osv(self, _sbom_path):
            raise AssertionError("scan_osv should not run after grype failure")

        def report(self, _args, _sbom_csv_path):
            raise AssertionError("report should not run after scan failure")

    monkeypatch.setattr(vulnxscan_cli, "getargs", lambda: args)
    monkeypatch.setattr(vulnxscan_cli, "set_log_verbosity", lambda _verbosity: None)
    monkeypatch.setattr(
        vulnxscan_cli, "exit_unless_command_exists", lambda _command: None
    )
    monkeypatch.setattr(
        vulnxscan_cli,
        "resolve_nix_target",
        lambda _target, buildtime=False: sbomnix_cli_utils.ResolvedNixTarget(
            path="/nix/store/target"
        ),
    )
    monkeypatch.setattr(
        vulnxscan_cli,
        "generate_temp_sbom",
        lambda _target_path, _buildtime, **_kwargs: sbomnix_cli_utils.GeneratedSbom(
            cdx_path=sbom_cdx_path,
            csv_path=sbom_csv_path,
        ),
    )
    monkeypatch.setattr(vulnxscan_cli, "VulnScan", FailingScanner)

    with pytest.raises(RuntimeError, match="scan failed"):
        vulnxscan_cli.main()

    assert not sbom_cdx_path.exists()
    assert not sbom_csv_path.exists()
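
# Hedged usage sketch: callers are expected to pair generate_temp_sbom with
# GeneratedSbom.cleanup(), typically in a try/finally, so tempfiles do not
# leak even when a later step raises (the behavior the test above pins down
# for vulnxscan_cli.main):
#
#     generated = generate_temp_sbom(target_path, buildtime=False)
#     try:
#         ...  # scan or otherwise consume generated.cdx_path
#     finally:
#         generated.cleanup()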

def test_generate_temp_sbom_without_csv_returns_only_cdx_path(tmp_path, monkeypatch):
    sbom_cdx_path = tmp_path / "generated.cdx.json"

    class FakeTempFile:
        def __init__(self, path):
            self.name = path.as_posix()

        def __enter__(self):
            Path(self.name).touch()
            return self

        def __exit__(self, exc_type, exc, traceback):
            return False

    class DummySbomBuilder:
        def __init__(self, _target_path, _buildtime, include_meta=False):
            assert include_meta is False

        def to_cdx(self, sbom_path, printinfo=False):
            Path(sbom_path).write_text("{}", encoding="utf-8")
            assert printinfo is False

        def to_csv(self, _sbom_path, loglevel=None):
            raise AssertionError("to_csv should not run when include_csv is False")

    monkeypatch.setattr(
        sbomnix_cli_utils,
        "NamedTemporaryFile",
        lambda **_kwargs: FakeTempFile(sbom_cdx_path),
    )
    monkeypatch.setattr(sbomnix_cli_utils, "SbomBuilder", DummySbomBuilder)

    generated = sbomnix_cli_utils.generate_temp_sbom(
        "/nix/store/target",
        buildtime=False,
        prefix="nixdeps_",
        cdx_suffix=".cdx.json",
    )

    assert generated == sbomnix_cli_utils.GeneratedSbom(
        cdx_path=sbom_cdx_path,
        csv_path=None,
    )
    assert sbom_cdx_path.exists()
    generated.cleanup()
    assert not sbom_cdx_path.exists()


def test_generate_temp_sbom_cleans_tempfiles_on_generation_failure(
    tmp_path, monkeypatch
):
    sbom_cdx_path = tmp_path / "generated.cdx.json"
    sbom_csv_path = tmp_path / "generated.csv"

    class FakeTempFile:
        def __init__(self, path):
            self.name = path.as_posix()

        def __enter__(self):
            Path(self.name).touch()
            return self

        def __exit__(self, exc_type, exc, traceback):
            return False

    class FailingSbomBuilder:
        def __init__(self, _target_path, _buildtime, include_meta=False):
            assert include_meta is False

        def to_cdx(self, sbom_path, printinfo=False):
            Path(sbom_path).write_text("{}", encoding="utf-8")
            assert printinfo is False

        def to_csv(self, sbom_path, loglevel=None):
            Path(sbom_path).write_text("", encoding="utf-8")
            assert loglevel is not None
            raise RuntimeError("sbom csv generation failed")

    monkeypatch.setattr(
        sbomnix_cli_utils,
        "NamedTemporaryFile",
        lambda **kwargs: FakeTempFile(
            sbom_cdx_path if kwargs["suffix"] == ".json" else sbom_csv_path
        ),
    )
    monkeypatch.setattr(sbomnix_cli_utils, "SbomBuilder", FailingSbomBuilder)

    with pytest.raises(RuntimeError, match="sbom csv generation failed"):
        sbomnix_cli_utils.generate_temp_sbom(
            "/nix/store/target",
            buildtime=False,
            prefix="vulnxscan_",
            cdx_suffix=".json",
            include_csv=True,
        )

    assert not sbom_cdx_path.exists()
    assert not sbom_csv_path.exists()


def test_generate_temp_sbom_cleans_first_tempfile_if_second_creation_fails(
    tmp_path, monkeypatch
):
    sbom_cdx_path = tmp_path / "generated.cdx.json"

    class FakeTempFile:
        def __init__(self, path):
            self.name = path.as_posix()

        def __enter__(self):
            Path(self.name).touch()
            return self

        def __exit__(self, exc_type, exc, traceback):
            return False

    class DummySbomBuilder:
        def __init__(self, _target_path, _buildtime, include_meta=False):
            assert include_meta is False

        def to_cdx(self, _sbom_path, printinfo=False):
            raise AssertionError("to_cdx should not run if csv tempfile creation fails")

        def to_csv(self, _sbom_path, loglevel=None):
            raise AssertionError("to_csv should not run if csv tempfile creation fails")

    def fake_named_temporary_file(**kwargs):
        if kwargs["suffix"] == ".json":
            return FakeTempFile(sbom_cdx_path)
        raise RuntimeError("csv tempfile creation failed")

    monkeypatch.setattr(
        sbomnix_cli_utils,
        "NamedTemporaryFile",
        fake_named_temporary_file,
    )
    monkeypatch.setattr(sbomnix_cli_utils, "SbomBuilder", DummySbomBuilder)

    with pytest.raises(RuntimeError, match="csv tempfile creation failed"):
        sbomnix_cli_utils.generate_temp_sbom(
            "/nix/store/target",
            buildtime=False,
            prefix="vulnxscan_",
            cdx_suffix=".json",
            include_csv=True,
        )

    assert not sbom_cdx_path.exists()


================================================
FILE: tests/test_vulnix_test_support.py
================================================
#!/usr/bin/env python3

# SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII)
#
# SPDX-License-Identifier: Apache-2.0

"""Tests for the vulnix test wrapper helpers."""

from __future__ import annotations

import os
import shutil
import subprocess

import pytest

from tests import vulnix_test_support


def test_build_vulnix_test_env_prepends_wrapper_dir(tmp_path):
    """Wrapper dir should take precedence on PATH for test subprocesses."""
    wrapper_dir = tmp_path / "bin"
    config = vulnix_test_support.VulnixTestConfig(
        wrapper_dir=wrapper_dir,
        effective_mode="dummy",
        effective_cache_dir=None,
        real_vulnix=None,
    )

    env = vulnix_test_support.build_vulnix_test_env(
        {"PATH": "/usr/bin"},
        config=config,
    )

    assert env["PATH"] == os.pathsep.join([str(wrapper_dir), "/usr/bin"])
    assert env["SBOMNIX_TEST_VULNIX_EFFECTIVE_MODE"] == "dummy"
    assert env["SBOMNIX_TEST_REAL_VULNIX"] == ""
    assert "SBOMNIX_TEST_VULNIX_EFFECTIVE_CACHE_DIR" not in env


def test_dummy_vulnix_wrapper_returns_empty_json(tmp_path):
    """Dummy mode should behave like a no-op vulnix process."""
    config = vulnix_test_support.configure_vulnix_for_tests(
        tmp_root=tmp_path,
        effective_mode="dummy",
        cache_dir=tmp_path / "cache",
        real_vulnix=None,
    )
    env = vulnix_test_support.build_vulnix_test_env({}, config=config)

    ret = subprocess.run(
        [str(config.wrapper_dir / "vulnix"), "--json"],
        check=True,
        capture_output=True,
        encoding="utf-8",
        env=env,
    )

    assert ret.stdout == "[]"
    assert ret.stderr == ""


def test_real_vulnix_wrapper_forwards_cache_dir_and_args(tmp_path):
    """Real mode wrapper should exec the underlying binary with the cache dir."""
    real_vulnix = tmp_path / "real-vulnix"
    real_vulnix.write_text(
        """#!/bin/sh
set -eu
printf '%s\\n' "$@"
""",
        encoding="utf-8",
    )
    real_vulnix.chmod(0o755)
    cache_dir = tmp_path / "cache"
    cache_dir.mkdir()
    (cache_dir / "Data.fs").write_text("ready", encoding="utf-8")

    config = vulnix_test_support.configure_vulnix_for_tests(
        tmp_root=tmp_path,
        effective_mode="real",
        cache_dir=cache_dir,
        real_vulnix=real_vulnix.as_posix(),
    )
    env = vulnix_test_support.build_vulnix_test_env({}, config=config)
    env = {"PATH": os.environ.get("PATH", os.defpath), **env}

    ret = subprocess.run(
        [str(config.wrapper_dir / "vulnix"), "target", "-C", "--json"],
        check=True,
        capture_output=True,
        encoding="utf-8",
        env=env,
    )

    assert ret.stdout.splitlines() == [
        "--cache-dir",
        cache_dir.as_posix(),
        "target",
        "-C",
        "--json",
    ]


def test_configure_vulnix_for_tests_rejects_unknown_mode(tmp_path):
    """configure_vulnix_for_tests should only accept dummy or real modes."""
    with pytest.raises(ValueError, match="invalid effective vulnix mode"):
        vulnix_test_support.configure_vulnix_for_tests(
            tmp_root=tmp_path,
            effective_mode="surprise",
            cache_dir=tmp_path / "cache",
            real_vulnix=None,
        )


def test_real_vulnix_wrapper_shows_clear_error_when_binary_missing(tmp_path):
    """Real mode wrapper should fail with a readable message if env is stale."""
    config = vulnix_test_support.configure_vulnix_for_tests(
        tmp_root=tmp_path,
        effective_mode="dummy",
        cache_dir=tmp_path / "cache",
        real_vulnix=None,
    )
    env = {
        "PATH": os.environ.get("PATH", os.defpath),
        "SBOMNIX_TEST_VULNIX_EFFECTIVE_MODE": "real",
    }

    ret = subprocess.run(
        [str(config.wrapper_dir / "vulnix"), "--json"],
        check=False,
        capture_output=True,
        encoding="utf-8",
        env=env,
    )

    assert ret.returncode != 0
    assert "SBOMNIX_TEST_REAL_VULNIX is empty" in ret.stderr
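
# Illustrative wiring sketch (hypothetical conftest glue, not part of this
# file): a fixture could configure the wrapper once and hand tests a ready
# environment, using only the helpers exercised above:
#
#     @pytest.fixture
#     def vulnix_env(tmp_path):
#         config = vulnix_test_support.configure_vulnix_for_tests(
#             tmp_root=tmp_path,
#             effective_mode="dummy",
#             cache_dir=tmp_path / "cache",
#         )
#         return vulnix_test_support.build_vulnix_test_env(
#             dict(os.environ), config=config
#         )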
""", encoding="utf-8", ) real_vulnix.chmod(0o755) result = tmp_path / "build" / "result" result.parent.mkdir(parents=True, exist_ok=True) result.write_text("placeholder", encoding="utf-8") with pytest.raises( RuntimeError, match="vulnix cache warm-up scan failed: vulnix boom" ): vulnix_test_support.ensure_real_vulnix_cache( tmp_path / "cache", build_root=tmp_path / "build", real_vulnix=real_vulnix.as_posix(), test_derivation=tmp_path / "derivation.nix", ) @pytest.mark.real_vulnix def test_real_vulnix_wrapper_executes_real_binary(tmp_path): """Opt-in smoke test that executes the real vulnix binary via the wrapper.""" real_vulnix = shutil.which("vulnix") if real_vulnix is None: pytest.skip("'vulnix' is not available in PATH") cache_dir = tmp_path / "real-cache" config = vulnix_test_support.configure_vulnix_for_tests( tmp_root=tmp_path, effective_mode="real", cache_dir=cache_dir, real_vulnix=real_vulnix, ) env = vulnix_test_support.build_vulnix_test_env({}, config=config) env = {"PATH": os.environ.get("PATH", os.defpath), **env} ret = subprocess.run( [str(config.wrapper_dir / "vulnix"), "--version"], check=True, capture_output=True, encoding="utf-8", env=env, ) assert "vulnix" in ret.stdout.lower() or "vulnix" in ret.stderr.lower() ================================================ FILE: tests/test_vulnxscan_engine.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Focused tests for vulnxscan parser and reporting helpers.""" from pathlib import Path from types import SimpleNamespace import pandas as pd import pytest from vulnxscan.parsers import parse_grype_json, parse_vulnix_json from vulnxscan.reporting import build_report_dataframe, write_reports from vulnxscan.vulnscan import VulnScan def test_parse_vulnix_json_updates_cvss_cache(): """Populate vulnerability rows and severity cache from vulnix JSON.""" cvss_cache = {} df = parse_vulnix_json( '[{"pname":"hello","version":"1.0","affected_by":["CVE-1"],' '"cvssv3_basescore":{"CVE-1":"7.5"}}]', cvss_cache=cvss_cache, ) assert df.to_dict("records") == [ { "package": "hello", "version": "1.0", "vuln_id": "CVE-1", "severity": "7.5", "scanner": "vulnix", } ] assert cvss_cache == {"CVE-1": "7.5"} def test_parse_grype_json_prefers_cvss_v3_scores(): """Select CVSS v3 severity when grype reports multiple CVSS entries.""" cvss_cache = {} json_str = """ { "matches": [ { "artifact": {"name": "hello", "version": "1.0"}, "vulnerability": { "id": "CVE-2", "cvss": [ {"version": "2.0", "metrics": {"baseScore": 4.0}}, {"version": "3.1", "metrics": {"baseScore": 9.8}} ] } } ] } """ df = parse_grype_json(json_str, cvss_cache=cvss_cache) assert df.to_dict("records") == [ { "package": "hello", "version": "1.0", "vuln_id": "CVE-2", "severity": 9.8, "scanner": "grype", } ] assert cvss_cache == {"CVE-2": 9.8} def test_build_report_dataframe_merges_scanner_counts(): """Aggregate scanner findings into the final report layout.""" df_report = build_report_dataframe( df_vulnix=pd.DataFrame( [ { "package": "hello", "version": "1.0", "vuln_id": "CVE-1", "severity": "7.5", "scanner": "vulnix", } ] ), df_grype=pd.DataFrame( [ { "package": "hello", "version": "1.0", "vuln_id": "CVE-1", "severity": "7.5", "scanner": "grype", } ] ), df_osv=pd.DataFrame(), ) assert df_report.to_dict("records") == [ { "vuln_id": "CVE-1", "url": "https://nvd.nist.gov/vuln/detail/CVE-1", "package": "hello", "version": "1.0", "severity": "7.5", "grype": "1", "osv": 
"0", "vulnix": "1", "sum": 2, "sortcol": df_report.iloc[0]["sortcol"], } ] def test_write_reports_writes_triage_report(tmp_path): """Write both the main report and the derived triage report files.""" main_out = tmp_path / "vulns.csv" df_report = pd.DataFrame([{"vuln_id": "CVE-1"}]) df_triaged = pd.DataFrame([{"vuln_id": "CVE-1", "classify": "triaged"}]) write_reports(df_report, main_out, df_triaged=df_triaged) assert main_out.exists() assert (tmp_path / "vulns.triage.csv").exists() assert Path(main_out).read_text(encoding="utf-8") @pytest.mark.parametrize( ("buildtime", "expected_cmd"), [ (False, ["vulnix", "/nix/store/my target", "-C", "--json"]), (True, ["vulnix", "/nix/store/my target", "--json"]), ], ) def test_scan_vulnix_uses_argv_lists(monkeypatch, buildtime, expected_cmd): """Build vulnix subprocess argv without splitting whitespace-containing paths.""" calls = [] parsed = [] def fake_exec_cmd(cmd, **kwargs): calls.append((cmd, kwargs)) return SimpleNamespace( stdout='[{"pname": "hello", "version": "1.0", "affected_by": []}]', stderr="", returncode=0, ) monkeypatch.setattr("vulnxscan.vulnscan.exec_cmd", fake_exec_cmd) monkeypatch.setattr( VulnScan, "_parse_vulnix", lambda self, stdout: parsed.append(stdout), ) VulnScan().scan_vulnix("/nix/store/my target", buildtime=buildtime) assert calls == [ ( expected_cmd, {"raise_on_error": False, "return_error": True, "log_error": False}, ) ] assert parsed == ['[{"pname": "hello", "version": "1.0", "affected_by": []}]'] ================================================ FILE: tests/test_vulnxscan_triage.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII) # # SPDX-License-Identifier: Apache-2.0 """Unit tests for vulnxscan triage and lookup helpers.""" from types import SimpleNamespace import pandas as pd import pytest from vulnxscan.github_prs import GitHubPrLookup from vulnxscan.repology_lookup import RepologyVulnerabilityLookup from vulnxscan.triage import classify_vulnerability, triage_vulnerabilities class FakeRepologyLookup: def __init__(self): self.vulnerable_checks = [] self.query_inputs = [] def is_vulnerable(self, package, version, vuln_id=None): self.vulnerable_checks.append((package, str(version), vuln_id)) return str(version) == "1.0.0" def query_repology_versions(self, df_vuln_pkgs): self.query_inputs.append(df_vuln_pkgs.copy(deep=True)) return pd.DataFrame( [ { "vuln_id": "CVE-2024-1", "url": "https://nvd.nist.gov/vuln/detail/CVE-2024-1", "package": "openssl", "severity": "7.0", "version_local": "1.0.0", "version_nixpkgs": "1.1.0", "version_upstream": "1.2.0", "package_repology": "openssl", "sortcol": "2024A0000000001", } ] ) class FakeGitHubLookup: def __init__(self): self.rows = [] def find_nixpkgs_prs(self, row): self.rows.append(row) return "https://github.com/NixOS/nixpkgs/pull/1" class FakeAdapter: def __init__(self): self.queries = [] def query(self, repology_query): self.queries.append(repology_query) return pd.DataFrame( [ { "package": "tiff", "version": "4.5.0", "status": "newest", "newest_upstream_release": "4.5.1", }, { "package": "tiff-tools", "version": "4.4.0", "status": "newest", "newest_upstream_release": "4.4.2", }, ] ) def test_classify_vulnerability_marks_fixable_nixpkgs_update(): lookup = FakeRepologyLookup() row = SimpleNamespace( vuln_id="CVE-2024-1", package_repology="openssl", version_local="1.0.0", version_nixpkgs="1.1.0", version_upstream="1.2.0", ) classification = classify_vulnerability(row, 

def test_classify_vulnerability_marks_fixable_nixpkgs_update():
    lookup = FakeRepologyLookup()
    row = SimpleNamespace(
        vuln_id="CVE-2024-1",
        package_repology="openssl",
        version_local="1.0.0",
        version_nixpkgs="1.1.0",
        version_upstream="1.2.0",
    )

    classification = classify_vulnerability(row, repology_lookup=lookup)

    assert classification == "fix_update_to_version_nixpkgs"
    assert lookup.vulnerable_checks == [
        ("openssl", "1.0.0", "CVE-2024-1"),
        ("openssl", "1.1.0", "CVE-2024-1"),
    ]


def test_triage_vulnerabilities_groups_rows_and_adds_nixpkgs_prs():
    repology_lookup = FakeRepologyLookup()
    github_lookup = FakeGitHubLookup()
    df_report = pd.DataFrame(
        [
            {
                "vuln_id": "CVE-2024-1",
                "package": "openssl",
                "severity": "7.0",
                "version": "1.0.0",
                "url": "https://nvd.nist.gov/vuln/detail/CVE-2024-1",
                "sortcol": "2024A0000000001",
            },
            {
                "vuln_id": "CVE-2024-1",
                "package": "openssl",
                "severity": "7.0",
                "version": "1.0.0",
                "url": "https://nvd.nist.gov/vuln/detail/CVE-2024-1",
                "sortcol": "2024A0000000001",
            },
        ]
    )

    triaged = triage_vulnerabilities(
        df_report,
        True,
        repology_lookup=repology_lookup,
        github_lookup=github_lookup,
    )

    assert repology_lookup.query_inputs[0]["count"].tolist() == [2]
    assert triaged["classify"].tolist() == ["fix_update_to_version_nixpkgs"]
    assert triaged["nixpkgs_pr"].tolist() == ["https://github.com/NixOS/nixpkgs/pull/1"]


def test_github_pr_lookup_queries_vuln_and_version_matches():
    queries = []
    lookup = GitHubPrLookup(
        session=SimpleNamespace(get=None), sleeper=lambda _delay: None
    )

    def fake_query(query_str, delay=60):
        queries.append((query_str, delay))
        return {
            "total_count": 1,
            "items": [
                {"html_url": f"https://github.com/NixOS/nixpkgs/pull/{len(queries)}"}
            ],
        }

    lookup.query = fake_query
    row = SimpleNamespace(
        vuln_id="CVE-2024-1",
        classify="fix_update_to_version_nixpkgs",
        version_nixpkgs="1.2.3",
        version_upstream="",
        package="openssl",
        whitelist=False,
    )

    prs = lookup.find_nixpkgs_prs(row)

    assert queries == [
        ("repo:NixOS/nixpkgs is:pr is:unmerged is:open CVE-2024-1", 60),
        ("repo:NixOS/nixpkgs is:pr is:merged CVE-2024-1", 60),
        (
            "repo:NixOS/nixpkgs is:pr is:unmerged is:open openssl in:title 1.2.3 in:title",
            60,
        ),
        ("repo:NixOS/nixpkgs is:pr is:merged openssl in:title 1.2.3 in:title", 60),
    ]
    assert prs == (
        "https://github.com/NixOS/nixpkgs/pull/1 \n"
        "https://github.com/NixOS/nixpkgs/pull/2 \n"
        "https://github.com/NixOS/nixpkgs/pull/3 \n"
        "https://github.com/NixOS/nixpkgs/pull/4"
    )


def test_query_repology_versions_prefers_exact_version_match():
    adapter = FakeAdapter()
    lookup = RepologyVulnerabilityLookup(adapter=adapter, cve_query=lambda *_args: None)
    df_vuln_pkgs = pd.DataFrame(
        [
            {
                "vuln_id": "CVE-2024-2",
                "url": "https://nvd.nist.gov/vuln/detail/CVE-2024-2",
                "package": "libtiff",
                "severity": "5.0",
                "version": "4.5.0",
                "sortcol": "2024A0000000002",
                "count": 1,
            }
        ]
    )

    result = lookup.query_repology_versions(df_vuln_pkgs)

    assert len(adapter.queries) == 1
    assert result.to_dict("records") == [
        {
            "vuln_id": "CVE-2024-2",
            "url": "https://nvd.nist.gov/vuln/detail/CVE-2024-2",
            "package": "libtiff",
            "severity": "5.0",
            "version_local": "4.5.0",
            "version_nixpkgs": "4.5.0",
            "version_upstream": "4.5.1",
            "package_repology": "tiff",
            "sortcol": "2024A0000000002",
        }
    ]


def test_query_repology_rejects_unknown_match_type():
    lookup = RepologyVulnerabilityLookup(
        adapter=FakeAdapter(),
        cve_query=lambda *_args: None,
    )

    with pytest.raises(ValueError, match="Unknown match_type: 'bad'"):
        lookup.query_repology("openssl", match_type="bad")


================================================
FILE: tests/test_whitelist.py
================================================
#!/usr/bin/env python3

# SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII)
#
# SPDX-License-Identifier: Apache-2.0

"""Offline tests for whitelist handling."""

from common.df import df_from_csv_file
from tests.testpaths import RESOURCES_DIR
from tests.testutils import df_difference, df_to_string
from vulnxscan.whitelist import df_apply_whitelist, load_whitelist


def test_whitelist():
    """Test applying whitelist to vulnerability csv file."""
    whitelist_csv = RESOURCES_DIR / "whitelist.csv"
    assert whitelist_csv.exists()
    vulns_csv = RESOURCES_DIR / "vulns.csv"
    assert vulns_csv.exists()
    df_whitelist = load_whitelist(whitelist_csv)
    assert df_whitelist is not None
    df_vulns = df_from_csv_file(vulns_csv)
    assert df_vulns is not None
    df_vuln_id_copy = df_vulns.copy()[["vuln_id", "package"]]
    df_apply_whitelist(df_whitelist, df_vuln_id_copy)
    df_diff = df_difference(df_vulns.astype(str), df_vuln_id_copy.astype(str))
    assert df_diff.empty, df_to_string(df_diff)


================================================
FILE: tests/testpaths.py
================================================
#!/usr/bin/env python3

# SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII)
#
# SPDX-License-Identifier: Apache-2.0

"""Shared paths for the test suite."""

from pathlib import Path

TESTS_DIR = Path(__file__).resolve().parent
RESOURCES_DIR = TESTS_DIR / "resources"
REPOROOT = TESTS_DIR.parent
SRCDIR = REPOROOT / "src"

COMPARE_DEPS = TESTS_DIR / "compare_deps.py"
COMPARE_SBOMS = TESTS_DIR / "compare_sboms.py"
SAMPLE_CDX_SBOM = RESOURCES_DIR / "sample_cdx_sbom.json"

SBOMNIX = SRCDIR / "sbomnix" / "main.py"
NIXGRAPH = SRCDIR / "nixgraph" / "main.py"
NIXMETA = SRCDIR / "nixmeta" / "main.py"
PROVENANCE = SRCDIR / "provenance" / "main.py"
NIX_OUTDATED = SRCDIR / "nixupdate" / "nix_outdated.py"
VULNXSCAN = SRCDIR / "vulnxscan" / "vulnxscan_cli.py"
REPOLOGY_CLI = SRCDIR / "repology" / "repology_cli.py"


================================================
FILE: tests/testutils.py
================================================
#!/usr/bin/env python3

# SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII)
#
# SPDX-License-Identifier: Apache-2.0

"""Shared helper utilities for the test suite."""

import json
from pathlib import Path
from urllib.parse import urldefrag, urlparse

import jsonschema
import referencing
import referencing.retrieval

LOCAL_SCHEMA_ALIASES = {
    "spdx.schema.json": "spdx.schema.json",
    "http://cyclonedx.org/schema/spdx.schema.json": "spdx.schema.json",
    "jsf-0.82.schema.json": "jsf-0.82.schema.json",
    "http://cyclonedx.org/schema/jsf-0.82.schema.json": "jsf-0.82.schema.json",
}


def resolve_local_schema_path(uri, schema_dir):
    """Resolve a schema reference to a local file under ``schema_dir``."""
    schema_dir = Path(schema_dir)
    base_uri, _fragment = urldefrag(uri)
    if base_uri in LOCAL_SCHEMA_ALIASES:
        filename = LOCAL_SCHEMA_ALIASES[base_uri]
    else:
        parsed = urlparse(base_uri)
        filename = Path(parsed.path or base_uri).name
        filename = LOCAL_SCHEMA_ALIASES.get(filename, filename)
    path = schema_dir / filename
    if not path.exists():
        raise FileNotFoundError(f"Local schema not found for '{uri}': {path}")
    return path


def create_local_schema_retriever(schema_dir):
    """Create a cached local schema retriever for ``referencing``."""

    @referencing.retrieval.to_cached_resource()
    def _retrieve(uri):
        return resolve_local_schema_path(uri, schema_dir).read_text(encoding="utf-8")

    return _retrieve
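
# Hedged example of the alias table above: absolute CycloneDX schema URIs and
# bare filenames both resolve to the vendored copy, so validation never
# touches the network (fragments are stripped by urldefrag first):
#
#     resolve_local_schema_path(
#         "http://cyclonedx.org/schema/jsf-0.82.schema.json#/definitions/signature",
#         RESOURCES_DIR,
#     )  # -> RESOURCES_DIR / "jsf-0.82.schema.json"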

def validate_json(file_path, schema_path):
    """Validate json file matches schema."""
    schema_path = Path(schema_path)
    with (
        open(file_path, encoding="utf-8") as json_file,
        open(
            schema_path,
            encoding="utf-8",
        ) as schema_file,
    ):
        json_obj = json.load(json_file)
        schema_obj = json.load(schema_file)
    registry = referencing.Registry(
        retrieve=create_local_schema_retriever(schema_path.parent)
    )
    jsonschema.validate(json_obj, schema_obj, registry=registry)


def df_to_string(df):
    """Convert dataframe to string."""
    return (
        "\n"
        + df.to_string(max_rows=None, max_cols=None, index=False, justify="left")
        + "\n"
    )


def df_difference(df_left, df_right):
    """Return dataframe that represents diff of two dataframes."""
    df_right = df_right.astype(df_left.dtypes.to_dict())
    df = df_left.merge(
        df_right,
        how="outer",
        indicator=True,
    )
    df = df[df["_merge"] != "both"]
    cols = df.columns.tolist()
    cols = cols[-1:] + cols[:-1]
    return df[cols]


================================================
FILE: tests/vulnix_test_support.py
================================================
#!/usr/bin/env python3

# SPDX-FileCopyrightText: 2026 Technology Innovation Institute (TII)
#
# SPDX-License-Identifier: Apache-2.0

"""Helpers for choosing the real or dummy vulnix binary in tests."""

from __future__ import annotations

import fcntl
import os
import shutil
import stat
import subprocess
from dataclasses import dataclass
from pathlib import Path

_WRAPPER_BASENAME = "vulnix"


@dataclass(frozen=True)
class VulnixTestConfig:
    """Resolved vulnix test execution configuration."""

    wrapper_dir: Path
    effective_mode: str
    effective_cache_dir: Path | None
    real_vulnix: str | None


def default_vulnix_cache_dir(env: dict[str, str] | None = None) -> Path:
    """Return the real vulnix cache dir for this environment."""
    env = os.environ if env is None else env
    cache_dir = env.get("SBOMNIX_TEST_VULNIX_CACHE_DIR")
    if cache_dir:
        return Path(cache_dir).expanduser()
    return Path("~/.cache/vulnix").expanduser()


def vulnix_cache_ready(cache_dir: Path) -> bool:
    """Return True when `cache_dir` already contains a usable vulnix DB."""
    data_file = cache_dir / "Data.fs"
    return data_file.is_file() and data_file.stat().st_size > 0


def write_vulnix_wrapper(wrapper_dir: Path) -> Path:
    """Create the test-only vulnix wrapper and return its path."""
    wrapper_dir.mkdir(parents=True, exist_ok=True)
    wrapper_path = wrapper_dir / _WRAPPER_BASENAME
    wrapper_path.write_text(
        """#!/bin/sh
set -eu
mode="${SBOMNIX_TEST_VULNIX_EFFECTIVE_MODE:?}"
if [ "$mode" = "dummy" ]; then
    printf '[]'
    exit 0
fi
real_vulnix="${SBOMNIX_TEST_REAL_VULNIX:-}"
if [ -z "$real_vulnix" ]; then
    echo "SBOMNIX_TEST_REAL_VULNIX is empty while vulnix test mode is real" >&2
    exit 2
fi
cache_dir="${SBOMNIX_TEST_VULNIX_EFFECTIVE_CACHE_DIR:-}"
if [ -n "$cache_dir" ]; then
    exec "$real_vulnix" --cache-dir "$cache_dir" "$@"
fi
exec "$real_vulnix" "$@"
""",
        encoding="utf-8",
    )
    wrapper_path.chmod(
        wrapper_path.stat().st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH
    )
    return wrapper_path


def build_vulnix_test_env(
    env: dict[str, str],
    *,
    config: VulnixTestConfig,
) -> dict[str, str]:
    """Return environment variables needed by the vulnix test wrapper."""
    env = env.copy()
    path_entries = [str(config.wrapper_dir)]
    path_entries.append(env.get("PATH", os.defpath))
    env["PATH"] = os.pathsep.join(path_entries)
    env["SBOMNIX_TEST_VULNIX_EFFECTIVE_MODE"] = config.effective_mode
    env["SBOMNIX_TEST_REAL_VULNIX"] = config.real_vulnix or ""
    if config.effective_cache_dir is not None:
        env["SBOMNIX_TEST_VULNIX_EFFECTIVE_CACHE_DIR"] = str(config.effective_cache_dir)
    else:
        env.pop("SBOMNIX_TEST_VULNIX_EFFECTIVE_CACHE_DIR", None)
    return env
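
# Hedged usage note: build_vulnix_test_env prepends wrapper_dir to PATH, so
# subprocesses spawned with this environment resolve `vulnix` to the wrapper
# before any system binary, e.g.:
#
#     env = build_vulnix_test_env(dict(os.environ), config=config)
#     subprocess.run(["vulnix", "--json"], env=env, check=True)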
{"dummy", "real"}: raise ValueError( f"invalid effective vulnix mode {effective_mode!r}; expected 'dummy' or 'real'" ) if effective_mode == "real" and real_vulnix is None: real_vulnix = shutil.which("vulnix") if effective_mode == "real" and real_vulnix is None: raise RuntimeError( "real vulnix requested, but 'vulnix' is not available in PATH" ) wrapper_dir = tmp_root / "tool-wrappers" write_vulnix_wrapper(wrapper_dir) return VulnixTestConfig( wrapper_dir=wrapper_dir, effective_mode=effective_mode, effective_cache_dir=cache_dir if effective_mode == "real" else None, real_vulnix=real_vulnix, ) def ensure_real_vulnix_cache( cache_dir: Path, *, build_root: Path, real_vulnix: str, test_derivation: Path, ) -> Path: """Warm a shared vulnix cache once for opt-in/manual real-vulnix test runs. The default test harness uses dummy vulnix and does not call this helper. """ def _run_warmup_command(cmd: list[str], *, step: str) -> None: try: subprocess.run( cmd, check=True, capture_output=True, text=True, ) except subprocess.CalledProcessError as exc: stderr = (exc.stderr or "").strip() stdout = (exc.stdout or "").strip() details = stderr or stdout or "no output captured" raise RuntimeError(f"{step} failed: {details}") from exc cache_dir.mkdir(parents=True, exist_ok=True) build_root.mkdir(parents=True, exist_ok=True) lock_path = build_root / "vulnix-cache.lock" with lock_path.open("w", encoding="utf-8") as lock_file: fcntl.flock(lock_file.fileno(), fcntl.LOCK_EX) if vulnix_cache_ready(cache_dir): return cache_dir result_link = build_root / "result" if not result_link.exists(): _run_warmup_command( ["nix-build", test_derivation.as_posix(), "-o", result_link.as_posix()], step="nix-build for vulnix cache warm-up", ) _run_warmup_command( [ real_vulnix, "--cache-dir", cache_dir.as_posix(), result_link.as_posix(), "-C", "--json", ], step="vulnix cache warm-up scan", ) return cache_dir