Repository: teaxyz/chai Branch: main Commit: 5ba601243a93 Files: 142 Total size: 809.9 KB Directory structure: gitextract_5cor20ar/ ├── .dockerignore ├── .github/ │ ├── actions/ │ │ └── complain/ │ │ └── action.yml │ └── workflows/ │ ├── chai-api.ci.yml │ ├── ci.yml │ └── deploy.yml ├── .gitignore ├── .python-version ├── LICENSE ├── README.md ├── alembic/ │ ├── .pkgx.yaml │ ├── Dockerfile │ ├── README.md │ ├── alembic.ini │ ├── env.py │ ├── init-script.sql │ ├── load-values.sql │ ├── run_migrations.sh │ ├── script.py.mako │ └── versions/ │ ├── 20241028_1217-base_migration.py │ ├── 20250312_0045-add_legacy_dependency_table.py │ ├── 20250312_2244-canons.py │ ├── 20250416_0223-add_ranks.py │ ├── 20250422_0940-add_unique_package_to_canon_packages.py │ ├── 20250508_1752-add_trgm_indexes.py │ ├── 20250529_2341-rename_canons_table_and_recreate.py │ └── 20250529_2345-recreate_canon_foreign_keys.py ├── api/ │ ├── .dockerignore │ ├── .gitignore │ ├── Cargo.toml │ ├── Dockerfile │ ├── README.md │ └── src/ │ ├── app_state.rs │ ├── db.rs │ ├── handlers.rs │ ├── logging.rs │ ├── main.rs │ └── utils.rs ├── core/ │ ├── README.md │ ├── config.py │ ├── db.py │ ├── fetcher.py │ ├── logger.py │ ├── models/ │ │ └── __init__.py │ ├── requirements.txt │ ├── scheduler.py │ ├── structs.py │ ├── test.json │ ├── transformer.py │ └── utils.py ├── db/ │ ├── README.md │ └── queries.md ├── docker-compose.yml ├── examples/ │ ├── sbom-meta/ │ │ ├── README.md │ │ ├── go.mod │ │ ├── go.sum │ │ └── main.go │ └── visualizer/ │ ├── README.md │ ├── main.py │ └── monitor.py ├── package_managers/ │ ├── crates/ │ │ ├── Dockerfile │ │ ├── README.md │ │ ├── db.py │ │ ├── diff.py │ │ ├── main.py │ │ ├── structs.py │ │ └── transformer.py │ ├── debian/ │ │ ├── Dockerfile │ │ ├── README.md │ │ ├── db.py │ │ ├── debian_sources.py │ │ ├── diff.py │ │ ├── main.py │ │ ├── parser.py │ │ ├── scripts/ │ │ │ ├── investigate_sources.py │ │ │ └── test_investigate_sources.py │ │ └── structs.py │ ├── homebrew/ │ │ ├── Dockerfile │ │ ├── README.md │ │ ├── db.py │ │ ├── diff.py │ │ ├── formulae.py │ │ ├── main.py │ │ └── structs.py │ └── pkgx/ │ ├── Dockerfile │ ├── db.py │ ├── diff.py │ ├── loader.py │ ├── main.py │ ├── parser.py │ └── url.py ├── pkgx.yaml ├── pyproject.toml ├── ranker/ │ ├── .dockerignore │ ├── .gitignore │ ├── Dockerfile │ ├── README.md │ ├── config.py │ ├── db.py │ ├── dedupe.py │ ├── main.py │ ├── naming.py │ ├── requirements.txt │ ├── rx_graph.py │ └── utils/ │ ├── analyze_ranks.py │ └── parse_log.py ├── scripts/ │ ├── chai-legacy-loader/ │ │ ├── README.md │ │ ├── add_package_fields.py │ │ ├── batch_insert_package_urls.py │ │ ├── batch_insert_urls.py │ │ ├── copy_dependencies_no_thread.py │ │ ├── pkgx.yaml │ │ └── sql/ │ │ ├── dependencies.sql │ │ ├── packages.sql │ │ └── urls.sql │ ├── npm-singleton/ │ │ ├── README.md │ │ ├── pkgx.yaml │ │ └── single.py │ ├── package_to_package/ │ │ └── package_dependencies.py │ └── upgrade_canons/ │ ├── .gitignore │ ├── README.md │ ├── create_deleted_canons.py │ ├── db.py │ ├── delete_non_canonical_urls.py │ ├── main.py │ ├── registered_projects.py │ └── structs.py └── tests/ ├── README.md ├── conftest.py ├── package_managers/ │ ├── crates/ │ │ ├── conftest.py │ │ └── test_crates_diff_deps.py │ ├── debian/ │ │ ├── conftest.py │ │ ├── test_debian_diff.py │ │ ├── test_debian_parser.py │ │ └── test_debian_sources.py │ ├── homebrew/ │ │ ├── conftest.py │ │ └── test_homebrew_diff_deps.py │ └── pkgx/ │ ├── test_pkgx_diff.py │ └── test_special_case.py ├── ranker/ │ ├── test_compute_canon_name.py │ 
├── test_dedupe.py │ └── test_rx_graph.py └── scripts/ └── upgrade_canons/ └── test_analyze_packages_needing_canonicalization.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .dockerignore ================================================ # directories data/ .venv/ tests/ scripts/ logs/ db/ # other files .gitignore docker-compose.yml .DS_Store .git README.md LICENSE .cursorrules .coverage *.xml ================================================ FILE: .github/actions/complain/action.yml ================================================ name: teaxyz/chai/complain description: creates an issue for any failing tests inputs: test_function: description: test function to complain about required: true token: description: github token required: true default: ${{ github.token }} runs: using: composite steps: - name: Find Issue uses: actions-cool/issues-helper@v3 id: find with: actions: "find-issues" token: ${{ inputs.token }} issue-state: "open" title-includes: "❌ test failure" labels: "test-failure" - name: Create Issue uses: actions-cool/issues-helper@v3 id: create if: ${{ steps.find.outputs.issues == '[]' }} with: actions: "create-issue" token: ${{ inputs.token }} title: "❌ test failure" body: "Running log of test failure for ${{ inputs.test_function }}" labels: "test-failure" assignees: "sanchitram1" - name: Log Comment uses: actions-cool/issues-helper@v3 with: actions: "create-comment" token: ${{ inputs.token }} issue-number: ${{ steps.create.outputs.issue-number || fromJSON(steps.find.outputs.issues)[0].number }} body: | # Test failure ## ${{ inputs.test_function }} logs: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} ================================================ FILE: .github/workflows/chai-api.ci.yml ================================================ name: api.ci on: push: branches: [main] paths: - "api/**" pull_request: paths: - "api/**" env: CARGO_TERM_COLOR: always concurrency: group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: true jobs: test: name: test runs-on: ubuntu-latest services: postgres: image: postgres env: POSTGRES_USER: postgres POSTGRES_PASSWORD: s3cr3t POSTGRES_DB: chai options: >- --health-cmd pg_isready --health-interval 10s --health-timeout 5s --health-retries 5 ports: - 5435:5432 steps: - uses: actions/checkout@v5 - name: Install dependencies run: | sudo apt-get update sudo apt-get install -y postgresql-client - name: Run tests run: cargo test --verbose working-directory: api env: DATABASE_URL: postgresql://postgres:s3cr3t@localhost:5435/chai fmt: name: Rustfmt runs-on: ubuntu-latest steps: - uses: actions/checkout@v5 - uses: actions-rs/toolchain@v1 with: profile: minimal toolchain: stable override: true components: rustfmt - run: cargo fmt --all -- --check working-directory: api clippy: name: Clippy runs-on: ubuntu-latest steps: - uses: actions/checkout@v5 - uses: actions-rs/toolchain@v1 with: profile: minimal toolchain: stable override: true components: clippy - run: cargo clippy --all-targets --all-features -- -D warnings working-directory: api docker-build: name: Build Docker Image runs-on: ubuntu-latest steps: - uses: actions/checkout@v5 - name: Set up Docker Buildx uses: docker/setup-buildx-action@v2 - name: Build uses: docker/build-push-action@v6 with: context: ./api push: false # Set push to false to prevent pushing the image ================================================ FILE: 
.github/workflows/ci.yml ================================================ name: CI on: workflow_dispatch: inputs: env: description: "The environment to test against" required: false type: choice options: - dev - sepolia - mainnet default: "dev" push: branches: - main paths: - "**/*.py" - tests/** - core/** - package_managers/** - ranker/** pull_request: branches: - main paths: - "**/*.py" - tests/** - core/** - package_managers/** - ranker/** jobs: check: runs-on: ubuntu-latest steps: - name: Checkout uses: actions/checkout@v5 - name: Setup with pkgx uses: pkgxdev/setup@v4 with: +: astral.sh/uv astral.sh/ruff pytest.org - name: Set up Python uses: actions/setup-python@v5 with: python-version-file: ".python-version" - name: Install the project run: pkgx uv sync --locked --all-extras --dev --all-groups - name: Lint with Ruff uses: astral-sh/ruff-action@v3 with: src: . - name: Run tests with pytest id: pytest run: | pkgx uv run pytest tests/ complain: needs: check if: failure() runs-on: ubuntu-latest permissions: issues: write steps: - uses: actions/checkout@v5 - uses: ./.github/actions/complain with: test_function: "pytest" token: ${{ secrets.GITHUB_TOKEN }} ================================================ FILE: .github/workflows/deploy.yml ================================================ name: "Release Chai" run-name: Release Chai - ${{ inputs.env || 'auto' }} - ${{ inputs.ref || github.ref }} on: push: branches: - main workflow_dispatch: inputs: env: description: "The environment to deploy to" required: true type: choice options: - dev - sepolia - testnet - mainnet ref: description: "The git ref (SHA or tag) to deploy" required: false deploy_latest: description: "Force deploy all components from latest commit" required: false type: boolean permissions: id-token: write contents: read jobs: build: if: ${{ github.event_name == 'workflow_dispatch' && inputs.deploy_latest == true }} environment: ${{ inputs.env || 'dev' }} runs-on: ubuntu-latest steps: - uses: actions/checkout@v5 - name: Configure AWS credentials uses: aws-actions/configure-aws-credentials@v3 with: role-to-assume: ${{ secrets.TEA_AWS_ROLE }} aws-region: us-east-1 - name: Login to Amazon ECR id: login-ecr uses: aws-actions/amazon-ecr-login@v2 - name: Set deployment ref id: set-ref run: | DEPLOY_REF=${{ inputs.ref || github.sha }} echo "DEPLOY_REF=${DEPLOY_REF}" >> $GITHUB_ENV echo "deploy_ref=${DEPLOY_REF}" >> $GITHUB_OUTPUT - name: Set environment id: set-env run: | DEPLOY_ENV=${{ inputs.env || 'dev' }} echo "DEPLOY_ENV=${DEPLOY_ENV}" >> $GITHUB_ENV echo "deploy_env=${DEPLOY_ENV}" >> $GITHUB_OUTPUT - name: Build and push Crates indexer image if: ${{ github.event_name == 'workflow_dispatch' && inputs.deploy_latest == true }} uses: docker/build-push-action@v6 with: context: . file: ./package_managers/crates/Dockerfile push: true tags: | ${{ steps.login-ecr.outputs.registry }}/chai-v2/crates:${{ env.DEPLOY_REF }}-${{ env.DEPLOY_ENV }} ${{ steps.login-ecr.outputs.registry }}/chai-v2/crates:latest - name: Build and push Homebrew indexer image if: ${{ github.event_name == 'workflow_dispatch' && inputs.deploy_latest == true }} uses: docker/build-push-action@v6 with: context: . 
file: ./package_managers/homebrew/Dockerfile push: true tags: | ${{ steps.login-ecr.outputs.registry }}/chai-v2/homebrew:${{ env.DEPLOY_REF }}-${{ env.DEPLOY_ENV }} ${{ steps.login-ecr.outputs.registry }}/chai-v2/homebrew:latest - name: Build and push Debian indexer image if: ${{ github.event_name == 'workflow_dispatch' && inputs.deploy_latest == true }} uses: docker/build-push-action@v6 with: context: . file: ./package_managers/debian/Dockerfile push: true tags: | ${{ steps.login-ecr.outputs.registry }}/chai-v2/debian:${{ env.DEPLOY_REF }}-${{ env.DEPLOY_ENV }} ${{ steps.login-ecr.outputs.registry }}/chai-v2/debian:latest - name: Build and push Pkgx indexer image if: ${{ github.event_name == 'workflow_dispatch' && inputs.deploy_latest == true }} uses: docker/build-push-action@v6 with: context: . file: ./package_managers/pkgx/Dockerfile push: true tags: | ${{ steps.login-ecr.outputs.registry }}/chai-v2/pkgx:${{ env.DEPLOY_REF }}-${{ env.DEPLOY_ENV }} ${{ steps.login-ecr.outputs.registry }}/chai-v2/pkgx:latest - name: Build and push Alembic image if: ${{ github.event_name == 'workflow_dispatch' && inputs.deploy_latest == true }} uses: docker/build-push-action@v6 with: context: . file: ./alembic/Dockerfile push: true tags: | ${{ steps.login-ecr.outputs.registry }}/chai-v2/alembic:${{ env.DEPLOY_REF }}-${{ env.DEPLOY_ENV }} ${{ steps.login-ecr.outputs.registry }}/chai-v2/alembic:latest - name: Build and push chai-api image if: ${{ github.event_name == 'workflow_dispatch' && inputs.deploy_latest == true }} uses: docker/build-push-action@v6 with: context: ./api file: ./api/Dockerfile push: true tags: | ${{ steps.login-ecr.outputs.registry }}/chai-v2/chai-api:${{ env.DEPLOY_REF }}-${{ env.DEPLOY_ENV }} ${{ steps.login-ecr.outputs.registry }}/chai-v2/chai-api:latest - name: Build and push Ranker indexer image if: ${{ github.event_name == 'workflow_dispatch' && inputs.deploy_latest == true }} uses: docker/build-push-action@v6 with: context: .
file: ./ranker/Dockerfile push: true tags: | ${{ steps.login-ecr.outputs.registry }}/chai-v2/ranker:${{ env.DEPLOY_REF }}-${{ env.DEPLOY_ENV }} ${{ steps.login-ecr.outputs.registry }}/chai-v2/ranker:latest deploy: needs: build if: ${{ github.event_name == 'workflow_dispatch' }} runs-on: ubuntu-latest environment: ${{ inputs.env || 'dev' }} steps: - uses: actions/checkout@v5 - name: Configure AWS credentials uses: aws-actions/configure-aws-credentials@v3 with: role-to-assume: ${{ secrets.TEA_AWS_ROLE }} aws-region: us-east-1 - name: Set deployment ref id: set-ref run: | DEPLOY_REF=${{ inputs.ref }} echo "DEPLOY_REF=${DEPLOY_REF}" >> $GITHUB_ENV echo "deploy_ref=${DEPLOY_REF}" >> $GITHUB_OUTPUT - name: Set environment id: set-env run: | DEPLOY_ENV=${{ inputs.env }} echo "DEPLOY_ENV=${DEPLOY_ENV}" >> $GITHUB_ENV echo "deploy_env=${DEPLOY_ENV}" >> $GITHUB_OUTPUT - name: Deploy chai-api run: | aws ecs update-service --cluster chai-${{ env.DEPLOY_ENV }} \ --service ${{ env.DEPLOY_ENV }}-chai-api \ --force-new-deployment ================================================ FILE: .gitignore ================================================ # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # C extensions *.so # Distribution / packaging .Python build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ wheels/ share/python-wheels/ *.egg-info/ .installed.cfg *.egg MANIFEST # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. *.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .nox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *.cover *.py,cover .hypothesis/ .pytest_cache/ cover/ # Translations *.mo *.pot # Django stuff: *.log local_settings.py db.sqlite3 db.sqlite3-journal # Flask stuff: instance/ .webassets-cache # Scrapy stuff: .scrapy # Sphinx documentation docs/_build/ # PyBuilder .pybuilder/ target/ # Jupyter Notebook .ipynb_checkpoints # IPython profile_default/ ipython_config.py # pyenv # For a library or package, you might want to ignore these files since the code is # intended to run in multiple environments; otherwise, check them in: # .python-version # pipenv # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. # However, in case of collaboration, if having platform-specific dependencies or dependencies # having no cross-platform support, pipenv may install dependencies that don't work, or not # install all needed dependencies. #Pipfile.lock # poetry # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. # This is especially recommended for binary packages to ensure reproducibility, and is more # commonly ignored for libraries. # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control #poetry.lock # pdm # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. #pdm.lock # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it # in version control. # https://pdm.fming.dev/latest/usage/project/#working-with-version-control .pdm.toml .pdm-python .pdm-build/ # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm __pypackages__/ # Celery stuff celerybeat-schedule celerybeat.pid # SageMath parsed files *.sage.py # Environments .env .venv env/ venv/ ENV/ env.bak/ venv.bak/ # Spyder project settings .spyderproject .spyproject # Rope project settings .ropeproject # mkdocs documentation /site # mypy .mypy_cache/ .dmypy.json dmypy.json # Pyre type checker .pyre/ # pytype static type analyzer .pytype/ # Cython debug symbols cython_debug/ # PyCharm # JetBrains specific template is maintained in a separate JetBrains.gitignore that can # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. #.idea/ # data files data db/data # examples examples/sbom-meta/sbom-meta *.svg # cursor .cursorrules .cursor/ TASKS.md # DS Store .DS_Store # Profiling *.prof ================================================ FILE: .python-version ================================================ 3.11 ================================================ FILE: LICENSE ================================================ MIT License Copyright (c) 2024 tea protocol Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: README.md ================================================ # CHAI CHAI is an attempt at an open-source data pipeline for package managers. The goal is to have a pipeline that can use the data from any package manager and provide a normalized data source for a myriad of use cases. ## Getting Started Use [Docker](https://docker.com) 1. Install Docker 2. Clone the chai repository (https://docs.github.com/en/repositories/creating-and-managing-repositories/cloning-a-repository) 3. Using a terminal, navigate to the cloned repository directory 4. Run `docker compose build` to create the latest Docker images 5. Then, run `docker compose up` to launch. > [!NOTE] > > This will run CHAI for all package managers. As an example, crates by > itself will take over an hour and consume >5GB of storage. > > Currently, we support: > > - crates > - Homebrew > - Debian > - pkgx > > You can run a single package manager by running > `PACKAGE_MANAGER=<name> docker compose up` > > We are planning on supporting `NPM`, `PyPI`, and `rubygems` next. ### Arguments Specify these, e.g. `FOO=bar docker compose up`: - `ENABLE_SCHEDULER`: When true, the pipeline runs on a schedule set by `FREQUENCY`. - `FREQUENCY`: Sets how often (in hours) the pipeline should run. - `TEST`: Useful for testing code insertions. - `FETCH`: Determines whether to fetch new data or use whatever was saved locally. - `NO_CACHE`: When true, deletes temporary files after processing. > [!NOTE] > The flag `NO_CACHE` does not mean that files will not get downloaded to your local > storage (specifically, the ./data directory). It only means that we'll > delete these temporary files from ./data once we're done processing them. > If `FETCH` is false, the pipeline looks for source data in the cache, so a run > with `NO_CACHE` enabled followed by a run with `FETCH` disabled will fail. These arguments are all configurable in the `docker-compose.yml` file.
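To make the flags concrete, here is a minimal sketch of how an indexer might read them at startup. The variable names come from the list above, but the parsing logic and defaults are illustrative, not a copy of `core/config.py`:

```python
import os


def env_flag(name: str, default: bool) -> bool:
    """Read a boolean flag like FETCH or NO_CACHE from the environment."""
    return os.getenv(name, str(default)).lower() in ("1", "true", "yes")


enable_scheduler = env_flag("ENABLE_SCHEDULER", False)
frequency_hours = int(os.getenv("FREQUENCY", "24"))  # illustrative default
fetch = env_flag("FETCH", True)  # fetch fresh data vs. reuse the ./data cache
no_cache = env_flag("NO_CACHE", False)  # delete temp files after processing

if not fetch and no_cache:
    # echoes the note above: deleting the cache breaks later FETCH=false runs
    print("warning: FETCH=false cannot find source data after a NO_CACHE run")
```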
### Docker Services Overview 1. `db`: [PostgreSQL] database for the reduced package data 2. `alembic`: handles migrations 3. `package_managers`: fetches and writes data for each package manager 4. `api`: a simple REST API for reading from the db 5. `ranker`: deduplicates and ranks the packages ### Hard Reset Stuff happens. Start over: `rm -rf ./data`: removes all the data the fetcher has downloaded. ## Goals Our goal is to build a data schema that looks like this: ![db/CHAI_ERD.png](db/CHAI_ERD.png) You can read more about specific data models in the db's [readme](db/README.md). Our specific application extracts the dependency graph to understand which pieces of the open-source graph are critical. We also built a simple example that displays [sbom-metadata](examples/sbom-meta) for your repository. There are many other potential use cases for this data: - License compatibility checker - Developer publications - Package popularity - Dependency analysis vulnerability tool (requires translating semver) > [!TIP] > Help us add the above to the examples folder. ## FAQs / Common Issues 1. The database url is `postgresql://postgres:s3cr3t@localhost:5435/chai`, and is used as `CHAI_DATABASE_URL` in the environment. `psql $CHAI_DATABASE_URL` will connect you to the database. 2. If you're orchestrating via docker, swap `localhost` for `host.docker.internal`
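As a quick way to verify FAQ item 1 from Python rather than `psql`, a minimal sketch (assuming `psycopg2-binary` is installed; the `packages` table is part of the schema shown above):

```python
import os

import psycopg2

# FAQ item 1: the default local URL; swap localhost for host.docker.internal
# (FAQ item 2) when connecting from inside a container
url = os.getenv(
    "CHAI_DATABASE_URL", "postgresql://postgres:s3cr3t@localhost:5435/chai"
)

with psycopg2.connect(url) as conn:
    with conn.cursor() as cur:
        cur.execute("SELECT count(*) FROM packages")
        print("packages:", cur.fetchone()[0])
```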
## Managing Dependencies We use [`uv`](https://astral.sh/uv) to manage dependencies (and sometimes execution). All dependencies are listed in [`pyproject.toml`](./pyproject.toml), under the `dependency-groups` header. Each group helps us classify the service we're adding a dependency for. For example, if we're adding a new dependency for all the indexers: ```bash uv add --group indexer requests # use the --all-groups flag to sync your venv for all dependencies uv sync --all-groups uv pip compile --group indexer -o core/requirements.txt ``` The last step writes the updated dependencies to a requirements.txt file, which the Docker containers executing the individual services need in order to build correctly. Each indexer shares the same set of dependencies, and that requirements file is **generated by uv** and maintained in [core/requirements.txt](core/requirements.txt) > [!IMPORTANT] > DO NOT UPDATE ANY `requirements.txt` FILES DIRECTLY > `uv` provides a way to generate that automatically, based on the pyproject.toml > > Have an idea on a better way to do this? Open to input... ## Deployment ```sh export CHAI_DATABASE_URL=postgresql://<user>:<password>@host.docker.internal:<port>/chai export PGPASSWORD=<password> docker compose up alembic ``` ## Tasks These are tasks that can be run using [xcfile.dev]. If you use `pkgx`, typing `dev` loads the environment. Alternatively, run them manually. ### reset ```sh rm -rf db/data data .venv ``` ### build ```sh docker compose build ``` ### start-all Requires: build ```sh docker compose up -d ``` ### stop ```sh docker compose down ``` ### logs ```sh docker compose logs ``` ### db-start Runs migrations and starts up the database ```sh docker compose build --no-cache db alembic docker compose up alembic -d ``` ### db-reset Requires: stop ```sh rm -rf db/data ``` ### db-generate-migration Inputs: MIGRATION_NAME Env: CHAI_DATABASE_URL=postgresql://postgres:s3cr3t@localhost:5435/chai ```sh cd alembic alembic revision --autogenerate -m "$MIGRATION_NAME" ``` ### db-upgrade Env: CHAI_DATABASE_URL=postgresql://postgres:s3cr3t@localhost:5435/chai ```sh cd alembic alembic upgrade head ``` ### db-downgrade Inputs: STEP Env: CHAI_DATABASE_URL=postgresql://postgres:s3cr3t@localhost:5435/chai ```sh cd alembic alembic downgrade -$STEP ``` ### db ```sh psql "postgresql://postgres:s3cr3t@localhost:5435/chai" ``` ### restart-api Refreshes table knowledge from the db. ```sh docker compose restart api ``` ### remove-orphans ```sh docker compose down --remove-orphans ``` ### start-service Inputs: SERVICE Env: CHAI_DATABASE_URL=postgresql://postgres:s3cr3t@host.docker.internal:5435/chai ```sh docker compose up $SERVICE --build ``` ### check Inputs: FOLDER Environment: FOLDER=. ```sh pkgx +python@3.13 ty check $FOLDER ``` [PostgreSQL]: https://www.postgresql.org [`pkgx`]: https://pkgx.sh [xcfile.dev]: https://xcfile.dev ================================================ FILE: alembic/.pkgx.yaml ================================================ # this .pkgx.yaml file is only for alembic dependencies: postgresql.org: 16 alembic.sqlalchemy.org: 1 psycopg.org/psycopg2: 2 ================================================ FILE: alembic/Dockerfile ================================================ FROM ghcr.io/astral-sh/uv:python3.11-bookworm-slim RUN apt update && apt -y install postgresql RUN uv pip install alembic==1.13.2 psycopg2-binary==2.9.10 sqlalchemy==2.0.41 --system COPY . . WORKDIR /alembic RUN chmod +x /alembic/run_migrations.sh ENTRYPOINT ["/bin/sh", "/alembic/run_migrations.sh"] ================================================ FILE: alembic/README.md ================================================ # CHAI Data Migrations This directory contains the Alembic configuration and migration scripts for managing the database schema of the CHAI project. Alembic is used to handle database migrations, allowing for version control of our database schema. ### About Alembic Alembic is a database migration tool for SQLAlchemy. It allows us to: - Track changes to our database schema over time - Apply and revert these changes in a controlled manner - Generate migration scripts automatically based on model changes > [!NOTE] > It's important to note that while `alembic` serves our current needs, it may not be > our long-term solution. As the CHAI project evolves, we might explore other database > migration tools or strategies that better fit our growing requirements. We're open to > reassessing our approach to schema management as needed. ## Entrypoint The main entrypoint for running migrations is the [run migrations script](run_migrations.sh). This script orchestrates the initialization and migration process. ## Steps 1. [Initialize](init-script.sql) The initialization script creates the database `chai` and loads it with any extensions we need, so we've got a clean slate for our db structures. 2.
[Load](load-values.sql) The load script prepopulates some of the tables with `enum`-like values, specifically: - `url_types`: defines different types of URLs (e.g., source, homepage, documentation) - `depends_on_types`: defines different types of dependencies (e.g., runtime, development) - `sources` and `package_managers`: defines different package managers (e.g., npm, pypi) 3. Run Alembic Migrations After initialization and loading initial data, the script runs Alembic migrations to apply any pending database schema changes. ## Contributing To contribute to the database schema: 1. Make a change in the [models](../core/models/__init__.py) file 2. Generate a new migration script: `alembic revision --autogenerate -m "Description"` 3. Review the generated migration script in the [versions](versions/) directory. The auto-generation is powerful but not perfect, so please review the script carefully. 4. Test the migration by running `alembic upgrade head`.
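For example, a step-1 change might add a new model to the models file. The class below is purely illustrative (it is not part of the real schema) and is written self-contained; in CHAI, `Base` actually lives in [core/models/__init__.py](../core/models/__init__.py):

```python
# illustrative sketch; in CHAI, use the Base from core.models instead
from sqlalchemy import Column, String
from sqlalchemy.orm import declarative_base

Base = declarative_base()


class PackageNote(Base):
    """A hypothetical table, used only to demonstrate the workflow."""

    __tablename__ = "package_notes"

    id = Column(String, primary_key=True)
    note = Column(String, nullable=True)
```

With the model registered on `Base.metadata`, step 2's `alembic revision --autogenerate -m "add package notes"` diffs the metadata against the live database and emits a `create_table` migration for review.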
""" connectable = engine_from_config( config.get_section(config.config_ini_section, {}), prefix="sqlalchemy.", poolclass=pool.NullPool, ) with connectable.connect() as connection: context.configure(connection=connection, target_metadata=target_metadata) with context.begin_transaction(): context.run_migrations() if context.is_offline_mode(): run_migrations_offline() else: run_migrations_online() ================================================ FILE: alembic/init-script.sql ================================================ CREATE DATABASE chai; \c chai CREATE EXTENSION IF NOT EXISTS "pgcrypto"; CREATE EXTENSION IF NOT EXISTS "uuid-ossp"; CREATE EXTENSION IF NOT EXISTS pg_trgm; ================================================ FILE: alembic/load-values.sql ================================================ -- url types INSERT INTO "url_types" ("name") VALUES ('source'), ('homepage'), ('documentation'), ('repository') ON CONFLICT (name) DO NOTHING; -- dependency types INSERT INTO "depends_on_types" ("name") VALUES ('build'), ('development'), ('runtime'), ('test'), ('optional'), ('recommended'), ('uses_from_macos') ON CONFLICT (name) DO NOTHING; -- sources INSERT INTO "sources" ("type") VALUES ('crates'), ('npm'), ('pypi'), ('rubygems'), ('github'), ('homebrew'), ('debian'), ('pkgx') ON CONFLICT (type) DO NOTHING; INSERT INTO "package_managers" ("source_id") SELECT id FROM "sources" WHERE "type" IN ('crates', 'npm', 'pypi', 'rubygems', 'github', 'homebrew', 'debian', 'pkgx') ON CONFLICT (source_id) DO NOTHING; ================================================ FILE: alembic/run_migrations.sh ================================================ #!/bin/bash set -uo pipefail # This script sets up the database, runs migrations, and loads initial values # Check if the 'chai' database exists, create it if it doesn't if psql "$CHAI_DATABASE_ADMIN_URL" -tAc "SELECT 1 FROM pg_database WHERE datname='chai'" | grep -q 1 then echo "Database 'chai' already exists" else echo "Database 'chai' does not exist, creating..." psql "$CHAI_DATABASE_ADMIN_URL" -f init-script.sql -a fi # Run migrations and load data (uses 'chai' database) echo "Current database version: $(alembic current)" alembic upgrade head || { echo "Migration failed"; exit 1; } echo "Loading initial values into the database..." psql "$CHAI_DATABASE_URL" -f load-values.sql -a echo "Database setup and initialization complete" ================================================ FILE: alembic/script.py.mako ================================================ """${message} Revision ID: ${up_revision} Revises: ${down_revision | comma,n} Create Date: ${create_date} """ from typing import Sequence, Union from alembic import op import sqlalchemy as sa ${imports if imports else ""} # revision identifiers, used by Alembic. revision: str = ${repr(up_revision)} down_revision: Union[str, None] = ${repr(down_revision)} branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)} depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)} def upgrade() -> None: ${upgrades if upgrades else "pass"} def downgrade() -> None: ${downgrades if downgrades else "pass"} ================================================ FILE: alembic/versions/20241028_1217-base_migration.py ================================================ """base migration Revision ID: 238d591d5310 Revises: Create Date: 2024-10-28 12:17:43.762965 """ from collections.abc import Sequence import sqlalchemy as sa from alembic import op # revision identifiers, used by Alembic. 
revision: str = "238d591d5310" down_revision: str | None = None branch_labels: str | Sequence[str] | None = None depends_on: str | Sequence[str] | None = None def upgrade() -> None: # ### commands auto generated by Alembic - please adjust! ### op.create_table( "depends_on_types", sa.Column( "id", sa.UUID(), server_default=sa.text("uuid_generate_v4()"), nullable=False, ), sa.Column("name", sa.String(), nullable=False), sa.Column( "created_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False ), sa.Column( "updated_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False ), sa.PrimaryKeyConstraint("id", name=op.f("pk_depends_on_types")), ) op.create_index( op.f("ix_depends_on_types_name"), "depends_on_types", ["name"], unique=True ) op.create_table( "licenses", sa.Column( "id", sa.UUID(), server_default=sa.text("uuid_generate_v4()"), nullable=False, ), sa.Column("name", sa.String(), nullable=False), sa.Column( "created_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False ), sa.Column( "updated_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False ), sa.PrimaryKeyConstraint("id", name=op.f("pk_licenses")), ) op.create_index(op.f("ix_licenses_name"), "licenses", ["name"], unique=True) op.create_table( "sources", sa.Column( "id", sa.UUID(), server_default=sa.text("uuid_generate_v4()"), nullable=False, ), sa.Column("type", sa.String(), nullable=False), sa.Column( "created_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False ), sa.Column( "updated_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False ), sa.PrimaryKeyConstraint("id", name=op.f("pk_sources")), sa.UniqueConstraint("type", name=op.f("uq_sources_type")), ) op.create_table( "url_types", sa.Column( "id", sa.UUID(), server_default=sa.text("uuid_generate_v4()"), nullable=False, ), sa.Column("name", sa.String(), nullable=False), sa.Column( "created_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False ), sa.Column( "updated_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False ), sa.PrimaryKeyConstraint("id", name=op.f("pk_url_types")), sa.UniqueConstraint("name", name=op.f("uq_url_types_name")), ) op.create_table( "package_managers", sa.Column( "id", sa.UUID(), server_default=sa.text("uuid_generate_v4()"), nullable=False, ), sa.Column("source_id", sa.UUID(), nullable=False), sa.Column( "created_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False ), sa.Column( "updated_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False ), sa.ForeignKeyConstraint( ["source_id"], ["sources.id"], name=op.f("fk_package_managers_source_id_sources"), ), sa.PrimaryKeyConstraint("id", name=op.f("pk_package_managers")), sa.UniqueConstraint("source_id", name=op.f("uq_package_managers_source_id")), ) op.create_table( "urls", sa.Column( "id", sa.UUID(), server_default=sa.text("uuid_generate_v4()"), nullable=False, ), sa.Column("url", sa.String(), nullable=False), sa.Column("url_type_id", sa.UUID(), nullable=False), sa.Column( "created_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False ), sa.Column( "updated_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False ), sa.ForeignKeyConstraint( ["url_type_id"], ["url_types.id"], name=op.f("fk_urls_url_type_id_url_types"), ), sa.PrimaryKeyConstraint("id", name=op.f("pk_urls")), sa.UniqueConstraint("url_type_id", "url", name="uq_url_type_url"), ) op.create_index(op.f("ix_urls_url"), "urls", ["url"], unique=False) 
op.create_index(op.f("ix_urls_url_type_id"), "urls", ["url_type_id"], unique=False) op.create_table( "users", sa.Column( "id", sa.UUID(), server_default=sa.text("uuid_generate_v4()"), nullable=False, ), sa.Column("username", sa.String(), nullable=False), sa.Column("source_id", sa.UUID(), nullable=False), sa.Column("import_id", sa.String(), nullable=False), sa.Column( "created_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False ), sa.Column( "updated_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False ), sa.ForeignKeyConstraint( ["source_id"], ["sources.id"], name=op.f("fk_users_source_id_sources") ), sa.PrimaryKeyConstraint("id", name=op.f("pk_users")), sa.UniqueConstraint("source_id", "username", name="uq_source_username"), ) op.create_index(op.f("ix_users_import_id"), "users", ["import_id"], unique=False) op.create_index(op.f("ix_users_source_id"), "users", ["source_id"], unique=False) op.create_index(op.f("ix_users_username"), "users", ["username"], unique=False) op.create_table( "load_history", sa.Column( "id", sa.UUID(), server_default=sa.text("uuid_generate_v4()"), nullable=False, ), sa.Column("package_manager_id", sa.UUID(), nullable=False), sa.Column( "created_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False ), sa.Column( "updated_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False ), sa.ForeignKeyConstraint( ["package_manager_id"], ["package_managers.id"], name=op.f("fk_load_history_package_manager_id_package_managers"), ), sa.PrimaryKeyConstraint("id", name=op.f("pk_load_history")), ) op.create_table( "packages", sa.Column( "id", sa.UUID(), server_default=sa.text("uuid_generate_v4()"), nullable=False, ), sa.Column("derived_id", sa.String(), nullable=False), sa.Column("name", sa.String(), nullable=False), sa.Column("package_manager_id", sa.UUID(), nullable=False), sa.Column("import_id", sa.String(), nullable=False), sa.Column("readme", sa.String(), nullable=True), sa.Column( "created_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False ), sa.Column( "updated_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False ), sa.ForeignKeyConstraint( ["package_manager_id"], ["package_managers.id"], name=op.f("fk_packages_package_manager_id_package_managers"), ), sa.PrimaryKeyConstraint("id", name=op.f("pk_packages")), sa.UniqueConstraint("derived_id", name=op.f("uq_packages_derived_id")), sa.UniqueConstraint( "package_manager_id", "import_id", name="uq_package_manager_import_id" ), ) op.create_index( op.f("ix_packages_import_id"), "packages", ["import_id"], unique=False ) op.create_index(op.f("ix_packages_name"), "packages", ["name"], unique=False) op.create_index( op.f("ix_packages_package_manager_id"), "packages", ["package_manager_id"], unique=False, ) op.create_table( "package_urls", sa.Column( "id", sa.UUID(), server_default=sa.text("uuid_generate_v4()"), nullable=False, ), sa.Column("package_id", sa.UUID(), nullable=False), sa.Column("url_id", sa.UUID(), nullable=False), sa.Column( "created_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False ), sa.Column( "updated_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False ), sa.ForeignKeyConstraint( ["package_id"], ["packages.id"], name=op.f("fk_package_urls_package_id_packages"), ), sa.ForeignKeyConstraint( ["url_id"], ["urls.id"], name=op.f("fk_package_urls_url_id_urls") ), sa.PrimaryKeyConstraint("id", name=op.f("pk_package_urls")), sa.UniqueConstraint("package_id", "url_id", name="uq_package_url"), ) 
op.create_index( op.f("ix_package_urls_package_id"), "package_urls", ["package_id"], unique=False ) op.create_index( op.f("ix_package_urls_url_id"), "package_urls", ["url_id"], unique=False ) op.create_table( "user_packages", sa.Column( "id", sa.UUID(), server_default=sa.text("uuid_generate_v4()"), nullable=False, ), sa.Column("user_id", sa.UUID(), nullable=False), sa.Column("package_id", sa.UUID(), nullable=False), sa.Column( "created_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False ), sa.Column( "updated_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False ), sa.ForeignKeyConstraint( ["package_id"], ["packages.id"], name=op.f("fk_user_packages_package_id_packages"), ), sa.ForeignKeyConstraint( ["user_id"], ["users.id"], name=op.f("fk_user_packages_user_id_users") ), sa.PrimaryKeyConstraint("id", name=op.f("pk_user_packages")), sa.UniqueConstraint("user_id", "package_id", name="uq_user_package"), ) op.create_index( op.f("ix_user_packages_package_id"), "user_packages", ["package_id"], unique=False, ) op.create_index( op.f("ix_user_packages_user_id"), "user_packages", ["user_id"], unique=False ) op.create_table( "versions", sa.Column( "id", sa.UUID(), server_default=sa.text("uuid_generate_v4()"), nullable=False, ), sa.Column("package_id", sa.UUID(), nullable=False), sa.Column("version", sa.String(), nullable=False), sa.Column("import_id", sa.String(), nullable=False), sa.Column("size", sa.Integer(), nullable=True), sa.Column("published_at", sa.DateTime(), nullable=True), sa.Column("license_id", sa.UUID(), nullable=True), sa.Column("downloads", sa.Integer(), nullable=True), sa.Column("checksum", sa.String(), nullable=True), sa.Column( "created_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False ), sa.Column( "updated_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False ), sa.ForeignKeyConstraint( ["license_id"], ["licenses.id"], name=op.f("fk_versions_license_id_licenses"), ), sa.ForeignKeyConstraint( ["package_id"], ["packages.id"], name=op.f("fk_versions_package_id_packages"), ), sa.PrimaryKeyConstraint("id", name=op.f("pk_versions")), sa.UniqueConstraint("package_id", "version", name="uq_package_version"), ) op.create_index( op.f("ix_versions_downloads"), "versions", ["downloads"], unique=False ) op.create_index( op.f("ix_versions_import_id"), "versions", ["import_id"], unique=False ) op.create_index( op.f("ix_versions_license_id"), "versions", ["license_id"], unique=False ) op.create_index( op.f("ix_versions_package_id"), "versions", ["package_id"], unique=False ) op.create_index( op.f("ix_versions_published_at"), "versions", ["published_at"], unique=False ) op.create_index(op.f("ix_versions_size"), "versions", ["size"], unique=False) op.create_index(op.f("ix_versions_version"), "versions", ["version"], unique=False) op.create_table( "dependencies", sa.Column( "id", sa.UUID(), server_default=sa.text("uuid_generate_v4()"), nullable=False, ), sa.Column("version_id", sa.UUID(), nullable=False), sa.Column("dependency_id", sa.UUID(), nullable=False), sa.Column("dependency_type_id", sa.UUID(), nullable=True), sa.Column("semver_range", sa.String(), nullable=True), sa.Column( "created_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False ), sa.Column( "updated_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False ), sa.ForeignKeyConstraint( ["dependency_id"], ["packages.id"], name=op.f("fk_dependencies_dependency_id_packages"), ), sa.ForeignKeyConstraint( ["dependency_type_id"], ["depends_on_types.id"], 
name=op.f("fk_dependencies_dependency_type_id_depends_on_types"), ), sa.ForeignKeyConstraint( ["version_id"], ["versions.id"], name=op.f("fk_dependencies_version_id_versions"), ), sa.PrimaryKeyConstraint("id", name=op.f("pk_dependencies")), sa.UniqueConstraint( "version_id", "dependency_id", "dependency_type_id", name="uq_version_dependency_type", ), ) op.create_index( op.f("ix_dependencies_dependency_id"), "dependencies", ["dependency_id"], unique=False, ) op.create_index( op.f("ix_dependencies_dependency_type_id"), "dependencies", ["dependency_type_id"], unique=False, ) op.create_index( op.f("ix_dependencies_version_id"), "dependencies", ["version_id"], unique=False ) op.create_table( "user_versions", sa.Column( "id", sa.UUID(), server_default=sa.text("uuid_generate_v4()"), nullable=False, ), sa.Column("user_id", sa.UUID(), nullable=False), sa.Column("version_id", sa.UUID(), nullable=False), sa.Column( "created_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False ), sa.Column( "updated_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False ), sa.ForeignKeyConstraint( ["user_id"], ["users.id"], name=op.f("fk_user_versions_user_id_users") ), sa.ForeignKeyConstraint( ["version_id"], ["versions.id"], name=op.f("fk_user_versions_version_id_versions"), ), sa.PrimaryKeyConstraint("id", name=op.f("pk_user_versions")), sa.UniqueConstraint("user_id", "version_id", name="uq_user_version"), ) op.create_index( op.f("ix_user_versions_user_id"), "user_versions", ["user_id"], unique=False ) op.create_index( op.f("ix_user_versions_version_id"), "user_versions", ["version_id"], unique=False, ) # ### end Alembic commands ### def downgrade() -> None: # ### commands auto generated by Alembic - please adjust! ### op.drop_index(op.f("ix_user_versions_version_id"), table_name="user_versions") op.drop_index(op.f("ix_user_versions_user_id"), table_name="user_versions") op.drop_table("user_versions") op.drop_index(op.f("ix_dependencies_version_id"), table_name="dependencies") op.drop_index(op.f("ix_dependencies_dependency_type_id"), table_name="dependencies") op.drop_index(op.f("ix_dependencies_dependency_id"), table_name="dependencies") op.drop_table("dependencies") op.drop_index(op.f("ix_versions_version"), table_name="versions") op.drop_index(op.f("ix_versions_size"), table_name="versions") op.drop_index(op.f("ix_versions_published_at"), table_name="versions") op.drop_index(op.f("ix_versions_package_id"), table_name="versions") op.drop_index(op.f("ix_versions_license_id"), table_name="versions") op.drop_index(op.f("ix_versions_import_id"), table_name="versions") op.drop_index(op.f("ix_versions_downloads"), table_name="versions") op.drop_table("versions") op.drop_index(op.f("ix_user_packages_user_id"), table_name="user_packages") op.drop_index(op.f("ix_user_packages_package_id"), table_name="user_packages") op.drop_table("user_packages") op.drop_index(op.f("ix_package_urls_url_id"), table_name="package_urls") op.drop_index(op.f("ix_package_urls_package_id"), table_name="package_urls") op.drop_table("package_urls") op.drop_index(op.f("ix_packages_package_manager_id"), table_name="packages") op.drop_index(op.f("ix_packages_name"), table_name="packages") op.drop_index(op.f("ix_packages_import_id"), table_name="packages") op.drop_table("packages") op.drop_table("load_history") op.drop_index(op.f("ix_users_username"), table_name="users") op.drop_index(op.f("ix_users_source_id"), table_name="users") op.drop_index(op.f("ix_users_import_id"), table_name="users") op.drop_table("users") 
op.drop_index(op.f("ix_urls_url_type_id"), table_name="urls") op.drop_index(op.f("ix_urls_url"), table_name="urls") op.drop_table("urls") op.drop_table("package_managers") op.drop_table("url_types") op.drop_table("sources") op.drop_index(op.f("ix_licenses_name"), table_name="licenses") op.drop_table("licenses") op.drop_index(op.f("ix_depends_on_types_name"), table_name="depends_on_types") op.drop_table("depends_on_types") # ### end Alembic commands ### ================================================ FILE: alembic/versions/20250312_0045-add_legacy_dependency_table.py ================================================ """add-legacy-dependency-table Revision ID: 89af630dc946 Revises: 238d591d5310 Create Date: 2025-03-12 00:45:35.727521 """ from collections.abc import Sequence import sqlalchemy as sa from alembic import op # revision identifiers, used by Alembic. revision: str = "89af630dc946" down_revision: str | None = "238d591d5310" branch_labels: str | Sequence[str] | None = None depends_on: str | Sequence[str] | None = None def upgrade() -> None: op.create_table( "legacy_dependencies", sa.Column("id", sa.Integer(), nullable=False), sa.Column("package_id", sa.UUID(), nullable=False), sa.Column("dependency_id", sa.UUID(), nullable=False), sa.Column("dependency_type_id", sa.UUID(), nullable=False), sa.Column("semver_range", sa.String(), nullable=True), sa.Column( "created_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False ), sa.Column( "updated_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False ), sa.ForeignKeyConstraint( ["dependency_id"], ["packages.id"], name=op.f("fk_legacy_dependencies_dependency_id_packages"), ), sa.ForeignKeyConstraint( ["dependency_type_id"], ["depends_on_types.id"], name=op.f("fk_legacy_dependencies_dependency_type_id_depends_on_types"), ), sa.ForeignKeyConstraint( ["package_id"], ["packages.id"], name=op.f("fk_legacy_dependencies_package_id_packages"), ), sa.PrimaryKeyConstraint("id", name=op.f("pk_legacy_dependencies")), sa.UniqueConstraint( "package_id", "dependency_id", name="uq_package_dependency" ), ) op.create_index( op.f("ix_legacy_dependencies_dependency_id"), "legacy_dependencies", ["dependency_id"], unique=False, ) op.create_index( op.f("ix_legacy_dependencies_dependency_type_id"), "legacy_dependencies", ["dependency_type_id"], unique=False, ) op.create_index( op.f("ix_legacy_dependencies_package_id"), "legacy_dependencies", ["package_id"], unique=False, ) def downgrade() -> None: op.drop_index( op.f("ix_legacy_dependencies_package_id"), table_name="legacy_dependencies" ) op.drop_index( op.f("ix_legacy_dependencies_dependency_type_id"), table_name="legacy_dependencies", ) op.drop_index( op.f("ix_legacy_dependencies_dependency_id"), table_name="legacy_dependencies" ) op.drop_table("legacy_dependencies") ================================================ FILE: alembic/versions/20250312_2244-canons.py ================================================ """canons Revision ID: e7632ae1aff7 Revises: 89af630dc946 Create Date: 2025-03-12 22:44:45.272179 """ from collections.abc import Sequence import sqlalchemy as sa from alembic import op # revision identifiers, used by Alembic. 
revision: str = "e7632ae1aff7" down_revision: str | None = "89af630dc946" branch_labels: str | Sequence[str] | None = None depends_on: str | Sequence[str] | None = None def upgrade() -> None: op.create_table( "canons", sa.Column("id", sa.UUID(), nullable=False), sa.Column("url", sa.String(), nullable=False), sa.Column("name", sa.String(), nullable=False), sa.Column( "created_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False ), sa.Column( "updated_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False ), sa.PrimaryKeyConstraint("id", name=op.f("pk_canons")), ) op.create_index(op.f("ix_canons_name"), "canons", ["name"], unique=False) op.create_index(op.f("ix_canons_url"), "canons", ["url"], unique=True) op.create_table( "canon_packages", sa.Column("id", sa.UUID(), nullable=False), sa.Column("canon_id", sa.UUID(), nullable=False), sa.Column("package_id", sa.UUID(), nullable=False), sa.Column( "created_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False ), sa.Column( "updated_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False ), sa.ForeignKeyConstraint( ["canon_id"], ["canons.id"], name=op.f("fk_canon_packages_canon_id_canons") ), sa.ForeignKeyConstraint( ["package_id"], ["packages.id"], name=op.f("fk_canon_packages_package_id_packages"), ), sa.PrimaryKeyConstraint("id", name=op.f("pk_canon_packages")), ) op.create_index( op.f("ix_canon_packages_canon_id"), "canon_packages", ["canon_id"], unique=False ) op.create_index( op.f("ix_canon_packages_package_id"), "canon_packages", ["package_id"], unique=False, ) def downgrade() -> None: op.drop_index(op.f("ix_canon_packages_package_id"), table_name="canon_packages") op.drop_index(op.f("ix_canon_packages_canon_id"), table_name="canon_packages") op.drop_table("canon_packages") op.drop_index(op.f("ix_canons_url"), table_name="canons") op.drop_index(op.f("ix_canons_name"), table_name="canons") op.drop_table("canons") ================================================ FILE: alembic/versions/20250416_0223-add_ranks.py ================================================ """add-ranks Revision ID: 26e124131bf8 Revises: e7632ae1aff7 Create Date: 2025-04-16 02:23:33.665773 """ from collections.abc import Sequence import sqlalchemy as sa from alembic import op # revision identifiers, used by Alembic. 
revision: str = "26e124131bf8" down_revision: str | None = "e7632ae1aff7" branch_labels: str | Sequence[str] | None = None depends_on: str | Sequence[str] | None = None def upgrade() -> None: op.create_table( "tea_rank_runs", sa.Column( "id", sa.UUID(), server_default=sa.text("uuid_generate_v4()"), nullable=False, ), sa.Column("run", sa.Integer(), nullable=False), sa.Column("split_ratio", sa.String(), nullable=False), sa.Column( "created_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False ), sa.PrimaryKeyConstraint("id", name=op.f("pk_tea_rank_runs")), ) op.create_table( "tea_ranks", sa.Column( "id", sa.UUID(), server_default=sa.text("uuid_generate_v4()"), nullable=False, ), sa.Column("tea_rank_run", sa.Integer(), nullable=False), sa.Column("canon_id", sa.UUID(), nullable=False), sa.Column("rank", sa.String(), nullable=False), sa.Column( "created_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False ), sa.ForeignKeyConstraint( ["canon_id"], ["canons.id"], name=op.f("fk_tea_ranks_canon_id_canons") ), sa.PrimaryKeyConstraint("id", name=op.f("pk_tea_ranks")), ) op.create_index( op.f("ix_tea_ranks_canon_id"), "tea_ranks", ["canon_id"], unique=False ) op.create_index( op.f("ix_tea_ranks_tea_rank_run"), "tea_ranks", ["tea_rank_run"], unique=False ) def downgrade() -> None: op.drop_index(op.f("ix_tea_ranks_tea_rank_run"), table_name="tea_ranks") op.drop_index(op.f("ix_tea_ranks_canon_id"), table_name="tea_ranks") op.drop_table("tea_ranks") op.drop_table("tea_rank_runs") ================================================ FILE: alembic/versions/20250422_0940-add_unique_package_to_canon_packages.py ================================================ """add-unique-package-to-canon-packages Revision ID: a41236bd2340 Revises: 26e124131bf8 Create Date: 2025-04-22 09:40:22.901637 """ from collections.abc import Sequence from alembic import op # revision identifiers, used by Alembic. revision: str = "a41236bd2340" down_revision: str | None = "26e124131bf8" branch_labels: str | Sequence[str] | None = None depends_on: str | Sequence[str] | None = None def upgrade() -> None: op.drop_index("ix_canon_packages_package_id", table_name="canon_packages") op.create_index( op.f("ix_canon_packages_package_id"), "canon_packages", ["package_id"], unique=True, ) def downgrade() -> None: op.drop_index(op.f("ix_canon_packages_package_id"), table_name="canon_packages") op.create_index( "ix_canon_packages_package_id", "canon_packages", ["package_id"], unique=False ) ================================================ FILE: alembic/versions/20250508_1752-add_trgm_indexes.py ================================================ """add_trgm_indexes Revision ID: 7392d4d74ce2 Revises: a41236bd2340 Create Date: 2025-05-08 17:52:40.417822 """ from collections.abc import Sequence from alembic import op # revision identifiers, used by Alembic. 
revision: str = "7392d4d74ce2" down_revision: str | None = "a41236bd2340" branch_labels: str | Sequence[str] | None = None depends_on: str | Sequence[str] | None = None def upgrade() -> None: # Drop the existing indexes op.drop_index("ix_canons_name", table_name="canons") op.drop_index("ix_urls_url", table_name="urls") # Create trigram indexes # NOTE: this was added manually to this script (not auto-generated) op.create_index( "ix_urls_url_trgm", "urls", ["url"], unique=False, postgresql_using="gin", postgresql_ops={"url": "gin_trgm_ops"}, ) op.create_index( "ix_canons_name_trgm", "canons", ["name"], unique=False, postgresql_using="gin", postgresql_ops={"name": "gin_trgm_ops"}, ) def downgrade() -> None: # Drop the trigram indexes # NOTE: this was added manually to this script (not auto-generated) op.drop_index("ix_urls_url_trgm", table_name="urls") op.drop_index("ix_canons_name_trgm", table_name="canons") # Recreate the existing indexes (auto-generated) op.create_index("ix_urls_url", "urls", ["url"], unique=False) op.create_index("ix_canons_name", "canons", ["name"], unique=False) ================================================ FILE: alembic/versions/20250529_2341-rename_canons_table_and_recreate.py ================================================ """rename_canons_table_and_recreate Revision ID: 542d79f30fc9 Revises: 7392d4d74ce2 Create Date: 2025-05-29 23:41:38.465987 """ from collections.abc import Sequence import sqlalchemy as sa from sqlalchemy.dialects.postgresql import UUID from alembic import op # revision identifiers, used by Alembic. revision: str = "542d79f30fc9" down_revision: str | None = "7392d4d74ce2" branch_labels: str | Sequence[str] | None = None depends_on: str | Sequence[str] | None = None def upgrade() -> None: """ Rename existing canons table and create new one with proper url_id FK """ # Step 1: Rename existing table to preserve data as backup op.rename_table("canons", "canons_old") # Step 2: Drop FK constraints that pointed to old table (from other tables) op.drop_constraint( "fk_canon_packages_canon_id_canons", "canon_packages", type_="foreignkey" ) op.drop_constraint("fk_tea_ranks_canon_id_canons", "tea_ranks", type_="foreignkey") # Step 3: Drop indexes and constraints from old table to avoid naming conflicts op.drop_constraint("pk_canons", "canons_old", type_="primary") op.drop_index("ix_canons_url", table_name="canons_old") op.drop_index("ix_canons_name_trgm", table_name="canons_old") # Step 4: Create new canons table with proper schema op.create_table( "canons", sa.Column( "id", UUID(as_uuid=True), primary_key=True, server_default=sa.func.uuid_generate_v4(), ), sa.Column( "url_id", UUID(as_uuid=True), nullable=False, index=True, unique=True ), sa.Column("name", sa.String(), nullable=False), sa.Column( "created_at", sa.DateTime(), nullable=False, server_default=sa.func.now() ), sa.Column( "updated_at", sa.DateTime(), nullable=False, server_default=sa.func.now() ), # Constraints sa.ForeignKeyConstraint(["url_id"], ["urls.id"], name="fk_canons_url_id_urls"), sa.UniqueConstraint("url_id", name="uq_canons_url_id"), ) # Step 5: Create indexes op.create_index( "ix_canons_name_trgm", "canons", ["name"], postgresql_using="gin", postgresql_ops={"name": "gin_trgm_ops"}, ) # Note: FK constraints to this table will be recreated in a separate migration # after data population, since this table starts empty def downgrade() -> None: """ Restore original canons table with all its original indexes and constraints """ # FK constraints were dropped in upgrade and not recreated, 
so no need to drop them here # Drop new table op.drop_table("canons") # Restore old table op.rename_table("canons_old", "canons") # Recreate all original constraints and indexes on restored table op.create_primary_key("pk_canons", "canons", ["id"]) op.create_index("ix_canons_url", "canons", ["url"], unique=True) op.create_index( "ix_canons_name_trgm", "canons", ["name"], postgresql_using="gin", postgresql_ops={"name": "gin_trgm_ops"}, ) # Recreate FK constraints from other tables pointing to canons op.create_foreign_key( "fk_canon_packages_canon_id_canons", "canon_packages", "canons", ["canon_id"], ["id"], ) op.create_foreign_key( "fk_tea_ranks_canon_id_canons", "tea_ranks", "canons", ["canon_id"], ["id"] ) ================================================ FILE: alembic/versions/20250529_2345-recreate_canon_foreign_keys.py ================================================ """recreate_canon_foreign_keys Revision ID: 3de32bb99a71 Revises: 542d79f30fc9 Create Date: 2025-05-29 23:45:12.372951 """ from collections.abc import Sequence from alembic import op # revision identifiers, used by Alembic. revision: str = "3de32bb99a71" down_revision: str | None = "542d79f30fc9" branch_labels: str | Sequence[str] | None = None depends_on: str | Sequence[str] | None = None def upgrade() -> None: """ Recreate FK constraints pointing to canons table after data population Run this AFTER your canonicalization script has populated the canons table """ # First, clean up any orphaned records in referencing tables # (Optional: uncomment if you want to auto-clean orphaned data) # op.execute(""" # DELETE FROM canon_packages # WHERE canon_id NOT IN (SELECT id FROM canons) # """) # op.execute(""" # DELETE FROM tea_ranks # WHERE canon_id NOT IN (SELECT id FROM canons) # """) # Recreate FK constraints op.create_foreign_key( "fk_canon_packages_canon_id_canons", "canon_packages", "canons", ["canon_id"], ["id"], ) op.create_foreign_key( "fk_tea_ranks_canon_id_canons", "tea_ranks", "canons", ["canon_id"], ["id"] ) def downgrade() -> None: """ Drop FK constraints pointing to canons table """ op.drop_constraint( "fk_canon_packages_canon_id_canons", "canon_packages", type_="foreignkey" ) op.drop_constraint("fk_tea_ranks_canon_id_canons", "tea_ranks", type_="foreignkey") ================================================ FILE: api/.dockerignore ================================================ /target .git .gitignore README.md ================================================ FILE: api/.gitignore ================================================ /target **/*.rs.bk Cargo.lock .env ================================================ FILE: api/Cargo.toml ================================================ [package] name = "chai-api" version = "1.3.0" edition = "2021" authors = ["Jacob Heider "] description = "A simple REST API for the CHAI database" readme = "README.md" license = "MIT" repository = "https://github.com/teaxyz/chai-oss" [dependencies] uuid = { version = "1.11.0", features = ["serde", "v4"] } actix-web = "4.3" dotenv = "0.15" tokio = { version = "1", features = ["full"] } log = "0.4" env_logger = "0.10" serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" chrono = { version = "0.4", features = ["serde"] } tokio-postgres = { version = "0.7", features = [ "with-serde_json-1", "with-chrono-0_4", "with-uuid-1", ] } deadpool-postgres = "0.10.0" url = "2.5.2" dashmap = "6.1.0" ================================================ FILE: api/Dockerfile ================================================ FROM 
--platform=linux/amd64 lukemathwalker/cargo-chef:latest-rust-1.82.0 as chef WORKDIR /app FROM chef as planner COPY . . RUN cargo chef prepare --recipe-path recipe.json FROM chef as builder COPY --from=planner /app/recipe.json recipe.json RUN cargo chef cook --release --recipe-path recipe.json COPY . . RUN cargo build --release FROM debian:bookworm-slim as runtime WORKDIR /app RUN apt-get update && apt-get install -y curl openssl ca-certificates && rm -rf /var/lib/apt/lists/* COPY --from=builder /app/target/release/chai-api /usr/local/bin ENV DATABASE_URL=postgresql://postgres:s3cr3t@db:5432/chai EXPOSE 8080 CMD ["chai-api"]
================================================ FILE: api/README.md ================================================
# CHAI API CHAI API is a REST API service for accessing the CHAI database, which contains package manager data. ## Features - List all tables in the database - Fetch paginated data from any table - Heartbeat endpoint for health checks - Search deduplicated packages by name ## Requirements - Rust 1.67 or later - PostgreSQL database ## API Endpoints ### Health Check ``` GET /heartbeat ``` Returns the health status of the API and database connection. **Response (Success)** ```txt OK - Database connection is healthy ``` **Response (Failure - Database query failed):** ```txt Database query failed ``` **Response (Failure - Database connection failed):** ```txt Failed to get database connection ``` ### List Tables ``` GET /tables ``` Returns a paginated list of all available tables in the database. **Query Parameters** - `page` (optional): Page number (default: 1) - `limit` (optional): Number of items per page (default: 200) **Response** ```json { "data": [ "legacy_dependencies", "versions", "canons_old", "tea_rank_runs", "canons", "licenses", "canon_packages", "users", "load_history", "tea_ranks", "alembic_version", "sources", "package_managers", "url_types", "urls", "packages", "package_urls", "user_packages", "dependencies", "depends_on_types", "user_versions", "canon_packages_old", "tea_rank_old" ], "limit": 200, "page": 1, "total_count": 23, "total_pages": 1 } ``` ### Get Table Data ``` GET /tables/{table} ``` Returns paginated data from the specified table. **Path Parameters** - `table`: Name of the table to query (see available tables in List Tables response) **Query Parameters** - `page` (optional): Page number (default: 1) - `limit` (optional): Number of items per page (default: 200) **Response** ```json { "table": "packages", "total_count": 166459, "page": 1, "limit": 2, "total_pages": 83230, "columns": [ ... ], "data": [ { "created_at": "2024-12-27 08:04:03.991832", "derived_id": "...", "id": "...", "import_id": "...", "name": "...", "package_manager_id": "...", "readme": "...", "updated_at": "2024-12-27 08:04:03.991832" }, ... ] } ``` ### Get Table Row By ID ``` GET /tables/{table}/{id} ``` Returns a specific row from the table by its UUID. **Path Parameters** - `table`: Name of the table to query - `id`: UUID of the row to fetch **Response** ```json { "created_at": "2024-12-27 08:04:03.991832", "derived_id": "...", "id": "...", "import_id": "...", "name": "...", "package_manager_id": "...", "readme": "...", "updated_at": "2024-12-27 08:04:03.991832" } ``` ### Get Project ``` GET /project/{id} ``` Returns detailed information about a specific canon by its canonical ID.
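For example, assuming the API is running locally on the default port (the UUID below is illustrative):

```bash
curl http://localhost:8080/project/550e8400-e29b-41d4-a716-446655440000
```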
**Path Parameters** - `id`: UUID of the project (canon) to fetch **Response** ```json { "projectId": "550e8400-e29b-41d4-a716-446655440000", "homepage": "https://example.com", "name": "example-project", "source": "https://github.com/example/project", "teaRank": "150", "teaRankCalculatedAt": "2024-12-27T08:04:03.991832", "packageManagers": ["homebrew", "crates"] } ``` **Response (Not Found)** ```json { "error": "No row found with id '550e8400-e29b-41d4-a716-446655440000' in table canons" } ``` ### Get Projects Batch ``` POST /project/batch ``` Returns detailed information about multiple projects by their canonical IDs. **Request Body** ```json { "projectIds": ["uuid1", "uuid2", "..."] } ``` **Parameters** - `projectIds`: Array of project UUIDs to fetch (required, max 100) **Example Request** ```bash curl -X POST http://localhost:8080/project/batch \ -H "Content-Type: application/json" \ -d '{ "projectIds": [ "550e8400-e29b-41d4-a716-446655440000", "6ba7b810-9dad-11d1-80b4-00c04fd430c8" ] }' ``` **Response** ```json [ { "projectId": "550e8400-e29b-41d4-a716-446655440000", "homepage": "https://example.com", "name": "example-project", "source": "https://github.com/example/project", "teaRank": "150", "teaRankCalculatedAt": "2024-12-27T08:04:03.991832", "packageManagers": ["homebrew", "crates"] }, { "projectId": "6ba7b810-9dad-11d1-80b4-00c04fd430c8", "homepage": "https://another-example.com", "name": "another-project", "source": "https://github.com/another/project", "teaRank": "75", "teaRankCalculatedAt": "2024-12-26T10:15:22.123456", "packageManagers": ["debian", "pkgx"] } ] ``` **Response (Invalid UUIDs)** ```json { "error": "Invalid UUID format in project IDs" } ``` ### Search Projects ``` GET /project/search/{name} ``` Searches for projects by name using case-insensitive partial matching. Results are ordered by name length and limited to 10 items. **Path Parameters** - `name`: Project name to search for (partial matches supported) **Example** ``` GET /project/search/react ``` **Response** ```json [ { "projectId": "550e8400-e29b-41d4-a716-446655440000", "homepage": "https://reactjs.org", "name": "react", "source": "https://github.com/facebook/react", "packageManagers": ["homebrew", "npm"] }, { "projectId": "6ba7b810-9dad-11d1-80b4-00c04fd430c8", "homepage": "https://reactrouter.com", "name": "react-router", "source": "https://github.com/remix-run/react-router", "packageManagers": ["npm"] } ] ``` **Response (Empty Search)** ```json { "error": "Search name cannot be empty" } ``` ### Leaderboard ``` POST /leaderboard ``` Returns detailed information about specified projects, ordered by tea rank in descending order. This endpoint allows filtering by project IDs and limiting the number of results. **Request Body** ```json { "projectIds": ["uuid1", "uuid2", "..."], "limit": 10 } ``` **Parameters** - `projectIds`: Array of project UUIDs to include in the leaderboard (required, max 100) - `limit`: Maximum number of results to return (required, 1-100) **Example Request** ```bash curl -X POST http://localhost:8080/leaderboard \ -H "Content-Type: application/json" \ -d '{ "projectIds": [ "1e233f1b-2b49-4ada-9953-1763785fba2c", "2c24aa45-4fe2-4f2b-ae58-09d4b9a4ad28" ], "limit": 2 }' ``` **Response** ```json [ { "projectId": "1e233f1b-2b49-4ada-9953-1763785fba2c", "homepage": "https://example.com", "name": "example-project", "source": "https://github.com/example/project", "teaRank": "150", "teaRankCalculatedAt": "2024-12-27T08:04:03.991832", "packageManagers": ["homebrew", "crates"] }, { "projectId": "2c24aa45-4fe2-4f2b-ae58-09d4b9a4ad28", "homepage": "https://another-example.com", "name": "another-project", "source": "https://github.com/another/project", "teaRank": "75", "teaRankCalculatedAt": "2024-12-26T10:15:22.123456", "packageManagers": ["debian", "pkgx"] } ] ``` **Response (Validation Errors)** ```json { "error": "At least one project ID is required" } ``` ```json { "error": "Too many project IDs (maximum 100 allowed)" } ``` ```json { "error": "Invalid limit 150: must be between 1 and 100" } ``` ## Available Tables The database contains the following tables: | Table Name | Description | | ---------------- | ------------------------------------------------ | | alembic_version | Stores the current version of alembic | | dependencies | Package dependencies | | depends_on_types | Types of package dependencies | | licenses | Package licenses | | load_history | Load history | | package_managers | Package manager information | | package_urls | Relationship of packages to URLs | | packages | Package metadata | | sources | Package manager sources (homebrew, crates, etc.) | | url_types | Types of URLs (homepage, repository, etc.) | | urls | Actual URLs | | user_packages | User-package relationships | | user_versions | User-version relationships | | users | User (package owner) information | | versions | Package versions | By default, the API will be available at `http://localhost:8080`. ## Deployment The CHAI API is deployed using AWS services with the following stack: - **Amazon ECR (Elastic Container Registry)** - Container image storage - **Amazon ECS (Elastic Container Service)** - Container orchestration - **ECS Service** - Manages running tasks and load balancing - **ECS Task Definition** - Defines container configuration ### Prerequisites - AWS CLI configured with appropriate permissions - Docker installed locally - Access to the AWS account and ECR repository ### Building and Pushing Docker Image 1. **Get ECR login credentials:** ```bash aws ecr get-login-password --region <region> | docker login --username AWS --password-stdin <account-id>.dkr.ecr.<region>.amazonaws.com ``` 2. **Build the Docker image:** ```bash docker build -t chai-api . ``` 3. **Tag the image for ECR:** ```bash docker tag chai-api:latest <account-id>.dkr.ecr.<region>.amazonaws.com/chai-api:latest ``` 4. **Push the image to ECR:** ```bash docker push <account-id>.dkr.ecr.<region>.amazonaws.com/chai-api:latest ``` > **Note:** Replace `<account-id>` and `<region>` with your AWS account ID and region. You can find the exact commands in your ECR repository console under "View push commands". ### Updating Existing ECS Service If updating the ECS service, you first need to build and push the Docker image.
Then: ```bash aws ecs update-service --cluster chai-<env> --service <env>-chai-api --force-new-deployment ``` (replace `<env>` with the name of your environment) ### Environment Variables Ensure the following environment variables are configured in your task definition: - `DATABASE_URL`: PostgreSQL connection string - `HOST`: Host to bind to (default: "0.0.0.0") - `PORT`: Port to listen on (default: "8080") ### Useful AWS Documentation - [Amazon ECR User Guide](https://docs.aws.amazon.com/ecr/) - [Amazon ECS Developer Guide](https://docs.aws.amazon.com/ecs/) - [ECS Task Definitions](https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task_definitions.html) - [ECS Services](https://docs.aws.amazon.com/AmazonECS/latest/developerguide/ecs_services.html) - [AWS CLI ECS Commands](https://docs.aws.amazon.com/cli/latest/reference/ecs/) ## Tasks ### Format ```bash cargo fmt --all ``` ### Build ```bash cargo build --release ``` ### Validate ```bash cargo clippy --all-targets --all-features -- -D warnings ``` ### Run Env: DATABASE_URL=postgresql://postgres:s3cr3t@localhost:5435/chai ```bash target/release/chai-api ```
================================================ FILE: api/src/app_state.rs ================================================
use dashmap::DashMap; use deadpool_postgres::Pool; use serde_json::Value; use std::sync::Arc; use std::time::{Duration, Instant}; use uuid::Uuid; const TTL: Duration = Duration::from_secs(3600); // 1 hour #[derive(Clone)] pub struct ProjectCacheEntry { pub data: Arc<Value>, pub created_at: Instant, } impl ProjectCacheEntry { pub fn new(data: Value) -> Self { Self { data: Arc::new(data), created_at: Instant::now(), } } pub fn is_expired(&self) -> bool { self.created_at.elapsed() > TTL } } pub struct AppState { pub pool: Pool, pub tables: Arc<Vec<String>>, pub project_cache: Arc<DashMap<Uuid, ProjectCacheEntry>>, }
================================================ FILE: api/src/db.rs ================================================
use deadpool_postgres::{Config, Pool, Runtime}; use std::env; use std::sync::Arc; use tokio_postgres::{Client, NoTls}; use url::Url; pub async fn create_pool() -> Pool { let database_url = env::var("DATABASE_URL").expect("DATABASE_URL must be set"); let db_url = Url::parse(&database_url).expect("Invalid database URL"); let mut config = Config::new(); config.host = db_url.host_str().map(ToOwned::to_owned); config.port = db_url.port(); config.user = Some(db_url.username().to_owned()); config.password = db_url.password().map(ToOwned::to_owned); config.dbname = db_url.path().strip_prefix('/').map(ToOwned::to_owned); config .create_pool(Some(Runtime::Tokio1), NoTls) .expect("Failed to create pool") } pub async fn get_tables(client: &Client) -> Vec<String> { let rows = client .query( "SELECT table_name FROM information_schema.tables WHERE table_schema = 'public'", &[], ) .await .expect("Failed to fetch tables"); rows.into_iter() .map(|row| row.get::<_, String>("table_name")) .collect() } pub async fn initialize_db() -> (Pool, Arc<Vec<String>>) { let pool = create_pool().await; let client = pool.get().await.expect("Failed to get client from pool"); let tables = Arc::new(get_tables(&client).await); (pool, tables) }
================================================ FILE: api/src/handlers.rs ================================================
use actix_web::{get, post, web, HttpResponse, Responder}; use serde::{Deserialize, Serialize}; use serde_json::{json, Value}; use std::sync::Arc; use tokio_postgres::error::SqlState; use uuid::Uuid; use crate::app_state::AppState; use crate::utils::{get_cached_projects, get_column_names, rows_to_json, Pagination}; const RESPONSE_LIMIT: i64 = 1000; #[derive(Deserialize)] pub struct PaginationParams { pub page: Option<i64>, pub limit: Option<i64>, } #[derive(Serialize)] struct PaginatedResponse { table: String, total_count: i64, page: i64, limit: i64, total_pages: i64, columns: Vec<String>, data: Vec<Value>, } #[derive(Deserialize)] pub struct LeaderboardRequest { #[serde(rename = "projectIds")] pub project_ids: Option<Vec<Uuid>>, pub limit: i64, } #[derive(Deserialize)] pub struct ProjectBatchRequest { #[serde(rename = "projectIds")] pub project_ids: Vec<Uuid>, } pub fn check_table_exists(table: &str, tables: &[String]) -> Option<HttpResponse> { if !tables.contains(&table.to_string()) { Some(HttpResponse::NotFound().json(json!({ "error": format!("Table '{}' not found", table), "valid_tables": tables, "help": "Refer to the API documentation for valid table names." }))) } else { None } } #[get("/tables")] pub async fn list_tables( query: web::Query<PaginationParams>, data: web::Data<AppState>, ) -> impl Responder { let total_count = data.tables.len() as i64; let pagination = Pagination::new(query, total_count); let start = pagination.offset as usize; let end = (start + pagination.limit as usize).min(data.tables.len()); let paginated_tables = &data.tables[start..end]; HttpResponse::Ok().json(json!({ "total_count": total_count, "page": pagination.page, "limit": pagination.limit, "total_pages": pagination.total_pages, "data": paginated_tables, })) } #[get("/heartbeat")] pub async fn heartbeat(data: web::Data<AppState>) -> impl Responder { match data.pool.get().await { Ok(client) => match client.query_one("SELECT 1", &[]).await { Ok(_) => HttpResponse::Ok().body("OK - Database connection is healthy"), Err(e) => { log::error!("Database query failed: {e}"); HttpResponse::InternalServerError().body("Database query failed") } }, Err(e) => { log::error!("Failed to get database connection: {e}"); HttpResponse::InternalServerError().body("Failed to get database connection") } } } #[get("/tables/{table}")] pub async fn get_table( path: web::Path<String>, query: web::Query<PaginationParams>, data: web::Data<AppState>, ) -> impl Responder { let table = path.into_inner(); if let Some(response) = check_table_exists(&table, &data.tables) { return response; } let count_query = format!("SELECT COUNT(*) FROM {table}"); match data.pool.get().await { Ok(client) => match client.query_one(&count_query, &[]).await { Ok(count_row) => { let total_count: i64 = count_row.get(0); let pagination = Pagination::new(query, total_count); let data_query = format!("SELECT * FROM {table} LIMIT $1 OFFSET $2"); match client .query(&data_query, &[&pagination.limit, &pagination.offset]) .await { Ok(rows) => { let columns = get_column_names(&rows); let data = rows_to_json(&rows); let response = PaginatedResponse { table, total_count, page: pagination.page, limit: pagination.limit, total_pages: pagination.total_pages, columns, data, }; HttpResponse::Ok().json(response) } Err(e) => { log::error!("Database query error: {e}"); HttpResponse::InternalServerError().json(json!({ "error": "An error occurred while querying the database" })) } } } Err(e) => { log::error!("Database count query error: {e}"); HttpResponse::InternalServerError().json(json!({ "error": "An error occurred while counting rows in the database" })) } }, Err(e) => { log::error!("Failed to get database connection: {e}"); HttpResponse::InternalServerError().body("Failed to get database connection") } } } #[get("/tables/{table}/{id}")] pub async fn get_table_row( path: web::Path<(String, Uuid)>, data: web::Data<AppState>, ) -> impl Responder { let (table_name, id) = path.into_inner(); if let Some(response) = check_table_exists(&table_name, &data.tables) { return response; } let query = format!("SELECT * FROM {table_name} WHERE id = $1"); match data.pool.get().await { Ok(client) => match client.query_one(&query, &[&id]).await { Ok(row) => { let json = rows_to_json(&[row]); let value = json.first().unwrap(); HttpResponse::Ok().json(value) } Err(e) => { if e.as_db_error() .is_some_and(|db_err| db_err.code() == &SqlState::UNDEFINED_TABLE) { HttpResponse::NotFound().json(json!({ "error": format!("Table '{}' not found", table_name) })) } else if e .as_db_error() .is_some_and(|e| e.code() == &SqlState::NO_DATA_FOUND) { HttpResponse::NotFound().json(json!({ "error": format!("No row found with id '{}' in table '{}'", id, table_name) })) } else { HttpResponse::InternalServerError().json(json!({ "error": format!("Database error: {}", e) })) } } }, Err(e) => { log::error!("Failed to get database connection: {e}"); HttpResponse::InternalServerError().body("Failed to get database connection") } } } #[get("/project/{id}")] pub async fn get_project(path: web::Path<Uuid>, data: web::Data<AppState>) -> impl Responder { // Check if the table exists let id = path.into_inner(); // Construct the query let query = r#" WITH base AS MATERIALIZED ( SELECT c.id, u_homepage.url AS homepage, c.name, COALESCE(tr_latest.rank, '0') AS "teaRank", tr_latest.created_at AS "teaRankCalculatedAt", ( SELECT ARRAY_AGG(DISTINCT s.type) FROM canon_packages cp2 JOIN packages p2 ON cp2.package_id = p2.id JOIN package_managers pm2 ON p2.package_manager_id = pm2.id JOIN sources s ON pm2.source_id = s.id WHERE cp2.canon_id = c.id ) AS "packageManagers", ( SELECT COUNT(*)::bigint FROM legacy_dependencies ld JOIN canon_packages cp_out ON cp_out.package_id = ld.package_id WHERE cp_out.canon_id = c.id ) AS "dependenciesCount", ( SELECT COUNT(*)::bigint FROM legacy_dependencies ld JOIN canon_packages cp_in ON cp_in.package_id = ld.dependency_id WHERE cp_in.canon_id = c.id ) AS "dependentsCount" FROM canons c JOIN urls u_homepage ON c.url_id = u_homepage.id LEFT JOIN LATERAL ( SELECT tr.rank, tr.created_at FROM tea_ranks tr WHERE tr.canon_id = c.id ORDER BY tr.created_at DESC LIMIT 1 ) tr_latest ON TRUE WHERE c.id = $1 ) SELECT DISTINCT ON (b.id) b.id AS "projectId", b.homepage, b.name, u_source.url AS source, b."teaRank", b."teaRankCalculatedAt", b."packageManagers", b."dependenciesCount", b."dependentsCount" FROM base b JOIN canon_packages cp ON cp.canon_id = b.id JOIN package_urls pu ON pu.package_id = cp.package_id JOIN urls u_source ON pu.url_id = u_source.id JOIN url_types ut ON ut.id = u_source.url_type_id WHERE ut.name = 'source' ORDER BY b.id, b."teaRankCalculatedAt" DESC, u_source.url;"#; match data.pool.get().await { Ok(client) => match client.query_one(query, &[&id]).await { Ok(row) => { let json = rows_to_json(&[row]); let value = json.first().unwrap(); HttpResponse::Ok().json(value) } Err(e) => { if e.as_db_error() .is_some_and(|e| e.code() == &SqlState::NO_DATA_FOUND) { HttpResponse::NotFound().json(json!({ "error": format!("No row found with id '{:?}' in table canons", id) })) } else { HttpResponse::InternalServerError().json(json!({ "error": format!("Database error: {}", e) })) } } }, Err(e) => { log::error!("Failed to get database connection: {e}"); HttpResponse::InternalServerError().body("Failed to get database connection") } } } #[post("/project/batch")] pub async fn list_projects_by_id( req: web::Json<ProjectBatchRequest>, data: web::Data<AppState>, ) -> impl Responder { if req.project_ids.is_empty() { return HttpResponse::BadRequest().json(json!({ "error": "No project IDs provided" })); } // Construct the query let query = r#" SELECT DISTINCT ON (c.id) c.id AS "projectId", u_homepage.url AS homepage, c.name, u_source.url AS source, COALESCE(tr.rank,'0') AS "teaRank", tr.created_at AS "teaRankCalculatedAt", ( SELECT ARRAY_AGG(DISTINCT s.type) FROM canon_packages cp2 JOIN packages p2 ON cp2.package_id = p2.id JOIN package_managers pm2 ON p2.package_manager_id = pm2.id JOIN sources s ON pm2.source_id = s.id WHERE cp2.canon_id = c.id ) AS "packageManagers" FROM canons c JOIN urls u_homepage ON u_homepage.id = c.url_id JOIN canon_packages cp ON cp.canon_id = c.id JOIN package_urls pu ON pu.package_id = cp.package_id JOIN urls u_source ON pu.url_id = u_source.id JOIN url_types ut ON ut.id = u_source.url_type_id LEFT JOIN tea_ranks tr ON tr.canon_id = c.id WHERE c.id = ANY($1::uuid[]) AND ut.name = 'source' ORDER BY c.id, tr.created_at DESC, u_source.url;"#; match data.pool.get().await { Ok(client) => match client.query(query, &[&req.project_ids]).await { Ok(rows) => { let json = rows_to_json(&rows); HttpResponse::Ok().json(json) } Err(e) => { log::error!("Database query error: {e}"); HttpResponse::InternalServerError().json(json!({ "error": format!("Database error: {}", e) })) } }, Err(e) => { log::error!("Failed to get database connection: {e}"); HttpResponse::InternalServerError().body("Failed to get database connection") } } } #[get("/project/search/{name}")] pub async fn list_projects_by_name( path: web::Path<String>, data: web::Data<AppState>, ) -> impl Responder { let name = path.into_inner(); if name.trim().is_empty() { return HttpResponse::BadRequest().json(json!({ "error": "Search name cannot be empty" })); } let wildcard = format!("%{name}%"); // Construct the query let query = r#" SELECT * FROM ( SELECT DISTINCT ON (c.id) c.id AS "projectId", u_homepage.url AS homepage, c.name, u_source.url AS source, ( SELECT ARRAY_AGG(DISTINCT s.type) FROM canon_packages cp2 JOIN packages p2 ON cp2.package_id = p2.id JOIN package_managers pm2 ON p2.package_manager_id = pm2.id JOIN sources s ON pm2.source_id = s.id WHERE cp2.canon_id = c.id ) AS "packageManagers" FROM canons c JOIN urls u_homepage ON c.url_id = u_homepage.id JOIN canon_packages cp ON cp.canon_id = c.id JOIN package_urls pu ON pu.package_id = cp.package_id JOIN urls u_source ON pu.url_id = u_source.id JOIN url_types ut_source ON ut_source.id = u_source.url_type_id WHERE ut_source.name = 'source' AND (c.name ILIKE $1) ORDER BY c.id ) sub ORDER BY LENGTH(name), name LIMIT 10;"#; match data.pool.get().await { Ok(client) => match client.query(query, &[&wildcard]).await { Ok(rows) => { let json = rows_to_json(&rows); HttpResponse::Ok().json(json) } Err(e) => { log::error!("Database query error: {e}"); HttpResponse::InternalServerError().json(json!({ "error": format!("Database error: {e}") })) } }, Err(e) => { log::error!("Failed to get database connection: {e}"); HttpResponse::InternalServerError().body("Failed to get database connection") } } } #[post("/leaderboard")] pub async fn get_leaderboard( req: web::Json<LeaderboardRequest>, data: web::Data<AppState>, ) -> impl Responder { let limit = req.limit.clamp(1, RESPONSE_LIMIT); let Some(project_ids) = req.project_ids.as_deref() else { return get_top_projects(data, limit).await; }; if project_ids.len() > RESPONSE_LIMIT as usize { return HttpResponse::BadRequest().json(json!({ "error": format!("Too many project IDs (maximum {} allowed)", RESPONSE_LIMIT) })); } // Get cached projects and identify missing ones let (cached_projects, missing_ids) = get_cached_projects(data.project_cache.clone(), project_ids); // If we have all projects cached, return them sorted if missing_ids.is_empty() { return sort_truncate_and_return(cached_projects, limit); } // Query for missing projects let query = r#" SELECT * FROM ( SELECT DISTINCT ON (c.id) c.id AS "projectId", u_homepage.url AS homepage, c.name, u_source.url AS source, COALESCE(tr.rank,'0') AS "teaRank", tr.created_at AS "teaRankCalculatedAt", ( SELECT ARRAY_AGG(DISTINCT s.type) FROM canon_packages cp2 JOIN packages p2 ON cp2.package_id = p2.id JOIN package_managers pm2 ON p2.package_manager_id = pm2.id JOIN sources s ON pm2.source_id = s.id WHERE cp2.canon_id = c.id ) AS "packageManagers" FROM canons c JOIN urls u_homepage ON c.url_id = u_homepage.id JOIN canon_packages cp ON cp.canon_id = c.id JOIN package_urls pu ON pu.package_id = cp.package_id JOIN urls u_source ON pu.url_id = u_source.id JOIN url_types ut_source ON ut_source.id = u_source.url_type_id LEFT JOIN tea_ranks tr ON tr.canon_id = c.id WHERE c.id = ANY($1::uuid[]) AND ut_source.name = 'source' AND CAST(tr.rank AS NUMERIC) > 0 ORDER BY c.id, tr.created_at DESC, u_source.url ) sub ORDER BY CAST("teaRank" AS NUMERIC) DESC NULLS LAST LIMIT $2"#; match data.pool.get().await { Ok(client) => match client.query(query, &[&missing_ids, &limit]).await { Ok(rows) => { let fresh_projects = rows_to_json(&rows); // Cache the fresh projects for project in &fresh_projects { if let Some(project_id) = project.get("projectId").and_then(|v| v.as_str()) { if let Ok(uuid) = Uuid::parse_str(project_id) { data.project_cache.insert( uuid, crate::app_state::ProjectCacheEntry::new(project.clone()), ); } else { log::warn!("Failed to parse project ID as UUID: {}", project_id); } } else { log::warn!("No projectId found in project: {:?}", project); } } // Combine cached and fresh projects - keep Arc for cached ones let mut all_projects: Vec<Arc<Value>> = cached_projects; // Convert fresh projects to Arc to match the type let fresh_arcs: Vec<Arc<Value>> = fresh_projects.into_iter().map(Arc::new).collect(); all_projects.extend(fresh_arcs); sort_truncate_and_return(all_projects, limit) } Err(e) => { log::error!("Database query error: {e}"); HttpResponse::InternalServerError().json(json!({ "error": format!("Database error: {}", e) })) } }, Err(e) => { log::error!("Failed to get database connection: {e}"); HttpResponse::InternalServerError().body("Failed to get database connection") } } } // Helper function to sort, truncate, and return the final response fn sort_truncate_and_return(projects: Vec<Arc<Value>>, limit: i64) -> actix_web::HttpResponse { let mut projects = projects; // Sort projects by teaRank (descending) - Arc derefs to Value projects.sort_by(|a, b| { let rank_a = a .get("teaRank") .and_then(|v| v.as_str()) .and_then(|s| s.parse::<i64>().ok()) .unwrap_or(0); let rank_b = b .get("teaRank") .and_then(|v| v.as_str()) .and_then(|s| s.parse::<i64>().ok()) .unwrap_or(0); rank_b.cmp(&rank_a) }); // Apply limit projects.truncate(limit as usize); // Convert to Vec only for the final response - Arc doesn't implement Serialize let final_projects: Vec<Value> = projects .into_iter() .map(|arc_val| (*arc_val).clone()) .collect(); actix_web::HttpResponse::Ok().json(final_projects) } async fn get_top_projects(data: web::Data<AppState>, limit: i64) -> HttpResponse { // get client let Ok(client) = data.pool.get().await else { return HttpResponse::InternalServerError().body("Failed to get database connection"); }; // get latest run id let run_query = r#"SELECT MAX(run) from tea_rank_runs"#; let Ok(run_row) = client.query_one(run_query, &[]).await else { return HttpResponse::InternalServerError().body("Failed to get latest run"); }; let run: i32 = run_row.get(0); // get top projects (1-RESPONSE_LIMIT) // NOTE: the subquery is correlated with the outer tea_ranks row (an unqualified // canon_id would bind to cp2), and rank is stored as a string, so it is cast // before ordering let top_ranks_query = r#"SELECT canon_id as "projectId", name, rank as "teaRank", ( SELECT ARRAY_AGG(DISTINCT s.type) FROM canon_packages cp2 JOIN packages p2 ON cp2.package_id = p2.id JOIN package_managers pm2 ON p2.package_manager_id = pm2.id JOIN sources s ON pm2.source_id = s.id WHERE cp2.canon_id = tea_ranks.canon_id ) AS "packageManagers" FROM tea_ranks JOIN canons ON canon_id = canons.id WHERE tea_rank_run = $1 ORDER BY CAST(rank AS NUMERIC) DESC LIMIT $2"#; let Ok(top_ranks) = client .query(top_ranks_query, &[&run, &limit.clamp(1, RESPONSE_LIMIT)]) .await else { return HttpResponse::InternalServerError().json(json!({ "error": "Failed to fetch top ranks" })); }; let json = rows_to_json(&top_ranks); HttpResponse::Ok().json(json) }
================================================ FILE: api/src/logging.rs ================================================
use env_logger::Env; pub fn setup_logger() { env_logger::init_from_env(Env::default().default_filter_or("info")); } pub struct Logger; impl Logger { pub fn default() -> actix_web::middleware::Logger { actix_web::middleware::Logger::new("%a '%r' %s %b '%{Referer}i' '%{User-Agent}i' %T") } }
================================================ FILE: api/src/main.rs ================================================
mod app_state; mod db; mod handlers; mod logging; mod utils; use actix_web::{web, App, HttpServer}; use dashmap::DashMap; use dotenv::dotenv; use std::env; use std::sync::Arc; use crate::app_state::AppState; use crate::handlers::{ get_leaderboard, get_project, get_table, get_table_row, heartbeat, list_projects_by_id, list_projects_by_name, list_tables, }; use crate::logging::setup_logger; #[actix_web::main] async fn main() -> std::io::Result<()> { dotenv().ok(); setup_logger(); let host = env::var("HOST").unwrap_or_else(|_| "0.0.0.0".to_string()); let port = env::var("PORT").unwrap_or_else(|_| "8080".to_string()); let bind_address = format!("{host}:{port}"); let (pool, tables) = db::initialize_db().await; // Cache for project data to reduce database load on leaderboard routes let project_cache = Arc::new(DashMap::new()); log::info!("Available tables: {tables:?}"); log::info!("Starting server at http://{bind_address}"); HttpServer::new(move || { App::new() .wrap(logging::Logger::default()) .app_data(web::Data::new(AppState { pool: pool.clone(), tables: Arc::clone(&tables), project_cache: Arc::clone(&project_cache), })) // HEALTH .service(heartbeat) // SIMPLE CRUD OPERATIONS .service(list_tables) .service(get_table) .service(get_table_row) // BUSINESS LOGIC .service(get_leaderboard) .service(get_project) .service(list_projects_by_id) .service(list_projects_by_name) }) .bind(&bind_address)? .run() .await }
================================================ FILE: api/src/utils.rs ================================================
use actix_web::web::Query; use chrono::{DateTime, NaiveDate, NaiveDateTime, Utc}; use dashmap::DashMap; use serde_json::{json, Value}; use std::sync::Arc; use tokio_postgres::{types::Type, Row}; use uuid::Uuid; use crate::{app_state::ProjectCacheEntry, handlers::PaginationParams}; pub fn get_column_names(rows: &[Row]) -> Vec<String> { if let Some(row) = rows.first() { row.columns() .iter() .map(|col| col.name().to_string()) .collect() } else { vec![] } } pub fn convert_optional_to_json<T, E>(result: Result<Option<T>, E>) -> Value where T: serde::Serialize, { match result { Ok(Some(val)) => json!(val), _ => Value::Null, } } pub fn rows_to_json(rows: &[Row]) -> Vec<Value> { rows.iter() .map(|row| { let mut map = serde_json::Map::new(); for (i, column) in row.columns().iter().enumerate() { let value: Value = match *column.type_() { Type::INT2 => convert_optional_to_json(row.try_get::<_, Option<i16>>(i)), Type::INT4 => convert_optional_to_json(row.try_get::<_, Option<i32>>(i)), Type::INT8 => convert_optional_to_json(row.try_get::<_, Option<i64>>(i)), Type::FLOAT4 => convert_optional_to_json(row.try_get::<_, Option<f32>>(i)), Type::FLOAT8 => convert_optional_to_json(row.try_get::<_, Option<f64>>(i)), Type::BOOL => convert_optional_to_json(row.try_get::<_, Option<bool>>(i)), Type::VARCHAR | Type::TEXT | Type::BPCHAR => { convert_optional_to_json(row.try_get::<_, Option<String>>(i)) } Type::TIMESTAMP => { convert_optional_to_json(row.try_get::<_, Option<NaiveDateTime>>(i)) } Type::TIMESTAMPTZ => { convert_optional_to_json(row.try_get::<_, Option<DateTime<Utc>>>(i)) } Type::DATE => convert_optional_to_json(row.try_get::<_, Option<NaiveDate>>(i)), Type::JSON | Type::JSONB => { convert_optional_to_json(row.try_get::<_, Option<Value>>(i)) } Type::UUID => convert_optional_to_json(row.try_get::<_, Option<Uuid>>(i)), Type::TEXT_ARRAY | Type::VARCHAR_ARRAY => { convert_optional_to_json(row.try_get::<_, Option<Vec<String>>>(i)) } _ => { // For unsupported types, try to convert to string convert_optional_to_json(row.try_get::<_, Option<String>>(i)) } }; map.insert(column.name().to_string(), value); } Value::Object(map) }) .collect() } pub struct Pagination { pub page: i64, pub limit: i64, pub offset: i64, pub total_pages: i64, } impl Pagination { pub fn new(query: Query<PaginationParams>, total_count: i64) -> Self { let limit = query.limit.unwrap_or(200).clamp(1, 1000); // report at least one page, so the clamp below never sees min > max on empty tables let total_pages = ((total_count as f64 / limit as f64).ceil() as i64).max(1); let page = query.page.unwrap_or(1).clamp(1, total_pages); let offset = (page - 1) * limit; Self { page, limit, offset, total_pages, } } } // Helper function to get cached projects and return missing ones pub fn get_cached_projects( cache: Arc<DashMap<Uuid, ProjectCacheEntry>>, project_ids: &[Uuid], ) -> (Vec<Arc<Value>>, Vec<Uuid>) { let mut cached_projects = Vec::new(); let mut missing_ids = Vec::new(); for &project_id in project_ids { if let Some(entry) = cache.get(&project_id) { if !entry.is_expired() { cached_projects.push(entry.data.clone()); continue; } } missing_ids.push(project_id); } (cached_projects, missing_ids) }
================================================ FILE: core/README.md ================================================
# Core Tools for CHAI Python Loaders This directory contains a set of core tools and utilities to facilitate loading the CHAI database with package manager data, using Python helpers. These tools provide a common foundation for fetching, transforming, and loading data from various package managers into the database. In general, the flow of an indexer is: 1. Fetch data from source 2. Fetch data from CHAI 3.
Do a giant diff 4. Create new and updated entries for each package model in the db The best example is [Homebrew's](../package_managers/homebrew/main.py). ## Key Components ### 1. [Config](config.py) Entrypoint for all loaders; generally has all the information needed for the pipeline to start. Includes: - Execution flags: - `FETCH` determines whether we request the data from source - `TEST` enables a test mode, to test specific portions of the pipeline - `NO_CACHE` determines whether we save the intermediate pipeline files - Package Manager flags: - `pm_id` is the package manager id from the db that the pipeline runs for - `source` is the data source for that package manager. `SOURCES` defines the map. The next four configuration classes retrieve the IDs for URL types (homepage, documentation, etc.), dependency types (build, runtime, etc.), user types (crates user, GitHub user), and the package managers themselves. ### 2. [Database](db.py) The DB class offers a set of methods for interacting with the database, including: - Running queries to build a cache for the current state of the graph for a package manager - Batching utilities - Some load functions ### 3. [Fetcher](fetcher.py) The Fetcher class provides functionality for downloading and extracting data from package manager sources. It supports: - Downloading tarball / GZIP / Git files - Extracting contents to a specified directory - Maintaining a "latest" symlink so we always know where to look ### 4. [Logger](logger.py) A custom logging utility that provides consistent logging across all loaders. ### 5. [Models](models/__init__.py) SQLAlchemy models representing the database schema, including: - Package, Version, User, License, DependsOn, and other relevant tables > [!NOTE] > > This is currently used to actually generate the migrations as well ### 6. [Scheduler](scheduler.py) A scheduling utility that allows loaders to run at specified intervals. ### 7. [Transformer](transformer.py) The Transformer class provides a base for creating package manager-specific transformers. It includes: - Methods for locating and reading input files - Placeholder methods for transforming data into the required format ## Usage To create a new loader for a package manager: 1. Create a new directory under `package_managers/` for your package manager. 1. Implement a fetcher that inherits from the base Fetcher and is able to fetch the raw data from the package manager's source. 1. Implement a custom Transformer class that inherits from the base Transformer and figures out how to map the raw data provided by the package managers into the data model described in the [models](models/__init__.py) module. 1. Load the cache for data currently in CHAI for that package manager 1. Implement a diff to compare them 1. Pass diff objects (lists of new / updated data points) to `db.ingest` 1. Orchestrate via a `main.py`. Example usage can be found in the [crates](../package_managers/crates) loader; a minimal sketch is also included at the bottom of this README. ## TODOs - [ ] `Diff` currently has separate implementations for Homebrew and Crates, and could be centralized - open to help here!
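## Sketch

To make the flow above concrete, here is a minimal, hypothetical `main.py` skeleton wired together from the pieces described in this README. It uses the crates source purely for illustration, and the diff step is stubbed out with empty lists; a real loader computes those from its transformer output:

```python
"""Illustrative loader skeleton, not a real indexer."""

from core.config import Config, PackageManager
from core.db import DB
from core.fetcher import TarballFetcher


def main() -> None:
    # resolves pm_id, URL type IDs, dependency type IDs, etc. from CHAI
    config = Config(PackageManager.CRATES)
    db = DB("example_loader")  # logger name is illustrative

    # 1. fetch data from source (skipped when FETCH=false)
    if config.exec_config.fetch:
        fetcher = TarballFetcher(
            "crates",
            config.pm_config.source,
            config.exec_config.no_cache,
            config.exec_config.test,
        )
        fetcher.write(fetcher.fetch())

    # 2. fetch the current state of the graph from CHAI
    current = db.current_graph(config.pm_config.pm_id)

    # 3. diff source data against `current` -- package-manager specific,
    #    so this sketch just pretends everything is already up to date
    new_packages, new_urls, new_package_urls = [], [], []
    new_deps, removed_deps = [], []
    updated_packages, updated_package_urls = [], []

    # 4. load the diff into CHAI in one transaction
    db.ingest(
        new_packages,
        new_urls,
        new_package_urls,
        new_deps,
        removed_deps,
        updated_packages,
        updated_package_urls,
    )
    db.insert_load_history(config.pm_config.pm_id)
    db.close()


if __name__ == "__main__":
    main()
```

Note that `db.ingest` commits once and rolls back on error, so a loader either lands its whole diff or nothing.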
================================================ FILE: core/config.py ================================================ from enum import Enum from sqlalchemy import UUID from core.db import ConfigDB from core.logger import Logger from core.utils import env_vars logger = Logger("config") class PackageManager(Enum): CRATES = "crates" HOMEBREW = "homebrew" DEBIAN = "debian" NPM = "npm" PKGX = "pkgx" TEST = env_vars("TEST", "false") FETCH = env_vars("FETCH", "true") NO_CACHE = env_vars("NO_CACHE", "true") SOURCES = { PackageManager.CRATES: "https://static.crates.io/db-dump.tar.gz", PackageManager.HOMEBREW: "https://formulae.brew.sh/api/formula.json", # for debian, sources contains the urls, packages is tied to the linux distribution PackageManager.DEBIAN: [ "https://ftp.debian.org/debian/dists/stable/main/binary-amd64/Packages.gz", "https://ftp.debian.org/debian/dists/stable/main/source/Sources.gz", ], PackageManager.NPM: "https://registry.npmjs.org/-/all", # fake for now PackageManager.PKGX: "https://github.com/pkgxdev/pantry.git", } # The three configuration values URLTypes, DependencyTypes, and UserTypes will query the # DB to get the respective IDs. If the values don't exist in the database, they will # raise an AttributeError (None has no attribute id) at the start class ExecConf: test: bool fetch: bool no_cache: bool def __init__(self) -> None: self.test = TEST self.fetch = FETCH self.no_cache = NO_CACHE def __str__(self): return f"ExecConf(test={self.test},fetch={self.fetch},no_cache={self.no_cache})" class PMConf: package_manager: PackageManager pm_id: str source: str | list[str] def __init__(self, pm: PackageManager, db: ConfigDB): self.package_manager = pm self.pm_id = db.select_package_manager_by_name(pm.value).id self.source = SOURCES[pm] def __str__(self): return f"PMConf(pm_id={self.pm_id},source={self.source})" class URLTypes: homepage: UUID repository: UUID documentation: UUID source: UUID def __init__(self, db: ConfigDB): self.load_url_types(db) def load_url_types(self, db: ConfigDB) -> None: self.homepage = db.select_url_types_by_name("homepage").id self.repository = db.select_url_types_by_name("repository").id self.documentation = db.select_url_types_by_name("documentation").id self.source = db.select_url_types_by_name("source").id def __str__(self) -> str: return f"URLs(homepage={self.homepage},repo={self.repository},docs={self.documentation},src={self.source})" class UserTypes: crates: UUID github: UUID def __init__(self, db: ConfigDB): self.crates = db.select_source_by_name("crates").id self.github = db.select_source_by_name("github").id def __str__(self) -> str: return f"UserTypes(crates={self.crates},github={self.github})" class DependencyTypes: build: UUID development: UUID runtime: UUID test: UUID optional: UUID recommended: UUID def __init__(self, db: ConfigDB): self.build = db.select_dependency_type_by_name("build").id self.development = db.select_dependency_type_by_name("development").id self.runtime = db.select_dependency_type_by_name("runtime").id self.test = db.select_dependency_type_by_name("test").id self.optional = db.select_dependency_type_by_name("optional").id self.recommended = db.select_dependency_type_by_name("recommended").id def __str__(self) -> str: return f"DependencyTypes(build={self.build},development={self.development},runtime={self.runtime},test={self.test},optional={self.optional},recommended={self.recommended})" class PackageManagers: crates: UUID homebrew: UUID debian: UUID npm: UUID pkgx: UUID def __init__(self, db: ConfigDB): self.crates = 
db.select_package_manager_by_name("crates").id self.homebrew = db.select_package_manager_by_name("homebrew").id self.debian = db.select_package_manager_by_name("debian").id self.npm = db.select_package_manager_by_name("npm").id self.pkgx = db.select_package_manager_by_name("pkgx").id class Config: exec_config: ExecConf pm_config: PMConf url_types: URLTypes user_types: UserTypes dependency_types: DependencyTypes package_managers: PackageManagers def __init__(self, pm: PackageManager) -> None: db = ConfigDB() self.exec_config = ExecConf() self.pm_config = PMConf(pm, db) self.url_types = URLTypes(db) self.user_types = UserTypes(db) self.dependency_types = DependencyTypes(db) self.package_managers = PackageManagers(db) def __str__(self): return f"Config(exec_config={self.exec_config}, pm_config={self.pm_config}, url_types={self.url_types}, user_types={self.user_types}, dependency_types={self.dependency_types}, package_managers={self.package_managers})" ================================================ FILE: core/db.py ================================================ import os from collections import defaultdict from datetime import datetime from typing import Any from uuid import UUID from sqlalchemy import Insert, Result, Update, create_engine, select, update from sqlalchemy.dialects import postgresql from sqlalchemy.orm import Session, sessionmaker from core.logger import Logger from core.models import ( URL, BaseModel, DependsOnType, LegacyDependency, LoadHistory, Package, PackageManager, PackageURL, Source, URLType, ) from core.structs import CurrentGraph, CurrentURLs, URLKey CHAI_DATABASE_URL = os.getenv("CHAI_DATABASE_URL") DEFAULT_BATCH_SIZE = 10000 class DB: def __init__(self, logger_name: str): self.logger = Logger(logger_name) self.engine = create_engine(CHAI_DATABASE_URL) self.session = sessionmaker(self.engine) self.logger.debug("connected") self.now: datetime = datetime.now() def insert_load_history(self, package_manager_id: str): with self.session() as session: session.add(LoadHistory(package_manager_id=package_manager_id)) session.commit() def print_statement(self, stmt): dialect = postgresql.dialect() compiled_stmt = stmt.compile( dialect=dialect, compile_kwargs={"literal_binds": True} ) self.logger.log(str(compiled_stmt)) def close(self): self.logger.debug("closing") self.engine.dispose() def search_names( self, package_names: list[str], package_managers: list[UUID] ) -> list[str]: """Return Homepage URLs for packages with these names""" with self.session() as session: results = ( session.query(Package, URL) .join(PackageURL, PackageURL.package_id == Package.id) .join(URL, PackageURL.url_id == URL.id) .join(URLType, URL.url_type_id == URLType.id) .filter(URLType.name == "homepage") .filter(Package.name.in_(package_names)) .filter(Package.package_manager_id.in_(package_managers)) .all() ) # build a mapping name_to_url = {result.Package.name: result.URL.url for result in results} # return in the order preserved by the input (bc its relevant) # and account for the fact that some names might not have a URL return [ name_to_url.get(name) for name in package_names if name in name_to_url ] def current_graph(self, package_manager_id: UUID) -> CurrentGraph: """Get the packages and dependencies for a specific package manager""" package_map: dict[str, Package] = defaultdict(Package) dependencies: dict[UUID, set[LegacyDependency]] = defaultdict(set) stmt = ( select(Package, LegacyDependency) .select_from(Package) .join( LegacyDependency, onclause=Package.id == LegacyDependency.package_id, 
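# note: isouter=True below makes this a LEFT JOIN, so packages with no
# dependency rows still land in the package_map cache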
isouter=True, ) .where(Package.package_manager_id == package_manager_id) ) with self.session() as session: result: Result[tuple[Package, LegacyDependency]] = session.execute(stmt) for pkg, dep in result: # add to the package map, by import_id, which is usually name package_map[pkg.import_id] = pkg # and add to the dependencies map as well if dep: # check because it's an outer join, so might be None dependencies[pkg.id].add(dep) self.logger.debug(f"Cached {len(package_map)} packages") return CurrentGraph(package_map, dependencies) def _build_current_urls( self, result: Result[tuple[Package, PackageURL, URL]] ) -> CurrentURLs: """Build the CurrentURLs result based on a query of Package, PackageURL, URL""" url_map: dict[URLKey, URL] = {} package_urls: dict[UUID, set[PackageURL]] = {} for pkg, pkg_url, url in result: url_key = URLKey(url.url, url.url_type_id) url_map[url_key] = url # since it's a left join, we need to check if pkg is None if pkg is not None: if pkg.id not in package_urls: package_urls[pkg.id] = set() package_urls[pkg.id].add(pkg_url) self.logger.debug(f"Cached {len(url_map)} URLs") self.logger.debug(f"Cached {len(package_urls)} package URLs") return CurrentURLs(url_map=url_map, package_urls=package_urls) def current_urls(self, urls: set[str]) -> CurrentURLs: """Get the Package URL Relationships for a given set of URLs""" stmt = ( select(Package, PackageURL, URL) .select_from(URL) .join(PackageURL, PackageURL.url_id == URL.id, isouter=True) .join(Package, Package.id == PackageURL.package_id, isouter=True) .where(URL.url.in_(urls)) ) with self.session() as session: result: Result[tuple[Package, PackageURL, URL]] = session.execute(stmt) return self._build_current_urls(result) def all_current_urls(self) -> CurrentURLs: """Get all the URLs and the Packages they are tied to from CHAI""" stmt = ( select(Package, PackageURL, URL) .select_from(URL) .join(PackageURL, PackageURL.url_id == URL.id, isouter=True) .join(Package, Package.id == PackageURL.package_id, isouter=True) ) with self.session() as session: result: Result[tuple[Package, PackageURL, URL]] = session.execute(stmt) return self._build_current_urls(result) def load( self, session: Session, data: list[BaseModel], stmt: Insert | Update ) -> None: """Smart batching utility""" if data: values: list[dict[str, str | UUID | datetime]] = [ obj.to_dict_v2() for obj in data ] self.batch(session, stmt, values, DEFAULT_BATCH_SIZE) def batch( self, session: Session, stmt: Insert | Update, values: list[dict[str, str | UUID | datetime]], batch_size: int = DEFAULT_BATCH_SIZE, ): """ Utility to batch insert or update, but doesn't commit! 
Inputs: - session: the sessionmaker object, so create it before you use it - stmt: the insert or update statement to construct, without the values - values: the values to insert or update - generally using to_dict_v2() - batch_size: the batch size, defaults to 10000 """ for i in range(0, len(values), batch_size): batch = values[i : i + batch_size] self.logger.log( f"Processing batch {i // batch_size + 1}/{(len(values) - 1) // batch_size + 1} ({len(batch)})" ) value_stmt = stmt.values(batch) session.execute(value_stmt) def ingest( self, new_packages: list[Package], new_urls: list[URL], new_package_urls: list[PackageURL], new_deps: list[LegacyDependency], removed_deps: list[LegacyDependency], updated_packages: list[dict[str, UUID | str | datetime]], updated_package_urls: list[dict[str, UUID | datetime]], ) -> None: """ Ingests a list of new, updated, and deleted objects into the database. It flushes after each insert, to ensure that the database is in a valid state prior to the next batch of ingestions. TODO: if pkey is set in the values provided, then sqlalchemy will use psycopg2.executemany(...), which is quicker, but still the slowest of all execution options provided by psycopg2. The best one is execute_values, which is **only** available for inserts. It looks like sqlalchemy^2 has native support for insertmanyvalues, but **I think** we need to pass the data in as a list[dict] instead of objects. See: https://docs.sqlalchemy.org/en/20/core/connections.html#engine-insertmanyvalues Inputs: - new_packages: a list of new Package objects - new_urls: a list of new URL objects - new_package_urls: a list of new PackageURL objects - updated_packages: a list of updated Package objects - updated_package_urls: a list of updated PackageURL objects - new_deps: a list of new LegacyDependency objects - removed_deps: a list of removed LegacyDependency objects """ self.logger.log("-" * 100) self.logger.log("Going to load") self.logger.log(f"New packages: {len(new_packages)}") self.logger.log(f"New URLs: {len(new_urls)}") self.logger.log(f"New package URLs: {len(new_package_urls)}") self.logger.log(f"Updated packages: {len(updated_packages)}") self.logger.log(f"Updated package URLs: {len(updated_package_urls)}") self.logger.log(f"New dependencies: {len(new_deps)}") self.logger.log(f"Removed dependencies: {len(removed_deps)}") self.logger.log("-" * 100) with self.session() as session: try: # 1. Add all new objects with granular flushes self.execute(session, new_packages, "add", "new packages") self.execute(session, new_urls, "add", "new urls") self.execute(session, new_package_urls, "add", "new package urls") self.execute(session, removed_deps, "delete", "removed dependencies") self.execute(session, new_deps, "add", "new dependencies") # 2. Perform updates (these will now operate on a flushed state) if updated_packages: session.execute(update(Package), updated_packages) if updated_package_urls: session.execute(update(PackageURL), updated_package_urls) # 3.
Commit all changes session.commit() self.logger.log("✅ Successfully ingested") except Exception as e: self.logger.error(f"Error during batched ingest: {e}") session.rollback() raise e def execute(self, session: Session, data: list[Any], method: str, log: str) -> None: if method not in ["add", "delete"]: raise ValueError(f"db.execute({method}) is unknown") if data: match method: case "add": session.add_all(data) case "delete": self.remove_all(session, data) case _: pass session.flush() self.logger.log(f"✅ {len(data):,} {log}") def remove_all(self, session: Session, data: list[Any]) -> None: for item in data: session.delete(item) class ConfigDB(DB): def __init__(self): super().__init__("ConfigDB") def select_package_manager_by_name(self, package_manager: str) -> PackageManager: with self.session() as session: result = ( session.query(PackageManager) .join(Source, PackageManager.source_id == Source.id) .filter(Source.type == package_manager) .first() ) if result: return result raise ValueError(f"Package manager {package_manager} not found") def select_url_types_by_name(self, name: str) -> URLType: with self.session() as session: return session.query(URLType).filter(URLType.name == name).first() def select_source_by_name(self, name: str) -> Source: with self.session() as session: return session.query(Source).filter(Source.type == name).first() def select_dependency_type_by_name(self, name: str) -> DependsOnType: with self.session() as session: return ( session.query(DependsOnType).filter(DependsOnType.name == name).first() ) if __name__ == "__main__": db = ConfigDB() print(db.search_names(["elfutils.org", "elfutils"])) ================================================ FILE: core/fetcher.py ================================================ import gzip import json import os import tarfile from dataclasses import dataclass from datetime import datetime from io import BytesIO from shutil import rmtree from typing import Any import git from requests import get from core.logger import Logger @dataclass class Data: file_path: str file_name: str content: Any # json or bytes class Fetcher: def __init__(self, name: str, source: str, no_cache: bool, test: bool): self.name = name self.source = source self.output = f"data/{name}" self.logger = Logger(f"{name}_fetcher") self.no_cache = no_cache self.test = test def write(self, files: list[Data]): """generic write function for some collection of files""" # prep the file location now = datetime.now().strftime("%Y-%m-%d") root_path = f"{self.output}/{now}" # write # it can be anything - json, tarball, etc. 
for item in files: self.logger.debug(f"writing {item.file_path}/{item.file_name}") file_path = item.file_path file_name = item.file_name file_content = item.content full_path = os.path.join(root_path, file_path) # make sure the path exists os.makedirs(full_path, exist_ok=True) with open(os.path.join(full_path, file_name), "wb") as f: if isinstance(file_content, list | dict): # Convert JSON-serializable objects to string file_content = json.dumps(file_content) # Ensure content is bytes before writing if isinstance(file_content, str): file_content = file_content.encode("utf-8") f.write(file_content) # update the latest symlink self.update_symlink(now) def update_symlink(self, latest_path: str): latest_symlink = f"{self.output}/latest" if os.path.islink(latest_symlink): self.logger.debug(f"removing existing symlink {latest_symlink}") os.remove(latest_symlink) self.logger.debug(f"creating symlink {latest_symlink} -> {latest_path}") os.symlink(latest_path, latest_symlink) def fetch(self) -> bytes: if not self.source: raise ValueError("source is not set") response = get(self.source) try: response.raise_for_status() except Exception as e: self.logger.error(f"error fetching {self.source}: {e}") raise e return response.content def cleanup(self): if self.no_cache: # TODO: it's deleting everything here rmtree(self.output, ignore_errors=True) os.makedirs(self.output, exist_ok=True) class TarballFetcher(Fetcher): def __init__(self, name: str, source: str, no_cache: bool, test: bool): super().__init__(name, source, no_cache, test) def fetch(self) -> list[Data]: content = super().fetch() bytes_io_object = BytesIO(content) bytes_io_object.seek(0) files = [] with tarfile.open(fileobj=bytes_io_object, mode="r:gz") as tar: for member in tar.getmembers(): if member.isfile(): bytes_io_file = BytesIO(tar.extractfile(member).read()) destination_key = member.name file_name = destination_key.split("/")[-1] file_path = "/".join(destination_key.split("/")[:-1]) self.logger.debug(f"file_path/file_name: {file_path}/{file_name}") files.append(Data(file_path, file_name, bytes_io_file.read())) return files # GZip compresses only one file, so file_path and file_name are not used class GZipFetcher(Fetcher): def __init__( self, name: str, source: str, no_cache: bool, test: bool, file_path: str, file_name: str, ): super().__init__(name, source, no_cache, test) self.file_path = file_path self.file_name = file_name def fetch(self) -> list[Data]: content = super().fetch() files = [] decompressed = gzip.decompress(content).decode("utf-8") files.append(Data(self.file_path, self.file_name, decompressed.encode("utf-8"))) return files class GitFetcher(Fetcher): def __init__(self, name: str, source: str, no_cache: bool, test: bool): super().__init__(name, source, no_cache, test) def fetch(self) -> str: # assume that source is a git repo whose main branch needs to be cloned # we'll first prep the output directory, then clone, then update the symlinks # NOTE: this is what the main Fetcher does, but slightly modified for this case now = datetime.now().strftime("%Y-%m-%d") root_dir = f"{self.output}/{now}" os.makedirs(root_dir, exist_ok=True) # now, clone the repo here self.logger.debug(f"Cloning {self.source} into {root_dir}...") _ = git.Repo.clone_from(self.source, root_dir, depth=1, branch="main") self.logger.debug("Repository cloned successfully.") # update the symlinks self.update_symlink(now) return root_dir ================================================ FILE: core/logger.py ================================================ import 
sys import time import traceback from core.utils import env_vars DEBUG = env_vars("DEBUG", "false") def as_minutes(seconds: float) -> float: return seconds / 60 class Logger: SILENT = 0 NORMAL = 1 VERBOSE = 2 def __init__( self, name: str, mode: int = NORMAL, start: float | None = None ) -> None: self.name = name self.start = start or time.time() self.mode = Logger.VERBOSE if DEBUG else mode def print(self, msg: str): print(f"{self.time_diff():.2f}: [{self.name}]: {msg}", flush=True) def error(self, message): self.print(f"[ERROR]: {message}") def log(self, message): if self.mode >= Logger.NORMAL: self.print(f"{message}") def debug(self, message): if self.mode >= Logger.VERBOSE: self.print(f"[DEBUG]: {message}") def warn(self, message): if self.mode >= Logger.NORMAL: self.print(f"[WARN]: {message}") def is_verbose(self): return self.mode >= Logger.VERBOSE def time_diff(self): return time.time() - self.start def exception(self): exc_type, exc_value, exc_traceback = sys.exc_info() self.print(f"{exc_type.__name__}: {exc_value}") self.print("***** TRACEBACK *****") print(f"{''.join(traceback.format_tb(exc_traceback))}") def info(self, message): self.log(message) def warning(self, message): self.warn(message) ================================================ FILE: core/models/__init__.py ================================================ # __init__.py from __future__ import annotations from datetime import datetime from sqlalchemy import ( Column, DateTime, ForeignKey, Index, Integer, MetaData, String, UniqueConstraint, func, ) from sqlalchemy.dialects.postgresql import UUID from sqlalchemy.orm import Mapped, declarative_base, relationship naming_convention = { "ix": "ix_%(column_0_label)s", "uq": "uq_%(table_name)s_%(column_0_name)s", "ck": "ck_%(table_name)s_%(constraint_name)s", "fk": "fk_%(table_name)s_%(column_0_name)s_%(referred_table_name)s", "pk": "pk_%(table_name)s", } metadata = MetaData(naming_convention=naming_convention) class BaseModel: # we have UUIDs, strings, datetimes, ints, and floats def to_dict_v2(self) -> dict[str, str | UUID | datetime | int | float]: """Return a dictionary of all non-None attributes.""" return { attr: getattr(self, attr) for attr in self.__table__.columns.keys() # noqa: SIM118 if getattr(self, attr) is not None } Base = declarative_base(metadata=metadata, cls=BaseModel) class Package(Base): __tablename__ = "packages" __table_args__ = ( UniqueConstraint( "package_manager_id", "import_id", name="uq_package_manager_import_id" ), ) id = Column( UUID(as_uuid=True), primary_key=True, default=func.uuid_generate_v4(), server_default=func.uuid_generate_v4(), ) derived_id = Column(String, nullable=False, unique=True) # package_manager/name name = Column(String, nullable=False, index=True) package_manager_id = Column( UUID(as_uuid=True), ForeignKey("package_managers.id"), nullable=False, index=True, ) import_id = Column(String, nullable=False, index=True) readme = Column(String, nullable=True) created_at = Column( DateTime, nullable=False, default=func.now(), server_default=func.now() ) updated_at = Column( DateTime, nullable=False, default=func.now(), server_default=func.now() ) def to_dict(self): return { "derived_id": self.derived_id, "name": self.name, "package_manager_id": self.package_manager_id, "import_id": self.import_id, "readme": self.readme, } class PackageManager(Base): __tablename__ = "package_managers" id = Column( UUID(as_uuid=True), primary_key=True, default=func.uuid_generate_v4(), server_default=func.uuid_generate_v4(), ) source_id = Column( 
UUID(as_uuid=True), ForeignKey("sources.id"), nullable=False, unique=True ) created_at = Column( DateTime, nullable=False, default=func.now(), server_default=func.now() ) updated_at = Column( DateTime, nullable=False, default=func.now(), server_default=func.now() ) class Version(Base): __tablename__ = "versions" __table_args__ = ( UniqueConstraint("package_id", "version", name="uq_package_version"), ) id = Column( UUID(as_uuid=True), primary_key=True, default=func.uuid_generate_v4(), server_default=func.uuid_generate_v4(), ) package_id = Column( UUID(as_uuid=True), ForeignKey("packages.id"), nullable=False, index=True ) version = Column(String, nullable=False, index=True) import_id = Column(String, nullable=False, index=True) # size, published_at, license_id, downloads, checksum # are nullable bc not all sources provide them size = Column(Integer, nullable=True, index=True) published_at = Column(DateTime, nullable=True, index=True) license_id = Column( UUID(as_uuid=True), ForeignKey("licenses.id"), nullable=True, index=True ) downloads = Column(Integer, nullable=True, index=True) checksum = Column(String, nullable=True) created_at = Column( DateTime, nullable=False, default=func.now(), server_default=func.now() ) updated_at = Column( DateTime, nullable=False, default=func.now(), server_default=func.now() ) package: Mapped[Package] = relationship() license: Mapped[License] = relationship() def to_dict(self): return { "package_id": self.package_id, "version": self.version, "import_id": self.import_id, "size": self.size, "published_at": self.published_at, "license_id": self.license_id, "downloads": self.downloads, "checksum": self.checksum, } class License(Base): __tablename__ = "licenses" id = Column( UUID(as_uuid=True), primary_key=True, default=func.uuid_generate_v4(), server_default=func.uuid_generate_v4(), ) name = Column(String, nullable=False, unique=True, index=True) created_at = Column( DateTime, nullable=False, default=func.now(), server_default=func.now() ) updated_at = Column( DateTime, nullable=False, default=func.now(), server_default=func.now() ) class DependsOn(Base): __tablename__ = "dependencies" __table_args__ = ( UniqueConstraint( "version_id", "dependency_id", "dependency_type_id", name="uq_version_dependency_type", ), ) id = Column( UUID(as_uuid=True), primary_key=True, default=func.uuid_generate_v4(), server_default=func.uuid_generate_v4(), ) version_id = Column( UUID(as_uuid=True), ForeignKey("versions.id"), nullable=False, index=True ) dependency_id = Column( UUID(as_uuid=True), ForeignKey("packages.id"), nullable=False, index=True ) # ideally, these are non-nullable but diff package managers are picky about this dependency_type_id = Column( UUID(as_uuid=True), ForeignKey("depends_on_types.id"), nullable=True, index=True ) semver_range = Column(String, nullable=True) created_at = Column( DateTime, nullable=False, default=func.now(), server_default=func.now() ) updated_at = Column( DateTime, nullable=False, default=func.now(), server_default=func.now() ) version: Mapped[Version] = relationship() dependency: Mapped[Package] = relationship() dependency_type: Mapped[DependsOnType] = relationship() def to_dict(self): return { "version_id": self.version_id, "dependency_id": self.dependency_id, # "dependency_type_id": self.dependency_type_id, "semver_range": self.semver_range, } class DependsOnType(Base): __tablename__ = "depends_on_types" id = Column( UUID(as_uuid=True), primary_key=True, default=func.uuid_generate_v4(), server_default=func.uuid_generate_v4(), ) name = 
Column(String, nullable=False, unique=True, index=True) created_at = Column( DateTime, nullable=False, default=func.now(), server_default=func.now() ) updated_at = Column( DateTime, nullable=False, default=func.now(), server_default=func.now() ) class LoadHistory(Base): __tablename__ = "load_history" id = Column( UUID(as_uuid=True), primary_key=True, default=func.uuid_generate_v4(), server_default=func.uuid_generate_v4(), ) package_manager_id = Column( UUID(as_uuid=True), ForeignKey("package_managers.id"), nullable=False ) created_at = Column( DateTime, nullable=False, default=func.now(), server_default=func.now() ) updated_at = Column( DateTime, nullable=False, default=func.now(), server_default=func.now() ) # authoritative source of truth for all our sources class Source(Base): __tablename__ = "sources" id = Column( UUID(as_uuid=True), primary_key=True, default=func.uuid_generate_v4(), server_default=func.uuid_generate_v4(), ) type = Column(String, nullable=False, unique=True) created_at = Column( DateTime, nullable=False, default=func.now(), server_default=func.now() ) updated_at = Column( DateTime, nullable=False, default=func.now(), server_default=func.now() ) # this is a collection of all the different type of URLs class URL(Base): __tablename__ = "urls" __table_args__ = (UniqueConstraint("url_type_id", "url", name="uq_url_type_url"),) id = Column( UUID(as_uuid=True), primary_key=True, default=func.uuid_generate_v4(), server_default=func.uuid_generate_v4(), ) url_trgm_idx = Index( "ix_urls_url_trgm", "url", postgresql_using="gin", postgresql_ops={"url": "gin_trgm_ops"}, ) url = Column(String, nullable=False) url_type_id = Column( UUID(as_uuid=True), ForeignKey("url_types.id"), nullable=False, index=True ) created_at = Column( DateTime, nullable=False, default=func.now(), server_default=func.now() ) updated_at = Column( DateTime, nullable=False, default=func.now(), server_default=func.now() ) def to_dict(self): return {"url": self.url, "url_type_id": self.url_type_id} # homepage, repository, documentation, etc. 
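# (the URLType rows themselves are seeded by alembic's load-values.sql: source, homepage, documentation, repository)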
class URLType(Base): __tablename__ = "url_types" id = Column( UUID(as_uuid=True), primary_key=True, default=func.uuid_generate_v4(), server_default=func.uuid_generate_v4(), ) name = Column(String, nullable=False, unique=True) created_at = Column( DateTime, nullable=False, default=func.now(), server_default=func.now() ) updated_at = Column( DateTime, nullable=False, default=func.now(), server_default=func.now() ) class User(Base): __tablename__ = "users" __table_args__ = ( UniqueConstraint("source_id", "import_id", name="uq_source_import_id"), ) id = Column( UUID(as_uuid=True), primary_key=True, default=func.uuid_generate_v4(), server_default=func.uuid_generate_v4(), ) username = Column(String, nullable=False, index=True) source_id = Column( UUID(as_uuid=True), ForeignKey("sources.id"), nullable=False, index=True ) import_id = Column(String, nullable=False, unique=False, index=True) created_at = Column( DateTime, nullable=False, default=func.now(), server_default=func.now() ) updated_at = Column( DateTime, nullable=False, default=func.now(), server_default=func.now() ) def to_dict(self): return { "username": self.username, "source_id": self.source_id, "import_id": self.import_id, } class UserVersion(Base): __tablename__ = "user_versions" __table_args__ = ( UniqueConstraint("user_id", "version_id", name="uq_user_version"), ) id = Column( UUID(as_uuid=True), primary_key=True, default=func.uuid_generate_v4(), server_default=func.uuid_generate_v4(), ) user_id = Column( UUID(as_uuid=True), ForeignKey("users.id"), nullable=False, index=True ) version_id = Column( UUID(as_uuid=True), ForeignKey("versions.id"), nullable=False, index=True ) created_at = Column( DateTime, nullable=False, default=func.now(), server_default=func.now() ) updated_at = Column( DateTime, nullable=False, default=func.now(), server_default=func.now() ) def to_dict(self): return { "user_id": self.user_id, "version_id": self.version_id, } class UserPackage(Base): __tablename__ = "user_packages" __table_args__ = ( UniqueConstraint("user_id", "package_id", name="uq_user_package"), ) id = Column( UUID(as_uuid=True), primary_key=True, default=func.uuid_generate_v4(), server_default=func.uuid_generate_v4(), ) user_id = Column( UUID(as_uuid=True), ForeignKey("users.id"), nullable=False, index=True ) package_id = Column( UUID(as_uuid=True), ForeignKey("packages.id"), nullable=False, index=True ) created_at = Column( DateTime, nullable=False, default=func.now(), server_default=func.now() ) updated_at = Column( DateTime, nullable=False, default=func.now(), server_default=func.now() ) def to_dict(self): return { "user_id": self.user_id, "package_id": self.package_id, } class PackageURL(Base): __tablename__ = "package_urls" __table_args__ = (UniqueConstraint("package_id", "url_id", name="uq_package_url"),) id = Column( UUID(as_uuid=True), primary_key=True, default=func.uuid_generate_v4(), server_default=func.uuid_generate_v4(), ) package_id = Column( UUID(as_uuid=True), ForeignKey("packages.id"), nullable=False, index=True ) url_id = Column( UUID(as_uuid=True), ForeignKey("urls.id"), nullable=False, index=True ) created_at = Column( DateTime, nullable=False, default=func.now(), server_default=func.now() ) updated_at = Column( DateTime, nullable=False, default=func.now(), server_default=func.now() ) # TODO: deprecated def to_dict(self): return { "package_id": self.package_id, "url_id": self.url_id, } class LegacyDependency(Base): __tablename__ = "legacy_dependencies" __table_args__ = ( UniqueConstraint("package_id", "dependency_id", 
name="uq_package_dependency"), ) id = Column(Integer, primary_key=True) package_id = Column( UUID(as_uuid=True), ForeignKey("packages.id"), nullable=False, index=True ) dependency_id = Column( UUID(as_uuid=True), ForeignKey("packages.id"), nullable=False, index=True ) dependency_type_id = Column( UUID(as_uuid=True), ForeignKey("depends_on_types.id"), nullable=False, index=True, ) semver_range = Column(String, nullable=True) created_at = Column( DateTime, nullable=False, default=func.now(), server_default=func.now() ) updated_at = Column( DateTime, nullable=False, default=func.now(), server_default=func.now() ) class Canon(Base): __tablename__ = "canons" id = Column( UUID(as_uuid=True), primary_key=True, default=func.uuid_generate_v4(), server_default=func.uuid_generate_v4(), ) url_id = Column( UUID(as_uuid=True), ForeignKey("urls.id"), nullable=False, index=True, unique=True, ) name_trgm_idx = Index( "ix_canons_name_trgm", "name", postgresql_using="gin", postgresql_ops={"name": "gin_trgm_ops"}, ) name = Column(String, nullable=False) created_at = Column( DateTime, nullable=False, default=func.now(), server_default=func.now() ) updated_at = Column( DateTime, nullable=False, default=func.now(), server_default=func.now() ) url: Mapped[URL] = relationship() class CanonPackage(Base): __tablename__ = "canon_packages" id = Column(UUID(as_uuid=True), primary_key=True) canon_id = Column( UUID(as_uuid=True), ForeignKey("canons.id"), nullable=False, index=True ) package_id = Column( UUID(as_uuid=True), ForeignKey("packages.id"), nullable=False, index=True, unique=True, ) created_at = Column( DateTime, nullable=False, default=func.now(), server_default=func.now() ) updated_at = Column( DateTime, nullable=False, default=func.now(), server_default=func.now() ) class TeaRankRun(Base): __tablename__ = "tea_rank_runs" id = Column( UUID(as_uuid=True), primary_key=True, default=func.uuid_generate_v4(), server_default=func.uuid_generate_v4(), ) run = Column(Integer, nullable=False) split_ratio = Column(String, nullable=False) created_at = Column( DateTime, nullable=False, default=func.now(), server_default=func.now() ) class TeaRank(Base): __tablename__ = "tea_ranks" id = Column( UUID(as_uuid=True), primary_key=True, default=func.uuid_generate_v4(), server_default=func.uuid_generate_v4(), ) tea_rank_run = Column(Integer, nullable=False, index=True) canon_id = Column( UUID(as_uuid=True), ForeignKey("canons.id"), nullable=False, index=True ) rank = Column(String, nullable=False) created_at = Column( DateTime, nullable=False, default=func.now(), server_default=func.now() ) ================================================ FILE: core/requirements.txt ================================================ # This file was autogenerated by uv via the following command: # uv pip compile --group indexers -o core/requirements.txt alembic==1.13.2 # via chai (pyproject.toml:indexers) certifi==2025.4.26 # via # chai (pyproject.toml:indexers) # requests charset-normalizer==3.4.2 # via # chai (pyproject.toml:indexers) # requests filelock==3.18.0 # via tldextract gitdb==4.0.12 # via gitpython gitpython==3.1.44 # via chai (pyproject.toml:indexers) idna==3.10 # via # chai (pyproject.toml:indexers) # requests # tldextract mako==1.3.10 # via alembic markupsafe==3.0.2 # via mako permalint==0.1.15 # via chai (pyproject.toml:indexers) psycopg2-binary==2.9.10 # via chai (pyproject.toml:indexers) pyyaml==6.0.2 # via chai (pyproject.toml:indexers) requests==2.32.4 # via # chai (pyproject.toml:indexers) # requests-file # tldextract 
requests-file==2.1.0 # via tldextract ruff==0.11.13 # via permalint schedule==1.2.2 # via chai (pyproject.toml:indexers) smmap==5.0.2 # via gitdb sqlalchemy==2.0.41 # via # chai (pyproject.toml:indexers) # alembic tldextract==5.3.0 # via permalint typing-extensions==4.14.0 # via # alembic # sqlalchemy urllib3==2.4.0 # via # chai (pyproject.toml:indexers) # requests ================================================ FILE: core/scheduler.py ================================================ import time from collections.abc import Callable from os import getenv from threading import Thread import schedule from core.logger import Logger FREQUENCY = int(getenv("FREQUENCY", 24)) class Scheduler: def __init__(self, name: str, frequency: int = FREQUENCY): self.name = name self.frequency = frequency self.logger = Logger(f"{name}_scheduler") self.job = None self.is_running = False def start(self, task: Callable, *args): self.job = schedule.every(self.frequency).hours.do(task, *args) self.is_running = True self.logger.log(f"scheduled {self.name} to run every {self.frequency} hours") def run_schedule(): while self.is_running: schedule.run_pending() time.sleep(1) Thread(target=run_schedule, daemon=True).start() def stop(self): if self.job: schedule.cancel_job(self.job) self.is_running = False self.logger.log(f"stopped {self.name} scheduler") def run_now(self, task: Callable, *args): self.logger.log(f"running {self.name} now") task(*args) ================================================ FILE: core/structs.py ================================================ from dataclasses import dataclass from datetime import datetime from uuid import UUID from core.models import URL, LegacyDependency, Package, PackageURL @dataclass class CurrentGraph: package_map: dict[str, Package] dependencies: dict[UUID, set[LegacyDependency]] @dataclass(frozen=True) class URLKey: url: str url_type_id: UUID @dataclass class CurrentURLs: url_map: dict[URLKey, URL] # URL and URL Type ID to URL object package_urls: dict[UUID, set[PackageURL]] # Package ID to PackageURL rows @dataclass class Cache: package_map: dict[str, Package] url_map: dict[URLKey, URL] package_urls: dict[UUID, set[PackageURL]] dependencies: dict[UUID, set[LegacyDependency]] @dataclass class DiffResult: new_packages: list[Package] new_urls: dict[URLKey, URL] new_package_urls: list[PackageURL] updated_packages: list[dict[str, UUID | str | datetime]] updated_package_urls: list[dict[str, UUID | datetime]] new_deps: list[LegacyDependency] removed_deps: list[LegacyDependency] ================================================ FILE: core/test.json ================================================ [ { 'id': UUID('b3133e5e-6d6b-458b-bd83-bf31032875a4'), 'package_id': UUID('7d6c7a3f-2c75-425f-8674-12efd7ce1ca4'), 'url_id': UUID('736acfdc-c3c2-4b53-ae6e-102fdd4f375a'), 'created_at': datetime.datetime(2025, 5, 19, 17, 5, 10, 255947), 'updated_at': datetime.datetime(2025, 5, 19, 17, 5, 10, 255947) }, {'id': UUID('a274bb96-a443-46a7-86ed-71c6ee87a89b'), 'package_id': UUID('506f5abc-f385-4cbf-9fb1-cd34053397f4'), 'url_id': UUID('d0346cef-80b0-456c-8de3-eb1b95481bac'), 'created_at': datetime.datetime(2025, 5, 19, 17, 5, 10, 255947), 'updated_at': datetime.datetime(2025, 5, 19, 17, 5, 10, 255947)}, {'id': UUID('17fe8d3a-78d8-42f5-a9f6-7b7abaa37d53'), 'package_id': UUID('a08b41eb-723d-4800-929d-cb6c6d3aeca4'), 'url_id': UUID('334ec74b-dda3-4bb0-99c5-f39abc132f5a'), 'created_at': datetime.datetime(2025, 5, 19, 17, 5, 10, 255947), 'updated_at': datetime.datetime(2025, 5, 19, 17, 5, 10, 
255947)}, {'id': UUID('5dd47edf-bc5d-43b5-9acd-d099ae9a22f0'), 'package_id': UUID('624c333c-e303-4d9b-a66e-c499bb3b4806'), 'url_id': UUID('6d866142-e2a9-4da0-96de-b5bfadc7cee9'), 'created_at': datetime.datetime(2025, 5, 19, 17, 5, 10, 255947), 'updated_at': datetime.datetime(2025, 5, 19, 17, 5, 10, 255947)}, {'id': UUID('c924c668-c6cb-4b6b-bac2-b588377a695d'), 'package_id': UUID('2d182e7a-1960-4376-8272-5ce401c369fd'), 'url_id': UUID('359015d5-8807-4cdc-b1c8-a4771b0069fe'), 'created_at': datetime.datetime(2025, 5, 19, 17, 5, 10, 255947), 'updated_at': datetime.datetime(2025, 5, 19, 17, 5, 10, 255947)}] ================================================ FILE: core/transformer.py ================================================ import csv import os from permalint import normalize_url, possible_names from sqlalchemy import UUID from core.db import DB from core.logger import Logger # this is a temporary fix, but sometimes the raw files have weird characters # and lots of data within certain fields # this fix allows us to read the files with no hassles csv.field_size_limit(10000000) # the transformer class knows what files to open, and provides a generic wrapper # for the data within the files # each package manager will have its own transformer that knows what data needs to be # extracted for our data model class Transformer: def __init__(self, name: str): self.name = name self.input = f"data/{name}/latest" self.logger = Logger(f"{name}_transformer") self.files: dict[str, str] = { "projects": "", "versions": "", "dependencies": "", "users": "", "urls": "", } self.url_types: dict[str, UUID] = {} def finder(self, file_name: str) -> str: input_dir = os.path.realpath(self.input) for root, _, files in os.walk(input_dir): if file_name in files: return os.path.join(root, file_name) else: self.logger.error(f"{file_name} not found in {input_dir}") raise FileNotFoundError(f"Missing {file_name} file") def open(self, file_name: str) -> str: file_path = self.finder(file_name) with open(file_path) as file: return file.read() def canonicalize(self, url: str) -> str: return normalize_url(url) def guess(self, db_client: DB, url: str, package_managers: list[UUID]) -> list[str]: names = possible_names(url) urls = db_client.search_names(names, package_managers) return urls ================================================ FILE: core/utils.py ================================================ from os import getenv from os.path import exists, join from typing import Any def safe_int(val: str) -> int | None: if val == "": return None return int(val) def build_query_params( items: list[dict[str, str]], cache: dict, attr: str ) -> list[str]: """Collect the distinct values of `attr` across `items` that are not already keyed in `cache` -- i.e. the values that still need to be queried for.""" params = set() for item in items: if item[attr] not in cache: params.add(item[attr]) return list(params) # env vars could be true or 1, or anything else -- here's a centralized location to # handle that def env_vars(env_var: str, default: str) -> bool: var = getenv(env_var, default).lower() return var == "true" or var == "1" # convert keys to snake case def convert_keys_to_snake_case(data: dict[str, Any]) -> dict[str, Any]: """Recursively converts dictionary keys from hyphen-case to snake_case.""" if isinstance(data, dict): new_dict = {} for key, value in data.items(): new_key = key.replace("-", "_") new_dict[new_key] = convert_keys_to_snake_case(value) # handle nested return new_dict elif isinstance(data, list): return [convert_keys_to_snake_case(item) for item in data] else: return data def is_github_url(url: str) -> bool: """Assumes the
url has been canonicalized by permalint""" def file_exists(*args) -> str: """Join *args into a path and return it, raising FileNotFoundError if it does not exist.""" file_path = join(*args) if not exists(file_path): raise FileNotFoundError(f"{file_path} not found") return file_path ================================================ FILE: db/README.md ================================================ # CHAI Data Model The CHAI data model is designed to represent package manager data in a unified and consistent form. The model's goal is _standardization_: smoothing over the complexities and idiosyncrasies of each individual package manager to provide one consistent shape for analysis, querying, and whatever your use case might be. ## Definitions We use certain nomenclature throughout the codebase: - `derived_id`: A unique identifier combining the package manager and package name. Like `crates/serde`, or `homebrew/a2ps`, or `npm/lodash`. - `import_id`: The original identifier from the source system. Like the `crate_id` integers provided by crates, or the package name provided by Homebrew. ## Core Entities ### Packages The Package model is a fundamental unit in our system. Each package is uniquely identified and associated with a specific package manager. Key fields: - `derived_id`: The unique `package_manager/name` identifier described above. - `name`: The package's name. - `package_manager_id`: Reference to the associated package manager. - `import_id`: The original identifier from the source system. - `readme`: Optional field for package documentation. ### Versions Each version is a different release of a package, and **must** be associated with a package. Key fields: - `package_id`: Reference to the associated package. - `version`: The version string. - `import_id`: The original identifier from the source system. - `size`, `published_at`, `license_id`, `downloads`, `checksum`: Optional metadata fields. ### Users The User model represents individuals or entities associated with packages. This data is not always available, but when it is, it's interesting. Key fields: - `username`: The user's name or identifier. - `source_id`: Reference to the data source (e.g., GitHub, npm, crates). - `import_id`: The original identifier from the source system. ### URLs The URL model is populated with all the URLs provided by the package manager source data: documentation, repository, source, issues, and other URL types. Each URL is associated with a URL type. The relationships between a URL and a Package are captured in the PackageURL model. Key fields: - `url`: The URL. - `url_type_id`: Reference to the type of URL (e.g., homepage, repository). ## Type Models These models define categorizations and types used across the system. All these values are loaded from the alembic service, specifically in the [load-values.sql](../alembic/load-values.sql) script. ### URLType Represents different types of URLs associated with packages. Predefined types (from load-values.sql): - `source` - `homepage` - `documentation` - `repository` ### DependsOnType Categorizes different types of dependencies between packages. Predefined types (from load-values.sql): - `build` - `development` - `runtime` - `test` - `optional` - `recommended` - `uses_from_macos` (Homebrew only) ### Source Represents the authoritative sources of package data. - `crates` - `homebrew` The following are not yet supported: - `npm` - `pypi` - `rubygems` - `github` ## Relationship Models These models establish connections between core entities.
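For a concrete picture before diving into each model, here is a sketch query (against the tables defined in `core/models/__init__.py`; `crates/serde` is just an illustrative `derived_id`) that uses the PackageURL relationship to resolve every typed URL recorded for a package:

```sql
-- walk package -> package_urls -> urls -> url_types
SELECT p.name, ut.name AS url_type, u.url
FROM packages AS p
JOIN package_urls AS pu ON pu.package_id = p.id
JOIN urls AS u ON u.id = pu.url_id
JOIN url_types AS ut ON ut.id = u.url_type_id
WHERE p.derived_id = 'crates/serde';
```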
### DependsOn In our data model, a specific release depends on a specific package. We include a field `semver_range`, which represents the range of dependency versions compatible with that release. > [!NOTE] > Not all package managers provide semantic versions. Homebrew does not, for example. > This is why `semver_range` is optional. > > On the other hand, the dependency type is non-optional, and the combination of > `version_id`, `dependency_id`, and `dependency_type_id` must be unique. Key fields: - `version_id`: The version that has the dependency. - `dependency_id`: The package that is depended upon. - `dependency_type_id`: The type of dependency. - `semver_range`: The version range for the dependency (optional). ### UserVersion and UserPackage These models associate users with specific versions and packages, respectively. ### PackageURL Associates packages with their various URLs. ## Caveats ### `Source` and `PackageManager` Relationship We've chosen to separate `Source` and `PackageManager` into distinct entities: - `Source`: Represents data sources that can provide information about packages, users, or both. - `PackageManager`: Specifically represents sources that are package managers. For example, 'crates' functions both as a package manager and as a source of user data. By keeping these concepts separate, we can accurately represent such systems and have a single place to modify any information about 'crates'. ## Additional Models ### License Represents software licenses associated with package versions. A great place to start contributing! ### LoadHistory Tracks the history of data loads for each package manager, useful for auditing and incremental updates. ================================================ FILE: db/queries.md ================================================ # Chai Data Exploration ```sql -- Packages with the longest lifetime SELECT p.name, SUM(v.downloads) AS "downloads", count(v.package_id) AS versions, min(v.published_at) AS "first published", max(v.published_at) AS "last published", max(v.published_at) - min(v.published_at) AS lifetime FROM packages AS p JOIN versions v ON v.package_id = p.id GROUP BY p.name ORDER BY lifetime DESC LIMIT 100; -- Packages sorted by dependents SELECT p.name, count(d.id) AS dependents FROM packages AS p JOIN dependencies AS d ON d.dependency_id = p.id GROUP BY p.name ORDER BY count(d.id) DESC LIMIT 100; -- Packages sorted by dependents with lifetime -- (DISTINCT guards against row fan-out from the versions join) SELECT p.name, count(DISTINCT d.id) AS dependents, min(v.published_at) AS "first published", max(v.published_at) AS "last published", max(v.published_at) - min(v.published_at) AS lifetime FROM packages AS p JOIN dependencies AS d ON d.dependency_id = p.id JOIN versions v ON v.package_id = p.id GROUP BY p.name ORDER BY count(DISTINCT d.id) DESC LIMIT 100; -- Packages sorted by dependents with downloads -- (note: sum(v.downloads) is inflated by the dependencies join; treat as approximate) SELECT p.name, count(DISTINCT d.id) AS dependents, sum(v.downloads) AS downloads FROM packages AS p JOIN dependencies AS d ON d.dependency_id = p.id JOIN versions v ON v.package_id = p.id GROUP BY p.name ORDER BY count(DISTINCT d.id) DESC LIMIT 100; -- Packages with most dependents sorted by download/dependent ratio SELECT name, dependents, downloads, (downloads::numeric / dependents) AS ratio FROM (SELECT p.name, count(DISTINCT d.id) AS dependents, sum(v.downloads) AS downloads FROM packages AS p JOIN dependencies AS d ON d.dependency_id = p.id JOIN versions v ON v.package_id = p.id GROUP BY p.name ORDER BY count(DISTINCT d.id) DESC LIMIT 1000) AS t ORDER BY ratio DESC; ``` ================================================ FILE:
docker-compose.yml ================================================ services: db: image: postgres shm_size: 256m environment: - POSTGRES_USER=postgres - POSTGRES_PASSWORD=s3cr3t ports: - "5435:5432" volumes: - ./data/db/data:/var/lib/postgresql healthcheck: test: ["CMD-SHELL", "pg_isready -U postgres"] interval: 5s timeout: 5s retries: 5 alembic: build: context: . dockerfile: ./alembic/Dockerfile environment: - CHAI_DATABASE_URL=${CHAI_DATABASE_URL:-postgresql://postgres:s3cr3t@db:5432/chai} - CHAI_DATABASE_ADMIN_URL=${CHAI_DATABASE_ADMIN_URL:-postgresql://postgres:s3cr3t@db:5432/postgres} - PGPASSWORD=${PGPASSWORD:-s3cr3t} depends_on: db: condition: service_healthy working_dir: /alembic entrypoint: ["./run_migrations.sh"] crates: build: context: . dockerfile: ./package_managers/crates/Dockerfile environment: - CHAI_DATABASE_URL=${CHAI_DATABASE_URL:-postgresql://postgres:s3cr3t@db:5432/chai} - NO_CACHE=${NO_CACHE:-true} - PYTHONPATH=/ - DEBUG=${DEBUG:-false} - TEST=${TEST:-false} - FETCH=${FETCH:-true} - FREQUENCY=${FREQUENCY:-24} - ENABLE_SCHEDULER=${ENABLE_SCHEDULER:-true} volumes: - ./data/crates:/data/crates depends_on: db: condition: service_healthy alembic: condition: service_completed_successfully homebrew: build: context: . dockerfile: ./package_managers/homebrew/Dockerfile environment: - CHAI_DATABASE_URL=${CHAI_DATABASE_URL:-postgresql://postgres:s3cr3t@db:5432/chai} - NO_CACHE=${NO_CACHE:-false} - DEBUG=${DEBUG:-false} - TEST=${TEST:-false} - FETCH=${FETCH:-true} - FREQUENCY=${FREQUENCY:-1} - ENABLE_SCHEDULER=${ENABLE_SCHEDULER:-true} - PYTHONPATH=/ volumes: - ./data/homebrew:/data/homebrew depends_on: db: condition: service_healthy alembic: condition: service_completed_successfully api: build: context: ./api dockerfile: Dockerfile environment: - DATABASE_URL=postgresql://postgres:s3cr3t@db:5432/chai - HOST=0.0.0.0 - PORT=8080 ports: - "8080:8080" depends_on: db: condition: service_healthy alembic: condition: service_completed_successfully restart: unless-stopped healthcheck: test: ["CMD", "curl", "-f", "http://localhost:8080/heartbeat"] interval: 30s timeout: 10s retries: 3 start_period: 5s debian: build: context: . dockerfile: ./package_managers/debian/Dockerfile environment: - CHAI_DATABASE_URL=${CHAI_DATABASE_URL:-postgresql://postgres:s3cr3t@db:5432/chai} - NO_CACHE=${NO_CACHE:-false} - PYTHONPATH=/ - DEBUG=${DEBUG:-false} - TEST=${TEST:-false} - FETCH=${FETCH:-true} - FREQUENCY=${FREQUENCY:-24} - ENABLE_SCHEDULER=${ENABLE_SCHEDULER:-true} volumes: - ./data/debian:/data/debian depends_on: db: condition: service_healthy alembic: condition: service_completed_successfully pkgx: build: context: . dockerfile: ./package_managers/pkgx/Dockerfile environment: - CHAI_DATABASE_URL=${CHAI_DATABASE_URL:-postgresql://postgres:s3cr3t@db:5432/chai} - NO_CACHE=${NO_CACHE:-false} - PYTHONPATH=/ - DEBUG=${DEBUG:-false} - TEST=${TEST:-false} - FETCH=${FETCH:-true} - FREQUENCY=${FREQUENCY:-24} - ENABLE_SCHEDULER=${ENABLE_SCHEDULER:-true} volumes: - ./data/pkgx:/data/pkgx depends_on: db: condition: service_healthy alembic: condition: service_completed_successfully ranker: build: context: . 
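# note: built from the repo root (context: .), matching the other services built from package-specific Dockerfiles, so shared code such as core/ is available to the image build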
dockerfile: ./ranker/Dockerfile environment: - CHAI_DATABASE_URL=${CHAI_DATABASE_URL:-postgresql://postgres:s3cr3t@db:5432/chai} - PYTHONPATH=/ - LOAD=${LOAD:-false} - DEBUG=${DEBUG:-false} depends_on: db: condition: service_healthy alembic: condition: service_completed_successfully deploy: resources: limits: memory: 8G ================================================ FILE: examples/sbom-meta/README.md ================================================ # SBOM-Meta An example Chai application that displays package metadata for [SBOMs](https://github.com/anchore/syft) (software bill of materials). ## Installation 1. Start the [Chai DB](https://github.com/teaxyz/chai-oss) with `docker compose up`. 2. Run `go install` or `go build` to generate a binary. ## Usage Run `sbom-meta` in the root directory of any repository to get a list of dependencies with metadata. ```bash git clone git@github.com:starship/starship.git cd starship sbom-meta ``` You can sort any of the fields, ascending or descending: ```bash sbom-meta --sort downloads,desc sbom-meta --sort published,asc ``` Use the `--json` flag to output JSON: ```bash sbom-meta --json | jq .[1].name ``` ================================================ FILE: examples/sbom-meta/go.mod ================================================ module sbom-meta go 1.23.2 require ( github.com/anchore/syft v1.14.0 github.com/caarlos0/env v3.5.0+incompatible github.com/dustin/go-humanize v1.0.1 github.com/fatih/color v1.17.1-0.20241003070628-1c8d8706604e github.com/jedib0t/go-pretty/v6 v6.6.0 github.com/jmoiron/sqlx v1.4.0 github.com/lib/pq v1.10.9 ) require ( dario.cat/mergo v1.0.1 // indirect github.com/AdaLogics/go-fuzz-headers v0.0.0-20230811130428-ced1acdcaa24 // indirect github.com/AdamKorcz/go-118-fuzz-build v0.0.0-20230306123547-8075edf89bb0 // indirect github.com/BurntSushi/toml v1.4.0 // indirect github.com/CycloneDX/cyclonedx-go v0.9.1 // indirect github.com/DataDog/zstd v1.5.5 // indirect github.com/Masterminds/goutils v1.1.1 // indirect github.com/Masterminds/semver v1.5.0 // indirect github.com/Masterminds/semver/v3 v3.3.0 // indirect github.com/Masterminds/sprig/v3 v3.3.0 // indirect github.com/Microsoft/go-winio v0.6.2 // indirect github.com/Microsoft/hcsshim v0.11.7 // indirect github.com/ProtonMail/go-crypto v1.1.6 // indirect github.com/acobaugh/osrelease v0.1.0 // indirect github.com/adrg/xdg v0.5.0 // indirect github.com/anchore/clio v0.0.0-20240522144804-d81e109008aa // indirect github.com/anchore/fangs v0.0.0-20240903175602-e716ef12c23d // indirect github.com/anchore/go-collections v0.0.0-20240216171411-9321230ce537 // indirect github.com/anchore/go-logger v0.0.0-20230725134548-c21dafa1ec5a // indirect github.com/anchore/go-macholibre v0.0.0-20220308212642-53e6d0aaf6fb // indirect github.com/anchore/go-struct-converter v0.0.0-20221118182256-c68fdcfa2092 // indirect github.com/anchore/go-version v1.2.2-0.20200701162849-18adb9c92b9b // indirect github.com/anchore/packageurl-go v0.1.1-0.20240507183024-848e011fc24f // indirect github.com/anchore/stereoscope v0.0.4-0.20241005180410-efa76446cc1c // indirect github.com/andybalholm/brotli v1.0.4 // indirect github.com/aquasecurity/go-pep440-version v0.0.0-20210121094942-22b2f8951d46 // indirect github.com/aquasecurity/go-version v0.0.0-20210121072130-637058cfe492 // indirect github.com/aymanbagabas/go-osc52/v2 v2.0.1 // indirect github.com/becheran/wildmatch-go v1.0.0 // indirect github.com/bmatcuk/doublestar/v4 v4.6.1 // indirect github.com/charmbracelet/lipgloss v0.13.0 // indirect 
github.com/charmbracelet/x/ansi v0.2.3 // indirect github.com/cloudflare/circl v1.6.1 // indirect github.com/containerd/cgroups v1.1.0 // indirect github.com/containerd/containerd v1.7.28 // indirect github.com/containerd/containerd/api v1.8.0 // indirect github.com/containerd/continuity v0.4.4 // indirect github.com/containerd/errdefs v0.3.0 // indirect github.com/containerd/fifo v1.1.0 // indirect github.com/containerd/log v0.1.0 // indirect github.com/containerd/platforms v0.2.1 // indirect github.com/containerd/stargz-snapshotter/estargz v0.14.3 // indirect github.com/containerd/ttrpc v1.2.7 // indirect github.com/containerd/typeurl/v2 v2.1.1 // indirect github.com/cyphar/filepath-securejoin v0.4.1 // indirect github.com/deitch/magic v0.0.0-20230404182410-1ff89d7342da // indirect github.com/distribution/reference v0.6.0 // indirect github.com/docker/cli v27.1.1+incompatible // indirect github.com/docker/distribution v2.8.3+incompatible // indirect github.com/docker/docker v27.3.1+incompatible // indirect github.com/docker/docker-credential-helpers v0.7.0 // indirect github.com/docker/go-connections v0.4.0 // indirect github.com/docker/go-events v0.0.0-20190806004212-e31b211e4f1c // indirect github.com/docker/go-units v0.5.0 // indirect github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5 // indirect github.com/edsrzf/mmap-go v1.1.0 // indirect github.com/elliotchance/phpserialize v1.4.0 // indirect github.com/emirpasic/gods v1.18.1 // indirect github.com/facebookincubator/nvdtools v0.1.5 // indirect github.com/felixge/fgprof v0.9.3 // indirect github.com/felixge/httpsnoop v1.0.4 // indirect github.com/fsnotify/fsnotify v1.7.0 // indirect github.com/gabriel-vasile/mimetype v1.4.6 // indirect github.com/github/go-spdx/v2 v2.3.2 // indirect github.com/go-git/gcfg v1.5.1-0.20230307220236-3a3c6141e376 // indirect github.com/go-git/go-billy/v5 v5.6.2 // indirect github.com/go-git/go-git/v5 v5.16.2 // indirect github.com/go-logr/logr v1.4.2 // indirect github.com/go-logr/stdr v1.2.2 // indirect github.com/go-restruct/restruct v1.2.0-alpha // indirect github.com/gogo/protobuf v1.3.2 // indirect github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8 // indirect github.com/golang/protobuf v1.5.4 // indirect github.com/golang/snappy v0.0.4 // indirect github.com/google/go-cmp v0.7.0 // indirect github.com/google/go-containerregistry v0.20.2 // indirect github.com/google/licensecheck v0.3.1 // indirect github.com/google/pprof v0.0.0-20240409012703-83162a5b38cd // indirect github.com/google/uuid v1.6.0 // indirect github.com/gookit/color v1.5.4 // indirect github.com/hashicorp/errwrap v1.1.0 // indirect github.com/hashicorp/go-multierror v1.1.1 // indirect github.com/hashicorp/hcl v1.0.0 // indirect github.com/huandu/xstrings v1.5.0 // indirect github.com/iancoleman/strcase v0.3.0 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99 // indirect github.com/jinzhu/copier v0.4.0 // indirect github.com/kastenhq/goversion v0.0.0-20230811215019-93b2f8823953 // indirect github.com/kevinburke/ssh_config v1.2.0 // indirect github.com/klauspost/compress v1.17.8 // indirect github.com/klauspost/pgzip v1.2.5 // indirect github.com/knqyf263/go-rpmdb v0.1.1 // indirect github.com/lucasb-eyer/go-colorful v1.2.0 // indirect github.com/magiconair/properties v1.8.7 // indirect github.com/mattn/go-colorable v0.1.13 // indirect github.com/mattn/go-isatty v0.0.20 // indirect github.com/mattn/go-runewidth v0.0.16 // 
indirect github.com/mgutz/ansi v0.0.0-20200706080929-d51e80ef957d // indirect github.com/mholt/archiver/v3 v3.5.1 // indirect github.com/microsoft/go-rustaudit v0.0.0-20220730194248-4b17361d90a5 // indirect github.com/mitchellh/copystructure v1.2.0 // indirect github.com/mitchellh/go-homedir v1.1.0 // indirect github.com/mitchellh/hashstructure/v2 v2.0.2 // indirect github.com/mitchellh/mapstructure v1.5.0 // indirect github.com/mitchellh/reflectwalk v1.0.2 // indirect github.com/moby/docker-image-spec v1.3.1 // indirect github.com/moby/locker v1.0.1 // indirect github.com/moby/sys/mountinfo v0.7.2 // indirect github.com/moby/sys/sequential v0.5.0 // indirect github.com/moby/sys/signal v0.7.0 // indirect github.com/moby/sys/user v0.3.0 // indirect github.com/moby/sys/userns v0.1.0 // indirect github.com/muesli/termenv v0.15.2 // indirect github.com/nwaples/rardecode v1.1.0 // indirect github.com/olekukonko/tablewriter v0.0.5 // indirect github.com/opencontainers/go-digest v1.0.0 // indirect github.com/opencontainers/image-spec v1.1.0 // indirect github.com/opencontainers/runtime-spec v1.1.0 // indirect github.com/opencontainers/selinux v1.11.0 // indirect github.com/pborman/indent v1.2.1 // indirect github.com/pelletier/go-toml v1.9.5 // indirect github.com/pelletier/go-toml/v2 v2.2.2 // indirect github.com/pierrec/lz4/v4 v4.1.19 // indirect github.com/pjbgf/sha1cd v0.3.2 // indirect github.com/pkg/errors v0.9.1 // indirect github.com/pkg/profile v1.7.0 // indirect github.com/rivo/uniseg v0.4.7 // indirect github.com/saferwall/pe v1.5.4 // indirect github.com/sagikazarmark/locafero v0.4.0 // indirect github.com/sagikazarmark/slog-shim v0.1.0 // indirect github.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d // indirect github.com/sassoftware/go-rpmutils v0.4.0 // indirect github.com/scylladb/go-set v1.0.3-0.20200225121959-cc7b2070d91e // indirect github.com/secDre4mer/pkcs7 v0.0.0-20240322103146-665324a4461d // indirect github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3 // indirect github.com/shopspring/decimal v1.4.0 // indirect github.com/sirupsen/logrus v1.9.3 // indirect github.com/skeema/knownhosts v1.3.1 // indirect github.com/sourcegraph/conc v0.3.0 // indirect github.com/spdx/tools-golang v0.5.5 // indirect github.com/spf13/afero v1.11.0 // indirect github.com/spf13/cast v1.7.0 // indirect github.com/spf13/cobra v1.8.1 // indirect github.com/spf13/pflag v1.0.5 // indirect github.com/spf13/viper v1.19.0 // indirect github.com/subosito/gotenv v1.6.0 // indirect github.com/sylabs/sif/v2 v2.17.1 // indirect github.com/sylabs/squashfs v1.0.0 // indirect github.com/therootcompany/xz v1.0.1 // indirect github.com/ulikunitz/xz v0.5.12 // indirect github.com/vbatts/go-mtree v0.5.4 // indirect github.com/vbatts/tar-split v0.11.3 // indirect github.com/vifraa/gopom v1.0.0 // indirect github.com/wagoodman/go-partybus v0.0.0-20230516145632-8ccac152c651 // indirect github.com/wagoodman/go-progress v0.0.0-20230925121702-07e42b3cdba0 // indirect github.com/xanzy/ssh-agent v0.3.3 // indirect github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 // indirect github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e // indirect go.opencensus.io v0.24.0 // indirect go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.49.0 // indirect go.opentelemetry.io/otel v1.24.0 // indirect go.opentelemetry.io/otel/metric v1.24.0 // indirect go.opentelemetry.io/otel/trace v1.24.0 // indirect go.uber.org/atomic v1.9.0 // indirect go.uber.org/multierr v1.9.0 // indirect 
golang.org/x/crypto v0.40.0 // indirect golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 // indirect golang.org/x/mod v0.26.0 // indirect golang.org/x/net v0.42.0 // indirect golang.org/x/sync v0.16.0 // indirect golang.org/x/sys v0.34.0 // indirect golang.org/x/term v0.33.0 // indirect golang.org/x/text v0.27.0 // indirect golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 // indirect google.golang.org/genproto v0.0.0-20240213162025-012b6fc9bca9 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20240401170217-c3f982113cda // indirect google.golang.org/grpc v1.62.1 // indirect google.golang.org/protobuf v1.35.2 // indirect gopkg.in/ini.v1 v1.67.0 // indirect gopkg.in/warnings.v0 v0.1.2 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect ) ================================================ FILE: examples/sbom-meta/go.sum ================================================ cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= cloud.google.com/go v0.38.0/go.mod h1:990N+gfupTy94rShfmMCWGDn0LpTmnzTp2qbd1dvSRU= cloud.google.com/go v0.44.1/go.mod h1:iSa0KzasP4Uvy3f1mN/7PiObzGgflwredwwASm/v6AU= cloud.google.com/go v0.44.2/go.mod h1:60680Gw3Yr4ikxnPRS/oxxkBccT6SA1yMk63TGekxKY= cloud.google.com/go v0.45.1/go.mod h1:RpBamKRgapWJb87xiFSdk4g1CME7QZg3uwTez+TSTjc= cloud.google.com/go v0.46.3/go.mod h1:a6bKKbmY7er1mI7TEI4lsAkts/mkhTSZK8w33B4RAg0= cloud.google.com/go v0.50.0/go.mod h1:r9sluTvynVuxRIOHXQEHMFffphuXHOMZMycpNR5e6To= cloud.google.com/go v0.52.0/go.mod h1:pXajvRH/6o3+F9jDHZWQ5PbGhn+o8w9qiu/CffaVdO4= cloud.google.com/go v0.53.0/go.mod h1:fp/UouUEsRkN6ryDKNW/Upv/JBKnv6WDthjR6+vze6M= cloud.google.com/go v0.54.0/go.mod h1:1rq2OEkV3YMf6n/9ZvGWI3GWw0VoqH/1x2nd8Is/bPc= cloud.google.com/go v0.56.0/go.mod h1:jr7tqZxxKOVYizybht9+26Z/gUq7tiRzu+ACVAMbKVk= cloud.google.com/go v0.57.0/go.mod h1:oXiQ6Rzq3RAkkY7N6t3TcE6jE+CIBBbA36lwQ1JyzZs= cloud.google.com/go v0.62.0/go.mod h1:jmCYTdRCQuc1PHIIJ/maLInMho30T/Y0M4hTdTShOYc= cloud.google.com/go v0.65.0/go.mod h1:O5N8zS7uWy9vkA9vayVHs65eM1ubvY4h553ofrNHObY= cloud.google.com/go v0.72.0/go.mod h1:M+5Vjvlc2wnp6tjzE102Dw08nGShTscUx2nZMufOKPI= cloud.google.com/go v0.74.0/go.mod h1:VV1xSbzvo+9QJOxLDaJfTjx5e+MePCpCWwvftOeQmWk= cloud.google.com/go v0.78.0/go.mod h1:QjdrLG0uq+YwhjoVOLsS1t7TW8fs36kLs4XO5R5ECHg= cloud.google.com/go v0.79.0/go.mod h1:3bzgcEeQlzbuEAYu4mrWhKqWjmpprinYgKJLgKHnbb8= cloud.google.com/go v0.81.0/go.mod h1:mk/AM35KwGk/Nm2YSeZbxXdrNK3KZOYHmLkOqC2V6E0= cloud.google.com/go v0.83.0/go.mod h1:Z7MJUsANfY0pYPdw0lbnivPx4/vhy/e2FEkSkF7vAVY= cloud.google.com/go v0.84.0/go.mod h1:RazrYuxIK6Kb7YrzzhPoLmCVzl7Sup4NrbKPg8KHSUM= cloud.google.com/go v0.87.0/go.mod h1:TpDYlFy7vuLzZMMZ+B6iRiELaY7z/gJPaqbMx6mlWcY= cloud.google.com/go v0.90.0/go.mod h1:kRX0mNRHe0e2rC6oNakvwQqzyDmg57xJ+SZU1eT2aDQ= cloud.google.com/go v0.93.3/go.mod h1:8utlLll2EF5XMAV15woO4lSbWQlk8rer9aLOfLh7+YI= cloud.google.com/go v0.94.1/go.mod h1:qAlAugsXlC+JWO+Bke5vCtc9ONxjQT3drlTTnAplMW4= cloud.google.com/go v0.97.0/go.mod h1:GF7l59pYBVlXQIBLx3a761cZ41F9bBH3JUlihCt2Udc= cloud.google.com/go v0.98.0/go.mod h1:ua6Ush4NALrHk5QXDWnjvZHN93OuF0HfuEPq9I1X0cM= cloud.google.com/go v0.99.0/go.mod h1:w0Xx2nLzqWJPuozYQX+hFfCSI8WioryfRDzkoI/Y2ZA= cloud.google.com/go/bigquery v1.0.1/go.mod h1:i/xbL2UlR5RvWAURpBYZTtm/cXjCha9lbfbpx4poX+o= cloud.google.com/go/bigquery v1.3.0/go.mod h1:PjpwJnslEMmckchkHFfq+HTD2DmtT67aNFKH1/VBDHE= cloud.google.com/go/bigquery v1.4.0/go.mod 
h1:S8dzgnTigyfTmLBfrtrhyYhwRxG72rYxvftPBK2Dvzc= cloud.google.com/go/bigquery v1.5.0/go.mod h1:snEHRnqQbz117VIFhE8bmtwIDY80NLUZUMb4Nv6dBIg= cloud.google.com/go/bigquery v1.7.0/go.mod h1://okPTzCYNXSlb24MZs83e2Do+h+VXtc4gLoIoXIAPc= cloud.google.com/go/bigquery v1.8.0/go.mod h1:J5hqkt3O0uAFnINi6JXValWIb1v0goeZM77hZzJN/fQ= cloud.google.com/go/datastore v1.0.0/go.mod h1:LXYbyblFSglQ5pkeyhO+Qmw7ukd3C+pD7TKLgZqpHYE= cloud.google.com/go/datastore v1.1.0/go.mod h1:umbIZjpQpHh4hmRpGhH4tLFup+FVzqBi1b3c64qFpCk= cloud.google.com/go/firestore v1.6.1/go.mod h1:asNXNOzBdyVQmEU+ggO8UPodTkEVFW5Qx+rwHnAz+EY= cloud.google.com/go/pubsub v1.0.1/go.mod h1:R0Gpsv3s54REJCy4fxDixWD93lHJMoZTyQ2kNxGRt3I= cloud.google.com/go/pubsub v1.1.0/go.mod h1:EwwdRX2sKPjnvnqCa270oGRyludottCI76h+R3AArQw= cloud.google.com/go/pubsub v1.2.0/go.mod h1:jhfEVHT8odbXTkndysNHCcx0awwzvfOlguIAii9o8iA= cloud.google.com/go/pubsub v1.3.1/go.mod h1:i+ucay31+CNRpDW4Lu78I4xXG+O1r/MAHgjpRVR+TSU= cloud.google.com/go/storage v1.0.0/go.mod h1:IhtSnM/ZTZV8YYJWCY8RULGVqBDmpoyjwiyrjsg+URw= cloud.google.com/go/storage v1.5.0/go.mod h1:tpKbwo567HUNpVclU5sGELwQWBDZ8gh0ZeosJ0Rtdos= cloud.google.com/go/storage v1.6.0/go.mod h1:N7U0C8pVQ/+NIKOBQyamJIeKQKkZ+mxpohlUTyfDhBk= cloud.google.com/go/storage v1.8.0/go.mod h1:Wv1Oy7z6Yz3DshWRJFhqM/UCfaWIRTdp0RXyy7KQOVs= cloud.google.com/go/storage v1.10.0/go.mod h1:FLPqc6j+Ki4BU591ie1oL6qBQGu2Bl/tZ9ullr3+Kg0= dario.cat/mergo v1.0.1 h1:Ra4+bf83h2ztPIQYNP99R6m+Y7KfnARDfID+a+vLl4s= dario.cat/mergo v1.0.1/go.mod h1:uNxQE+84aUszobStD9th8a29P2fMDhsBdgRYvZOxGmk= dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU= filippo.io/edwards25519 v1.1.0 h1:FNf4tywRC1HmFuKW5xopWpigGjJKiJSV0Cqo0cJWDaA= filippo.io/edwards25519 v1.1.0/go.mod h1:BxyFTGdWcka3PhytdK4V28tE5sGfRvvvRV7EaN4VDT4= github.com/AdaLogics/go-fuzz-headers v0.0.0-20230811130428-ced1acdcaa24 h1:bvDV9vkmnHYOMsOr4WLk+Vo07yKIzd94sVoIqshQ4bU= github.com/AdaLogics/go-fuzz-headers v0.0.0-20230811130428-ced1acdcaa24/go.mod h1:8o94RPi1/7XTJvwPpRSzSUedZrtlirdB3r9Z20bi2f8= github.com/AdamKorcz/go-118-fuzz-build v0.0.0-20230306123547-8075edf89bb0 h1:59MxjQVfjXsBpLy+dbd2/ELV5ofnUkUZBvWSC85sheA= github.com/AdamKorcz/go-118-fuzz-build v0.0.0-20230306123547-8075edf89bb0/go.mod h1:OahwfttHWG6eJ0clwcfBAHoDI6X/LV/15hx/wlMZSrU= github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 h1:L/gRVlceqvL25UVaW/CKtUDjefjrs0SPonmDGUVOYP0= github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/BurntSushi/toml v0.4.1/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ= github.com/BurntSushi/toml v1.2.1/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ= github.com/BurntSushi/toml v1.4.0 h1:kuoIxZQy2WRRk1pttg9asf+WVv6tWQuBNVmK8+nqPr0= github.com/BurntSushi/toml v1.4.0/go.mod h1:ukJfTF/6rtPPRCnwkur4qwRxa8vTRFBF0uk2lLoLwho= github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= github.com/CycloneDX/cyclonedx-go v0.9.1 h1:yffaWOZsv77oTJa/SdVZYdgAgFioCeycBUKkqS2qzQM= github.com/CycloneDX/cyclonedx-go v0.9.1/go.mod h1:NE/EWvzELOFlG6+ljX/QeMlVt9VKcTwu8u0ccsACEsw= github.com/DataDog/datadog-go v3.2.0+incompatible/go.mod h1:LButxg5PwREeZtORoXG3tL4fMGNddJ+vMq1mwgfaqoQ= github.com/DataDog/zstd v1.5.5 h1:oWf5W7GtOLgp6bciQYDmhHHjdhYkALu6S/5Ni9ZgSvQ= github.com/DataDog/zstd v1.5.5/go.mod 
h1:g4AWEaM3yOg3HYfnJ3YIawPnVdXJh9QME85blwSAmyw= github.com/Masterminds/goutils v1.1.1 h1:5nUrii3FMTL5diU80unEVvNevw1nH4+ZV4DSLVJLSYI= github.com/Masterminds/goutils v1.1.1/go.mod h1:8cTjp+g8YejhMuvIA5y2vz3BpJxksy863GQaJW2MFNU= github.com/Masterminds/semver v1.5.0 h1:H65muMkzWKEuNDnfl9d70GUjFniHKHRbFPGBuZ3QEww= github.com/Masterminds/semver v1.5.0/go.mod h1:MB6lktGJrhw8PrUyiEoblNEGEQ+RzHPF078ddwwvV3Y= github.com/Masterminds/semver/v3 v3.3.0 h1:B8LGeaivUe71a5qox1ICM/JLl0NqZSW5CHyL+hmvYS0= github.com/Masterminds/semver/v3 v3.3.0/go.mod h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM= github.com/Masterminds/sprig/v3 v3.3.0 h1:mQh0Yrg1XPo6vjYXgtf5OtijNAKJRNcTdOOGZe3tPhs= github.com/Masterminds/sprig/v3 v3.3.0/go.mod h1:Zy1iXRYNqNLUolqCpL4uhk6SHUMAOSCzdgBfDb35Lz0= github.com/Microsoft/go-winio v0.5.2/go.mod h1:WpS1mjBmmwHBEWmogvA2mj8546UReBk4v8QkMxJ6pZY= github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY= github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU= github.com/Microsoft/hcsshim v0.11.7 h1:vl/nj3Bar/CvJSYo7gIQPyRWc9f3c6IeSNavBTSZNZQ= github.com/Microsoft/hcsshim v0.11.7/go.mod h1:MV8xMfmECjl5HdO7U/3/hFVnkmSBjAjmA09d4bExKcU= github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU= github.com/OneOfOne/xxhash v1.2.8 h1:31czK/TI9sNkxIKfaUfGlU47BAxQ0ztGgd9vPyqimf8= github.com/OneOfOne/xxhash v1.2.8/go.mod h1:eZbhyaAYD41SGSSsnmcpxVoRiQ/MPUTjUdIIOT9Um7Q= github.com/ProtonMail/go-crypto v1.1.6 h1:ZcV+Ropw6Qn0AX9brlQLAUXfqLBc7Bl+f/DmNxpLfdw= github.com/ProtonMail/go-crypto v1.1.6/go.mod h1:rA3QumHc/FZ8pAHreoekgiAbzpNsfQAosU5td4SnOrE= github.com/acobaugh/osrelease v0.1.0 h1:Yb59HQDGGNhCj4suHaFQQfBps5wyoKLSSX/J/+UifRE= github.com/acobaugh/osrelease v0.1.0/go.mod h1:4bFEs0MtgHNHBrmHCt67gNisnabCRAlzdVasCEGHTWY= github.com/adrg/xdg v0.5.0 h1:dDaZvhMXatArP1NPHhnfaQUqWBLBsmx1h1HXQdMoFCY= github.com/adrg/xdg v0.5.0/go.mod h1:dDdY4M4DF9Rjy4kHPeNL+ilVF+p2lK8IdM9/rTSGcI4= github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= github.com/anchore/clio v0.0.0-20240522144804-d81e109008aa h1:pwlAn4O9SBUnlgfa69YcqIynbUyobLVFYu8HxSoCffA= github.com/anchore/clio v0.0.0-20240522144804-d81e109008aa/go.mod h1:nD3H5uIvjxlfmakOBgtyFQbk5Zjp3l538kxfpHPslzI= github.com/anchore/fangs v0.0.0-20240903175602-e716ef12c23d h1:ZD4wdCBgJJzJybjTUIEiiupLF7B9H3WLuBTjspBO2Mc= github.com/anchore/fangs v0.0.0-20240903175602-e716ef12c23d/go.mod h1:Xh4ObY3fmoMzOEVXwDtS1uK44JC7+nRD0n29/1KYFYg= github.com/anchore/go-collections v0.0.0-20240216171411-9321230ce537 h1:GjNGuwK5jWjJMyVppBjYS54eOiiSNv4Ba869k4wh72Q= github.com/anchore/go-collections v0.0.0-20240216171411-9321230ce537/go.mod h1:1aiktV46ATCkuVg0O573ZrH56BUawTECPETbZyBcqT8= github.com/anchore/go-logger v0.0.0-20230725134548-c21dafa1ec5a h1:nJ2G8zWKASyVClGVgG7sfM5mwoZlZ2zYpIzN2OhjWkw= github.com/anchore/go-logger v0.0.0-20230725134548-c21dafa1ec5a/go.mod h1:ubLFmlsv8/DFUQrZwY5syT5/8Er3ugSr4rDFwHsE3hg= github.com/anchore/go-macholibre v0.0.0-20220308212642-53e6d0aaf6fb h1:iDMnx6LIjtjZ46C0akqveX83WFzhpTD3eqOthawb5vU= github.com/anchore/go-macholibre 
v0.0.0-20220308212642-53e6d0aaf6fb/go.mod h1:DmTY2Mfcv38hsHbG78xMiTDdxFtkHpgYNVDPsF2TgHk= github.com/anchore/go-struct-converter v0.0.0-20221118182256-c68fdcfa2092 h1:aM1rlcoLz8y5B2r4tTLMiVTrMtpfY0O8EScKJxaSaEc= github.com/anchore/go-struct-converter v0.0.0-20221118182256-c68fdcfa2092/go.mod h1:rYqSE9HbjzpHTI74vwPvae4ZVYZd1lue2ta6xHPdblA= github.com/anchore/go-testutils v0.0.0-20200925183923-d5f45b0d3c04 h1:VzprUTpc0vW0nnNKJfJieyH/TZ9UYAnTZs5/gHTdAe8= github.com/anchore/go-testutils v0.0.0-20200925183923-d5f45b0d3c04/go.mod h1:6dK64g27Qi1qGQZ67gFmBFvEHScy0/C8qhQhNe5B5pQ= github.com/anchore/go-version v1.2.2-0.20200701162849-18adb9c92b9b h1:e1bmaoJfZVsCYMrIZBpFxwV26CbsuoEh5muXD5I1Ods= github.com/anchore/go-version v1.2.2-0.20200701162849-18adb9c92b9b/go.mod h1:Bkc+JYWjMCF8OyZ340IMSIi2Ebf3uwByOk6ho4wne1E= github.com/anchore/packageurl-go v0.1.1-0.20240507183024-848e011fc24f h1:B/E9ixKNCasntpoch61NDaQyGPDXLEJlL+B9B/PbdbA= github.com/anchore/packageurl-go v0.1.1-0.20240507183024-848e011fc24f/go.mod h1:Blo6OgJNiYF41ufcgHKkbCKF2MDOMlrqhXv/ij6ocR4= github.com/anchore/stereoscope v0.0.4-0.20241005180410-efa76446cc1c h1:JXezMk8fF5ns4AgRGW49SGfoRgDjJHsDmcpNw272jkU= github.com/anchore/stereoscope v0.0.4-0.20241005180410-efa76446cc1c/go.mod h1:GMupz2FoBhy5RTTmawU06c2pZxgVTceahLWiwJef2uI= github.com/anchore/syft v1.14.0 h1:BeMmc3a9d/63O+nPM8QfV1Olh3r+pYf95JOqbfN4gQg= github.com/anchore/syft v1.14.0/go.mod h1:8bN2W/Tr4Mmm42h2XB9LPiPOps+NzCFIaQOKLBGb2b8= github.com/andreyvit/diff v0.0.0-20170406064948-c7f18ee00883/go.mod h1:rCTlJbsFo29Kk6CurOXKm700vrz8f0KW0JNfpkRJY/8= github.com/andybalholm/brotli v1.0.1/go.mod h1:loMXtMfwqflxFJPmdbJO0a3KNoPuLBgiu3qAvBg8x/Y= github.com/andybalholm/brotli v1.0.4 h1:V7DdXeJtZscaqfNuAdSRuRFzuiKlHSC/Zh3zl9qY3JY= github.com/andybalholm/brotli v1.0.4/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig= github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be h1:9AeTilPcZAjCFIImctFaOjnTIavg87rW78vTPkQqLI8= github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be/go.mod h1:ySMOLuWl6zY27l47sB3qLNK6tF2fkHG55UZxx8oIVo4= github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY= github.com/aquasecurity/go-pep440-version v0.0.0-20210121094942-22b2f8951d46 h1:vmXNl+HDfqqXgr0uY1UgK1GAhps8nbAAtqHNBcgyf+4= github.com/aquasecurity/go-pep440-version v0.0.0-20210121094942-22b2f8951d46/go.mod h1:olhPNdiiAAMiSujemd1O/sc6GcyePr23f/6uGKtthNg= github.com/aquasecurity/go-version v0.0.0-20210121072130-637058cfe492 h1:rcEG5HI490FF0a7zuvxOxen52ddygCfNVjP0XOCMl+M= github.com/aquasecurity/go-version v0.0.0-20210121072130-637058cfe492/go.mod h1:9Beu8XsUNNfzml7WBf3QmyPToP1wm1Gj/Vc5UJKqTzU= github.com/armon/circbuf v0.0.0-20150827004946-bbbad097214e/go.mod h1:3U/XgcO3hCbHZ8TKRvWD2dDTCfh9M9ya+I9JpbB7O8o= github.com/armon/go-metrics v0.0.0-20180917152333-f0300d1749da/go.mod h1:Q73ZrmVTwzkszR9V5SSuryQ31EELlFMUz1kKyl939pY= github.com/armon/go-metrics v0.3.10/go.mod h1:4O98XIr/9W0sxpJ8UaYkvjk10Iff7SnFrb4QAOwNTFc= github.com/armon/go-radix v0.0.0-20180808171621-7fddfc383310/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8= github.com/armon/go-radix v1.0.0/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8= github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio= github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs= github.com/atotto/clipboard v0.1.4 h1:EH0zSVneZPSuFR11BlR9YppQTVDbh5+16AmcJi4g1z4= github.com/atotto/clipboard v0.1.4/go.mod 
h1:ZY9tmq7sm5xIbd9bOK4onWV4S6X0u6GY7Vn0Yu86PYI= github.com/aymanbagabas/go-osc52/v2 v2.0.1 h1:HwpRHbFMcZLEVr42D4p7XBqjyuxQH5SMiErDT4WkJ2k= github.com/aymanbagabas/go-osc52/v2 v2.0.1/go.mod h1:uYgXzlJ7ZpABp8OJ+exZzJJhRNQ2ASbcXHWsFqH8hp8= github.com/becheran/wildmatch-go v1.0.0 h1:mE3dGGkTmpKtT4Z+88t8RStG40yN9T+kFEGj2PZFSzA= github.com/becheran/wildmatch-go v1.0.0/go.mod h1:gbMvj0NtVdJ15Mg/mH9uxk2R1QCistMyU7d9KFzroX4= github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs= github.com/bmatcuk/doublestar/v4 v4.6.1 h1:FH9SifrbvJhnlQpztAx++wlkk70QBf0iBWDwNy7PA4I= github.com/bmatcuk/doublestar/v4 v4.6.1/go.mod h1:xBQ8jztBU6kakFMg+8WGxn0c6z1fTSPVIjEY1Wr7jzc= github.com/bradleyjkemp/cupaloy/v2 v2.8.0 h1:any4BmKE+jGIaMpnU8YgH/I2LPiLBufr6oMMlVBbn9M= github.com/bradleyjkemp/cupaloy/v2 v2.8.0/go.mod h1:bm7JXdkRd4BHJk9HpwqAI8BoAY1lps46Enkdqw6aRX0= github.com/caarlos0/env v3.5.0+incompatible h1:Yy0UN8o9Wtr/jGHZDpCBLpNrzcFLLM2yixi/rBrKyJs= github.com/caarlos0/env v3.5.0+incompatible/go.mod h1:tdCsowwCzMLdkqRYDlHpZCp2UooDD3MspDBjZ2AD02Y= github.com/cenkalti/backoff/v4 v4.2.1 h1:y4OZtCnogmCPw98Zjyt5a6+QwPLGkiQsYW5oUqylYbM= github.com/cenkalti/backoff/v4 v4.2.1/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= github.com/census-instrumentation/opencensus-proto v0.3.0/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc= github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/cespare/xxhash/v2 v2.1.2/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/charmbracelet/bubbles v0.20.0 h1:jSZu6qD8cRQ6k9OMfR1WlM+ruM8fkPWkHvQWD9LIutE= github.com/charmbracelet/bubbles v0.20.0/go.mod h1:39slydyswPy+uVOHZ5x/GjwVAFkCsV8IIVy+4MhzwwU= github.com/charmbracelet/bubbletea v1.1.1 h1:KJ2/DnmpfqFtDNVTvYZ6zpPFL9iRCRr0qqKOCvppbPY= github.com/charmbracelet/bubbletea v1.1.1/go.mod h1:9Ogk0HrdbHolIKHdjfFpyXJmiCzGwy+FesYkZr7hYU4= github.com/charmbracelet/lipgloss v0.13.0 h1:4X3PPeoWEDCMvzDvGmTajSyYPcZM4+y8sCA/SsA3cjw= github.com/charmbracelet/lipgloss v0.13.0/go.mod h1:nw4zy0SBX/F/eAO1cWdcvy6qnkDUxr8Lw7dvFrAIbbY= github.com/charmbracelet/x/ansi v0.2.3 h1:VfFN0NUpcjBRd4DnKfRaIRo53KRgey/nhOoEqosGDEY= github.com/charmbracelet/x/ansi v0.2.3/go.mod h1:dk73KoMTT5AX5BsX0KrqhsTqAnhZZoCBjs7dGWp4Ktw= github.com/charmbracelet/x/term v0.2.0 h1:cNB9Ot9q8I711MyZ7myUR5HFWL/lc3OpU8jZ4hwm0x0= github.com/charmbracelet/x/term v0.2.0/go.mod h1:GVxgxAbjUrmpvIINHIQnJJKpMlHiZ4cktEQCN6GWyF0= github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= github.com/circonus-labs/circonus-gometrics v2.3.1+incompatible/go.mod h1:nmEj6Dob7S7YxXgwXpfOuvO54S+tGdZdw9fuRZt25Ag= github.com/circonus-labs/circonusllhist v0.1.3/go.mod h1:kMXHVDlOchFAehlya5ePtbp5jckzBHf4XRpQvBOLI+I= github.com/client9/misspell v0.3.4/go.mod 
h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= github.com/cloudflare/circl v1.6.1 h1:zqIqSPIndyBh1bjLVVDHMPpVKqp8Su/V+6MeDzzQBQ0= github.com/cloudflare/circl v1.6.1/go.mod h1:uddAzsPgqdMAYatqJ0lsjX1oECcQLIlRpzZh3pJrofs= github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= github.com/cncf/udpa/go v0.0.0-20200629203442-efcf912fb354/go.mod h1:WmhPx2Nbnhtbo57+VJT5O0JRkEi1Wbu0z5j0R8u5Hbk= github.com/cncf/udpa/go v0.0.0-20201120205902-5459f2c99403/go.mod h1:WmhPx2Nbnhtbo57+VJT5O0JRkEi1Wbu0z5j0R8u5Hbk= github.com/cncf/udpa/go v0.0.0-20210930031921-04548b0d99d4/go.mod h1:6pvJx4me5XPnfI9Z40ddWsdw2W/uZgQLFXToKeRcDiI= github.com/cncf/xds/go v0.0.0-20210312221358-fbca930ec8ed/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= github.com/cncf/xds/go v0.0.0-20210805033703-aa0b78936158/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= github.com/cncf/xds/go v0.0.0-20210922020428-25de7278fc84/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= github.com/cncf/xds/go v0.0.0-20211001041855-01bcc9b48dfe/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= github.com/cncf/xds/go v0.0.0-20211011173535-cb28da3451f1/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= github.com/cncf/xds/go v0.0.0-20211130200136-a8f946100490/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= github.com/containerd/cgroups v1.1.0 h1:v8rEWFl6EoqHB+swVNjVoCJE8o3jX7e8nqBGPLaDFBM= github.com/containerd/cgroups v1.1.0/go.mod h1:6ppBcbh/NOOUU+dMKrykgaBnK9lCIBxHqJDGwsa1mIw= github.com/containerd/containerd v1.7.28 h1:Nsgm1AtcmEh4AHAJ4gGlNSaKgXiNccU270Dnf81FQ3c= github.com/containerd/containerd v1.7.28/go.mod h1:azUkWcOvHrWvaiUjSQH0fjzuHIwSPg1WL5PshGP4Szs= github.com/containerd/containerd/api v1.8.0 h1:hVTNJKR8fMc/2Tiw60ZRijntNMd1U+JVMyTRdsD2bS0= github.com/containerd/containerd/api v1.8.0/go.mod h1:dFv4lt6S20wTu/hMcP4350RL87qPWLVa/OHOwmmdnYc= github.com/containerd/continuity v0.4.4 h1:/fNVfTJ7wIl/YPMHjf+5H32uFhl63JucB34PlCpMKII= github.com/containerd/continuity v0.4.4/go.mod h1:/lNJvtJKUQStBzpVQ1+rasXO1LAWtUQssk28EZvJ3nE= github.com/containerd/errdefs v0.3.0 h1:FSZgGOeK4yuT/+DnF07/Olde/q4KBoMsaamhXxIMDp4= github.com/containerd/errdefs v0.3.0/go.mod h1:+YBYIdtsnF4Iw6nWZhJcqGSg/dwvV7tyJ/kCkyJ2k+M= github.com/containerd/fifo v1.1.0 h1:4I2mbh5stb1u6ycIABlBw9zgtlK8viPI9QkQNRQEEmY= github.com/containerd/fifo v1.1.0/go.mod h1:bmC4NWMbXlt2EZ0Hc7Fx7QzTFxgPID13eH0Qu+MAb2o= github.com/containerd/log v0.1.0 h1:TCJt7ioM2cr/tfR8GPbGf9/VRAX8D2B4PjzCpfX540I= github.com/containerd/log v0.1.0/go.mod h1:VRRf09a7mHDIRezVKTRCrOq78v577GXq3bSa3EhrzVo= github.com/containerd/platforms v0.2.1 h1:zvwtM3rz2YHPQsF2CHYM8+KtB5dvhISiXh5ZpSBQv6A= github.com/containerd/platforms v0.2.1/go.mod h1:XHCb+2/hzowdiut9rkudds9bE5yJ7npe7dG/wG+uFPw= github.com/containerd/stargz-snapshotter/estargz v0.14.3 h1:OqlDCK3ZVUO6C3B/5FSkDwbkEETK84kQgEeFwDC+62k= github.com/containerd/stargz-snapshotter/estargz v0.14.3/go.mod h1:KY//uOCIkSuNAHhJogcZtrNHdKrA99/FCCRjE3HD36o= github.com/containerd/ttrpc v1.2.7 h1:qIrroQvuOL9HQ1X6KHe2ohc7p+HP/0VE6XPU7elJRqQ= github.com/containerd/ttrpc v1.2.7/go.mod h1:YCXHsb32f+Sq5/72xHubdiJRQY9inL4a4ZQrAbN1q9o= github.com/containerd/typeurl/v2 v2.1.1 h1:3Q4Pt7i8nYwy2KmQWIw2+1hTvwTE/6w9FqcttATPO/4= github.com/containerd/typeurl/v2 v2.1.1/go.mod h1:IDp2JFvbwZ31H8dQbEIY7sDl2L3o3HZj1hsSQlywkQ0= github.com/coreos/go-semver v0.3.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk= github.com/coreos/go-systemd/v22 v22.3.2/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= 
github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= github.com/cpuguy83/go-md2man/v2 v2.0.1/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/cyphar/filepath-securejoin v0.4.1 h1:JyxxyPEaktOD+GAnqIqTf9A8tHyAG22rowi7HkoSU1s= github.com/cyphar/filepath-securejoin v0.4.1/go.mod h1:Sdj7gXlvMcPZsbhwhQ33GguGLDGQL7h7bg04C/+u9jI= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/deitch/magic v0.0.0-20230404182410-1ff89d7342da h1:ZOjWpVsFZ06eIhnh4mkaceTiVoktdU67+M7KDHJ268M= github.com/deitch/magic v0.0.0-20230404182410-1ff89d7342da/go.mod h1:B3tI9iGHi4imdLi4Asdha1Sc6feLMTfPLXh9IUYmysk= github.com/dgrijalva/jwt-go/v4 v4.0.0-preview1/go.mod h1:+hnT3ywWDTAFrW5aE+u2Sa/wT555ZqwoCS+pk3p6ry4= github.com/distribution/reference v0.6.0 h1:0IXCQ5g4/QMHHkarYzh5l+u8T3t73zM5QvfrDyIgxBk= github.com/distribution/reference v0.6.0/go.mod h1:BbU0aIcezP1/5jX/8MP0YiH4SdvB5Y4f/wlDRiLyi3E= github.com/docker/cli v27.1.1+incompatible h1:goaZxOqs4QKxznZjjBWKONQci/MywhtRv2oNn0GkeZE= github.com/docker/cli v27.1.1+incompatible/go.mod h1:JLrzqnKDaYBop7H2jaqPtU4hHvMKP+vjCwu2uszcLI8= github.com/docker/distribution v2.8.3+incompatible h1:AtKxIZ36LoNK51+Z6RpzLpddBirtxJnzDrHLEKxTAYk= github.com/docker/distribution v2.8.3+incompatible/go.mod h1:J2gT2udsDAN96Uj4KfcMRqY0/ypR+oyYUYmja8H+y+w= github.com/docker/docker v27.3.1+incompatible h1:KttF0XoteNTicmUtBO0L2tP+J7FGRFTjaEF4k6WdhfI= github.com/docker/docker v27.3.1+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk= github.com/docker/docker-credential-helpers v0.7.0 h1:xtCHsjxogADNZcdv1pKUHXryefjlVRqWqIhk/uXJp0A= github.com/docker/docker-credential-helpers v0.7.0/go.mod h1:rETQfLdHNT3foU5kuNkFR1R1V12OJRRO5lzt2D1b5X0= github.com/docker/go-connections v0.4.0 h1:El9xVISelRB7BuFusrZozjnkIM5YnzCViNKohAFqRJQ= github.com/docker/go-connections v0.4.0/go.mod h1:Gbd7IOopHjR8Iph03tsViu4nIes5XhDvyHbTtUxmeec= github.com/docker/go-events v0.0.0-20190806004212-e31b211e4f1c h1:+pKlWGMw7gf6bQ+oDZB4KHQFypsfjYlq/C4rfL7D3g8= github.com/docker/go-events v0.0.0-20190806004212-e31b211e4f1c/go.mod h1:Uw6UezgYA44ePAFQYUehOuCzmy5zmg/+nl2ZfMWGkpA= github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4= github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5 h1:iFaUwBSo5Svw6L7HYpRu/0lE3e0BaElwnNO1qkNQxBY= github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5/go.mod h1:qssHWj60/X5sZFNxpG4HBPDHVqxNm4DfnCKgrbZOT+s= github.com/dsnet/golib v0.0.0-20171103203638-1ea166775780/go.mod h1:Lj+Z9rebOhdfkVLjJ8T6VcRQv3SXugXy999NBtR9aFY= github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= github.com/edsrzf/mmap-go v1.1.0 h1:6EUwBLQ/Mcr1EYLE4Tn1VdW1A4ckqCQWZBw8Hr0kjpQ= github.com/edsrzf/mmap-go v1.1.0/go.mod 
h1:19H/e8pUPLicwkyNgOykDXkJ9F0MHE+Z52B8EIth78Q= github.com/elazarl/goproxy v1.7.2 h1:Y2o6urb7Eule09PjlhQRGNsqRfPmYI3KKQLFpCAV3+o= github.com/elazarl/goproxy v1.7.2/go.mod h1:82vkLNir0ALaW14Rc399OTTjyNREgmdL2cVoIbS6XaE= github.com/elliotchance/phpserialize v1.4.0 h1:cAp/9+KSnEbUC8oYCE32n2n84BeW8HOY3HMDI8hG2OY= github.com/elliotchance/phpserialize v1.4.0/go.mod h1:gt7XX9+ETUcLXbtTKEuyrqW3lcLUAeS/AnGZ2e49TZs= github.com/emirpasic/gods v1.18.1 h1:FXtiHYKDGKCW2KzwZKx0iC0PQmdlorYgdFG9jPXJ1Bc= github.com/emirpasic/gods v1.18.1/go.mod h1:8tpGGwCnJ5H4r6BWwaV6OrWmMoPhUl5jm/FMNAnJvWQ= github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= github.com/envoyproxy/go-control-plane v0.9.7/go.mod h1:cwu0lG7PUMfa9snN8LXBig5ynNVH9qI8YYLbd1fK2po= github.com/envoyproxy/go-control-plane v0.9.9-0.20201210154907-fd9021fe5dad/go.mod h1:cXg6YxExXjJnVBQHBLXeUAgxn2UodCpnH306RInaBQk= github.com/envoyproxy/go-control-plane v0.9.9-0.20210217033140-668b12f5399d/go.mod h1:cXg6YxExXjJnVBQHBLXeUAgxn2UodCpnH306RInaBQk= github.com/envoyproxy/go-control-plane v0.9.9-0.20210512163311-63b5d3c536b0/go.mod h1:hliV/p42l8fGbc6Y9bQ70uLwIvmJyVE5k4iMKlh8wCQ= github.com/envoyproxy/go-control-plane v0.9.10-0.20210907150352-cf90f659a021/go.mod h1:AFq3mo9L8Lqqiid3OhADV3RfLJnjiw63cSpi+fDTRC0= github.com/envoyproxy/go-control-plane v0.10.1/go.mod h1:AY7fTTXNdv/aJ2O5jwpxAPOWUZ7hQAEvzN5Pf27BkQQ= github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= github.com/envoyproxy/protoc-gen-validate v0.6.2/go.mod h1:2t7qjJNvHPx8IjnBOzl9E9/baC+qXE/TeeyBRzgJDws= github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f h1:Y/CXytFA4m6baUTXGLOoWe4PQhGxaX0KpnayAqC48p4= github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f/go.mod h1:vw97MGsxSvLiUE2X8qFplwetxpGLQrlU1Q9AUEIzCaM= github.com/facebookincubator/flog v0.0.0-20190930132826-d2511d0ce33c/go.mod h1:QGzNH9ujQ2ZUr/CjDGZGWeDAVStrWNjHeEcjJL96Nuk= github.com/facebookincubator/nvdtools v0.1.5 h1:jbmDT1nd6+k+rlvKhnkgMokrCAzHoASWE5LtHbX2qFQ= github.com/facebookincubator/nvdtools v0.1.5/go.mod h1:Kh55SAWnjckS96TBSrXI99KrEKH4iB0OJby3N8GRJO4= github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4= github.com/fatih/color v1.9.0/go.mod h1:eQcE1qtQxscV5RaZvpXrrb8Drkc3/DdQ+uUYCNjL+zU= github.com/fatih/color v1.13.0/go.mod h1:kLAiJbzzSOZDVNGyDpeOxJ47H46qBXwg5ILebYFFOfk= github.com/fatih/color v1.17.1-0.20241003070628-1c8d8706604e h1:43jO1Ogdyp9HrUaSFfg1v8fsKxciHMlmK7lAUCHa0SE= github.com/fatih/color v1.17.1-0.20241003070628-1c8d8706604e/go.mod h1:4FelSpRwEGDpQ12mAdzqdOukCy4u8WUtOY6lkT/6HfU= github.com/fatih/set v0.2.1 h1:nn2CaJyknWE/6txyUDGwysr3G5QC6xWB/PtVjPBbeaA= github.com/fatih/set v0.2.1/go.mod h1:+RKtMCH+favT2+3YecHGxcc0b4KyVWA1QWWJUs4E0CI= github.com/felixge/fgprof v0.9.3 h1:VvyZxILNuCiUCSXtPtYmmtGvb65nqXh2QFWc0Wpf2/g= github.com/felixge/fgprof v0.9.3/go.mod h1:RdbpDgzqYVh/T9fPELJyV7EYJuHB55UTEULNun8eiPw= github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg= github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8= github.com/frankban/quicktest v1.14.6/go.mod 
h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0= github.com/fsnotify/fsnotify v1.5.1/go.mod h1:T3375wBYaZdLLcVNkcVbzGHY7f1l/uK5T5Ai1i3InKU= github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA= github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM= github.com/gabriel-vasile/mimetype v1.4.6 h1:3+PzJTKLkvgjeTbts6msPJt4DixhT4YtFNf1gtGe3zc= github.com/gabriel-vasile/mimetype v1.4.6/go.mod h1:JX1qVKqZd40hUPpAfiNTe0Sne7hdfKSbOqqmkq8GCXc= github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= github.com/github/go-spdx/v2 v2.3.2 h1:IfdyNHTqzs4zAJjXdVQfRnxt1XMfycXoHBE2Vsm1bjs= github.com/github/go-spdx/v2 v2.3.2/go.mod h1:2ZxKsOhvBp+OYBDlsGnUMcchLeo2mrpEBn2L1C+U3IQ= github.com/glebarez/go-sqlite v1.20.3 h1:89BkqGOXR9oRmG58ZrzgoY/Fhy5x0M+/WV48U5zVrZ4= github.com/glebarez/go-sqlite v1.20.3/go.mod h1:u3N6D/wftiAzIOJtZl6BmedqxmmkDfH3q+ihjqxC9u0= github.com/gliderlabs/ssh v0.3.8 h1:a4YXD1V7xMF9g5nTkdfnja3Sxy1PVDCj1Zg4Wb8vY6c= github.com/gliderlabs/ssh v0.3.8/go.mod h1:xYoytBv1sV0aL3CavoDuJIQNURXkkfPA/wxQ1pL1fAU= github.com/go-git/gcfg v1.5.1-0.20230307220236-3a3c6141e376 h1:+zs/tPmkDkHx3U66DAb0lQFJrpS6731Oaa12ikc+DiI= github.com/go-git/gcfg v1.5.1-0.20230307220236-3a3c6141e376/go.mod h1:an3vInlBmSxCcxctByoQdvwPiA7DTK7jaaFDBTtu0ic= github.com/go-git/go-billy/v5 v5.6.2 h1:6Q86EsPXMa7c3YZ3aLAQsMA0VlWmy43r6FHqa/UNbRM= github.com/go-git/go-billy/v5 v5.6.2/go.mod h1:rcFC2rAsp/erv7CMz9GczHcuD0D32fWzH+MJAU+jaUU= github.com/go-git/go-git-fixtures/v4 v4.3.2-0.20231010084843-55a94097c399 h1:eMje31YglSBqCdIqdhKBW8lokaMrL3uTkpGYlE2OOT4= github.com/go-git/go-git-fixtures/v4 v4.3.2-0.20231010084843-55a94097c399/go.mod h1:1OCfN199q1Jm3HZlxleg+Dw/mwps2Wbk9frAWm+4FII= github.com/go-git/go-git/v5 v5.16.2 h1:fT6ZIOjE5iEnkzKyxTHK1W4HGAsPhqEqiSAssSO77hM= github.com/go-git/go-git/v5 v5.16.2/go.mod h1:4Ge4alE/5gPs30F2H1esi2gPd69R0C39lolkucHBOp8= github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU= github.com/go-gl/glfw/v3.3/glfw v0.0.0-20191125211704-12ad95a8df72/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= github.com/go-gl/glfw/v3.3/glfw v0.0.0-20200222043503-6f7a984d4dc4/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= github.com/go-kit/kit v0.9.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE= github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk= github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= github.com/go-restruct/restruct v1.2.0-alpha h1:2Lp474S/9660+SJjpVxoKuWX09JsXHSrdV7Nv3/gkvc= github.com/go-restruct/restruct v1.2.0-alpha/go.mod h1:KqrpKpn4M8OLznErihXTGLlsXFGeLxHUrLRRI/1YjGk= github.com/go-sql-driver/mysql v1.6.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg= github.com/go-sql-driver/mysql v1.8.1 h1:LedoTUt/eveggdHS9qUFC1EFSa8bU2+1pZjSRpvNJ1Y= github.com/go-sql-driver/mysql v1.8.1/go.mod h1:wEBSXgmK//2ZFJyE+qWnIsVGmvmEKlqwuVSjsCm7DZg= github.com/go-stack/stack 
v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= github.com/go-test/deep v1.1.1 h1:0r/53hagsehfO4bzD2Pgr/+RgHqhmf+k1Bpse2cTu1U= github.com/go-test/deep v1.1.1/go.mod h1:5C2ZWiW0ErCdrYzpqxLbTX7MG14M9iiw8DgHncVwcsE= github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20191227052852-215e87163ea7/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8 h1:f+oWsMOmNPc8JmEHVZIycC7hBoQxHH9pNKQORJNozsQ= github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8/go.mod h1:wcDNUvekVysuuOpQKo3191zZyTpiI6se1N1ULghS0sw= github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= github.com/golang/mock v1.2.0/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= github.com/golang/mock v1.3.1/go.mod h1:sBzyDLLjw3U8JLTeZvSv8jJB+tU5PVekmnlKIyFUx0Y= github.com/golang/mock v1.4.0/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw= github.com/golang/mock v1.4.1/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw= github.com/golang/mock v1.4.3/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw= github.com/golang/mock v1.4.4/go.mod h1:l3mdAwkq5BuhzHwde/uurv3sEJeZMXNpwsxVWU71h+4= github.com/golang/mock v1.5.0/go.mod h1:CWnOUgYIOo4TcNZ0wHX3YZCqsaM1I1Jvs6v3mP3KVu8= github.com/golang/mock v1.6.0/go.mod h1:p6yTPP+5HYm5mzsMV8JkE6ZKdX+/wYM6Hr+LicevLPs= github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.3/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw= github.com/golang/protobuf v1.3.4/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw= github.com/golang/protobuf v1.3.5/go.mod h1:6O5/vntMXwX2lRkT1hjjk0nAC1IDOTvTlVgjlRvqsdk= github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8= github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA= github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs= github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w= github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0= github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8= github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= github.com/golang/protobuf v1.5.0/go.mod 
h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= github.com/golang/protobuf v1.5.1/go.mod h1:DopwsBzvsk0Fs44TXzsVbJyPhcCPeIwnvohx4u74HPM= github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= github.com/golang/snappy v0.0.2/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/golang/snappy v0.0.3/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM= github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.4.1/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.1/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= github.com/google/go-containerregistry v0.20.2 h1:B1wPJ1SN/S7pB+ZAimcciVD+r+yV/l/DSArMxlbwseo= github.com/google/go-containerregistry v0.20.2/go.mod h1:z38EKdKh4h7IP2gSfUUqEvalZBqs6AoLeWfUy34nQC8= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/licensecheck v0.3.1 h1:QoxgoDkaeC4nFrtGN1jV7IPmDCHFNIVh54e5hSt6sPs= github.com/google/licensecheck v0.3.1/go.mod h1:ORkR35t/JjW+emNKtfJDII0zlciG9JgbT7SmsohlHmY= github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs= github.com/google/martian/v3 v3.0.0/go.mod h1:y5Zk1BBys9G+gd6Jrk0W3cC1+ELVxBWuIGO+w/tUAp0= github.com/google/martian/v3 v3.1.0/go.mod h1:y5Zk1BBys9G+gd6Jrk0W3cC1+ELVxBWuIGO+w/tUAp0= github.com/google/martian/v3 v3.2.1/go.mod h1:oBOf6HBosgwRXnUGWUB05QECsc6uvmMiJ3+6W4l/CUk= github.com/google/pprof v0.0.0-20181206194817-3ea8567a2e57/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc= github.com/google/pprof v0.0.0-20190515194954-54271f7e092f/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc= github.com/google/pprof v0.0.0-20191218002539-d4f498aebedc/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= github.com/google/pprof 
v0.0.0-20200212024743-f11f1df84d12/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= github.com/google/pprof v0.0.0-20200229191704-1ebb73c60ed3/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= github.com/google/pprof v0.0.0-20200430221834-fc25d7d30c6d/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= github.com/google/pprof v0.0.0-20200708004538-1a94d8640e99/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= github.com/google/pprof v0.0.0-20201023163331-3e6fc7fc9c4c/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= github.com/google/pprof v0.0.0-20201203190320-1bf35d6f28c2/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= github.com/google/pprof v0.0.0-20210122040257-d980be63207e/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= github.com/google/pprof v0.0.0-20210226084205-cbba55b83ad5/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= github.com/google/pprof v0.0.0-20210601050228-01bbb1931b22/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= github.com/google/pprof v0.0.0-20210609004039-a478d1d731e9/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= github.com/google/pprof v0.0.0-20211214055906-6f57359322fd/go.mod h1:KgnwoLYCZ8IQu3XUZ8Nc/bM9CCZFOyjUNOSygVozoDg= github.com/google/pprof v0.0.0-20240409012703-83162a5b38cd h1:gbpYu9NMq8jhDVbvlGkMFWCjLFlqqEZjEmObmhUy6Vo= github.com/google/pprof v0.0.0-20240409012703-83162a5b38cd/go.mod h1:kf6iHlnVGwgKolg33glAes7Yg/8iWP8ukqeldJSO7jw= github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg= github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk= github.com/googleapis/gax-go/v2 v2.1.0/go.mod h1:Q3nei7sK6ybPYH7twZdmQpAd1MKb7pfu6SK+H1/DsU0= github.com/googleapis/gax-go/v2 v2.1.1/go.mod h1:hddJymUZASv3XPyGkUpKj8pPO47Rmb0eJc8R6ouapiM= github.com/gookit/color v1.2.5/go.mod h1:AhIE+pS6D4Ql0SQWbBeXPHw7gY0/sjHoA4s/n1KB7xg= github.com/gookit/color v1.5.4 h1:FZmqs7XOyGgCAxmWyPslpiok1k05wmY3SJTytgvYFs0= github.com/gookit/color v1.5.4/go.mod h1:pZJOeOS8DM43rXbp4AZo1n9zCU2qjpcRko0b6/QJi9w= github.com/grpc-ecosystem/grpc-gateway v1.16.0 h1:gmcG1KaJ57LophUzW0Hy8NmPhnMZb4M0+kPpLofRdBo= github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw= github.com/grpc-ecosystem/grpc-gateway/v2 v2.16.0 h1:YBftPWNWd4WwGqtY2yeZL2ef8rHAxPBD8KFhJpmcqms= github.com/grpc-ecosystem/grpc-gateway/v2 v2.16.0/go.mod h1:YN5jB8ie0yfIUg6VvR9Kz84aCaG7AsGZnLjhHbUqwPg= github.com/hashicorp/consul/api v1.11.0/go.mod h1:XjsvQN+RJGWI2TWy1/kqaE16HrR2J/FWgkYjdZQsX9M= github.com/hashicorp/consul/sdk v0.8.0/go.mod h1:GBvyrGALthsZObzUGsfgHZQDXjg4lOjagTIwIR1vPms= github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= github.com/hashicorp/errwrap v1.1.0 h1:OxrOeh75EUXMY8TBjag2fzXGZ40LB6IKw45YeGUDY2I= github.com/hashicorp/errwrap v1.1.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= github.com/hashicorp/go-cleanhttp v0.5.0/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80= github.com/hashicorp/go-cleanhttp v0.5.1/go.mod 
h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80= github.com/hashicorp/go-cleanhttp v0.5.2/go.mod h1:kO/YDlP8L1346E6Sodw+PrpBSV4/SoxCXGY6BqNFT48= github.com/hashicorp/go-hclog v0.12.0/go.mod h1:whpDNt7SSdeAju8AWKIWsul05p54N/39EeqMAyrmvFQ= github.com/hashicorp/go-hclog v1.0.0/go.mod h1:whpDNt7SSdeAju8AWKIWsul05p54N/39EeqMAyrmvFQ= github.com/hashicorp/go-immutable-radix v1.0.0/go.mod h1:0y9vanUI8NX6FsYoO3zeMjhV/C5i9g4Q3DwcSNZ4P60= github.com/hashicorp/go-immutable-radix v1.3.1/go.mod h1:0y9vanUI8NX6FsYoO3zeMjhV/C5i9g4Q3DwcSNZ4P60= github.com/hashicorp/go-msgpack v0.5.3/go.mod h1:ahLV/dePpqEmjfWmKiqvPkv/twdG7iPBM1vqhUKIvfM= github.com/hashicorp/go-multierror v1.0.0/go.mod h1:dHtQlpGsu+cZNNAkkCN/P3hoUDHhCYQXV3UM06sGGrk= github.com/hashicorp/go-multierror v1.1.0/go.mod h1:spPvp8C1qA32ftKqdAHm4hHTbPw+vmowP0z+KUhOZdA= github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo= github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM= github.com/hashicorp/go-retryablehttp v0.5.3/go.mod h1:9B5zBasrRhHXnJnui7y6sL7es7NDiJgTc6Er0maI1Xs= github.com/hashicorp/go-rootcerts v1.0.2/go.mod h1:pqUvnprVnM5bf7AOirdbb01K4ccR319Vf4pU3K5EGc8= github.com/hashicorp/go-sockaddr v1.0.0/go.mod h1:7Xibr9yA9JjQq1JpNB2Vw7kxv8xerXegt+ozgdvDeDU= github.com/hashicorp/go-syslog v1.0.0/go.mod h1:qPfqrKkXGihmCqbJM2mZgkZGvKG1dFdvsLplgctolz4= github.com/hashicorp/go-uuid v1.0.0/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= github.com/hashicorp/go-uuid v1.0.1/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= github.com/hashicorp/golang-lru v0.5.4 h1:YDjusn29QI/Das2iO9M0BHnIbxPeyuCHsjMW+lJfyTc= github.com/hashicorp/golang-lru v0.5.4/go.mod h1:iADmTwqILo4mZ8BN3D2Q6+9jd8WM5uGBxy+E8yxSoD4= github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k= github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM= github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4= github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= github.com/hashicorp/logutils v1.0.0/go.mod h1:QIAnNjmIWmVIIkWDTG1z5v++HQmx9WQRO+LraFDTW64= github.com/hashicorp/mdns v1.0.1/go.mod h1:4gW7WsVCke5TE7EPeYliwHlRUyBtfCwuFwuMg2DmyNY= github.com/hashicorp/mdns v1.0.4/go.mod h1:mtBihi+LeNXGtG8L9dX59gAEa12BDtBQSp4v/YAJqrc= github.com/hashicorp/memberlist v0.2.2/go.mod h1:MS2lj3INKhZjWNqd3N0m3J+Jxf3DAOnAH9VT3Sh9MUE= github.com/hashicorp/memberlist v0.3.0/go.mod h1:MS2lj3INKhZjWNqd3N0m3J+Jxf3DAOnAH9VT3Sh9MUE= github.com/hashicorp/serf v0.9.5/go.mod h1:UWDWwZeL5cuWDJdl0C6wrvrUwEqtQ4ZKBKKENpqIUyk= github.com/hashicorp/serf v0.9.6/go.mod h1:TXZNMjZQijwlDvp+r0b63xZ45H7JmCmgg4gpTwn9UV4= github.com/huandu/xstrings v1.5.0 h1:2ag3IFq9ZDANvthTwTiqSSZLjDc+BedvHPAp5tJy2TI= github.com/huandu/xstrings v1.5.0/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE= github.com/iancoleman/strcase v0.2.0/go.mod h1:iwCmte+B7n89clKwxIoIXy/HfoL7AsD47ZCWhYzw7ho= github.com/iancoleman/strcase v0.3.0 h1:nTXanmYxhfFAMjZL34Ov6gkzEsSJZ5DbhxWjvSASxEI= github.com/iancoleman/strcase v0.3.0/go.mod h1:iwCmte+B7n89clKwxIoIXy/HfoL7AsD47ZCWhYzw7ho= github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= github.com/ianlancetaylor/demangle 
v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= github.com/ianlancetaylor/demangle v0.0.0-20210905161508-09a460cdf81d/go.mod h1:aYm2/VgdVmcIU8iMfdMvDMsRAQjcfZSKFby6HOFvi/w= github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99 h1:BQSFePA1RWJOlocH6Fxy8MmwDt+yVQYULKfN0RoTN8A= github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99/go.mod h1:1lJo3i6rXxKeerYnT8Nvf0QmHCRC1n8sfWVwXF2Frvo= github.com/jedib0t/go-pretty/v6 v6.6.0 h1:wmZVuAcEkZRT+Aq1xXpE8IGat4vE5WXOMmBpbQqERXw= github.com/jedib0t/go-pretty/v6 v6.6.0/go.mod h1:zbn98qrYlh95FIhwwsbIip0LYpwSG8SUOScs+v9/t0E= github.com/jinzhu/copier v0.4.0 h1:w3ciUoD19shMCRargcpm0cm91ytaBhDvuRpz1ODO/U8= github.com/jinzhu/copier v0.4.0/go.mod h1:DfbEm0FYsaqBcKcFuvmOZb218JkPGtvSHsKg8S8hyyg= github.com/jmoiron/sqlx v1.4.0 h1:1PLqN7S1UYp5t4SrVVnt4nUVNemrDAtxlulVe+Qgm3o= github.com/jmoiron/sqlx v1.4.0/go.mod h1:ZrZ7UsYB/weZdl2Bxg6jCRO9c3YHl8r3ahlKmRT4JLY= github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= github.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= github.com/json-iterator/go v1.1.11/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU= github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk= github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w= github.com/kastenhq/goversion v0.0.0-20230811215019-93b2f8823953 h1:WdAeg/imY2JFPc/9CST4bZ80nNJbiBFCAdSZCSgrS5Y= github.com/kastenhq/goversion v0.0.0-20230811215019-93b2f8823953/go.mod h1:6o+UrvuZWc4UTyBhQf0LGjW9Ld7qJxLz/OqvSOWWlEc= github.com/kevinburke/ssh_config v1.2.0 h1:x584FjTGwHzMwvHx18PXxbBVzfnxogHaAReU4gf13a4= github.com/kevinburke/ssh_config v1.2.0/go.mod h1:CT57kijsi8u/K/BOFA39wgDQJ9CxiF4nAY/ojJ6r6mM= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/klauspost/compress v1.4.1/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A= github.com/klauspost/compress v1.11.4/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= github.com/klauspost/compress v1.17.8 h1:YcnTYrq7MikUT7k0Yb5eceMmALQPYBW/Xltxn0NAMnU= github.com/klauspost/compress v1.17.8/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw= github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek= github.com/klauspost/pgzip v1.2.5 h1:qnWYvvKqedOF2ulHpMG72XQol4ILEJ8k2wwRl/Km8oE= github.com/klauspost/pgzip v1.2.5/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs= github.com/knqyf263/go-rpmdb v0.1.1 h1:oh68mTCvp1XzxdU7EfafcWzzfstUZAEa3MW0IJye584= github.com/knqyf263/go-rpmdb v0.1.1/go.mod h1:9LQcoMCMQ9vrF7HcDtXfvqGO4+ddxFQ8+YF/0CVGDww= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/kr/fs v0.1.0/go.mod h1:FFnZGqtBN9Gxj7eW1uZ42v5BccTP0vu6NEaFoC2HwRg= github.com/kr/logfmt 
v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pretty v0.2.0/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw= github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o= github.com/logrusorgru/aurora v2.0.3+incompatible h1:tOpm7WcpBTn4fjmVfgpQq0EfczGlG91VSDkswnjF5A8= github.com/logrusorgru/aurora v2.0.3+incompatible/go.mod h1:7rIyQOR62GCctdiQpZ/zOJlFyk6y+94wXzv6RNZgaR4= github.com/lucasb-eyer/go-colorful v1.2.0 h1:1nnpGOrhyZZuNyfu1QjKiUICQ74+3FNCN69Aj6K7nkY= github.com/lucasb-eyer/go-colorful v1.2.0/go.mod h1:R4dSotOR9KMtayYi1e77YzuveK+i7ruzyGqttikkLy0= github.com/lyft/protoc-gen-star v0.5.3/go.mod h1:V0xaHgaf5oCCqmcxYcWiDfTiKsZsRc87/1qhoTACD8w= github.com/magiconair/properties v1.8.5/go.mod h1:y3VJvCyxH9uVvJTWEGAELF3aiYNyPKd5NZ3oSwXrF60= github.com/magiconair/properties v1.8.7 h1:IeQXZAiQcpL9mgcAe1Nu6cX9LLw6ExEHKjN0VQdvPDY= github.com/magiconair/properties v1.8.7/go.mod h1:Dhd985XPs7jluiymwWYZ0G4Z61jb3vdS329zhj2hYo0= github.com/mattn/go-colorable v0.0.9/go.mod h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaOChaDxuIBZU= github.com/mattn/go-colorable v0.1.4/go.mod h1:U0ppj6V5qS13XJ6of8GYAs25YV2eR4EVcfRqFIhoBtE= github.com/mattn/go-colorable v0.1.6/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc= github.com/mattn/go-colorable v0.1.9/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc= github.com/mattn/go-colorable v0.1.12/go.mod h1:u5H1YNBxpqRaxsYJYSkiCWKzEfiAb1Gb520KVy5xxl4= github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA= github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= github.com/mattn/go-isatty v0.0.3/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4= github.com/mattn/go-isatty v0.0.8/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s= github.com/mattn/go-isatty v0.0.10/go.mod h1:qgIWMr58cqv1PHHyhnkY9lrL7etaEgOFcMEpPG5Rm84= github.com/mattn/go-isatty v0.0.11/go.mod h1:PhnuNfih5lzO57/f3n+odYbM4JtupLOxQOAqxQCu2WE= github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU= github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94= github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= github.com/mattn/go-localereader v0.0.2-0.20220822084749-2491eb6c1c75 h1:P8UmIzZMYDR+NGImiFvErt6VWfIRPuGM+vyjiEdkmIw= github.com/mattn/go-localereader v0.0.2-0.20220822084749-2491eb6c1c75/go.mod h1:8fBrzywKY7BI3czFoHkuzRoWE9C+EiG4R1k4Cjx5p88= github.com/mattn/go-runewidth v0.0.9/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI= github.com/mattn/go-runewidth v0.0.16 h1:E5ScNMtiwvlvB5paMFdw9p4kSQzbXFikJ5SQO6TULQc= github.com/mattn/go-runewidth v0.0.16/go.mod 
h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= github.com/mattn/go-sqlite3 v1.14.22 h1:2gZY6PC6kBnID23Tichd1K+Z0oS6nE/XwU+Vz/5o4kU= github.com/mattn/go-sqlite3 v1.14.22/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y= github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= github.com/mgutz/ansi v0.0.0-20200706080929-d51e80ef957d h1:5PJl274Y63IEHC+7izoQE9x6ikvDFZS2mDVS3drnohI= github.com/mgutz/ansi v0.0.0-20200706080929-d51e80ef957d/go.mod h1:01TrycV0kFyexm33Z7vhZRXopbI8J3TDReVlkTgMUxE= github.com/mholt/archiver/v3 v3.5.1 h1:rDjOBX9JSF5BvoJGvjqK479aL70qh9DIpZCl+k7Clwo= github.com/mholt/archiver/v3 v3.5.1/go.mod h1:e3dqJ7H78uzsRSEACH1joayhuSyhnonssnDhppzS1L4= github.com/microsoft/go-rustaudit v0.0.0-20220730194248-4b17361d90a5 h1:tQRHcLQwnwrPq2j2Qra/NnyjyESBGwdeBeVdAE9kXYg= github.com/microsoft/go-rustaudit v0.0.0-20220730194248-4b17361d90a5/go.mod h1:vYT9HE7WCvL64iVeZylKmCsWKfE+JZ8105iuh2Trk8g= github.com/miekg/dns v1.0.14/go.mod h1:W1PPwlIAgtquWBMBEV9nkV9Cazfe8ScdGz/Lj7v3Nrg= github.com/miekg/dns v1.1.26/go.mod h1:bPDLeHnStXmXAq1m/Ch/hvfNHr14JKNPMBo3VZKjuso= github.com/miekg/dns v1.1.41/go.mod h1:p6aan82bvRIyn+zDIv9xYNUpwa73JcSh9BKwknJysuI= github.com/mitchellh/cli v1.1.0/go.mod h1:xcISNoH86gajksDmfB23e/pu+B+GeFRMYmoHXxx3xhI= github.com/mitchellh/copystructure v1.2.0 h1:vpKXTN4ewci03Vljg/q9QvCGUDttBOGBIa15WveJJGw= github.com/mitchellh/copystructure v1.2.0/go.mod h1:qLl+cE2AmVv+CoeAwDPye/v+N2HKCj9FbZEVFJRxO9s= github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y= github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= github.com/mitchellh/go-testing-interface v1.0.0/go.mod h1:kRemZodwjscx+RGhAo8eIhFbs2+BFgRtFPeD/KE+zxI= github.com/mitchellh/hashstructure/v2 v2.0.2 h1:vGKWl0YJqUNxE8d+h8f6NJLcCJrgbhC4NcD46KavDd4= github.com/mitchellh/hashstructure/v2 v2.0.2/go.mod h1:MG3aRVU/N29oo/V/IhBX8GR/zz4kQkprJgF2EVszyDE= github.com/mitchellh/mapstructure v0.0.0-20160808181253-ca63d7c062ee/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= github.com/mitchellh/mapstructure v1.4.3/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY= github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= github.com/mitchellh/reflectwalk v1.0.2 h1:G2LzWKi524PWgd3mLHV8Y5k7s6XUvT0Gef6zxSIeXaQ= github.com/mitchellh/reflectwalk v1.0.2/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw= github.com/moby/docker-image-spec v1.3.1 h1:jMKff3w6PgbfSa69GfNg+zN/XLhfXJGnEx3Nl2EsFP0= github.com/moby/docker-image-spec v1.3.1/go.mod h1:eKmb5VW8vQEh/BAr2yvVNvuiJuY6UIocYsFu/DxxRpo= github.com/moby/locker v1.0.1 h1:fOXqR41zeveg4fFODix+1Ch4mj/gT0NE1XJbp/epuBg= github.com/moby/locker v1.0.1/go.mod h1:S7SDdo5zpBK84bzzVlKr2V0hz+7x9hWbYC/kq7oQppc= github.com/moby/sys/mountinfo v0.7.2 h1:1shs6aH5s4o5H2zQLn796ADW1wMrIwHsyJ2v9KouLrg= github.com/moby/sys/mountinfo v0.7.2/go.mod h1:1YOa8w8Ih7uW0wALDUgT1dTTSBrZ+HiBLGws92L2RU4= github.com/moby/sys/sequential v0.5.0 h1:OPvI35Lzn9K04PBbCLW0g4LcFAJgHsvXsRyewg5lXtc= github.com/moby/sys/sequential v0.5.0/go.mod h1:tH2cOOs5V9MlPiXcQzRC+eEyab644PWKGRYaaV5ZZlo= github.com/moby/sys/signal v0.7.0 h1:25RW3d5TnQEoKvRbEKUGay6DCQ46IxAVTT9CUMgmsSI= github.com/moby/sys/signal v0.7.0/go.mod h1:GQ6ObYZfqacOwTtlXvcmh9A26dVRul/hbOZn88Kg8Tg= 
github.com/moby/sys/user v0.3.0 h1:9ni5DlcW5an3SvRSx4MouotOygvzaXbaSrc/wGDFWPo= github.com/moby/sys/user v0.3.0/go.mod h1:bG+tYYYJgaMtRKgEmuueC0hJEAZWwtIbZTB+85uoHjs= github.com/moby/sys/userns v0.1.0 h1:tVLXkFOxVu9A64/yh59slHVv9ahO9UIev4JZusOLG/g= github.com/moby/sys/userns v0.1.0/go.mod h1:IHUYgu/kao6N8YZlp9Cf444ySSvCmDlmzUcYfDHOl28= github.com/moby/term v0.0.0-20221205130635-1aeaba878587 h1:HfkjXDfhgVaN5rmueG8cL8KKeFNecRCXFhaJ2qZ5SKA= github.com/moby/term v0.0.0-20221205130635-1aeaba878587/go.mod h1:8FzsFHVUBGZdbDsJw/ot+X+d5HLUbvklYLJ9uGfcI3Y= github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= github.com/morikuni/aec v1.0.0 h1:nP9CBfwrvYnBRgY6qfDQkygYDmYwOilePFkwzv4dU8A= github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7PXmsc= github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6 h1:ZK8zHtRHOkbHy6Mmr5D264iyp3TiX5OmNcI5cIARiQI= github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6/go.mod h1:CJlz5H+gyd6CUWT45Oy4q24RdLyn7Md9Vj2/ldJBSIo= github.com/muesli/cancelreader v0.2.2 h1:3I4Kt4BQjOR54NavqnDogx/MIoWBFa0StPA8ELUXHmA= github.com/muesli/cancelreader v0.2.2/go.mod h1:3XuTXfFS2VjM+HTLZY9Ak0l6eUKfijIfMUZ4EgX0QYo= github.com/muesli/termenv v0.15.2 h1:GohcuySI0QmI3wN8Ok9PtKGkgkFIk7y6Vpb5PvrY+Wo= github.com/muesli/termenv v0.15.2/go.mod h1:Epx+iuz8sNs7mNKhxzH4fWXGNpZwUaJKRS1noLXviQ8= github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= github.com/ncruces/go-strftime v0.1.9 h1:bY0MQC28UADQmHmaF5dgpLmImcShSi2kHU9XLdhx/f4= github.com/ncruces/go-strftime v0.1.9/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls= github.com/nwaples/rardecode v1.1.0 h1:vSxaY8vQhOcVr4mm5e8XllHWTiM4JF507A0Katqw7MQ= github.com/nwaples/rardecode v1.1.0/go.mod h1:5DzqNKiOdpKKBH87u8VlvAnPZMXcGRhxWkRpHbbfGS0= github.com/olekukonko/tablewriter v0.0.5 h1:P2Ga83D34wi1o9J6Wh1mRuqd4mF/x/lgBS7N7AbDhec= github.com/olekukonko/tablewriter v0.0.5/go.mod h1:hPp6KlRPjbx+hW8ykQs1w3UBbZlj6HuIJcUGPhkA7kY= github.com/onsi/gomega v1.34.1 h1:EUMJIKUjM8sKjYbtxQI9A4z2o+rruxnzNvpknOXie6k= github.com/onsi/gomega v1.34.1/go.mod h1:kU1QgUvBDLXBJq618Xvm2LUX6rSAfRaFRTcdOeDLwwY= github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U= github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM= github.com/opencontainers/image-spec v1.1.0 h1:8SG7/vwALn54lVB/0yZ/MMwhFrPYtpEHQb2IpWsCzug= github.com/opencontainers/image-spec v1.1.0/go.mod h1:W4s4sFTMaBeK1BQLXbG4AdM2szdn85PY75RI83NrTrM= github.com/opencontainers/runtime-spec v1.1.0 h1:HHUyrt9mwHUjtasSbXSMvs4cyFxh+Bll4AjJ9odEGpg= github.com/opencontainers/runtime-spec v1.1.0/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= github.com/opencontainers/selinux v1.11.0 h1:+5Zbo97w3Lbmb3PeqQtpmTkMwsW5nRI3YaLpt7tQ7oU= github.com/opencontainers/selinux v1.11.0/go.mod h1:E5dMC3VPuVvVHDYmi78qvhJp8+M586T4DlDRYpFkyec= github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc= 
github.com/pascaldekloe/goe v0.1.0/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc= github.com/pborman/indent v1.2.1 h1:lFiviAbISHv3Rf0jcuh489bi06hj98JsVMtIDZQb9yM= github.com/pborman/indent v1.2.1/go.mod h1:FitS+t35kIYtB5xWTZAPhnmrxcciEEOdbyrrpz5K6Vw= github.com/pelletier/go-toml v1.9.4/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c= github.com/pelletier/go-toml v1.9.5 h1:4yBQzkHv+7BHq2PQUZF3Mx0IYxG7LsP222s7Agd3ve8= github.com/pelletier/go-toml v1.9.5/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c= github.com/pelletier/go-toml/v2 v2.2.2 h1:aYUidT7k73Pcl9nb2gScu7NSrKCSHIDE89b3+6Wq+LM= github.com/pelletier/go-toml/v2 v2.2.2/go.mod h1:1t835xjRzz80PqgE6HHgN2JOsmgYu/h4qDAS4n929Rs= github.com/pierrec/lz4/v4 v4.1.2/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= github.com/pierrec/lz4/v4 v4.1.19 h1:tYLzDnjDXh9qIxSTKHwXwOYmm9d887Y7Y1ZkyXYHAN4= github.com/pierrec/lz4/v4 v4.1.19/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= github.com/pjbgf/sha1cd v0.3.2 h1:a9wb0bp1oC2TGwStyn0Umc/IGKQnEgF0vVaZ8QF8eo4= github.com/pjbgf/sha1cd v0.3.2/go.mod h1:zQWigSxVmsHEZow5qaLtPYxpcKMMQpa09ixqBxuCS6A= github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/profile v1.7.0 h1:hnbDkaNWPCLMO9wGLdBFTIZvzDrDfBM2072E1S9gJkA= github.com/pkg/profile v1.7.0/go.mod h1:8Uer0jas47ZQMJ7VD+OHknK4YDY07LPUC6dEvqDjvNo= github.com/pkg/sftp v1.10.1/go.mod h1:lYOWFsE0bwd1+KfKJaKeuokY15vzFx25BLbzYYoAxZI= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/posener/complete v1.1.1/go.mod h1:em0nMJCgc9GFtwrmVmEMR/ZL6WyhyjMBndrE9hABlRI= github.com/posener/complete v1.2.3/go.mod h1:WZIdtGGp+qx0sLrYKtIRAruyNpv6hFCicSgv7Sy7s/s= github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo= github.com/prometheus/client_golang v1.4.0/go.mod h1:e9GMxYsXl05ICDXkRhurwBS4Q3OK1iX/F2sw+iXX5zU= github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/client_model v0.2.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= github.com/prometheus/common v0.9.1/go.mod h1:yhUN8i9wzaXS3w1O07YhxHEBxD+W35wd8bs7vj7HSQ4= github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA= github.com/prometheus/procfs v0.0.8/go.mod h1:7Qr8sr6344vo1JqZ6HhLceV9o3AJ1Ff+GxbHq6oeK9A= github.com/prometheus/procfs v0.10.1 h1:kYK1Va/YMlutzCGazswoHKo//tZVlFpKYh+PymziUAg= 
github.com/prometheus/procfs v0.10.1/go.mod h1:nwNm2aOCAYw8uTR/9bWRREkZFxAUcWzPHWJq+XBB/FM= github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE= github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo= github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ= github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88= github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ= github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ= github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc= github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/ryanuber/columnize v0.0.0-20160712163229-9b3edd62028f/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts= github.com/saferwall/pe v1.5.4 h1:tLmMggEMUfeqrpJ25zS/okUQmyFdD5xWKL2+z9njCqg= github.com/saferwall/pe v1.5.4/go.mod h1:mJx+PuptmNpoPFBNhWs/uDMFL/kTHVZIkg0d4OUJFbQ= github.com/sagikazarmark/crypt v0.3.0/go.mod h1:uD/D+6UF4SrIR1uGEv7bBNkNqLGqUr43MRiaGWX1Nig= github.com/sagikazarmark/locafero v0.4.0 h1:HApY1R9zGo4DBgr7dqsTH/JJxLTTsOt7u6keLGt6kNQ= github.com/sagikazarmark/locafero v0.4.0/go.mod h1:Pe1W6UlPYUk/+wc/6KFhbORCfqzgYEpgQ3O5fPuL3H4= github.com/sagikazarmark/slog-shim v0.1.0 h1:diDBnUNK9N/354PgrxMywXnAwEr1QZcOr6gto+ugjYE= github.com/sagikazarmark/slog-shim v0.1.0/go.mod h1:SrcSrq8aKtyuqEI1uvTDTK1arOWRIczQRv+GVI1AkeQ= github.com/sahilm/fuzzy v0.1.1 h1:ceu5RHF8DGgoi+/dR5PsECjCDH1BE3Fnmpo7aVXOdRA= github.com/sahilm/fuzzy v0.1.1/go.mod h1:VFvziUEIMCrT6A6tw2RFIXPXXmzXbOsSHF0DOI8ZK9Y= github.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d h1:hrujxIzL1woJ7AwssoOcM/tq5JjjG2yYOc8odClEiXA= github.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d/go.mod h1:uugorj2VCxiV1x+LzaIdVa9b4S4qGAcH6cbhh4qVxOU= github.com/sanity-io/litter v1.5.5 h1:iE+sBxPBzoK6uaEP5Lt3fHNgpKcHXc/A2HGETy0uJQo= github.com/sanity-io/litter v1.5.5/go.mod h1:9gzJgR2i4ZpjZHsKvUXIRQVk7P+yM3e+jAF7bU2UI5U= github.com/sassoftware/go-rpmutils v0.4.0 h1:ojND82NYBxgwrV+mX1CWsd5QJvvEZTKddtCdFLPWhpg= github.com/sassoftware/go-rpmutils v0.4.0/go.mod h1:3goNWi7PGAT3/dlql2lv3+MSN5jNYPjT5mVcQcIsYzI= github.com/scylladb/go-set v1.0.3-0.20200225121959-cc7b2070d91e h1:7q6NSFZDeGfvvtIRwBrU/aegEYJYmvev0cHAwo17zZQ= github.com/scylladb/go-set v1.0.3-0.20200225121959-cc7b2070d91e/go.mod h1:DkpGd78rljTxKAnTDPFqXSGxvETQnJyuSOQwsHycqfs= github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc= github.com/sebdah/goldie/v2 v2.5.3 h1:9ES/mNN+HNUbNWpVAlrzuZ7jE+Nrczbj8uFRjM7624Y= github.com/sebdah/goldie/v2 v2.5.3/go.mod h1:oZ9fp0+se1eapSRjfYbsV/0Hqhbuu3bJVvKI/NNtssI= github.com/secDre4mer/pkcs7 v0.0.0-20240322103146-665324a4461d h1:RQqyEogx5J6wPdoxqL132b100j8KjcVHO1c0KLRoIhc= github.com/secDre4mer/pkcs7 v0.0.0-20240322103146-665324a4461d/go.mod h1:PegD7EVqlN88z7TpCqH92hHP+GBpfomGCCnw1PFtNOA= github.com/sergi/go-diff v1.1.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM= github.com/sergi/go-diff v1.2.0/go.mod 
h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM= github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3 h1:n661drycOFuPLCN3Uc8sB6B/s6Z4t2xvBgU1htSHuq8= github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3/go.mod h1:A0bzQcvG0E7Rwjx0REVgAGH58e96+X0MeOfepqsbeW4= github.com/shopspring/decimal v1.4.0 h1:bxl37RwXBklmTi0C79JfXCEBD1cqqHt0bbgBAGFp81k= github.com/shopspring/decimal v1.4.0/go.mod h1:gawqmDU56v4yIKSwfBSFip1HdCCXN8/+DMd9qYNcwME= github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc= github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo= github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= github.com/sirupsen/logrus v1.9.0/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= github.com/skeema/knownhosts v1.3.1 h1:X2osQ+RAjK76shCbvhHHHVl3ZlgDm8apHEHFqRjnBY8= github.com/skeema/knownhosts v1.3.1/go.mod h1:r7KTdC8l4uxWRyK2TpQZ/1o5HaSzh06ePQNxPwTcfiY= github.com/sourcegraph/conc v0.3.0 h1:OQTbbt6P72L20UqAkXXuLOj79LfEanQ+YQFNpLA9ySo= github.com/sourcegraph/conc v0.3.0/go.mod h1:Sdozi7LEKbFPqYX2/J+iBAM6HpqSLTASQIKqDmF7Mt0= github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= github.com/spdx/gordf v0.0.0-20201111095634-7098f93598fb/go.mod h1:uKWaldnbMnjsSAXRurWqqrdyZen1R7kxl8TkmWk2OyM= github.com/spdx/tools-golang v0.5.5 h1:61c0KLfAcNqAjlg6UNMdkwpMernhw3zVRwDZ2x9XOmk= github.com/spdx/tools-golang v0.5.5/go.mod h1:MVIsXx8ZZzaRWNQpUDhC4Dud34edUYJYecciXgrw5vE= github.com/spf13/afero v1.3.3/go.mod h1:5KUK8ByomD5Ti5Artl0RtHeI5pTF7MIDuXL3yY520V4= github.com/spf13/afero v1.6.0/go.mod h1:Ai8FlHk4v/PARR026UzYexafAt9roJ7LcLMAmO6Z93I= github.com/spf13/afero v1.11.0 h1:WJQKhtpdm3v2IzqG8VMqrr6Rf3UYpEF239Jy9wNepM8= github.com/spf13/afero v1.11.0/go.mod h1:GH9Y3pIexgf1MTIWtNGyogA5MwRIDXGUr+hbWNoBjkY= github.com/spf13/cast v1.4.1/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE= github.com/spf13/cast v1.7.0 h1:ntdiHjuueXFgm5nzDRdOS4yfT43P5Fnud6DH50rz/7w= github.com/spf13/cast v1.7.0/go.mod h1:ancEpBxwJDODSW/UG4rDrAqiKolqNNh2DX3mk86cAdo= github.com/spf13/cobra v1.3.0/go.mod h1:BrRVncBjOJa/eUcVVm9CE+oC6as8k+VYr4NY7WCi9V4= github.com/spf13/cobra v1.8.1 h1:e5/vxKd/rZsfSJMUX1agtjeTDf+qv1/JdBF8gg5k9ZM= github.com/spf13/cobra v1.8.1/go.mod h1:wHxEcudfqmLYa8iTfL+OuZPbBZkmvliBWKIezN3kD9Y= github.com/spf13/jwalterweatherman v1.1.0/go.mod h1:aNWZUN0dPAAO/Ljvb5BEdw96iTZ0EXowPYD95IqWIGo= github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/spf13/viper v1.10.0/go.mod h1:SoyBPwAtKDzypXNDFKN5kzH7ppppbGZtls1UpIy5AsM= github.com/spf13/viper v1.19.0 h1:RWq5SEjt8o25SROyN3z2OrDB9l7RPd3lwTWU8EcEdcI= github.com/spf13/viper v1.19.0/go.mod h1:GQUN9bilAbhU/jgc1bKs99f/suXKeUMct8Adx5+Ntkg= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= github.com/stretchr/objx 
v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY= github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/subosito/gotenv v1.2.0/go.mod h1:N0PQaV/YGNqwC0u51sEeR/aUtSLEXKX9iv69rRypqCw= github.com/subosito/gotenv v1.6.0 h1:9NlTDc1FTs4qu0DDq7AEtTPNw6SVm7uBMsUCUjABIf8= github.com/subosito/gotenv v1.6.0/go.mod h1:Dk4QP5c2W3ibzajGcXpNraDfq2IrhjMIvMSWPKKo0FU= github.com/sylabs/sif/v2 v2.17.1 h1:p6Sl0LWyShXBj2SBsS1dMOMIMrZHe8pwBnBrYt6uo4M= github.com/sylabs/sif/v2 v2.17.1/go.mod h1:XUGB6AQUXGkms3qPOPdevctT3lBLRLWZNWHVnt5HMKE= github.com/sylabs/squashfs v1.0.0 h1:xAyMS21ogglkuR5HaY55PCfqY3H32ma9GkasTYo28Zg= github.com/sylabs/squashfs v1.0.0/go.mod h1:rhWzvgefq1X+R+LZdts10hfMsTg3g74OfGunW8tvg/4= github.com/terminalstatic/go-xsd-validate v0.1.5 h1:RqpJnf6HGE2CB/lZB1A8BYguk8uRtcvYAPLCF15qguo= github.com/terminalstatic/go-xsd-validate v0.1.5/go.mod h1:18lsvYFofBflqCrvo1umpABZ99+GneNTw2kEEc8UPJw= github.com/therootcompany/xz v1.0.1 h1:CmOtsn1CbtmyYiusbfmhmkpAAETj0wBIH6kCYaX+xzw= github.com/therootcompany/xz v1.0.1/go.mod h1:3K3UH1yCKgBneZYhuQUvJ9HPD19UEXEI0BWbMn8qNMY= github.com/tv42/httpunix v0.0.0-20150427012821-b75d8614f926/go.mod h1:9ESjWnEqriFuLhtthL60Sar/7RFoluCcXsuvEwTV5KM= github.com/ulikunitz/xz v0.5.8/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14= github.com/ulikunitz/xz v0.5.9/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14= github.com/ulikunitz/xz v0.5.12 h1:37Nm15o69RwBkXM0J6A5OlE67RZTfzUxTj8fB3dfcsc= github.com/ulikunitz/xz v0.5.12/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14= github.com/urfave/cli v1.22.12/go.mod h1:sSBEIC79qR6OvcmsD4U3KABeOTxDqQtdDnaFuUN30b8= github.com/urfave/cli/v2 v2.3.0/go.mod h1:LJmUH05zAU44vOAcrfzZQKsZbVcdbOG8rtL3/XcUArI= github.com/vbatts/go-mtree v0.5.4 h1:OMAb8jaCyiFA7zXj0Zc/oARcxBDBoeu2LizjB8BVJl0= github.com/vbatts/go-mtree v0.5.4/go.mod h1:5GqJbVhm9BBiCc4K5uc/c42FPgXulHaQs4sFUEfIWMo= github.com/vbatts/tar-split v0.11.3 h1:hLFqsOLQ1SsppQNTMpkpPXClLDfC2A3Zgy9OUU+RVck= github.com/vbatts/tar-split v0.11.3/go.mod h1:9QlHN18E+fEH7RdG+QAJJcuya3rqT7eXSTY7wGrAokY= github.com/vifraa/gopom v1.0.0 h1:L9XlKbyvid8PAIK8nr0lihMApJQg/12OBvMA28BcWh0= github.com/vifraa/gopom v1.0.0/go.mod h1:oPa1dcrGrtlO37WPDBm5SqHAT+wTgF8An1Q71Z6Vv4o= github.com/wagoodman/go-partybus v0.0.0-20230516145632-8ccac152c651 h1:jIVmlAFIqV3d+DOxazTR9v+zgj8+VYuQBzPgBZvWBHA= 
github.com/wagoodman/go-partybus v0.0.0-20230516145632-8ccac152c651/go.mod h1:b26F2tHLqaoRQf8DywqzVaV1MQ9yvjb0OMcNl7Nxu20= github.com/wagoodman/go-progress v0.0.0-20230925121702-07e42b3cdba0 h1:0KGbf+0SMg+UFy4e1A/CPVvXn21f1qtWdeJwxZFoQG8= github.com/wagoodman/go-progress v0.0.0-20230925121702-07e42b3cdba0/go.mod h1:jLXFoL31zFaHKAAyZUh+sxiTDFe1L1ZHrcK2T1itVKA= github.com/xanzy/ssh-agent v0.3.3 h1:+/15pJfg/RsTxqYcX6fHqOXZwwMP+2VyYWJeWM2qQFM= github.com/xanzy/ssh-agent v0.3.3/go.mod h1:6dzNDKs0J9rVPHPhaGCukekBHKqfl+L3KghI1Bc68Uw= github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb h1:zGWFAtiMcyryUHoUjUJX0/lt1H2+i2Ka2n+D3DImSNo= github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU= github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 h1:EzJWgHovont7NscjpAxXsDA8S8BMYve8Y5+7cuRE7R0= github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415/go.mod h1:GwrjFmJcFw6At/Gs6z4yjiIwzuJ1/+UwLxMQDVQXShQ= github.com/xeipuuv/gojsonschema v1.2.0 h1:LhYJRs+L4fBtjZUfuSZIKGeVu0QRy8e5Xi7D17UxZ74= github.com/xeipuuv/gojsonschema v1.2.0/go.mod h1:anYRn/JVcOK2ZgGU+IjEV4nwlhoK5sQluxsYJ78Id3Y= github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 h1:nIPpBwaJSVYIxUFsDv3M8ofmx9yWTog9BfvIu0q41lo= github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8/go.mod h1:HUYIGzjTL3rfEspMxjDjgmT5uz5wzYJKVo23qUhYTos= github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e h1:JVG44RsyaB9T2KIHavMF/ppJZNG9ZpyihvCd0w101no= github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e/go.mod h1:RbqR21r5mrJuqunuUZ/Dhy/avygyECGrLceyNeo4LiM= github.com/yuin/goldmark v1.1.25/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.1.32/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= go.etcd.io/etcd/api/v3 v3.5.1/go.mod h1:cbVKeC6lCfl7j/8jBhAK6aIYO9XOjdptoxU/nLQcPvs= go.etcd.io/etcd/client/pkg/v3 v3.5.1/go.mod h1:IJHfcCEKxYu1Os13ZdwCwIUTUVGYTSAM3YSwc9/Ac1g= go.etcd.io/etcd/client/v2 v2.305.1/go.mod h1:pMEacxZW7o8pg4CrFE7pquyCJJzZvkvdD2RibOCCCGs= go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU= go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8= go.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= go.opencensus.io v0.22.3/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= go.opencensus.io v0.22.4/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= go.opencensus.io v0.22.5/go.mod h1:5pWMHQbX5EPX2/62yrJeAkowc+lfs/XD7Uxpq3pI6kk= go.opencensus.io v0.23.0/go.mod h1:XItmlyltB5F7CS4xOC1DcqMoFqwtC6OG2xF7mCv7P7E= go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0= go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.49.0 h1:jq9TW8u3so/bN+JPT166wjOI6/vQPF6Xe7nMNIltagk= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.49.0/go.mod h1:p8pYQP+m5XfbZm9fxtSKAbM6oIllS7s2AfxrChvc7iw= go.opentelemetry.io/otel v1.24.0 h1:0LAOdjNmQeSTzGBzduGe/rU4tZhMwL5rWgtp9Ku5Jfo= go.opentelemetry.io/otel v1.24.0/go.mod h1:W7b9Ozg4nkF5tWI5zsXkaKKDjdVjpD4oAt9Qi/MArHo= 
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.19.0 h1:Mne5On7VWdx7omSrSSZvM4Kw7cS7NQkOOmLcgscI51U= go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.19.0/go.mod h1:IPtUMKL4O3tH5y+iXVyAXqpAwMuzC1IrxVS81rummfE= go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.19.0 h1:IeMeyr1aBvBiPVYihXIaeIZba6b8E1bYp7lbdxK8CQg= go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.19.0/go.mod h1:oVdCUtjq9MK9BlS7TtucsQwUcXcymNiEDjgDD2jMtZU= go.opentelemetry.io/otel/metric v1.24.0 h1:6EhoGWWK28x1fbpA4tYTOWBkPefTDQnb8WSGXlc88kI= go.opentelemetry.io/otel/metric v1.24.0/go.mod h1:VYhLe1rFfxuTXLgj4CBiyz+9WYBA8pNGJgDcSFRKBco= go.opentelemetry.io/otel/sdk v1.21.0 h1:FTt8qirL1EysG6sTQRZ5TokkU8d0ugCj8htOgThZXQ8= go.opentelemetry.io/otel/sdk v1.21.0/go.mod h1:Nna6Yv7PWTdgJHVRD9hIYywQBRx7pbox6nwBnZIxl/E= go.opentelemetry.io/otel/trace v1.24.0 h1:CsKnnL4dUAr/0llH9FKuc698G04IrpWV0MQA/Y1YELI= go.opentelemetry.io/otel/trace v1.24.0/go.mod h1:HPc3Xr/cOApsBI154IU0OI0HJexz+aw5uPdbs3UCjNU= go.opentelemetry.io/proto/otlp v0.7.0/go.mod h1:PqfVotwruBrMGOCsRd/89rSnXhoiJIqeYNgFYFoEGnI= go.opentelemetry.io/proto/otlp v1.0.0 h1:T0TX0tmXU8a3CbNXzEKGeU5mIVOdf0oykP+u2lIVU/I= go.opentelemetry.io/proto/otlp v1.0.0/go.mod h1:Sy6pihPLfYHkr3NkUbEhGHFhINUSI/v80hjKIs5JXpM= go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= go.uber.org/atomic v1.9.0 h1:ECmE8Bn/WFTYwEW/bpKD3M8VtR/zQVbavAoalC1PYyE= go.uber.org/atomic v1.9.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= go.uber.org/multierr v1.6.0/go.mod h1:cdWPpRnG4AhwMwsgIHip0KRBQjJy5kYEpYjJxpXp9iU= go.uber.org/multierr v1.9.0 h1:7fIwc/ZtS0q++VgcfqFDxSBZVv/Xo49/SYnDFupUwlI= go.uber.org/multierr v1.9.0/go.mod h1:X2jQV1h+kxSjClGpnseKVIxpmcjrj7MNnI0bnlfKTVQ= go.uber.org/zap v1.17.0/go.mod h1:MXVU+bhUf/A7Xi2HNOnopQOrmycQ5Ih87HtOu4q5SSo= golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20181029021203-45a5f77698d3/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20190820162420-60c769a6c586/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20190923035154-9ee001bba392/go.mod h1:/lpIB1dKB+9EgE3H3cr1v9wB50oz8l4C4h62xy7jSTY= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20210817164053-32db794688a5/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= golang.org/x/crypto v0.40.0 h1:r4x+VvoG5Fm+eJcxMaY8CQM7Lb0l1lsmjGBQ6s8BfKM= golang.org/x/crypto v0.40.0/go.mod h1:Qr1vMER5WyS2dfPHAlsOj01wgLbsyWtFn/aY+5+ZdxY= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod 
h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8= golang.org/x/exp v0.0.0-20190829153037-c13cbed26979/go.mod h1:86+5VVa7VpoJ4kLfm080zCjGlMRFzhUhsZKEZO7MGek= golang.org/x/exp v0.0.0-20191030013958-a1ab85dbe136/go.mod h1:JXzH8nQsPlswgeRAPE3MuO9GYsAcnJvJ4vnMwN/5qkY= golang.org/x/exp v0.0.0-20191129062945-2f5052295587/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4= golang.org/x/exp v0.0.0-20191227195350-da58074b4299/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4= golang.org/x/exp v0.0.0-20200119233911-0405dc783f0a/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4= golang.org/x/exp v0.0.0-20200207192155-f17229e696bd/go.mod h1:J/WKrq2StrnmMY6+EHIKF9dgMWnmCNThgcyBT1FY9mM= golang.org/x/exp v0.0.0-20200224162631-6cc2880d07d6/go.mod h1:3jZMyOhIsHpP37uCMkUooju7aAi5cS1Q23tOzKc+0MU= golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 h1:2dVuKD2vS7b0QIHQbpyTISPd0LeHDbnYEryqj5Q1ug8= golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56/go.mod h1:M4RDyNAINzryxdtnbRXRL/OHtkFuWGRjvuhBJpk2IlY= golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js= golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= golang.org/x/lint v0.0.0-20190301231843-5614ed5bae6f/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= golang.org/x/lint v0.0.0-20190409202823-959b441ac422/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= golang.org/x/lint v0.0.0-20190909230951-414d861bb4ac/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= golang.org/x/lint v0.0.0-20191125180803-fdd1cda4f05f/go.mod h1:5qLYkcX4OjUUV8bRuDixDT3tpyyb+LUpUlRWLxfhWrs= golang.org/x/lint v0.0.0-20200130185559-910be7a94367/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= golang.org/x/lint v0.0.0-20200302205851-738671d3881b/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= golang.org/x/lint v0.0.0-20201208152925-83fdc39ff7b5/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= golang.org/x/lint v0.0.0-20210508222113-6edffad5e616/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= golang.org/x/mobile v0.0.0-20190312151609-d3739f865fa6/go.mod h1:z+o9i4GpDbdi3rU15maQ/Ox0txvL9dWGYEHz965HBQE= golang.org/x/mobile v0.0.0-20190719004257-d2bd2a29d028/go.mod h1:E/iHnbuqvinMTCcRqshq8CkpyQDoeVncDDYHnLhea+o= golang.org/x/mod v0.0.0-20190513183733-4bf6d317e70e/go.mod h1:mXi4GBBbnImb6dmsKGUJ2LatrhH/nqhxcFungHvyanc= golang.org/x/mod v0.1.0/go.mod h1:0QHyrYULN0/3qlju5TqG8bIK38QM8yzMo5ekMj3DlcY= golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg= golang.org/x/mod v0.1.1-0.20191107180719-034126e5016b/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg= golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.4.0/go.mod 
h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.4.1/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.5.0/go.mod h1:5OXOZSfqPIIbmVBIIKWRFfZjPR0E5r58TLhUjH0a2Ro= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= golang.org/x/mod v0.26.0 h1:EGMPT//Ezu+ylkCijjPc+f4Aih7sZvaAr+O3EHBxvZg= golang.org/x/mod v0.26.0/go.mod h1:/j6NAhSk8iQ723BGAUyoAcn7SlD7s15Dp9Nd/SfeaFQ= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20181023162649-9b4f9f5ad519/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190501004415-9ce7a6920f09/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190503192946-f4e77d36d62c/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= golang.org/x/net v0.0.0-20190613194153-d28f0bde5980/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20190628185345-da137c7871d7/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20190724013045-ca1201d0de80/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20190923162816-aa69164e4478/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20191209160850-c0dbc17a3553/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200114155413-6afb5195e5aa/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200222125558-5a598a2470a0/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200301022130-244492dfa37a/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/net v0.0.0-20200501053045-e0ff5e5a1de5/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/net v0.0.0-20200506145744-7e3656a0809f/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/net v0.0.0-20200513185701-a91f0712d120/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/net v0.0.0-20200520182314-0ba52f642ac2/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/net v0.0.0-20200625001655-4c5254603344/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= 
golang.org/x/net v0.0.0-20200707034311-ab3426394381/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20201031054903-ff519b6c9102/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20201209123823-ac852fbbde11/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20210119194325-5f4716e94777/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20210316092652-d523dce5a7f4/go.mod h1:RBQZq4jEuRlivfhVLdyRGr576XBO4/greRjx4P4O3yc= golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM= golang.org/x/net v0.0.0-20210410081132-afb366fc7cd1/go.mod h1:9tjilg8BloeKEkVJvy7fQ90B1CfIiPueXVOjqfkSzI8= golang.org/x/net v0.0.0-20210503060351-7fd8e65b6420/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20210813160813-60bc85c4be6d/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= golang.org/x/net v0.42.0 h1:jzkYrhi3YQWD6MLBJcsklgQsoAcw89EcZbJw8Z614hs= golang.org/x/net v0.42.0/go.mod h1:FF1RA5d3u7nAYA4z2TkclSCKh68eSXtiFwcWQpPXdt8= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20191202225959-858c2ad4c8b6/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20200902213428-5d25da1a8d43/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= golang.org/x/oauth2 v0.0.0-20201109201403-9fd604954f58/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= golang.org/x/oauth2 v0.0.0-20201208152858-08078c50e5b5/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= golang.org/x/oauth2 v0.0.0-20210218202405-ba52d332ba99/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= golang.org/x/oauth2 v0.0.0-20210220000619-9bb904979d93/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= golang.org/x/oauth2 v0.0.0-20210313182246-cd4f82c27b84/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= golang.org/x/oauth2 v0.0.0-20210514164344-f6687ab2804c/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= golang.org/x/oauth2 v0.0.0-20210628180205-a41e5a781914/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= golang.org/x/oauth2 v0.0.0-20210805134026-6f1e6394065a/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= golang.org/x/oauth2 v0.0.0-20210819190943-2bc19b11175f/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= golang.org/x/oauth2 v0.0.0-20211005180243-6b3c2da341f1/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= golang.org/x/oauth2 
v0.0.0-20211104180415-d3ed0bb246c8/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20200317015054-43a5402ce75a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20200625203802-6e8e738ad208/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.16.0 h1:ycBJEhp9p4vXvUZNszeOq0kGTPghopOL8q0fq3vstxw= golang.org/x/sync v0.16.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= golang.org/x/sys v0.0.0-20180823144017-11551d06cbcc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20181026203630-95b1ffbd15a5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190502145724-3ef323f4f1fd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190507160741-ecd444e8653b/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190606165138-5da285871e9c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190624142023-c5567b49c5d0/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190726091711-fc99dfbffb4e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190922100055-0a153f010e69/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190924154521-2837fb4f24fe/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191001151750-bb3f8db39f24/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191008105621-543471e840be/go.mod 
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191228213918-04cbcbbfeed8/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200113162924-86b910548bc1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200122134326-e047566fdf82/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200124204421-9fbb57f87de9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200202164722-d101bd2416d5/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200212091648-12a6c2dcc1e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200302150141-5c8b2ff67527/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200331124033-c3d80250170d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200501052902-10377860bb8e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200511232937-7e40ca221e25/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200515095857-1151b9dac4a9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200523222454-059865788121/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200803210538-64077c9b5642/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200905004654-be1d3432aa8f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201201145000-ef89a241ccb3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210104204734-6f8348627aad/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210119212857-b64e53b001e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210220050731-9a76102bfb43/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210303074136-134d130e1a04/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210305230114-8fe3ee5dd75b/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210315160823-c6e025ad8005/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210320140829-1e4c9ba3b0c4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210403161142-5e06dd20ab57/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod 
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210514084401-e8d321eab015/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210603125802-9665404d3644/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210616094352-59db8d763f22/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210806184541-e5e7981a1069/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210816183151-1e6c022a8912/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210823070655-63515b42dcdf/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210908233432-aa78b53d3365/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210927094055-39ccf1dd6fa6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211025201205-69cdffdb9359/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211124211545-fe61309f8881/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211205182925-97ca703d548d/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220906165534-d0df966e6959/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.34.0 h1:H5Y5sJ2L2JRdyv7ROF1he/lPdvFsd0mJHFw2ThKHxLA= golang.org/x/sys v0.34.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.33.0 h1:NuFncQrRcaRvVmgRkvM3j/F00gWIAlcmlB8ACEKmGIg= golang.org/x/term v0.33.0/go.mod h1:s18+ql9tYWp1IfpV9DmCtQDDSRBUjKaw9M1eAv5UeF0= golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.4/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod 
h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.27.0 h1:4fGWRpyh641NLlecmyl4LOe6yDdfaYNrGb2zdfo4JV4= golang.org/x/text v0.27.0/go.mod h1:1D28KMCvyooCX9hBiosv5Tz/+YLxj0j7XhWjpSUF7CU= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.12.0 h1:ScB/8o8olJvc+CQPWrK3fPZNfh7qgwCrY0zJmoEQLSE= golang.org/x/time v0.12.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20190312151545-0bb0c0a6e846/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20190312170243-e65039ee4138/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20190425150028-36563e24a262/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= golang.org/x/tools v0.0.0-20190506145303-2d16b83fe98c/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= golang.org/x/tools v0.0.0-20190606124116-d0a3d012864b/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= golang.org/x/tools v0.0.0-20190621195816-6e04913cbbac/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= golang.org/x/tools v0.0.0-20190628153133-6cdbf07be9d0/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= golang.org/x/tools v0.0.0-20190816200558-6889da9d5479/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20190907020128-2ca718005c18/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20190911174233-4f2ddba30aff/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191012152004-8de300cfc20a/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191113191852-77e3bb0ad9e7/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191115202509-3a792d9c32b2/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191125144606-a911d9008d1f/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191130070609-6e064ea0cf2d/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191216173652-a0e659d51361/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= golang.org/x/tools v0.0.0-20191227053925-7b8e75db28f4/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= golang.org/x/tools v0.0.0-20200117161641-43d50277825c/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= golang.org/x/tools v0.0.0-20200122220014-bf1340f18c4a/go.mod 
h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= golang.org/x/tools v0.0.0-20200204074204-1cc6d1ef6c74/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= golang.org/x/tools v0.0.0-20200207183749-b753a1ba74fa/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= golang.org/x/tools v0.0.0-20200212150539-ea181f53ac56/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= golang.org/x/tools v0.0.0-20200224181240-023911ca70b2/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= golang.org/x/tools v0.0.0-20200227222343-706bc42d1f0d/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= golang.org/x/tools v0.0.0-20200304193943-95d2e580d8eb/go.mod h1:o4KQGtdN14AW+yjsvvwRTJJuXz8XRtIHtEnmAXLyFUw= golang.org/x/tools v0.0.0-20200312045724-11d5b4c81c7d/go.mod h1:o4KQGtdN14AW+yjsvvwRTJJuXz8XRtIHtEnmAXLyFUw= golang.org/x/tools v0.0.0-20200331025713-a30bf2db82d4/go.mod h1:Sl4aGygMT6LrqrWclx+PTx3U+LnKx/seiNR+3G19Ar8= golang.org/x/tools v0.0.0-20200501065659-ab2804fb9c9d/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20200512131952-2bc93b1c0c88/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20200515010526-7d3b6ebf133d/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20200618134242-20370b0cb4b2/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20200729194436-6467de6f59a7/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA= golang.org/x/tools v0.0.0-20200804011535-6c149bb5ef0d/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA= golang.org/x/tools v0.0.0-20200825202427-b303f430e36d/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA= golang.org/x/tools v0.0.0-20200904185747-39188db58858/go.mod h1:Cj7w3i3Rnn0Xh82ur9kSqwfTHTeVxaDqrfMjpcNT6bE= golang.org/x/tools v0.0.0-20201110124207-079ba7bd75cd/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.0.0-20201201161351-ac6f37ff4c2a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.0.0-20201208233053-a543418bbed2/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.0.0-20210105154028-b0ab187a4818/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.1.0/go.mod h1:xkSsbof2nBLbhDlRMhhhyNLN/zl3eTqcnHD5viDpcZ0= golang.org/x/tools v0.1.1/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/tools v0.1.2/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/tools v0.1.3/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/tools v0.1.4/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/tools v0.1.5/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod 
h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 h1:H2TDz8ibqkAF6YGhCdN3jS9O0/s90v0rJh3X/OLHEUk= golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2/go.mod h1:K8+ghG5WaK9qNqU5K3HdILfMLy1f3aNYFI/wnl100a8= google.golang.org/api v0.4.0/go.mod h1:8k5glujaEP+g9n7WNsDg8QP6cUVNI86fCNMcbazEtwE= google.golang.org/api v0.7.0/go.mod h1:WtwebWUNSVBH/HAw79HIFXZNqEvBhG+Ra+ax0hx3E3M= google.golang.org/api v0.8.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg= google.golang.org/api v0.9.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg= google.golang.org/api v0.13.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI= google.golang.org/api v0.14.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI= google.golang.org/api v0.15.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI= google.golang.org/api v0.17.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= google.golang.org/api v0.18.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= google.golang.org/api v0.19.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= google.golang.org/api v0.20.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= google.golang.org/api v0.22.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= google.golang.org/api v0.24.0/go.mod h1:lIXQywCXRcnZPGlsd8NbLnOjtAoL6em04bJ9+z0MncE= google.golang.org/api v0.28.0/go.mod h1:lIXQywCXRcnZPGlsd8NbLnOjtAoL6em04bJ9+z0MncE= google.golang.org/api v0.29.0/go.mod h1:Lcubydp8VUV7KeIHD9z2Bys/sm/vGKnG1UHuDBSrHWM= google.golang.org/api v0.30.0/go.mod h1:QGmEvQ87FHZNiUVJkT14jQNYJ4ZJjdRF23ZXz5138Fc= google.golang.org/api v0.35.0/go.mod h1:/XrVsuzM0rZmrsbjJutiuftIzeuTQcEeaYcSk/mQ1dg= google.golang.org/api v0.36.0/go.mod h1:+z5ficQTmoYpPn8LCUNVpK5I7hwkpjbcgqA7I34qYtE= google.golang.org/api v0.40.0/go.mod h1:fYKFpnQN0DsDSKRVRcQSDQNtqWPfM9i+zNPxepjRCQ8= google.golang.org/api v0.41.0/go.mod h1:RkxM5lITDfTzmyKFPt+wGrCJbVfniCr2ool8kTBzRTU= google.golang.org/api v0.43.0/go.mod h1:nQsDGjRXMo4lvh5hP0TKqF244gqhGcr/YSIykhUk/94= google.golang.org/api v0.47.0/go.mod h1:Wbvgpq1HddcWVtzsVLyfLp8lDg6AA241LmgIL59tHXo= google.golang.org/api v0.48.0/go.mod h1:71Pr1vy+TAZRPkPs/xlCf5SsU8WjuAWv1Pfjbtukyy4= google.golang.org/api v0.50.0/go.mod h1:4bNT5pAuq5ji4SRZm+5QIkjny9JAyVD/3gaSihNefaw= google.golang.org/api v0.51.0/go.mod h1:t4HdrdoNgyN5cbEfm7Lum0lcLDLiise1F8qDKX00sOU= google.golang.org/api v0.54.0/go.mod h1:7C4bFFOvVDGXjfDTAsgGwDgAxRDeQ4X8NvUedIt6z3k= google.golang.org/api v0.55.0/go.mod h1:38yMfeP1kfjsl8isn0tliTjIb1rJXcQi4UXlbqivdVE= google.golang.org/api v0.56.0/go.mod h1:38yMfeP1kfjsl8isn0tliTjIb1rJXcQi4UXlbqivdVE= google.golang.org/api v0.57.0/go.mod h1:dVPlbZyBo2/OjBpmvNdpn2GRm6rPy75jyU7bmhdrMgI= google.golang.org/api v0.59.0/go.mod h1:sT2boj7M9YJxZzgeZqXogmhfmRWDtPzT31xkieUbuZU= google.golang.org/api v0.61.0/go.mod h1:xQRti5UdCmoCEqFxcz93fTl338AVqDgyaDRuOZ3hg9I= google.golang.org/api v0.62.0/go.mod h1:dKmwPCydfsad4qCH08MSdgWjfHOyfpd4VtDGgRFdavw= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= google.golang.org/appengine v1.5.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= google.golang.org/appengine v1.6.1/go.mod h1:i06prIuMbXzDqacNJfV5OdTW448YApPu5ww/cMBSeb0= google.golang.org/appengine v1.6.5/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= google.golang.org/appengine v1.6.6/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= google.golang.org/appengine 
v1.6.7/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= google.golang.org/genproto v0.0.0-20190307195333-5fe7a883aa19/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= google.golang.org/genproto v0.0.0-20190418145605-e7d98fc518a7/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= google.golang.org/genproto v0.0.0-20190425155659-357c62f0e4bb/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= google.golang.org/genproto v0.0.0-20190502173448-54afdca5d873/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= google.golang.org/genproto v0.0.0-20190801165951-fa694d86fc64/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= google.golang.org/genproto v0.0.0-20190911173649-1774047e7e51/go.mod h1:IbNlFCBrqXvoKpeg0TB2l7cyZUmoaFKYIwrEpbDKLA8= google.golang.org/genproto v0.0.0-20191108220845-16a3f7862a1a/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= google.golang.org/genproto v0.0.0-20191115194625-c23dd37a84c9/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= google.golang.org/genproto v0.0.0-20191216164720-4f79533eabd1/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= google.golang.org/genproto v0.0.0-20191230161307-f3c370f40bfb/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= google.golang.org/genproto v0.0.0-20200115191322-ca5a22157cba/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= google.golang.org/genproto v0.0.0-20200122232147-0452cf42e150/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= google.golang.org/genproto v0.0.0-20200204135345-fa8e72b47b90/go.mod h1:GmwEX6Z4W5gMy59cAlVYjN9JhxgbQH6Gn+gFDQe2lzA= google.golang.org/genproto v0.0.0-20200212174721-66ed5ce911ce/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= google.golang.org/genproto v0.0.0-20200224152610-e50cd9704f63/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= google.golang.org/genproto v0.0.0-20200228133532-8c2c7df3a383/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= google.golang.org/genproto v0.0.0-20200305110556-506484158171/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= google.golang.org/genproto v0.0.0-20200312145019-da6875a35672/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= google.golang.org/genproto v0.0.0-20200331122359-1ee6d9798940/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= google.golang.org/genproto v0.0.0-20200430143042-b979b6f78d84/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= google.golang.org/genproto v0.0.0-20200511104702-f5ebc3bea380/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= google.golang.org/genproto v0.0.0-20200513103714-09dca8ec2884/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= google.golang.org/genproto v0.0.0-20200515170657-fc4c6c6a6587/go.mod h1:YsZOwe1myG/8QRHRsmBRE1LrgQY60beZKjly0O1fX9U= google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= google.golang.org/genproto v0.0.0-20200618031413-b414f8b61790/go.mod h1:jDfRM7FcilCzHH/e9qn6dsT145K34l5v+OpcnNgKAAA= google.golang.org/genproto v0.0.0-20200729003335-053ba62fc06f/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= google.golang.org/genproto v0.0.0-20200804131852-c06518451d9c/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= google.golang.org/genproto v0.0.0-20200825200019-8632dd797987/go.mod 
h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= google.golang.org/genproto v0.0.0-20200904004341-0bd0a958aa1d/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= google.golang.org/genproto v0.0.0-20201109203340-2640f1f9cdfb/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= google.golang.org/genproto v0.0.0-20201201144952-b05cb90ed32e/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= google.golang.org/genproto v0.0.0-20201210142538-e3217bee35cc/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= google.golang.org/genproto v0.0.0-20201214200347-8c77b98c765d/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= google.golang.org/genproto v0.0.0-20210222152913-aa3ee6e6a81c/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= google.golang.org/genproto v0.0.0-20210303154014-9728d6b83eeb/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= google.golang.org/genproto v0.0.0-20210310155132-4ce2db91004e/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= google.golang.org/genproto v0.0.0-20210319143718-93e7006c17a6/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= google.golang.org/genproto v0.0.0-20210402141018-6c239bbf2bb1/go.mod h1:9lPAdzaEmUacj36I+k7YKbEc5CXzPIeORRgDAUOu28A= google.golang.org/genproto v0.0.0-20210513213006-bf773b8c8384/go.mod h1:P3QM42oQyzQSnHPnZ/vqoCdDmzH28fzWByN9asMeM8A= google.golang.org/genproto v0.0.0-20210602131652-f16073e35f0c/go.mod h1:UODoCrxHCcBojKKwX1terBiRUaqAsFqJiF615XL43r0= google.golang.org/genproto v0.0.0-20210604141403-392c879c8b08/go.mod h1:UODoCrxHCcBojKKwX1terBiRUaqAsFqJiF615XL43r0= google.golang.org/genproto v0.0.0-20210608205507-b6d2f5bf0d7d/go.mod h1:UODoCrxHCcBojKKwX1terBiRUaqAsFqJiF615XL43r0= google.golang.org/genproto v0.0.0-20210624195500-8bfb893ecb84/go.mod h1:SzzZ/N+nwJDaO1kznhnlzqS8ocJICar6hYhVyhi++24= google.golang.org/genproto v0.0.0-20210713002101-d411969a0d9a/go.mod h1:AxrInvYm1dci+enl5hChSFPOmmUF1+uAa/UsgNRWd7k= google.golang.org/genproto v0.0.0-20210716133855-ce7ef5c701ea/go.mod h1:AxrInvYm1dci+enl5hChSFPOmmUF1+uAa/UsgNRWd7k= google.golang.org/genproto v0.0.0-20210728212813-7823e685a01f/go.mod h1:ob2IJxKrgPT52GcgX759i1sleT07tiKowYBGbczaW48= google.golang.org/genproto v0.0.0-20210805201207-89edb61ffb67/go.mod h1:ob2IJxKrgPT52GcgX759i1sleT07tiKowYBGbczaW48= google.golang.org/genproto v0.0.0-20210813162853-db860fec028c/go.mod h1:cFeNkxwySK631ADgubI+/XFU/xp8FD5KIVV4rj8UC5w= google.golang.org/genproto v0.0.0-20210821163610-241b8fcbd6c8/go.mod h1:eFjDcFEctNawg4eG61bRv87N7iHBWyVhJu7u1kqDUXY= google.golang.org/genproto v0.0.0-20210828152312-66f60bf46e71/go.mod h1:eFjDcFEctNawg4eG61bRv87N7iHBWyVhJu7u1kqDUXY= google.golang.org/genproto v0.0.0-20210831024726-fe130286e0e2/go.mod h1:eFjDcFEctNawg4eG61bRv87N7iHBWyVhJu7u1kqDUXY= google.golang.org/genproto v0.0.0-20210903162649-d08c68adba83/go.mod h1:eFjDcFEctNawg4eG61bRv87N7iHBWyVhJu7u1kqDUXY= google.golang.org/genproto v0.0.0-20210909211513-a8c4777a87af/go.mod h1:eFjDcFEctNawg4eG61bRv87N7iHBWyVhJu7u1kqDUXY= google.golang.org/genproto v0.0.0-20210924002016-3dee208752a0/go.mod h1:5CzLGKJ67TSI2B9POpiiyGha0AjJvZIUgRMt1dSmuhc= google.golang.org/genproto v0.0.0-20211008145708-270636b82663/go.mod h1:5CzLGKJ67TSI2B9POpiiyGha0AjJvZIUgRMt1dSmuhc= google.golang.org/genproto v0.0.0-20211028162531-8db9c33dc351/go.mod h1:5CzLGKJ67TSI2B9POpiiyGha0AjJvZIUgRMt1dSmuhc= google.golang.org/genproto v0.0.0-20211118181313-81c1377c94b1/go.mod h1:5CzLGKJ67TSI2B9POpiiyGha0AjJvZIUgRMt1dSmuhc= google.golang.org/genproto v0.0.0-20211129164237-f09f9a12af12/go.mod 
h1:5CzLGKJ67TSI2B9POpiiyGha0AjJvZIUgRMt1dSmuhc= google.golang.org/genproto v0.0.0-20211203200212-54befc351ae9/go.mod h1:5CzLGKJ67TSI2B9POpiiyGha0AjJvZIUgRMt1dSmuhc= google.golang.org/genproto v0.0.0-20211206160659-862468c7d6e0/go.mod h1:5CzLGKJ67TSI2B9POpiiyGha0AjJvZIUgRMt1dSmuhc= google.golang.org/genproto v0.0.0-20211208223120-3a66f561d7aa/go.mod h1:5CzLGKJ67TSI2B9POpiiyGha0AjJvZIUgRMt1dSmuhc= google.golang.org/genproto v0.0.0-20240213162025-012b6fc9bca9 h1:9+tzLLstTlPTRyJTh+ah5wIMsBW5c4tQwGTN3thOW9Y= google.golang.org/genproto v0.0.0-20240213162025-012b6fc9bca9/go.mod h1:mqHbVIp48Muh7Ywss/AD6I5kNVKZMmAa/QEW58Gxp2s= google.golang.org/genproto/googleapis/api v0.0.0-20240311132316-a219d84964c2 h1:rIo7ocm2roD9DcFIX67Ym8icoGCKSARAiPljFhh5suQ= google.golang.org/genproto/googleapis/api v0.0.0-20240311132316-a219d84964c2/go.mod h1:O1cOfN1Cy6QEYr7VxtjOyP5AdAuR0aJ/MYZaaof623Y= google.golang.org/genproto/googleapis/rpc v0.0.0-20240401170217-c3f982113cda h1:LI5DOvAxUPMv/50agcLLoo+AdWc1irS9Rzz4vPuD1V4= google.golang.org/genproto/googleapis/rpc v0.0.0-20240401170217-c3f982113cda/go.mod h1:WtryC6hu0hhx87FDGxWCDptyssuo68sk10vYjF+T9fY= google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38= google.golang.org/grpc v1.21.1/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM= google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY= google.golang.org/grpc v1.26.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= google.golang.org/grpc v1.27.1/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= google.golang.org/grpc v1.28.0/go.mod h1:rpkK4SK4GF4Ach/+MFLZUBavHOvF2JJB5uozKKal+60= google.golang.org/grpc v1.29.1/go.mod h1:itym6AZVZYACWQqET3MqgPpjcuV5QH3BxFS3IjizoKk= google.golang.org/grpc v1.30.0/go.mod h1:N36X2cJ7JwdamYAgDz+s+rVMFjt3numwzf/HckM8pak= google.golang.org/grpc v1.31.0/go.mod h1:N36X2cJ7JwdamYAgDz+s+rVMFjt3numwzf/HckM8pak= google.golang.org/grpc v1.31.1/go.mod h1:N36X2cJ7JwdamYAgDz+s+rVMFjt3numwzf/HckM8pak= google.golang.org/grpc v1.33.1/go.mod h1:fr5YgcSWrqhRRxogOsw7RzIpsmvOZ6IcH4kBYTpR3n0= google.golang.org/grpc v1.33.2/go.mod h1:JMHMWHQWaTccqQQlmk3MJZS+GWXOdAesneDmEnv2fbc= google.golang.org/grpc v1.34.0/go.mod h1:WotjhfgOW/POjDeRt8vscBtXq+2VjORFy659qA51WJ8= google.golang.org/grpc v1.35.0/go.mod h1:qjiiYl8FncCW8feJPdyg3v6XW24KsRHe+dy9BAGRRjU= google.golang.org/grpc v1.36.0/go.mod h1:qjiiYl8FncCW8feJPdyg3v6XW24KsRHe+dy9BAGRRjU= google.golang.org/grpc v1.36.1/go.mod h1:qjiiYl8FncCW8feJPdyg3v6XW24KsRHe+dy9BAGRRjU= google.golang.org/grpc v1.37.0/go.mod h1:NREThFqKR1f3iQ6oBuvc5LadQuXVGo9rkm5ZGrQdJfM= google.golang.org/grpc v1.37.1/go.mod h1:NREThFqKR1f3iQ6oBuvc5LadQuXVGo9rkm5ZGrQdJfM= google.golang.org/grpc v1.38.0/go.mod h1:NREThFqKR1f3iQ6oBuvc5LadQuXVGo9rkm5ZGrQdJfM= google.golang.org/grpc v1.39.0/go.mod h1:PImNr+rS9TWYb2O4/emRugxiyHZ5JyHW5F+RPnDzfrE= google.golang.org/grpc v1.39.1/go.mod h1:PImNr+rS9TWYb2O4/emRugxiyHZ5JyHW5F+RPnDzfrE= google.golang.org/grpc v1.40.0/go.mod h1:ogyxbiOoUXAkP+4+xa6PZSE9DZgIHtSpzjDTB9KAK34= google.golang.org/grpc v1.40.1/go.mod h1:ogyxbiOoUXAkP+4+xa6PZSE9DZgIHtSpzjDTB9KAK34= google.golang.org/grpc v1.42.0/go.mod h1:k+4IHHFw41K8+bbowsex27ge2rCb65oeWqe4jJ590SU= google.golang.org/grpc v1.62.1 h1:B4n+nfKzOICUXMgyrNd19h/I9oH0L1pizfk1d4zSgTk= 
google.golang.org/grpc v1.62.1/go.mod h1:IWTG0VlJLCh1SkC58F7np9ka9mx/WNkjl4PGJaiq+QE= google.golang.org/grpc/cmd/protoc-gen-go-grpc v1.1.0/go.mod h1:6Kw0yEErY5E/yWrBtf03jp27GLLJujG4z/JK95pnjjw= google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE= google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo= google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= google.golang.org/protobuf v1.24.0/go.mod h1:r/3tXBNzIEhYS9I1OUVjXDlt8tc493IdKGjtUeSXeh4= google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= google.golang.org/protobuf v1.35.2 h1:8Ar7bF+apOIoThw1EdZl0p1oWvMqTHmpA2fRTyZO8io= google.golang.org/protobuf v1.35.2/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= gopkg.in/ini.v1 v1.66.2/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k= gopkg.in/ini.v1 v1.67.0 h1:Dgnx+6+nfE+IfzjUEISNeydPJh9AXNNsWbGP9KzCsOA= gopkg.in/ini.v1 v1.67.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k= gopkg.in/warnings.v0 v0.1.2 h1:wFXVbFY8DY5/xOe1ECiWdKCzZlxgshcYVNkBHstARME= gopkg.in/warnings.v0 v0.1.2/go.mod h1:jksf8JmL6Qr/oQM2OXTHunEvvTAsrWBLb6OOjuVWRNI= gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.3/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.5/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gotest.tools/v3 v3.0.3 h1:4AuOwCGf4lLR9u3YOe2awrHygurzhO/HeQ6laiA6Sx0=
gotest.tools/v3 v3.0.3/go.mod h1:Z7Lb0S5l+klDB31fvDQX8ss/FlKDxtlFlw3Oa8Ymbl8=
honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
honnef.co/go/tools v0.0.0-20190106161140-3f1c8253044a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
honnef.co/go/tools v0.0.0-20190418001031-e561f6794a2a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg=
honnef.co/go/tools v0.0.1-2020.1.3/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k=
honnef.co/go/tools v0.0.1-2020.1.4/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k=
modernc.org/gc/v3 v3.0.0-20240107210532-573471604cb6 h1:5D53IMaUuA5InSeMu9eJtlQXS2NxAhyWQvkKEgXZhHI=
modernc.org/gc/v3 v3.0.0-20240107210532-573471604cb6/go.mod h1:Qz0X07sNOR1jWYCrJMEnbW/X55x206Q7Vt4mz6/wHp4=
modernc.org/libc v1.55.3 h1:AzcW1mhlPNrRtjS5sS+eW2ISCgSOLLNyFzRh/V3Qj/U=
modernc.org/libc v1.55.3/go.mod h1:qFXepLhz+JjFThQ4kzwzOjA/y/artDeg+pcYnY+Q83w=
modernc.org/mathutil v1.6.0 h1:fRe9+AmYlaej+64JsEEhoWuAYBkOtQiMEU7n/XgfYi4=
modernc.org/mathutil v1.6.0/go.mod h1:Ui5Q9q1TR2gFm0AQRqQUaBWFLAhQpCwNcuhBOSedWPo=
modernc.org/memory v1.8.0 h1:IqGTL6eFMaDZZhEWwcREgeMXYwmW83LYW8cROZYkg+E=
modernc.org/memory v1.8.0/go.mod h1:XPZ936zp5OMKGWPqbD3JShgd/ZoQ7899TUuQqxY+peU=
modernc.org/sqlite v1.33.1 h1:trb6Z3YYoeM9eDL1O8do81kP+0ejv+YzgyFo+Gwy0nM=
modernc.org/sqlite v1.33.1/go.mod h1:pXV2xHxhzXZsgT/RtTFAPY6JJDEvOTcTdwADQCCWD4k=
modernc.org/strutil v1.2.0 h1:agBi9dp1I+eOnxXeiZawM8F4LawKv4NzGWSaLfyeNZA=
modernc.org/strutil v1.2.0/go.mod h1:/mdcBmfOibveCTBxUl5B5l6W+TTH1FXPLHZE6bTosX0=
modernc.org/token v1.1.0 h1:Xl7Ap9dKaEs5kLoOQeQmPWevfnk/DM5qcLcYlA8ys6Y=
modernc.org/token v1.1.0/go.mod h1:UGzOrNV1mAFSEB63lOFHIpNRUVMvYTc6yu1SMY/XTDM=
rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8=
rsc.io/quote/v3 v3.1.0/go.mod h1:yEA65RcK8LyAZtP9Kv3t0HmxON59tX3rD+tICJqUlj0=
rsc.io/sampler v1.3.0/go.mod h1:T1hPZKmBbMNahiBKFy5HrXp6adAjACjK9JXDnKaTXpA=
sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY=

================================================
FILE: examples/sbom-meta/main.go
================================================
package main

import (
    "context"
    "encoding/json"
    "flag"
    "fmt"
    "os"
    "sort"
    "strings"
    "time"

    "github.com/dustin/go-humanize"
    "github.com/fatih/color"
    "github.com/jedib0t/go-pretty/v6/table"
    "github.com/jmoiron/sqlx"
    _ "github.com/lib/pq"

    "github.com/anchore/syft/syft"
    "github.com/caarlos0/env"
)

type config struct {
    Host     string `env:"CHAI_DB_HOST" envDefault:"localhost"`
    User     string `env:"CHAI_DB_USER" envDefault:"postgres"`
    Password string `env:"CHAI_DB_PASSWORD" envDefault:"s3cr3t"`
    Port     int    `env:"CHAI_DB_PORT" envDefault:"5435"`
}

type packageMeta struct {
    Name           string    `db:"name" json:"name"`
    Downloads      int64     `db:"downloads" json:"downloads"`
    Dependents     int64     `db:"dependents" json:"dependents,omitempty"`
    URL            string    `db:"url" json:"url"`
    FirstPublished time.Time `db:"first_published" json:"firstPublished"`
    LastPublished  time.Time `db:"last_published" json:"lastPublished"`
}

const packageMetaFullSQL = `
SELECT p.name,
    count(d.id) AS dependents,
    sum(v.downloads) AS downloads,
    min(u.url) AS url,
    min(v.published_at) AS "first_published",
    max(v.published_at) AS "last_published"
FROM packages AS p
JOIN dependencies AS d ON d.dependency_id = p.id
JOIN versions v ON v.package_id = p.id
JOIN package_urls pu ON pu.package_id = p.id
JOIN urls u ON u.id = pu.url_id
JOIN url_types ut ON u.url_type_id = ut.id
WHERE ut.name = 'repository' AND p.name = $1
GROUP BY p.name`

const packageMetaSQL = `
SELECT p.name,
    sum(v.downloads) AS downloads,
    min(u.url) AS url,
    min(v.published_at) AS "first_published",
    max(v.published_at) AS "last_published"
FROM packages AS p
JOIN versions v ON v.package_id = p.id
JOIN package_urls pu ON pu.package_id = p.id
JOIN urls u ON u.id = pu.url_id
JOIN url_types ut ON u.url_type_id = ut.id
WHERE ut.name = 'repository' AND p.name = $1
GROUP BY p.name`
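// NOTE: packageMetaFullSQL also counts dependents for a package via the
// dependencies join; main() below issues packageMetaSQL, which skips that
// join, so packageMeta.Dependents keeps its zero value (and is dropped from
// JSON output via `omitempty`).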
func main() {
    var sourcePath string
    var cfg config
    var jsonFlag = flag.Bool("json", false, "Output JSON")
    var sortFlag = flag.String("sort", "published,asc", "Sort by field,asc|desc")
    flag.Usage = usage
    flag.Parse()
    args := flag.Args()

    err := env.Parse(&cfg)
    if err != nil {
        panic(err)
    }

    // use the current directory if no source path is specified
    switch len(args) {
    case 0:
        sourcePath = "."
    case 1:
        sourcePath = args[0]
    default:
        usage()
        os.Exit(1)
    }
    sortArg := strings.ToLower(*sortFlag)

    // connect to the chai db, defaulting to the docker-compose setup
    connStr := fmt.Sprintf("postgresql://%s:%s@%s:%d/chai?sslmode=disable",
        cfg.User, cfg.Password, cfg.Host, cfg.Port)
    // fmt.Printf("connecting to: %s\n", connStr)
    db, err := sqlx.Open("postgres", connStr)
    if err != nil {
        panic(err)
    }

    // use syft to get the sbom
    src, err := syft.GetSource(context.Background(), sourcePath, nil)
    if err != nil {
        panic(err)
    }
    sbom, err := syft.CreateSBOM(context.Background(), src, nil)
    if err != nil {
        panic(err)
    }

    pms := []packageMeta{}
    for p := range sbom.Artifacts.Packages.Enumerate() {
        rs := []packageMeta{}
        err = db.Select(&rs, packageMetaSQL, p.Name)
        if err != nil {
            panic(err)
        }
        pms = append(pms, rs...)
    }
    pms = dedupePackages(pms)

    sort.Slice(pms, func(i, j int) bool {
        switch sortArg {
        case "package", "package,asc":
            return pms[i].Name < pms[j].Name
        case "package,desc":
            return pms[i].Name > pms[j].Name
        case "repository", "repository,asc":
            return pms[i].URL < pms[j].URL
        case "repository,desc":
            return pms[i].URL > pms[j].URL
        case "published", "published,asc":
            return pms[i].LastPublished.After(pms[j].LastPublished)
        case "published,desc":
            return pms[i].LastPublished.Before(pms[j].LastPublished)
        case "downloads", "downloads,asc":
            return pms[i].Downloads < pms[j].Downloads
        case "downloads,desc":
            return pms[i].Downloads > pms[j].Downloads
        default:
            return pms[i].Name < pms[j].Name
        }
    })

    if *jsonFlag {
        js, err := json.Marshal(pms)
        if err != nil {
            panic(err)
        }
        fmt.Printf("%s", js)
    } else {
        printPackagesMeta(pms)
    }
}

func printPackagesMeta(pms []packageMeta) {
    t := table.NewWriter()
    t.SetOutputMirror(os.Stdout)
    t.AppendHeader(table.Row{"Package", "Repository", "Published", "Downloads"})
    t.SetColumnConfigs([]table.ColumnConfig{
        {Name: "Package"},
        {Name: "Repository"},
        {Name: "Published", Transformer: formatTime},
        {Name: "Downloads", Transformer: formatNumber},
    })
    for _, pm := range pms {
        p := color.New(color.FgHiGreen).Sprint(pm.Name)
        u := pm.URL
        t.Style().Options.DrawBorder = false
        t.AppendRow(table.Row{p, u, pm.LastPublished, pm.Downloads})
    }
    t.Render()
}

func formatTime(val interface{}) string {
    if t, ok := val.(time.Time); ok {
        return humanize.Time(t)
    }
    return "Bad time format"
}

func formatNumber(val interface{}) string {
    if n, ok := val.(int64); ok {
        return humanize.Comma(n)
    }
    return "NaN"
}

func dedupePackages(pms []packageMeta) []packageMeta {
    pns := make(map[string]bool)
    dd := []packageMeta{}
    for _, pm := range pms {
        if _, seen := pns[pm.Name]; !seen {
            pns[pm.Name] = true
            dd = append(dd, pm)
        }
    }
    return dd
}

func usage() {
    fmt.Println("sbom-meta [SOURCE]")
    flag.PrintDefaults()
}
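Given the flags registered in `main` above (`-json` and `-sort`) and the `usage` banner, an invocation might look like the sketch below. This assumes it is run from `examples/sbom-meta/` with the CHAI database reachable via the `CHAI_DB_*` environment variables; the project path is a placeholder:

```sh
# scan a project, sort results by download count, and emit JSON
go run . -json -sort downloads,desc /path/to/project
```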
formatNumber(val interface{}) string { if n, ok := val.(int64); ok { return humanize.Comma(n) } return "NaN" } func dedupePackages(pms []packageMeta) []packageMeta { pns := make(map[string]bool) dd := []packageMeta{} for _, pm := range pms { if _, ok := pns[pm.Name]; !ok { pns[pm.Name] = true dd = append(dd, pm) } } return dd } func usage() { fmt.Println("sbom-meta [SOURCE]") flag.PrintDefaults() } ================================================ FILE: examples/visualizer/README.md ================================================ # Visualizer An example Chai application that displays a graphical representation of a specific package. ## Requirements 1. [python]: version 3.11 2. [pip]: Ensure you have pip installed 3. [virtualenv]: It's recommended to use a virtual environment to manage dependencies ## Getting Started 1. Set up a virtual environment ```sh python -m venv venv source venv/bin/activate ``` 2. Install required packages ```sh pip install -r requirements.txt ``` 3. Ensure `CHAI_DATABASE_URL` is available as an environment variable. The default value from our docker config is below: ```sh export CHAI_DATABASE_URL=postgresql://postgres:s3cr3t@localhost:5432/chai ``` ## Usage 1. Start the [Chai DB](https://github.com/teaxyz/chai-oss) with `docker compose up`. 1. Run the visualizer, passing the package you want to graph: ```sh python main.py <package> ``` ### Arguments - `package` (required): The package to visualize - `--depth`: Maximum depth to go to. Default is `9999`, meaning all possible depths - `--profile`: Enable performance profiling. Default is `False`. - `--image-type`: File type for the saved image, `svg` or `png`. Default is `svg`. ## Share your visuals If you create interesting visuals, share them on our [Discord]. Feel free to mess around and create alternate ways to generate them. [python]: https://www.python.org [pip]: https://pip.pypa.io/en/stable/installation/ [virtualenv]: https://virtualenv.pypa.io/en/latest/ [Discord]: https://discord.com/invite/tea-906608167901876256 ================================================ FILE: examples/visualizer/main.py ================================================ import argparse import cProfile import pstats from os import getenv from pstats import SortKey import psycopg2 import rustworkx as rx from rustworkx.visualization import graphviz_draw from tabulate import tabulate CHAI_DATABASE_URL = getenv("CHAI_DATABASE_URL") class Package: id: str name: str pagerank: float depth: int | None def __init__(self, id: str): self.id = id self.name = "" self.pagerank = 0 self.depth = None def __str__(self): return self.name class Graph(rx.PyDiGraph): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.node_index_map: dict[Package, int] = {} self._package_cache: dict[str, Package] = {} # The data model has IDs, but rustworkx uses indexes # Good news - it can index by object.
So, we're just keeping track of that def _get_or_create_package(self, pkg_id: str) -> Package: """A cache to avoid creating the same package multiple times""" if pkg_id not in self._package_cache: pkg = Package(pkg_id) self._package_cache[pkg_id] = pkg return self._package_cache[pkg_id] def safely_add_node(self, pkg_id: str) -> int: """Adds a node to the graph if it doesn't already exist""" pkg = self._get_or_create_package(pkg_id) if pkg not in self.node_index_map: index = super().add_node(pkg) self.node_index_map[pkg] = index return index return self.node_index_map[pkg] def safely_add_nodes(self, nodes: list[str]) -> list[int]: return [self.safely_add_node(node) for node in nodes] def pagerank(self) -> None: pageranks = rx.pagerank(self) for index in self.node_indexes(): self[index].pagerank = pageranks[index] def nameless_nodes(self) -> list[str]: return [self[i].id for i in self.node_indexes() if self[i].name == ""] def max_depth(self) -> int: return max([self[i].depth for i in self.node_indexes()]) class DB: """Prepares the sql statements and connects to the database""" def __init__(self): self.connect() self.cursor.execute( "PREPARE select_id AS SELECT id FROM packages WHERE name = $1" ) self.cursor.execute( "PREPARE select_name AS SELECT id, name FROM packages WHERE id = ANY($1)" ) self.cursor.execute( "PREPARE select_deps AS \ SELECT DISTINCT p.id, p.name, d.dependency_id FROM packages p \ JOIN versions v ON p.id = v.package_id \ JOIN dependencies d ON v.id = d.version_id \ WHERE p.id = ANY($1)" ) def connect(self) -> None: if not CHAI_DATABASE_URL: raise RuntimeError("Environment variable CHAI_DATABASE_URL is not set.") try: self.conn = psycopg2.connect(CHAI_DATABASE_URL) self.cursor = self.conn.cursor() except psycopg2.OperationalError as e: raise RuntimeError(f"Failed to connect to the database: {e}") from e def select_id(self, package: str) -> int: self.cursor.execute("EXECUTE select_id (%s)", (package,)) return self.cursor.fetchone()[0] def select_deps(self, ids: list[str]) -> dict[str, dict[str, str | set[str]]]: # NOTE: this might be intense for larger package managers # NOTE: I have to cast the list to a uuid[] for psycopg2 to correctly handle it self.cursor.execute("EXECUTE select_deps (%s::uuid[])", (ids,)) flat = self.cursor.fetchall() # now, return this as a map capturing the package name and its dependencies result = {} for pkg_id, pkg_name, dep_id in flat: # add the package if it doesn't already exist in result if pkg_id not in result: result[pkg_id] = {"name": pkg_name, "dependencies": set()} # add the dependency to the dependencies set result[pkg_id]["dependencies"].add(dep_id) return result def select_name(self, ids: list[str]) -> list[tuple[str, str]]: self.cursor.execute("EXECUTE select_name (%s::uuid[])", (ids,)) return self.cursor.fetchall() def larger_query(db: DB, root_package: str, max_depth: int) -> Graph: graph = Graph() visited = set() leafs = set() # above sets will use the id of the package root_id = db.select_id(root_package) leafs.add(root_id) depth = 0 while leafs - visited: query = list(leafs - visited) dependencies = db.select_deps(query) # Increment the depth, and get out if too much depth += 1 if depth > max_depth: # Set the depth for the remaining leafs for pkg_id in query: i = graph.safely_add_node(pkg_id) graph[i].depth = depth break for pkg_id in query: i = graph.safely_add_node(pkg_id) # Have we encountered this node before? 
If not, set the depth if graph[i].depth is None: graph[i].depth = depth if pkg_id in dependencies: graph[i].name = dependencies[pkg_id]["name"] js = graph.safely_add_nodes(dependencies[pkg_id]["dependencies"]) edges = [(i, j, None) for j in js] graph.add_edges_from(edges) leafs.update(dependencies[pkg_id]["dependencies"]) visited.update(query) # Add the names for the packages that don't have dependencies nameless_nodes = graph.nameless_nodes() names = db.select_name(nameless_nodes) for pkg_id, pkg_name in names: i = graph.safely_add_node(pkg_id) graph[i].name = pkg_name return graph def display(graph: Graph): sorted_nodes = sorted(graph.node_indexes(), key=lambda x: graph[x].depth) headers = ["Package", "First Depth", "Dependencies", "Dependents", "Pagerank"] data = [] for node in sorted_nodes: data.append( [ graph[node], graph[node].depth, graph.out_degree(node), graph.in_degree(node), graph[node].pagerank, ] ) print(tabulate(data, headers=headers, floatfmt=".8f", intfmt=",")) def draw(graph: Graph, package: str, img_type: str = "svg"): ALLOWABLE_FILE_TYPES = ["svg", "png"] if img_type not in ALLOWABLE_FILE_TYPES: raise ValueError(f"file type must be one of {ALLOWABLE_FILE_TYPES}") max_depth = graph.max_depth() total_nodes = graph.num_nodes() total_edges = graph.num_edges() def depth_to_grayscale(depth: int) -> str: """Convert depth to a grayscale color.""" if depth == 1: return "red" return f"gray{depth + 10 + (depth - 1) // 9}" # Unused because I don't visualize edges def color_edge(edge): out_dict = { "color": "lightgrey", "fillcolor": "lightgrey", "penwidth": "0.05", "arrowsize": "0.05", "arrowhead": "tee", } return out_dict def color_node(node: Package): scale = 20 def label_nodes(node: Package): if node.pagerank > 0.01: return f"{node.name}" return "" def size_center_node(node: Package): if node.depth == 1: return "1" return str(node.pagerank * scale) out_dict = { "label": label_nodes(node), "fontsize": "5", "fontcolor": "gray", "fontname": "Menlo", "color": depth_to_grayscale(node.depth), "shape": "circle", "style": "filled", "fixedsize": "True", "width": size_center_node(node), "height": size_center_node(node), } return out_dict label = f"<{package} (big red dot)
depth: {max_depth}
nodes: {total_nodes!s}
edges: {total_edges!s}>" graph_attr = { "beautify": "True", "splines": "none", "overlap": "0", "label": label, "labelloc": "t", "labeljust": "l", "fontname": "Menlo", } graphviz_draw( graph, node_attr_fn=color_node, edge_attr_fn=color_edge, graph_attr=graph_attr, method="twopi", # NOTE: sfdp works as well filename=f"{package}.{img_type}", image_type=img_type, ) def latest(db: DB, package: str, depth: int, img_type: str): G = larger_query(db, package, depth) G.pagerank() display(G) draw(G, package, img_type) print("✅ Saved image") if __name__ == "__main__": db = DB() parser = argparse.ArgumentParser() parser.add_argument("package", help="The package to visualize", type=str) parser.add_argument( "--depth", help="Maximum depth to go to", type=int, default=9999 ) parser.add_argument( "--profile", help="Performance!", action="store_true", default=False ) parser.add_argument( "--image-type", help="The file type to save the image as", type=str, default="svg", ) args = parser.parse_args() package = args.package depth = args.depth profile = args.profile img_type = args.image_type if profile: profiler = cProfile.Profile() profiler.enable() latest(db, package, depth, img_type) if profile: profiler.disable() stats = pstats.Stats(profiler).sort_stats(SortKey.TIME) stats.print_stats() ================================================ FILE: examples/visualizer/monitor.py ================================================ import argparse import time from collections import defaultdict from collections.abc import Callable from functools import wraps from main import DB, latest METRICS: list[str] = [ "total_execution_time", "query_count", "total_query_time", "non_query_time", ] class Result: METRICS = METRICS # bind the module-level metric names as a class attribute, so self.METRICS and Result.METRICS resolve def __init__(self, **kwargs): for metric in self.METRICS: setattr(self, metric, kwargs[metric]) def __str__(self): return "\n".join( f"{metric}: {getattr(self, metric):.3f}s" if metric != "query_count" # I don't like this else f"{metric}: {getattr(self, metric)}" for metric in self.METRICS ) class MonitoredDB(DB): """Base monitoring wrapper for DB classes""" def __init__(self): self.query_count = 0 self.total_query_time = 0 super().__init__() def _monitor_query(self, func: Callable) -> Callable: @wraps(func) def wrapper(*args, **kwargs): self.query_count += 1 start_time = time.perf_counter() result = func(*args, **kwargs) self.total_query_time += time.perf_counter() - start_time return result return wrapper def connect(self): super().connect() # and wrap all the methods with monitoring for name in dir(self): if name.startswith("select_"): setattr(self, name, self._monitor_query(getattr(self, name))) def run_monitored(func: Callable, package: str) -> Result: """Run the main program with monitoring""" db = MonitoredDB() start_time = time.perf_counter() func(db, package, 9999, "svg") # latest() also takes depth and image type; pass main.py's defaults total_time = time.perf_counter() - start_time return Result( total_execution_time=total_time, query_count=db.query_count, total_query_time=db.total_query_time, non_query_time=total_time - db.total_query_time, ) def compare_implementations(package: str, runs: int = 3) -> dict[str, list[Result]]: """Compare old and new implementations""" implementations = [latest] results: dict[str, list[Result]] = defaultdict(list) for i in range(runs): print(f"\nRun {i + 1}/{runs}") for func in implementations: func_name = func.__name__ print(f"Running {func_name}...") result = run_monitored(func, package) results[func_name].append(result) return results def compare_results(results: dict[str, list[Result]], runs: int) -> None: implementations = list(results.keys())
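# Render a fixed-width comparison table: a header of implementation names, one row per metric, then the relative improvement vs the first implementation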
print("\nResults Comparison:") print("-" * (25 + 20 * len(implementations))) # Header row with implementation names print(f"{'Metric':<25}", end="") for impl in implementations: print(f"{impl:>20}", end="") print() print("-" * (25 + 20 * len(implementations))) # Data rows for metric in Result.METRICS: print(f"{metric:<25}", end="") for impl in implementations: avg = sum(getattr(r, metric) for r in results[impl]) / runs if metric == "query_count": print(f"{avg:>20.0f}", end="") else: print(f"{avg:>20.3f}s", end="") print() # Calculate improvements relative to first implementation print("-" * (25 + 20 * len(implementations))) base_time = sum(r.total_execution_time for r in results[implementations[0]]) / runs for impl in implementations[1:]: new_time = sum(r.total_execution_time for r in results[impl]) / runs improvement = ((base_time - new_time) / base_time) * 100 print(f"Improvement ({impl} vs {implementations[0]}): {improvement:>+.1f}%") if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--package", help="The package to visualize") parser.add_argument( "--profile", help="Whether to profile the code", action="store_true", default=False, ) parser.add_argument("--runs", type=int, default=3, help="Number of runs to average") args = parser.parse_args() results = compare_implementations(args.package, args.runs) compare_results(results, args.runs) ================================================ FILE: package_managers/crates/Dockerfile ================================================ FROM ghcr.io/astral-sh/uv:python3.11-bookworm-slim # Copy everything from the root directory (build context) COPY . . # Install core requirements using uv WORKDIR /core RUN uv pip install --system -r requirements.txt WORKDIR / # Run the main application CMD ["python", "/package_managers/crates/main.py"] ================================================ FILE: package_managers/crates/README.md ================================================ # crates The crates service uses the database dump provided by crates.io and coerces their data model into CHAI's. It's containerized using Docker for easy deployment and consistency. It's also written in `python` as a first draft, and uses a lot of the [core tools](../../core/). ## Getting Started To just run the crates service, you can use the following commands: ```bash docker compose build crates docker compose run crates ``` ## Execution Steps The crates loader goes through the following steps when executed: 1. **Initialization**: The loader starts by initializing the configuration and database connection using `Config` and `CratesDB`. 2. **Fetching**: If the `FETCH` flag is set to true, the loader downloads the latest cargo data from the source using `TarballFetcher`. If needed, it saves to disk. 3. **Transformation**: The downloaded data is parsed and transformed using `CratesTransformer.parse()` into a format compatible with the CHAI database schema. 4. **Deletion**: The loader identifies crates that exist in the database but are no longer in the registry (crates.io allows deletion _sometimes_). 5. **Cache Building**: The loader builds a cache by setting the current graph and URLs from the database, then creates a `Cache` object for efficient diffing. 6. **Diff Process**: The loader performs a diff operation to categorize data into: - New packages vs updated packages - New URLs vs existing URLs - New package URLs vs updated package URLs - New dependencies vs removed dependencies 7. 
**Data Ingestion**: All categorized data is loaded into the database via a single `db.ingest()` call. The main execution logic is in the `main` function in [main.py](main.py): ```python def main(config: Config, db: CratesDB): logger = Logger("crates_main") logger.log("Starting crates_main") # fetch, write, transform if config.exec_config.fetch: fetcher = TarballFetcher(...) files = fetcher.fetch() if not config.exec_config.no_cache: fetcher.write(files) transformer = CratesTransformer(config) transformer.parse() # identify and handle deletions deletions = identify_deletions(transformer, db) if deletions: db.delete_packages_by_import_id(deletions) # build cache and diff db.set_current_graph() db.set_current_urls(crates_urls) cache = Cache(...) # perform diff and ingest diff = Diff(config, cache) # ... diff process ... db.ingest(new_packages, final_new_urls, new_package_urls, new_deps, removed_deps, updated_packages, updated_package_urls) ``` ### Configuration Flags The crates loader supports several configuration flags: - `DEBUG`: Enables debug logging when set to true. - `TEST`: Runs the loader in test mode when set to true, skipping certain data insertions. - `FETCH`: Determines whether to fetch new data from the source when set to true. - `FREQUENCY`: Sets how often (in hours) the pipeline should run. - `NO_CACHE`: When set to true, deletes temporary files after processing. These flags can be set in the `docker-compose.yml` file: ```yaml crates: build: context: . dockerfile: ./package_managers/crates/Dockerfile environment: - CHAI_DATABASE_URL=postgresql://postgres:s3cr3t@db:5432/chai - PYTHONPATH=/ - DEBUG=${DEBUG:-false} - TEST=${TEST:-false} - FETCH=${FETCH:-true} - FREQUENCY=${FREQUENCY:-24} - NO_CACHE=${NO_CACHE:-false} ``` ## TODOs - [ ] `versions.csv` contains all the `published_by` ids, who are the users, who'd need to be loaded as well - [ ] `versions.csv` also contains licenses ================================================ FILE: package_managers/crates/db.py ================================================ from uuid import UUID from sqlalchemy import select from core.config import Config from core.db import DB from core.models import ( CanonPackage, DependsOn, LegacyDependency, Package, PackageURL, UserPackage, UserVersion, Version, ) from core.structs import CurrentGraph, CurrentURLs class CratesDB(DB): def __init__(self, config: Config): super().__init__("crates_db") self.config = config # self.set_current_graph() def set_current_graph(self) -> None: self.graph: CurrentGraph = self.current_graph(self.config.pm_config.pm_id) def set_current_urls(self, urls: set[str]) -> None: self.urls: CurrentURLs = self.current_urls(urls) def delete_packages_by_import_id(self, import_ids: set[int]) -> None: """ Delete packages identified by import_ids and all their dependent records. This is a DB class method to handle the cascade deletion properly. """ # Convert import_ids to package_ids using the cache package_ids: list[UUID] = [] for import_id in import_ids: pkg_id = self.import_id_map.get(str(import_id)) if pkg_id: package_ids.append(pkg_id) if not package_ids: self.logger.debug("No packages found to delete") return self.logger.debug(f"Deleting {len(package_ids)} crates completely") # Delete records in reverse dependency order with self.session() as session: try: # 1. Delete PackageURLs package_urls_deleted = ( session.query(PackageURL) .filter(PackageURL.package_id.in_(package_ids)) .delete(synchronize_session=False) ) # 2. 
Delete CanonPackages canon_packages_deleted = ( session.query(CanonPackage) .filter(CanonPackage.package_id.in_(package_ids)) .delete(synchronize_session=False) ) # 3. Delete UserPackages user_packages_deleted = ( session.query(UserPackage) .filter(UserPackage.package_id.in_(package_ids)) .delete(synchronize_session=False) ) # 4. Delete LegacyDependencies (both package_id and dependency_id) legacy_deps_package_deleted = ( session.query(LegacyDependency) .filter(LegacyDependency.package_id.in_(package_ids)) .delete(synchronize_session=False) ) legacy_deps_dependency_deleted = ( session.query(LegacyDependency) .filter(LegacyDependency.dependency_id.in_(package_ids)) .delete(synchronize_session=False) ) # TODO: this table is deprecated, but still contains records # we can remove this line, once all indexers use LegacyDependencies # 5. Delete DependsOn where dependency_id is in package_ids depends_on_deleted = ( session.query(DependsOn) .filter(DependsOn.dependency_id.in_(package_ids)) .delete(synchronize_session=False) ) # 6. Delete Versions and their dependencies # TODO: remove this line once all indexers stop using Versions and # we can truncate this table # First get all version ids for these packages version_ids = [ vid for (vid,) in session.query(Version.id).filter( Version.package_id.in_(package_ids) ) ] # Delete dependencies attached to these versions version_deps_deleted = 0 user_versions_deleted = 0 if version_ids: version_deps_deleted = ( session.query(DependsOn) .filter(DependsOn.version_id.in_(version_ids)) .delete(synchronize_session=False) ) user_versions_deleted = ( session.query(UserVersion) .filter(UserVersion.version_id.in_(version_ids)) .delete(synchronize_session=False) ) # Now delete the versions versions_deleted = ( session.query(Version) .filter(Version.package_id.in_(package_ids)) .delete(synchronize_session=False) ) # 7. 
Finally delete the packages packages_deleted = ( session.query(Package) .filter(Package.id.in_(package_ids)) .delete(synchronize_session=False) ) self.logger.debug("-" * 100) self.logger.debug("Going to commit delete for") self.logger.debug(f"{packages_deleted} packages") self.logger.debug(f"{versions_deleted} versions") self.logger.debug(f"{version_deps_deleted} version dependencies") self.logger.debug(f"{user_versions_deleted} user versions") self.logger.debug(f"{depends_on_deleted} direct dependencies") self.logger.debug( f"{legacy_deps_package_deleted + legacy_deps_dependency_deleted} legacy deps" # E501 ) self.logger.debug(f"{user_packages_deleted} user packages") self.logger.debug(f"{canon_packages_deleted} canon packages") self.logger.debug(f"{package_urls_deleted} package URLs") self.logger.debug("-" * 100) # Commit the transaction session.commit() except Exception as e: session.rollback() self.logger.error(f"Error deleting packages: {e}") raise def get_cargo_id_to_chai_id(self) -> dict[str, UUID]: """ Returns a map of cargo import_ids to chai_ids """ with self.session() as session: stmt = select(Package.import_id, Package.id).where( Package.package_manager_id == self.config.pm_config.pm_id ) self.import_id_map: dict[str, UUID] = { row[0]: row[1] for row in session.execute(stmt).all() } return self.import_id_map ================================================ FILE: package_managers/crates/diff.py ================================================ from datetime import datetime from uuid import UUID, uuid4 from core.config import Config from core.logger import Logger from core.models import URL, LegacyDependency, Package, PackageURL from core.structs import Cache, URLKey from package_managers.crates.structs import Crate, DependencyType class Diff: def __init__(self, config: Config, caches: Cache): self.config = config self.now = datetime.now() self.caches = caches self.logger = Logger("crates_diff") def diff_pkg(self, pkg: Crate) -> tuple[UUID, Package | None, dict | None]: """ Checks if the given pkg is in the package_cache. 
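The cache key is the crate's import_id, i.e. the crates.io numeric id stored as a string, so a crate is treated as new only when that id has never been ingested before.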
Returns: pkg_id: UUID, the id of the package in the db pkg_obj: Package | None, the package object if it's new update_payload: dict | None, the update payload if it's an update """ pkg_id: UUID crate_id: str = str(pkg.id) # import_ids are strings in the db if crate_id not in self.caches.package_map: # new package p = Package( id=uuid4(), derived_id=f"crates/{pkg.name}", name=pkg.name, package_manager_id=self.config.pm_config.pm_id, import_id=crate_id, readme=pkg.readme, created_at=self.now, updated_at=self.now, ) pkg_id = p.id return pkg_id, p, {} else: # it's in the cache, so check for changes p = self.caches.package_map[crate_id] pkg_id = p.id # check for changes # right now, that's just the readme if p.readme != pkg.readme: return ( pkg_id, None, {"id": p.id, "readme": pkg.readme, "updated_at": self.now}, ) else: # existing package, no change return pkg_id, None, None def diff_url(self, pkg: Crate, new_urls: dict[URLKey, URL]) -> dict[UUID, UUID]: """ Identifies the correct URL for this crate, based on fetched data and all URL strings collected so far Returns: resolved_urls: dict[UUID, UUID], the resolved URL for this crate """ resolved_urls: dict[UUID, UUID] = {} urls: list[URLKey] = [ URLKey(pkg.homepage, self.config.url_types.homepage), URLKey(pkg.repository, self.config.url_types.repository), URLKey(pkg.documentation, self.config.url_types.documentation), ] + ([URLKey(pkg.source, self.config.url_types.source)] if pkg.source else []) for url_key in urls: url = url_key.url url_type = url_key.url_type_id # guard: no URL if not url: continue resolved_url_id: UUID if url_key in new_urls: # if we've already tried to create this URL, use that one resolved_url_id = new_urls[url_key].id elif url_key in self.caches.url_map: # if it's already in the database, let's use that one resolved_url_id = self.caches.url_map[url_key].id else: # most will be here because it's the first run of clean data new_url = URL( id=uuid4(), url=url, url_type_id=url_type, created_at=self.now, updated_at=self.now, ) resolved_url_id = new_url.id # NOTE: THIS IS SUPER IMPORTANT # we're adding to new_urls here, not just in main new_urls[url_key] = new_url resolved_urls[url_type] = resolved_url_id return resolved_urls def diff_pkg_url( self, pkg_id: UUID, resolved_urls: dict[UUID, UUID] ) -> tuple[list[PackageURL], list[dict]]: """Takes in a package_id and resolved URLs from diff_url, and generates new PackageURL objects as well as a list of changes to existing ones Inputs: - pkg_id: the id of the package - resolved_urls: a map of url types to final URL ID for this pkg Outputs: - new_package_urls: a list of new PackageURL objects - updated_package_urls: a list of changes to existing PackageURL objects TODO: - We're updating every single package_url entity, which takes time. We should check if the latest URL has changed, and if so, only update that one. """ new_links: list[PackageURL] = [] updates: list[dict] = [] # what are the existing links? existing: set[UUID] = { pu.url_id for pu in self.caches.package_urls.get(pkg_id, set()) } # for the correct URL type / URL for this package: for _url_type, url_id in resolved_urls.items(): if url_id not in existing: # new link! 
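# no PackageURL row currently links this package to this URL id, so create one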
new_links.append( PackageURL( id=uuid4(), package_id=pkg_id, url_id=url_id, created_at=self.now, updated_at=self.now, ) ) else: # TODO: this should only happen for `latest` URLs # there is an existing link between this URL and this package # let's find it existing_pu = next( pu for pu in self.caches.package_urls[pkg_id] if pu.url_id == url_id ) existing_pu.updated_at = self.now updates.append({"id": existing_pu.id, "updated_at": self.now}) return new_links, updates def diff_deps( self, pkg: Crate ) -> tuple[list[LegacyDependency], list[LegacyDependency]]: """ Identifies new and removed dependencies for a given crate The process is: 1. Build a view of what the package's dependencies are according to the crates.io database. 2. Get this crate's Package ID from CHAI 3. Get this crate's existing dependencies from CHAI 4. Compare the two sets, and identify new and removed dependencies Note: The database has a unique constraint on (package_id, dependency_id), so if a package depends on the same dependency with multiple types (e.g., both runtime and build), we choose the highest priority type: NORMAL (runtime) > BUILD > DEV Returns: new_deps: list[LegacyDependency], the new dependencies removed_deps: list[LegacyDependency], the removed dependencies """ new_deps: list[LegacyDependency] = [] removed_deps: list[LegacyDependency] = [] # First, collect all dependencies and deduplicate by (package_id, dependency_id) # choosing the highest priority dependency type for each unique dependency dependency_map: dict[UUID, DependencyType] = {} # Priority order: NORMAL (runtime) > BUILD > DEV priority_order = { DependencyType.NORMAL: 1, DependencyType.BUILD: 2, DependencyType.DEV: 3, } # Build the map of dependencies, keeping only the highest priority type if pkg.latest_version: for dependency in pkg.latest_version.dependencies: dep_crate_id: str = str(dependency.dependency_id) dep_type: DependencyType = dependency.dependency_type # guard: no dep_id if not dep_crate_id: raise ValueError(f"No dep_id for {dependency}") # guard: no dep_type if dep_type is None: raise ValueError(f"No dep_type for {dependency}") # get the ID from the cache dependency_pkg = self.caches.package_map.get(dep_crate_id) # if we don't have the dependency, skip it for now if not dependency_pkg: self.logger.debug( f"{dep_crate_id}, dependency of {pkg.name} is new" ) continue dependency_id = dependency_pkg.id # If this dependency already exists in our map, choose higher priority if dependency_id in dependency_map: existing_priority = priority_order.get( dependency_map[dependency_id], 999 ) new_priority = priority_order.get(dep_type, 999) if ( new_priority < existing_priority ): # Lower number = higher priority old_type = dependency_map[dependency_id] dependency_map[dependency_id] = dep_type self.logger.debug( f"Updated dependency type for {dep_crate_id} from " f"{old_type} to {dep_type} (higher priority)" ) else: dependency_map[dependency_id] = dep_type # Now build the actual set of dependencies with resolved types actual: set[tuple[UUID, UUID]] = set() for dependency_id, dep_type in dependency_map.items(): # figure out the dependency type UUID dependency_type = self._resolve_dep_type(dep_type) # add it to the set of actual dependencies actual.add((dependency_id, dependency_type)) # establish the package that we are working with crate_id: str = str(pkg.id) package = self.caches.package_map.get(crate_id) if not package: # TODO: handle this case, though it fixes itself on the next run self.logger.debug(f"New package {pkg.name}, will grab its 
deps next time") return [], [] pkg_id: UUID = package.id # what are its existing dependencies? # specifically, existing dependencies IN THE SAME STRUCTURE as `actual`, # so we can do an easy comparison existing: set[tuple[UUID, UUID]] = { (dep.dependency_id, dep.dependency_type_id) for dep in self.caches.dependencies.get(pkg_id, set()) } # we have two sets! # actual minus existing = new_deps # existing minus actual = removed_deps new = actual - existing removed = existing - actual new_deps: list[LegacyDependency] = [ LegacyDependency( # don't include the ID because it's a sequence for this table package_id=pkg_id, dependency_id=dep[0], dependency_type_id=dep[1], created_at=self.now, updated_at=self.now, ) for dep in new ] # get the existing legacy dependency, and add it to removed_deps removed_deps: list[LegacyDependency] = [] cache_deps: set[LegacyDependency] = self.caches.dependencies.get(pkg_id, set()) for removed_dep_id, removed_dep_type in removed: try: existing_dep = next( dep for dep in cache_deps if dep.dependency_id == removed_dep_id and dep.dependency_type_id == removed_dep_type ) removed_deps.append(existing_dep) except StopIteration as exc: cache_deps_str = "\n".join( [ f"{dep.dependency_id} / {dep.dependency_type_id}" for dep in cache_deps ] ) raise ValueError( f"Removing {removed_dep_id} / {removed_dep_type} for {pkg_id} but not in Cache: \n{cache_deps_str}" ) from exc return new_deps, removed_deps def _resolve_dep_type(self, dep_type: DependencyType) -> UUID: """ Resolves the dependency type UUID from the config """ if dep_type == DependencyType.NORMAL: return self.config.dependency_types.runtime elif dep_type == DependencyType.BUILD: return self.config.dependency_types.build elif dep_type == DependencyType.DEV: return self.config.dependency_types.development else: raise ValueError(f"Unknown dependency type: {dep_type}") ================================================ FILE: package_managers/crates/main.py ================================================ from uuid import UUID from core.config import Config, PackageManager from core.fetcher import TarballFetcher from core.logger import Logger from core.models import ( URL, LegacyDependency, Package, PackageURL, ) from core.structs import Cache, URLKey from package_managers.crates.db import CratesDB from package_managers.crates.diff import Diff from package_managers.crates.transformer import CratesTransformer def identify_deletions(transformer: CratesTransformer, db: CratesDB) -> set[int]: """ Identifies crates that are in the db but not in the transformer Cargo enables deletion of crates from the registry, if: - the crate has been published for less than 72 hours - the crate only has a single owner - the crate has been downloaded less than 500 times for each month it has been - the crate is not depended upon by any other crate on crates.io The risk is that the namespace for an invalid import_id is now available, and might be taken by a new crate, which would violate our uniqueness constraint on derived_id Returns: - a set of import_ids that are in the db but not in the transformer References: - https://crates.io/policies - https://rurust.github.io/cargo-docs-ru/policies.html """ logger = Logger("crates_identify_deletions") # db needs to know the cargo id to chai id cargo_id_to_chai_id: dict[str, UUID] = db.get_cargo_id_to_chai_id() transformer_import_ids: set[int] = {int(c.id) for c in transformer.crates.values()} db_import_ids: set[int] = {int(p) for p in cargo_id_to_chai_id} # calculate deletions deletions: set[int] = 
db_import_ids - transformer_import_ids if deletions: logger.warn( f"There are {len(deletions)} crates in the db but not in the registry" ) return deletions def main(config: Config, db: CratesDB): logger = Logger("crates_main") logger.log("Starting crates_main") # fetch the files from cargo if config.exec_config.fetch: fetcher: TarballFetcher = TarballFetcher( "crates", str(config.pm_config.source), config.exec_config.no_cache, config.exec_config.test, ) files = fetcher.fetch() logger.log(f"Fetched {len(files)} files") # write the files to disk if not config.exec_config.fetch and not config.exec_config.no_cache: fetcher.write(files) logger.log("Wrote files to disk") # transform the files into a list of crates transformer = CratesTransformer(config) transformer.parse() logger.log(f"Parsed {len(transformer.crates)} crates") # identify crates we need to delete from CHAI because they are no longer on cargo deletions = identify_deletions(transformer, db) logger.log(f"Identified {len(deletions)} crates to delete") if deletions: db.delete_packages_by_import_id(deletions) logger.log(f"Deleted {len(deletions)} crates") # to build the cache, we need the graph object from the db and the URLs db.set_current_graph() crates_urls: set[str] = set() for crate in transformer.crates.values(): crates_urls.add(crate.homepage) crates_urls.add(crate.repository) crates_urls.add(crate.documentation) db.set_current_urls(crates_urls) cache = Cache( db.graph.package_map, db.urls.url_map, db.urls.package_urls, db.graph.dependencies, ) logger.log("Built cache") # now, we can do the diff new_packages: list[Package] = [] updated_packages: list[dict] = [] new_urls: dict[URLKey, URL] = {} new_package_urls: list[PackageURL] = [] updated_package_urls: list[dict] = [] new_deps: list[LegacyDependency] = [] removed_deps: list[LegacyDependency] = [] diff = Diff(config, cache) for pkg in transformer.crates.values(): pkg_id, pkg_obj, update_payload = diff.diff_pkg(pkg) if pkg_obj: new_packages.append(pkg_obj) if update_payload: updated_packages.append(update_payload) # URLs resolved_urls = diff.diff_url(pkg, new_urls) # package URLs new_links, updated_links = diff.diff_pkg_url(pkg_id, resolved_urls) if new_links: new_package_urls.extend(new_links) if updated_links: updated_package_urls.extend(updated_links) # finally, dependencies new_dependencies, removed_dependencies = diff.diff_deps(pkg) if new_dependencies: new_deps.extend(new_dependencies) if removed_dependencies: removed_deps.extend(removed_dependencies) logger.log(f"Diffed {len(transformer.crates)} crates!") # make new_urls a list of new URLs final_new_urls = list(new_urls.values()) db.ingest( new_packages, final_new_urls, new_package_urls, new_deps, removed_deps, updated_packages, updated_package_urls, ) logger.log("✅ Done") if __name__ == "__main__": config = Config(PackageManager.CRATES) db = CratesDB(config) main(config, db) ================================================ FILE: package_managers/crates/structs.py ================================================ from dataclasses import dataclass, field from datetime import datetime from enum import IntEnum from typing import TypedDict from uuid import UUID class DependencyType(IntEnum): """ The kind of dependency from the crates.io database - NORMAL: normal dependency (default) - BUILD: build dependency (used for build scripts) - DEV: dev dependency (used for testing or benchmarking) Resources: - https://doc.rust-lang.org/cargo/reference/specifying-dependencies.html """ NORMAL = 0 BUILD = 1 # used for build scripts DEV = 
2 # used for testing or benchmarking OPTIONAL = 3 def __str__(self): return self.name.lower() @dataclass class CrateDependency: crate_id: int dependency_id: int dependency_type: DependencyType # kind semver_range: str # req @dataclass class CrateUser: # from users.csv or teams.csv id: int name: str | None = None github_username: str | None = None @dataclass class CrateLatestVersion: # latest version ID is from default_versions.csv # data is from versions.csv id: int checksum: str downloads: int license: str num: str published_at: datetime published_by: CrateUser | None = None # dependencies.csv dependencies: list[CrateDependency] = field(default_factory=list) @dataclass class Crate: # from crates.csv id: int name: str readme: str homepage: str repository: str documentation: str source: str | None = None # from versions.csv latest_version: CrateLatestVersion | None = None class CanonUpdatePayload(TypedDict): """Type-safe structure for canon update operations.""" id: UUID name: str updated_at: datetime class CanonPackageUpdatePayload(TypedDict): """Type-safe structure for canon package update operations.""" id: UUID canon_id: UUID updated_at: datetime ================================================ FILE: package_managers/crates/transformer.py ================================================ import csv from collections.abc import Generator from core.config import Config from core.transformer import Transformer from core.utils import is_github_url from package_managers.crates.structs import ( Crate, CrateDependency, CrateLatestVersion, CrateUser, DependencyType, ) class CratesTransformer(Transformer): def __init__(self, config: Config): super().__init__("crates") self.config = config self.crates: dict[int, Crate] = {} # files we need to parse self.files: dict[str, str] = { "crates": "crates.csv", "latest_versions": "default_versions.csv", "versions": "versions.csv", "dependencies": "dependencies.csv", "users": "users.csv", "teams": "teams.csv", } def _open_csv(self, file_name: str) -> Generator[dict[str, str], None, None]: try: file_path = self.finder(self.files[file_name]) with open(file_path, newline="", encoding="utf-8") as f: reader = csv.DictReader(f) yield from reader except KeyError as exc: raise KeyError( f"Missing {file_name} from self.files: {self.files}" ) from exc except FileNotFoundError as exc: self.logger.error(f"Missing {file_path} from data directory") raise FileNotFoundError(f"Missing {file_path} file") from exc except Exception as e: self.logger.error(f"Error reading {file_path}: {e}") raise e def parse(self) -> None: # first go through crates.csv to # here, we can get the import_id, name, homepage, documentation, repository # and also source, from repo if it is like GitHub for row in self._open_csv("crates"): crate_id = int(row["id"]) name = row["name"] readme = row["readme"] # URLs: homepage = self.canonicalize(row["homepage"]) documentation = self.canonicalize(row["documentation"]) repository = self.canonicalize(row["repository"]) source: str | None = None if is_github_url(repository): source = repository crate = Crate( crate_id, name, readme, homepage, repository, documentation, source ) self.crates[crate_id] = crate self.logger.log(f"Parsed {len(self.crates)} crates") # populate the map of crate_id to latest_version_id & all latest_version_ids latest_versions: set[int] latest_versions_map: dict[int, int] latest_versions, latest_versions_map = self._load_latest_versions() self.logger.log(f"Loaded {len(latest_versions)} latest versions") # also build the map of user_id 
to CrateUser object users: dict[int, CrateUser] = self._load_users() self.logger.log(f"Loaded {len(users)} users") # now, iterate through the versions.csv, and populate LatestVersion objects, # only if the version_id is in the latest_versions set for row in self._open_csv("versions"): version_id = int(row["id"]) crate_id = int(row["crate_id"]) # ignore if this version is not the latest if version_id not in latest_versions: continue checksum = row["checksum"] downloads = int(row["downloads"]) license = row["license"] num = row["num"] published_at = row["created_at"] # make a CrateUser object from the published_by published_by = row["published_by"] published_by_user: CrateUser | None = ( users[int(published_by)] if published_by else None ) latest_version = CrateLatestVersion( version_id, checksum, downloads, license, num, published_at, published_by_user, ) # map this LatestVersion to the crate in self.crates self.crates[crate_id].latest_version = latest_version self.logger.log("Parsed the latest versions for each crate") # finally, parse through the dependencies.csv # again, we only care about the dependencies for the latest version for row in self._open_csv("dependencies"): start_id = int(row["version_id"]) # ignore if this version is not the latest if start_id not in latest_versions: continue # map both ids to crates end_crate_id = int(row["crate_id"]) start_crate_id = int(latest_versions_map[start_id]) # guard if start_crate_id not in self.crates: raise ValueError(f"Crate {start_crate_id} not found in self.crates") kind = int(row["kind"]) # guard if kind not in [0, 1, 2]: raise ValueError(f"Unknown dependency kind: {kind}") dependency_type = DependencyType(kind) semver = row["req"] dependency = CrateDependency( start_crate_id, end_crate_id, dependency_type, semver ) # add this dependency to the crate self.crates[start_crate_id].latest_version.dependencies.append(dependency) self.logger.log("Parsed the dependencies for each crate") def _load_latest_versions(self) -> tuple[set[int], dict[int, int]]: latest_versions: set[int] = set() latest_versions_map: dict[int, int] = {} for row in self._open_csv("latest_versions"): crate_id = int(row["crate_id"]) version_id = int(row["version_id"]) latest_versions.add(version_id) latest_versions_map[version_id] = crate_id return latest_versions, latest_versions_map def _load_users(self) -> dict[int, CrateUser]: users: dict[int, CrateUser] = {} for row in self._open_csv("users"): user_id = int(row["id"]) name = row["name"] github_username = row["gh_login"] user = CrateUser(user_id, name, github_username) users[user_id] = user return users ================================================ FILE: package_managers/debian/Dockerfile ================================================ FROM ghcr.io/astral-sh/uv:python3.11-bookworm-slim # Copy everything COPY . . 
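# NOTE: this assumes the build context is the repository root (as configured in docker-compose.yml),
# so the COPY . . above brings /core and /package_managers into the image together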
# Install core requirements using uv WORKDIR /core RUN uv pip install --system -r requirements.txt WORKDIR / # Run the main application CMD ["python", "/package_managers/debian/main.py"] ================================================ FILE: package_managers/debian/README.md ================================================ # Debian ## Data Structure - Source represents the original upstream project, as Debian receives it - Package is a binary that users can install - Sources can specify multiple binaries - Not every package specifies a source (e.g. transitional or virtual packages) ## Scripts - `investigate_sources.py` can be run on the downloaded data dump from Debian, and prints information about the data integrity ## Approach There is a many-to-one mapping between Packages and Sources. During the load step, we populate the map between Packages and Sources (as in `investigate_sources.py`), because information about a Debian package can be fetched from both data sources. While the parser currently captures all the information for each Package and Source (keep as-is), we only end up loading the following information for a package from each source: Source: - Vcs-Browser => URL, PackageURL - Vcs-Git => URL, PackageURL - Build-Depends => LegacyDependency - Maintainer => User, UserPackage - Uploaders => User, UserPackage Package: - Depends => LegacyDependency - Pre-Depends => LegacyDependency - Description => Package - Homepage => URL, PackageURL ================================================ FILE: package_managers/debian/db.py ================================================ #!/usr/bin/env pkgx uv run from core.config import Config from core.db import DB, CurrentURLs from core.structs import CurrentGraph, DiffResult class DebianDB(DB): def __init__(self, logger_name: str, config: Config): super().__init__(logger_name) self.config = config def set_current_graph(self) -> None: """Get the debian packages and dependencies""" self.graph: CurrentGraph = self.current_graph(self.config.pm_config.pm_id) def set_current_urls(self, urls: set[str]) -> None: """Getting all the URLs and Package URLs from the database""" self.urls: CurrentURLs = self.current_urls(urls) def ingest_wrapper(self, diff_result: DiffResult) -> None: """Wrapper for the main ingest function to handle DiffResult""" final_new_urls = list(diff_result.new_urls.values()) self.ingest( diff_result.new_packages, final_new_urls, diff_result.new_package_urls, diff_result.new_deps, diff_result.removed_deps, diff_result.updated_packages, diff_result.updated_package_urls, ) ================================================ FILE: package_managers/debian/debian_sources.py ================================================ from core.logger import Logger from package_managers.debian.parser import DebianParser from package_managers.debian.structs import DebianData def build_package_to_source_mapping( sources_file_path: str, logger: Logger ) -> dict[str, DebianData]: """ Build a mapping from binary package names to their source information.
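For example, a single source such as glibc declares several binaries (libc6, libc-bin, ...), and each of those binary names maps back to the same source record.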
Args: sources_file_path: Path to the sources file logger: Logger for progress and warning messages Returns: Dictionary mapping binary package names to source DebianData objects """ # Parse sources file with open(sources_file_path) as f: sources_content = f.read() sources_parser = DebianParser(sources_content) # Build mapping: binary_package_name -> source_debian_data package_to_source: dict[str, DebianData] = {} for source_data in sources_parser.parse(): # Each source may produce multiple binary packages if source_data.binary: # Source has explicit binary list for binary_name in source_data.binary: binary_name = binary_name.strip() if binary_name: package_to_source[binary_name] = source_data else: # No explicit binary list, assume source name == binary name if source_data.package: package_to_source[source_data.package] = source_data logger.log( f"Built mapping for {len(package_to_source)} binary packages from sources" ) return package_to_source def enrich_package_with_source( package_data: DebianData, source_mapping: dict[str, DebianData], logger: Logger ) -> DebianData: """ Enrich a package with its corresponding source information. Args: package_data: The package data from packages file source_mapping: Mapping from package names to source data logger: Logger used to warn when no source is found Returns: Enriched DebianData with both package and source information """ # Start with the package data enriched = package_data # The lookup key is the binary package name binary_name = package_data.package # Look up source information if binary_name in source_mapping: source_data = source_mapping[binary_name] # Enrich package with source information # Only add source fields that aren't already populated if not enriched.vcs_browser and source_data.vcs_browser: enriched.vcs_browser = source_data.vcs_browser if not enriched.vcs_git and source_data.vcs_git: enriched.vcs_git = source_data.vcs_git if not enriched.directory and source_data.directory: enriched.directory = source_data.directory if not enriched.build_depends and source_data.build_depends: enriched.build_depends = source_data.build_depends if not enriched.homepage and source_data.homepage: enriched.homepage = source_data.homepage else: # Log warning for missing source source_name = package_data.source or package_data.package logger.warn( f"Binary '{binary_name}' of source '{source_name}' was not found in sources file" ) return enriched ================================================ FILE: package_managers/debian/diff.py ================================================ #!/usr/bin/env pkgx uv run from datetime import datetime from uuid import UUID, uuid4 from core.config import Config from core.logger import Logger from core.models import URL, LegacyDependency, Package, PackageURL from core.structs import Cache, URLKey from core.utils import is_github_url from package_managers.debian.db import DebianDB from package_managers.debian.parser import DebianData from package_managers.debian.structs import Depends class DebianDiff: def __init__(self, config: Config, caches: Cache, db: DebianDB, logger: Logger): self.config = config self.now = datetime.now() self.caches = caches self.db = db self.logger = logger def diff_pkg( self, import_id: str, debian_data: DebianData ) -> tuple[UUID, Package | None, dict | None]: """ Checks if the given package is in the package_cache. Returns: - pkg_id: the id of the package - package: If new, returns a new package object.
If existing, returns None - changes: a dictionary of changes (description updates) """ self.logger.debug(f"Diffing package: {import_id}") if import_id not in self.caches.package_map: # new package name = import_id.split("/")[1] p = Package( id=uuid4(), derived_id=import_id, name=name, package_manager_id=self.config.pm_config.pm_id, import_id=import_id, readme=debian_data.description, created_at=self.now, updated_at=self.now, ) pkg_id: UUID = p.id return pkg_id, p, {} else: # the package exists, check if description has changed existing_pkg = self.caches.package_map[import_id] pkg_id = existing_pkg.id # Check if description (readme) has changed if existing_pkg.readme != debian_data.description: update_payload = { "id": pkg_id, "readme": debian_data.description, "updated_at": self.now, } return pkg_id, None, update_payload else: return pkg_id, None, None def diff_url( self, import_id: str, debian_data: DebianData, new_urls: dict[URLKey, URL] ) -> dict[UUID, UUID]: """Given a package's URLs, returns the resolved URL for this specific package""" resolved_urls: dict[UUID, UUID] = {} # Generate the URLs for this package urls = self._generate_chai_urls(debian_data) # Process each URL for url_key in urls: # guard: _generate_chai_urls could be None for a url type if url_key is None: continue resolved_url_id: UUID if url_key in new_urls: resolved_url_id = new_urls[url_key].id elif url_key in self.caches.url_map: resolved_url_id = self.caches.url_map[url_key].id else: self.logger.debug( f"URL {url_key.url} as {url_key.url_type_id} is entirely new" ) new_url = URL( id=uuid4(), url=url_key.url, url_type_id=url_key.url_type_id, created_at=self.now, updated_at=self.now, ) resolved_url_id = new_url.id new_urls[url_key] = new_url resolved_urls[url_key.url_type_id] = resolved_url_id return resolved_urls def diff_pkg_url( self, pkg_id: UUID, resolved_urls: dict[UUID, UUID] ) -> tuple[list[PackageURL], list[dict]]: """Takes in a package_id and resolved URLs from diff_url, and generates new PackageURL objects as well as a list of changes to existing ones""" new_links: list[PackageURL] = [] updates: list[dict] = [] # what are the existing links? existing: set[UUID] = { pu.url_id for pu in self.caches.package_urls.get(pkg_id, set()) } # for each URL type/URL for this package: for _url_type, url_id in resolved_urls.items(): if url_id not in existing: # new link! new_links.append( PackageURL( id=uuid4(), package_id=pkg_id, url_id=url_id, created_at=self.now, updated_at=self.now, ) ) else: # existing link - update timestamp existing_pu = next( pu for pu in self.caches.package_urls[pkg_id] if pu.url_id == url_id ) existing_pu.updated_at = self.now updates.append({"id": existing_pu.id, "updated_at": self.now}) return new_links, updates def diff_deps( self, import_id: str, debian_data: DebianData ) -> tuple[list[LegacyDependency], list[LegacyDependency]]: """ Takes in a debian package and figures out what dependencies have changed. The process is: 1. Build a view of what the package's dependencies are according to the parsed debian data, using priority-based deduplication 2. Get this package's ID from CHAI 3. Get this package's existing dependencies from CHAI 4. 
Compare the two sets, and identify new and removed dependencies Note: The database has a unique constraint on (package_id, dependency_id), so if a package depends on the same dependency with multiple types (e.g., both runtime and build), we choose the highest priority type: Runtime > Build > Test Returns: - new_deps: a list of new dependencies - removed_deps: a list of removed dependencies """ # First, collect all dependencies and deduplicate by dependency name # choosing the highest priority dependency type for each unique dependency dependency_map: dict[str, UUID] = {} # Priority order: Runtime > Build > Test priority_order = { self.config.dependency_types.runtime: 1, self.config.dependency_types.build: 2, self.config.dependency_types.test: 3, } def process_deps(dependencies: list[Depends], dep_type: UUID) -> None: """Helper to process dependencies of a given type with priority""" for dep in dependencies: dep_name = f"debian/{dep.package}" # bc the map is by import_id # Get the dependency package from cache dependency = self.caches.package_map.get(dep_name) # try debian/dependency if not dependency: self.logger.debug(f"{dep_name} not loaded, will catch next time") continue # If this dependency already exists in our map, choose higher priority if dep_name in dependency_map: existing_priority = priority_order.get( dependency_map[dep_name], 999 ) new_priority = priority_order.get(dep_type, 999) if new_priority < existing_priority: # Lower is better! old_type_id = dependency_map[dep_name] dependency_map[dep_name] = dep_type self.logger.debug( f"Updated dependency type for {dep_name} from " f"{old_type_id} to {dep_type} (higher priority)" ) else: dependency_map[dep_name] = dep_type # Process different types of dependencies with priority handling # Debian has: depends (runtime), build_depends (build), recommends, suggests, etc. process_deps(debian_data.depends, self.config.dependency_types.runtime) process_deps(debian_data.build_depends, self.config.dependency_types.build) # Map recommends and suggests to runtime for simplicity process_deps(debian_data.recommends, self.config.dependency_types.runtime) process_deps(debian_data.suggests, self.config.dependency_types.runtime) # Now build the actual set of dependencies with resolved types actual: set[tuple[UUID, UUID]] = set() for dep_name, dep_type in dependency_map.items(): dependency = self.caches.package_map.get(dep_name) if dependency: # Double-check it still exists actual.add((dependency.id, dep_type)) # get the package ID for what we are working with package = self.caches.package_map.get(import_id) if not package: self.logger.debug(f"New package {import_id}, will grab its deps next time") return [], [] pkg_id: UUID = package.id # what are its existing dependencies? # specifically, existing dependencies IN THE SAME STRUCTURE as `actual`, # so we can do an easy comparison existing: set[tuple[UUID, UUID]] = { (dep.dependency_id, dep.dependency_type_id) for dep in self.caches.dependencies.get(pkg_id, set()) } # we have two sets! 
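# e.g. with hypothetical ids a and b: actual = {(a, runtime)}, existing = {(a, runtime), (b, build)}
# -> new = {} and removed = {(b, build)}, so the (b, build) edge is marked removed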
# actual minus existing = new_deps # existing minus actual = removed_deps new = actual - existing removed = existing - actual new_deps: list[LegacyDependency] = [ LegacyDependency( package_id=pkg_id, dependency_id=dep[0], dependency_type_id=dep[1], created_at=self.now, updated_at=self.now, ) for dep in new ] # get the existing legacy dependency, and add it to removed_deps removed_deps: list[LegacyDependency] = [] cache_deps: set[LegacyDependency] = self.caches.dependencies.get(pkg_id, set()) for removed_dep_id, removed_dep_type in removed: try: existing_dep = next( dep for dep in cache_deps if dep.dependency_id == removed_dep_id and dep.dependency_type_id == removed_dep_type ) removed_deps.append(existing_dep) except StopIteration as exc: cache_deps_str = "\n".join( [ f"{dep.dependency_id} / {dep.dependency_type_id}" for dep in cache_deps ] ) raise ValueError( f"Removing {removed_dep_id} / {removed_dep_type} for {pkg_id} but not in Cache: \n{cache_deps_str}" ) from exc return new_deps, removed_deps def _generate_chai_urls(self, debian_data: DebianData) -> list[URLKey]: """Generate URLs for a debian package""" urls = [] # Homepage URL if debian_data.homepage: urls.append(URLKey(debian_data.homepage, self.config.url_types.homepage)) # Source URL source_url = ( debian_data.vcs_git if debian_data.vcs_git else debian_data.vcs_browser ) if source_url: urls.append(URLKey(source_url, self.config.url_types.source)) # Repository URL if is_github_url(source_url): urls.append(URLKey(source_url, self.config.url_types.repository)) return urls ================================================ FILE: package_managers/debian/main.py ================================================ #!/usr/bin/env pkgx uv run import os import time from datetime import datetime from uuid import UUID from core.config import Config, PackageManager from core.fetcher import GZipFetcher from core.logger import Logger from core.models import URL, LegacyDependency, Package, PackageURL from core.scheduler import Scheduler from core.structs import Cache, DiffResult, URLKey from core.utils import file_exists from package_managers.debian.db import DebianDB from package_managers.debian.debian_sources import ( build_package_to_source_mapping, enrich_package_with_source, ) from package_managers.debian.diff import DebianDiff from package_managers.debian.parser import DebianData, DebianParser SCHEDULER_ENABLED = os.getenv("ENABLE_SCHEDULER", "true").lower() == "true" def fetch(config: Config, logger: Logger) -> tuple[GZipFetcher, GZipFetcher]: """Fetches the Debian packages & sources manifest files""" package_source = config.pm_config.source[0] sources_source = config.pm_config.source[1] no_cache = config.exec_config.no_cache test = config.exec_config.test package_fetcher = GZipFetcher( name="debian", source=package_source, no_cache=no_cache, test=test, file_path="", # will autosave in data/debian/latest file_name="packages", ) sources_fetcher = GZipFetcher( name="debian", source=sources_source, no_cache=no_cache, test=test, file_path="", # will autosave in data/debian/latest file_name="sources", ) # Fetch should_fetch = config.exec_config.fetch if should_fetch: package_files = package_fetcher.fetch() package_fetcher.write(package_files) logger.log(f"Fetched {len(package_files)} package files") sources_files = sources_fetcher.fetch() sources_fetcher.write(sources_files) logger.log(f"Fetched {len(sources_files)} sources files") return package_fetcher, sources_fetcher def diff( data: list[DebianData], config: Config, cache: Cache, db: DebianDB, 
================================================
FILE: package_managers/debian/main.py
================================================
#!/usr/bin/env pkgx uv run

import os
import time
from datetime import datetime
from uuid import UUID

from core.config import Config, PackageManager
from core.fetcher import GZipFetcher
from core.logger import Logger
from core.models import URL, LegacyDependency, Package, PackageURL
from core.scheduler import Scheduler
from core.structs import Cache, DiffResult, URLKey
from core.utils import file_exists
from package_managers.debian.db import DebianDB
from package_managers.debian.debian_sources import (
    build_package_to_source_mapping,
    enrich_package_with_source,
)
from package_managers.debian.diff import DebianDiff
from package_managers.debian.parser import DebianData, DebianParser

SCHEDULER_ENABLED = os.getenv("ENABLE_SCHEDULER", "true").lower() == "true"


def fetch(config: Config, logger: Logger) -> tuple[GZipFetcher, GZipFetcher]:
    """Fetches the Debian packages & sources manifest files"""
    package_source = config.pm_config.source[0]
    sources_source = config.pm_config.source[1]
    no_cache = config.exec_config.no_cache
    test = config.exec_config.test

    package_fetcher = GZipFetcher(
        name="debian",
        source=package_source,
        no_cache=no_cache,
        test=test,
        file_path="",  # will autosave in data/debian/latest
        file_name="packages",
    )
    sources_fetcher = GZipFetcher(
        name="debian",
        source=sources_source,
        no_cache=no_cache,
        test=test,
        file_path="",  # will autosave in data/debian/latest
        file_name="sources",
    )

    # Fetch
    should_fetch = config.exec_config.fetch
    if should_fetch:
        package_files = package_fetcher.fetch()
        package_fetcher.write(package_files)
        logger.log(f"Fetched {len(package_files)} package files")

        sources_files = sources_fetcher.fetch()
        sources_fetcher.write(sources_files)
        logger.log(f"Fetched {len(sources_files)} sources files")

    return package_fetcher, sources_fetcher


def diff(
    data: list[DebianData],
    config: Config,
    cache: Cache,
    db: DebianDB,
    logger: Logger,
) -> DiffResult:
    # Keeps track of all the new packages we're adding
    seen: dict[str, UUID] = {}
    seen_new_pkg_urls: set[tuple[UUID, UUID]] = set()

    # Objects that we will return
    new_packages: list[Package] = []
    new_urls: dict[URLKey, URL] = {}
    new_package_urls: list[PackageURL] = []
    updated_packages: list[dict[str, UUID | str | datetime]] = []
    updated_package_urls: list[dict[str, UUID | datetime]] = []
    new_deps: list[LegacyDependency] = []
    removed_deps: list[LegacyDependency] = []

    # Create diff processor
    diff = DebianDiff(config, cache, db, logger)

    # Process each enriched package
    for i, debian_data in enumerate(data):
        # Guard: skip entries with an empty package name. Note the check must
        # be on debian_data.package: the import_id f-string always contains
        # the "debian/" prefix, so it can never be empty itself.
        if not debian_data.package:
            logger.warn(f"Skipping package with empty name at index {i}")
            continue
        import_id = f"debian/{debian_data.package}"

        # Diff the package
        pkg_id, pkg_obj, update_payload = diff.diff_pkg(import_id, debian_data)

        # Guard: if pkg_obj is not None, that means it's a new package.
        # If it's new, **and** we have seen it before, set the ID to what is
        # seen. So, duplicates absorb all URLs & Dependencies under one umbrella.
        resolved_pkg_id = seen.get(pkg_obj.import_id, pkg_id) if pkg_obj else pkg_id
        if pkg_obj and pkg_obj.import_id not in seen:
            logger.debug(f"New package: {pkg_obj.name}")
            new_packages.append(pkg_obj)
            seen[pkg_obj.import_id] = resolved_pkg_id

        if update_payload:
            logger.debug(f"Updated package: {update_payload['id']}")
            updated_packages.append(update_payload)

        # Diff URLs (resolved_urls is a map of URL types to final URL IDs)
        resolved_urls = diff.diff_url(import_id, debian_data, new_urls)

        # Diff package URLs
        new_links, updated_links = diff.diff_pkg_url(resolved_pkg_id, resolved_urls)
        if new_links:
            logger.debug(f"New package URLs: {len(new_links)}")
            # guard: only add truly new links
            for link in new_links:
                if (link.package_id, link.url_id) not in seen_new_pkg_urls:
                    new_package_urls.append(link)
                    seen_new_pkg_urls.add((link.package_id, link.url_id))
        if updated_links:
            updated_package_urls.extend(updated_links)

        # Diff dependencies
        new_dependencies, removed_dependencies = diff.diff_deps(import_id, debian_data)
        if new_dependencies:
            logger.debug(f"New dependencies: {len(new_dependencies)}")
            new_deps.extend(new_dependencies)
        if removed_dependencies:
            logger.debug(f"Removed dependencies: {len(removed_dependencies)}")
            removed_deps.extend(removed_dependencies)

        # In test mode, limit processing to the first 3 packages to reduce
        # runtime and resource usage.
        if config.exec_config.test and i > 2:
            break

    return DiffResult(
        new_packages,
        new_urls,
        new_package_urls,
        updated_packages,
        updated_package_urls,
        new_deps,
        removed_deps,
    )
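The `seen` map is what makes duplicate stanzas for the same import_id collapse onto a single package ID, so their URLs and dependencies accumulate under one record. A toy illustration of that first-writer-wins resolution (hypothetical string IDs in place of freshly minted UUIDs):

    # Toy model of the `seen` resolution above; "id-a"/"id-b" are hypothetical.
    seen: dict[str, str] = {}
    for import_id, candidate_id in [("debian/foo", "id-a"), ("debian/foo", "id-b")]:
        resolved = seen.get(import_id, candidate_id)  # duplicate resolves to "id-a"
        seen.setdefault(import_id, resolved)

    assert seen == {"debian/foo": "id-a"}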
def run_pipeline(config: Config, db: DebianDB, logger: Logger):
    """The Debian Indexer"""
    package_fetcher, sources_fetcher = fetch(config, logger)
    input_dir = f"{sources_fetcher.output}/latest"

    # Build package-to-source mapping first
    sources_file_path = file_exists(input_dir, "sources")
    source_mapping = build_package_to_source_mapping(sources_file_path, logger)

    # Parse packages file
    packages_file_path = file_exists(input_dir, "packages")
    with open(packages_file_path) as f:
        packages_content = f.read()
    packages_parser = DebianParser(packages_content)

    # Process each package and enrich with source information
    enriched_packages: list[DebianData] = []
    for package_data in packages_parser.parse():
        enriched_package = enrich_package_with_source(
            package_data, source_mapping, logger
        )
        enriched_packages.append(enriched_package)
    logger.log(f"Processed {len(enriched_packages)} enriched packages")

    # Grab all the URLs from enriched packages
    all_urls: set[str] = set()
    for package in enriched_packages:
        all_urls.add(package.homepage)
        all_urls.add(package.vcs_browser)
        all_urls.add(package.vcs_git)
    logger.log(f"Found {len(all_urls)} URLs to load")

    # Set up cache
    db.set_current_graph()
    db.set_current_urls(all_urls)
    cache = Cache(
        db.graph.package_map,
        db.urls.url_map,
        db.urls.package_urls,
        db.graph.dependencies,
    )
    logger.log("Setup cache")

    # Perform the diff
    result = diff(enriched_packages, config, cache, db, logger)

    # Ingest all diffs
    db.ingest_wrapper(result)

    if config.exec_config.no_cache:
        package_fetcher.cleanup()
        sources_fetcher.cleanup()


def main(config: Config, db: DebianDB, logger: Logger):
    logger.log("Initializing Debian package manager")
    logger.debug(f"Config: {config}")

    if SCHEDULER_ENABLED:
        logger.log("Scheduler enabled. Starting schedule.")
        scheduler = Scheduler("debian_scheduler")
        scheduler.start(run_pipeline, config, db, logger)

        # run immediately as well when scheduling
        scheduler.run_now(run_pipeline, config, db, logger)

        # keep the main thread alive for the scheduler
        try:
            while True:
                time.sleep(3600)
        except KeyboardInterrupt:
            scheduler.stop()
            logger.log("Scheduler stopped.")
    else:
        logger.log("Scheduler disabled. Running pipeline once.")
        run_pipeline(config, db, logger)
        logger.log("Pipeline finished.")


if __name__ == "__main__":
    config = Config(PackageManager.DEBIAN)
    db = DebianDB("debian_db", config)
    logger = Logger("debian")
    main(config, db, logger)
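For orientation before the parser below: Debian Packages and Sources manifests are RFC-822-style stanzas, i.e. blank-line-separated paragraphs of `Key: value` fields where continuation lines begin with a space or tab. A small hypothetical stanza, trimmed to fields the parser's mapper handles:

    Package: example-pkg
    Version: 1.2.3-1
    Architecture: amd64
    Depends: libexample1 (>= 1.2), example-data
    Homepage: https://example.org
    Description: an illustrative entry
     continuation lines are indented with a leading space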
================================================
FILE: package_managers/debian/parser.py
================================================
import re
from collections.abc import Iterator

from permalint import normalize_url

from package_managers.debian.structs import DebianData, Depends, Maintainer

# NOTE: The DebianParser is the one which normalizes all the URLs!


class DebianParser:
    def __init__(self, content: str):
        # content is the Packages or Sources file
        self.content = content

    def parse(self) -> Iterator[DebianData]:
        """Yield packages and sources from the Packages and Sources files."""
        paragraphs = self.content.split("\n\n")

        # iterate over the paragraphs
        for paragraph in paragraphs:
            # if the paragraph is empty, then move on
            if not paragraph.strip():
                continue

            # each paragraph represents one object
            obj = DebianData()

            # State for handling multiline fields
            current_field = None
            current_value = ""

            # populate the object
            for line in paragraph.split("\n"):
                # if the line is empty, then move on
                if not line.strip():
                    continue

                # if the line starts with a tab or space, then it's a
                # continuation of the previous field
                if line[0] == " " or line[0] == "\t":
                    # Append continuation line to current field value
                    if current_field is not None:
                        current_value += " " + line.strip()
                    continue

                # Process any accumulated field before starting a new one
                if current_field is not None:
                    self.mapper(obj, current_field, current_value)

                # Start new field
                if ":" not in line:
                    continue
                key, value = line.split(":", 1)
                current_field = key.strip()
                current_value = value.strip()

            # Process the final accumulated field
            if current_field is not None:
                self.mapper(obj, current_field, current_value)

            if obj.package:
                yield obj
            else:
                raise ValueError(f"Invalid package: {paragraph}")

    def handle_line(self, obj: DebianData, line: str) -> None:
        key, value = line.split(":", 1)
        self.mapper(obj, key, value)

    def mapper(self, obj: DebianData, key: str, value: str) -> None:
        """Map fields from Debian package/source files to DebianData object."""
        match key:
            case "Package":
                obj.package = value.strip()
            case "Source":
                obj.source = value.strip()
            case "Version":
                obj.version = value.strip()
            case "Installed-Size":
                obj.installed_size = int(value.strip())
            case "Architecture":
                obj.architecture = value.strip()
            case "Description":
                obj.description = value.strip()
            case "Homepage":
                obj.homepage = normalize_url(value.strip())
            case "Description-md5":
                obj.description_md5 = value.strip()
            case "Tag":
                obj.tag = value.strip()
            case "Section":
                obj.section = value.strip()
            case "Priority":
                obj.priority = value.strip()
            case "Filename":
                obj.filename = value.strip()
            case "Size":
                obj.size = int(value.strip())
            case "MD5sum":
                obj.md5sum = value.strip()
            case "SHA256":
                obj.sha256 = value.strip()
            case "Standards-Version":
                obj.standards_version = value.strip()
            case "Format":
                obj.format = value.strip()
            case "Vcs-Browser":
                obj.vcs_browser = normalize_url(value.strip())
            case "Vcs-Git":
                obj.vcs_git = normalize_url(value.strip())
            case "Directory":
                obj.directory = value.strip()
            case "Testsuite":
                obj.testsuite = value.strip()
            case "Testsuite-Triggers":
                obj.testsuite_triggers = value.strip()
            case "Binary":
                obj.binary = [bin.strip() for bin in value.split(",") if bin.strip()]
            case "Package-List":
                obj.package_list = [pkg.strip() for pkg in value.split(",")]
            # Dependency Fields
            case "Depends":
                for dependency in value.split(", "):
                    obj.depends.append(handle_depends(dependency.strip()))
            case "Pre-Depends":
                for dependency in value.split(", "):
                    obj.pre_depends.append(handle_depends(dependency.strip()))
            case "Replaces":
                for dependency in value.split(", "):
                    obj.replaces.append(handle_depends(dependency.strip()))
            case "Provides":
                for dependency in value.split(", "):
                    obj.provides.append(handle_depends(dependency.strip()))
            case "Recommends":
                for dependency in value.split(", "):
                    obj.recommends.append(handle_depends(dependency.strip()))
            case "Suggests":
                for dependency in value.split(", "):
                    obj.suggests.append(handle_depends(dependency.strip()))
            case "Breaks":
                for dependency in value.split(", "):
                    obj.breaks.append(handle_depends(dependency.strip()))
            case "Conflicts":
                for dependency in value.split(", "):
                    obj.conflicts.append(handle_depends(dependency.strip()))
            case "Build-Depends":
                for build_depends in value.split(", "):
                    obj.build_depends.append(handle_depends(build_depends.strip()))
            # Maintainer fields
            case "Uploaders":
                # Split by comma but respect quoted sections
                uploaders = []
                in_quotes = False
                current = ""
                for char in value:
                    if char == '"':
                        in_quotes = not in_quotes
                        current += char
                    elif char == "," and not in_quotes:
                        if current.strip():
                            uploaders.append(current.strip())
                        current = ""
                    else:
                        current += char
                if current.strip():
                    uploaders.append(current.strip())
                for uploader in uploaders:
                    obj.uploaders.append(handle_maintainer(uploader.strip()))
            case "Maintainer":
                obj.maintainer = handle_maintainer(value.strip())
            # TODO: File Fields
            case _:
                pass


# Helpers for handling specific fields in the mapper
def handle_depends(dependency: str) -> Depends:
    # Handle various dependency formats:
    #   0ad-data (>= 0.0.26)
    #   lib32gcc1-amd64-cross [amd64 arm64 i386 ppc64el x32]
    #   gm2-11 [!powerpc !ppc64 !x32]
    #   debhelper-compat (= 13)
    #   gcc-11-source (>= 11.3.0-11~)

    # First, strip platform specifications in square brackets, e.g.
    # [amd64 arm64 i386 ppc64el x32] or [!powerpc !ppc64 !x32]
    platform_match = re.search(r"\s*\[[^\]]+\]", dependency)
    if platform_match:
        dependency = dependency.replace(platform_match.group(0), "").strip()

    # Now handle version constraints in parentheses
    match = re.match(r"^(.*?)(\s*\((.*)\))?$", dependency)
    if match:
        dep = match.group(1).strip()
        if match.group(2):
            semver = match.group(3)
            return Depends(package=dep, semver=semver)
        else:
            return Depends(package=dep, semver="*")

    raise ValueError(f"Invalid dependency: {dependency}")


def handle_maintainer(value: str) -> Maintainer:
    # Remove trailing comma if present
    value = value.rstrip(",")

    # For names with quotes like "Adam C. Powell, IV" <email>
    if '"' in value:
        match = re.match(r'^"([^"]*)" <([^>]*)>$', value)
        if match:
            return Maintainer(name=match.group(1), email=match.group(2))

    # Standard format: Name <email>
    match = re.match(r"^(.*) <([^>]*)>$", value)
    if match:
        return Maintainer(name=match.group(1), email=match.group(2))

    raise ValueError(f"Invalid maintainer: {value}")
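A quick sanity check of handle_depends against the formats its comment documents; a sketch rather than part of the repo's test suite, and it assumes Depends is an eq-comparable dataclass:

    # Sketch exercising handle_depends on its documented input formats.
    assert handle_depends("0ad-data (>= 0.0.26)") == Depends(
        package="0ad-data", semver=">= 0.0.26"
    )
    # platform specs in brackets are stripped before matching
    assert handle_depends("lib32gcc1-amd64-cross [amd64 arm64 i386]") == Depends(
        package="lib32gcc1-amd64-cross", semver="*"
    )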
================================================
FILE: package_managers/debian/scripts/investigate_sources.py
================================================
#!/usr/bin/env pkgx uv run
"""
Script to investigate the relationship between Debian sources and packages files.

This helps understand the data structure before modifying the parser.
"""

import os
import sys

from core.logger import Logger

logger = Logger("debian_investigator")


def parse_sources_file(file_path: str) -> dict[str, set[str]]:
    """
    Parse the sources file and return a mapping of
    source_name -> set of binary packages.

    Args:
        file_path: Path to the sources file

    Returns:
        Dictionary mapping source package names to sets of binary package
        names they produce
    """
    source_binary_map = {}

    with open(file_path, encoding="utf-8") as f:
        current_package = None
        current_binaries = set()
        in_binary_field = False

        for line in f:
            original_line = line
            line = line.strip()

            if line.startswith("Package: "):
                # Save previous package if exists
                if current_package:
                    if current_package in source_binary_map:
                        # Merge with existing binaries for this source name
                        source_binary_map[current_package].update(current_binaries)
                    else:
                        source_binary_map[current_package] = current_binaries

                # Start new package
                current_package = line[9:].strip()
                current_binaries = set()
                in_binary_field = False

            elif line.startswith("Binary: "):
                # Parse binary packages (comma-separated, may continue on
                # the next lines)
                binaries_str = line[8:].strip()
                binaries = [b.strip() for b in binaries_str.split(",") if b.strip()]
                current_binaries.update(binaries)
                in_binary_field = True

            elif current_package and original_line.startswith(" "):
                # Continuation line (starts with space)
                if in_binary_field:
                    # Continue parsing the Binary field
                    binaries_str = line.strip()
                    binaries = [
                        b.strip() for b in binaries_str.split(",") if b.strip()
                    ]
                    current_binaries.update(binaries)
                # If not in binary field, it's some other field continuation - ignore

            elif line == "" and current_package:
                # End of current package entry
                if current_package in source_binary_map:
                    # Merge with existing binaries for this source name
                    source_binary_map[current_package].update(current_binaries)
                else:
                    source_binary_map[current_package] = current_binaries
                current_package = None
                current_binaries = set()
                in_binary_field = False

            else:
                # Any other field (not Package, not Binary, not continuation);
                # this includes new fields that don't start with a space
                in_binary_field = False

        # Handle last package if file doesn't end with a blank line
        if current_package:
            if current_package in source_binary_map:
                # Merge with existing binaries for this source name
                source_binary_map[current_package].update(current_binaries)
            else:
                source_binary_map[current_package] = current_binaries

    return source_binary_map
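For reference, a hypothetical, heavily trimmed Sources stanza of the shape this function walks; the `Binary:` field frequently wraps onto indented continuation lines, which is exactly the state the in_binary_field flag tracks (compare the binutils and linux fixtures in the test file below):

    Package: binutils
    Binary: binutils-for-host, binutils-for-build,
     binutils-m68k-linux-gnu, binutils-mips64el-linux-gnuabin32-dbg
    Version: 2.40-2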
def parse_packages_file(file_path: str) -> dict[str, str | None]:
    """
    Parse the packages file and return a mapping of package_name -> source_name.

    Args:
        file_path: Path to the packages file

    Returns:
        Dictionary mapping package names to their source package names
        (None if not specified)
    """
    package_source_map = {}

    with open(file_path, encoding="utf-8") as f:
        current_package = None
        current_source = None

        for line in f:
            line = line.strip()

            if line.startswith("Package: "):
                # Save previous package if exists
                if current_package:
                    package_source_map[current_package] = current_source

                # Start new package
                current_package = line[9:].strip()
                current_source = None

            elif line.startswith("Source: "):
                # Extract source name (may include version info in parentheses)
                source_str = line[8:].strip()
                # Remove version info if present: "source (version)" -> "source"
                if "(" in source_str:
                    current_source = source_str.split("(")[0].strip()
                else:
                    current_source = source_str

            elif line == "" and current_package:
                # End of current package entry
                package_source_map[current_package] = current_source
                current_package = None
                current_source = None

        # Handle last package if file doesn't end with a blank line
        if current_package:
            package_source_map[current_package] = current_source

    return package_source_map


def investigate_mapping(sources_file: str, packages_file: str) -> None:
    """
    Investigate the mapping between sources and packages files.

    Args:
        sources_file: Path to the sources file
        packages_file: Path to the packages file
    """
    logger.log("Parsing sources file...")
    source_binary_map = parse_sources_file(sources_file)
    logger.log(f"Found {len(source_binary_map)} source packages")

    logger.log("Parsing packages file...")
    package_source_map = parse_packages_file(packages_file)
    logger.log(f"Found {len(package_source_map)} binary packages")

    # Validate mappings
    orphaned_packages = []

    logger.log("\nValidating package -> source mappings...")
    for package_name, source_name in package_source_map.items():
        if source_name:
            # Package has an explicit source reference
            if source_name not in source_binary_map:
                logger.log(
                    f"WARNING: Package '{package_name}' references unknown source '{source_name}'"
                )
                orphaned_packages.append((package_name, source_name, "unknown_source"))
            elif package_name not in source_binary_map[source_name]:
                logger.log(
                    f"WARNING: Package '{package_name}' not listed in source '{source_name}' binaries"
                )
                orphaned_packages.append((package_name, source_name, "not_in_binaries"))
        else:
            # Package has no explicit source, assume source name == package name
            if package_name not in source_binary_map:
                logger.log(
                    f"WARNING: Package '{package_name}' has no source reference and no matching source package"
                )
                orphaned_packages.append(
                    (package_name, package_name, "no_matching_source")
                )
            elif package_name not in source_binary_map[package_name]:
                logger.log(
                    f"WARNING: Package '{package_name}' not listed in its own source binaries"
                )
                orphaned_packages.append(
                    (package_name, package_name, "not_self_listed")
                )

    # Summary
    logger.log("\n=== SUMMARY ===")
    logger.log(f"Total sources: {len(source_binary_map)}")
    logger.log(f"Total packages: {len(package_source_map)}")
    logger.log(f"Orphaned packages: {len(orphaned_packages)}")

    if orphaned_packages:
        logger.log("\nOrphaned packages by category:")
        categories = {}
        for pkg, src, reason in orphaned_packages:
            if reason not in categories:
                categories[reason] = []
            categories[reason].append((pkg, src))

        for reason, items in categories.items():
            logger.log(f"  {reason}: {len(items)} packages")
            for pkg, src in items[:5]:  # Show first 5 examples
                logger.log(f"    {pkg} -> {src}")
            if len(items) > 5:
                logger.log(f"    ...
and {len(items) - 5} more") def main(): data_dir = "data/debian/latest" # Check if data files exist sources_file = os.path.join(data_dir, "sources") packages_file = os.path.join(data_dir, "packages") if not os.path.exists(sources_file): logger.log(f"ERROR: Sources file not found at {sources_file}") logger.log("Use --fetch to download the latest data") return 1 if not os.path.exists(packages_file): logger.log(f"ERROR: Packages file not found at {packages_file}") logger.log("Use --fetch to download the latest data") return 1 logger.log(f"Using sources file: {sources_file}") logger.log(f"Using packages file: {packages_file}") investigate_mapping(sources_file, packages_file) return 0 if __name__ == "__main__": sys.exit(main()) ================================================ FILE: package_managers/debian/scripts/test_investigate_sources.py ================================================ from unittest.mock import mock_open, patch import pytest from package_managers.debian.scripts.investigate_sources import parse_sources_file @pytest.fixture def binutils(): return """ Package: binutils Binary: binutils-for-host, binutils-for-build, binutils-ia64-linux-gnu-dbg, binutils-m68k-linux-gnu, binutils-mips64el-linux-gnuabin32-dbg, binutils-mipsisa64r6-linux-gnuabin32, binutils-mipsisa64r6el-linux-gnuabi64-dbg """ @pytest.fixture def linux(): return """ Package: linux Binary: linux-support-6.1.0-32, linux-doc-6.1, linux-doc, linux-source-6.1, linux-source, linux-headers-6.1.0-32-common, linux-headers-6.1.0-32-common-rt, kernel-image-6.1.0-32-alpha-generic-di, nic-modules-6.1.0-32-alpha-generic-di, nic-wireless-modules-6.1.0-32-alpha-generic-di, nic-shared-modules-6.1.0-32-alpha-generic-di, serial-modules-6.1.0-32-alpha-generic-di, usb-serial-modules-6.1.0-32-alpha-generic-di, ppp-modules-6.1.0-32-alpha-generic-di, pata-modules-6.1.0-32-alpha-generic-di, cdrom-core-modules-6.1.0-32-alpha-generic-di, scsi-core-modules-6.1.0-32-alpha-generic-di, scsi-modules-6.1.0-32-alpha-generic-di, scsi-nic-modules-6.1.0-32-alpha-generic-di, loop-modules-6.1.0-32-alpha-generic-di, btrfs-modules-6.1.0-32-alpha-generic-di, ext4-modules-6.1.0-32-alpha-generic-di, isofs-modules-6.1.0-32-alpha-generic-di, jfs-modules-6.1.0-32-alpha-generic-di, xfs-modules-6.1.0-32-alpha-generic-di, fat-modules-6.1.0-32-alpha-generic-di, squashfs-modules-6.1.0-32-alpha-generic-di, fuse-modules-6.1.0-32-alpha-generic-di, f2fs-modules-6.1.0-32-alpha-generic-di, md-modules-6.1.0-32-alpha-generic-di, multipath-modules-6.1.0-32-alpha-generic-di, usb-modules-6.1.0-32-alpha-generic-di, usb-storage-modules-6.1.0-32-alpha-generic-di, fb-modules-6.1.0-32-alpha-generic-di, input-modules-6.1.0-32-alpha-generic-di, event-modules-6.1.0-32-alpha-generic-di, mouse-modules-6.1.0-32-alpha-generic-di, nic-pcmcia-modules-6.1.0-32-alpha-generic-di, pcmcia-modules-6.1.0-32-alpha-generic-di, nic-usb-modules-6.1.0-32-alpha-generic-di, sata-modules-6.1.0-32-alpha-generic-di, i2c-modules-6.1.0-32-alpha-generic-di, crc-modules-6.1.0-32-alpha-generic-di, crypto-modules-6.1.0-32-alpha-generic-di, crypto-dm-modules-6.1.0-32-alpha-generic-di, ata-modules-6.1.0-32-alpha-generic-di, nbd-modules-6.1.0-32-alpha-generic-di, srm-modules-6.1.0-32-alpha-generic-di, linux-libc-dev, linux-config-6.1, bpftool, linux-cpupower, libcpupower1, libcpupower-dev, linux-perf, usbip, hyperv-daemons, rtla, linux-kbuild-6.1, linux-bootwrapper-6.1.0-32, linux-headers-6.1.0-32-alpha-generic, linux-image-6.1.0-32-alpha-generic, linux-image-alpha-generic, linux-headers-alpha-generic, 
linux-image-6.1.0-32-alpha-generic-dbg, linux-image-alpha-generic-dbg, linux-headers-6.1.0-32-alpha-smp, linux-image-6.1.0-32-alpha-smp, linux-image-alpha-smp, linux-headers-alpha-smp, linux-image-6.1.0-32-alpha-smp-dbg, linux-image-alpha-smp-dbg, kernel-image-6.1.0-32-amd64-di, nic-modules-6.1.0-32-amd64-di, nic-wireless-modules-6.1.0-32-amd64-di, nic-shared-modules-6.1.0-32-amd64-di, serial-modules-6.1.0-32-amd64-di, usb-serial-modules-6.1.0-32-amd64-di, ppp-modules-6.1.0-32-amd64-di, pata-modules-6.1.0-32-amd64-di, cdrom-core-modules-6.1.0-32-amd64-di, firewire-core-modules-6.1.0-32-amd64-di, scsi-core-modules-6.1.0-32-amd64-di, scsi-modules-6.1.0-32-amd64-di, scsi-nic-modules-6.1.0-32-amd64-di, loop-modules-6.1.0-32-amd64-di, btrfs-modules-6.1.0-32-amd64-di, ext4-modules-6.1.0-32-amd64-di, isofs-modules-6.1.0-32-amd64-di, jfs-modules-6.1.0-32-amd64-di, xfs-modules-6.1.0-32-amd64-di, fat-modules-6.1.0-32-amd64-di, squashfs-modules-6.1.0-32-amd64-di, udf-modules-6.1.0-32-amd64-di, fuse-modules-6.1.0-32-amd64-di, f2fs-modules-6.1.0-32-amd64-di, md-modules-6.1.0-32-amd64-di, multipath-modules-6.1.0-32-amd64-di, usb-modules-6.1.0-32-amd64-di, usb-storage-modules-6.1.0-32-amd64-di, pcmcia-storage-modules-6.1.0-32-amd64-di, fb-modules-6.1.0-32-amd64-di, input-modules-6.1.0-32-amd64-di, event-modules-6.1.0-32-amd64-di, mouse-modules-6.1.0-32-amd64-di, nic-pcmcia-modules-6.1.0-32-amd64-di, pcmcia-modules-6.1.0-32-amd64-di, nic-usb-modules-6.1.0-32-amd64-di, sata-modules-6.1.0-32-amd64-di, acpi-modules-6.1.0-32-amd64-di, i2c-modules-6.1.0-32-amd64-di, crc-modules-6.1.0-32-amd64-di, crypto-modules-6.1.0-32-amd64-di, crypto-dm-modules-6.1.0-32-amd64-di, efi-modules-6.1.0-32-amd64-di, ata-modules-6.1.0-32-amd64-di, mmc-core-modules-6.1.0-32-amd64-di, mmc-modules-6.1.0-32-amd64-di, nbd-modules-6.1.0-32-amd64-di, speakup-modules-6.1.0-32-amd64-di, uinput-modules-6.1.0-32-amd64-di, sound-modules-6.1.0-32-amd64-di, mtd-core-modules-6.1.0-32-amd64-di, rfkill-modules-6.1.0-32-amd64-di, linux-image-amd64-signed-template, linux-headers-6.1.0-32-amd64, linux-image-6.1.0-32-amd64-unsigned, linux-image-6.1.0-32-amd64-dbg, linux-image-amd64-dbg, linux-headers-6.1.0-32-cloud-amd64, linux-image-6.1.0-32-cloud-amd64-unsigned, linux-image-6.1.0-32-cloud-amd64-dbg, linux-image-cloud-amd64-dbg, linux-headers-6.1.0-32-rt-amd64, linux-image-6.1.0-32-rt-amd64-unsigned, linux-image-6.1.0-32-rt-amd64-dbg, linux-image-rt-amd64-dbg, kernel-image-6.1.0-32-arm64-di, nic-modules-6.1.0-32-arm64-di, nic-wireless-modules-6.1.0-32-arm64-di, nic-shared-modules-6.1.0-32-arm64-di, usb-serial-modules-6.1.0-32-arm64-di, ppp-modules-6.1.0-32-arm64-di, cdrom-core-modules-6.1.0-32-arm64-di, scsi-core-modules-6.1.0-32-arm64-di, scsi-modules-6.1.0-32-arm64-di, scsi-nic-modules-6.1.0-32-arm64-di, loop-modules-6.1.0-32-arm64-di, btrfs-modules-6.1.0-32-arm64-di, ext4-modules-6.1.0-32-arm64-di, isofs-modules-6.1.0-32-arm64-di, jfs-modules-6.1.0-32-arm64-di, xfs-modules-6.1.0-32-arm64-di, fat-modules-6.1.0-32-arm64-di, squashfs-modules-6.1.0-32-arm64-di, udf-modules-6.1.0-32-arm64-di, fuse-modules-6.1.0-32-arm64-di, f2fs-modules-6.1.0-32-arm64-di, md-modules-6.1.0-32-arm64-di, multipath-modules-6.1.0-32-arm64-di, usb-modules-6.1.0-32-arm64-di, usb-storage-modules-6.1.0-32-arm64-di, fb-modules-6.1.0-32-arm64-di, input-modules-6.1.0-32-arm64-di, event-modules-6.1.0-32-arm64-di, nic-usb-modules-6.1.0-32-arm64-di, sata-modules-6.1.0-32-arm64-di, i2c-modules-6.1.0-32-arm64-di, crc-modules-6.1.0-32-arm64-di, crypto-modules-6.1.0-32-arm64-di, 
crypto-dm-modules-6.1.0-32-arm64-di, efi-modules-6.1.0-32-arm64-di, ata-modules-6.1.0-32-arm64-di, mmc-modules-6.1.0-32-arm64-di, nbd-modules-6.1.0-32-arm64-di, speakup-modules-6.1.0-32-arm64-di, uinput-modules-6.1.0-32-arm64-di, sound-modules-6.1.0-32-arm64-di, leds-modules-6.1.0-32-arm64-di, mtd-core-modules-6.1.0-32-arm64-di, linux-image-arm64-signed-template, linux-headers-6.1.0-32-arm64, linux-image-6.1.0-32-arm64-unsigned, linux-image-6.1.0-32-arm64-dbg, linux-image-arm64-dbg, linux-headers-6.1.0-32-cloud-arm64, linux-image-6.1.0-32-cloud-arm64-unsigned, linux-image-6.1.0-32-cloud-arm64-dbg, linux-image-cloud-arm64-dbg, linux-headers-6.1.0-32-rt-arm64, linux-image-6.1.0-32-rt-arm64-unsigned, linux-image-6.1.0-32-rt-arm64-dbg, linux-image-rt-arm64-dbg, kernel-image-6.1.0-32-marvell-di, nic-modules-6.1.0-32-marvell-di, nic-shared-modules-6.1.0-32-marvell-di, usb-serial-modules-6.1.0-32-marvell-di, ppp-modules-6.1.0-32-marvell-di, cdrom-core-modules-6.1.0-32-marvell-di, scsi-core-modules-6.1.0-32-marvell-di, loop-modules-6.1.0-32-marvell-di, ipv6-modules-6.1.0-32-marvell-di, btrfs-modules-6.1.0-32-marvell-di, ext4-modules-6.1.0-32-marvell-di, isofs-modules-6.1.0-32-marvell-di, jffs2-modules-6.1.0-32-marvell-di, jfs-modules-6.1.0-32-marvell-di, fat-modules-6.1.0-32-marvell-di, minix-modules-6.1.0-32-marvell-di, squashfs-modules-6.1.0-32-marvell-di, udf-modules-6.1.0-32-marvell-di, fuse-modules-6.1.0-32-marvell-di, f2fs-modules-6.1.0-32-marvell-di, md-modules-6.1.0-32-marvell-di, multipath-modules-6.1.0-32-marvell-di, usb-modules-6.1.0-32-marvell-di, usb-storage-modules-6.1.0-32-marvell-di, fb-modules-6.1.0-32-marvell-di, input-modules-6.1.0-32-marvell-di, event-modules-6.1.0-32-marvell-di, mouse-modules-6.1.0-32-marvell-di, nic-usb-modules-6.1.0-32-marvell-di, sata-modules-6.1.0-32-marvell-di, crc-modules-6.1.0-32-marvell-di, crypto-modules-6.1.0-32-marvell-di, crypto-dm-modules-6.1.0-32-marvell-di, mmc-core-modules-6.1.0-32-marvell-di, mmc-modules-6.1.0-32-marvell-di, nbd-modules-6.1.0-32-marvell-di, uinput-modules-6.1.0-32-marvell-di, leds-modules-6.1.0-32-marvell-di, mtd-modules-6.1.0-32-marvell-di, mtd-core-modules-6.1.0-32-marvell-di, linux-headers-6.1.0-32-marvell, linux-image-6.1.0-32-marvell, linux-image-marvell, linux-headers-marvell, linux-image-6.1.0-32-marvell-dbg, linux-image-marvell-dbg, linux-headers-6.1.0-32-rpi, linux-image-6.1.0-32-rpi, linux-image-rpi, linux-headers-rpi, linux-image-6.1.0-32-rpi-dbg, linux-image-rpi-dbg, kernel-image-6.1.0-32-armmp-di, nic-modules-6.1.0-32-armmp-di, nic-wireless-modules-6.1.0-32-armmp-di, nic-shared-modules-6.1.0-32-armmp-di, usb-serial-modules-6.1.0-32-armmp-di, ppp-modules-6.1.0-32-armmp-di, pata-modules-6.1.0-32-armmp-di, cdrom-core-modules-6.1.0-32-armmp-di, scsi-core-modules-6.1.0-32-armmp-di, scsi-modules-6.1.0-32-armmp-di, scsi-nic-modules-6.1.0-32-armmp-di, loop-modules-6.1.0-32-armmp-di, btrfs-modules-6.1.0-32-armmp-di, ext4-modules-6.1.0-32-armmp-di, isofs-modules-6.1.0-32-armmp-di, jfs-modules-6.1.0-32-armmp-di, fat-modules-6.1.0-32-armmp-di, squashfs-modules-6.1.0-32-armmp-di, udf-modules-6.1.0-32-armmp-di, fuse-modules-6.1.0-32-armmp-di, f2fs-modules-6.1.0-32-armmp-di, md-modules-6.1.0-32-armmp-di, multipath-modules-6.1.0-32-armmp-di, usb-modules-6.1.0-32-armmp-di, usb-storage-modules-6.1.0-32-armmp-di, fb-modules-6.1.0-32-armmp-di, input-modules-6.1.0-32-armmp-di, event-modules-6.1.0-32-armmp-di, nic-usb-modules-6.1.0-32-armmp-di, sata-modules-6.1.0-32-armmp-di, i2c-modules-6.1.0-32-armmp-di, 
crc-modules-6.1.0-32-armmp-di, crypto-modules-6.1.0-32-armmp-di, crypto-dm-modules-6.1.0-32-armmp-di, efi-modules-6.1.0-32-armmp-di, ata-modules-6.1.0-32-armmp-di, mmc-modules-6.1.0-32-armmp-di, nbd-modules-6.1.0-32-armmp-di, speakup-modules-6.1.0-32-armmp-di, uinput-modules-6.1.0-32-armmp-di, sound-modules-6.1.0-32-armmp-di, leds-modules-6.1.0-32-armmp-di, mtd-modules-6.1.0-32-armmp-di, linux-headers-6.1.0-32-armmp, linux-image-6.1.0-32-armmp, linux-image-armmp, linux-headers-armmp, linux-image-6.1.0-32-armmp-dbg, linux-image-armmp-dbg, linux-headers-6.1.0-32-armmp-lpae, linux-image-6.1.0-32-armmp-lpae, linux-image-armmp-lpae, linux-headers-armmp-lpae, linux-image-6.1.0-32-armmp-lpae-dbg, linux-image-armmp-lpae-dbg, linux-headers-6.1.0-32-rt-armmp, linux-image-6.1.0-32-rt-armmp, linux-image-rt-armmp, linux-headers-rt-armmp, linux-image-6.1.0-32-rt-armmp-dbg, linux-image-rt-armmp-dbg, kernel-image-6.1.0-32-parisc-di, nic-modules-6.1.0-32-parisc-di, nic-shared-modules-6.1.0-32-parisc-di, serial-modules-6.1.0-32-parisc-di, usb-serial-modules-6.1.0-32-parisc-di, ppp-modules-6.1.0-32-parisc-di, pata-modules-6.1.0-32-parisc-di, cdrom-core-modules-6.1.0-32-parisc-di, scsi-core-modules-6.1.0-32-parisc-di, scsi-modules-6.1.0-32-parisc-di, loop-modules-6.1.0-32-parisc-di, btrfs-modules-6.1.0-32-parisc-di, ext4-modules-6.1.0-32-parisc-di, isofs-modules-6.1.0-32-parisc-di, jfs-modules-6.1.0-32-parisc-di, xfs-modules-6.1.0-32-parisc-di, fat-modules-6.1.0-32-parisc-di, squashfs-modules-6.1.0-32-parisc-di, fuse-modules-6.1.0-32-parisc-di, f2fs-modules-6.1.0-32-parisc-di, md-modules-6.1.0-32-parisc-di, multipath-modules-6.1.0-32-parisc-di, usb-modules-6.1.0-32-parisc-di, usb-storage-modules-6.1.0-32-parisc-di, input-modules-6.1.0-32-parisc-di, event-modules-6.1.0-32-parisc-di, mouse-modules-6.1.0-32-parisc-di, nic-usb-modules-6.1.0-32-parisc-di, sata-modules-6.1.0-32-parisc-di, i2c-modules-6.1.0-32-parisc-di, crc-modules-6.1.0-32-parisc-di, crypto-modules-6.1.0-32-parisc-di, crypto-dm-modules-6.1.0-32-parisc-di, ata-modules-6.1.0-32-parisc-di, nbd-modules-6.1.0-32-parisc-di, kernel-image-6.1.0-32-parisc64-di, nic-modules-6.1.0-32-parisc64-di, nic-shared-modules-6.1.0-32-parisc64-di, serial-modules-6.1.0-32-parisc64-di, usb-serial-modules-6.1.0-32-parisc64-di, ppp-modules-6.1.0-32-parisc64-di, pata-modules-6.1.0-32-parisc64-di, cdrom-core-modules-6.1.0-32-parisc64-di, scsi-core-modules-6.1.0-32-parisc64-di, scsi-modules-6.1.0-32-parisc64-di, loop-modules-6.1.0-32-parisc64-di, btrfs-modules-6.1.0-32-parisc64-di, ext4-modules-6.1.0-32-parisc64-di, isofs-modules-6.1.0-32-parisc64-di, jfs-modules-6.1.0-32-parisc64-di, xfs-modules-6.1.0-32-parisc64-di, fat-modules-6.1.0-32-parisc64-di, squashfs-modules-6.1.0-32-parisc64-di, fuse-modules-6.1.0-32-parisc64-di, f2fs-modules-6.1.0-32-parisc64-di, md-modules-6.1.0-32-parisc64-di, multipath-modules-6.1.0-32-parisc64-di, usb-modules-6.1.0-32-parisc64-di, usb-storage-modules-6.1.0-32-parisc64-di, fb-modules-6.1.0-32-parisc64-di, input-modules-6.1.0-32-parisc64-di, event-modules-6.1.0-32-parisc64-di, mouse-modules-6.1.0-32-parisc64-di, nic-usb-modules-6.1.0-32-parisc64-di, sata-modules-6.1.0-32-parisc64-di, crc-modules-6.1.0-32-parisc64-di, crypto-modules-6.1.0-32-parisc64-di, crypto-dm-modules-6.1.0-32-parisc64-di, ata-modules-6.1.0-32-parisc64-di, nbd-modules-6.1.0-32-parisc64-di, linux-headers-6.1.0-32-parisc, linux-image-6.1.0-32-parisc, linux-image-parisc, linux-headers-parisc, linux-image-6.1.0-32-parisc-dbg, linux-image-parisc-dbg, 
linux-headers-6.1.0-32-parisc64, linux-image-6.1.0-32-parisc64, linux-image-parisc64, linux-headers-parisc64, linux-image-6.1.0-32-parisc64-dbg, linux-image-parisc64-dbg, kernel-image-6.1.0-32-686-di, nic-modules-6.1.0-32-686-di, nic-wireless-modules-6.1.0-32-686-di, nic-shared-modules-6.1.0-32-686-di, serial-modules-6.1.0-32-686-di, usb-serial-modules-6.1.0-32-686-di, ppp-modules-6.1.0-32-686-di, pata-modules-6.1.0-32-686-di, cdrom-core-modules-6.1.0-32-686-di, firewire-core-modules-6.1.0-32-686-di, scsi-core-modules-6.1.0-32-686-di, scsi-modules-6.1.0-32-686-di, scsi-nic-modules-6.1.0-32-686-di, loop-modules-6.1.0-32-686-di, btrfs-modules-6.1.0-32-686-di, ext4-modules-6.1.0-32-686-di, isofs-modules-6.1.0-32-686-di, jfs-modules-6.1.0-32-686-di, xfs-modules-6.1.0-32-686-di, fat-modules-6.1.0-32-686-di, squashfs-modules-6.1.0-32-686-di, udf-modules-6.1.0-32-686-di, fuse-modules-6.1.0-32-686-di, f2fs-modules-6.1.0-32-686-di, md-modules-6.1.0-32-686-di, multipath-modules-6.1.0-32-686-di, usb-modules-6.1.0-32-686-di, usb-storage-modules-6.1.0-32-686-di, pcmcia-storage-modules-6.1.0-32-686-di, fb-modules-6.1.0-32-686-di, input-modules-6.1.0-32-686-di, event-modules-6.1.0-32-686-di, mouse-modules-6.1.0-32-686-di, nic-pcmcia-modules-6.1.0-32-686-di, pcmcia-modules-6.1.0-32-686-di, nic-usb-modules-6.1.0-32-686-di, sata-modules-6.1.0-32-686-di, acpi-modules-6.1.0-32-686-di, i2c-modules-6.1.0-32-686-di, crc-modules-6.1.0-32-686-di, crypto-modules-6.1.0-32-686-di, crypto-dm-modules-6.1.0-32-686-di, efi-modules-6.1.0-32-686-di, ata-modules-6.1.0-32-686-di, mmc-core-modules-6.1.0-32-686-di, mmc-modules-6.1.0-32-686-di, nbd-modules-6.1.0-32-686-di, speakup-modules-6.1.0-32-686-di, uinput-modules-6.1.0-32-686-di, sound-modules-6.1.0-32-686-di, mtd-core-modules-6.1.0-32-686-di, rfkill-modules-6.1.0-32-686-di, kernel-image-6.1.0-32-686-pae-di, nic-modules-6.1.0-32-686-pae-di, nic-wireless-modules-6.1.0-32-686-pae-di, nic-shared-modules-6.1.0-32-686-pae-di, serial-modules-6.1.0-32-686-pae-di, usb-serial-modules-6.1.0-32-686-pae-di, ppp-modules-6.1.0-32-686-pae-di, pata-modules-6.1.0-32-686-pae-di, cdrom-core-modules-6.1.0-32-686-pae-di, firewire-core-modules-6.1.0-32-686-pae-di, scsi-core-modules-6.1.0-32-686-pae-di, scsi-modules-6.1.0-32-686-pae-di, scsi-nic-modules-6.1.0-32-686-pae-di, loop-modules-6.1.0-32-686-pae-di, btrfs-modules-6.1.0-32-686-pae-di, ext4-modules-6.1.0-32-686-pae-di, isofs-modules-6.1.0-32-686-pae-di, jfs-modules-6.1.0-32-686-pae-di, xfs-modules-6.1.0-32-686-pae-di, fat-modules-6.1.0-32-686-pae-di, squashfs-modules-6.1.0-32-686-pae-di, udf-modules-6.1.0-32-686-pae-di, fuse-modules-6.1.0-32-686-pae-di, f2fs-modules-6.1.0-32-686-pae-di, md-modules-6.1.0-32-686-pae-di, multipath-modules-6.1.0-32-686-pae-di, usb-modules-6.1.0-32-686-pae-di, usb-storage-modules-6.1.0-32-686-pae-di, pcmcia-storage-modules-6.1.0-32-686-pae-di, fb-modules-6.1.0-32-686-pae-di, input-modules-6.1.0-32-686-pae-di, event-modules-6.1.0-32-686-pae-di, mouse-modules-6.1.0-32-686-pae-di, nic-pcmcia-modules-6.1.0-32-686-pae-di, pcmcia-modules-6.1.0-32-686-pae-di, nic-usb-modules-6.1.0-32-686-pae-di, sata-modules-6.1.0-32-686-pae-di, acpi-modules-6.1.0-32-686-pae-di, i2c-modules-6.1.0-32-686-pae-di, crc-modules-6.1.0-32-686-pae-di, crypto-modules-6.1.0-32-686-pae-di, crypto-dm-modules-6.1.0-32-686-pae-di, efi-modules-6.1.0-32-686-pae-di, ata-modules-6.1.0-32-686-pae-di, mmc-core-modules-6.1.0-32-686-pae-di, mmc-modules-6.1.0-32-686-pae-di, nbd-modules-6.1.0-32-686-pae-di, speakup-modules-6.1.0-32-686-pae-di, 
uinput-modules-6.1.0-32-686-pae-di, sound-modules-6.1.0-32-686-pae-di, mtd-core-modules-6.1.0-32-686-pae-di, rfkill-modules-6.1.0-32-686-pae-di, linux-image-i386-signed-template, linux-headers-6.1.0-32-686, linux-image-6.1.0-32-686-unsigned, linux-image-6.1.0-32-686-dbg, linux-image-686-dbg, linux-headers-6.1.0-32-686-pae, linux-image-6.1.0-32-686-pae-unsigned, linux-image-6.1.0-32-686-pae-dbg, linux-image-686-pae-dbg, linux-headers-6.1.0-32-rt-686-pae, linux-image-6.1.0-32-rt-686-pae-unsigned, linux-image-6.1.0-32-rt-686-pae-dbg, linux-image-rt-686-pae-dbg, kernel-image-6.1.0-32-itanium-di, nic-modules-6.1.0-32-itanium-di, nic-shared-modules-6.1.0-32-itanium-di, serial-modules-6.1.0-32-itanium-di, usb-serial-modules-6.1.0-32-itanium-di, ppp-modules-6.1.0-32-itanium-di, pata-modules-6.1.0-32-itanium-di, cdrom-core-modules-6.1.0-32-itanium-di, firewire-core-modules-6.1.0-32-itanium-di, scsi-core-modules-6.1.0-32-itanium-di, scsi-modules-6.1.0-32-itanium-di, scsi-nic-modules-6.1.0-32-itanium-di, loop-modules-6.1.0-32-itanium-di, btrfs-modules-6.1.0-32-itanium-di, ext4-modules-6.1.0-32-itanium-di, isofs-modules-6.1.0-32-itanium-di, jfs-modules-6.1.0-32-itanium-di, xfs-modules-6.1.0-32-itanium-di, fat-modules-6.1.0-32-itanium-di, squashfs-modules-6.1.0-32-itanium-di, udf-modules-6.1.0-32-itanium-di, fuse-modules-6.1.0-32-itanium-di, f2fs-modules-6.1.0-32-itanium-di, md-modules-6.1.0-32-itanium-di, multipath-modules-6.1.0-32-itanium-di, usb-modules-6.1.0-32-itanium-di, usb-storage-modules-6.1.0-32-itanium-di, fb-modules-6.1.0-32-itanium-di, input-modules-6.1.0-32-itanium-di, event-modules-6.1.0-32-itanium-di, mouse-modules-6.1.0-32-itanium-di, pcmcia-modules-6.1.0-32-itanium-di, nic-usb-modules-6.1.0-32-itanium-di, sata-modules-6.1.0-32-itanium-di, i2c-modules-6.1.0-32-itanium-di, crc-modules-6.1.0-32-itanium-di, crypto-modules-6.1.0-32-itanium-di, crypto-dm-modules-6.1.0-32-itanium-di, ata-modules-6.1.0-32-itanium-di, nbd-modules-6.1.0-32-itanium-di, uinput-modules-6.1.0-32-itanium-di, mtd-core-modules-6.1.0-32-itanium-di, linux-headers-6.1.0-32-itanium, linux-image-6.1.0-32-itanium, linux-image-itanium, linux-headers-itanium, linux-image-6.1.0-32-itanium-dbg, linux-image-itanium-dbg, linux-headers-6.1.0-32-mckinley, linux-image-6.1.0-32-mckinley, linux-image-mckinley, linux-headers-mckinley, linux-image-6.1.0-32-mckinley-dbg, linux-image-mckinley-dbg, kernel-image-6.1.0-32-m68k-di, nic-modules-6.1.0-32-m68k-di, nic-shared-modules-6.1.0-32-m68k-di, ppp-modules-6.1.0-32-m68k-di, pata-modules-6.1.0-32-m68k-di, cdrom-core-modules-6.1.0-32-m68k-di, scsi-core-modules-6.1.0-32-m68k-di, scsi-modules-6.1.0-32-m68k-di, loop-modules-6.1.0-32-m68k-di, btrfs-modules-6.1.0-32-m68k-di, ext4-modules-6.1.0-32-m68k-di, isofs-modules-6.1.0-32-m68k-di, fat-modules-6.1.0-32-m68k-di, hfs-modules-6.1.0-32-m68k-di, affs-modules-6.1.0-32-m68k-di, squashfs-modules-6.1.0-32-m68k-di, udf-modules-6.1.0-32-m68k-di, fuse-modules-6.1.0-32-m68k-di, md-modules-6.1.0-32-m68k-di, crc-modules-6.1.0-32-m68k-di, crypto-modules-6.1.0-32-m68k-di, ata-modules-6.1.0-32-m68k-di, nbd-modules-6.1.0-32-m68k-di, linux-headers-6.1.0-32-m68k, linux-image-6.1.0-32-m68k, linux-image-m68k, linux-headers-m68k, linux-image-6.1.0-32-m68k-dbg, linux-image-m68k-dbg, kernel-image-6.1.0-32-4kc-malta-di, nic-modules-6.1.0-32-4kc-malta-di, nic-wireless-modules-6.1.0-32-4kc-malta-di, nic-shared-modules-6.1.0-32-4kc-malta-di, usb-serial-modules-6.1.0-32-4kc-malta-di, ppp-modules-6.1.0-32-4kc-malta-di, pata-modules-6.1.0-32-4kc-malta-di, 
cdrom-core-modules-6.1.0-32-4kc-malta-di, firewire-core-modules-6.1.0-32-4kc-malta-di, scsi-core-modules-6.1.0-32-4kc-malta-di, scsi-modules-6.1.0-32-4kc-malta-di, scsi-nic-modules-6.1.0-32-4kc-malta-di, loop-modules-6.1.0-32-4kc-malta-di, btrfs-modules-6.1.0-32-4kc-malta-di, ext4-modules-6.1.0-32-4kc-malta-di, isofs-modules-6.1.0-32-4kc-malta-di, jfs-modules-6.1.0-32-4kc-malta-di, xfs-modules-6.1.0-32-4kc-malta-di, fat-modules-6.1.0-32-4kc-malta-di, affs-modules-6.1.0-32-4kc-malta-di, minix-modules-6.1.0-32-4kc-malta-di, nfs-modules-6.1.0-32-4kc-malta-di, squashfs-modules-6.1.0-32-4kc-malta-di, udf-modules-6.1.0-32-4kc-malta-di, fuse-modules-6.1.0-32-4kc-malta-di, f2fs-modules-6.1.0-32-4kc-malta-di, md-modules-6.1.0-32-4kc-malta-di, multipath-modules-6.1.0-32-4kc-malta-di, usb-modules-6.1.0-32-4kc-malta-di, usb-storage-modules-6.1.0-32-4kc-malta-di, fb-modules-6.1.0-32-4kc-malta-di, input-modules-6.1.0-32-4kc-malta-di, event-modules-6.1.0-32-4kc-malta-di, mouse-modules-6.1.0-32-4kc-malta-di, nic-usb-modules-6.1.0-32-4kc-malta-di, sata-modules-6.1.0-32-4kc-malta-di, crc-modules-6.1.0-32-4kc-malta-di, crypto-modules-6.1.0-32-4kc-malta-di, crypto-dm-modules-6.1.0-32-4kc-malta-di, ata-modules-6.1.0-32-4kc-malta-di, mmc-core-modules-6.1.0-32-4kc-malta-di, mmc-modules-6.1.0-32-4kc-malta-di, nbd-modules-6.1.0-32-4kc-malta-di, speakup-modules-6.1.0-32-4kc-malta-di, sound-modules-6.1.0-32-4kc-malta-di, kernel-image-6.1.0-32-mips32r2eb-di, nic-modules-6.1.0-32-mips32r2eb-di, nic-wireless-modules-6.1.0-32-mips32r2eb-di, nic-shared-modules-6.1.0-32-mips32r2eb-di, usb-serial-modules-6.1.0-32-mips32r2eb-di, ppp-modules-6.1.0-32-mips32r2eb-di, pata-modules-6.1.0-32-mips32r2eb-di, cdrom-core-modules-6.1.0-32-mips32r2eb-di, firewire-core-modules-6.1.0-32-mips32r2eb-di, scsi-core-modules-6.1.0-32-mips32r2eb-di, scsi-modules-6.1.0-32-mips32r2eb-di, scsi-nic-modules-6.1.0-32-mips32r2eb-di, loop-modules-6.1.0-32-mips32r2eb-di, btrfs-modules-6.1.0-32-mips32r2eb-di, ext4-modules-6.1.0-32-mips32r2eb-di, isofs-modules-6.1.0-32-mips32r2eb-di, jfs-modules-6.1.0-32-mips32r2eb-di, xfs-modules-6.1.0-32-mips32r2eb-di, fat-modules-6.1.0-32-mips32r2eb-di, affs-modules-6.1.0-32-mips32r2eb-di, minix-modules-6.1.0-32-mips32r2eb-di, nfs-modules-6.1.0-32-mips32r2eb-di, squashfs-modules-6.1.0-32-mips32r2eb-di, udf-modules-6.1.0-32-mips32r2eb-di, fuse-modules-6.1.0-32-mips32r2eb-di, f2fs-modules-6.1.0-32-mips32r2eb-di, md-modules-6.1.0-32-mips32r2eb-di, multipath-modules-6.1.0-32-mips32r2eb-di, usb-modules-6.1.0-32-mips32r2eb-di, usb-storage-modules-6.1.0-32-mips32r2eb-di, fb-modules-6.1.0-32-mips32r2eb-di, input-modules-6.1.0-32-mips32r2eb-di, event-modules-6.1.0-32-mips32r2eb-di, mouse-modules-6.1.0-32-mips32r2eb-di, nic-usb-modules-6.1.0-32-mips32r2eb-di, sata-modules-6.1.0-32-mips32r2eb-di, crc-modules-6.1.0-32-mips32r2eb-di, crypto-modules-6.1.0-32-mips32r2eb-di, crypto-dm-modules-6.1.0-32-mips32r2eb-di, ata-modules-6.1.0-32-mips32r2eb-di, mmc-core-modules-6.1.0-32-mips32r2eb-di, mmc-modules-6.1.0-32-mips32r2eb-di, nbd-modules-6.1.0-32-mips32r2eb-di, speakup-modules-6.1.0-32-mips32r2eb-di, sound-modules-6.1.0-32-mips32r2eb-di, kernel-image-6.1.0-32-octeon-di, nic-modules-6.1.0-32-octeon-di, nic-wireless-modules-6.1.0-32-octeon-di, nic-shared-modules-6.1.0-32-octeon-di, usb-serial-modules-6.1.0-32-octeon-di, ppp-modules-6.1.0-32-octeon-di, pata-modules-6.1.0-32-octeon-di, cdrom-core-modules-6.1.0-32-octeon-di, firewire-core-modules-6.1.0-32-octeon-di, scsi-core-modules-6.1.0-32-octeon-di, scsi-modules-6.1.0-32-octeon-di, 
scsi-nic-modules-6.1.0-32-octeon-di, loop-modules-6.1.0-32-octeon-di, btrfs-modules-6.1.0-32-octeon-di, ext4-modules-6.1.0-32-octeon-di, isofs-modules-6.1.0-32-octeon-di, jfs-modules-6.1.0-32-octeon-di, xfs-modules-6.1.0-32-octeon-di, fat-modules-6.1.0-32-octeon-di, affs-modules-6.1.0-32-octeon-di, minix-modules-6.1.0-32-octeon-di, nfs-modules-6.1.0-32-octeon-di, squashfs-modules-6.1.0-32-octeon-di, udf-modules-6.1.0-32-octeon-di, fuse-modules-6.1.0-32-octeon-di, f2fs-modules-6.1.0-32-octeon-di, md-modules-6.1.0-32-octeon-di, multipath-modules-6.1.0-32-octeon-di, usb-modules-6.1.0-32-octeon-di, usb-storage-modules-6.1.0-32-octeon-di, fb-modules-6.1.0-32-octeon-di, input-modules-6.1.0-32-octeon-di, event-modules-6.1.0-32-octeon-di, mouse-modules-6.1.0-32-octeon-di, nic-usb-modules-6.1.0-32-octeon-di, sata-modules-6.1.0-32-octeon-di, crc-modules-6.1.0-32-octeon-di, crypto-modules-6.1.0-32-octeon-di, crypto-dm-modules-6.1.0-32-octeon-di, ata-modules-6.1.0-32-octeon-di, mmc-core-modules-6.1.0-32-octeon-di, mmc-modules-6.1.0-32-octeon-di, nbd-modules-6.1.0-32-octeon-di, speakup-modules-6.1.0-32-octeon-di, sound-modules-6.1.0-32-octeon-di, linux-headers-6.1.0-32-4kc-malta, linux-image-6.1.0-32-4kc-malta, linux-image-4kc-malta, linux-headers-4kc-malta, linux-image-6.1.0-32-4kc-malta-dbg, linux-image-4kc-malta-dbg, linux-headers-6.1.0-32-mips32r2eb, linux-image-6.1.0-32-mips32r2eb, linux-image-mips32r2eb, linux-headers-mips32r2eb, linux-image-6.1.0-32-mips32r2eb-dbg, linux-image-mips32r2eb-dbg, linux-headers-6.1.0-32-octeon, linux-image-6.1.0-32-octeon, linux-image-octeon, linux-headers-octeon, linux-image-6.1.0-32-octeon-dbg, linux-image-octeon-dbg, kernel-image-6.1.0-32-5kc-malta-di, nic-modules-6.1.0-32-5kc-malta-di, nic-wireless-modules-6.1.0-32-5kc-malta-di, nic-shared-modules-6.1.0-32-5kc-malta-di, usb-serial-modules-6.1.0-32-5kc-malta-di, ppp-modules-6.1.0-32-5kc-malta-di, pata-modules-6.1.0-32-5kc-malta-di, cdrom-core-modules-6.1.0-32-5kc-malta-di, firewire-core-modules-6.1.0-32-5kc-malta-di, scsi-core-modules-6.1.0-32-5kc-malta-di, scsi-modules-6.1.0-32-5kc-malta-di, scsi-nic-modules-6.1.0-32-5kc-malta-di, loop-modules-6.1.0-32-5kc-malta-di, btrfs-modules-6.1.0-32-5kc-malta-di, ext4-modules-6.1.0-32-5kc-malta-di, isofs-modules-6.1.0-32-5kc-malta-di, jfs-modules-6.1.0-32-5kc-malta-di, xfs-modules-6.1.0-32-5kc-malta-di, fat-modules-6.1.0-32-5kc-malta-di, affs-modules-6.1.0-32-5kc-malta-di, minix-modules-6.1.0-32-5kc-malta-di, nfs-modules-6.1.0-32-5kc-malta-di, squashfs-modules-6.1.0-32-5kc-malta-di, udf-modules-6.1.0-32-5kc-malta-di, fuse-modules-6.1.0-32-5kc-malta-di, f2fs-modules-6.1.0-32-5kc-malta-di, md-modules-6.1.0-32-5kc-malta-di, multipath-modules-6.1.0-32-5kc-malta-di, usb-modules-6.1.0-32-5kc-malta-di, usb-storage-modules-6.1.0-32-5kc-malta-di, fb-modules-6.1.0-32-5kc-malta-di, input-modules-6.1.0-32-5kc-malta-di, event-modules-6.1.0-32-5kc-malta-di, mouse-modules-6.1.0-32-5kc-malta-di, nic-usb-modules-6.1.0-32-5kc-malta-di, sata-modules-6.1.0-32-5kc-malta-di, crc-modules-6.1.0-32-5kc-malta-di, crypto-modules-6.1.0-32-5kc-malta-di, crypto-dm-modules-6.1.0-32-5kc-malta-di, ata-modules-6.1.0-32-5kc-malta-di, mmc-core-modules-6.1.0-32-5kc-malta-di, mmc-modules-6.1.0-32-5kc-malta-di, nbd-modules-6.1.0-32-5kc-malta-di, speakup-modules-6.1.0-32-5kc-malta-di, sound-modules-6.1.0-32-5kc-malta-di, kernel-image-6.1.0-32-mips64r2eb-di, nic-modules-6.1.0-32-mips64r2eb-di, nic-wireless-modules-6.1.0-32-mips64r2eb-di, nic-shared-modules-6.1.0-32-mips64r2eb-di, 
usb-serial-modules-6.1.0-32-mips64r2eb-di, ppp-modules-6.1.0-32-mips64r2eb-di, pata-modules-6.1.0-32-mips64r2eb-di, cdrom-core-modules-6.1.0-32-mips64r2eb-di, firewire-core-modules-6.1.0-32-mips64r2eb-di, scsi-core-modules-6.1.0-32-mips64r2eb-di, scsi-modules-6.1.0-32-mips64r2eb-di, scsi-nic-modules-6.1.0-32-mips64r2eb-di, loop-modules-6.1.0-32-mips64r2eb-di, btrfs-modules-6.1.0-32-mips64r2eb-di, ext4-modules-6.1.0-32-mips64r2eb-di, isofs-modules-6.1.0-32-mips64r2eb-di, jfs-modules-6.1.0-32-mips64r2eb-di, xfs-modules-6.1.0-32-mips64r2eb-di, fat-modules-6.1.0-32-mips64r2eb-di, affs-modules-6.1.0-32-mips64r2eb-di, minix-modules-6.1.0-32-mips64r2eb-di, nfs-modules-6.1.0-32-mips64r2eb-di, squashfs-modules-6.1.0-32-mips64r2eb-di, udf-modules-6.1.0-32-mips64r2eb-di, fuse-modules-6.1.0-32-mips64r2eb-di, f2fs-modules-6.1.0-32-mips64r2eb-di, md-modules-6.1.0-32-mips64r2eb-di, multipath-modules-6.1.0-32-mips64r2eb-di, usb-modules-6.1.0-32-mips64r2eb-di, usb-storage-modules-6.1.0-32-mips64r2eb-di, fb-modules-6.1.0-32-mips64r2eb-di, input-modules-6.1.0-32-mips64r2eb-di, event-modules-6.1.0-32-mips64r2eb-di, mouse-modules-6.1.0-32-mips64r2eb-di, nic-usb-modules-6.1.0-32-mips64r2eb-di, sata-modules-6.1.0-32-mips64r2eb-di, crc-modules-6.1.0-32-mips64r2eb-di, crypto-modules-6.1.0-32-mips64r2eb-di, crypto-dm-modules-6.1.0-32-mips64r2eb-di, ata-modules-6.1.0-32-mips64r2eb-di, mmc-core-modules-6.1.0-32-mips64r2eb-di, mmc-modules-6.1.0-32-mips64r2eb-di, nbd-modules-6.1.0-32-mips64r2eb-di, speakup-modules-6.1.0-32-mips64r2eb-di, sound-modules-6.1.0-32-mips64r2eb-di, linux-headers-6.1.0-32-5kc-malta, linux-image-6.1.0-32-5kc-malta, linux-image-5kc-malta, linux-headers-5kc-malta, linux-image-6.1.0-32-5kc-malta-dbg, linux-image-5kc-malta-dbg, linux-headers-6.1.0-32-mips64r2eb, linux-image-6.1.0-32-mips64r2eb, linux-image-mips64r2eb, linux-headers-mips64r2eb, linux-image-6.1.0-32-mips64r2eb-dbg, linux-image-mips64r2eb-dbg, kernel-image-6.1.0-32-loongson-3-di, nic-modules-6.1.0-32-loongson-3-di, nic-wireless-modules-6.1.0-32-loongson-3-di, nic-shared-modules-6.1.0-32-loongson-3-di, usb-serial-modules-6.1.0-32-loongson-3-di, ppp-modules-6.1.0-32-loongson-3-di, pata-modules-6.1.0-32-loongson-3-di, cdrom-core-modules-6.1.0-32-loongson-3-di, firewire-core-modules-6.1.0-32-loongson-3-di, scsi-core-modules-6.1.0-32-loongson-3-di, scsi-modules-6.1.0-32-loongson-3-di, scsi-nic-modules-6.1.0-32-loongson-3-di, loop-modules-6.1.0-32-loongson-3-di, btrfs-modules-6.1.0-32-loongson-3-di, ext4-modules-6.1.0-32-loongson-3-di, isofs-modules-6.1.0-32-loongson-3-di, jfs-modules-6.1.0-32-loongson-3-di, xfs-modules-6.1.0-32-loongson-3-di, fat-modules-6.1.0-32-loongson-3-di, affs-modules-6.1.0-32-loongson-3-di, minix-modules-6.1.0-32-loongson-3-di, nfs-modules-6.1.0-32-loongson-3-di, squashfs-modules-6.1.0-32-loongson-3-di, udf-modules-6.1.0-32-loongson-3-di, fuse-modules-6.1.0-32-loongson-3-di, f2fs-modules-6.1.0-32-loongson-3-di, md-modules-6.1.0-32-loongson-3-di, multipath-modules-6.1.0-32-loongson-3-di, usb-modules-6.1.0-32-loongson-3-di, usb-storage-modules-6.1.0-32-loongson-3-di, fb-modules-6.1.0-32-loongson-3-di, input-modules-6.1.0-32-loongson-3-di, event-modules-6.1.0-32-loongson-3-di, mouse-modules-6.1.0-32-loongson-3-di, nic-usb-modules-6.1.0-32-loongson-3-di, sata-modules-6.1.0-32-loongson-3-di, crc-modules-6.1.0-32-loongson-3-di, crypto-modules-6.1.0-32-loongson-3-di, crypto-dm-modules-6.1.0-32-loongson-3-di, ata-modules-6.1.0-32-loongson-3-di, mmc-core-modules-6.1.0-32-loongson-3-di, mmc-modules-6.1.0-32-loongson-3-di, 
nbd-modules-6.1.0-32-loongson-3-di, speakup-modules-6.1.0-32-loongson-3-di, sound-modules-6.1.0-32-loongson-3-di, kernel-image-6.1.0-32-mips64r2el-di, nic-modules-6.1.0-32-mips64r2el-di, nic-wireless-modules-6.1.0-32-mips64r2el-di, nic-shared-modules-6.1.0-32-mips64r2el-di, usb-serial-modules-6.1.0-32-mips64r2el-di, ppp-modules-6.1.0-32-mips64r2el-di, pata-modules-6.1.0-32-mips64r2el-di, cdrom-core-modules-6.1.0-32-mips64r2el-di, firewire-core-modules-6.1.0-32-mips64r2el-di, scsi-core-modules-6.1.0-32-mips64r2el-di, scsi-modules-6.1.0-32-mips64r2el-di, scsi-nic-modules-6.1.0-32-mips64r2el-di, loop-modules-6.1.0-32-mips64r2el-di, btrfs-modules-6.1.0-32-mips64r2el-di, ext4-modules-6.1.0-32-mips64r2el-di, isofs-modules-6.1.0-32-mips64r2el-di, jfs-modules-6.1.0-32-mips64r2el-di, xfs-modules-6.1.0-32-mips64r2el-di, fat-modules-6.1.0-32-mips64r2el-di, affs-modules-6.1.0-32-mips64r2el-di, minix-modules-6.1.0-32-mips64r2el-di, nfs-modules-6.1.0-32-mips64r2el-di, squashfs-modules-6.1.0-32-mips64r2el-di, udf-modules-6.1.0-32-mips64r2el-di, fuse-modules-6.1.0-32-mips64r2el-di, f2fs-modules-6.1.0-32-mips64r2el-di, md-modules-6.1.0-32-mips64r2el-di, multipath-modules-6.1.0-32-mips64r2el-di, usb-modules-6.1.0-32-mips64r2el-di, usb-storage-modules-6.1.0-32-mips64r2el-di, fb-modules-6.1.0-32-mips64r2el-di, input-modules-6.1.0-32-mips64r2el-di, event-modules-6.1.0-32-mips64r2el-di, mouse-modules-6.1.0-32-mips64r2el-di, nic-usb-modules-6.1.0-32-mips64r2el-di, sata-modules-6.1.0-32-mips64r2el-di, crc-modules-6.1.0-32-mips64r2el-di, crypto-modules-6.1.0-32-mips64r2el-di, crypto-dm-modules-6.1.0-32-mips64r2el-di, ata-modules-6.1.0-32-mips64r2el-di, mmc-core-modules-6.1.0-32-mips64r2el-di, mmc-modules-6.1.0-32-mips64r2el-di, nbd-modules-6.1.0-32-mips64r2el-di, speakup-modules-6.1.0-32-mips64r2el-di, sound-modules-6.1.0-32-mips64r2el-di, linux-headers-6.1.0-32-mips64r2el, linux-image-6.1.0-32-mips64r2el, linux-image-mips64r2el, linux-headers-mips64r2el, linux-image-6.1.0-32-mips64r2el-dbg, linux-image-mips64r2el-dbg, linux-headers-6.1.0-32-loongson-3, linux-image-6.1.0-32-loongson-3, linux-image-loongson-3, linux-headers-loongson-3, linux-image-6.1.0-32-loongson-3-dbg, linux-image-loongson-3-dbg, kernel-image-6.1.0-32-mips64r6eb-di, nic-modules-6.1.0-32-mips64r6eb-di, nic-wireless-modules-6.1.0-32-mips64r6eb-di, nic-shared-modules-6.1.0-32-mips64r6eb-di, usb-serial-modules-6.1.0-32-mips64r6eb-di, ppp-modules-6.1.0-32-mips64r6eb-di, pata-modules-6.1.0-32-mips64r6eb-di, cdrom-core-modules-6.1.0-32-mips64r6eb-di, firewire-core-modules-6.1.0-32-mips64r6eb-di, scsi-core-modules-6.1.0-32-mips64r6eb-di, scsi-modules-6.1.0-32-mips64r6eb-di, scsi-nic-modules-6.1.0-32-mips64r6eb-di, loop-modules-6.1.0-32-mips64r6eb-di, btrfs-modules-6.1.0-32-mips64r6eb-di, ext4-modules-6.1.0-32-mips64r6eb-di, isofs-modules-6.1.0-32-mips64r6eb-di, jfs-modules-6.1.0-32-mips64r6eb-di, xfs-modules-6.1.0-32-mips64r6eb-di, fat-modules-6.1.0-32-mips64r6eb-di, affs-modules-6.1.0-32-mips64r6eb-di, minix-modules-6.1.0-32-mips64r6eb-di, nfs-modules-6.1.0-32-mips64r6eb-di, squashfs-modules-6.1.0-32-mips64r6eb-di, udf-modules-6.1.0-32-mips64r6eb-di, fuse-modules-6.1.0-32-mips64r6eb-di, f2fs-modules-6.1.0-32-mips64r6eb-di, md-modules-6.1.0-32-mips64r6eb-di, multipath-modules-6.1.0-32-mips64r6eb-di, usb-modules-6.1.0-32-mips64r6eb-di, usb-storage-modules-6.1.0-32-mips64r6eb-di, fb-modules-6.1.0-32-mips64r6eb-di, input-modules-6.1.0-32-mips64r6eb-di, event-modules-6.1.0-32-mips64r6eb-di, mouse-modules-6.1.0-32-mips64r6eb-di, 
nic-usb-modules-6.1.0-32-mips64r6eb-di, sata-modules-6.1.0-32-mips64r6eb-di, crc-modules-6.1.0-32-mips64r6eb-di, crypto-modules-6.1.0-32-mips64r6eb-di, crypto-dm-modules-6.1.0-32-mips64r6eb-di, ata-modules-6.1.0-32-mips64r6eb-di, mmc-core-modules-6.1.0-32-mips64r6eb-di, mmc-modules-6.1.0-32-mips64r6eb-di, nbd-modules-6.1.0-32-mips64r6eb-di, speakup-modules-6.1.0-32-mips64r6eb-di, sound-modules-6.1.0-32-mips64r6eb-di, linux-headers-6.1.0-32-mips64r6eb, linux-image-6.1.0-32-mips64r6eb, linux-image-mips64r6eb, linux-headers-mips64r6eb, linux-image-6.1.0-32-mips64r6eb-dbg, linux-image-mips64r6eb-dbg, kernel-image-6.1.0-32-mips64r6el-di, nic-modules-6.1.0-32-mips64r6el-di, nic-wireless-modules-6.1.0-32-mips64r6el-di, nic-shared-modules-6.1.0-32-mips64r6el-di, usb-serial-modules-6.1.0-32-mips64r6el-di, ppp-modules-6.1.0-32-mips64r6el-di, pata-modules-6.1.0-32-mips64r6el-di, cdrom-core-modules-6.1.0-32-mips64r6el-di, firewire-core-modules-6.1.0-32-mips64r6el-di, scsi-core-modules-6.1.0-32-mips64r6el-di, scsi-modules-6.1.0-32-mips64r6el-di, scsi-nic-modules-6.1.0-32-mips64r6el-di, loop-modules-6.1.0-32-mips64r6el-di, btrfs-modules-6.1.0-32-mips64r6el-di, ext4-modules-6.1.0-32-mips64r6el-di, isofs-modules-6.1.0-32-mips64r6el-di, jfs-modules-6.1.0-32-mips64r6el-di, xfs-modules-6.1.0-32-mips64r6el-di, fat-modules-6.1.0-32-mips64r6el-di, affs-modules-6.1.0-32-mips64r6el-di, minix-modules-6.1.0-32-mips64r6el-di, nfs-modules-6.1.0-32-mips64r6el-di, squashfs-modules-6.1.0-32-mips64r6el-di, udf-modules-6.1.0-32-mips64r6el-di, fuse-modules-6.1.0-32-mips64r6el-di, f2fs-modules-6.1.0-32-mips64r6el-di, md-modules-6.1.0-32-mips64r6el-di, multipath-modules-6.1.0-32-mips64r6el-di, usb-modules-6.1.0-32-mips64r6el-di, usb-storage-modules-6.1.0-32-mips64r6el-di, fb-modules-6.1.0-32-mips64r6el-di, input-modules-6.1.0-32-mips64r6el-di, event-modules-6.1.0-32-mips64r6el-di, mouse-modules-6.1.0-32-mips64r6el-di, nic-usb-modules-6.1.0-32-mips64r6el-di, sata-modules-6.1.0-32-mips64r6el-di, crc-modules-6.1.0-32-mips64r6el-di, crypto-modules-6.1.0-32-mips64r6el-di, crypto-dm-modules-6.1.0-32-mips64r6el-di, ata-modules-6.1.0-32-mips64r6el-di, mmc-core-modules-6.1.0-32-mips64r6el-di, mmc-modules-6.1.0-32-mips64r6el-di, nbd-modules-6.1.0-32-mips64r6el-di, speakup-modules-6.1.0-32-mips64r6el-di, sound-modules-6.1.0-32-mips64r6el-di, linux-headers-6.1.0-32-mips64r6el, linux-image-6.1.0-32-mips64r6el, linux-image-mips64r6el, linux-headers-mips64r6el, linux-image-6.1.0-32-mips64r6el-dbg, linux-image-mips64r6el-dbg, kernel-image-6.1.0-32-mips32r2el-di, nic-modules-6.1.0-32-mips32r2el-di, nic-wireless-modules-6.1.0-32-mips32r2el-di, nic-shared-modules-6.1.0-32-mips32r2el-di, usb-serial-modules-6.1.0-32-mips32r2el-di, ppp-modules-6.1.0-32-mips32r2el-di, pata-modules-6.1.0-32-mips32r2el-di, cdrom-core-modules-6.1.0-32-mips32r2el-di, firewire-core-modules-6.1.0-32-mips32r2el-di, scsi-core-modules-6.1.0-32-mips32r2el-di, scsi-modules-6.1.0-32-mips32r2el-di, scsi-nic-modules-6.1.0-32-mips32r2el-di, loop-modules-6.1.0-32-mips32r2el-di, btrfs-modules-6.1.0-32-mips32r2el-di, ext4-modules-6.1.0-32-mips32r2el-di, isofs-modules-6.1.0-32-mips32r2el-di, jfs-modules-6.1.0-32-mips32r2el-di, xfs-modules-6.1.0-32-mips32r2el-di, fat-modules-6.1.0-32-mips32r2el-di, affs-modules-6.1.0-32-mips32r2el-di, minix-modules-6.1.0-32-mips32r2el-di, nfs-modules-6.1.0-32-mips32r2el-di, squashfs-modules-6.1.0-32-mips32r2el-di, udf-modules-6.1.0-32-mips32r2el-di, fuse-modules-6.1.0-32-mips32r2el-di, f2fs-modules-6.1.0-32-mips32r2el-di, 
md-modules-6.1.0-32-mips32r2el-di, multipath-modules-6.1.0-32-mips32r2el-di, usb-modules-6.1.0-32-mips32r2el-di, usb-storage-modules-6.1.0-32-mips32r2el-di, fb-modules-6.1.0-32-mips32r2el-di, input-modules-6.1.0-32-mips32r2el-di, event-modules-6.1.0-32-mips32r2el-di, mouse-modules-6.1.0-32-mips32r2el-di, nic-usb-modules-6.1.0-32-mips32r2el-di, sata-modules-6.1.0-32-mips32r2el-di, crc-modules-6.1.0-32-mips32r2el-di, crypto-modules-6.1.0-32-mips32r2el-di, crypto-dm-modules-6.1.0-32-mips32r2el-di, ata-modules-6.1.0-32-mips32r2el-di, mmc-core-modules-6.1.0-32-mips32r2el-di, mmc-modules-6.1.0-32-mips32r2el-di, nbd-modules-6.1.0-32-mips32r2el-di, speakup-modules-6.1.0-32-mips32r2el-di, sound-modules-6.1.0-32-mips32r2el-di, linux-headers-6.1.0-32-mips32r2el, linux-image-6.1.0-32-mips32r2el, linux-image-mips32r2el, linux-headers-mips32r2el, linux-image-6.1.0-32-mips32r2el-dbg, linux-image-mips32r2el-dbg, kernel-image-6.1.0-32-mips32r6eb-di, nic-modules-6.1.0-32-mips32r6eb-di, nic-wireless-modules-6.1.0-32-mips32r6eb-di, nic-shared-modules-6.1.0-32-mips32r6eb-di, usb-serial-modules-6.1.0-32-mips32r6eb-di, ppp-modules-6.1.0-32-mips32r6eb-di, pata-modules-6.1.0-32-mips32r6eb-di, cdrom-core-modules-6.1.0-32-mips32r6eb-di, firewire-core-modules-6.1.0-32-mips32r6eb-di, scsi-core-modules-6.1.0-32-mips32r6eb-di, scsi-modules-6.1.0-32-mips32r6eb-di, scsi-nic-modules-6.1.0-32-mips32r6eb-di, loop-modules-6.1.0-32-mips32r6eb-di, btrfs-modules-6.1.0-32-mips32r6eb-di, ext4-modules-6.1.0-32-mips32r6eb-di, isofs-modules-6.1.0-32-mips32r6eb-di, jfs-modules-6.1.0-32-mips32r6eb-di, xfs-modules-6.1.0-32-mips32r6eb-di, fat-modules-6.1.0-32-mips32r6eb-di, affs-modules-6.1.0-32-mips32r6eb-di, minix-modules-6.1.0-32-mips32r6eb-di, nfs-modules-6.1.0-32-mips32r6eb-di, squashfs-modules-6.1.0-32-mips32r6eb-di, udf-modules-6.1.0-32-mips32r6eb-di, fuse-modules-6.1.0-32-mips32r6eb-di, f2fs-modules-6.1.0-32-mips32r6eb-di, md-modules-6.1.0-32-mips32r6eb-di, multipath-modules-6.1.0-32-mips32r6eb-di, usb-modules-6.1.0-32-mips32r6eb-di, usb-storage-modules-6.1.0-32-mips32r6eb-di, fb-modules-6.1.0-32-mips32r6eb-di, input-modules-6.1.0-32-mips32r6eb-di, event-modules-6.1.0-32-mips32r6eb-di, mouse-modules-6.1.0-32-mips32r6eb-di, nic-usb-modules-6.1.0-32-mips32r6eb-di, sata-modules-6.1.0-32-mips32r6eb-di, crc-modules-6.1.0-32-mips32r6eb-di, crypto-modules-6.1.0-32-mips32r6eb-di, crypto-dm-modules-6.1.0-32-mips32r6eb-di, ata-modules-6.1.0-32-mips32r6eb-di, mmc-core-modules-6.1.0-32-mips32r6eb-di, mmc-modules-6.1.0-32-mips32r6eb-di, nbd-modules-6.1.0-32-mips32r6eb-di, speakup-modules-6.1.0-32-mips32r6eb-di, sound-modules-6.1.0-32-mips32r6eb-di, linux-headers-6.1.0-32-mips32r6eb, linux-image-6.1.0-32-mips32r6eb, linux-image-mips32r6eb, linux-headers-mips32r6eb, linux-image-6.1.0-32-mips32r6eb-dbg, linux-image-mips32r6eb-dbg, kernel-image-6.1.0-32-mips32r6el-di, nic-modules-6.1.0-32-mips32r6el-di, nic-wireless-modules-6.1.0-32-mips32r6el-di, nic-shared-modules-6.1.0-32-mips32r6el-di, usb-serial-modules-6.1.0-32-mips32r6el-di, ppp-modules-6.1.0-32-mips32r6el-di, pata-modules-6.1.0-32-mips32r6el-di, cdrom-core-modules-6.1.0-32-mips32r6el-di, firewire-core-modules-6.1.0-32-mips32r6el-di, scsi-core-modules-6.1.0-32-mips32r6el-di, scsi-modules-6.1.0-32-mips32r6el-di, scsi-nic-modules-6.1.0-32-mips32r6el-di, loop-modules-6.1.0-32-mips32r6el-di, btrfs-modules-6.1.0-32-mips32r6el-di, ext4-modules-6.1.0-32-mips32r6el-di, isofs-modules-6.1.0-32-mips32r6el-di, jfs-modules-6.1.0-32-mips32r6el-di, xfs-modules-6.1.0-32-mips32r6el-di, 
fat-modules-6.1.0-32-mips32r6el-di, affs-modules-6.1.0-32-mips32r6el-di, minix-modules-6.1.0-32-mips32r6el-di, nfs-modules-6.1.0-32-mips32r6el-di, squashfs-modules-6.1.0-32-mips32r6el-di, udf-modules-6.1.0-32-mips32r6el-di, fuse-modules-6.1.0-32-mips32r6el-di, f2fs-modules-6.1.0-32-mips32r6el-di, md-modules-6.1.0-32-mips32r6el-di, multipath-modules-6.1.0-32-mips32r6el-di, usb-modules-6.1.0-32-mips32r6el-di, usb-storage-modules-6.1.0-32-mips32r6el-di, fb-modules-6.1.0-32-mips32r6el-di, input-modules-6.1.0-32-mips32r6el-di, event-modules-6.1.0-32-mips32r6el-di, mouse-modules-6.1.0-32-mips32r6el-di, nic-usb-modules-6.1.0-32-mips32r6el-di, sata-modules-6.1.0-32-mips32r6el-di, crc-modules-6.1.0-32-mips32r6el-di, crypto-modules-6.1.0-32-mips32r6el-di, crypto-dm-modules-6.1.0-32-mips32r6el-di, ata-modules-6.1.0-32-mips32r6el-di, mmc-core-modules-6.1.0-32-mips32r6el-di, mmc-modules-6.1.0-32-mips32r6el-di, nbd-modules-6.1.0-32-mips32r6el-di, speakup-modules-6.1.0-32-mips32r6el-di, sound-modules-6.1.0-32-mips32r6el-di, linux-headers-6.1.0-32-mips32r6el, linux-image-6.1.0-32-mips32r6el, linux-image-mips32r6el, linux-headers-mips32r6el, linux-image-6.1.0-32-mips32r6el-dbg, linux-image-mips32r6el-dbg, kernel-image-6.1.0-32-powerpc-di, nic-modules-6.1.0-32-powerpc-di, nic-wireless-modules-6.1.0-32-powerpc-di, nic-shared-modules-6.1.0-32-powerpc-di, serial-modules-6.1.0-32-powerpc-di, usb-serial-modules-6.1.0-32-powerpc-di, ppp-modules-6.1.0-32-powerpc-di, pata-modules-6.1.0-32-powerpc-di, cdrom-core-modules-6.1.0-32-powerpc-di, firewire-core-modules-6.1.0-32-powerpc-di, scsi-core-modules-6.1.0-32-powerpc-di, scsi-modules-6.1.0-32-powerpc-di, scsi-nic-modules-6.1.0-32-powerpc-di, loop-modules-6.1.0-32-powerpc-di, btrfs-modules-6.1.0-32-powerpc-di, ext4-modules-6.1.0-32-powerpc-di, isofs-modules-6.1.0-32-powerpc-di, jfs-modules-6.1.0-32-powerpc-di, xfs-modules-6.1.0-32-powerpc-di, fat-modules-6.1.0-32-powerpc-di, hfs-modules-6.1.0-32-powerpc-di, affs-modules-6.1.0-32-powerpc-di, squashfs-modules-6.1.0-32-powerpc-di, udf-modules-6.1.0-32-powerpc-di, fuse-modules-6.1.0-32-powerpc-di, f2fs-modules-6.1.0-32-powerpc-di, md-modules-6.1.0-32-powerpc-di, multipath-modules-6.1.0-32-powerpc-di, usb-modules-6.1.0-32-powerpc-di, usb-storage-modules-6.1.0-32-powerpc-di, pcmcia-storage-modules-6.1.0-32-powerpc-di, fb-modules-6.1.0-32-powerpc-di, input-modules-6.1.0-32-powerpc-di, event-modules-6.1.0-32-powerpc-di, mouse-modules-6.1.0-32-powerpc-di, nic-pcmcia-modules-6.1.0-32-powerpc-di, pcmcia-modules-6.1.0-32-powerpc-di, nic-usb-modules-6.1.0-32-powerpc-di, sata-modules-6.1.0-32-powerpc-di, crc-modules-6.1.0-32-powerpc-di, crypto-modules-6.1.0-32-powerpc-di, crypto-dm-modules-6.1.0-32-powerpc-di, ata-modules-6.1.0-32-powerpc-di, mmc-core-modules-6.1.0-32-powerpc-di, nbd-modules-6.1.0-32-powerpc-di, uinput-modules-6.1.0-32-powerpc-di, kernel-image-6.1.0-32-powerpc64-di, nic-modules-6.1.0-32-powerpc64-di, nic-wireless-modules-6.1.0-32-powerpc64-di, nic-shared-modules-6.1.0-32-powerpc64-di, serial-modules-6.1.0-32-powerpc64-di, usb-serial-modules-6.1.0-32-powerpc64-di, ppp-modules-6.1.0-32-powerpc64-di, pata-modules-6.1.0-32-powerpc64-di, cdrom-core-modules-6.1.0-32-powerpc64-di, firewire-core-modules-6.1.0-32-powerpc64-di, scsi-core-modules-6.1.0-32-powerpc64-di, scsi-modules-6.1.0-32-powerpc64-di, scsi-nic-modules-6.1.0-32-powerpc64-di, loop-modules-6.1.0-32-powerpc64-di, btrfs-modules-6.1.0-32-powerpc64-di, ext4-modules-6.1.0-32-powerpc64-di, isofs-modules-6.1.0-32-powerpc64-di, jfs-modules-6.1.0-32-powerpc64-di, 
xfs-modules-6.1.0-32-powerpc64-di, fat-modules-6.1.0-32-powerpc64-di, hfs-modules-6.1.0-32-powerpc64-di, affs-modules-6.1.0-32-powerpc64-di, squashfs-modules-6.1.0-32-powerpc64-di, udf-modules-6.1.0-32-powerpc64-di, fuse-modules-6.1.0-32-powerpc64-di, f2fs-modules-6.1.0-32-powerpc64-di, md-modules-6.1.0-32-powerpc64-di, multipath-modules-6.1.0-32-powerpc64-di, usb-modules-6.1.0-32-powerpc64-di, usb-storage-modules-6.1.0-32-powerpc64-di, pcmcia-storage-modules-6.1.0-32-powerpc64-di, fb-modules-6.1.0-32-powerpc64-di, input-modules-6.1.0-32-powerpc64-di, event-modules-6.1.0-32-powerpc64-di, mouse-modules-6.1.0-32-powerpc64-di, nic-pcmcia-modules-6.1.0-32-powerpc64-di, pcmcia-modules-6.1.0-32-powerpc64-di, nic-usb-modules-6.1.0-32-powerpc64-di, sata-modules-6.1.0-32-powerpc64-di, i2c-modules-6.1.0-32-powerpc64-di, crc-modules-6.1.0-32-powerpc64-di, crypto-modules-6.1.0-32-powerpc64-di, crypto-dm-modules-6.1.0-32-powerpc64-di, ata-modules-6.1.0-32-powerpc64-di, mmc-core-modules-6.1.0-32-powerpc64-di, nbd-modules-6.1.0-32-powerpc64-di, uinput-modules-6.1.0-32-powerpc64-di, mtd-core-modules-6.1.0-32-powerpc64-di, hypervisor-modules-6.1.0-32-powerpc64-di, fancontrol-modules-6.1.0-32-powerpc64-di, linux-headers-6.1.0-32-powerpc, linux-image-6.1.0-32-powerpc, linux-image-powerpc, linux-headers-powerpc, linux-image-6.1.0-32-powerpc-dbg, linux-image-powerpc-dbg, linux-headers-6.1.0-32-powerpc-smp, linux-image-6.1.0-32-powerpc-smp, linux-image-powerpc-smp, linux-headers-powerpc-smp, linux-image-6.1.0-32-powerpc-smp-dbg, linux-image-powerpc-smp-dbg, linux-headers-6.1.0-32-powerpc64, linux-image-6.1.0-32-powerpc64, linux-image-powerpc64, linux-headers-powerpc64, linux-image-6.1.0-32-powerpc64-dbg, linux-image-powerpc64-dbg, kernel-image-6.1.0-32-powerpc64le-di, nic-modules-6.1.0-32-powerpc64le-di, nic-wireless-modules-6.1.0-32-powerpc64le-di, nic-shared-modules-6.1.0-32-powerpc64le-di, serial-modules-6.1.0-32-powerpc64le-di, usb-serial-modules-6.1.0-32-powerpc64le-di, ppp-modules-6.1.0-32-powerpc64le-di, cdrom-core-modules-6.1.0-32-powerpc64le-di, firewire-core-modules-6.1.0-32-powerpc64le-di, scsi-core-modules-6.1.0-32-powerpc64le-di, scsi-modules-6.1.0-32-powerpc64le-di, scsi-nic-modules-6.1.0-32-powerpc64le-di, loop-modules-6.1.0-32-powerpc64le-di, btrfs-modules-6.1.0-32-powerpc64le-di, ext4-modules-6.1.0-32-powerpc64le-di, isofs-modules-6.1.0-32-powerpc64le-di, jfs-modules-6.1.0-32-powerpc64le-di, xfs-modules-6.1.0-32-powerpc64le-di, fat-modules-6.1.0-32-powerpc64le-di, squashfs-modules-6.1.0-32-powerpc64le-di, udf-modules-6.1.0-32-powerpc64le-di, fuse-modules-6.1.0-32-powerpc64le-di, f2fs-modules-6.1.0-32-powerpc64le-di, md-modules-6.1.0-32-powerpc64le-di, multipath-modules-6.1.0-32-powerpc64le-di, usb-modules-6.1.0-32-powerpc64le-di, usb-storage-modules-6.1.0-32-powerpc64le-di, fb-modules-6.1.0-32-powerpc64le-di, input-modules-6.1.0-32-powerpc64le-di, event-modules-6.1.0-32-powerpc64le-di, mouse-modules-6.1.0-32-powerpc64le-di, nic-usb-modules-6.1.0-32-powerpc64le-di, sata-modules-6.1.0-32-powerpc64le-di, i2c-modules-6.1.0-32-powerpc64le-di, crc-modules-6.1.0-32-powerpc64le-di, crypto-modules-6.1.0-32-powerpc64le-di, crypto-dm-modules-6.1.0-32-powerpc64le-di, ata-modules-6.1.0-32-powerpc64le-di, nbd-modules-6.1.0-32-powerpc64le-di, uinput-modules-6.1.0-32-powerpc64le-di, mtd-core-modules-6.1.0-32-powerpc64le-di, hypervisor-modules-6.1.0-32-powerpc64le-di, fancontrol-modules-6.1.0-32-powerpc64le-di, linux-headers-6.1.0-32-powerpc64le, linux-image-6.1.0-32-powerpc64le, linux-image-powerpc64le, 
linux-headers-powerpc64le, linux-image-6.1.0-32-powerpc64le-dbg, linux-image-powerpc64le-dbg, kernel-image-6.1.0-32-riscv64-di, nic-modules-6.1.0-32-riscv64-di, nic-wireless-modules-6.1.0-32-riscv64-di, nic-shared-modules-6.1.0-32-riscv64-di, usb-serial-modules-6.1.0-32-riscv64-di, ppp-modules-6.1.0-32-riscv64-di, pata-modules-6.1.0-32-riscv64-di, cdrom-core-modules-6.1.0-32-riscv64-di, scsi-core-modules-6.1.0-32-riscv64-di, scsi-modules-6.1.0-32-riscv64-di, scsi-nic-modules-6.1.0-32-riscv64-di, loop-modules-6.1.0-32-riscv64-di, btrfs-modules-6.1.0-32-riscv64-di, ext4-modules-6.1.0-32-riscv64-di, isofs-modules-6.1.0-32-riscv64-di, jfs-modules-6.1.0-32-riscv64-di, fat-modules-6.1.0-32-riscv64-di, squashfs-modules-6.1.0-32-riscv64-di, udf-modules-6.1.0-32-riscv64-di, fuse-modules-6.1.0-32-riscv64-di, f2fs-modules-6.1.0-32-riscv64-di, md-modules-6.1.0-32-riscv64-di, multipath-modules-6.1.0-32-riscv64-di, usb-modules-6.1.0-32-riscv64-di, usb-storage-modules-6.1.0-32-riscv64-di, fb-modules-6.1.0-32-riscv64-di, input-modules-6.1.0-32-riscv64-di, event-modules-6.1.0-32-riscv64-di, nic-usb-modules-6.1.0-32-riscv64-di, sata-modules-6.1.0-32-riscv64-di, i2c-modules-6.1.0-32-riscv64-di, crc-modules-6.1.0-32-riscv64-di, crypto-modules-6.1.0-32-riscv64-di, crypto-dm-modules-6.1.0-32-riscv64-di, ata-modules-6.1.0-32-riscv64-di, mmc-core-modules-6.1.0-32-riscv64-di, mmc-modules-6.1.0-32-riscv64-di, nbd-modules-6.1.0-32-riscv64-di, mtd-modules-6.1.0-32-riscv64-di, mtd-core-modules-6.1.0-32-riscv64-di, linux-headers-6.1.0-32-riscv64, linux-image-6.1.0-32-riscv64, linux-image-riscv64, linux-headers-riscv64, linux-image-6.1.0-32-riscv64-dbg, linux-image-riscv64-dbg, kernel-image-6.1.0-32-s390x-di, nic-modules-6.1.0-32-s390x-di, cdrom-core-modules-6.1.0-32-s390x-di, scsi-core-modules-6.1.0-32-s390x-di, scsi-modules-6.1.0-32-s390x-di, loop-modules-6.1.0-32-s390x-di, btrfs-modules-6.1.0-32-s390x-di, ext4-modules-6.1.0-32-s390x-di, isofs-modules-6.1.0-32-s390x-di, xfs-modules-6.1.0-32-s390x-di, fat-modules-6.1.0-32-s390x-di, udf-modules-6.1.0-32-s390x-di, fuse-modules-6.1.0-32-s390x-di, f2fs-modules-6.1.0-32-s390x-di, md-modules-6.1.0-32-s390x-di, multipath-modules-6.1.0-32-s390x-di, crc-modules-6.1.0-32-s390x-di, crypto-modules-6.1.0-32-s390x-di, crypto-dm-modules-6.1.0-32-s390x-di, nbd-modules-6.1.0-32-s390x-di, mtd-core-modules-6.1.0-32-s390x-di, dasd-modules-6.1.0-32-s390x-di, dasd-extra-modules-6.1.0-32-s390x-di, linux-headers-6.1.0-32-s390x, linux-image-6.1.0-32-s390x, linux-image-s390x, linux-headers-s390x, linux-image-6.1.0-32-s390x-dbg, linux-image-s390x-dbg, kernel-image-6.1.0-32-sh7751r-di, nic-modules-6.1.0-32-sh7751r-di, nic-shared-modules-6.1.0-32-sh7751r-di, usb-serial-modules-6.1.0-32-sh7751r-di, ppp-modules-6.1.0-32-sh7751r-di, pata-modules-6.1.0-32-sh7751r-di, cdrom-core-modules-6.1.0-32-sh7751r-di, firewire-core-modules-6.1.0-32-sh7751r-di, loop-modules-6.1.0-32-sh7751r-di, btrfs-modules-6.1.0-32-sh7751r-di, ext4-modules-6.1.0-32-sh7751r-di, isofs-modules-6.1.0-32-sh7751r-di, jfs-modules-6.1.0-32-sh7751r-di, xfs-modules-6.1.0-32-sh7751r-di, fat-modules-6.1.0-32-sh7751r-di, minix-modules-6.1.0-32-sh7751r-di, squashfs-modules-6.1.0-32-sh7751r-di, udf-modules-6.1.0-32-sh7751r-di, fuse-modules-6.1.0-32-sh7751r-di, f2fs-modules-6.1.0-32-sh7751r-di, md-modules-6.1.0-32-sh7751r-di, multipath-modules-6.1.0-32-sh7751r-di, usb-storage-modules-6.1.0-32-sh7751r-di, nic-usb-modules-6.1.0-32-sh7751r-di, sata-modules-6.1.0-32-sh7751r-di, i2c-modules-6.1.0-32-sh7751r-di, crc-modules-6.1.0-32-sh7751r-di, 
crypto-modules-6.1.0-32-sh7751r-di, crypto-dm-modules-6.1.0-32-sh7751r-di, nbd-modules-6.1.0-32-sh7751r-di, speakup-modules-6.1.0-32-sh7751r-di, sound-modules-6.1.0-32-sh7751r-di, kernel-image-6.1.0-32-sh7785lcr-di, nic-modules-6.1.0-32-sh7785lcr-di, nic-shared-modules-6.1.0-32-sh7785lcr-di, usb-serial-modules-6.1.0-32-sh7785lcr-di, ppp-modules-6.1.0-32-sh7785lcr-di, pata-modules-6.1.0-32-sh7785lcr-di, cdrom-core-modules-6.1.0-32-sh7785lcr-di, firewire-core-modules-6.1.0-32-sh7785lcr-di, loop-modules-6.1.0-32-sh7785lcr-di, btrfs-modules-6.1.0-32-sh7785lcr-di, ext4-modules-6.1.0-32-sh7785lcr-di, isofs-modules-6.1.0-32-sh7785lcr-di, jfs-modules-6.1.0-32-sh7785lcr-di, xfs-modules-6.1.0-32-sh7785lcr-di, fat-modules-6.1.0-32-sh7785lcr-di, minix-modules-6.1.0-32-sh7785lcr-di, squashfs-modules-6.1.0-32-sh7785lcr-di, udf-modules-6.1.0-32-sh7785lcr-di, fuse-modules-6.1.0-32-sh7785lcr-di, f2fs-modules-6.1.0-32-sh7785lcr-di, md-modules-6.1.0-32-sh7785lcr-di, multipath-modules-6.1.0-32-sh7785lcr-di, nic-usb-modules-6.1.0-32-sh7785lcr-di, sata-modules-6.1.0-32-sh7785lcr-di, crc-modules-6.1.0-32-sh7785lcr-di, crypto-modules-6.1.0-32-sh7785lcr-di, crypto-dm-modules-6.1.0-32-sh7785lcr-di, nbd-modules-6.1.0-32-sh7785lcr-di, speakup-modules-6.1.0-32-sh7785lcr-di, sound-modules-6.1.0-32-sh7785lcr-di, linux-headers-6.1.0-32-sh7751r, linux-image-6.1.0-32-sh7751r, linux-image-sh7751r, linux-headers-sh7751r, linux-image-6.1.0-32-sh7751r-dbg, linux-image-sh7751r-dbg, linux-headers-6.1.0-32-sh7785lcr, linux-image-6.1.0-32-sh7785lcr, linux-image-sh7785lcr, linux-headers-sh7785lcr, linux-image-6.1.0-32-sh7785lcr-dbg, linux-image-sh7785lcr-dbg, kernel-image-6.1.0-32-sparc64-di, nic-modules-6.1.0-32-sparc64-di, nic-shared-modules-6.1.0-32-sparc64-di, usb-serial-modules-6.1.0-32-sparc64-di, ppp-modules-6.1.0-32-sparc64-di, pata-modules-6.1.0-32-sparc64-di, cdrom-core-modules-6.1.0-32-sparc64-di, scsi-core-modules-6.1.0-32-sparc64-di, scsi-modules-6.1.0-32-sparc64-di, btrfs-modules-6.1.0-32-sparc64-di, ext4-modules-6.1.0-32-sparc64-di, isofs-modules-6.1.0-32-sparc64-di, jfs-modules-6.1.0-32-sparc64-di, ufs-modules-6.1.0-32-sparc64-di, xfs-modules-6.1.0-32-sparc64-di, fat-modules-6.1.0-32-sparc64-di, squashfs-modules-6.1.0-32-sparc64-di, udf-modules-6.1.0-32-sparc64-di, fuse-modules-6.1.0-32-sparc64-di, f2fs-modules-6.1.0-32-sparc64-di, md-modules-6.1.0-32-sparc64-di, multipath-modules-6.1.0-32-sparc64-di, usb-modules-6.1.0-32-sparc64-di, usb-storage-modules-6.1.0-32-sparc64-di, fb-modules-6.1.0-32-sparc64-di, input-modules-6.1.0-32-sparc64-di, nic-usb-modules-6.1.0-32-sparc64-di, sata-modules-6.1.0-32-sparc64-di, i2c-modules-6.1.0-32-sparc64-di, crc-modules-6.1.0-32-sparc64-di, crypto-modules-6.1.0-32-sparc64-di, crypto-dm-modules-6.1.0-32-sparc64-di, ata-modules-6.1.0-32-sparc64-di, nbd-modules-6.1.0-32-sparc64-di, linux-headers-6.1.0-32-sparc64, linux-image-6.1.0-32-sparc64, linux-image-sparc64, linux-headers-sparc64, linux-image-6.1.0-32-sparc64-dbg, linux-image-sparc64-dbg, linux-headers-6.1.0-32-sparc64-smp, linux-image-6.1.0-32-sparc64-smp, linux-image-sparc64-smp, linux-headers-sparc64-smp, linux-image-6.1.0-32-sparc64-smp-dbg, linux-image-sparc64-smp-dbg, linux-compiler-gcc-12-arm, linux-compiler-gcc-12-s390, linux-compiler-gcc-12-x86, linux-image-parisc64-smp, linux-image-parisc-smp """ def test_binutils(binutils): m = mock_open(read_data=binutils) with patch("builtins.open", m): result = parse_sources_file("dummy") assert result == { "binutils": { "binutils-for-host", "binutils-for-build", 
"binutils-ia64-linux-gnu-dbg", "binutils-m68k-linux-gnu", "binutils-mips64el-linux-gnuabin32-dbg", "binutils-mipsisa64r6-linux-gnuabin32", "binutils-mipsisa64r6el-linux-gnuabi64-dbg", } } def test_linux(linux): m = mock_open(read_data=linux) with patch("builtins.open", m): result = parse_sources_file("dummy") assert "linux-headers-6.1.0-32-amd64" in result["linux"] assert "linux-headers-6.1.0-32-cloud-amd64" in result["linux"] ================================================ FILE: package_managers/debian/structs.py ================================================ from dataclasses import dataclass, field # structures @dataclass class Maintainer: name: str = field(default_factory=str) email: str = field(default_factory=str) @dataclass class File: hash: str = field(default_factory=str) size: int = field(default_factory=int) filename: str = field(default_factory=str) @dataclass class Depends: package: str = field(default_factory=str) semver: str = field(default_factory=str) @dataclass class Tag: name: str = field(default_factory=str) value: str = field(default_factory=str) # this represents whatever we might get from Debian...either packages or sources # it's immaterial what it is, we just need to know how to parse it @dataclass class DebianData: # Package fields package: str = field(default_factory=str) source: str = field(default_factory=str) version: str = field(default_factory=str) installed_size: int = field(default_factory=int) maintainer: Maintainer = field(default_factory=Maintainer) architecture: str = field(default_factory=str) description: str = field(default_factory=str) homepage: str = field(default_factory=str) description_md5: str = field(default_factory=str) tag: str = field(default_factory=str) section: str = field(default_factory=str) priority: str = field(default_factory=str) filename: str = field(default_factory=str) size: int = field(default_factory=int) md5sum: str = field(default_factory=str) sha256: str = field(default_factory=str) # Dependency fields replaces: list[Depends] = field(default_factory=list) provides: list[Depends] = field(default_factory=list) depends: list[Depends] = field(default_factory=list) pre_depends: list[Depends] = field(default_factory=list) recommends: list[Depends] = field(default_factory=list) suggests: list[Depends] = field(default_factory=list) breaks: list[Depends] = field(default_factory=list) conflicts: list[Depends] = field(default_factory=list) build_depends: list[Depends] = field(default_factory=list) # source only # Source fields binary: list[str] = field(default_factory=list) uploaders: list[Maintainer] = field(default_factory=list) standards_version: str = field(default_factory=str) format: str = field(default_factory=str) files: list[File] = field(default_factory=list) vcs_browser: str = field(default_factory=str) vcs_git: str = field(default_factory=str) checksums_sha256: list[File] = field(default_factory=list) package_list: list[str] = field(default_factory=list) directory: str = field(default_factory=str) testsuite: str = field(default_factory=str) testsuite_triggers: str = field(default_factory=str) ================================================ FILE: package_managers/homebrew/Dockerfile ================================================ FROM ghcr.io/astral-sh/uv:python3.11-bookworm-slim # Copy everything from the root directory (build context) COPY . . 
# Install core requirements using uv WORKDIR /core RUN uv pip install --system -r requirements.txt WORKDIR / # Run the main application CMD ["python", "/package_managers/homebrew/main.py"] ================================================ FILE: package_managers/homebrew/README.md ================================================ # Homebrew The Homebrew service uses Homebrew's JSON API to build the Homebrew data model, taking a diff-based approach. ## Getting Started To run just the Homebrew service, use the following commands: ```bash docker compose build homebrew docker compose run homebrew ``` ## Pipeline Overview The Homebrew pipeline consists of one main script: `main.py`. It fetches two things from CHAI: 1. Homebrew's graph, based on the packages and legacy dependencies tables 2. The URL records in CHAI that match Homebrew's cleaned URLs It then diffs each object for each package against that state, and applies the changes to CHAI accordingly. ## Notes - Homebrew's dependencies are not restricted to the `{build,test,...}_dependencies` fields listed in the JSON API: formulae also use system-level packages denoted in `uses_from_macos` and `variations` (for Linux). The pipeline currently does NOT consider those dependencies - This job ignores the versions table entirely, and instead populates the legacy dependencies table, which maintains a package-to-package relationship - Versioned formulae (like `python`, `postgresql`) are ones where the Homebrew package name specifies a version. The pipeline treats each of these as an individual package, and so creates new records in the `packages` table. - The data source for Homebrew does not retrieve the analytics information that is available via each package's individual JSON API endpoint. ================================================ FILE: package_managers/homebrew/db.py ================================================ from core.config import Config from core.db import DB, CurrentURLs from core.structs import CurrentGraph class HomebrewDB(DB): def __init__(self, logger_name: str, config: Config): super().__init__(logger_name) self.config = config self.set_current_graph() def set_current_graph(self) -> None: """Get the Homebrew packages and dependencies""" self.graph: CurrentGraph = self.current_graph(self.config.pm_config.pm_id) self.logger.log(f"Loaded {len(self.graph.package_map)} Homebrew packages") def set_current_urls(self, urls: set[str]) -> None: """Wrapper for setting current urls""" self.urls: CurrentURLs = self.current_urls(urls) self.logger.log(f"Found {len(self.urls.url_map)} Homebrew URLs") ================================================ FILE: package_managers/homebrew/diff.py ================================================ from datetime import datetime from uuid import UUID, uuid4 from core.config import Config from core.logger import Logger from core.models import URL, LegacyDependency, Package, PackageURL from core.structs import Cache, URLKey from package_managers.homebrew.structs import Actual class Diff: def __init__(self, config: Config, caches: Cache): self.config = config self.now = datetime.now() self.caches = caches self.logger = Logger("homebrew_diff") def diff_pkg(self, pkg: Actual) -> tuple[UUID, Package | None, dict | None]: """ Checks if the given pkg is in the package_cache. Returns: - pkg_id: the id of the package - package: If new, returns a new package object.
If existing, returns None - changes: a dictionary of changes """ self.logger.debug(f"Diffing package: {pkg.formula}") pkg_id: UUID if pkg.formula not in self.caches.package_map: # new package p = Package( id=uuid4(), derived_id=f"homebrew/{pkg.formula}", name=pkg.formula, package_manager_id=self.config.pm_config.pm_id, import_id=pkg.formula, readme=pkg.description, created_at=self.now, updated_at=self.now, ) pkg_id: UUID = p.id # no update payload, so that's empty return pkg_id, p, {} else: p = self.caches.package_map[pkg.formula] pkg_id = p.id # check for changes # right now, that's just the readme if p.readme != pkg.description: self.logger.debug(f"Description changed for {pkg.formula}") return ( pkg_id, None, {"id": p.id, "readme": pkg.description, "updated_at": self.now}, ) else: # existing package, no change return pkg_id, None, None def diff_url( self, pkg: Actual, new_urls: dict[URLKey, URL] ) -> dict[UUID, UUID]: """Given a package's URLs, returns the resolved URLs for this specific formula""" resolved_urls: dict[UUID, UUID] = {} # we need to check if (a) URLs are in our cache, or (b) if we've already handled # them before. if so, we should use that urls = ( (pkg.homepage, self.config.url_types.homepage), (pkg.source, self.config.url_types.source), (pkg.repository, self.config.url_types.repository), ) for url, url_type in urls: # guard: no URL if not url: continue url_key = URLKey(url, url_type) resolved_url_id: UUID if url_key in new_urls: resolved_url_id = new_urls[url_key].id elif url_key in self.caches.url_map: resolved_url_id = self.caches.url_map[url_key].id else: self.logger.debug(f"URL {url} for {url_type} is entirely new") new_url = URL( id=uuid4(), url=url, url_type_id=url_type, created_at=self.now, updated_at=self.now, ) resolved_url_id = new_url.id # NOTE: THIS IS SUPER IMPORTANT # we're not just borrowing this value, we're mutating it as well new_urls[url_key] = new_url resolved_urls[url_type] = resolved_url_id return resolved_urls def diff_pkg_url( self, pkg_id: UUID, resolved_urls: dict[UUID, UUID] ) -> tuple[list[PackageURL], list[dict]]: """Takes in a package_id and resolved URLs from diff_url, and generates new PackageURL objects as well as a list of changes to existing ones Inputs: - pkg_id: the id of the package - resolved_urls: a map of url types to final URL ID for this pkg Outputs: - new_package_urls: a list of new PackageURL objects - updated_package_urls: a list of changes to existing PackageURL objects TODO: - We're updating every single package_url entity, which takes time. We should check if the latest URL has changed, and if so, only update that one. """ new_links: list[PackageURL] = [] updates: list[dict] = [] # what are the existing links? existing: set[UUID] = { pu.url_id for pu in self.caches.package_urls.get(pkg_id, set()) } # for the correct URL type / URL for this package: for _url_type, url_id in resolved_urls.items(): if url_id not in existing: # new link!
new_links.append( PackageURL( id=uuid4(), package_id=pkg_id, url_id=url_id, created_at=self.now, updated_at=self.now, ) ) else: # TODO: this should only happen for `latest` URLs # here is an existing link between this URL and this package # let's find it existing_pu = next( pu for pu in self.caches.package_urls[pkg_id] if pu.url_id == url_id ) existing_pu.updated_at = self.now updates.append({"id": existing_pu.id, "updated_at": self.now}) return new_links, updates def diff_deps( self, pkg: Actual ) -> tuple[list[LegacyDependency], list[LegacyDependency]]: """ Takes in a Homebrew formula and figures out what dependencies have changed. Also uses the LegacyDependency table, because that is package to package. Warnings: - Updates show up as removed + new - This is Homebrew specific, since LegacyDependency mandates uniqueness from package_id -> dependency_id, but Homebrew allows duplicate dependencies across multiple dependency types. So we've got a process helper that handles this. Returns: - new_deps: a list of new dependencies - removed_deps: a list of removed dependencies """ new_deps: list[LegacyDependency] = [] removed_deps: list[LegacyDependency] = [] # serialize the actual dependencies into a set of tuples actual: set[tuple[UUID, UUID]] = set() processed: set[str] = set() def process(dep_names: list[str] | None, dep_type: UUID) -> None: """Helper to process dependencies of a given type""" # guard: no dependencies if not dep_names: return for name in dep_names: # guard: no dependency name / empty name if not name: continue # means one dependency is build and test, for example # see https://formulae.brew.sh/api/formula/abook.json for example # gettext is both a build and runtime dependency if name in processed: continue dependency = self.caches.package_map.get(name) # guard: no dependency if not dependency: # TODO: handle this case, though it fixes itself on the next run self.logger.warn(f"{name}, dep of {pkg.formula} is new") continue actual.add((dependency.id, dep_type)) processed.add(name) # alright, let's do it if hasattr(pkg, "dependencies"): process(pkg.dependencies, self.config.dependency_types.runtime) if hasattr(pkg, "build_dependencies"): process(pkg.build_dependencies, self.config.dependency_types.build) if hasattr(pkg, "test_dependencies"): process(pkg.test_dependencies, self.config.dependency_types.test) if hasattr(pkg, "recommended_dependencies"): process( pkg.recommended_dependencies, self.config.dependency_types.recommended ) if hasattr(pkg, "optional_dependencies"): process(pkg.optional_dependencies, self.config.dependency_types.optional) # get the package ID for what we are working with package = self.caches.package_map.get(pkg.formula) if not package: # TODO: handle this case, though it fixes itself on the next run self.logger.warn(f"New package {pkg.formula}, will grab its deps next time") return [], [] pkg_id: UUID = package.id # now, we need to figure out what's new / removed # we need: # 1. something in that same structure as `actual`, to track what's in CHAI existing: set[tuple[UUID, UUID]] = set() # 2. set of LegacyDependency objects legacy_links: set[LegacyDependency] = self.caches.dependencies.get( pkg_id, set() ) # 3. 
easy look up to get to legacy_links to go from 1 to 2 existing_legacy_map: dict[tuple[UUID, UUID], LegacyDependency] = {} for legacy in legacy_links: key = (legacy.dependency_id, legacy.dependency_type_id) existing_legacy_map[key] = legacy existing.add(key) # calculate our diffs added_tuples: set[tuple[UUID, UUID]] = actual - existing removed_tuples: set[tuple[UUID, UUID]] = existing - actual # convert these to LegacyDependency objects for dep_id, type_id in added_tuples: new_dep = LegacyDependency( package_id=pkg_id, dependency_id=dep_id, dependency_type_id=type_id, created_at=self.now, updated_at=self.now, ) new_deps.append(new_dep) for dep_id, type_id in removed_tuples: removed_dep = existing_legacy_map.get((dep_id, type_id)) if removed_dep: removed_deps.append(removed_dep) return new_deps, removed_deps ================================================ FILE: package_managers/homebrew/formulae.py ================================================ import re from typing import Any from permalint import normalize_url from requests import get from core.config import Config from core.fetcher import Data, Fetcher from core.logger import Logger from package_managers.homebrew.structs import Actual logger = Logger("homebrew_formulae") class HomebrewFetcher(Fetcher): def __init__(self, config: Config): super().__init__( name="homebrew", source=config.pm_config.source, no_cache=config.exec_config.no_cache, test=config.exec_config.test, ) def fetch(self) -> list[Actual]: """Get the current state of Homebrew""" response = get(self.source) try: response.raise_for_status() except Exception as e: logger.error(f"Error fetching Homebrew formulae: {e}") raise e # make json data: list[dict[str, Any]] = response.json() # prep results results: list[Actual] = [] for formula in data: # check if deprecated # TODO: should we delete deprecated = formula.get("deprecated", False) if deprecated: continue # create temp vars for stuff we transform...basically URL homepage = normalize_url(formula["homepage"]) # try urls.head.url, because that generally points to GitHub / git # use urls.stable.url as a backstop source = normalize_url( formula["urls"].get("head", formula["urls"]["stable"]).get("url", "") ) # collect github / gitlab repos if re.search(r"^github\.com", source) or re.search(r"^gitlab\.com", source): repository = source else: repository = None # create the actual actual = Actual( formula=formula["name"], description=formula["desc"], license=formula["license"], homepage=homepage, source=source, repository=repository, build_dependencies=formula["build_dependencies"], dependencies=formula["dependencies"], test_dependencies=formula["test_dependencies"], recommended_dependencies=formula["recommended_dependencies"], optional_dependencies=formula["optional_dependencies"], # TODO: anything else? ) results.append(actual) if self.no_cache: logger.log("No cache, so not saving to file") else: write = Data(".", "homebrew_formulae.json", data) self.write([write]) return results ================================================ FILE: package_managers/homebrew/main.py ================================================ #!
/usr/bin/env pkgx +python@3.11 uv run from datetime import datetime from uuid import UUID from core.config import Config, PackageManager from core.logger import Logger from core.models import URL, LegacyDependency, Package, PackageURL from core.structs import Cache, URLKey from package_managers.homebrew.db import HomebrewDB from package_managers.homebrew.diff import Diff from package_managers.homebrew.formulae import HomebrewFetcher def main(config: Config, db: HomebrewDB) -> None: """A diff-based attempt at loading into CHAI""" logger = Logger("homebrew_main") fetcher = HomebrewFetcher(config) brew = fetcher.fetch() # get the URLs & set that brew_urls = {b.source for b in brew} | {b.homepage for b in brew} db.set_current_urls(brew_urls) logger.log("Set current URLs") # get the caches here cache = Cache( db.graph.package_map, db.urls.url_map, db.urls.package_urls, db.graph.dependencies, ) # total set of updates we'll make are: new_packages: list[Package] = [] new_urls: dict[URLKey, URL] = {} # we'll convert this later new_package_urls: list[PackageURL] = [] updated_packages: list[dict[str, UUID | str | datetime]] = [] updated_package_urls: list[dict[str, UUID | datetime]] = [] new_deps: list[LegacyDependency] = [] removed_deps: list[LegacyDependency] = [] diff = Diff(config, cache) for i, pkg in enumerate(brew): pkg_id, pkg_obj, update_payload = diff.diff_pkg(pkg) if pkg_obj: logger.debug(f"New package: {pkg_obj.name}") new_packages.append(pkg_obj) if update_payload: logger.debug(f"Updated package: {update_payload['id']}") updated_packages.append(update_payload) # NOTE: resolved urls is a map of url types to final URL ID for this pkg # also, &new_urls gets passed in AND mutated resolved_urls = diff.diff_url(pkg, new_urls) # now, new package urls new_links, updated_links = diff.diff_pkg_url(pkg_id, resolved_urls) if new_links: logger.debug(f"New package URLs: {len(new_links)}") new_package_urls.extend(new_links) if updated_links: logger.debug(f"Updated package URLs: {len(updated_links)}") updated_package_urls.extend(updated_links) # finally, dependencies new_dependencies, removed_dependencies = diff.diff_deps(pkg) if new_dependencies: logger.debug(f"New dependencies: {len(new_dependencies)}") new_deps.extend(new_dependencies) if removed_dependencies: logger.debug(f"Removed dependencies: {len(removed_dependencies)}") removed_deps.extend(removed_dependencies) if config.exec_config.test and i > 100: break # final cleanup is to replace the new_urls map with a list final_new_urls = list(new_urls.values()) # send to loader db.ingest( new_packages, final_new_urls, new_package_urls, new_deps, removed_deps, updated_packages, updated_package_urls, ) if __name__ == "__main__": config = Config(PackageManager.HOMEBREW) db = HomebrewDB("homebrew_db_main", config) main(config, db) ================================================ FILE: package_managers/homebrew/structs.py ================================================ from dataclasses import dataclass @dataclass class Actual: formula: str description: str license: str homepage: str source: str repository: str | None build_dependencies: list[str] | None dependencies: list[str] | None test_dependencies: list[str] | None recommended_dependencies: list[str] | None optional_dependencies: list[str] | None ================================================ FILE: package_managers/pkgx/Dockerfile ================================================ FROM ghcr.io/astral-sh/uv:python3.11-bookworm-slim # Copy everything COPY . . 
# Install core requirements using uv WORKDIR /core RUN uv pip install --system -r requirements.txt WORKDIR / # Run the main application CMD ["python", "/package_managers/pkgx/main.py"] ================================================ FILE: package_managers/pkgx/db.py ================================================ #!/usr/bin/env pkgx uv run from core.config import Config from core.db import DB, CurrentURLs from core.structs import CurrentGraph class PkgxDB(DB): def __init__(self, logger_name: str, config: Config): super().__init__(logger_name) self.config = config def set_current_graph(self) -> None: """Get the pkgx packages and dependencies""" self.graph: CurrentGraph = self.current_graph(self.config.pm_config.pm_id) self.logger.log(f"Loaded {len(self.graph.package_map)} pkgx packages") def set_current_urls(self) -> None: """Getting all the URLs and Package URLs from the database""" self.urls: CurrentURLs = self.all_current_urls() self.logger.log(f"Loaded {len(self.urls.url_map)} URLs") ================================================ FILE: package_managers/pkgx/diff.py ================================================ #!/usr/bin/env pkgx uv run from datetime import datetime from uuid import UUID, uuid4 from core.config import Config from core.logger import Logger from core.models import URL, LegacyDependency, Package, PackageURL from core.structs import Cache, URLKey from package_managers.pkgx.db import DB from package_managers.pkgx.parser import DependencyBlock, PkgxPackage from package_managers.pkgx.url import generate_chai_urls class PkgxDiff: def __init__(self, config: Config, caches: Cache, db: DB, logger: Logger): self.config = config self.now = datetime.now() self.caches = caches self.db = db self.logger = logger def diff_pkg( self, import_id: str, pkg: PkgxPackage ) -> tuple[UUID, Package | None, dict | None]: """ Checks if the given pkg is in the package_cache. Returns: - pkg_id: the id of the package - package: If new, returns a new package object. 
If existing, returns None - changes: a dictionary of changes """ self.logger.debug(f"Diffing package: {import_id}") if import_id not in self.caches.package_map: # new package p = Package( id=uuid4(), derived_id=f"pkgx/{import_id}", name=import_id, package_manager_id=self.config.pm_config.pm_id, import_id=import_id, readme="", # NOTE: pkgx doesn't have a description field created_at=self.now, updated_at=self.now, ) pkg_id: UUID = p.id return pkg_id, p, {} else: # the package exists, but since pkgx doesn't maintain a readme or # description field, we can just return pkg_id = self.caches.package_map[import_id].id return pkg_id, None, None def diff_url( self, import_id: str, pkg: PkgxPackage, new_urls: dict[URLKey, URL] ) -> dict[UUID, UUID]: """Given a package's URLs, returns the resolved URL for this specific package""" resolved_urls: dict[UUID, UUID] = {} # Generate the URLs for this package urls = generate_chai_urls( self.config, self.db, import_id, pkg.distributable[0].url, self.logger ) # Process each URL for url_key in urls: # guard: generate_chai_urls could be None for a url type if url_key is None: continue resolved_url_id: UUID if url_key in new_urls: resolved_url_id = new_urls[url_key].id elif url_key in self.caches.url_map: resolved_url_id = self.caches.url_map[url_key].id else: self.logger.debug( f"URL {url_key.url} as {url_key.url_type_id} is entirely new" ) new_url = URL( id=uuid4(), url=url_key.url, url_type_id=url_key.url_type_id, created_at=self.now, updated_at=self.now, ) resolved_url_id = new_url.id new_urls[url_key] = new_url resolved_urls[url_key.url_type_id] = resolved_url_id return resolved_urls def diff_pkg_url( self, pkg_id: UUID, resolved_urls: dict[UUID, UUID] ) -> tuple[list[PackageURL], list[dict]]: """Takes in a package_id and resolved URLs from diff_url, and generates new PackageURL objects as well as a list of changes to existing ones""" new_links: list[PackageURL] = [] updates: list[dict] = [] # what are the existing links? existing: set[UUID] = { pu.url_id for pu in self.caches.package_urls.get(pkg_id, set()) } # for each URL type/URL for this package: for _url_type, url_id in resolved_urls.items(): if url_id not in existing: # new link! new_links.append( PackageURL( id=uuid4(), package_id=pkg_id, url_id=url_id, created_at=self.now, updated_at=self.now, ) ) else: # existing link - update timestamp existing_pu = next( pu for pu in self.caches.package_urls[pkg_id] if pu.url_id == url_id ) existing_pu.updated_at = self.now updates.append({"id": existing_pu.id, "updated_at": self.now}) return new_links, updates def diff_deps( self, import_id: str, pkg: PkgxPackage ) -> tuple[list[LegacyDependency], list[LegacyDependency]]: """ Takes in a pkgx package and figures out what dependencies have changed. The process is: 1. Build a view of what the package's dependencies are according to the parsed pkgx data, using priority-based deduplication 2. Get this package's ID from CHAI 3. Get this package's existing dependencies from CHAI 4. 
Compare the two sets, and identify new and removed dependencies Note: The database has a unique constraint on (package_id, dependency_id), so if a package depends on the same dependency with multiple types (e.g., both runtime and build), we choose the highest priority type: Runtime > Build > Test Returns: - new_deps: a list of new dependencies - removed_deps: a list of removed dependencies """ new_deps: list[LegacyDependency] = [] removed_deps: list[LegacyDependency] = [] # First, collect all dependencies and deduplicate by dependency name # choosing the highest priority dependency type for each unique dependency dependency_map: dict[str, UUID] = {} # Priority order: Runtime > Build > Test priority_order = { self.config.dependency_types.runtime: 1, self.config.dependency_types.build: 2, self.config.dependency_types.test: 3, } def process_deps(dependencies: list[DependencyBlock], dep_type: UUID) -> None: """Helper to process dependencies of a given type with priority""" for dep in dependencies: for dep_obj in dep.dependencies: if not dep_obj.name: continue # Get the dependency package from cache dependency = self.caches.package_map.get(dep_obj.name) if not dependency: self.logger.warn( f"{dep_obj.name}, dep of {import_id} is not in cache" ) continue # If this dependency already exists in our map, choose higher priority if dep_obj.name in dependency_map: existing_priority = priority_order.get( dependency_map[dep_obj.name], 999 ) new_priority = priority_order.get(dep_type, 999) if ( new_priority < existing_priority ): # Lower number = higher priority old_type_id = dependency_map[dep_obj.name] dependency_map[dep_obj.name] = dep_type self.logger.debug( f"Updated dependency type for {dep_obj.name} from " f"{old_type_id} to {dep_type} (higher priority)" ) else: dependency_map[dep_obj.name] = dep_type # Process different types of dependencies with priority handling process_deps(pkg.dependencies, self.config.dependency_types.runtime) process_deps(pkg.build.dependencies, self.config.dependency_types.build) process_deps(pkg.test.dependencies, self.config.dependency_types.test) # Now build the actual set of dependencies with resolved types actual: set[tuple[UUID, UUID]] = set() for dep_name, dep_type in dependency_map.items(): dependency = self.caches.package_map.get(dep_name) if dependency: # Double-check it still exists actual.add((dependency.id, dep_type)) # get the package ID for what we are working with package = self.caches.package_map.get(import_id) if not package: self.logger.warn(f"New package {import_id}, will grab its deps next time") return [], [] pkg_id: UUID = package.id # what are its existing dependencies? # specifically, existing dependencies IN THE SAME STRUCTURE as `actual`, # so we can do an easy comparison existing: set[tuple[UUID, UUID]] = { (dep.dependency_id, dep.dependency_type_id) for dep in self.caches.dependencies.get(pkg_id, set()) } # we have two sets! 
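# a quick sketch with hypothetical IDs: if actual = {(A, runtime), (B, build)} and
# existing = {(A, runtime), (B, test)}, then B's type change surfaces as one added
# tuple (B, build) plus one removed tuple (B, test), while A is left untouched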
# actual minus existing = new_deps # existing minus actual = removed_deps new = actual - existing removed = existing - actual new_deps: list[LegacyDependency] = [ LegacyDependency( package_id=pkg_id, dependency_id=dep[0], dependency_type_id=dep[1], created_at=self.now, updated_at=self.now, ) for dep in new ] # get the existing legacy dependency, and add it to removed_deps removed_deps: list[LegacyDependency] = [] cache_deps: set[LegacyDependency] = self.caches.dependencies.get(pkg_id, set()) for removed_dep_id, removed_dep_type in removed: try: existing_dep = next( dep for dep in cache_deps if dep.dependency_id == removed_dep_id and dep.dependency_type_id == removed_dep_type ) removed_deps.append(existing_dep) except StopIteration as exc: cache_deps_str = "\n".join( [ f"{dep.dependency_id} / {dep.dependency_type_id}" for dep in cache_deps ] ) raise ValueError( f"Removing {removed_dep_id} / {removed_dep_type} for {pkg_id} but not in Cache: \n{cache_deps_str}" ) from exc return new_deps, removed_deps ================================================ FILE: package_managers/pkgx/loader.py ================================================ from sqlalchemy import select from sqlalchemy.dialects.postgresql import insert as pg_insert from core.config import Config from core.db import DB from core.models import ( LegacyDependency, Package, ) from package_managers.pkgx.parser import DependencyBlock from package_managers.pkgx.transformer import Cache BATCH_SIZE = 10000 # NOTE: this is a separate instance of the db that is used in main class PkgxLoader(DB): def __init__(self, config: Config, data: dict[str, Cache]): super().__init__("pkgx_db") self.config = config self.data = data self.debug = config.exec_config.test self.logger.debug(f"Initialized PkgxLoader with {len(data)} packages") def load_packages(self) -> None: """ Efficiently load all unique packages from the cache map into the database using bulk insertion and returning inserted IDs. 
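Rows whose derived_id already exists are skipped via ON CONFLICT DO NOTHING; their IDs are then back-filled with batched SELECTs on derived_id, so every cached package ends up holding a database ID.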
""" unique_packages = {} for key, cache in self.data.items(): package = cache.package if not isinstance(package, Package): self.logger.error( f"Invalid package object for key {key}: {type(package)}" ) continue if package.derived_id not in unique_packages: unique_packages[package.derived_id] = package self.logger.log(f"Found {len(unique_packages)} unique packages to insert") package_dicts = [] for pkg in unique_packages.values(): try: package_dicts.append(pkg.to_dict()) except Exception as e: self.logger.error(f"Error in to_dict for package {pkg.name}: {e!s}") if not package_dicts: self.logger.log("No packages to insert") return with self.session() as session: try: stmt = pg_insert(Package).values(package_dicts).on_conflict_do_nothing() # TODO: can just generate the UUID myself and provide it, so no need to # return stmt = stmt.returning(Package.id, Package.derived_id) self.logger.log("About to execute insert statement for packages") result = session.execute(stmt) inserted_packages = {row.derived_id: row.id for row in result} session.commit() self.logger.log( f"Successfully inserted {len(inserted_packages)} packages" ) missing_derived_ids = [ derived_id for derived_id in unique_packages if derived_id not in inserted_packages ] self.logger.log( f"Fetching {len(missing_derived_ids)} IDs for conflicting packages" ) if missing_derived_ids: # Fetch missing IDs in batches for i in range(0, len(missing_derived_ids), BATCH_SIZE): batch_ids = missing_derived_ids[i : i + BATCH_SIZE] stmt = select(Package.id, Package.derived_id).where( Package.derived_id.in_(batch_ids) ) result = session.execute(stmt) for row in result: inserted_packages[row.derived_id] = row.id updated_count = 0 for cache in self.data.values(): if cache.package.derived_id in inserted_packages: cache.package.id = inserted_packages[cache.package.derived_id] updated_count += 1 self.logger.log(f"Updated cache with IDs for {updated_count} packages") except Exception as e: self.logger.error(f"Error inserting packages: {e!s}") self.logger.error(f"Error type: {type(e)}") raise def load_dependencies(self) -> None: """ Load all dependencies into the LegacyDependency table. This requires package IDs to be loaded first. # FIXME: legacy dependencies are package to package relationships. # A migration is needed to move all dependencies to the LegacyDependency structure. """ self.logger.log("Starting to load legacy dependencies") legacy_dependency_dicts = [] missing = set() for key, cache in self.data.items(): # Ensure the main package has an ID if not hasattr(cache.package, "id") or cache.package.id is None: self.logger.warn( f"Package {key} has no ID when loading dependencies, skipping" ) continue package_id = cache.package.id # Helper to process a list of dependency names for a given type def process_deps( dep_blocks: list[DependencyBlock], dep_type_id: str, key=key, package_id=package_id, ): for dep_block in dep_blocks: # TODO: do we need to use this? 
for dep in dep_block.dependencies: dep_name = dep.name dep_semver = dep.semver # Find the dependency package in our cache dep_cache = self.data.get(dep_name) if not dep_cache: missing.add(dep_name) continue # Checks: has to have an ID if ( not hasattr(dep_cache.package, "id") or dep_cache.package.id is None ): self.logger.warn( f"Dependency package '{dep_name}' has no ID, skipping linkage for '{key}'" ) continue dependency_id = dep_cache.package.id # Append data for bulk insert legacy_dependency_dicts.append( { "package_id": package_id, "dependency_id": dependency_id, "dependency_type_id": dep_type_id, "semver_range": dep_semver, } ) # Process each dependency type process_deps(cache.dependencies.build, self.config.dependency_types.build) process_deps(cache.dependencies.test, self.config.dependency_types.test) process_deps( cache.dependencies.dependencies, self.config.dependency_types.runtime ) self.logger.log( f"Found {len(legacy_dependency_dicts)} legacy dependencies to insert" ) if missing: self.logger.warn(f"{len(missing)} pkgs are deps, but have no pkgx.yaml") self.logger.warn(f"Missing pkgs: {missing}") if not legacy_dependency_dicts: self.logger.log("No legacy dependencies to insert") return # Bulk insert legacy dependencies with self.session() as session: try: for i in range(0, len(legacy_dependency_dicts), BATCH_SIZE): batch = legacy_dependency_dicts[i : i + BATCH_SIZE] self.logger.log( f"Processing LegacyDependency batch {i // BATCH_SIZE + 1}/{(len(legacy_dependency_dicts) - 1) // BATCH_SIZE + 1} ({len(batch)} links)" ) stmt = ( pg_insert(LegacyDependency) .values(batch) .on_conflict_do_nothing() ) session.execute(stmt) session.commit() self.logger.log("Successfully inserted all pkgx dependencies") except Exception as e: self.logger.error(f"Error inserting legacy dependencies: {e!s}") self.logger.error(f"Error type: {type(e)}") raise ================================================ FILE: package_managers/pkgx/main.py ================================================ #!/usr/bin/env pkgx +python@3.11 uv run import os import time from datetime import datetime from uuid import UUID from core.config import Config, PackageManager from core.fetcher import GitFetcher from core.logger import Logger from core.models import URL, LegacyDependency, Package, PackageURL from core.scheduler import Scheduler from core.structs import Cache, URLKey from package_managers.pkgx.db import PkgxDB from package_managers.pkgx.diff import PkgxDiff from package_managers.pkgx.parser import PkgxParser logger = Logger("pkgx") SCHEDULER_ENABLED = os.getenv("ENABLE_SCHEDULER", "true").lower() == "true" BATCH_SIZE = 500 PROJECTS_DIR = "projects" PACKAGE_FILE = "package.yml" def fetch(config: Config) -> GitFetcher: should_fetch = config.exec_config.fetch fetcher = GitFetcher( "pkgx", config.pm_config.source, config.exec_config.no_cache, config.exec_config.test, ) if should_fetch: logger.debug("Starting Pkgx package fetch") fetcher.fetch() else: # symlink would still be updated logger.log("Fetching disabled, skipping fetch") # if no_cache is on, we'll delete stuff from here return fetcher def run_pipeline(config: Config, db: PkgxDB): """A diff-based approach to loading pkgx data into CHAI""" fetcher = fetch(config) output_dir = f"{fetcher.output}/latest" # Parse all packages pkgx_parser = PkgxParser(output_dir) packages = list(pkgx_parser.parse_packages()) logger.log(f"Parsed {len(packages)} packages") # Set up cache db.set_current_graph() db.set_current_urls() logger.log("Set current URLs") # Build cache for 
differential loading cache = Cache( db.graph.package_map, db.urls.url_map, db.urls.package_urls, db.graph.dependencies, ) # Initialize differential loading collections new_packages: list[Package] = [] new_urls: dict[URLKey, URL] = {} new_package_urls: list[PackageURL] = [] updated_packages: list[dict[str, UUID | str | datetime]] = [] updated_package_urls: list[dict[str, UUID | datetime]] = [] new_deps: list[LegacyDependency] = [] removed_deps: list[LegacyDependency] = [] # Create diff processor diff = PkgxDiff(config, cache, db, logger) # Process each package for i, (pkg_data, import_id) in enumerate(packages): # Diff the package pkg_id, pkg_obj, update_payload = diff.diff_pkg(import_id, pkg_data) if pkg_obj: logger.debug(f"New package: {pkg_obj.name}") new_packages.append(pkg_obj) if update_payload: logger.debug(f"Updated package: {update_payload['id']}") updated_packages.append(update_payload) # Diff URLs (resolved_urls is map of url types to final URL ID) resolved_urls = diff.diff_url(import_id, pkg_data, new_urls) # Diff package URLs new_links, updated_links = diff.diff_pkg_url(pkg_id, resolved_urls) if new_links: logger.debug(f"New package URLs: {len(new_links)}") new_package_urls.extend(new_links) if updated_links: updated_package_urls.extend(updated_links) # Diff dependencies new_dependencies, removed_dependencies = diff.diff_deps(import_id, pkg_data) if new_dependencies: logger.debug(f"New dependencies: {len(new_dependencies)}") new_deps.extend(new_dependencies) if removed_dependencies: logger.debug(f"Removed dependencies: {len(removed_dependencies)}") removed_deps.extend(removed_dependencies) if config.exec_config.test and i > 10: break # Convert new_urls dict to list for ingestion final_new_urls = list(new_urls.values()) # Ingest all diffs db.ingest( new_packages, final_new_urls, new_package_urls, new_deps, removed_deps, updated_packages, updated_package_urls, ) if config.exec_config.no_cache: fetcher.cleanup() def main(): logger.log("Initializing Pkgx package manager") config = Config(PackageManager.PKGX) db = PkgxDB("pkgx_main_db_logger", config) logger.debug(f"Using config: {config}") if SCHEDULER_ENABLED: logger.log("Scheduler enabled. Starting schedule.") scheduler = Scheduler("pkgx") scheduler.start(run_pipeline, config) # run immediately as well when scheduling scheduler.run_now(run_pipeline, config, db) # keep the main thread alive for scheduler try: while True: time.sleep(3600) except KeyboardInterrupt: scheduler.stop() logger.log("Scheduler stopped.") else: logger.log("Scheduler disabled. 
Running pipeline once.") run_pipeline(config, db) logger.log("Pipeline finished.") if __name__ == "__main__": main() ================================================ FILE: package_managers/pkgx/parser.py ================================================ from collections.abc import Iterator from dataclasses import dataclass, field from pathlib import Path from typing import Any import yaml from core.logger import Logger from core.utils import convert_keys_to_snake_case logger = Logger("pkgx") PROJECTS_DIR = "projects" PACKAGE_FILE = "package.yml" # IMPORTANT: # the package.yml maintains a warnings list, which sometimes contain "vendored" # this correlates to Homebrew's casks, and CHAI ignores them # structures # this enables everything, but we don't need all of it right now @dataclass class Distributable: url: str strip_components: int | None = field(default=None) ref: str | None = field(default=None) sig: str | None = field(default=None) sha: str | None = field(default=None) @dataclass class Version: github: str | None = field(default=None) # (user)?(/tags/releases) gitlab: str | None = field(default=None) # (user|project)?(/tags/releases) url: str | None = field(default=None) # for non github projects match: str | None = field(default=None) # regex to match the version strip: str | None = field(default=None) # regex to strip the version ignore: str | None = field(default=None) # regex to ignore the version versions: list[str] | None = field(default=None) # list of versions npm: str | None = field(default=None) # npm package name transform: str | None = field(default=None) # regex to transform the version stripe: str | None = field(default=None) # not sure what this is @dataclass class Dependency: name: str semver: str @dataclass class EnvironmentVariable: name: str value: str | list[str] @dataclass class DependencyBlock: platform: str # 'all', 'linux', 'darwin', etc. dependencies: list[Dependency] @dataclass class Build: script: str dependencies: list[DependencyBlock] = field(default_factory=list) env: list[EnvironmentVariable] = field(default_factory=list) working_directory: str | None = field(default=None) @dataclass class Test: script: str dependencies: list[DependencyBlock] = field(default_factory=list) env: list[EnvironmentVariable] = field(default_factory=list) fixture: str | None = field(default=None) @dataclass class PkgxPackage: distributable: list[Distributable] versions: Version build: Build | None = field(default=None) test: Test | None = field(default=None) # provides: list[str] = field(default_factory=list) # all cli commands provided # platforms: list[str] = field( # default_factory=list # ) # darwin, linux/x64, linux/arm64, etc. 
# Store a list of dependency blocks, each specifying a platform and its deps dependencies: list[DependencyBlock] = field(default_factory=list) # Pkgx Parser can look at the pantry and yield a dictionary of information in the YAML class PkgxParser: def __init__(self, repo_path: str): self.repo_path = repo_path def find_package_yamls(self) -> Iterator[tuple[Path, str]]: """Finds all package.yml files within the projects directory.""" projects_path = Path(self.repo_path) / PROJECTS_DIR if not projects_path.is_dir(): logger.error(f"Projects directory not found at: {projects_path}") return logger.debug(f"Searching for {PACKAGE_FILE} in {projects_path}...") count = 0 for yaml_path in projects_path.rglob(PACKAGE_FILE): if yaml_path.is_file(): # Calculate relative path for project identifier relative_path = yaml_path.parent.relative_to(projects_path) project_identifier = str(relative_path) yield yaml_path, project_identifier count += 1 logger.debug(f"Found {count} {PACKAGE_FILE} files.") def is_vendored(self, data: dict[str, Any]) -> bool: """Checks if the package is vendored.""" if "warnings" in data: warnings = data.get("warnings", []) if "vendored" in warnings: return True return False def parse_package_yaml(self, file_path: Path) -> PkgxPackage | None: """Parses a single package.yml file.""" try: with open(file_path) as f: data = yaml.safe_load(f) if not isinstance(data, dict): logger.warn( f"Expected dict, got {type(data).__name__} in {file_path}" ) return None # check if the package is vendored if self.is_vendored(data): return None pkgx_package = self.map_package_yaml_to_pkgx_package( data, str(file_path) ) return pkgx_package except yaml.YAMLError as e: logger.error(f"Error parsing YAML file {file_path}: {e}") return None except Exception as e: logger.error(f"Error reading file {file_path}: {e}") raise e def parse_packages(self) -> Iterator[tuple[PkgxPackage, str]]: """Parses all package.yml files found in the repository.""" for yaml_path, project_identifier in self.find_package_yamls(): parsed_data = self.parse_package_yaml(yaml_path) if parsed_data: yield parsed_data, project_identifier def _parse_dependency_list( self, deps_data: Any, context: str ) -> list[DependencyBlock]: """Parses a dependency dictionary into a list of DependencyBlock objects.""" if not isinstance(deps_data, dict): # For now, assume empty dict means no deps, but non-dict is error.
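# illustrative input shape (package names here are hypothetical): a direct entry
# like "openssl.org: ^1.1" is a name -> semver pair, while a platform entry like
# "linux: {gnu.org/make: '*'}" nests a name -> semver map under the platform key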
if deps_data is None or deps_data == {}: return [] dep_type = type(deps_data).__name__ raise TypeError( f"Expected dependencies to be a dict in {context}, got {dep_type}" ) dependency_blocks = [] direct_deps = [] for key, value in deps_data.items(): # Platform-specific block if isinstance(value, dict): platform = key platform_deps = [] for dep_name, semver in value.items(): if isinstance(semver, str): platform_deps.append(Dependency(name=dep_name, semver=semver)) elif isinstance(semver, int | float): platform_deps.append( Dependency(name=dep_name, semver=str(semver)) ) else: raise TypeError( f"Unexpected semver type for {dep_name} under platform {platform} in {context}: {type(semver).__name__}" ) if platform_deps: dependency_blocks.append( DependencyBlock(platform=platform, dependencies=platform_deps) ) # else: empty platform block is ignored # Direct dependency declaration elif isinstance(value, str): dep_name = key semver = value direct_deps.append(Dependency(name=dep_name, semver=semver)) # Direct declaration, but sometimes the semvers are exact elif isinstance(value, int | float): dep_name = key semver = str(value) direct_deps.append(Dependency(name=dep_name, semver=semver)) # Invalid structure else: raise TypeError( f"Unexpected dependency value type for key '{key}' in {context}: {type(value).__name__}. Expected dict or str or float." ) # Add all direct dependencies under the 'all' platform if direct_deps: dependency_blocks.append( DependencyBlock(platform="all", dependencies=direct_deps) ) return dependency_blocks def _parse_build_section(self, build_data: Any, file_path_str: str) -> Build: """Parses the build section if its a dict, list, or str""" if isinstance(build_data, dict): # Pass original dependencies dict, don't convert keys here build_deps_list = self._parse_dependency_list( build_data.get("dependencies"), f"build section of {file_path_str}" ) # Convert env var keys just before instantiation build_env = [ EnvironmentVariable(**convert_keys_to_snake_case(env)) for env in build_data.get("env", []) if isinstance(env, dict) ] # Convert build_data keys just before creating Build object build_kwargs = convert_keys_to_snake_case(build_data) return Build( script=build_kwargs.get("script", ""), dependencies=build_deps_list, # Use the originally parsed list env=build_env, working_directory=build_kwargs.get("working_directory"), ) elif isinstance(build_data, list): # Generally, it's a list of build commands, so we only have script info # TODO: Potentially improve handling of list-based build data script = ( build_data[0] if build_data and isinstance(build_data[0], str) else "" ) return Build( script=script, dependencies=[], env=[], working_directory=None, ) elif isinstance(build_data, str): return Build( script=build_data, dependencies=[], env=[], working_directory=None, ) else: build_type = type(build_data).__name__ raise TypeError(f"Build in {file_path_str} is {build_type}") def _parse_test_section(self, test_data: Any, file_path_str: str) -> Test: """Parses the test section if its a dict, list, or str""" if isinstance(test_data, dict): # Pass original dependencies dict test_deps_list = self._parse_dependency_list( test_data.get("dependencies"), f"test section of {file_path_str}" ) # Convert env var keys just before instantiation test_env = [ EnvironmentVariable(**convert_keys_to_snake_case(env)) for env in test_data.get("env", []) if isinstance(env, dict) ] # Convert test_data keys just before creating Test object test_kwargs = convert_keys_to_snake_case(test_data) return Test( 
script=test_kwargs.get("script", ""), dependencies=test_deps_list, # Use the originally parsed list env=test_env, fixture=test_kwargs.get("fixture"), ) elif isinstance(test_data, list): # TODO: Clarify how to handle list-based test data. Assuming empty for now. return Test(script="", dependencies=[], env=[], fixture=None) elif isinstance(test_data, str): # Assuming string directly means the script return Test(script=test_data, dependencies=[], env=[], fixture=None) elif isinstance(test_data, bool): # bad tests are sometimes just true/false return Test(script=str(test_data), dependencies=[], env=[], fixture=None) else: test_type = type(test_data).__name__ raise TypeError(f"Test for {file_path_str} is {test_type}") def _parse_versions_section( self, versions_data: Any, file_path_str: str ) -> Version: """Parses the versions section if its a list, dict, or None""" if isinstance(versions_data, list): # list of version strings (nums) return Version(versions=versions_data) elif isinstance(versions_data, dict): # github or gitlab...something useful # Convert keys just before creating Version object return Version(**convert_keys_to_snake_case(versions_data)) elif versions_data is None: # Handle case where versions might be missing, return default empty logger.warn(f"Missing 'versions' section in {file_path_str} using default.") return Version() else: version_type = type(versions_data).__name__ raise TypeError(f"Versions in {file_path_str} is {version_type}") def _parse_distributable_section( self, distributable_data: Any, file_path_str: str ) -> Distributable | list[Distributable]: """Parses the distributable section from the package data.""" if isinstance(distributable_data, list): # Convert keys for each dict in the list before creating Distributable return [ Distributable(**convert_keys_to_snake_case(d)) for d in distributable_data if isinstance(d, dict) ] elif isinstance(distributable_data, dict): # Convert keys just before creating Distributable object return [Distributable(**convert_keys_to_snake_case(distributable_data))] elif distributable_data is None: return [Distributable(url="~")] else: distributable_type = type(distributable_data).__name__ raise TypeError(f"Distributable in {file_path_str} is {distributable_type}") def map_package_yaml_to_pkgx_package( self, data: dict[str, Any], file_path_str: str ) -> PkgxPackage: """Maps a package.yml to a PkgxPackage.""" # Keep the original data, do not normalize globally here # normalized_data = convert_keys_to_snake_case(data) # Parse sections using helper functions, passing original data segments build_data = data.get("build") build_obj = self._parse_build_section(build_data, file_path_str) test_data = data.get("test") test_obj = self._parse_test_section(test_data, file_path_str) versions_data = data.get("versions") versions_obj = self._parse_versions_section(versions_data, file_path_str) distributable_data = data.get("distributable") distributable_obj = self._parse_distributable_section( distributable_data, file_path_str ) # Parse top-level dependencies using original keys dependencies_data = data.get("dependencies") top_level_deps_list = self._parse_dependency_list( dependencies_data, f"top-level of {file_path_str}" ) # TODO: Implement parsing for 'provides' list # would be useful because we have the set of "names" / "commands" for it! 
# provides_data = data.get("provides") # provides_obj = self._parse_provides_section(provides_data, file_path_str) # TODO: Implement parsing for 'platforms' list # platforms_data = data.get("platforms") # platforms_obj = self._parse_platforms_section(platforms_data, file_path_str) # Note: PkgxPackage itself doesn't directly take snake_case kwargs from top level # Its arguments are constructed from the parsed objects. return PkgxPackage( distributable=distributable_obj, versions=versions_obj, dependencies=top_level_deps_list, build=build_obj, test=test_obj, # provides=provides, # platforms=platforms, ) ================================================ FILE: package_managers/pkgx/url.py ================================================ import re from uuid import UUID from permalint import normalize_url, possible_names from requests import Response, get from core.config import Config from core.logger import Logger from core.structs import URLKey from core.utils import is_github_url from package_managers.pkgx.db import DB HOMEPAGE_URL = "https://pkgx.dev/pkgs/{name}.json" def canonicalize(url: str) -> str: return normalize_url(url) def guess(db_client: DB, package_managers: list[UUID], url: str) -> list[str]: names = possible_names(url) urls = db_client.search_names(names, package_managers) return urls def ask_pkgx(import_id: str) -> str | None: """ ask max's scraping work for the homepage of a package Homepage comes from the pkgxdev/www repo The API https://pkgx.dev/pkgs/{name}.json returns a blob which may contain the homepage field """ response: Response = get(HOMEPAGE_URL.format(name=import_id)) if response.status_code == 200: data: dict[str, str] = response.json() if "homepage" in data: return data["homepage"] def special_case(import_id: str, logger: Logger) -> str | None: homepage: str | None = None # if no slashes, then pkgx used the homepage as the name # if two slashes, then probably github / gitlab if not re.search(r"/", import_id) or re.search(r"/.+/", import_id): homepage = import_id # if it's a crates.io package, then we can use the crates URL elif re.search(r"^crates.io", import_id): if "/" in import_id: name = import_id.split("/")[1] homepage = f"https://crates.io/crates/{name}" else: logger.warn(f"Invalid format for crates.io import_id: {import_id}") # if it's part of the x.org family elif re.search(r"^x.org", import_id): homepage = "https://x.org" # if it's part of the pkgx family elif re.search("^pkgx.sh", import_id): tool = import_id.split("/")[1] homepage = f"https://github.com/pkgxdev/{tool}" # python.org/typing_extensions elif import_id == "python.org/typing_extensions": homepage = "https://github.com/python/typing_extensions" # thrysoee.dk/editline elif import_id == "thrysoee.dk/editline": homepage = "https://thrysoee.dk/editline" # gen-ir is a Homebrew Tap, which lists this as its homepage elif import_id == "veracode.com/gen-ir": homepage = "https://github.com/veracode/gen-ir" else: logger.warn(f"no homepage in pkgx for {import_id}") return homepage def generate_chai_urls( config: Config, db: DB, import_id: str, distributable_url: str, logger: Logger ) -> list[URLKey]: """For a pkgx import_id, generate a list of URLs it could have""" urls: list[URLKey] = [] # homepage similar = [config.package_managers.debian, config.package_managers.homebrew] maybe: list[str] = guess(db, similar, import_id) if maybe: homepage = maybe[0] else: homepage = ask_pkgx(import_id) if not homepage: homepage = special_case(import_id, logger) if homepage: canonical_homepage = canonicalize(homepage) 
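        # canonicalize() defers to permalint's normalize_url; the assumption is
        # that it yields one stable form per homepage, so packages sharing a
        # homepage map to the same URL row downstream.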
urls.append(URLKey(canonical_homepage, config.url_types.homepage)) # source # NOTE: for non-GitHub source URLs, pkgx tells you where the version string for the # downloadable tarball is...right now, we don't do anything about that canonical_distributable = canonicalize(distributable_url) urls.append(URLKey(canonical_distributable, config.url_types.source)) if is_github_url(canonical_distributable): urls.append(URLKey(canonical_distributable, config.url_types.repository)) return urls ================================================ FILE: pkgx.yaml ================================================ # this is the pkgx config across all the services covered by docker-compose dependencies: python.org: ~3.11 xcfile.dev: 0 cli.github.com: 2 astral.sh/uv: 0 postgresql.org: 16 docker.com/compose: 2 alembic.sqlalchemy.org: 1 psycopg.org/psycopg2: 2 ================================================ FILE: pyproject.toml ================================================ [project] name = "chai" version = "1.0.0" description = "An open-source data pipeline for all package managers" authors = [ { name = "Sanchit Ram Arvind", email = "sanchitram@gmail.com" }, { name = "Jacob Heider", email = "jhheider@pkgx.dev" }, ] keywords = ["data", "pipeline"] readme = "README.md" requires-python = ">= 3.11" dependencies = [] [project.urls] Homepage = "https://github.com/teaxyz/chai" Repository = "https://github.com/teaxyz/chai" Source = "https://github.com/teaxyz/chai" [tool.uv] managed = true [tool.pytest.ini_options] pythonpath = ["."] minversion = "8.0" python_files = ["test_*.py"] python_classes = ["Test*"] python_functions = ["test_"] addopts = ["-ra", "--strict-markers", "--disable-warnings", "--tb=short"] markers = ["unit"] [tool.ruff] line-length = 88 exclude = ["__pycache__", ".venv", ".git", ".pytest_cache"] [tool.ruff.lint] select = [ "E", # pycodestyle (error) "F", # pyflakes "B", # bugbear "B9", "C4", # flake8-comprehensions "SIM", # flake8-simplify "I", # isort "UP", # pyupgrade "PIE", # flake8-pie "PGH", # pygrep-hooks "PYI", # flake8-pyi "RUF", ] ignore = [ # leave it to the formatter to split long lines and # the judgement of all of us. "E501", ] fixable = ["ALL"] [tool.ruff.format] quote-style = "double" indent-style = "space" [dependency-groups] dev = [ "pytest>=8.4.0", "pytest-cov>=6.2.1", "ruff>=0.11.13", "testing-postgresql>=1.3.0", ] indexers = [ "alembic==1.13.2", "certifi>=2025.4.26", "charset-normalizer>=3.4.2", "gitpython>=3.1.44", "idna>=3.10", "permalint>=0.1.15", "psycopg2-binary==2.9.10", "pyyaml>=6.0.2", "requests>=2.32.4", "schedule>=1.2.2", "sqlalchemy>=2.0.41", "urllib3>=2.4.0", ] ranker = ["numpy>=2.3.0", "rustworkx>=0.16.0"] ================================================ FILE: ranker/.dockerignore ================================================ prompts/ ================================================ FILE: ranker/.gitignore ================================================ prompts/ ================================================ FILE: ranker/Dockerfile ================================================ FROM python:3.11 # Copy everything COPY . . 
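# NOTE: the build context must be the repository root (see README) so that
# both core/ and ranker/ are copied for the two installs below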
# Install core requirements
WORKDIR /core
RUN pip install --no-cache-dir -r requirements.txt

# Install ranker requirements
WORKDIR /ranker
RUN pip install --no-cache-dir -r requirements.txt

# Command to run the application
CMD ["python", "/ranker/main.py"]


================================================
FILE: ranker/README.md
================================================
# ranker

generates a deduplicated graph across all CHAI package managers by URL, and publishes a tea_rank

## Requirements

1. [pkgx](https://pkgx.sh)
2. [uv](https://astral.sh/uv)

## Deduplication (`dedupe.py`)

`dedupe.py` handles the deduplication of packages based on their homepage URLs. It ensures that packages sharing the same canonical homepage URL are grouped together.

**Process:**

1. **Fetch Existing State:** Retrieves all current canonical homepage URLs and their associated packages from the `canons`, `canon_packages`, and `package_urls` tables
2. **Determine Latest URLs:** Identifies the most recent homepage URL for each package
3. **Diff:** Identifies new canons, new canon_packages, and canon_packages to update
4. **Ingest:** Creates new canons and new links if necessary, and updates existing ones

This process is idempotent: running it multiple times converges to the same correct state based on the latest available package URL data.

### Getting started

1. You need `CHAI_DATABASE_URL` set up, and the CHAI db running
2. With pkgx:

```bash
chmod +x ranker/dedupe.py
PYTHONPATH=. LOAD=0 ranker/dedupe.py
```

You can toggle `LOAD` to do a dry run, where it reports what it's about to do without loading any information

## Ranking

`main.py` first runs deduplication, then builds a graph with one node per canon and one edge per dependency, personalizes it using the per-package-manager favorites in `config.py`, distributes weight across the graph (`rx_graph.py`), and stores the results as a new tea_rank run.

## Usage

### With pkgx

```bash
chmod +x main.py
./main.py
```

### Without pkgx

```bash
uv run main.py
```

## Docker

This service can be run inside a Docker container. The container assumes that the `core` library is available and that the `CHAI_DATABASE_URL` environment variable is set to point to the database.

**Building the Image:**

From the root of the `chai-oss` repository:

```bash
docker build -t chai-ranker -f ranker/Dockerfile .
```

**Running the Container:**

Make sure to provide the database connection string via the `CHAI_DATABASE_URL` environment variable:

```bash
docker run --rm -e CHAI_DATABASE_URL=postgresql://postgres:s3cr3t@localhost:5435/chai chai-ranker
```

The container runs `main.py`, which executes the deduplication step before ranking, and exits with code 0 on success or a non-zero code on failure.
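## Utilities

- `utils/analyze_ranks.py`: joins a rank JSON dump with canon and package data from the database and writes a formatted CSV. A sketch invocation, assuming `CHAI_DATABASE_URL` is set and you run from the repo root:

  ```bash
  PYTHONPATH=. python ranker/utils/analyze_ranks.py --file data/ranker/ranks/ranks_37_0.7.json
  ```

- `utils/parse_log.py`: computes packages-per-second metrics from a ranker run log, either from a file or piped in via `tmux capture-pane -p`.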
================================================ FILE: ranker/config.py ================================================ from dataclasses import dataclass from decimal import Decimal, getcontext from uuid import UUID from sqlalchemy import func from core.db import DB from core.logger import Logger from core.models import Canon, CanonPackage, Package, PackageManager, Source, URLType from core.utils import env_vars logger = Logger("graph.config") SYSTEM_PACKAGE_MANAGERS = ["homebrew", "debian", "pkgx"] # setup decimal getcontext().prec = 9 getcontext().rounding = "ROUND_HALF_UP" class ConfigDB(DB): def __init__(self): super().__init__("graph.config::db") def get_homepage_url_type_id(self) -> UUID: with self.session() as session: result = ( session.query(URLType.id).filter(URLType.name == "homepage").scalar() ) if result is None: raise ValueError("homepage url type not found") return result def get_npm_pm_id(self) -> UUID: return self.get_pm_id_by_name("npm")[0][0] def get_canons_with_source_types( self, source_types: list[str] ) -> list[tuple[UUID, list[str]]]: with self.session() as session: return ( session.query( Canon.id, func.array_agg(Source.type).label("source_types") ) .join(CanonPackage, Canon.id == CanonPackage.canon_id) .join(Package, CanonPackage.package_id == Package.id) .join(PackageManager, Package.package_manager_id == PackageManager.id) .join(Source, PackageManager.source_id == Source.id) .filter(Source.type.in_(source_types)) .group_by(Canon.id) .all() ) def get_pm_id_by_name(self, name: str | list[str]) -> UUID: if isinstance(name, str): name = [name] with self.session() as session: result = ( session.query(PackageManager.id) .join(Source, PackageManager.source_id == Source.id) .filter(Source.type.in_(name)) .all() ) if result is None: raise ValueError(f"package manager {name} not found") return result class TeaRankConfig: def __init__(self, db: ConfigDB) -> None: self.db = db self.favorites: dict[str, Decimal] = {} self.weights: dict[UUID, Decimal] = {} self.personalization: dict[UUID, Decimal] = {} self.map_favorites(SYSTEM_PACKAGE_MANAGERS) alpha: Decimal = Decimal("0.85") split_ratio: Decimal = Decimal("0.5") tol: Decimal = Decimal("1e-6") max_iter: int = 1000000 def map_favorites(self, package_managers: list[str]) -> None: for pm in package_managers: match pm: case "homebrew": pm_id = self.db.get_pm_id_by_name("homebrew")[0][0] self.favorites[pm_id] = Decimal("0.3") case "debian": pm_id = self.db.get_pm_id_by_name("debian")[0][0] self.favorites[pm_id] = Decimal("0.6") case "pkgx": pm_id = self.db.get_pm_id_by_name("pkgx")[0][0] self.favorites[pm_id] = Decimal("0.1") case _: raise ValueError(f"Unknown system package manager: {pm}") def personalize( self, canons_with_source_types: list[tuple[UUID, list[str]]] ) -> None: """Adjust canon weights proportionally to the sum of `favorites` in their associated package managers, normalized to total 1.""" def coefficient(source_types: list[str]) -> Decimal: return sum(self.favorites[source_type] for source_type in source_types) # calculate raw weights for each canon based on favorites raw_weights = {} total = Decimal(0) for canon_id, package_manager_ids in canons_with_source_types: # make source_types a set to deduplicate source_types = set(package_manager_ids) # sum the weights for all package managers this canon appears in weight = coefficient(source_types) raw_weights[canon_id] = weight total += weight constant = Decimal(1) / total for canon_id, weight in raw_weights.items(): self.personalization[canon_id] = weight * constant 
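        # Worked example (hypothetical): a canon seen only in debian has raw
        # weight 0.6 and one seen only in homebrew has 0.3; total = 0.9, so
        # their personalization values become 2/3 and 1/3, summing to 1.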
logger.debug(f"Personalization sum: {sum(self.personalization.values())}") def __str__(self) -> str: return f"TeaRankConfig(alpha={self.alpha}, favorites={self.favorites}, weights={len(self.weights)}, personalization={len(self.personalization)})" # E501 class PMConfig: def __init__(self, db: ConfigDB) -> None: self.db = db self.npm_pm_id = self.db.get_npm_pm_id() self.system_pm_ids = [ id[0] for id in self.db.get_pm_id_by_name(SYSTEM_PACKAGE_MANAGERS) ] # TODO: we'll add PyPI, rubygems from when we load with legacy data def __str__(self) -> str: return ( f"PMConfig(npm_pm_id={self.npm_pm_id}, system_pm_ids={self.system_pm_ids})" ) class URLTypes: def __init__(self, db: ConfigDB) -> None: self.db = db self.homepage_url_type_id = self.db.get_homepage_url_type_id() def __str__(self) -> str: return f"URLTypes(homepage_url_type_id={self.homepage_url_type_id})" class DedupeConfig: def __init__(self, db: ConfigDB) -> None: self.homepage_url_type_id = db.get_homepage_url_type_id() self.load = env_vars("LOAD", "true") def __str__(self) -> str: return f"DedupeConfig(homepage_url_type_id={self.homepage_url_type_id}, load={self.load})" # E501 @dataclass class Config: def __init__(self, db: ConfigDB) -> None: self.db = db self.tearank_config = TeaRankConfig(db=db) self.pm_config = PMConfig(db=db) self.url_types = URLTypes(db=db) def __str__(self) -> str: return f"Config(tearank_config={self.tearank_config}, pm_config={self.pm_config}, url_types={self.url_types})" # E501 def load_config() -> Config: logger.debug("Loading config") return Config(db=ConfigDB()) def load_dedupe_config() -> DedupeConfig: return DedupeConfig(db=ConfigDB()) ================================================ FILE: ranker/db.py ================================================ from uuid import UUID from sqlalchemy.dialects.postgresql import insert as pg_insert from core.db import DB from core.models import ( URL, Canon, CanonPackage, DependsOn, LegacyDependency, Package, PackageURL, TeaRank, TeaRankRun, URLType, Version, ) BATCH_SIZE = 20000 class GraphDB(DB): def __init__(self, legacy_pm_id: UUID, system_pm_ids: list[UUID]): super().__init__("graph.db") self.legacy_pm_id = legacy_pm_id self.system_pm_ids = system_pm_ids def is_canon_populated(self) -> bool: with self.session() as session: return session.query(Canon).count() > 0 def is_canon_package_populated(self) -> bool: with self.session() as session: return session.query(CanonPackage).count() > 0 def get_all_canons(self) -> dict[str, UUID]: """Fetch all existing canons as a map from URL to Canon ID.""" with self.session() as session: results = session.query(Canon.url, Canon.id).all() return dict(results) def get_packages_with_urls(self) -> list[tuple[UUID, str, str, str]]: """ Retrieve packages with their associated URLs and URL types. Returns: List of tuples containing id, name, and url """ with self.session() as session: return ( session.query(Package.id, Package.name, URL.url, URL.created_at) .join(PackageURL, Package.id == PackageURL.package_id) .join(URL, PackageURL.url_id == URL.id) .join(URLType, URL.url_type_id == URLType.id) .where(URLType.name == "homepage") # we're deduplicating on homepage .order_by(URL.created_at.desc()) .all() ) def load_canonical_packages(self, data: list[Canon]) -> None: """ Load canonical packages into the database in batches, handling conflicts. Args: data: List of Canon objects. 
""" with self.session() as session: for i in range(0, len(data), BATCH_SIZE): batch = data[i : i + BATCH_SIZE] if not batch: continue # Convert batch objects to dictionaries for insert statement insert_data = [ {"id": item.id, "url": item.url, "name": item.name} for item in batch ] stmt = pg_insert(Canon).values(insert_data) stmt = stmt.on_conflict_do_nothing(index_elements=["url"]) if stmt is not None: session.execute(stmt) # log batch_number = (i // BATCH_SIZE) + 1 total_batches = (len(data) + BATCH_SIZE - 1) // BATCH_SIZE self.logger.log( f"Processed Canon batch {batch_number} of {total_batches}" ) session.commit() def load_canonical_package_mappings(self, data: list[CanonPackage]) -> None: """ Load canonical package mappings into the database in batches, updating on conflict. Args: data: List of CanonPackage objects. """ with self.session() as session: for i in range(0, len(data), BATCH_SIZE): batch = data[i : i + BATCH_SIZE] if not batch: continue # Convert batch objects to dictionaries insert_data = [ { "id": item.id, "canon_id": item.canon_id, "package_id": item.package_id, } for item in batch ] stmt = pg_insert(CanonPackage).values(insert_data) update_dict = {"canon_id": stmt.excluded.canon_id} # this is the unique constraint on canon_packages -> if its violated, # that means that the package has changed its URL, and the dedupe # logic has corrected the correct canon for this package stmt = stmt.on_conflict_do_update( index_elements=["package_id"], set_=update_dict ) if stmt is not None: session.execute(stmt) # log batch_number = (i // BATCH_SIZE) + 1 total_batches = (len(data) + BATCH_SIZE - 1) // BATCH_SIZE self.logger.log( f"Processed CanonPackage batch {batch_number} of {total_batches}" ) session.commit() def get_packages(self) -> list[tuple[UUID, UUID]]: """Gets all packages for the run""" self.logger.debug(f"Getting packages for {self.system_pm_ids} package managers") with self.session() as session: return ( session.query(Package.id, Package.package_manager_id) .where(Package.package_manager_id.in_(self.system_pm_ids)) .all() ) def get_dependencies(self, package_id: UUID) -> list[tuple[UUID]]: """Gets all the dependencies based on the CHAI data model""" with self.session() as session: return ( session.query(DependsOn.dependency_id) .join(Version, DependsOn.version_id == Version.id) .join(Package, Version.package_id == Package.id) .filter(Package.id == package_id) .all() ) def get_package_to_canon_mapping(self) -> dict[UUID, UUID]: with self.session() as session: return { canon_package.package_id: canon.id for canon, canon_package in session.query(Canon, CanonPackage) .join(CanonPackage, Canon.id == CanonPackage.canon_id) .join(Package, CanonPackage.package_id == Package.id) .where(Package.package_manager_id != self.legacy_pm_id) } def get_legacy_dependencies(self, package_id: UUID) -> list[tuple[UUID]]: """Gets all the legacy dependencies based on the legacy CHAI data model""" with self.session() as session: return ( session.query(LegacyDependency.dependency_id) .filter(LegacyDependency.package_id == package_id) .filter(LegacyDependency.dependency_id != package_id) .all() ) def load_tea_ranks(self, data: list[TeaRank]) -> None: """Loads tea ranks into the database""" with self.session() as session: session.add_all(data) session.commit() def load_tea_rank_runs(self, data: list[TeaRankRun]) -> None: """Loads tea rank runs into the database""" with self.session() as session: session.add_all(data) session.commit() def get_current_tea_rank_run(self) -> TeaRankRun | None: """Gets 
the current tea rank run""" with self.session() as session: return ( session.query(TeaRankRun).order_by(TeaRankRun.created_at.desc()).first() ) ================================================ FILE: ranker/dedupe.py ================================================ #!/usr/bin/env uv run --with sqlalchemy==2.0.34 --with permalint==0.1.12 from datetime import datetime from uuid import UUID, uuid4 from permalint import is_canonical_url from sqlalchemy import update from sqlalchemy.orm import Session from core.db import DB from core.logger import Logger from core.models import URL, BaseModel, Canon, CanonPackage, Package, PackageURL from package_managers.crates.structs import ( CanonPackageUpdatePayload, CanonUpdatePayload, ) from ranker.config import DedupeConfig, load_dedupe_config from ranker.naming import compute_canon_name, get_effective_canon_name class DedupeDB(DB): def __init__(self, config: DedupeConfig): super().__init__("ranker.db") self.config: DedupeConfig = config def get_current_canons(self) -> dict[UUID, Canon]: """Get current canons as a mapping from URL ID to Canon object.""" with self.session() as session: canons = session.query(Canon).all() return {canon.url_id: canon for canon in canons} def get_current_canon_packages(self) -> dict[UUID, dict[str, UUID]]: """Get current canon-package mappings as dict[package_id -> canon_id].""" with self.session() as session: canon_packages = session.query(CanonPackage).all() return { cp.package_id: {"id": cp.id, "canon_id": cp.canon_id} for cp in canon_packages } def get_packages_with_homepages(self) -> list[tuple[Package, URL]]: with self.session() as session: return ( session.query(Package, URL) .join(PackageURL, Package.id == PackageURL.package_id) .join(URL, PackageURL.url_id == URL.id) .where(URL.url_type_id == self.config.homepage_url_type_id) .order_by(Package.id, URL.created_at.desc()) # Latest URL / package .all() ) def get_all_package_names(self) -> dict[UUID, str]: with self.session() as session: return {pkg.id: pkg.name for pkg in session.query(Package).all()} # TODO: first to be optimized def ingest( self, new_canons: list[Canon], canon_updates: list[CanonUpdatePayload], new_canon_packages: list[CanonPackage], updated_canon_packages: list[CanonPackageUpdatePayload], ) -> None: with self.session() as session: if new_canons: self.add_with_flush(session, new_canons) if canon_updates: session.execute(update(Canon), canon_updates) if new_canon_packages: self.add_with_flush(session, new_canon_packages) if updated_canon_packages: session.execute(update(CanonPackage), updated_canon_packages) session.commit() def add_with_flush(self, session: Session, rows: list[BaseModel]) -> None: session.add_all(rows) session.flush() def get_latest_homepage_per_package( packages_with_homepages: list[tuple[Package, URL]], logger: Logger ) -> tuple[dict[UUID, URL], list[URL]]: """Get the latest homepage URL for each package.""" latest_homepages: dict[UUID, URL] = {} non_canonical_urls: list[URL] = [] for pkg, url in packages_with_homepages: # Since we ordered by Package.id, URL.created_at.desc(), # the first URL we see for each package is the latest if pkg.id not in latest_homepages: # skip empty or whitespace-only urls if not url.url or url.url.strip() == "": continue # guard against non-canonicalized URLs try: if not is_canonical_url(url.url): non_canonical_urls.append(url) else: latest_homepages[pkg.id] = url except Exception as e: logger.warn(f"Error checking if {url.url} is canonical: {e}") non_canonical_urls.append(url) if non_canonical_urls: 
logger.warn(f"Found {len(non_canonical_urls)} non-canonicalized URLs in URLs") return latest_homepages, non_canonical_urls def build_canon_update_payload( canon: Canon, new_name: str, now: datetime ) -> CanonUpdatePayload: """Build update payload for a canon with a new name.""" return CanonUpdatePayload(id=canon.id, name=new_name, updated_at=now) def build_canon_package_update_payload( current_canon_packages: dict[UUID, dict[str, UUID]], pkg_id: UUID, new_canon_id: UUID, now: datetime, ) -> CanonPackageUpdatePayload: """Build an update payload for a canon package.""" canon_package_data = current_canon_packages.get(pkg_id) if canon_package_data is None: raise ValueError(f"No canon package mappings for {pkg_id}") current_canon_package_id = canon_package_data.get("id") if current_canon_package_id is None: raise ValueError(f"{pkg_id} has no canon package ID but canon: {new_canon_id}") return CanonPackageUpdatePayload( id=current_canon_package_id, canon_id=new_canon_id, updated_at=now ) def process_deduplication_changes( latest_homepages: dict[UUID, URL], current_canons: dict[UUID, Canon], current_canon_packages: dict[UUID, dict[str, UUID]], name_map: dict[UUID, str], logger: Logger, ) -> tuple[ list[Canon], list[CanonUpdatePayload], list[CanonPackage], list[CanonPackageUpdatePayload], ]: """ Process deduplication changes based on current state. Returns: tuple of (canons_to_create, canons_to_update, mappings_to_create, mappings_to_update) """ now = datetime.now() canons_to_create: dict[UUID, Canon] = {} # indexed by url_id for deduplication canons_to_update: dict[UUID, CanonUpdatePayload] = {} # indexed by canon_id mappings_to_create: list[CanonPackage] = [] mappings_to_update: list[CanonPackageUpdatePayload] = [] for pkg_id, url in latest_homepages.items(): # Check if the URL has an existing canon existing_canon: Canon | None = current_canons.get(url.id) # If no existing canon, check if we're creating one for this URL if existing_canon is None: existing_canon = canons_to_create.get(url.id) existing_canon_id: UUID | None = existing_canon.id if existing_canon else None # Check if the package is already linked to a canon linked_canon_id: UUID | None = current_canon_packages.get(pkg_id, {}).get( "canon_id" ) if existing_canon_id is None: # No canon exists for this URL - create a new one pkg_name = name_map.get(pkg_id) # Compute the name for the new canon name = compute_canon_name(url.url, pkg_name) new_canon = Canon( id=uuid4(), url_id=url.id, name=name, created_at=now, updated_at=now, ) canons_to_create[url.id] = new_canon # Handle package-to-canon mapping if linked_canon_id is None: # Create new canon package mapping new_canon_package = CanonPackage( id=uuid4(), canon_id=new_canon.id, package_id=pkg_id, created_at=now, updated_at=now, ) mappings_to_create.append(new_canon_package) else: # Update existing mapping to point to new canon update_payload = build_canon_package_update_payload( current_canon_packages, pkg_id, new_canon.id, now ) mappings_to_update.append(update_payload) else: # Canon exists - check if name needs updating # Get the most current name (considering pending updates) current_name = get_effective_canon_name(existing_canon, canons_to_update) pkg_name = name_map.get(pkg_id) desired_name = compute_canon_name(url.url, pkg_name, current_name) # Update canon name if it's different if desired_name != current_name: update_payload = build_canon_update_payload( existing_canon, desired_name, now ) canons_to_update[existing_canon.id] = update_payload # Handle package-to-canon mapping if 
linked_canon_id is None: # Create new canon package mapping new_canon_package = CanonPackage( id=uuid4(), canon_id=existing_canon_id, package_id=pkg_id, created_at=now, updated_at=now, ) mappings_to_create.append(new_canon_package) elif linked_canon_id != existing_canon_id: # Update existing mapping to correct canon update_payload = build_canon_package_update_payload( current_canon_packages, pkg_id, existing_canon_id, now ) mappings_to_update.append(update_payload) # else: mapping is already correct, no action needed return ( list(canons_to_create.values()), list(canons_to_update.values()), mappings_to_create, mappings_to_update, ) def main(config: DedupeConfig, db: DedupeDB): logger = Logger("ranker.dedupe") now = datetime.now() logger.log(f"Starting deduplication process at {now}") # 1. Get current state current_canons: dict[UUID, Canon] = db.get_current_canons() logger.debug(f"Found {len(current_canons)} current canons") current_canon_packages: dict[UUID, dict[str, UUID]] = ( db.get_current_canon_packages() ) logger.debug(f"Found {len(current_canon_packages)} current canon packages") packages_with_homepages: list[tuple[Package, URL]] = ( db.get_packages_with_homepages() ) logger.debug(f"Found {len(packages_with_homepages)} packages with homepages") name_map: dict[UUID, str] = db.get_all_package_names() # 2. Get latest homepage per package latest_homepages, non_canonical_urls = get_latest_homepage_per_package( packages_with_homepages, logger ) logger.debug(f"Found {len(latest_homepages)} packages with latest homepages") # 3. Process changes differentially (canons_to_create, canons_to_update, mappings_to_create, mappings_to_update) = ( process_deduplication_changes( latest_homepages, current_canons, current_canon_packages, name_map, logger ) ) # 4. Apply changes logger.log("-" * 100) logger.log("Changes to apply:") logger.log(f" Canons to create: {len(canons_to_create)}") logger.log(f" Canons to update: {len(canons_to_update)}") logger.log(f" Mappings to create: {len(mappings_to_create)}") logger.log(f" Mappings to update: {len(mappings_to_update)}") logger.log("-" * 100) if not config.load: logger.log("Skipping changes because LOAD is not set") return db.ingest( canons_to_create, canons_to_update, mappings_to_create, mappings_to_update ) logger.log("✅ Deduplication process completed") if non_canonical_urls: logger.warn(f"Found {len(non_canonical_urls)} non-canonical URLs") if __name__ == "__main__": config: DedupeConfig = load_dedupe_config() db: DedupeDB = DedupeDB(config) try: main(config, db) finally: db.close() ================================================ FILE: ranker/main.py ================================================ #! 
/usr/bin/env pkgx +python@3.11 uv run # /// script # dependencies = [ # "permalint==0.1.12", # "sqlalchemy==2.0.34", # "numpy==2.2.3", # "rustworkx==0.16.0", # "psycopg2-binary==2.9.10", # ] # /// from dataclasses import dataclass from uuid import UUID from core.logger import Logger from core.models import TeaRank, TeaRankRun from ranker.config import Config, DedupeConfig, load_config, load_dedupe_config from ranker.db import GraphDB from ranker.dedupe import DedupeDB from ranker.dedupe import main as dedupe from ranker.rx_graph import CHAI, PackageNode logger = Logger("ranker.main") @dataclass class PackageInfo: id: UUID package_manager_id: UUID def load_graph( config: Config, db: GraphDB, package_to_canon_mapping: dict[UUID, UUID], packages: list[PackageInfo], stop: int | None = None, ) -> CHAI: chai = CHAI() missing: set[tuple[UUID, UUID]] = set() npm_pm_id = config.pm_config.npm_pm_id for i, package in enumerate(packages): # add this package's canon to the graph try: canon_id = package_to_canon_mapping[package.id] except KeyError: missing.add((str(package.id), str(package.package_manager_id))) continue # grab the object from the graph if it exists if canon_id in chai.canon_to_index: node = chai[chai.canon_to_index[canon_id]] else: # otherwise, create a new one node = PackageNode(canon_id=canon_id) node.index = chai.add_node(node) # add the package manager id to the node node.package_manager_ids.append(package.package_manager_id) # now grab its dependencies # there are two cases: legacy CHAI or new CHAI # the db helps us these two distinctions with two different helpers # TODO: eventually, CHAI will be at package to package, so everything will # "get_legacy_dependencies" if package.package_manager_id == npm_pm_id: dependencies = db.get_legacy_dependencies(package.id) else: dependencies = db.get_dependencies(package.id) # for each dependency, add the corresponding canon to the graph # and set the edge for dependency in dependencies: dep = dependency[0] try: dep_canon_id = package_to_canon_mapping[dep] except KeyError: missing.add((str(dep), str(package.package_manager_id))) continue dep_node = PackageNode(canon_id=dep_canon_id) dep_node.index = chai.add_node(dep_node) chai.add_edge(node.index, dep_node.index, {}) if stop is not None and i >= stop: break if i % 1000 == 0: logger.debug(f"Processing package {i+1}/{len(packages)} (ID: {package.id})") logger.log(f"Missing {len(missing)} packages") # TODO: should we save the missing packages? 
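    # At this point the graph holds one node per canon and one edge per
    # (canon -> dependency canon) pair; packages without a canon mapping were
    # recorded in `missing` and skipped.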
return chai def main(config: Config, db: GraphDB) -> None: # get the map of package_id -> canon_id package_to_canon: dict[UUID, UUID] = db.get_package_to_canon_mapping() logger.log(f"{len(package_to_canon)} package to canon mappings") # get the list of packages packages = [ PackageInfo(id=id, package_manager_id=pm_id) for id, pm_id in db.get_packages() ] logger.log(f"{len(packages)} packages") # load the graph chai = load_graph(config, db, package_to_canon, packages) logger.log(f"CHAI has {len(chai)} nodes and {len(chai.edge_to_index)} edges") # now, I need to generate the personalization vector canons_with_source_types: list[tuple[UUID, list[UUID]]] = [] for idx in chai.node_indexes(): node = chai[idx] canons_with_source_types.append((node.canon_id, node.package_manager_ids)) config.tearank_config.personalize(canons_with_source_types) # generate tea_ranks ranks = chai.distribute( config.tearank_config.personalization, config.tearank_config.split_ratio, config.tearank_config.tol, config.tearank_config.max_iter, ) str_ranks = {str(chai[id].canon_id): f"{rank}" for id, rank in ranks.items()} # Determine the next run ID latest_run = db.get_current_tea_rank_run() current_run = latest_run.run + 1 if latest_run else 1 logger.log(f"Starting TeaRank run number: {current_run}") # Prepare TeaRank objects with the *next* run ID tea_ranks = [ TeaRank(canon_id=UUID(canon_id), tea_rank_run=current_run, rank=rank) for canon_id, rank in str_ranks.items() ] # Load all ranks first db.load_tea_ranks(tea_ranks) # Only after successfully loading ranks, load the corresponding run entry tea_rank_run = TeaRankRun( run=current_run, split_ratio=config.tearank_config.split_ratio ) db.load_tea_rank_runs([tea_rank_run]) logger.log("Done!") if __name__ == "__main__": # first deduplicate dedupe_config: DedupeConfig = load_dedupe_config() dedupe_db: DedupeDB = DedupeDB(dedupe_config) try: dedupe(dedupe_config, dedupe_db) except Exception as e: logger.error(f"Some error occurred when deduplicating: {e}") raise # then rank ranker_config = load_config() ranker_db = GraphDB( ranker_config.pm_config.npm_pm_id, ranker_config.pm_config.system_pm_ids ) try: main(ranker_config, ranker_db) except Exception as e: logger.error(f"Some error occurred when ranking: {e}") raise ================================================ FILE: ranker/naming.py ================================================ #!/usr/bin/env uv run --with permalint==0.1.12 from uuid import UUID from permalint import possible_names from core.models import Canon from package_managers.crates.structs import CanonUpdatePayload def compute_canon_name(url: str, package_name: str, existing_name: str = "") -> str: """ Determines the name of the canon, based on the package name, URL, and canon name Notes: - the logic for determining whether it's an update or not, is left to the caller - this function does not do anything for monorepos - as a fallback, the original package name is always returned """ if not url or not package_name: raise ValueError(f"Missing one of url={url} | package_name={package_name}") best_guess = extract_repo_name_from_url(url) if existing_name: # guard if url == existing_name: return package_name return check_if_better(best_guess, package_name, existing_name) return package_name def check_if_better(best_guess: str, package_name: str, existing_name: str) -> str: """Check if we have a better name than the existing name.""" if best_guess == package_name: # boom, this is the ideal case. the repo and the package share a name! 
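        # Worked example for the fallback below (hypothetical names): with
        # best_guess="react", score_name("react", "react") = 1 + (15 - 5) = 11,
        # while score_name("@types/react", "react") = 1 + (15 - 12) - 3 = 1,
        # so the shorter, unscoped name wins.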
return package_name package_name_score = score_name(package_name, best_guess) existing_name_score = score_name(existing_name, best_guess) if package_name_score > existing_name_score: return package_name return existing_name def extract_repo_name_from_url(url: str) -> str: """ Extract a reasonable name from a URL, typically the repository name. We're trusting permalint's rules for guessing a package's name based on the homepage URL here. Note that the fallback is always to retrieve the full URL name, which will be the only element in the result """ if not url: return url names: list[str] = possible_names(url) if len(names) > 1: return names[1].lower() else: return names[0].lower() def score_name(name: str, best_guess: str) -> int: """ Score a package name based on some rules 1. Prefer shorter, simpler names 2. Prefer names without prefixes/suffixes that suggest forks or variations """ if not name and not best_guess: raise ValueError(f"Missing one of name={name} | guess={best_guess}") score = 0 clean = name.lower() if best_guess in clean: score += 1 # Prefer shorter names score += max(0, 15 - len(clean)) # Penalize scoped packages if clean.startswith("@"): score -= 3 return score def get_effective_canon_name( canon: Canon, pending_updates: dict[UUID, CanonUpdatePayload] ) -> str: """ Get the effective name for a canon, considering both current and pending updates. This ensures we always work with the most up-to-date name when processing multiple canons that might reference each other. """ if canon.id in pending_updates: return pending_updates[canon.id]["name"] return canon.name ================================================ FILE: ranker/requirements.txt ================================================ # This file was autogenerated by uv via the following command: # uv pip compile --group ranker -o ranker/requirements.txt numpy==2.3.0 # via # chai (pyproject.toml:ranker) # rustworkx rustworkx==0.16.0 # via chai (pyproject.toml:ranker) ================================================ FILE: ranker/rx_graph.py ================================================ #!/usr/bin/env pkgx +python@3.11 uv run from collections import defaultdict, deque from dataclasses import dataclass, field from decimal import Decimal from typing import Any from uuid import UUID import rustworkx as rx from core.logger import Logger logger = Logger("ranker.chai_graph") @dataclass class PackageNode: """Note that this is different from PackageInfo in main.py! This is based on canons!""" canon_id: UUID package_manager_ids: list[UUID] = field(default_factory=list) weight: Decimal = field(default_factory=Decimal) index: int = field(default_factory=lambda: -1) class CHAI(rx.PyDiGraph): def __init__(self): super().__init__() self.canon_to_index: dict[UUID, int] = {} self.edge_to_index: dict[tuple[int, int], int] = {} def add_node(self, node: PackageNode) -> int: """Safely add a node to the graph. If exists, return the index""" if node.canon_id not in self.canon_to_index: index = super().add_node(node) self.canon_to_index[node.canon_id] = index return self.canon_to_index[node.canon_id] def add_edge(self, u: int, v: int, edge_data: Any) -> None: """Safely add an edge to the graph. 
If exists, return the index""" if (u, v) not in self.edge_to_index: index = super().add_edge(u, v, edge_data) self.edge_to_index[(u, v)] = index return self.edge_to_index[(u, v)] def generate_personalization( self, personalization: dict[UUID, Decimal] ) -> dict[int, float]: result = {} for id, weight in personalization.items(): if id not in self.canon_to_index: continue result[self.canon_to_index[id]] = float(weight) return result def pagerank( self, alpha: Decimal, personalization: dict[UUID, Decimal] ) -> rx.CentralityMapping: return rx.pagerank( self, alpha=float(alpha), personalization=self.generate_personalization(personalization), ) def distribute( self, personalization: dict[UUID, Decimal], split_ratio: Decimal, tol: Decimal, max_iter: int = 100, ) -> dict[int, Decimal]: """Distribute values across the graph based on dependencies.""" if not personalization: raise ValueError("Personalization is empty") # Convert personalization to index-based dict result = defaultdict(Decimal) q: deque[tuple[int, Decimal]] = deque() for id, weight in personalization.items(): if id not in self.canon_to_index: logger.log(f"{id} is type {type(id)}") raise ValueError(f"Canon ID {id} not found in CHAI") q.append((self.canon_to_index[id], weight)) iterations: int = 0 while q: iterations += 1 node_id, weight = q.popleft() # Ensure iteration count check happens regardless of other logic if iterations > max_iter: logger.warn(f"Max iterations reached: {max_iter}") break dependencies = self.successors(node_id) num_dependencies = len(dependencies) # If the weight arriving is already below tolerance, or if it's a terminal # node, add the entire weight to the result and stop distributing from # this node in this path. if num_dependencies == 0 or weight < tol: result[node_id] += weight continue # Handle non-terminal nodes with significant weight (weight >= tol) # Calculate the portion of weight the current node keeps. keep = weight * split_ratio # Always add the 'keep' amount to the node's result. # The tolerance check below is only for preventing further distribution # of insignificant amounts, not for deciding if the current node's # share is worth keeping. result[node_id] += keep # Calculate the total amount to be split among dependencies. split = weight - keep # Equivalent to weight * (1 - split_ratio) # Calculate split per dependency. split_per_dep = split / num_dependencies # Use tolerance to gate further distribution: Only queue dependencies # if the amount they would receive individually is significant enough. if split_per_dep >= tol: for dep in dependencies: q.append((dep.index, split_per_dep)) # If split_per_dep < tol, the remaining 'split' amount is effectively # dropped from this distribution path, as it's deemed too small # to continue propagating. This helps prune the calculation. logger.log(f"Iterations: {iterations}. Ranks sum to {sum(result.values()):.9f}") return dict(result) ================================================ FILE: ranker/utils/analyze_ranks.py ================================================ #!/usr/bin/env pkgx +python@3.11 uv run --with pandas --with sqlalchemy """Script to analyze rank data and generate formatted CSV output. 
Usage: python analyze_ranks.py [--file PATH_TO_RANK_FILE] """ from __future__ import annotations import argparse import json import os from pathlib import Path import pandas as pd from sqlalchemy import create_engine, distinct, func, select from sqlalchemy.orm import Session from core.models import ( Canon, CanonPackage, Package, PackageManager, Source, ) def get_latest_rank_file() -> Path: """Get the path to the latest rank file.""" data_dir = Path("data/ranker/ranks") latest_symlink = data_dir / "latest.json" return latest_symlink.resolve() def get_rank_file(filename: str | None = None) -> Path: """Get the path to the rank file. Args: filename: Optional path to a specific rank file. Returns: Path to the rank file. Raises: FileNotFoundError: If the specified file doesn't exist. """ if filename: file_path = Path(filename) if not file_path.exists(): raise FileNotFoundError(f"Rank file not found: {filename}") return file_path return get_latest_rank_file() def load_rank_data(file_path: Path) -> dict[str, float]: """Load rank data from JSON file.""" with open(file_path) as f: return json.load(f) def get_output_filename(input_path: Path) -> Path: """Generate output filename based on input filename.""" # Extract the rank number from filenames like "ranks_37_0.7" parts = input_path.stem.split("_") rank_num = "_".join(parts[1:]) if len(parts) >= 2 else input_path.stem output_dir = Path("data/ranker/analysis") output_dir.mkdir(parents=True, exist_ok=True) return output_dir / f"formatted_ranks_{rank_num}.csv" def get_package_data(ranks: dict[str, float], db_session: Session) -> pd.DataFrame: """Query database for package information and combine with ranks.""" # Query for package data including URLs and aggregated package info query = ( select( Canon.id.label("canon_id"), Canon.url.label("homepage_url"), Canon.name.label("package_name"), func.array_agg(distinct(Source.type)).label("package_managers"), func.array_agg(distinct(Package.name)).label("package_names"), ) .join(CanonPackage, Canon.id == CanonPackage.canon_id) .join(Package, CanonPackage.package_id == Package.id) .join(PackageManager, Package.package_manager_id == PackageManager.id) .join(Source, PackageManager.source_id == Source.id) .group_by(Canon.id, Canon.url, Canon.name) ) results = pd.DataFrame(db_session.execute(query)) # Convert UUID objects to strings in results DataFrame results["canon_id"] = results["canon_id"].astype(str) # Convert ranks to DataFrame and merge ranks_df = pd.DataFrame.from_dict(ranks, orient="index", columns=["tea_rank"]) ranks_df.index.name = "canon_id" ranks_df.reset_index(inplace=True) # Merge and sort final_df = pd.merge(ranks_df, results, on="canon_id") if final_df.empty: raise ValueError( "No data to process - no matching canon_ids between ranks and database results" ) final_df.sort_values(["tea_rank"], ascending=[False], inplace=True) return final_df[ [ "canon_id", "package_name", "tea_rank", "homepage_url", "package_managers", "package_names", ] ] def parse_args() -> argparse.Namespace: """Parse command-line arguments.""" parser = argparse.ArgumentParser( description="Analyze rank data and generate formatted CSV output" ) parser.add_argument( "--file", type=str, default=None, help="Path to a specific rank file. 
If not provided, the latest rank file will be used.", ) return parser.parse_args() def main() -> None: """Main function to process rank data and generate CSV.""" # Parse command-line arguments args = parse_args() # Setup database connection engine = create_engine(os.environ["CHAI_DATABASE_URL"]) # Get input and output paths rank_file = get_rank_file(args.file) output_file = get_output_filename(rank_file) print(f"Output will be saved to: {output_file}") # Process data ranks = load_rank_data(rank_file) with Session(engine) as session: result_df = get_package_data(ranks, session) # Save output result_df.to_csv(output_file, index=False) if __name__ == "__main__": main() ================================================ FILE: ranker/utils/parse_log.py ================================================ #!/usr/bin/env pkgx +python@3.11 uv run """ Parse graph run log to calculate processing metrics. This script analyzes a log file to compute: 1. Average time to process 1,000 packages 2. Average packages processed per second Usage: From file: ./parse_log.py log_file From tmux: tmux capture-pane -p | ./parse_log.py """ import re import sys from statistics import mean def parse_log_line(line: str) -> tuple[float, int]: """ Extract timestamp and package count from a log line. Args: line: A line from the log file Returns: Tuple of (timestamp, package_count) """ pattern = r"^(\d+\.\d+): \[graph\.main\]: (\d+):" match = re.match(pattern, line) if match: timestamp = float(match.group(1)) package_count = int(match.group(2)) return timestamp, package_count return None def calculate_metrics(log_lines: list[str]) -> tuple[float, float]: """ Calculate processing metrics from log lines. Args: log_lines: List of log file lines Returns: Tuple of (avg_time_per_1000, packages_per_second) """ data_points = [] previous_timestamp = None previous_count = None for line in log_lines: result = parse_log_line(line) if not result: continue timestamp, count = result if previous_timestamp is not None and previous_count is not None: time_diff = timestamp - previous_timestamp count_diff = count - previous_count # Only process if we're looking at approximately 1000 package difference if 900 <= count_diff <= 1100: data_points.append((time_diff, count_diff)) previous_timestamp = timestamp previous_count = count if not data_points: return 0.0, 0.0 # Calculate average time for processing 1000 packages time_diffs = [time for time, _ in data_points] avg_time_per_1000 = mean(time_diffs) # Calculate average packages per second packages_per_second = 1000 / avg_time_per_1000 return avg_time_per_1000, packages_per_second def main(): """Process the log data and display metrics.""" log_lines = [] # Read from file if specified, otherwise from stdin if len(sys.argv) == 2: log_file = sys.argv[1] try: with open(log_file) as f: log_lines = f.readlines() except OSError as e: print(f"Error reading log file: {e}") sys.exit(1) else: # Read from stdin (for piping from tmux) log_lines = sys.stdin.readlines() if not log_lines: print(f"Usage: {sys.argv[0]} [log_file]") print(f" or: tmux capture-pane -p | {sys.argv[0]}") sys.exit(1) avg_time, pkg_per_second = calculate_metrics(log_lines) print(f"Average time to process 1,000 packages: {avg_time:.2f} seconds") print(f"Average packages processed per second: {pkg_per_second:.2f}") if __name__ == "__main__": main() ================================================ FILE: scripts/chai-legacy-loader/README.md ================================================ # CHAI Legacy Data Loader Tools for loading legacy CHAI 
data into the current CHAI database framework.

> [!NOTE]
> This can only be executed if you have access to the Legacy CHAI database. If not,
> you can ignore everything inside this folder.

## Requirements

- pkgx.sh

## Overview

This is a set of utility Python scripts to efficiently transfer data from the legacy CHAI database into the current CHAI schema.

## Loader Scripts

- `add_package_fields.py`: enriches package data dumps from Legacy CHAI with fields required by CHAI
- `copy_dependencies_no_thread.py`: fetches dependency data from `public.sources` for a given package manager and uses psycopg2's `copy_expert` function to load it in batches into CHAI
- `batch_insert_urls.py` / `batch_insert_package_urls.py`: add urls and package_urls relationships from Legacy CHAI

## Usage

1. Set up environment variables (or use defaults):

   ```bash
   export LEGACY_CHAI_DATABASE_URL=credentials_from_itn
   export CHAI_DATABASE_URL=postgresql://postgres:postgres@localhost:5435/chai
   ```

2. Loading packages

   1. `psql $LEGACY_CHAI_DATABASE_URL -t -A -F',' -f sql/packages.sql -o /path/to/output.csv`
   1. Run `add_package_fields.py /file/from/step/1.csv /path/to/output package_manager_id` to enrich it with additional fields
   1. `psql $CHAI_DATABASE_URL -c "CREATE TABLE temp_import (LIKE packages);"`
   1. `psql $CHAI_DATABASE_URL -c "\COPY temp_import (id, derived_id, name, package_manager_id, import_id, created_at, updated_at) FROM '/path/to/csv/from/step/2' WITH (FORMAT csv, HEADER true, DELIMITER ',');"`
   1. `psql $CHAI_DATABASE_URL -c "INSERT INTO packages SELECT * FROM temp_import ON CONFLICT DO NOTHING;"`
   1. `psql $CHAI_DATABASE_URL -c "DROP TABLE temp_import;"`

3. Loading dependencies

   With pkgx, just invoke the script from the root directory of chai:

   ```bash
   cd ../..
   PYTHONPATH=. scripts/chai-legacy-loader/copy_dependencies_no_thread.py
   ```

4. Loading URLs

   1. Run [urls.sql](sql/urls.sql), which generates a csv
   1. Run `batch_insert_urls.py /path/to/step/1 -d` to insert the raw URLs, and get a dump of the loaded IDs and the URL
   1. Run `batch_insert_package_urls.py /path/to/step/1 --urls /path/to/step/2` to insert the package_url relationships. If no cache is provided, it'll try to read all loaded URLs and their IDs from the db (long)

```bash
pkgx psql -h localhost -U gardener -p 5430 temp_chai < dev_chai_fixed.sql
```

================================================
FILE: scripts/chai-legacy-loader/add_package_fields.py
================================================
#!/usr/bin/env pkgx +python@3.11 uv run
"""
For a csv generated from legacy chai, this script adds the id, created_at, and
updated_at fields to the csv.

The input CSV must have a header row: "derived_id,name,import_id".
The package_manager_id argument must be a valid UUID.

Usage:
    chmod +x add_package_fields.py
    ./add_package_fields.py input.csv output.csv <package_manager_id>
"""

import csv
import sys
import uuid
from datetime import UTC, datetime


def validate_uuid(uuid_string: str) -> None:
    """Raises ValueError if the string is not a valid UUID."""
    try:
        uuid.UUID(uuid_string)
    except ValueError as exc:
        raise ValueError(f"Invalid UUID format: {uuid_string}") from exc


def process_csv(input_file: str, output_file: str, package_manager_id: str) -> None:
    """
    Processes the input CSV, validates headers, adds new fields, and writes to
    the output CSV.

    Args:
        input_file: Path to the input CSV file.
        output_file: Path to the output CSV file.
        package_manager_id: The UUID of the package manager.

    Raises:
        ValueError: If the input CSV header is missing or incorrect.
""" now = datetime.now(UTC).isoformat() expected_header: list[str] = ["derived_id", "name", "import_id"] output_header: list[str] = [ "id", "derived_id", "name", "package_manager_id", "import_id", "created_at", "updated_at", ] with ( open(input_file, newline="") as infile, open(output_file, "w", newline="") as outfile, ): reader: csv._reader = csv.reader(infile) writer: csv._writer = csv.writer(outfile) # 1. Validate header row header: list[str] | None = next(reader, None) if header is None: raise ValueError(f"Input file '{input_file}' is missing a header row.") if header != expected_header: raise ValueError( f"Input file '{input_file}' header mismatch. " f"Expected: {expected_header}, Got: {header}" ) # Write output header writer.writerow(output_header) # Process data rows row_count = 0 for row in reader: if len(row) != len(expected_header): msg = f"Warning: Skipping row {reader.line_num} due to incorrect \ column count ({len(row)} instead of {len(expected_header)}): {row}" print(msg, file=sys.stderr) continue row_uuid: str = str(uuid.uuid4()) derived_id, name, import_id = row output_row: list[str] = [ row_uuid, derived_id, name, package_manager_id, import_id, now, now, ] writer.writerow(output_row) row_count += 1 print(f"Processed {row_count} rows from {input_file} -> {output_file}") if __name__ == "__main__": if len(sys.argv) != 4: print( f"Usage: {sys.argv[0]} input.csv output.csv ", file=sys.stderr, ) sys.exit(1) input_csv_path: str = sys.argv[1] output_csv_path: str = sys.argv[2] pm_uuid: str = sys.argv[3] try: # 6. Validate package_manager argument is a UUID validate_uuid(pm_uuid) process_csv(input_csv_path, output_csv_path, pm_uuid) except FileNotFoundError as e: print(f"Error: Input file not found - {e}", file=sys.stderr) sys.exit(1) except ValueError as e: print(f"Error: {e}", file=sys.stderr) sys.exit(1) except Exception as e: print(f"An unexpected error occurred: {e}", file=sys.stderr) sys.exit(1) ================================================ FILE: scripts/chai-legacy-loader/batch_insert_package_urls.py ================================================ #!/usr/bin/env pkgx +python@3.11 uv run --with psycopg2==2.9.9 import argparse import csv import os import uuid from datetime import datetime import psycopg2 import psycopg2.extras from core.config import Config, PackageManager from core.logger import Logger CHAI_DATABASE_URL = os.environ.get("CHAI_DATABASE_URL") DEFAULT_BATCH_SIZE = 20000 class ChaiPackageUrlsDB: """Handles DB interactions for batch package_urls insertion.""" def __init__(self, logger: Logger): self.logger = logger if not CHAI_DATABASE_URL: self.logger.error("CHAI_DATABASE_URL environment variable not set.") raise ValueError("CHAI_DATABASE_URL not set") self.conn = None self.cursor = None try: self.conn = psycopg2.connect(CHAI_DATABASE_URL) self.cursor = self.conn.cursor() self.logger.log("CHAI database connection established for PackageUrlsDB") except psycopg2.Error as e: self.logger.error(f"PackageUrlsDB connection error: {e}") raise def load_package_id_cache(self) -> dict[str, uuid.UUID]: """Load all packages (import_id -> id) into a cache.""" self.logger.log("Loading package_id cache from database...") query = "SELECT import_id, id FROM packages" try: self.cursor.execute(query) cache = {str(row[0]): row[1] for row in self.cursor.fetchall() if row[0]} self.logger.log(f"Loaded {len(cache)} packages into package_id cache.") return cache except psycopg2.Error as e: self.logger.error(f"Error loading package_id cache: {e}") raise def load_url_id_cache_from_db( 
self, ) -> dict[tuple[str, uuid.UUID], uuid.UUID]: """Load all URLs ( (url, url_type_id) -> id ) into a cache from DB.""" self.logger.log("Loading url_id cache from database (fallback)...") query = "SELECT id, url, url_type_id FROM urls" cache: dict[tuple[str, uuid.UUID], uuid.UUID] = {} try: self.cursor.execute(query) for row in self.cursor.fetchall(): url_id, url_str, url_type_id = row[0], row[1], row[2] if url_str and url_type_id: cache[(url_str, url_type_id)] = url_id self.logger.log(f"Loaded {len(cache)} URLs into url_id cache from DB.") return cache except psycopg2.Error as e: self.logger.error(f"Error loading url_id cache from DB: {e}") raise def batch_insert_package_urls( self, data_tuples: list[tuple[uuid.UUID, uuid.UUID, uuid.UUID, datetime, datetime]], ) -> None: """Batch insert into package_urls table.""" if not data_tuples: return query = """ INSERT INTO package_urls (id, package_id, url_id, created_at, updated_at) VALUES %s ON CONFLICT (package_id, url_id) DO UPDATE SET updated_at = EXCLUDED.updated_at """ try: psycopg2.extras.execute_values( self.cursor, query, data_tuples, page_size=len(data_tuples) ) self.conn.commit() self.logger.log( f"Successfully inserted/updated {len(data_tuples)} package_urls" ) except psycopg2.Error as e: self.logger.error(f"Error during batch insert into package_urls: {e}") self.logger.log( f"Failed data sample: {data_tuples[0] if data_tuples else 'N/A'}" ) self.conn.rollback() raise except Exception as e: self.logger.error(f"Unexpected error during package_urls batch insert: {e}") self.conn.rollback() raise def close(self): if self.cursor: self.cursor.close() if self.conn: self.conn.close() self.logger.log("PackageUrlsDB connection closed.") def load_url_id_cache_from_file( cache_file_path: str, logger: Logger ) -> dict[tuple[str, uuid.UUID], uuid.UUID]: """Load URL ID cache from the CSV file generated by batch_insert_urls.py.""" logger.log(f"Loading url_id cache from file: {cache_file_path}...") cache: dict[tuple[str, uuid.UUID], uuid.UUID] = {} try: with open(cache_file_path, newline="", encoding="utf-8") as csvfile: reader = csv.reader(csvfile) header = next(reader, None) # Skip header if not header or header != ["id", "url", "url_type_id"]: logger.error( f"Invalid or missing header in URL cache file: {cache_file_path}. 
Expected ['id', 'url', 'url_type_id']" ) raise ValueError("Invalid URL cache file format") for i, row in enumerate(reader): if len(row) == 3: try: url_id_str, url_str, url_type_id_str = row[0], row[1], row[2] if url_str and url_type_id_str: # Ensure no empty strings cache[(url_str, uuid.UUID(url_type_id_str))] = uuid.UUID( url_id_str ) except ValueError as ve: logger.warn( f"Invalid UUID in URL cache file at row {i+2}: {row} - {ve}" ) continue else: logger.warn( f"Skipping malformed row in URL cache file at row {i+2}: {row}" ) logger.log(f"Loaded {len(cache)} URLs into url_id cache from file.") return cache except FileNotFoundError: logger.error(f"URL cache file not found: {cache_file_path}") raise except Exception as e: logger.error(f"Error loading URL cache file {cache_file_path}: {e}") raise def process_package_url_associations( input_csv_path: str, batch_size: int, script_execution_time: datetime, url_cache_csv_path: str | None, stop_at: int | None, main_logger: Logger, ) -> None: """Main processing logic for associating packages with URLs.""" main_logger.log(f"Starting package-URL association for: {input_csv_path}") main_logger.log( f"Batch size: {batch_size}, URL cache: {url_cache_csv_path}, Stop at: {stop_at}" ) try: config = Config(PackageManager.NPM) url_type_homepage_id = config.url_types.homepage url_type_source_id = config.url_types.source except Exception as e: main_logger.error(f"Error initializing config: {e}") return db_handler = None package_id_cache: dict[str, uuid.UUID] = {} url_id_cache: dict[tuple[str, uuid.UUID], uuid.UUID] = {} try: db_handler = ChaiPackageUrlsDB(main_logger) package_id_cache = db_handler.load_package_id_cache() if url_cache_csv_path: url_id_cache = load_url_id_cache_from_file(url_cache_csv_path, main_logger) else: main_logger.log( "No URL cache file provided, loading all URLs from database..." ) url_id_cache = db_handler.load_url_id_cache_from_db() except Exception as e: main_logger.error(f"Failed during setup (DB or cache loading): {e}") if db_handler: db_handler.close() return package_urls_to_insert: list[ tuple[uuid.UUID, uuid.UUID, uuid.UUID, datetime, datetime] ] = [] processed_csv_rows = 0 total_associations_prepared = 0 processed_pairs: set[tuple[uuid.UUID, uuid.UUID]] = ( set() ) # To avoid duplicates in a single batch try: with open(input_csv_path, newline="", encoding="utf-8") as infile: reader = csv.reader(infile) header = next(reader, None) if not header: main_logger.warn( f"Input CSV file {input_csv_path} is empty or has no header." 
) return main_logger.log(f"Input CSV Header: {header}") for row_num, row in enumerate(reader): processed_csv_rows += 1 current_csv_line = row_num + 2 # 1 for header, 1 for 0-indexing if not (len(row) >= 3): main_logger.warn( f"Skipping row {current_csv_line} (length < 3): {row}" ) continue import_id, source_url_str, homepage_url_str = row[0], row[1], row[2] if not import_id: main_logger.warn( f"Skipping row {current_csv_line} due to missing import_id: {row}" ) continue package_id = package_id_cache.get(import_id) if not package_id: # We didn't load all the packages from ITN, so this is expected continue urls_to_link = [] if source_url_str and source_url_str.lower() != "null": source_key = (source_url_str.strip(), url_type_source_id) source_url_id = url_id_cache.get(source_key) if source_url_id: urls_to_link.append(source_url_id) else: main_logger.warn( f"Source URL for import_id '{import_id}' not found in URL cache: '{source_url_str}' (row {current_csv_line})" ) if homepage_url_str and homepage_url_str.lower() != "null": homepage_key = (homepage_url_str.strip(), url_type_homepage_id) homepage_url_id = url_id_cache.get(homepage_key) if homepage_url_id: urls_to_link.append(homepage_url_id) else: main_logger.warn( f"Homepage URL for import_id '{import_id}' not found in URL cache: '{homepage_url_str}' (row {current_csv_line})" ) for url_id_to_link in urls_to_link: if (package_id, url_id_to_link) not in processed_pairs: package_urls_to_insert.append( ( uuid.uuid4(), package_id, url_id_to_link, script_execution_time, script_execution_time, ) ) processed_pairs.add((package_id, url_id_to_link)) total_associations_prepared += 1 if len(package_urls_to_insert) >= batch_size: db_handler.batch_insert_package_urls(package_urls_to_insert) package_urls_to_insert = [] processed_pairs.clear() # Clear after batch insert main_logger.log( f"Processed batch. CSV rows: {processed_csv_rows}, Associations: {total_associations_prepared}" ) if stop_at and processed_csv_rows >= stop_at: main_logger.log(f"Reached stop limit of {stop_at} CSV rows.") break if package_urls_to_insert: # Process remaining db_handler.batch_insert_package_urls(package_urls_to_insert) main_logger.log( f"Processed final batch. CSV rows: {processed_csv_rows}, Associations: {total_associations_prepared}" ) main_logger.log( f"Package-URL association processing complete. Total CSV rows: {processed_csv_rows}. Associations prepared: {total_associations_prepared}." ) except FileNotFoundError: main_logger.error(f"Input CSV file not found: {input_csv_path}") except csv.Error as e: main_logger.error( f"CSV reading error in {input_csv_path} near line {reader.line_num if 'reader' in locals() else 'unknown'}: {e}" ) except psycopg2.Error as e: main_logger.error(f"A database error occurred: {e}") main_logger.exception() except Exception as e: main_logger.error(f"An unexpected error occurred: {e}") main_logger.exception() finally: if db_handler: db_handler.close() if __name__ == "__main__": parser = argparse.ArgumentParser( description="Batch insert package-URL relationships from a CSV file." 
) parser.add_argument( "file_path", help="Path to the input CSV file (import_id, source_url, homepage_url).", ) parser.add_argument( "--cache", metavar="URL_CACHE_CSV_PATH", help="Optional path to the CSV file containing URL IDs (output of batch_insert_urls.py).", ) parser.add_argument( "--batch-size", "-b", type=int, default=DEFAULT_BATCH_SIZE, help=f"Number of records to insert per batch (default: {DEFAULT_BATCH_SIZE}).", ) parser.add_argument( "--stop", "-s", type=int, help="Optional: stop processing after this many CSV rows.", ) args = parser.parse_args() script_start_time = datetime.now() logger = Logger("main_pkg_url_assoc_loader") logger.log(f"Script started at {script_start_time.isoformat()}") process_package_url_associations( input_csv_path=args.file_path, batch_size=args.batch_size, script_execution_time=script_start_time, url_cache_csv_path=args.cache, stop_at=args.stop, main_logger=logger, ) logger.log( f"Script finished. Total execution time: {datetime.now() - script_start_time}" ) ================================================ FILE: scripts/chai-legacy-loader/batch_insert_urls.py ================================================ #!/usr/bin/env pkgx +python@3.11 uv run --with psycopg2==2.9.9 import argparse import csv import os import uuid from datetime import datetime import psycopg2 import psycopg2.extras from core.config import Config, PackageManager from core.logger import Logger CHAI_DATABASE_URL = os.environ.get("CHAI_DATABASE_URL") DEFAULT_BATCH_SIZE = 20000 OUTPUT_CSV_FILENAME = "inserted_urls.csv" class ChaiDB: """Handles interactions with the CHAI database for batch URL insertion.""" def __init__(self): """Initialize connection to the CHAI database.""" self.logger = Logger("batch_url_db") if not CHAI_DATABASE_URL: self.logger.error("CHAI_DATABASE_URL environment variable not set.") raise ValueError("CHAI_DATABASE_URL not set") self.conn = None self.cursor = None try: self.conn = psycopg2.connect(CHAI_DATABASE_URL) self.cursor = self.conn.cursor() self.logger.log("CHAI database connection established") except psycopg2.Error as e: self.logger.error(f"Database connection error: {e}") raise def batch_insert_urls( self, url_data_tuples: list[tuple[str, uuid.UUID, datetime, datetime]], dump_output: bool, ) -> list[tuple[uuid.UUID, str, uuid.UUID]] | None: """ Batch insert URLs into the database. Args: url_data_tuples: A list of tuples, each containing (url, url_type_id, created_at_ts, updated_at_ts). dump_output: If True, return the inserted/updated rows. Returns: A list of (id, url, url_type_id) tuples if dump_output is True, else None. """ if not url_data_tuples: return [] if dump_output else None query_base = """ INSERT INTO urls (url, url_type_id, created_at, updated_at) VALUES %s ON CONFLICT (url_type_id, url) DO UPDATE SET updated_at = EXCLUDED.updated_at """ if dump_output: query = query_base + " RETURNING id, url, url_type_id" else: query = query_base try: psycopg2.extras.execute_values( self.cursor, query, url_data_tuples, page_size=len(url_data_tuples) ) self.conn.commit() self.logger.log( f"Successfully inserted/updated {len(url_data_tuples)} URL records." 
) if dump_output: return self.cursor.fetchall() return None except psycopg2.Error as e: self.logger.error(f"Error during batch insert: {e}") self.logger.log(url_data_tuples) self.conn.rollback() raise e except Exception as e: self.logger.error(f"An unexpected error occurred during batch insert: {e}") self.conn.rollback() raise e def close(self): """Close the database connection.""" if self.cursor: self.cursor.close() if self.conn: self.conn.close() self.logger.log("CHAI database connection closed") def process_urls_for_batch_insert( file_path: str, batch_size: int, script_execution_time: datetime, dump_output: bool, stop_at: int | None = None, ) -> None: """ Reads URLs from a CSV file, prepares them, and batch inserts them into the database. Args: file_path: Path to the input CSV file. batch_size: Number of records to insert per batch. script_execution_time: Timestamp for created_at/updated_at. dump_output: Whether to dump inserted data to a CSV file. stop_at: Optional number of CSV rows to process. """ logger = Logger("url_batch_processor") logger.log(f"Starting URL batch processing for file: {file_path}") logger.log( f"Batch size: {batch_size}, Dump output: {dump_output}, Stop at: {stop_at}" ) cache: set[tuple[str, uuid.UUID]] = set() try: config = Config(PackageManager.NPM) url_type_homepage_id = config.url_types.homepage url_type_source_id = config.url_types.source except AttributeError as e: logger.error( f"Could not load URL types from config. Ensure DB contains these types: {e}" ) return except Exception as e: logger.error(f"Error initializing config: {e}") return chai_db = None try: chai_db = ChaiDB() except Exception as e: logger.error(f"Failed to initialize ChaiDB: {e}") return # Exit if DB connection fails url_data_to_insert: list[tuple[str, uuid.UUID, datetime, datetime]] = [] all_inserted_data_for_dump: list[tuple[uuid.UUID, str, uuid.UUID]] = [] processed_csv_rows = 0 total_urls_prepared = 0 try: with open(file_path, newline="", encoding="utf-8") as csvfile: reader = csv.reader(csvfile) header = next(reader, None) # Skip header if not header: logger.warn("CSV file is empty or has no header.") return logger.log(f"CSV Header: {header}") # Log the header for context for row in reader: processed_csv_rows += 1 if not (len(row) >= 3): logger.warn(f">3 cols at L{processed_csv_rows + 1}: {row}") continue # Assuming import_id is row[0], source is row[1], homepage is row[2] # set the source data source_url = row[1].strip() if row[1] else None source_data = (source_url, url_type_source_id) # set the homepage data homepage_url = row[2].strip() if row[2] else None homepage_data = (homepage_url, url_type_homepage_id) # add to url_data_to_insert if valid and not in cache # also, update the cache urls_to_process = [] if ( source_url and source_url.lower() != "null" and source_data not in cache ): urls_to_process.append(source_data) cache.add(source_data) if ( homepage_url and homepage_url.lower() != "null" and homepage_data not in cache ): urls_to_process.append(homepage_data) cache.add(homepage_data) for url_str, url_type_id in urls_to_process: url_data_to_insert.append( ( url_str, url_type_id, script_execution_time, script_execution_time, ) ) total_urls_prepared += 1 # insert the data in batches if len(url_data_to_insert) >= batch_size: results = chai_db.batch_insert_urls(url_data_to_insert, dump_output) if dump_output and results: all_inserted_data_for_dump.extend(results) url_data_to_insert = [] logger.log( f"Processed batch. 
Total CSV rows read: {processed_csv_rows}, Total URLs prepared: {total_urls_prepared}" ) if stop_at and processed_csv_rows >= stop_at: logger.log(f"Reached stop limit of {stop_at} CSV rows.") break # Process any remaining URLs in the buffer if url_data_to_insert: results = chai_db.batch_insert_urls(url_data_to_insert, dump_output) if dump_output and results: all_inserted_data_for_dump.extend(results) logger.log( f"Processed final batch. Total CSV rows read: {processed_csv_rows}, Total URLs prepared: {total_urls_prepared}" ) if dump_output: with open( OUTPUT_CSV_FILENAME, "w", newline="", encoding="utf-8" ) as outfile: writer = csv.writer(outfile) writer.writerow(["id", "url", "url_type_id"]) # Header for output CSV writer.writerows(all_inserted_data_for_dump) logger.log( f"Dumped {len(all_inserted_data_for_dump)} records to {OUTPUT_CSV_FILENAME}" ) logger.log( f"URL batch processing complete. Total CSV rows processed: {processed_csv_rows}. Total URLs prepared/processed: {total_urls_prepared}." ) except FileNotFoundError: logger.error(f"Input CSV file not found: {file_path}") except csv.Error as e: logger.error( f"CSV reading error in {file_path} near line {reader.line_num}: {e}" ) except psycopg2.Error as e: logger.error(f"A database error occurred: {e}") logger.exception() except Exception as e: logger.error(f"An unexpected error occurred: {e}") logger.exception() finally: if chai_db: chai_db.close() if __name__ == "__main__": parser = argparse.ArgumentParser( description="Batch insert URLs from a CSV file into the CHAI database." ) parser.add_argument("file_path", help="Path to the input CSV file (e.g., data.csv)") parser.add_argument( "--batch-size", "-b", type=int, default=DEFAULT_BATCH_SIZE, help=f"Number of records to insert per batch (default: {DEFAULT_BATCH_SIZE})", ) parser.add_argument( "--stop", "-s", type=int, help="Optional: stop processing after this many CSV rows.", ) parser.add_argument( "--dump-output", "-d", action="store_true", help=f"If set, dump all inserted/updated (id, url, url_type_id) to {OUTPUT_CSV_FILENAME}", ) args = parser.parse_args() script_start_time = datetime.now() main_logger = Logger("main_batch_url_loader") main_logger.log(f"Script started at {script_start_time.isoformat()}") process_urls_for_batch_insert( file_path=args.file_path, batch_size=args.batch_size, script_execution_time=script_start_time, # Use a consistent time for the whole run dump_output=args.dump_output, stop_at=args.stop, ) main_logger.log( f"Script finished. 
Total execution time: {datetime.now() - script_start_time}" ) ================================================ FILE: scripts/chai-legacy-loader/copy_dependencies_no_thread.py ================================================ #!/usr/bin/env pkgx +python@3.11 uv run import argparse import io import os import uuid import psycopg2 import psycopg2.errors from core.config import Config, PackageManager from core.logger import Logger LEGACY_CHAI_DATABASE_URL = os.environ.get("LEGACY_CHAI_DATABASE_URL") CHAI_DATABASE_URL = os.environ.get("CHAI_DATABASE_URL") BATCH_SIZE = 20000 LEGACY_CHAI_PACKAGE_MANAGER_MAP: dict[PackageManager, str] = { PackageManager.NPM: "npm", PackageManager.CRATES: "crates", PackageManager.HOMEBREW: "brew", PackageManager.DEBIAN: "apt", PackageManager.PKGX: "pkgx", } class LegacyDB: """Handles all interactions with the legacy CHAI database.""" def __init__(self, input_package_manager: PackageManager): """Initialize connection to the legacy database.""" self.conn = psycopg2.connect(LEGACY_CHAI_DATABASE_URL) # Set autocommit to False for server-side cursors self.conn.set_session(autocommit=False) self.logger = Logger("legacy_db") self.logger.debug("Legacy database connection established") self.package_manager_name = LEGACY_CHAI_PACKAGE_MANAGER_MAP[ input_package_manager ] def __del__(self): """Close connection when object is destroyed.""" if hasattr(self, "conn") and self.conn: self.conn.close() def get_sql_content(self, filename: str) -> str: """Load SQL content from a file.""" sql_file_path = os.path.join( os.path.dirname(os.path.abspath(__file__)), "sql", filename ) with open(sql_file_path) as f: return f.read() def create_server_cursor(self, sql_file: str, cursor_name: str) -> None: """Create a server-side cursor for efficient data fetching. 
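
        The flow, sketched here with placeholder names, is the standard
        PostgreSQL server-side cursor pattern that the methods below implement:

            DECLARE my_cursor CURSOR FOR <query>;
            FETCH 20000 FROM my_cursor;  -- repeated by fetch_batch until empty
            CLOSE my_cursor;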
Inputs: sql_file: The name of the SQL file to load cursor_name: The name of the cursor to create package_manager_name: The name of the package manager whose legacy data we are fetching """ query = self.get_sql_content(sql_file) # substitute $1 with self.package_manager_name query = query.replace("$1", f"'{self.package_manager_name}'") self.logger.debug(f"Query: {query}") # create a named server side cursor for retrieving data declare_stmt = f"DECLARE {cursor_name} CURSOR FOR {query}" # create a cursor to execute the declare statement with self.conn.cursor() as cursor: cursor.execute(declare_stmt) self.logger.debug( f"Created server-side cursor '{cursor_name}' for {sql_file}" ) def fetch_batch(self, cursor_name: str, batch_size: int) -> list[tuple]: """Fetch a batch of records using the server-side cursor.""" cursor = self.conn.cursor() cursor.execute(f"FETCH {batch_size} FROM {cursor_name}") batch = cursor.fetchall() self.logger.log(f"Fetched {len(batch)} records from cursor '{cursor_name}'") cursor.close() return batch def close_cursor(self, cursor_name: str) -> None: """Close a server-side cursor.""" cursor = self.conn.cursor() cursor.execute(f"CLOSE {cursor_name}") self.logger.log(f"Closed server-side cursor '{cursor_name}'") cursor.close() class ChaiDB: """Handles all interactions with the CHAI database.""" def __init__(self, config: Config): """Initialize connection to the CHAI database.""" self.logger = Logger("chai_db") self.config = config # connect to the database self.conn = psycopg2.connect(CHAI_DATABASE_URL) # Use autocommit=False for server-side cursors if needed within a transaction # self.conn.set_session(autocommit=False) self.logger.debug("CHAI database connection established") # create the cursor for general operations self.cursor = self.conn.cursor() self.logger.debug("CHAI database cursor created") # configure some variables self.legacy_dependency_columns = [ "package_id", "dependency_id", # the below two are not available from the sources table in the legacy db # assuming everything is a runtime dependency and use the semver range * "dependency_type_id", "semver_range", ] # initialize package map self.package_map = self._get_package_map() self.logger.debug( f"{len(self.package_map)} {self.config.pm_config.package_manager} packages in CHAI" ) # Load existing legacy dependencies to avoid duplicates self.processed_pairs = set() self._load_existing_dependencies() def _get_package_map(self) -> dict[str, uuid.UUID]: """Get a map of package import_ids to their UUIDs for the configured package manager""" query = """SELECT import_id, id FROM packages WHERE package_manager_id = %(pm_id)s AND import_id IS NOT NULL""" self.cursor.execute(query, {"pm_id": self.config.pm_config.pm_id}) rows = self.cursor.fetchall() # check that we actually loaded packages for the specified manager if len(rows) == 0: raise ValueError( f"{self.config.pm_config.package_manager} packages not found in DB" ) return {row[0]: row[1] for row in rows} def _load_existing_dependencies(self, batch_size: int = BATCH_SIZE) -> None: """ Loads existing (package_id, dependency_id) pairs from the legacy_dependencies table into self.processed_pairs using a server-side cursor to handle potentially large datasets efficiently. 
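
        With the default batch_size (BATCH_SIZE = 20000), each fetchmany() call
        streams one batch of (package_id, dependency_id) tuples into
        self.processed_pairs instead of materializing the whole table at once.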
""" self.logger.log("Loading existing legacy dependencies...") query = "SELECT package_id, dependency_id FROM legacy_dependencies" cursor_name = "existing_deps_cursor" total_loaded = 0 # Use a transaction context for the server-side cursor with self.conn, self.conn.cursor(name=cursor_name) as named_cursor: named_cursor.execute(query) while True: batch = named_cursor.fetchmany(batch_size) if not batch: break # Convert batch of tuples to set for efficient update self.processed_pairs.update(batch) total_loaded += len(batch) if total_loaded % (batch_size * 20000) == 0: self.logger.debug( f"Loaded {total_loaded} existing dependency pairs..." ) self.logger.log( f"Finished loading {total_loaded} existing dependency pairs into memory." ) def init_copy_expert(self) -> None: """Initialize a StringIO object to collect CSV data for copy operation""" self.csv_data = io.StringIO() self.columns_str = ", ".join(self.legacy_dependency_columns) self.logger.debug("Copy buffer initialized") def add_rows_to_copy_expert(self, rows: list[tuple]) -> int: """Add rows to the StringIO buffer for later COPY operation""" rows_added = 0 for row in rows: package_id = self.package_map.get(row[0]) dependency_id = self.package_map.get(row[1]) # if package or dependency are not found, skip the row if not package_id or not dependency_id: # skipping because maybe the package or dependency is # not in legacy chai # marked as spam continue # if the pair has already been processed, skip the row if (package_id, dependency_id) in self.processed_pairs: continue # add the pair to the processed pairs self.processed_pairs.add((package_id, dependency_id)) # get the dependency type and semver range # not available from the sources table in the legacy db # assume everything is a runtime dependency, and use the semver range * dependency_type_id = self.config.dependency_types.runtime semver_range = "*" csv_line = ( f"{package_id},{dependency_id},{dependency_type_id},{semver_range}" ) self.csv_data.write(csv_line + "\n") rows_added += 1 return rows_added def add_rows_with_flush(self, rows: list[tuple], max_buffer_size=100000) -> int: """Add rows to the StringIO buffer for later COPY operation""" rows_added = self.add_rows_to_copy_expert(rows) self.logger.log(f"Added {rows_added} rows to the copy expert") # if the buffer is too large, flush it if self.csv_data.tell() > max_buffer_size: self.complete_copy_expert() # reinitialize the buffer self.init_copy_expert() return rows_added def complete_copy_expert(self): """Execute the COPY operation with collected data""" # Reset buffer position to start self.csv_data.seek(0) # Execute the COPY FROM operation try: self.cursor.copy_expert( f"COPY legacy_dependencies ({self.columns_str}) FROM STDIN WITH CSV", self.csv_data, ) self.conn.commit() self.logger.log(f"{len(self.processed_pairs)} total rows copied") except psycopg2.errors.BadCopyFileFormat as e: self.logger.log(f"Error copying data to database: {e}") # write the csv data to a file with open("bad_copy_file.csv", "w") as f: f.write(self.csv_data.getvalue()) self.conn.rollback() raise e def main( logger: Logger, config: Config, input_package_manager: PackageManager, stop: int | None, ) -> None: legacy_db = LegacyDB(input_package_manager) chai_db = ChaiDB(config) # initialize the copy expert chai_db.init_copy_expert() # set up the legacy db cursor_name = "legacy_dependencies_cursor" legacy_db.create_server_cursor("dependencies.sql", cursor_name) logger.log("Starting dependency loop process") total_rows = 0 try: while True: rows = 
legacy_db.fetch_batch(cursor_name, BATCH_SIZE) # break if we have no more rows if not rows: break # keep adding the rows to the copy expert rows_added = chai_db.add_rows_with_flush(rows) # update the total rows processed total_rows += rows_added # break if we have processed the stop number of rows if stop and total_rows >= stop: break # complete the copy expert logger.log("Completing copy expert for the last batch") chai_db.complete_copy_expert() except KeyboardInterrupt: logger.log("Keyboard interrupt detected") chai_db.complete_copy_expert() logger.log(f"Total rows processed: {total_rows}") finally: logger.log(f"Total rows processed: {total_rows}") legacy_db.close_cursor(cursor_name) legacy_db.conn.close() chai_db.cursor.close() chai_db.conn.close() logger.log("Database connections closed") if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument( "--package-manager", type=PackageManager, choices=list(PackageManager), required=True, ) parser.add_argument( "--stop", type=int, default=None, help="Stop after processing a certain number of rows", ) args = parser.parse_args() input_package_manager: PackageManager = args.package_manager stop: int | None = args.stop logger = Logger("chai_legacy_loader") config = Config(input_package_manager) logger.log(f"Importing legacy dependencies for {args.package_manager}") main( logger, config, input_package_manager, stop, ) ================================================ FILE: scripts/chai-legacy-loader/pkgx.yaml ================================================ dependencies: - python@3.11 - postgresql.org@16 - astral.sh/uv ================================================ FILE: scripts/chai-legacy-loader/sql/dependencies.sql ================================================ -- from old CHAI's structure, the sources table stores dependencies from package to -- package -- the projects tables stores the package managers themselves, which is where we apply -- the where clause select s.start_id, s.end_id from public.sources s join public.projects p on s.start_id = p.id and $1 = any(p.package_managers) ; ================================================ FILE: scripts/chai-legacy-loader/sql/packages.sql ================================================ -- TODO: swap npm for $1, and update the scripts select concat('npm', '/', project_name) as "derived_id", project_name as "name", id as "import_id" from projects where 'npm' = any(package_managers) and created_at < '2024-01-01'::timestamp -- before ITN and is_spam is false -- use legacy spam filter ; ================================================ FILE: scripts/chai-legacy-loader/sql/urls.sql ================================================ select id as import_id, "source", homepage from projects where 'npm' = any(package_managers) and created_at < '2024-01-01'::timestamp -- before ITN and is_spam is false -- use legacy spam filter ================================================ FILE: scripts/npm-singleton/README.md ================================================ # NPM Singleton Package Loader A utility script for loading a single NPM package and its metadata into the CHAI database. ## Purpose This script allows you to: 1. Check if an NPM package exists in the CHAI database 2. Fetch package metadata from the NPM registry 3. Verify package URLs (homepage, repository, source) 4. Check dependencies and their existence in CHAI 5. Add the package to the CHAI database ## Usage 1. 
You must either run this script from the project root directory or specify
   `PYTHONPATH` to point to the root directory, since it imports modules from the
   `core` library.
2. You must also specify a `CHAI_DATABASE_URL` connection string

### Method 1: Using pkgx (recommended)

```bash
# Make the script executable
chmod +x scripts/npm-singleton/single.py

# Run with PYTHONPATH set
PYTHONPATH=. scripts/npm-singleton/single.py <package_name> [--dry-run]
```

### Method 2: Using [uv](https://github.com/astral-sh/uv) directly

```bash
PYTHONPATH=. uv run scripts/npm-singleton/single.py <package_name> [--dry-run]
```

## Arguments

- `package_name`: Name of the NPM package to load (required)
- `--dry-run`: Run in read-only mode without committing to the database

> [!NOTE]
> Strongly recommend running with the `--dry-run` flag first, to see what changes
> you're about to implement.

The output looks like:

```bash
---------------------------------------------
Package: @types/jest
---------------------------------------------
✅ @types/jest doesn't exist on CHAI
---------------------------------------------
✅ OK from NPM
---------------------------------------------
✅ has homepage: github.com/DefinitelyTyped/DefinitelyTyped
✅ has repository: github.com/DefinitelyTyped/DefinitelyTyped.git
✅ has source: github.com/DefinitelyTyped/DefinitelyTyped.git
---------------------------------------------
Runtime Dependencies:
✅ expect / ^29.0.0 on CHAI
✅ pretty-format / ^29.0.0 on CHAI
---------------------------------------------
Dev Dependencies:
(none)
---------------------------------------------
DRY RUN: Would create the following rows:
 - 1 Package
 - 3 URLs
 - 3 PackageURLs
 - 2 Runtime Dependencies
 - 0 Dev Dependencies
---------------------------------------------
🌵 Dry run: No changes committed to database
```

> If a dependency doesn't exist on CHAI, you can just run the script for that
> dependency, and then run it for your main package

## Output

The script provides detailed status information about the package:

```
---------------------------------------------
Package: <package_name>
---------------------------------------------
❌ Exiting bc <package_name> exists on CHAI | ✅ <package_name> doesn't exist on CHAI
---------------------------------------------
❌ Exiting bc response error from registry | ✅ OK from NPM
---------------------------------------------
✅ has homepage: <url> | ❌ no homepage
✅ has repository: <url> | ❌ no repository
✅ has source: <url> | ❌ no source
---------------------------------------------
✅ <dep> / <semver> on CHAI | ❌ <dep> / <semver> not on CHAI
... for each dependency
---------------------------------------------
```

In dry-run mode, the script will show what changes would be made without committing
them to the database.

## Examples

Check a package without adding it to the database:

```bash
PYTHONPATH=. ./scripts/npm-singleton/single.py react --dry-run
```

Add a package to the database:

```bash
PYTHONPATH=. ./scripts/npm-singleton/single.py lodash
```

## Tasks

The below can be run using [xc](https://xcfile.dev), from within the
scripts/npm-singleton directory

### check

Env: PYTHONPATH=../..
Inputs: PACKAGE

```bash
./single.py $PACKAGE --dry-run
```

### add

Env: PYTHONPATH=../..
Inputs: PACKAGE ```bash ./single.py $PACKAGE ``` ================================================ FILE: scripts/npm-singleton/pkgx.yaml ================================================ dependencies: xcfile.dev: "*" python.org: ^3.11 ================================================ FILE: scripts/npm-singleton/single.py ================================================ #!/usr/bin/env pkgx +python@3.11 uv run --with requests==2.31.0 --with permalint==0.1.15 import argparse import sys from uuid import UUID, uuid4 import requests from permalint import normalize_url from core.config import Config, PackageManager from core.db import DB from core.models import URL, LegacyDependency, Package, PackageURL NPM_API_URL = "https://registry.npmjs.org/{name}" class ChaiDB(DB): def __init__(self): super().__init__("chai-singleton") def check_package_exists(self, derived_id: str) -> bool: with self.session() as session: return ( session.query(Package).filter(Package.derived_id == derived_id).first() is not None ) def get_package_by_derived_id(self, derived_id: str) -> Package: with self.session() as session: return ( session.query(Package).filter(Package.derived_id == derived_id).first() ) def load( self, pkg: Package, urls: list[URL], runtime_deps: list[LegacyDependency], dev_deps: list[LegacyDependency], ) -> None: """Load a package and its URLs into the database. Uses the same session to avoid transactional inconsistencies. Args: pkg: The package to load. urls: The URLs to load. """ with self.session() as session: # Load the package first session.add(pkg) session.flush() # to create the id pkg_id = pkg.id # Load the URLs for url in urls: session.add(url) session.flush() # to create the id url_ids = [url.id for url in urls] # Create the package URL relationships for url_id in url_ids: session.add(PackageURL(package_id=pkg_id, url_id=url_id)) # Create the legacy dependencies for dep in runtime_deps: session.add(dep) for dep in dev_deps: session.add(dep) session.commit() def get_package_info(npm_package: str) -> tuple[bool, dict, str | None]: url = NPM_API_URL.format(name=npm_package) try: response = requests.get(url) if response.status_code != 200: return ( False, {}, f"Failed with status {response.status_code}: {response.text}", ) return True, response.json(), None except Exception as e: return False, {}, f"Request failed: {e!s}" def get_homepage(package_info: dict) -> tuple[bool, str | None]: try: return True, canonicalize(package_info["homepage"]) except KeyError: return False, None except Exception as e: return False, str(e) def get_repository_url(package_info: dict) -> tuple[bool, str | None]: try: return True, canonicalize(package_info["repository"]["url"]) except KeyError: return False, None except Exception as e: return False, str(e) def get_source_url(package_info: dict) -> tuple[bool, str | None]: try: repository_obj = package_info["repository"] if repository_obj["type"] == "git": return True, canonicalize(repository_obj["url"]) else: return False, f"Repository is not a git URL: {repository_obj}" except KeyError: return False, None except Exception as e: return False, str(e) def canonicalize(url: str) -> str: return normalize_url(url) def get_latest_version(package_info: dict) -> tuple[bool, str | None]: try: dist_tags = package_info["dist-tags"] return True, dist_tags["latest"] except KeyError: return False, None def get_version_info(package_info: dict, version: str) -> tuple[bool, dict | None]: try: return True, package_info["versions"][version] except KeyError: return False, None def 
get_latest_version_dependencies( latest_version: dict, ) -> tuple[bool, dict[str, str]]: """Gets the dependencies from a version object from NPM's Registry API Returns: - a tuple of (success, dependencies) where dependencies is a dictionary keyed by dependency, with semver range as the value """ try: deps = latest_version.get("dependencies", {}) return True, deps except Exception: return False, {} def get_latest_version_dev_dependencies( latest_version: dict, ) -> tuple[bool, dict[str, str]]: """Gets the development dependencies from a version object from NPM's Registry API Returns: - a tuple of (success, dependencies) where dependencies is a dictionary keyed by dependency, with semver range as the value """ try: deps = latest_version.get("devDependencies", {}) return True, deps except Exception: return False, {} def check_dependencies_on_chai( db: ChaiDB, deps: dict[str, str] ) -> list[tuple[str, str, bool]]: """Check if dependencies exist on CHAI Args: db: ChaiDB instance deps: Dependencies to check Returns: List of tuples (dependency_name, semver_range, exists_on_chai) """ results = [] for dep_name, dep_range in deps.items(): derived_id = f"npm/{dep_name}" exists = db.get_package_by_derived_id(derived_id) is not None results.append((dep_name, dep_range, exists)) return results def generate_url(url_type_id: UUID, url: str) -> URL: return URL(id=uuid4(), url=url, url_type_id=url_type_id) def generate_legacy_dependencies( db: ChaiDB, pkg: Package, deps: dict[str, str], dependency_type_id: UUID ) -> tuple[list[LegacyDependency], list[tuple[str, str, bool]]]: legacy_deps: list[LegacyDependency] = [] dep_status: list[tuple[str, str, bool]] = [] for dep_name, dep_range in deps.items(): derived_id = f"npm/{dep_name}" chai_dep: Package | None = db.get_package_by_derived_id(derived_id) exists = chai_dep is not None dep_status.append((dep_name, dep_range, exists)) if not exists: continue dependency = LegacyDependency( package_id=pkg.id, dependency_id=chai_dep.id, dependency_type_id=dependency_type_id, semver_range=dep_range, ) legacy_deps.append(dependency) return legacy_deps, dep_status def print_status_report( package_name: str, exists_on_chai: bool, npm_response_ok: bool, npm_error: str | None, homepage_result: tuple[bool, str | None], repository_result: tuple[bool, str | None], source_result: tuple[bool, str | None], runtime_deps: list[tuple[str, str, bool]], dev_deps: list[tuple[str, str, bool]], changes_summary: dict[str, int] | None = None, dry_run: bool = False, ): """Print a formatted status report of the package processing""" divider = "-" * 45 print(divider) print(f"Package: {package_name}") print(divider) if exists_on_chai: print(f"❌ Exiting bc {package_name} exists on CHAI") else: print(f"✅ {package_name} doesn't exist on CHAI") print(divider) if npm_response_ok: print("✅ OK from NPM") else: print(f"❌ Exiting bc response error from registry: {npm_error}") print(divider) homepage_ok, homepage = homepage_result if homepage_ok: print(f"✅ has homepage: {homepage}") else: print("❌ no homepage") repository_ok, repository = repository_result if repository_ok: print(f"✅ has repository: {repository}") else: print("❌ no repository") source_ok, source = source_result if source_ok: print(f"✅ has source: {source}") else: print("❌ no source") print(divider) print("Runtime Dependencies:") if not runtime_deps: print("(none)") else: for dep, semver, exists in runtime_deps: if exists: print(f"✅ {dep} / {semver} on CHAI") else: print(f"❌ {dep} / {semver} not on CHAI") print(divider) print("Dev 
Dependencies:") if not dev_deps: print("(none)") else: for dep, semver, exists in dev_deps: if exists: print(f"✅ {dep} / {semver} on CHAI") else: print(f"❌ {dep} / {semver} not on CHAI") print(divider) if changes_summary: if dry_run: print("DRY RUN: Would create the following rows:") else: print("Created the following rows:") for entity_type, count in changes_summary.items(): print(f" - {count} {entity_type}") else: print("Won't even create any rows") print(divider) def process_package(package_name: str, dry_run: bool = False) -> bool: """Process a package and return True if successful, False otherwise""" config = Config(PackageManager.NPM) chai_db = ChaiDB() # Check if package exists derived_id = f"npm/{package_name}" exists_on_chai = chai_db.check_package_exists(derived_id) # Get Package Info from NPM npm_response_ok, package_info, npm_error = get_package_info(package_name) # Check URLs homepage_result = get_homepage(package_info) if npm_response_ok else (False, None) repository_result = ( get_repository_url(package_info) if npm_response_ok else (False, None) ) source_result = get_source_url(package_info) if npm_response_ok else (False, None) # Check latest version latest_version_result = ( get_latest_version(package_info) if npm_response_ok else (False, None) ) # Get version info version_info_result = (False, None) if npm_response_ok and latest_version_result[0]: version_info_result = get_version_info(package_info, latest_version_result[1]) # Get dependencies runtime_deps_result = (False, {}) dev_deps_result = (False, {}) if npm_response_ok and version_info_result[0]: runtime_deps_result = get_latest_version_dependencies(version_info_result[1]) dev_deps_result = get_latest_version_dev_dependencies(version_info_result[1]) # Check dependencies on CHAI runtime_deps_status = check_dependencies_on_chai(chai_db, runtime_deps_result[1]) dev_deps_status = check_dependencies_on_chai(chai_db, dev_deps_result[1]) # Create entities to add to database if not in dry run mode and all checks pass changes_summary = { "Package": 1, "URLs": 0, "PackageURLs": 0, "Runtime Dependencies": 0, "Dev Dependencies": 0, } # Early exit if necessary conditions aren't met if exists_on_chai or not npm_response_ok: print_status_report( package_name, exists_on_chai, npm_response_ok, npm_error, homepage_result, repository_result, source_result, runtime_deps_status, dev_deps_status, None, dry_run, ) return False # Create Package derived_id = f"npm/{package_name}" package_manager_id = config.pm_config.pm_id import_id = f"npm-singleton/{package_name}" readme = package_info.get("readme", "") pkg = Package( id=uuid4(), name=package_name, derived_id=derived_id, package_manager_id=package_manager_id, import_id=import_id, readme=readme, ) # URLs urls = [] if homepage_result[0]: urls.append( generate_url(config.url_types.homepage, normalize_url(homepage_result[1])) ) if repository_result[0]: urls.append( generate_url( config.url_types.repository, normalize_url(repository_result[1]) ) ) if source_result[0]: urls.append( generate_url(config.url_types.source, normalize_url(source_result[1])) ) changes_summary["URLs"] = len(urls) changes_summary["PackageURLs"] = len(urls) # Dependencies runtime_deps, _ = generate_legacy_dependencies( chai_db, pkg, runtime_deps_result[1], config.dependency_types.runtime ) dev_deps, _ = generate_legacy_dependencies( chai_db, pkg, dev_deps_result[1], config.dependency_types.development ) changes_summary["Runtime Dependencies"] = len(runtime_deps) changes_summary["Dev Dependencies"] = len(dev_deps) # 
Print status report
    print_status_report(
        package_name,
        exists_on_chai,
        npm_response_ok,
        npm_error,
        homepage_result,
        repository_result,
        source_result,
        runtime_deps_status,
        dev_deps_status,
        changes_summary,
        dry_run,
    )

    # Load the package into the database (unless in dry run mode)
    if not dry_run:
        chai_db.load(pkg, urls, runtime_deps, dev_deps)
        print("✅ Successfully committed changes to database")
    else:
        print("🌵 Dry run: No changes committed to database")

    return True


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Load a single NPM package by name into CHAI"
    )
    parser.add_argument("name", help="Name of the NPM package")
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Check package without committing to database",
    )
    args = parser.parse_args()

    success = process_package(args.name, args.dry_run)
    if not success:
        sys.exit(1)

================================================
FILE: scripts/package_to_package/package_dependencies.py
================================================
#! /usr/bin/env pkgx +python@3.11 uv run

import argparse
import re
import sys
from typing import Any

from packaging import version as packaging_version
from sqlalchemy.dialects.postgresql import insert as pg_insert
from sqlalchemy.exc import IntegrityError
from sqlalchemy.orm import Session, sessionmaker

from core.config import Config, PackageManager
from core.db import DB
from core.logger import Logger
from core.models import DependsOn, LegacyDependency, Package, Version

# --- Constants ---
INSERT_BATCH_SIZE = 5000
DEFAULT_SEMVER_RANGE = "*"

logger = Logger("package_dependency_migration")


# --- Helper Functions ---
def preprocess_version_string(version_str: str) -> str:
    """
    Transforms known non-PEP440 version strings into a parseable format.

    Handles specific date formats, build tags, and common non-standard separators.
    """
    # Replace underscores between digits or letters/digits
    version_str = re.sub(r"(?<=[a-zA-Z\d])_(?=[a-zA-Z\d])", ".", version_str)

    # === Pattern Matching & Transformation (Order Matters!) ===

    # --- Specific Patterns First ---
    # Handle X.Y.Z-M<N> -> X.Y.Z+M<N> (Milestone)
    match_milestone = re.fullmatch(r"(\d+(\.\d+)*)-M(\d+)", version_str)
    if match_milestone:
        return f"{match_milestone.group(1)}+M{match_milestone.group(3)}"

    # Handle X.Y.Z-<word>.<N> -> X.Y.Z+<word>.<N> (Vendor Build)
    match_vendor_build = re.fullmatch(r"(\d+(\.\d+)+)-([a-zA-Z]+)\.(\d+)", version_str)
    if match_vendor_build:
        return f"{match_vendor_build.group(1)}+{match_vendor_build.group(3)}.{match_vendor_build.group(4)}"

    # Handle X.Y.Z-git<hash> -> X.Y.Z+git<hash>
    match_git_build = re.fullmatch(r"(\d+(\.\d+)+)-(git[\da-zA-Z]+)", version_str)
    if match_git_build:
        return f"{match_git_build.group(1)}+{match_git_build.group(3)}"

    # Handle X.Y.Z-p<N> / X.Y.Zp<N> -> X.Y.Z+p<N>
    match_p_patch1 = re.fullmatch(r"(\d+(\.\d+)+)-p(\d+)", version_str)
    if match_p_patch1:
        return f"{match_p_patch1.group(1)}+p{match_p_patch1.group(3)}"
    match_p_patch2 = re.fullmatch(r"(\d+(\.\d+)+)p(\d+)", version_str)
    if match_p_patch2:
        return f"{match_p_patch2.group(1)}+p{match_p_patch2.group(3)}"

    # --- Date Formats ---
    # YYYY-MM-DD -> YYYY.MM.DD
    if re.fullmatch(r"\d{4}-\d{2}-\d{2}", version_str):
        return version_str.replace("-", ".")
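    # Worked examples for the transforms above (hypothetical version strings):
    #   "1.2.3-M4"     -> "1.2.3+M4"      (milestone)
    #   "1.2.3-beta.2" -> "1.2.3+beta.2"  (vendor build)
    #   "1.2.3-p7"     -> "1.2.3+p7"      (patch)
    #   "2024-01-31"   -> "2024.01.31"    (date)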
    # YYYY.MM.DD.<commit_hash> -> YYYY.MM.DD+<commit_hash>
    # TODO: Hashes on the same date are compared lexicographically, which might not
    # reflect actual order.
    match_dot_date_hash = re.fullmatch(
        r"(\d{4}\.\d{2}\.\d{2})\.([a-zA-Z0-9]+)", version_str
    )
    if match_dot_date_hash:
        # Ensure the suffix isn't just a standard version number or time-like
        suffix = match_dot_date_hash.group(2)
        try:
            # If packaging can parse "0.<suffix>", it's likely not a hash
            packaging_version.parse(f"0.{suffix}")
            # Also check if it looks like HH.MM.SS
            if not re.fullmatch(r"\d{2}\.\d{2}\.\d{2}", suffix):
                return f"{match_dot_date_hash.group(1)}+{suffix}"  # Treat as hash
        except packaging_version.InvalidVersion:
            return f"{match_dot_date_hash.group(1)}+{suffix}"  # Treat as hash
        except Exception:
            return f"{match_dot_date_hash.group(1)}+{suffix}"  # Treat as hash

    # YYYYMMDDTHHMMSS -> YYYYMMDD.HHMMSS
    match_ymdt_compact = re.fullmatch(r"(\d{8})T(\d{6})", version_str)
    if match_ymdt_compact:
        return f"{match_ymdt_compact.group(1)}.{match_ymdt_compact.group(2)}"

    # YYYY.MM.DD-HH.MM.SS -> YYYY.MM.DD+HHMMSS
    match_ymd_time_hyphen = re.fullmatch(
        r"(\d{4}\.\d{2}\.\d{2})-(\d{2}\.\d{2}\.\d{2})", version_str
    )
    if match_ymd_time_hyphen:
        time_part = match_ymd_time_hyphen.group(2).replace(".", "")
        return f"{match_ymd_time_hyphen.group(1)}+{time_part}"

    # ISO 8601 subset: YYYY-MM-DDTHH-MM-SSZ -> YYYY.MM.DD+HHMMSSZ
    match_iso_subset = re.fullmatch(
        r"(\d{4})-(\d{2})-(\d{2})T(\d{2})-(\d{2})-(\d{2})Z", version_str
    )
    if match_iso_subset:
        date_part = f"{match_iso_subset.group(1)}.{match_iso_subset.group(2)}.{match_iso_subset.group(3)}"
        time_part = f"{match_iso_subset.group(4)}{match_iso_subset.group(5)}{match_iso_subset.group(6)}Z"
        return f"{date_part}+{time_part}"

    # YYYY_MM_DD.<commit_hash> -> YYYY.MM.DD+<commit_hash>
    match_commit_hash = re.fullmatch(
        r"(\d{4}_\d{2}_\d{2})\.([a-zA-Z0-9]+)", version_str
    )
    if match_commit_hash:
        return f"{match_commit_hash.group(1)}+{match_commit_hash.group(2)}"

    # YYYYMMDD-<suffix> -> YYYYMMDD+<suffix>
    match_date_suffix = re.fullmatch(r"(\d{8})-?(.*)", version_str)
    if match_date_suffix and match_date_suffix.group(2):  # Ensure there is a suffix
        # Check if suffix looks like a simple version number itself,
        # otherwise treat as string
        suffix = match_date_suffix.group(2)
        # Normalize suffix by removing dots if it looks like a version part
        # This helps comparison e.g., update1 vs 3.1 -> update1 vs 31
        normalized_suffix = suffix.replace(".", "")
        return f"{match_date_suffix.group(1)}+{normalized_suffix}"

    # --- More General Build/Patch Identifiers ---
    # Handle X.Y.Z.v<N> -> X.Y.Z+v<N>
    match_v_build = re.fullmatch(r"(\d+(\.\d+)+)\.v(.*)", version_str)
    if match_v_build:
        return f"{match_v_build.group(1)}+v{match_v_build.group(3)}"

    # Handle X.Yrel.<N> -> X.Y+rel.<N>
    match_rel_build = re.fullmatch(r"(\d+(\.\d+)+)rel\.(.*)", version_str)
    if match_rel_build:
        return f"{match_rel_build.group(1)}+rel.{match_rel_build.group(3)}"

    # Handle X.Yga<N> -> X.Y+ga<N>
    match_ga_build = re.fullmatch(r"(\d+(\.\d+)+)ga(\d+)", version_str)
    if match_ga_build:
        return f"{match_ga_build.group(1)}+ga{match_ga_build.group(3)}"

    # Handle <major>-<build> (comes after more specific hyphenated patterns)
    match_major_build = re.fullmatch(r"(\d+)-([\da-zA-Z]+)", version_str)
    if match_major_build:
        return f"{match_major_build.group(1)}+{match_major_build.group(2)}"

    # Handle r<N> -> 0+r<N>
    match_revision = re.fullmatch(r"r(\d+)", version_str)
    if match_revision:
        return f"0+r{match_revision.group(1)}"

    # Handle X.Y.Z...
-> X.Y.Z...+suffix (openssl@1.1.1w) match_version_letter_suffix = re.fullmatch(r"(\d+(\.\d+)+)([a-zA-Z]+)", version_str) if match_version_letter_suffix: base_version_part = match_version_letter_suffix.group(1) if base_version_part.count(".") > 0: # Ensures at least X.Y.Z format return f"{match_version_letter_suffix.group(1)}+{match_version_letter_suffix.group(3)}" # Handle X.Y / X.Y -> X.Y+suffix match_letter_suffix = re.fullmatch(r"(\d+\.\d+)([a-zA-Z]{1,2})", version_str) if match_letter_suffix: return f"{match_letter_suffix.group(1)}+{match_letter_suffix.group(2)}" # Handle leading 'p' if it looks like p if version_str.startswith("p") and re.match(r"p\d", version_str): potential_version = version_str[1:] try: packaging_version.parse(potential_version) return potential_version except packaging_version.InvalidVersion: pass # --- Fallback --- return version_str def get_latest_version_info(versions: list[Version]) -> Version | None: """ Identifies the latest version from a list using packaging.version for robust parsing unless there is only one version provided. Args: versions: A list of Version objects for a single package. Returns: - None if the list is empty, or; - The single Version object if only one is provided, or; - The Version object corresponding to the latest parseable version. """ # Handle empty list if not versions: return None # If there's only one version, return it directly without parsing if len(versions) == 1: return versions[0] # Proceed with parsing and comparison if more than one version exists latest_parsed_version = None latest_version_obj = None for version_obj in versions: original_version_str = version_obj.version preprocessed_str = preprocess_version_string(original_version_str) try: current_parsed_version = packaging_version.parse(preprocessed_str) if ( latest_parsed_version is None or current_parsed_version > latest_parsed_version ): latest_parsed_version = current_parsed_version latest_version_obj = version_obj except packaging_version.InvalidVersion as e_invalid: logger.warn( f"Invalid version: '{original_version_str}' -> '{preprocessed_str}' -> {e_invalid}" ) continue except Exception as e_general: logger.error( f"Unexpected error: '{original_version_str}' -> '{preprocessed_str}' -> {e_general}" ) continue # If no versions were successfully processed if latest_version_obj is None: import_id = versions[0].import_id versions_str = ", ".join([v.version for v in versions]) logger.warn(f"No versions for {import_id}: {versions_str}") return latest_version_obj def insert_legacy_dependencies( session: Session, data_batch: list[dict[str, Any]] ) -> None: """ Inserts a batch of legacy dependency records into the database, ignoring duplicates based on the (package_id, dependency_id) unique constraint. Args: session: The SQLAlchemy session object. data_batch: A list of dictionaries, each representing a LegacyDependency row. 
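
    Note: duplicate pairs are skipped by the INSERT ... ON CONFLICT
    (package_id, dependency_id) DO NOTHING statement below, so re-running the
    migration does not double-insert existing dependency rows.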
""" if not data_batch: return try: # Get the target table object legacy_table = LegacyDependency.__table__ # Construct the PostgreSQL INSERT...ON CONFLICT DO NOTHING statement stmt = pg_insert(legacy_table).values(data_batch) # Specify the columns involved in the unique constraint # The constraint name 'uq_package_dependency' is defined in the model stmt = stmt.on_conflict_do_nothing( index_elements=["package_id", "dependency_id"] ) # Execute the statement session.execute(stmt) session.commit() except IntegrityError as e: logger.error(f"Database Integrity Error during insert: {e}") session.rollback() raise e except Exception as e: logger.error(f"An unexpected error occurred during bulk insert: {e}") session.rollback() raise e def process_package_dependencies(config: Config, session: Session) -> None: legacy_deps_to_insert: list[dict[str, Any]] = [] total_packages_processed = 0 total_dependencies_found = 0 default_dependency_type_id = config.dependency_types.runtime logger.log(f"Starting migration for package manager ID: {config.pm_config.pm_id}") # --- Fetch ALL packages for the manager --- logger.log("Fetching all packages for the specified manager...") all_packages: list[Package] = ( session.query(Package) .filter(Package.package_manager_id == config.pm_config.pm_id) .all() ) logger.log(f"Fetched {len(all_packages)} packages.") # --- Process all fetched packages --- for pkg in all_packages: total_packages_processed += 1 # debug if total_packages_processed % 1000 == 0: logger.debug( f"Processed {total_packages_processed}/{len(all_packages)} packages..." ) versions = session.query(Version).filter(Version.package_id == pkg.id).all() # skip if no versions if not versions: continue # grab the latest version latest_version = get_latest_version_info(versions) if latest_version is None: continue # grab the dependencies for the latest version dependencies = ( session.query(DependsOn) .filter(DependsOn.version_id == latest_version.id) .all() ) # construct the load object for dependency in dependencies: dep_data = { "package_id": pkg.id, "dependency_id": dependency.dependency_id, "dependency_type_id": dependency.dependency_type_id or default_dependency_type_id, "semver_range": dependency.semver_range or DEFAULT_SEMVER_RANGE, } legacy_deps_to_insert.append(dep_data) total_dependencies_found += 1 # --- Insert if batch is full --- if len(legacy_deps_to_insert) >= INSERT_BATCH_SIZE: logger.log(f"Reached insert batch size ({INSERT_BATCH_SIZE}). Inserting...") insert_legacy_dependencies(session, legacy_deps_to_insert) legacy_deps_to_insert = [] # --- Final Insert --- if legacy_deps_to_insert: logger.log( f"Inserting final batch of {len(legacy_deps_to_insert)} dependency records." 
) insert_legacy_dependencies(session, legacy_deps_to_insert) logger.log("--- Migration Summary ---") logger.log(f"Total packages processed: {total_packages_processed}") logger.log(f"Total dependencies found: {total_dependencies_found}") logger.log("Migration process completed.") # --- Main Execution --- if __name__ == "__main__": desc = """Migrate version-specific dependencies to package-level dependencies based on the latest version.""" parser = argparse.ArgumentParser(description=desc) parser.add_argument( "--package-manager", type=lambda pm: PackageManager[pm.upper()], choices=list(PackageManager), required=True, help="The package manager to process (e.g., NPM, CRATES).", ) args = parser.parse_args() logger.log( f"Starting package dependency migration for: {args.package_manager.name}" ) SessionLocal = None try: config = Config(args.package_manager) db = DB("db_logger") SessionLocal = sessionmaker(bind=db.engine) with SessionLocal() as session: process_package_dependencies(config, session) except Exception as e: logger.error(f"A critical error occurred: {e}") sys.exit(1) finally: logger.log("Script finished.") ================================================ FILE: scripts/upgrade_canons/.gitignore ================================================ *.csv ================================================ FILE: scripts/upgrade_canons/README.md ================================================ # Upgrade Canons Scripts A collection of scripts for managing canonical URLs and canon IDs in the CHAI database. ## Scripts Overview | Script | Purpose | Usage | Sample Output | | -------------------------- | ------------------------------------------------------------------ | ------------------------------------------------------------- | ------------------------------------------- | | `main.py` | Creates canonical URL entries for packages whose URLs of a given type are not canonical | `./main.py --url-type homepage --url-type-id <uuid> [--dry-run]` | `✅ Inserted 45678 URLs, 52341 PackageURLs` | | `registered_projects.py` | Updates Canon IDs for registered projects to restore old canon IDs | `cat canon_ids.txt \| ./registered_projects.py [--dry-run]` | `✅ Success: 150`
`❌ Failure: 25` | | `create_deleted_canons.py` | Creates canons for registered projects that were deleted | `./create_deleted_canons.py --csv-file input.csv [--dry-run]` | `✅ Success: 75`
`❌ Failure: 12` | ## Requirements - pkgx (or uv) - CHAI_DATABASE_URL environment variable - Python dependencies: `psycopg2==2.9.10`, `permalint==0.1.14` ## Common Options - `--dry-run`: Show what would be done without making changes - Input failures are written to CSV files for review ## Database Schema Dependencies Scripts interact with these tables: - `urls`, `url_types`, `package_urls` - `canons`, `canon_packages`, `canon_packages_old` - `tea_ranks`, `packages` ================================================ FILE: scripts/upgrade_canons/create_deleted_canons.py ================================================ #!/usr/bin/env pkgx uv run import argparse import csv import sys from uuid import UUID from scripts.upgrade_canons.db import DB def read_package_data_from_csv(filename: str) -> list[tuple[str, UUID]]: """Read package names and canon IDs from CSV file and return as list of tuples.""" package_data = [] try: with open(filename, newline="") as csvfile: reader = csv.DictReader(csvfile) for row in reader: try: package_name = row["package_name"] canon_id = UUID(row["canon_id"]) package_data.append((package_name, canon_id)) except (ValueError, KeyError) as e: print( f"Warning: Invalid row in CSV '{row}': {e}", file=sys.stderr, ) except FileNotFoundError: print(f"Error: File '{filename}' not found", file=sys.stderr) sys.exit(1) return package_data def process_deleted_package( db: DB, package_name: str, target_canon_id: UUID, dry_run: bool ) -> tuple[bool, str]: """ Process a single package name for deleted registered projects. Returns (success, reason) tuple. """ # Step 1: Prepend 'npm/' to the name to create derived_id derived_id = f"npm/{package_name}" # Step 2: Search by derived_id to get the package_id db.cursor.execute( """ SELECT id FROM packages WHERE derived_id = %s """, (derived_id,), ) package_result = db.cursor.fetchone() if not package_result: return False, "could not find derived_id" package_id = package_result[0] # Step 3: Join to canon_packages to retrieve the current canon_id db.cursor.execute( """ SELECT canon_id FROM canon_packages WHERE package_id = %s """, (package_id,), ) current_result = db.cursor.fetchone() if not current_result: return False, "could not find current canon_id" current_canon_id = current_result[0] if dry_run: print( f"DRY RUN: Would update canon_id {current_canon_id} to {target_canon_id} for package {derived_id} (package_id: {package_id})" ) return True, "" try: # Run the three update statements using target_canon_id from CSV # 1. Update canons table db.cursor.execute( """ UPDATE canons SET id = %s WHERE id = %s """, (target_canon_id, current_canon_id), ) # 2. Update canon_packages table db.cursor.execute( """ UPDATE canon_packages SET canon_id = %s WHERE canon_id = %s """, (target_canon_id, current_canon_id), ) # 3. 
Update tea_ranks table db.cursor.execute( """ UPDATE tea_ranks SET canon_id = %s WHERE canon_id = %s """, (target_canon_id, current_canon_id), ) return True, "" except Exception as e: print( f"Error updating canon_id for package {package_name}: {e}", file=sys.stderr ) return False, f"database error: {e!s}" def write_failures_csv( failures: list[tuple[str, str]], filename: str = "deleted_canons_failures.csv" ): """Write failures to a CSV file.""" with open(filename, "w", newline="") as csvfile: writer = csv.writer(csvfile) writer.writerow(["package_name", "reason"]) for package_name, reason in failures: writer.writerow([package_name, reason]) def main(): parser = argparse.ArgumentParser( description="Create canons for registered projects that were deleted" ) parser.add_argument( "--dry-run", action="store_true", help="Show what would be done without making changes", ) parser.add_argument( "--csv-file", type=str, required=True, help="CSV file containing package_name and canon_id columns", ) args = parser.parse_args() # Read package data from CSV package_data = read_package_data_from_csv(args.csv_file) if not package_data: print("No package data provided", file=sys.stderr) sys.exit(1) print( f"Processing {len(package_data)} package records for deleted registered projects..." ) # Initialize database connection db = DB() success_count = 0 failure_count = 0 failures = [] try: for package_name, target_canon_id in package_data: success, reason = process_deleted_package( db, package_name, target_canon_id, args.dry_run ) if success: success_count += 1 else: failure_count += 1 failures.append((package_name, reason)) print(f"Warning: Failed to process package {package_name}: {reason}") # Commit changes if not dry run if not args.dry_run and success_count > 0: db.conn.commit() print("Database changes committed.") # Write failures to CSV if any if failures: write_failures_csv(failures) print("Failures written to deleted_canons_failures.csv") finally: db.close() # Print final summary print("--------------------------------------------------") print(f"✅ Success: {success_count}") print(f"❌ Failure: {failure_count}") print("--------------------------------------------------") if __name__ == "__main__": main() ================================================ FILE: scripts/upgrade_canons/db.py ================================================ from collections import defaultdict from os import getenv from uuid import UUID import psycopg2 from psycopg2.extras import execute_values, register_uuid from psycopg2.sql import SQL, Identifier from scripts.upgrade_canons.structs import URL, PackageURL CHAI_DATABASE_URL = getenv("CHAI_DATABASE_URL") class DB: def __init__(self): if not CHAI_DATABASE_URL: raise Exception("CHAI_DATABASE_URL is not set") self.conn = psycopg2.connect(CHAI_DATABASE_URL) self.cursor = self.conn.cursor() register_uuid(conn_or_curs=self.conn) def get_urls_by_type( self, url_type_name: str ) -> tuple[set[str], dict[UUID, list[str]]]: """ Returns a set of ALL URLs of the specified type (including orphans), and a map of package ID to list of URL strings for URLs that are attached to packages. Args: url_type_name: The name of the URL type to filter by (e.g., 'homepage', 'repository', 'source') """ self.cursor.execute( """ SELECT u.url, pu.package_id FROM urls u JOIN url_types ut ON ut.id = u.url_type_id LEFT JOIN package_urls pu ON pu.url_id = u.id WHERE ut.name = %s;""", (url_type_name,), ) package_url_map: dict[UUID, list[str]] = defaultdict(list) all_urls: set[str] = set() for url, package_id in
self.cursor.fetchall(): all_urls.add(url) # Add all URLs (including orphans) if ( package_id is not None ): # Only add to package map if attached to a package package_url_map[package_id].append(url) return all_urls, package_url_map def db_execute_values( self, table_name: str, columns: list[str], values: list[tuple] ): query = ( SQL("INSERT INTO {table_name} ({columns}) VALUES %s") .format( table_name=Identifier(table_name), columns=SQL(", ").join(Identifier(column) for column in columns), ) .as_string(self.conn) ) try: execute_values(self.cursor, query, values) print(f"Inserted {len(values)} rows into {table_name}") except Exception as e: print(f"Error inserting {table_name}: {e}") raise def ingest( self, urls_to_add: list[URL], package_urls_to_add: list[PackageURL], dry_run: bool, ): """ Inserts into the DB using psycopg2's execute_values, which expects the data to be formatted as a list of tuples. """ if urls_to_add: table_name = "urls" columns = ["id", "url", "url_type_id", "created_at", "updated_at"] values = [ (url.id, url.url, url.url_type_id, url.created_at, url.updated_at) for url in urls_to_add ] self.db_execute_values(table_name, columns, values) if package_urls_to_add: table_name = "package_urls" columns = ["id", "package_id", "url_id", "created_at", "updated_at"] values = [ ( package_url.id, package_url.package_id, package_url.url_id, package_url.created_at, package_url.updated_at, ) for package_url in package_urls_to_add ] self.db_execute_values(table_name, columns, values) if not dry_run: self.conn.commit() def close(self): self.cursor.close() self.conn.close() def get_canons_by_url_ids(self, url_ids: list[UUID]) -> list[tuple[UUID, UUID]]: if not url_ids: return [] # Expand the UUID list into one placeholder per value for the IN clause placeholders = ",".join(["%s"] * len(url_ids)) self.cursor.execute( f""" SELECT id, url_id FROM canons WHERE url_id IN ({placeholders}); """, url_ids, ) return self.cursor.fetchall() ================================================ FILE: scripts/upgrade_canons/delete_non_canonical_urls.py ================================================ #!/usr/bin/env pkgx uv run import argparse import csv import sys import warnings from uuid import UUID from permalint import is_canonical_url from scripts.upgrade_canons.db import DB def write_to_csv(filename: str, headers: list[str], data: list[tuple]): with open(filename, "w", newline="") as f: writer = csv.writer(f) writer.writerow(headers) writer.writerows(data) def get_all_urls(db: DB) -> list[tuple[UUID, str]]: """ Query all URLs from the urls table. Returns list of tuples (url_id, url_string). """ db.cursor.execute( """ SELECT id, url FROM urls ORDER BY id """ ) return db.cursor.fetchall() def identify_non_canonical_urls(urls: list[tuple[UUID, str]]) -> list[UUID]: """ Check each URL for canonicality using permalint. Returns list of URL IDs that are not canonical. """ non_canonical_ids = [] for url_id, url_string in urls: try: if not is_canonical_url(url_string): non_canonical_ids.append(url_id) except Exception as e: print(f"Warning: Error checking URL {url_string}: {e}") # Treat URLs that can't be checked as non-canonical non_canonical_ids.append(url_id) return non_canonical_ids def delete_urls_from_database(db: DB, url_ids: list[UUID], dry_run: bool) -> None: """ Delete URLs and their package_urls entries from the database.
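Deletion proceeds child-first to satisfy foreign keys: canons rows referencing the URLs, then package_urls links, then the urls rows themselves.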
""" if not url_ids: print("No URLs to delete.") return if dry_run: print( f"DRY RUN: Would delete {len(url_ids)} URLs and their package_urls entries" ) return # Batch delete operations for efficiency placeholders = ",".join(["%s"] * len(url_ids)) # Delete from canons first (if any exist) db.cursor.execute(f"DELETE FROM canons WHERE url_id IN ({placeholders})", url_ids) # Delete from package_urls (foreign key constraint) db.cursor.execute( f"DELETE FROM package_urls WHERE url_id IN ({placeholders})", url_ids ) # Then delete from urls db.cursor.execute(f"DELETE FROM urls WHERE id IN ({placeholders})", url_ids) # Commit the transaction db.conn.commit() print(f"Successfully deleted {len(url_ids)} URLs and their package_urls entries") def main(dry_run: bool = False): """Main function to delete non-canonical URLs.""" print("Starting deletion of non-canonical URLs...") db = DB() try: # Get all URLs from database print("Fetching all URLs from database...") all_urls = get_all_urls(db) print(f"Found {len(all_urls)} total URLs") # Identify non-canonical URLs print("Checking URLs for canonicality...") non_canonical_ids = identify_non_canonical_urls(all_urls) canonical_count = len(all_urls) - len(non_canonical_ids) print(f"Found {len(non_canonical_ids)} non-canonical URLs") print(f"Found {canonical_count} canonical URLs") # Delete non-canonical URLs if non_canonical_ids: canons = db.get_canons_by_url_ids(non_canonical_ids) if canons: print(f"WARNING: Found {len(canons)} - delete them urself") write_to_csv( "non_canonical_urls_that_have_canons.csv", ["canon_id", "url_id"], canons, ) sys.exit(1) print("Deleting non-canonical URLs...") delete_urls_from_database(db, non_canonical_ids, dry_run) else: print("No non-canonical URLs found to delete.") # Summary print("-" * 50) if dry_run: print("DRY RUN SUMMARY:") print(f"Would delete: {len(non_canonical_ids)} URLs") print(f"Would keep: {canonical_count} URLs") else: print("DELETION SUMMARY:") print(f"✅ Deleted: {len(non_canonical_ids)} URLs") print(f"✅ Kept: {canonical_count} URLs") print("-" * 50) finally: db.close() if __name__ == "__main__": parser = argparse.ArgumentParser( description="Delete non-canonical URLs from the database" ) parser.add_argument( "--dry-run", action="store_true", help="Run in dry-run mode without making database changes", ) args = parser.parse_args() with warnings.catch_warnings(action="ignore"): main(args.dry_run) ================================================ FILE: scripts/upgrade_canons/main.py ================================================ #!/usr/bin/env uv run --with psycopg2==2.9.10 --with permalint==0.1.14 import argparse import warnings from datetime import datetime from uuid import UUID, uuid4 from permalint import is_canonical_url, normalize_url from scripts.upgrade_canons.db import DB from scripts.upgrade_canons.structs import URL, PackageURL # Valid URL types that can be canonicalized VALID_URL_TYPES = {"homepage", "repository", "source"} def is_one_url_canonical(urls: list[str]) -> bool: """Returns True if at least one of the URLs is canonical""" return any(is_canonical_url(url) for url in urls) def generate_canonical_url(urls: list[str]) -> str: """ Returns the canonical URL for the given list of URLs - TODO: we should be smart about which one to pick, like most recent perhaps? 
""" return normalize_url(urls[0]) def generate_new_url(url: str, url_type_id: UUID, now: datetime) -> URL: """Creates a new URL object for the given URL.""" return URL(uuid4(), url, url_type_id, now, now) def generate_new_package_url( package_id: UUID, url_id: UUID, now: datetime ) -> PackageURL: """Creates a new PackageURL object for the given package and URL""" return PackageURL(uuid4(), package_id, url_id, now, now) # Pure functions for business logic - highly testable def analyze_packages_needing_canonicalization( package_url_map: dict[UUID, list[str]], existing_urls: set[str], ) -> dict[UUID, str]: """ Analyze which packages need canonical URLs created. Returns a mapping of package_id to the canonical URL that should be created. """ packages_needing_canon: dict[UUID, str] = {} canonical_urls_to_create: set[str] = set() for package_id, urls in package_url_map.items(): # Skip if package already has at least one canonical URL if is_one_url_canonical(urls): continue canonical_url = generate_canonical_url(urls) # Skip if canonical URL already exists in database if canonical_url in existing_urls: continue # Skip if we're already planning to create this canonical URL if canonical_url in canonical_urls_to_create: continue # This package needs a canonical URL created packages_needing_canon[package_id] = canonical_url canonical_urls_to_create.add(canonical_url) return packages_needing_canon def create_url_and_package_url_objects( packages_needing_canon: dict[UUID, str], url_type_id: UUID, now: datetime, ) -> tuple[list[URL], list[PackageURL]]: """ Create URL and PackageURL objects for the packages that need canonicalization. """ new_urls: list[URL] = [] new_package_urls: list[PackageURL] = [] for package_id, canonical_url in packages_needing_canon.items(): new_url = generate_new_url(canonical_url, url_type_id, now) new_package_url = generate_new_package_url(package_id, new_url.id, now) new_urls.append(new_url) new_package_urls.append(new_package_url) return new_urls, new_package_urls def main(db: DB, url_type: str, url_type_id: UUID, dry_run: bool): now = datetime.now() print(f"Starting main for URL type '{url_type}': {now}") # Get data from database all_urls, package_url_map = db.get_urls_by_type(url_type) print(f"Found {len(all_urls)} {url_type} URLs") print(f"Found {len(package_url_map)} packages with {url_type} URLs") # Analyze which packages need canonicalization packages_needing_canon = analyze_packages_needing_canonicalization( package_url_map, all_urls ) # Create objects new_urls, new_package_urls = create_url_and_package_url_objects( packages_needing_canon, url_type_id, now ) print("-" * 100) print("Going to insert:") print(f" {len(new_urls)} URLs") print(f" {len(new_package_urls)} PackageURLs") print("-" * 100) # Ingest to database db.ingest(new_urls, new_package_urls, dry_run) if __name__ == "__main__": parser = argparse.ArgumentParser(description="Canonicalize URLs of specified type") parser.add_argument( "--url-type", type=str, required=True, choices=VALID_URL_TYPES, help=f"Type of URLs to canonicalize. 
Valid options: {', '.join(VALID_URL_TYPES)}", ) parser.add_argument( "--url-type-id", type=UUID, required=True, help="UUID of the URL type in the database", ) parser.add_argument( "--dry-run", action="store_true", help="Run in dry-run mode without making database changes", ) args = parser.parse_args() db = DB() try: with warnings.catch_warnings(action="ignore"): main(db, args.url_type, args.url_type_id, args.dry_run) finally: db.close() ================================================ FILE: scripts/upgrade_canons/registered_projects.py ================================================ #!/usr/bin/env pkgx uv run import argparse import csv import sys from uuid import UUID from scripts.upgrade_canons.db import DB def read_canon_ids_from_stdin() -> list[UUID]: """Read canon IDs from stdin and return as list of UUIDs.""" canon_ids = [] for line in sys.stdin: line = line.strip() if line: try: canon_ids.append(UUID(line)) except ValueError as e: print(f"Warning: Invalid UUID format '{line}': {e}", file=sys.stderr) return canon_ids def process_canon_id(db: DB, canon_id: UUID, dry_run: bool) -> tuple[bool, str]: """ Process a single canon ID and perform the updates. Returns (success, reason) tuple. """ # First, join to canon_packages_old to get package_id db.cursor.execute( """ SELECT package_id FROM canon_packages_old WHERE canon_id = %s """, (canon_id,), ) old_result = db.cursor.fetchone() if not old_result: return False, "could not find package_id" package_id = old_result[0] # Next, join to canon_packages to get current canon_id db.cursor.execute( """ SELECT canon_id FROM canon_packages WHERE package_id = %s """, (package_id,), ) current_result = db.cursor.fetchone() if not current_result: return False, "could not find new canon_id" new_canon_id = current_result[0] if dry_run: print( f"DRY RUN: Would update canon_id {new_canon_id} to {canon_id} for package {package_id}" ) return True, "" try: # Run the three update statements # 1. Update canons table db.cursor.execute( """ UPDATE canons SET id = %s WHERE id = %s """, (canon_id, new_canon_id), ) # 2. Update canon_packages table db.cursor.execute( """ UPDATE canon_packages SET canon_id = %s WHERE canon_id = %s """, (canon_id, new_canon_id), ) # 3. 
Update tea_ranks table db.cursor.execute( """ UPDATE tea_ranks SET canon_id = %s WHERE canon_id = %s """, (canon_id, new_canon_id), ) return True, "" except Exception as e: print(f"Error updating canon_id {canon_id}: {e}", file=sys.stderr) return False, f"database error: {e!s}" def write_failures_csv( failures: list[tuple[UUID, str]], filename: str = "canon_update_failures.csv" ): """Write failures to a CSV file.""" with open(filename, "w", newline="") as csvfile: writer = csv.writer(csvfile) writer.writerow(["canon_id", "reason"]) for canon_id, reason in failures: writer.writerow([str(canon_id), reason]) def main(): parser = argparse.ArgumentParser( description="Update Canon IDs for registered projects" ) parser.add_argument( "--dry-run", action="store_true", help="Show what would be done without making changes", ) args = parser.parse_args() # Read canon IDs from stdin canon_ids = read_canon_ids_from_stdin() if not canon_ids: print("No canon IDs provided via stdin", file=sys.stderr) sys.exit(1) print(f"Processing {len(canon_ids)} canon IDs...") # Initialize database connection db = DB() success_count = 0 failure_count = 0 failures = [] try: for canon_id in canon_ids: success, reason = process_canon_id(db, canon_id, args.dry_run) if success: success_count += 1 else: failure_count += 1 failures.append((canon_id, reason)) print(f"Warning: Failed to process canon_id {canon_id}: {reason}") # Commit changes if not dry run if not args.dry_run and success_count > 0: db.conn.commit() print("Database changes committed.") # Write failures to CSV if any if failures: write_failures_csv(failures) print("Failures written to canon_update_failures.csv") finally: db.close() # Print final summary print("--------------------------------------------------") print(f"✅ Success: {success_count}") print(f"❌ Failure: {failure_count}") print("--------------------------------------------------") if __name__ == "__main__": main() ================================================ FILE: scripts/upgrade_canons/structs.py ================================================ from dataclasses import dataclass from datetime import datetime from uuid import UUID # let's make classes defining the data models, since scripts can't really access ./core @dataclass class URL: id: UUID url: str url_type_id: UUID created_at: datetime updated_at: datetime @dataclass class PackageURL: id: UUID package_id: UUID url_id: UUID created_at: datetime updated_at: datetime ================================================ FILE: tests/README.md ================================================ # CHAI Test Suite This directory contains the test suite for the CHAI package indexer. All tests are written using [pytest](https://docs.pytest.org/) and follow a consistent structure to ensure maintainability and ease of use. 
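For orientation, a minimal test in this suite's style might look like the following (illustrative; the `mock_config` fixture is provided by `tests/conftest.py`):

```python
import pytest


@pytest.mark.unit
def test_mock_config_has_pm_id(mock_config):
    """The shared mock_config fixture should expose a package manager ID."""
    assert mock_config.pm_config.pm_id is not None
```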
## Table of Contents - [Directory Structure](#directory-structure) - [Running Tests](#running-tests) - [Adding New Tests](#adding-new-tests) - [Test Coverage](#test-coverage) - [Fixtures and Mocking](#fixtures-and-mocking) - [Test Markers](#test-markers) - [Best Practices](#best-practices) - [Troubleshooting](#troubleshooting) ## Directory Structure The test suite is organized to mirror the main project structure: ``` tests/ ├── conftest.py # Common fixtures and configuration ├── package_managers/ # Tests for package manager indexers │ ├── crates/ │ │ ├── conftest.py │ │ └── test_crates_diff_deps.py │ ├── debian/ │ │ ├── conftest.py │ │ ├── test_debian_diff.py │ │ ├── test_debian_parser.py │ │ └── test_debian_sources.py │ ├── homebrew/ │ │ ├── conftest.py │ │ └── test_homebrew_diff_deps.py │ └── pkgx/ │ ├── test_pkgx_diff.py │ └── test_special_case.py ├── ranker/ # Tests for ranking functionality │ ├── test_compute_canon_name.py │ ├── test_dedupe.py │ └── test_rx_graph.py └── scripts/ # Tests for maintenance scripts └── upgrade_canons/ └── test_analyze_packages_needing_canonicalization.py ``` ## Running Tests ### Prerequisites Install test dependencies: ```bash uv sync --all-groups source .venv/bin/activate ``` ### Running All Tests To run all tests: ```bash pytest tests/ ``` ### Running Specific Tests Run tests for a specific module: ```bash # Run all tests in a directory pytest tests/package_managers/crates/ # Run a specific test file pytest tests/package_managers/debian/test_debian_parser.py # Run a specific test class pytest tests/package_managers/debian/test_debian_parser.py::TestDebianParser # Run a specific test method pytest tests/package_managers/debian/test_debian_parser.py::TestDebianParser::test_parse_package_data ``` ### Running Tests by Marker Tests are categorized with markers. To run tests for a specific category: ```bash # Run only unit tests pytest -m unit # Run only parser tests pytest -m parser # Run only transformer tests pytest -m transformer # Run only ranker tests pytest -m ranker # Run all tests except slow ones pytest -m "not slow" ``` ### Verbose Output For more detailed output: ```bash pytest -v tests/ # Show captured print statements pytest -s tests/ # Show local variables in tracebacks pytest -l tests/ ``` ## Adding New Tests ### 1. Create a Test File Test files should: - Be placed in the appropriate directory based on what's being tested - Follow the naming convention `test_*.py` - Include a module docstring explaining what's being tested Example: ```python """ Test the package parsing functionality for NewPackageManager. This module tests the Parser class which extracts package information from the package manager's data format. """ import pytest from package_managers.newpm.parser import Parser ``` ### 2. Use Fixtures for Common Setup Instead of setUp/tearDown methods, use pytest fixtures: ```python @pytest.fixture def sample_package_data(): """Provides sample package data for testing.""" return { "name": "example-package", "version": "1.0.0", "dependencies": ["dep1", "dep2"], } def test_parse_package(sample_package_data): """Test parsing a package with valid data.""" parser = Parser() result = parser.parse(sample_package_data) assert result.name == "example-package" ``` ### 3. Use Markers for Test Categories Apply appropriate markers to your tests: ```python @pytest.mark.parser @pytest.mark.unit class TestNewParser: """Test the new package manager parser.""" def test_parse_valid_data(self): """Test parsing valid package data.""" # test implementation ``` ### 4. Mock External Dependencies Use the fixtures from `conftest.py` or create specific mocks: ```python def test_with_mocked_config(mock_config): """Test using the common mock_config fixture.""" # mock_config is automatically injected from conftest.py transformer = Transformer(mock_config) # test implementation ``` ### 5.
Write Clear Assertions Use clear, descriptive assertions: ```python # Good assert len(packages) == 3, "Should parse exactly 3 packages from the data" # Less clear assert len(packages) == 3 ``` ## Test Coverage ### Running Tests with Coverage To generate a coverage report: ```bash # Run with coverage and generate terminal report pytest --cov=. --cov-report=term tests/ # Generate HTML coverage report pytest --cov=. --cov-report=html tests/ # Open htmlcov/index.html in a browser # Generate coverage for specific modules pytest --cov=package_managers.crates --cov=ranker tests/ # Show missing lines in terminal pytest --cov=. --cov-report=term-missing tests/ ``` ### Coverage by Docker Service To check coverage for specific Docker services defined in `docker-compose.yml`: ```bash # Coverage for crates indexer pytest --cov=package_managers.crates --cov-report=term-missing tests/package_managers/crates/ # Coverage for homebrew indexer pytest --cov=package_managers.homebrew --cov-report=term-missing tests/package_managers/homebrew/ # Coverage for debian indexer pytest --cov=package_managers.debian --cov-report=term-missing tests/package_managers/debian/ # Coverage for pkgx indexer pytest --cov=package_managers.pkgx --cov-report=term-missing tests/package_managers/pkgx/ # Coverage for ranker pytest --cov=ranker --cov-report=term-missing tests/ranker/ ``` ### Setting Coverage Thresholds To fail tests if coverage drops below a threshold: ```bash pytest --cov=. --cov-fail-under=80 tests/ ``` ## Fixtures and Mocking ### Common Fixtures The `conftest.py` file provides several reusable fixtures: - **`mock_config`**: A mocked Config object with all sub-configurations - **`mock_url_types`**: Mocked URL types (homepage, repository, etc.) - **`mock_dependency_types`**: Mocked dependency types (runtime, build, dev, test) - **`mock_package_managers`**: Mocked package manager configurations - **`sample_package_data`**: Sample data for different package managers ### Using Fixtures Fixtures are automatically injected into test functions: ```python def test_example(mock_config, sample_package_data): """Example test using multiple fixtures.""" # mock_config and sample_package_data are automatically available crates_data = sample_package_data["crates"] # test implementation ``` ### Creating Test-Specific Fixtures For test-specific setup, create local fixtures: ```python @pytest.fixture def special_cache(): """Create a cache with specific test data.""" return Cache( package_map={"test": Package(id=uuid4(), name="test")}, url_map={}, package_urls={}, dependencies={}, ) def test_with_special_cache(special_cache): """Test using the special cache.""" # test implementation ``` ## Test Markers Available markers (defined in `conftest.py`): - **`@pytest.mark.unit`**: Unit tests - **`@pytest.mark.integration`**: Integration tests - **`@pytest.mark.slow`**: Slow-running tests - **`@pytest.mark.parser`**: Parser tests - **`@pytest.mark.transformer`**: Transformer tests - **`@pytest.mark.loader`**: Loader tests - **`@pytest.mark.ranker`**: Ranker tests To list all available markers: ```bash pytest --markers ``` ## Best Practices 1. **Test One Thing**: Each test should verify a single behavior 2. **Use Descriptive Names**: Test names should clearly indicate what they test 3. **Keep Tests Independent**: Tests should not depend on each other 4. **Use Fixtures**: Leverage fixtures for common setup instead of duplicating code 5. **Mock External Dependencies**: Don't make actual database or network calls 6.
**Test Edge Cases**: Include tests for error conditions and edge cases 7. **Document Complex Tests**: Add docstrings explaining complex test scenarios ## Troubleshooting ### Common Issues 1. **Import Errors**: Ensure `PYTHONPATH` includes the project root: ```bash export PYTHONPATH=/workspace:$PYTHONPATH ``` 2. **Missing Dependencies**: Install test requirements: ```bash uv sync --all-groups source .venv/bin/activate ``` 3. **Database Connection Errors**: Tests should not require `CHAI_DATABASE_URL`. If a test fails due to database issues, it likely needs better mocking. ### Debugging Tests To debug a failing test: ```bash # Drop into debugger on failure pytest --pdb tests/failing_test.py # Show local variables in traceback pytest -l tests/failing_test.py # Increase verbosity pytest -vv tests/failing_test.py ``` ================================================ FILE: tests/conftest.py ================================================ """ Common test fixtures and configurations for pytest. This module provides reusable fixtures for testing the CHAI package indexer. Instead of mocking database operations, these fixtures focus on providing test data and mock objects for testing the core logic of transformers, parsers, and other components. """ import uuid from unittest.mock import MagicMock, Mock import pytest from core.config import ( Config, DependencyTypes, PackageManagers, PMConf, URLTypes, UserTypes, ) from core.db import DB from core.logger import Logger @pytest.fixture def mock_logger(): """Mock logger for testing.""" logger = MagicMock(spec=Logger) logger.debug.side_effect = lambda x: print(f"DEBUG: {x}") logger.warn.side_effect = lambda x: print(f"WARN: {x}") logger.log.side_effect = lambda x: print(x) return logger @pytest.fixture def mock_url_types(): """ Mock URL types with consistent UUIDs for testing. Returns a mock URLTypes object that returns consistent URL type objects for common URL type names. """ url_types = MagicMock(spec=URLTypes) # Set up URL type attributes directly url_types.homepage = uuid.UUID("00000000-0000-0000-0000-000000000001") url_types.repository = uuid.UUID("00000000-0000-0000-0000-000000000002") url_types.documentation = uuid.UUID("00000000-0000-0000-0000-000000000003") url_types.source = uuid.UUID("00000000-0000-0000-0000-000000000004") return url_types @pytest.fixture def mock_dependency_types(): """ Mock dependency types for testing. Returns a mock DependencyTypes object with common dependency types. """ dep_types = MagicMock(spec=DependencyTypes) # Set up dependency type attributes directly dep_types.runtime = uuid.UUID("00000000-0000-0000-0000-000000000010") dep_types.build = uuid.UUID("00000000-0000-0000-0000-000000000011") dep_types.dev = uuid.UUID("00000000-0000-0000-0000-000000000012") dep_types.test = uuid.UUID("00000000-0000-0000-0000-000000000013") dep_types.development = dep_types.dev # Alias for development dep_types.recommended = uuid.UUID("00000000-0000-0000-0000-000000000014") dep_types.optional = uuid.UUID("00000000-0000-0000-0000-000000000015") return dep_types @pytest.fixture def mock_sources(): """ Mock sources with consistent UUIDs for testing. Returns a dict mapping source names to mock Source objects. 
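(Note: the values here are plain UUIDs standing in for source IDs rather than full Source objects.)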
""" return { "github": uuid.UUID("00000000-0000-0000-0000-000000000020"), "crates": uuid.UUID("00000000-0000-0000-0000-000000000021"), "homebrew": uuid.UUID("00000000-0000-0000-0000-000000000022"), "debian": uuid.UUID("00000000-0000-0000-0000-000000000023"), "pkgx": uuid.UUID("00000000-0000-0000-0000-000000000024"), } @pytest.fixture def mock_package_managers(): """ Mock package managers for testing. Returns a mock PackageManagers object. """ package_managers = MagicMock(spec=PackageManagers) # Set up package manager attributes directly package_managers.crates = uuid.UUID("00000000-0000-0000-0000-000000000030") package_managers.homebrew = uuid.UUID("00000000-0000-0000-0000-000000000031") package_managers.debian = uuid.UUID("00000000-0000-0000-0000-000000000032") package_managers.pkgx = uuid.UUID("00000000-0000-0000-0000-000000000033") return package_managers @pytest.fixture def mock_pm_config(mock_package_managers): """ Mock PMConf (Package Manager Configuration) for testing. Returns a mock PMConf object with a default package manager ID. """ pm_config = MagicMock(spec=PMConf) pm_config.pm_id = mock_package_managers.crates return pm_config @pytest.fixture def mock_config( mock_url_types, mock_dependency_types, mock_package_managers, mock_pm_config, mock_sources, ): """ Mock Config object with all necessary sub-configurations. This is the main configuration fixture that most tests will use. """ config = MagicMock(spec=Config) # Set up execution configuration config.exec_config = MagicMock() config.exec_config.test = True config.exec_config.no_cache = True config.exec_config.debug = False # Set up sub-configurations config.url_types = mock_url_types config.dependency_types = mock_dependency_types config.package_managers = mock_package_managers config.pm_config = mock_pm_config # Mock DB that returns consistent source objects mock_db = MagicMock() mock_db.select_source_by_name.side_effect = lambda name: mock_sources.get(name) mock_db.select_url_types_by_name.side_effect = lambda name: mock_url_types.get(name) # Set the db for config to be a minimal mock db created config.db = mock_db return config @pytest.fixture def mock_user_types(): """ Mock user types for testing. Returns a mock UserTypes object. """ user_types = MagicMock(spec=UserTypes) # Set up user type attributes directly user_types.admin = Mock(id=uuid.UUID("00000000-0000-0000-0000-000000000040")) user_types.maintainer = Mock(id=uuid.UUID("00000000-0000-0000-0000-000000000041")) user_types.contributor = Mock(id=uuid.UUID("00000000-0000-0000-0000-000000000042")) return user_types @pytest.fixture def sample_package_data(): """ Provides sample package data for testing transformers and parsers. Returns a dict with sample data for different package managers. 
""" return { "crates": { "name": "serde", "version": "1.0.130", "description": "A generic serialization/deserialization framework", "homepage": "https://serde.rs", "repository": "https://github.com/serde-rs/serde", "dependencies": {"serde_derive": "1.0.130"}, }, "homebrew": { "name": "wget", "version": "1.21.2", "description": "Internet file retriever", "homepage": "https://www.gnu.org/software/wget/", "dependencies": ["gettext", "libidn2", "openssl@1.1"], }, "debian": { "package": "curl", "version": "7.74.0-1.3+deb11u1", "maintainer": "Alessandro Ghedini ", "depends": ["libc6", "libcurl4", "zlib1g"], }, "pkgx": { "full_name": "gnu.org/wget", "version": "1.21.2", "homepage": "https://www.gnu.org/software/wget/", "dependencies": {"gnu.org/gettext": "^0.21", "openssl.org": "^1.1"}, }, } @pytest.fixture def mock_csv_reader(): """ Creates a mock CSV reader for testing transformers that read CSV files. Returns a function that creates mock readers with specific data. """ def create_mock_reader(data): """ Create a mock reader that returns the specified data. Args: data: List of rows to return from the CSV reader Returns: A mock function that returns an iterator over the data """ def mock_reader(file_key): return iter([data]) return mock_reader return create_mock_reader # Markers for categorizing tests def pytest_configure(config): """Register custom markers for test categorization.""" config.addinivalue_line("markers", "unit: Unit tests") config.addinivalue_line("markers", "integration: Integration tests") config.addinivalue_line("markers", "slow: Slow running tests") config.addinivalue_line("markers", "parser: Parser tests") config.addinivalue_line("markers", "transformer: Transformer tests") config.addinivalue_line("markers", "loader: Loader tests") config.addinivalue_line("markers", "ranker: Ranker tests") @pytest.fixture def mock_db(): return MagicMock(spec=DB) ================================================ FILE: tests/package_managers/crates/conftest.py ================================================ from datetime import datetime from uuid import uuid4 import pytest from core.models import Package from core.structs import Cache from package_managers.crates.main import Diff from package_managers.crates.structs import ( Crate, CrateLatestVersion, ) @pytest.fixture def package_ids(): """Fixture providing consistent package IDs for testing.""" return {"main": uuid4(), "dep": uuid4()} @pytest.fixture def packages(package_ids): """Fixture providing test packages.""" return { "main": Package( id=package_ids["main"], name="main_pkg", package_manager_id=1, import_id="1048221", created_at=datetime.now(), updated_at=datetime.now(), ), "dep": Package( id=package_ids["dep"], name="dep_pkg", package_manager_id=1, import_id="271975", created_at=datetime.now(), updated_at=datetime.now(), ), } @pytest.fixture def diff_instance(mock_config): """ Factory fixture to create Diff instances with specific cache configurations. Returns a function that creates Diff instances. """ def create_diff(package_map, dependencies=None, url_map=None, package_urls=None): cache = Cache( package_map=package_map, url_map=url_map or {}, package_urls=package_urls or {}, dependencies=dependencies or {}, ) return Diff(mock_config, cache) return create_diff @pytest.fixture def crate_with_dependencies(): """ Factory fixture to create Crate objects with specified dependencies. Returns a function that creates Crate objects. 
""" def create_crate(crate_id="1048221", dependencies=None): latest_version = CrateLatestVersion( id=9337571, checksum="some-checksum", downloads=1000, license="MIT", num="1.0.0", published_by=None, published_at="2023-01-01", ) if dependencies: latest_version.dependencies = dependencies else: latest_version.dependencies = [] crate = Crate( id=int(crate_id), name="main_pkg", readme="Test readme", homepage="", repository="", documentation="", source=None, ) crate.latest_version = latest_version return crate return create_crate ================================================ FILE: tests/package_managers/crates/test_crates_diff_deps.py ================================================ """ Test the diff_deps functionality for the crates package manager. This module tests the Diff.diff_deps method which determines what dependencies need to be added or removed when processing crate updates. """ from datetime import datetime import pytest from core.models import LegacyDependency from package_managers.crates.structs import CrateDependency, DependencyType @pytest.mark.transformer class TestDiffDeps: """Tests for the diff_deps method in the Diff class for crates.""" def test_existing_dependency_no_changes( self, packages, package_ids, diff_instance, crate_with_dependencies, mock_config ): """ Test that when a dependency already exists in the database and also appears in the crate object, it is neither added to new_deps nor removed_deps. """ # Create an existing runtime dependency existing_dep = LegacyDependency( id=1, package_id=package_ids["main"], dependency_id=package_ids["dep"], dependency_type_id=mock_config.dependency_types.runtime, created_at=datetime.now(), updated_at=datetime.now(), ) # Create diff with existing dependency diff = diff_instance( package_map={"1048221": packages["main"], "271975": packages["dep"]}, dependencies={package_ids["main"]: {existing_dep}}, ) # Create crate with the same dependency dependency = CrateDependency( crate_id=1048221, dependency_id=271975, dependency_type=DependencyType.NORMAL, # Runtime dependency semver_range="^0.26.1", ) crate = crate_with_dependencies(dependencies=[dependency]) # Execute new_deps, removed_deps = diff.diff_deps(crate) # Assert assert len(new_deps) == 0, "No new deps should be added" assert len(removed_deps) == 0, "No deps should be removed" def test_dependency_changed_type( self, packages, package_ids, diff_instance, crate_with_dependencies, mock_config ): """ Test that when a dependency exists but its type changes, it is both added to new_deps and removed_deps. 
""" # Create an existing build dependency existing_dep = LegacyDependency( id=1, package_id=package_ids["main"], dependency_id=package_ids["dep"], dependency_type_id=mock_config.dependency_types.build, # BUILD type created_at=datetime.now(), updated_at=datetime.now(), ) # Create diff with existing dependency diff = diff_instance( package_map={"1048221": packages["main"], "271975": packages["dep"]}, dependencies={package_ids["main"]: {existing_dep}}, ) # Create crate with dependency changed to runtime dependency = CrateDependency( crate_id=1048221, dependency_id=271975, dependency_type=DependencyType.NORMAL, # Changed to runtime semver_range="^0.26.1", ) crate = crate_with_dependencies(dependencies=[dependency]) # Execute new_deps, removed_deps = diff.diff_deps(crate) # Assert assert len(new_deps) == 1, "One new dep should be added (new type)" assert len(removed_deps) == 1, "One dep should be removed (old type)" # Verify new dep is runtime new_dep = new_deps[0] assert new_dep.package_id == package_ids["main"] assert new_dep.dependency_id == package_ids["dep"] assert new_dep.dependency_type_id == mock_config.dependency_types.runtime # Verify removed dep is build removed_dep = removed_deps[0] assert removed_dep.package_id == package_ids["main"] assert removed_dep.dependency_id == package_ids["dep"] assert removed_dep.dependency_type_id == mock_config.dependency_types.build def test_new_dependency( self, packages, package_ids, diff_instance, crate_with_dependencies, mock_config ): """ Test that when a dependency doesn't exist in the cache but appears in the crate object, it is added to new_deps. """ # Create diff with no existing dependencies diff = diff_instance( package_map={"1048221": packages["main"], "271975": packages["dep"]} ) # Create crate with a new dependency dependency = CrateDependency( crate_id=1048221, dependency_id=271975, dependency_type=DependencyType.NORMAL, semver_range="^0.26.1", ) crate = crate_with_dependencies(dependencies=[dependency]) # Execute new_deps, removed_deps = diff.diff_deps(crate) # Assert assert len(new_deps) == 1, "One new dep should be added" assert len(removed_deps) == 0, "No deps should be removed" # Verify new dep new_dep = new_deps[0] assert new_dep.package_id == package_ids["main"] assert new_dep.dependency_id == package_ids["dep"] assert new_dep.dependency_type_id == mock_config.dependency_types.runtime def test_removed_dependency( self, packages, package_ids, diff_instance, crate_with_dependencies, mock_config ): """ Test that when a dependency exists in the cache but doesn't appear in the crate object, it is added to removed_deps. 
""" # Create an existing dependency existing_dep = LegacyDependency( id=1, package_id=package_ids["main"], dependency_id=package_ids["dep"], dependency_type_id=mock_config.dependency_types.runtime, created_at=datetime.now(), updated_at=datetime.now(), ) # Create diff with existing dependency diff = diff_instance( package_map={"1048221": packages["main"], "271975": packages["dep"]}, dependencies={package_ids["main"]: {existing_dep}}, ) # Create crate with no dependencies crate = crate_with_dependencies(dependencies=[]) # Execute new_deps, removed_deps = diff.diff_deps(crate) # Assert assert len(new_deps) == 0, "No new deps should be added" assert len(removed_deps) == 1, "One dep should be removed" # Verify removed dep removed_dep = removed_deps[0] assert removed_dep.package_id == package_ids["main"] assert removed_dep.dependency_id == package_ids["dep"] assert removed_dep.dependency_type_id == mock_config.dependency_types.runtime def test_multiple_dependency_types_same_package( self, packages, package_ids, diff_instance, crate_with_dependencies, mock_config ): """ Test that when a package depends on the same dependency package with multiple dependency types (e.g., both runtime and build), we handle the unique constraint on (package_id, dependency_id) properly. This test exposes the bug where multiple LegacyDependency records with the same package_id and dependency_id but different dependency_type_id would violate the DB constraint. """ # Create diff with no existing dependencies diff = diff_instance( package_map={"1048221": packages["main"], "271975": packages["dep"]} ) # Create crate with multiple dependency types to the same package runtime_dep = CrateDependency( crate_id=1048221, dependency_id=271975, dependency_type=DependencyType.NORMAL, # Runtime semver_range="^0.26.1", ) build_dep = CrateDependency( crate_id=1048221, dependency_id=271975, dependency_type=DependencyType.BUILD, # Build semver_range="^0.26.1", ) crate = crate_with_dependencies(dependencies=[runtime_dep, build_dep]) # Execute new_deps, removed_deps = diff.diff_deps(crate) # Assert assert len(removed_deps) == 0, "No deps should be removed" # With the fix, only create 1 dependency with highest priority type # Priority: NORMAL > BUILD > DEV assert len(new_deps) == 1, "Should create only 1 dep with highest priority type" # Should have runtime type (NORMAL has highest priority) new_dep = new_deps[0] assert new_dep.package_id == package_ids["main"] assert new_dep.dependency_id == package_ids["dep"] assert new_dep.dependency_type_id == mock_config.dependency_types.runtime, ( "Should choose NORMAL (runtime) over BUILD as it has higher priority" ) def test_multiple_dependency_types_build_vs_dev( self, packages, package_ids, diff_instance, crate_with_dependencies, mock_config ): """ Test that when a package depends on the same dependency package with BUILD and DEV types (no NORMAL), BUILD type takes precedence. 
Priority order: NORMAL > BUILD > DEV """ # Create diff with no existing dependencies diff = diff_instance( package_map={"1048221": packages["main"], "271975": packages["dep"]} ) # Create crate with BUILD and DEV dependencies to the same package build_dep = CrateDependency( crate_id=1048221, dependency_id=271975, dependency_type=DependencyType.BUILD, semver_range="^0.26.1", ) dev_dep = CrateDependency( crate_id=1048221, dependency_id=271975, dependency_type=DependencyType.DEV, semver_range="^0.26.1", ) # Add DEV first to test ordering doesn't matter crate = crate_with_dependencies(dependencies=[dev_dep, build_dep]) # Execute new_deps, removed_deps = diff.diff_deps(crate) # Assert assert len(removed_deps) == 0, "No deps should be removed" assert len(new_deps) == 1, "Should create only 1 dep with highest priority type" # Should have build type (BUILD > DEV) new_dep = new_deps[0] assert new_dep.package_id == package_ids["main"] assert new_dep.dependency_id == package_ids["dep"] assert new_dep.dependency_type_id == mock_config.dependency_types.build, ( "Should choose BUILD over DEV as it has higher priority" ) ================================================ FILE: tests/package_managers/debian/conftest.py ================================================ from package_managers.debian.parser import DebianData, Depends def create_debian_package( package: str = "test-package", description: str = "Test package", homepage: str = "", vcs_git: str = "", vcs_browser: str = "", directory: str = "", filename: str = "", depends: list[str] | None = None, build_depends: list[str] | None = None, recommends: list[str] | None = None, suggests: list[str] | None = None, ) -> DebianData: """Helper to create DebianData instances for testing""" debian_data = DebianData() debian_data.package = package debian_data.description = description debian_data.homepage = homepage debian_data.vcs_git = vcs_git debian_data.vcs_browser = vcs_browser debian_data.directory = directory debian_data.filename = filename # Convert string dependencies to Depends objects if depends: debian_data.depends = [Depends(package=dep, semver="*") for dep in depends] if build_depends: # build_depends is now list[Depends] like other dependency fields debian_data.build_depends = [ Depends(package=dep, semver="*") for dep in build_depends ] if recommends: debian_data.recommends = [ Depends(package=dep, semver="*") for dep in recommends ] if suggests: debian_data.suggests = [Depends(package=dep, semver="*") for dep in suggests] return debian_data ================================================ FILE: tests/package_managers/debian/test_debian_diff.py ================================================ from uuid import uuid4 from core.models import URL, LegacyDependency, Package, PackageURL from core.structs import Cache, URLKey from package_managers.debian.diff import DebianDiff from package_managers.debian.main import diff as main_diff from tests.package_managers.debian.conftest import create_debian_package class TestDebianDifferentialLoading: """Test cases for debian differential loading scenarios""" def test_package_exists_url_update(self, mock_config, mock_logger, mock_db): """Tests that Diff updates URLs when the package exists and the URL changes""" # Setup existing package and URL existing_pkg_id = uuid4() existing_url_id = uuid4() existing_package_url_id = uuid4() existing_package = Package( id=existing_pkg_id, derived_id="debian/url-pkg", name="url-pkg", package_manager_id=mock_config.pm_config.pm_id, import_id="url-pkg", readme="Test package", ) 
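# The package starts out linked to an old homepage URL; the diff below should create a new URL and re-link the package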
existing_url = URL( id=existing_url_id, url="https://old-homepage.com", url_type_id=mock_config.url_types.homepage, ) existing_package_url = PackageURL( id=existing_package_url_id, package_id=existing_pkg_id, url_id=existing_url_id, ) # Create cache cache = Cache( package_map={"url-pkg": existing_package}, url_map={ URLKey( "https://old-homepage.com", mock_config.url_types.homepage ): existing_url }, package_urls={existing_pkg_id: {existing_package_url}}, dependencies={}, ) # Create package data with new URL new_pkg_data = create_debian_package( package="url-pkg", homepage="https://new-homepage.com", ) new_urls = {} # this tracks all the new URLs we've created so far # Test the diff diff = DebianDiff(mock_config, cache, mock_db, mock_logger) resolved_urls = diff.diff_url("url-pkg", new_pkg_data, new_urls) new_links, _ = diff.diff_pkg_url(existing_pkg_id, resolved_urls) # Assertions assert len(new_links) == 1 # New URL should be created new_link = new_links[0] assert new_link.package_id == existing_pkg_id # The URL should be created in new_urls dict and the link should reference it assert len(new_urls) == 1 # One new URL should be created new_url_key = next(iter(new_urls.keys())) new_url = new_urls[new_url_key] assert new_link.url_id == new_url.id # Link should reference the new URL assert new_url_key.url == "https://new-homepage.com" assert new_url_key.url_type_id == mock_config.url_types.homepage def test_package_exists_dependency_change(self, mock_config, mock_logger, mock_db): """ Tests that diff correctly records: - New dependency - Changes to existing dependencies - Removed dependencies """ # Setup existing package and dependencies existing_pkg_id = uuid4() dep1_id = uuid4() dep2_id = uuid4() dep3_id = uuid4() existing_import_id = "debian/dep-pkg" existing_package = Package( id=existing_pkg_id, derived_id=existing_import_id, name="dep-pkg", package_manager_id=mock_config.pm_config.pm_id, import_id=existing_import_id, readme="", ) # Create dependency packages dep1_pkg = Package( id=dep1_id, derived_id="debian/dep1", name="dep1", import_id="debian/dep1" ) dep2_pkg = Package( id=dep2_id, derived_id="debian/dep2", name="dep2", import_id="debian/dep2" ) dep3_pkg = Package( id=dep3_id, derived_id="debian/dep3", name="dep3", import_id="debian/dep3" ) # Create existing dependencies (dep1 as runtime, dep2 as build) existing_dep1 = LegacyDependency( package_id=existing_pkg_id, dependency_id=dep1_id, dependency_type_id=mock_config.dependency_types.runtime, ) existing_dep2 = LegacyDependency( package_id=existing_pkg_id, dependency_id=dep2_id, dependency_type_id=mock_config.dependency_types.build, ) # Create cache cache = Cache( package_map={ existing_import_id: existing_package, "debian/dep1": dep1_pkg, "debian/dep2": dep2_pkg, "debian/dep3": dep3_pkg, }, url_map={}, package_urls={}, dependencies={existing_pkg_id: {existing_dep1, existing_dep2}}, ) # Create new package data with changed dependencies # Remove dep2, keep dep1, add dep3 as runtime new_pkg_data = create_debian_package( package="dep-pkg", depends=["dep1", "dep3"], # runtime deps build_depends=[], # no build deps (removes dep2) ) # Test the diff diff = DebianDiff(mock_config, cache, mock_db, mock_logger) new_deps, removed_deps = diff.diff_deps(existing_import_id, new_pkg_data) # Assertions assert len(new_deps) == 1 # dep3 should be added assert new_deps[0].dependency_id == dep3_id assert new_deps[0].dependency_type_id == mock_config.dependency_types.runtime assert len(removed_deps) == 1 # dep2 should be removed assert 
removed_deps[0].dependency_id == dep2_id assert removed_deps[0].dependency_type_id == mock_config.dependency_types.build def test_completely_new_package(self, mock_config, mock_logger, mock_db): """Tests the addition of completely new packages & new URLs""" # Create empty cache (no existing packages) cache = Cache(package_map={}, url_map={}, package_urls={}, dependencies={}) # Create new package data new_pkg_data = create_debian_package( package="new-pkg", description="A new package", homepage="https://github.com/example/new-pkg", depends=["some-dep"], build_depends=["build-tool"], ) # Test the diff diff = DebianDiff(mock_config, cache, mock_db, mock_logger) pkg_id, pkg_obj, update_payload = diff.diff_pkg("debian/new-pkg", new_pkg_data) # Assertions assert pkg_obj is not None # New package should be created assert pkg_obj.derived_id == "debian/new-pkg" assert pkg_obj.name == "new-pkg" assert pkg_obj.import_id == "debian/new-pkg" assert pkg_obj.package_manager_id == mock_config.pm_config.pm_id assert pkg_obj.readme == "A new package" assert update_payload == {} # No updates for new package # Test URL creation new_urls = {} resolved_urls = diff.diff_url("new-pkg", new_pkg_data, new_urls) new_links, updated_links = diff.diff_pkg_url(pkg_id, resolved_urls) # Should create URL for homepage assert len(new_urls) >= 1 # At least homepage assert len(new_links) >= 1 # At least homepage link assert len(updated_links) == 0 # No existing links to update # Check that homepage URL was created homepage_url_found = False for url_key, _url in new_urls.items(): if url_key.url_type_id == mock_config.url_types.homepage: assert url_key.url == "https://github.com/example/new-pkg" homepage_url_found = True break assert homepage_url_found def test_no_changes_scenario(self, mock_config, mock_logger, mock_db): """Tests where package exists but has no changes""" # Setup existing package existing_pkg_id = uuid4() existing_package = Package( id=existing_pkg_id, derived_id="debian/unchanged-pkg", name="unchanged-pkg", package_manager_id=mock_config.pm_config.pm_id, import_id="unchanged-pkg", readme="Unchanged description", ) cache = Cache( package_map={"unchanged-pkg": existing_package}, url_map={}, package_urls={}, dependencies={}, ) # Create package data with same description pkg_data = create_debian_package( package="unchanged-pkg", description="Unchanged description" ) # Test the diff diff = DebianDiff(mock_config, cache, mock_db, mock_logger) pkg_id, pkg_obj, update_payload = diff.diff_pkg("unchanged-pkg", pkg_data) # Assertions assert pkg_id == existing_pkg_id assert pkg_obj is None # No new package assert update_payload is None # No changes def test_package_description_update(self, mock_config, mock_logger, mock_db): """Test scenario where package exists but description has changed""" # Setup existing package existing_pkg_id = uuid4() existing_package = Package( id=existing_pkg_id, derived_id="debian/desc-pkg", name="desc-pkg", package_manager_id=mock_config.pm_config.pm_id, import_id="desc-pkg", readme="Old description", ) cache = Cache( package_map={"desc-pkg": existing_package}, url_map={}, package_urls={}, dependencies={}, ) # Create package data with new description pkg_data = create_debian_package( package="desc-pkg", description="New description" ) # Test the diff diff = DebianDiff(mock_config, cache, mock_db, mock_logger) pkg_id, pkg_obj, update_payload = diff.diff_pkg("desc-pkg", pkg_data) # Assertions assert pkg_id == existing_pkg_id assert pkg_obj is None # No new package assert update_payload is not 
None # Should have changes assert update_payload["id"] == existing_pkg_id assert update_payload["readme"] == "New description" def test_missing_dependency_handling(self, mock_config, mock_logger, mock_db): """Tests that we DON'T add dependencies that reference packages missing from the cache""" existing_pkg_id = uuid4() existing_package = Package( id=existing_pkg_id, derived_id="debian/missing-dep-pkg", name="missing-dep-pkg", import_id="missing-dep-pkg", ) cache = Cache( package_map={"missing-dep-pkg": existing_package}, url_map={}, package_urls={}, dependencies={}, ) # Create package with dependency that doesn't exist in cache pkg_data = create_debian_package( package="missing-dep-pkg", depends=["non-existent-dep"] ) diff = DebianDiff(mock_config, cache, mock_db, mock_logger) new_deps, removed_deps = diff.diff_deps("missing-dep-pkg", pkg_data) # Should handle gracefully - no deps added for missing packages assert len(new_deps) == 0 assert len(removed_deps) == 0 def test_dependency_type_priority_no_change( self, mock_config, mock_logger, mock_db ): """ Scenario: - p1 has runtime dependency to p2 in cache - p1 depends on p2 as both runtime and build in parsed data Expect no change (runtime has priority). """ # Setup existing package and dependencies p1_id = uuid4() p2_id = uuid4() p1_pkg = Package(id=p1_id, derived_id="debian/p1", name="p1", import_id="p1") p2_pkg = Package(id=p2_id, derived_id="debian/p2", name="p2", import_id="p2") # Existing runtime dependency in cache existing_runtime_dep = LegacyDependency( package_id=p1_id, dependency_id=p2_id, dependency_type_id=mock_config.dependency_types.runtime, ) cache = Cache( package_map={"debian/p1": p1_pkg, "debian/p2": p2_pkg}, url_map={}, package_urls={}, dependencies={p1_id: {existing_runtime_dep}}, ) # Parsed data has p2 as both runtime and build dependency new_pkg_data = create_debian_package( package="p1", depends=["p2"], # runtime build_depends=["p2"], # build ) diff = DebianDiff(mock_config, cache, mock_db, mock_logger) new_deps, removed_deps = diff.diff_deps("debian/p1", new_pkg_data) # Should have no changes - runtime priority means no change needed assert len(new_deps) == 0 assert len(removed_deps) == 0 def test_dependency_type_change_runtime_to_build( self, mock_config, mock_logger, mock_db ): """ Scenario: - p1 has runtime dependency to p2 in cache - p1 has build dependency to p2 in parsed data.
Expect removed runtime dependency and new build dependency """ p1_id = uuid4() p2_id = uuid4() p1_pkg = Package(id=p1_id, derived_id="debian/p1", name="p1", import_id="p1") p2_pkg = Package(id=p2_id, derived_id="debian/p2", name="p2", import_id="p2") # Existing runtime dependency existing_runtime_dep = LegacyDependency( package_id=p1_id, dependency_id=p2_id, dependency_type_id=mock_config.dependency_types.runtime, ) cache = Cache( package_map={"debian/p1": p1_pkg, "debian/p2": p2_pkg}, url_map={}, package_urls={}, dependencies={p1_id: {existing_runtime_dep}}, ) # Parsed data only has build dependency new_pkg_data = create_debian_package( package="p1", depends=[], # no runtime deps build_depends=["p2"], # only build ) diff = DebianDiff(mock_config, cache, mock_db, mock_logger) new_deps, removed_deps = diff.diff_deps("debian/p1", new_pkg_data) # Should remove runtime and add build assert len(removed_deps) == 1 assert removed_deps[0].dependency_id == p2_id assert ( removed_deps[0].dependency_type_id == mock_config.dependency_types.runtime ) assert len(new_deps) == 1 assert new_deps[0].dependency_id == p2_id assert new_deps[0].dependency_type_id == mock_config.dependency_types.build def test_dependency_type_change_build_to_runtime( self, mock_config, mock_logger, mock_db ): """ Scenario: - p1 has build dependency to p2 in cache - p1 has runtime dependency to p2 in parsed data. Expect removed build dependency and new runtime dependency """ p1_id = uuid4() p2_id = uuid4() p1_pkg = Package(id=p1_id, derived_id="debian/p1", name="p1", import_id="p1") p2_pkg = Package(id=p2_id, derived_id="debian/p2", name="p2", import_id="p2") # Existing build dependency existing_build_dep = LegacyDependency( package_id=p1_id, dependency_id=p2_id, dependency_type_id=mock_config.dependency_types.build, ) cache = Cache( package_map={"debian/p1": p1_pkg, "debian/p2": p2_pkg}, url_map={}, package_urls={}, dependencies={p1_id: {existing_build_dep}}, ) # Parsed data only has runtime dependency new_pkg_data = create_debian_package( package="p1", depends=["p2"], # runtime build_depends=[], # no build deps ) diff = DebianDiff(mock_config, cache, mock_db, mock_logger) new_deps, removed_deps = diff.diff_deps("debian/p1", new_pkg_data) # Should remove build and add runtime assert len(removed_deps) == 1 assert removed_deps[0].dependency_id == p2_id assert removed_deps[0].dependency_type_id == mock_config.dependency_types.build assert len(new_deps) == 1 assert new_deps[0].dependency_id == p2_id assert new_deps[0].dependency_type_id == mock_config.dependency_types.runtime def test_dependency_type_priority_new_package( self, mock_config, mock_logger, mock_db ): """ Scenario: - p1 has no dependencies to p2 in cache - p1 has both runtime and build dependencies to p2 in parsed data Expect one new runtime dependency (priority over build). 
""" p1_id = uuid4() p2_id = uuid4() p1_pkg = Package(id=p1_id, derived_id="debian/p1", name="p1", import_id="p1") p2_pkg = Package(id=p2_id, derived_id="debian/p2", name="p2", import_id="p2") cache = Cache( package_map={"debian/p1": p1_pkg, "debian/p2": p2_pkg}, url_map={}, package_urls={}, dependencies={}, # No existing dependencies ) # Parsed data has both runtime and build dependencies to p2 new_pkg_data = create_debian_package( package="p1", depends=["p2"], # runtime build_depends=["p2"], # build ) diff = DebianDiff(mock_config, cache, mock_db, mock_logger) new_deps, removed_deps = diff.diff_deps("debian/p1", new_pkg_data) # Should only create one new dependency - runtime (higher priority) assert len(removed_deps) == 0 assert len(new_deps) == 1 assert new_deps[0].dependency_id == p2_id assert new_deps[0].dependency_type_id == mock_config.dependency_types.runtime def test_debian_specific_dependencies(self, mock_config, mock_logger, mock_db): """Test Debian-specific dependency types: recommends, suggests""" p1_id = uuid4() p2_id = uuid4() p3_id = uuid4() p1_pkg = Package(id=p1_id, derived_id="debian/p1", name="p1") p2_pkg = Package(id=p2_id, derived_id="debian/p2", name="p2") p3_pkg = Package(id=p3_id, derived_id="debian/p3", name="p3") cache = Cache( package_map={"debian/p1": p1_pkg, "debian/p2": p2_pkg, "debian/p3": p3_pkg}, url_map={}, package_urls={}, dependencies={}, ) # Parsed data with recommends and suggests (mapped to runtime) new_pkg_data = create_debian_package( package="p1", recommends=["p2"], suggests=["p3"], ) diff = DebianDiff(mock_config, cache, mock_db, mock_logger) new_deps, removed_deps = diff.diff_deps("debian/p1", new_pkg_data) # Should create runtime dependencies for both recommends and suggests assert len(removed_deps) == 0 assert len(new_deps) == 2 # Both should be runtime dependencies for dep in new_deps: assert dep.dependency_type_id == mock_config.dependency_types.runtime assert dep.dependency_id in [p2_id, p3_id] class TestDebianDiffFunction: """Test cases for the main.diff function""" def test_duplicate_package_paragraphs(self, mock_config, mock_logger, mock_db): """Tests the case when the Debian Packages file contains duplicate packages""" d1 = Package(id=uuid4(), derived_id="debian/d1", name="d1", import_id="d1") d2 = Package(id=uuid4(), derived_id="debian/d2", name="d2", import_id="d2") p1 = create_debian_package( package="linux-doc", homepage="homepage.org", depends=["d1"] ) p2 = create_debian_package( package="linux-doc", homepage="homepage.org", depends=["d2"] ) cache = Cache( package_map={"debian/d1": d1, "debian/d2": d2}, url_map={}, package_urls={}, dependencies={}, ) data = [p1, p2] result = main_diff(data, mock_config, cache, mock_db, mock_logger) assert len(result.new_packages) == 1 assert len(result.new_package_urls) == 1 assert len(result.new_deps) == 0 # bc we don't load dependencies of new pkgs ================================================ FILE: tests/package_managers/debian/test_debian_parser.py ================================================ """ Test Debian package parser functionality. This module tests the DebianParser class which parses Debian package and source entries from Packages and Sources files. 
""" import pytest from package_managers.debian.parser import DebianParser @pytest.fixture def simple_package(): return """Package: 0ad Version: 0.0.26-1 Installed-Size: 19162 Maintainer: Debian Games Team Architecture: amd64 Depends: 0ad-data (>= 0.0.26), 0ad-data-common (>= 0.0.26), libc6 (>= 2.29), libcurl4 (>= 7.16.2), libenet7 (>= 1.3.13), libgloox18, libjsoncpp25 (>= 1.9.5), libminiupnpc17 (>= 1.9.20140610), libnspr4 (>= 2:4.9.2), libnss3 (>= 2:3.22) Recommends: fonts-freefont-ttf, fonts-texgyre Suggests: 0ad-dbg Description: Real-time strategy game of ancient warfare Homepage: https://play0ad.com/ Section: games Priority: optional Filename: pool/main/0/0ad/0ad_0.0.26-1_amd64.deb Size: 6050744 MD5sum: a777ddf01c18dbdef15c589f8325d7a3 SHA256: 9da19833c1a51e890aa8a11f82ec1e383c0e79410c3d2f6845fd2ec3e23249b8 """ @pytest.fixture def simple_source(): return """Package: 0ad Binary: 0ad, 0ad-dbg, 0ad-data, 0ad-data-common Version: 0.0.26-1 Maintainer: Debian Games Team Uploaders: Vincent Cheng , Euan Kemp Build-Depends: debhelper-compat (= 13), cmake, dpkg-dev (>= 1.15.5), libboost-dev, libenet-dev (>= 1.3), libopenal-dev, libpng-dev, libsdl2-dev, libtiff5-dev, libvorbis-dev, libxcursor-dev, pkg-config, zlib1g-dev, libcurl4-gnutls-dev, libgloox-dev, libjsoncpp-dev, libminiupnpc-dev, libnspr4-dev, libnss3-dev, libsodium-dev, libwxgtk3.0-gtk3-dev | libwxgtk3.0-dev, python3, python3-dev, libxml2-dev, rust-gdb [amd64 i386 ppc64el] Architecture: any all Standards-Version: 4.5.1 Format: 3.0 (quilt) Files: 2fc0f38b8a4cf56fea7040fcf5f79ca3 2414 0ad_0.0.26-1.dsc 35ca57e781448c69ba31323313e972af 31463733 0ad_0.0.26.orig.tar.xz f78de44c8a9c32e6be3ae99f2747c330 71948 0ad_0.0.26-1.debian.tar.xz Vcs-Browser: https://salsa.debian.org/games-team/0ad Vcs-Git: https://salsa.debian.org/games-team/0ad.git Directory: pool/main/0/0ad Priority: optional Section: games Testsuite: autopkgtest Testsuite-Triggers: g++, pyrex """ @pytest.fixture def multiline_binary(): """Fixture for binary fields, specifically multi-lines ones""" return """ Package: binutils Binary: binutils-for-host, binutils-for-build, binutils-ia64-linux-gnu-dbg, binutils-m68k-linux-gnu, binutils-mips64el-linux-gnuabin32-dbg, binutils-mipsisa64r6-linux-gnuabin32, binutils-mipsisa64r6el-linux-gnuabi64-dbg """ @pytest.fixture def build_depends(): """Fixture for all kinds of build depends.""" return """ Package: example Build-Depends: gcc-11-source (>= 11.3.0-11~), gawk, lib32gcc1-amd64-cross [amd64 arm64 i386 ppc64el x32], g++-11, gm2-11 [!powerpc !ppc64 !x32] """ @pytest.mark.parser class TestDebianParser: """Test the Debian parser functionality.""" def test_build_depends(self, build_depends): """Test parsing build depends.""" parser = DebianParser(build_depends) sources = list(parser.parse()) assert len(sources) == 1 source = sources[0] assert len(source.build_depends) == 5 assert any(dep.package == "gcc-11-source" for dep in source.build_depends) assert any(dep.package == "gawk" for dep in source.build_depends) assert any( dep.package == "lib32gcc1-amd64-cross" for dep in source.build_depends ) assert any(dep.package == "g++-11" for dep in source.build_depends) assert any(dep.package == "gm2-11" for dep in source.build_depends) def test_multiline_binary(self, multiline_binary): """Test handling of multiline binaries.""" parser = DebianParser(multiline_binary) sources = list(parser.parse()) assert len(sources) == 1 source = sources[0] assert source.package == "binutils" assert source.binary == [ "binutils-for-host", "binutils-for-build", 
"binutils-ia64-linux-gnu-dbg", "binutils-m68k-linux-gnu", "binutils-mips64el-linux-gnuabin32-dbg", "binutils-mipsisa64r6-linux-gnuabin32", "binutils-mipsisa64r6el-linux-gnuabi64-dbg", ] def test_parse_package_data(self, simple_package): """Test parsing a typical package entry from Packages file.""" # Sample package data from a Packages file package_data = simple_package # Parse the package data parser = DebianParser(package_data) packages = list(parser.parse()) # Validate we have one package assert len(packages) == 1 package = packages[0] # Test basic fields assert package.package == "0ad" assert package.version == "0.0.26-1" assert package.installed_size == 19162 assert package.architecture == "amd64" # Test maintainer parsing assert package.maintainer.name == "Debian Games Team" assert package.maintainer.email == "pkg-games-devel@lists.alioth.debian.org" # Test dependency parsing assert len(package.depends) == 10 assert package.depends[0].package == "0ad-data" assert package.depends[0].semver == ">= 0.0.26" # Test recommends parsing assert len(package.recommends) == 2 assert package.recommends[0].package == "fonts-freefont-ttf" # Test suggests parsing assert len(package.suggests) == 1 assert package.suggests[0].package == "0ad-dbg" def test_parse_source_data(self, simple_source): """Test parsing a typical source entry from Sources file.""" source_data = simple_source parser = DebianParser(source_data) sources = list(parser.parse()) # Validate we have one source package assert len(sources) == 1 source = sources[0] # Test basic fields assert source.package == "0ad" assert source.version == "0.0.26-1" # Test binary field assert isinstance(source.binary, list) # Fixed: binary should be a list assert "0ad" in source.binary assert "0ad-dbg" in source.binary assert "0ad-data" in source.binary assert "0ad-data-common" in source.binary # Test maintainer parsing assert source.maintainer.name == "Debian Games Team" assert source.maintainer.email == "pkg-games-devel@lists.alioth.debian.org" # Test uploaders parsing assert len(source.uploaders) == 2 assert source.uploaders[0].name == "Vincent Cheng" assert source.uploaders[0].email == "vcheng@debian.org" assert source.uploaders[1].name == "Euan Kemp" assert source.uploaders[1].email == "euank@euank.com" # Test build depends parsing assert len(source.build_depends) == 25 assert any(dep.package == "debhelper-compat" for dep in source.build_depends) # Test other source fields assert source.format == "3.0 (quilt)" assert source.vcs_browser == "salsa.debian.org/games-team/0ad" assert source.vcs_git == "salsa.debian.org/games-team/0ad" assert source.testsuite == "autopkgtest" assert source.testsuite_triggers == "g++, pyrex" ================================================ FILE: tests/package_managers/debian/test_debian_sources.py ================================================ from package_managers.debian.main import ( build_package_to_source_mapping, enrich_package_with_source, ) from tests.package_managers.debian.conftest import create_debian_package class TestPackageSourceMapping: """Test cases for package to source mapping functionality""" def test_build_package_to_source_mapping_with_binary_list( self, tmp_path, mock_logger ): """Test building mapping when source has explicit binary list""" # Create a test sources file sources_content = """Package: test-source Binary: test-pkg1, test-pkg2, test-pkg3 Vcs-Git: https://github.com/test/test-source.git Homepage: https://example.com/test-source Package: another-source Binary: another-pkg Vcs-Browser: 
https://github.com/test/another-source """ sources_file = tmp_path / "sources" sources_file.write_text(sources_content) # Build mapping mapping = build_package_to_source_mapping(str(sources_file), mock_logger) # Verify mapping assert len(mapping) == 4 # 3 packages from first source + 1 from second assert "test-pkg1" in mapping assert "test-pkg2" in mapping assert "test-pkg3" in mapping assert "another-pkg" in mapping # Verify source data is correctly associated assert mapping["test-pkg1"].package == "test-source" # URLs are normalized by the parser - expect normalized format assert mapping["test-pkg1"].vcs_git == "github.com/test/test-source" assert mapping["test-pkg2"].package == "test-source" assert mapping["another-pkg"].package == "another-source" assert mapping["another-pkg"].vcs_browser == "github.com/test/another-source" def test_build_package_to_source_mapping_no_binary_list( self, tmp_path, mock_logger ): """Test building mapping when source has no explicit binary list""" # Create a test sources file with no Binary field sources_content = """Package: single-source Vcs-Git: https://github.com/test/single-source.git Homepage: https://example.com/single-source """ sources_file = tmp_path / "sources" sources_file.write_text(sources_content) # Build mapping mapping = build_package_to_source_mapping(str(sources_file), mock_logger) # Verify mapping - should use source package name as binary name assert len(mapping) == 1 assert "single-source" in mapping assert mapping["single-source"].package == "single-source" # URLs are normalized by the parser - expect normalized format assert mapping["single-source"].vcs_git == "github.com/test/single-source" def test_enrich_package_with_explicit_source(self, mock_logger): """Test enriching package that has explicit source reference""" # Create package data with explicit source reference package_data = create_debian_package( package="binary-pkg", description="A binary package", ) package_data.source = "source-pkg" # Create source mapping source_data = create_debian_package( package="source-pkg", vcs_git="github.com/test/source-pkg", # Already normalized format homepage="example.com/source-pkg", # Already normalized format build_depends=["build-dep1", "build-dep2"], ) source_mapping = {"binary-pkg": source_data} # Enrich package enriched = enrich_package_with_source(package_data, source_mapping, mock_logger) # Verify enrichment assert enriched.package == "binary-pkg" assert enriched.description == "A binary package" assert enriched.vcs_git == "github.com/test/source-pkg" assert enriched.homepage == "example.com/source-pkg" assert len(enriched.build_depends) == 2 build_depend_names = [item.package for item in enriched.build_depends] assert build_depend_names == ["build-dep1", "build-dep2"] def test_enrich_package_no_explicit_source(self, mock_logger): """Test enriching package with no explicit source reference""" # Create package data with no explicit source package_data = create_debian_package( package="self-source-pkg", description="A self-sourced package", ) # Create source mapping with same name as package source_data = create_debian_package( package="self-source-pkg", vcs_browser="github.com/test/self-source-pkg", # Already normalized format directory="pool/main/s/self-source-pkg", ) source_mapping = {"self-source-pkg": source_data} # Enrich package enriched = enrich_package_with_source(package_data, source_mapping, mock_logger) # Verify enrichment assert enriched.package == "self-source-pkg" assert enriched.vcs_browser == 
"github.com/test/self-source-pkg" assert enriched.directory == "pool/main/s/self-source-pkg" def test_enrich_package_missing_source_warning(self, caplog, mock_logger): """Test warning when package references missing source""" from package_managers.debian.main import enrich_package_with_source # Create package data with source that doesn't exist in mapping package_data = create_debian_package( package="orphan-pkg", description="An orphaned package", ) package_data.source = "missing-source" # Empty source mapping source_mapping = {} # Enrich package (this should log a warning) enriched = enrich_package_with_source(package_data, source_mapping, mock_logger) # The warning should be present in the function execution output # Check the logged warning message directly # Note: The warning is logged by our function, so we check the expected behavior # Package should remain unchanged assert enriched.package == "orphan-pkg" assert enriched.description == "An orphaned package" assert not enriched.vcs_git assert not enriched.vcs_browser def test_enrich_package_preserves_existing_fields(self, mock_logger): """Test that existing package fields are not overwritten""" # Create package data with existing homepage package_data = create_debian_package( package="pkg-with-homepage", homepage="pkg-homepage.com", # Normalized format ) # Create source data with different homepage source_data = create_debian_package( package="pkg-with-homepage", homepage="source-homepage.com", # Normalized format vcs_git="github.com/test/pkg", # Normalized format ) source_mapping = {"pkg-with-homepage": source_data} # Enrich package enriched = enrich_package_with_source(package_data, source_mapping, mock_logger) # Verify package homepage is preserved, but source info is added assert enriched.homepage == "pkg-homepage.com" # Package value preserved assert enriched.vcs_git == "github.com/test/pkg" # Source value added ================================================ FILE: tests/package_managers/homebrew/conftest.py ================================================ from datetime import datetime from uuid import UUID, uuid4 import pytest from core.models import Package from core.structs import Cache from package_managers.homebrew.diff import Diff from package_managers.homebrew.structs import Actual @pytest.fixture def package_ids() -> dict[str, UUID]: """Fixture providing consistent package IDs for testing.""" return {"foo": uuid4(), "bar": uuid4(), "baz": uuid4(), "qux": uuid4()} @pytest.fixture def packages(package_ids) -> dict[str, Package]: """Fixture providing test packages.""" return { "foo": Package( id=package_ids["foo"], name="foo", package_manager_id=1, import_id="foo", created_at=datetime.now(), updated_at=datetime.now(), ), "bar": Package( id=package_ids["bar"], name="bar", package_manager_id=1, import_id="bar", created_at=datetime.now(), updated_at=datetime.now(), ), "baz": Package( id=package_ids["baz"], name="baz", package_manager_id=1, import_id="baz", created_at=datetime.now(), updated_at=datetime.now(), ), "qux": Package( id=package_ids["qux"], name="qux", package_manager_id=1, import_id="qux", created_at=datetime.now(), updated_at=datetime.now(), ), } @pytest.fixture def diff_instance(mock_config): """ Factory fixture to create Diff instances with specific cache configurations. Returns a function that creates Diff instances. 
""" def create_diff( package_map, dependencies=None, url_map=None, package_urls=None ) -> Diff: cache = Cache( package_map=package_map, url_map=url_map or {}, package_urls=package_urls or {}, dependencies=dependencies or {}, ) return Diff(mock_config, cache) return create_diff @pytest.fixture def homebrew_formula(): """ Factory fixture to create Actual homebrew formula objects. Returns a function that creates Actual objects. """ def create_formula( formula_name, dependencies=None, build_dependencies=None, test_dependencies=None, recommended_dependencies=None, optional_dependencies=None, ): return Actual( formula=formula_name, description="Test formula", license="MIT", homepage="", source="", repository="", dependencies=dependencies or [], build_dependencies=build_dependencies or [], test_dependencies=test_dependencies or [], recommended_dependencies=recommended_dependencies or [], optional_dependencies=optional_dependencies or [], ) return create_formula ================================================ FILE: tests/package_managers/homebrew/test_homebrew_diff_deps.py ================================================ """ Test the diff_deps functionality for the homebrew package manager. This module tests the Diff.diff_deps method which determines what dependencies need to be added or removed when processing homebrew formula updates. """ from datetime import datetime from uuid import UUID, uuid4 import pytest from core.models import LegacyDependency, Package from package_managers.homebrew.diff import Diff from package_managers.homebrew.structs import Actual @pytest.mark.transformer class TestDiffDeps: """Tests for the diff_deps method in the Diff class.""" def test_new_package_not_in_cache(self, packages, diff_instance, homebrew_formula): """ If the package is not even in the package cache, that means it is new. Since we won't know the ID of the package during dependency loading, we're going to continue to the next package and write a warning. 
""" # Create cache without the package we'll look for diff = diff_instance( package_map={"bar": packages["bar"], "baz": packages["baz"]} ) # Create an Actual package that's not in the cache new_pkg = homebrew_formula( "new_package", dependencies=["baz"], build_dependencies=["bar"] ) # Execute new_deps, removed_deps = diff.diff_deps(new_pkg) # Assert assert len(new_deps) == 0, "No new deps for new pkg" assert len(removed_deps) == 0, "No removed deps for new pkg" def test_existing_package_adding_dependency( self, packages: dict[str, Package], package_ids: dict[str, UUID], diff_instance: Diff, homebrew_formula: Actual, mock_config, ): """Test diff_deps when adding a new dependency to an existing package.""" # Create existing dependency existing_dep = LegacyDependency( id=uuid4(), package_id=package_ids["foo"], dependency_id=package_ids["bar"], dependency_type_id=mock_config.dependency_types.runtime, created_at=datetime.now(), updated_at=datetime.now(), ) # Create diff with existing dependency diff: Diff = diff_instance( package_map=packages, dependencies={package_ids["foo"]: {existing_dep}} ) for k, s in diff.caches.dependencies.items(): print(f"Package: {k}") for v in s: print(v.dependency_id) print(v.dependency_type_id) # Create formula with existing dependency plus a new one pkg = homebrew_formula( "foo", dependencies=["bar"], # existing dependency build_dependencies=["baz"], # new dependency ) # Execute new_deps, removed_deps = diff.diff_deps(pkg) for dep in new_deps: print( f"{dep.package_id} -> {dep.dependency_id} as {dep.dependency_type_id}" ) # Assert assert len(new_deps) == 1, "One new dependency should be added" assert len(removed_deps) == 0, "No dependencies should be removed" # Verify new dep is a build dep on baz new_dep = new_deps[0] assert new_dep.package_id == package_ids["foo"] assert new_dep.dependency_id == package_ids["baz"] assert new_dep.dependency_type_id == mock_config.dependency_types.build def test_existing_package_removing_dependency( self, packages, package_ids, diff_instance, homebrew_formula, mock_config ): """Test diff_deps when removing a dependency from an existing package.""" # Create existing dependencies existing_dep = LegacyDependency( id=uuid4(), package_id=package_ids["foo"], dependency_id=package_ids["bar"], dependency_type_id=mock_config.dependency_types.runtime, created_at=datetime.now(), updated_at=datetime.now(), ) to_be_removed_dep = LegacyDependency( id=uuid4(), package_id=package_ids["foo"], dependency_id=package_ids["baz"], dependency_type_id=mock_config.dependency_types.build, created_at=datetime.now(), updated_at=datetime.now(), ) # Create diff with both dependencies diff = diff_instance( package_map=packages, dependencies={package_ids["foo"]: {existing_dep, to_be_removed_dep}}, ) # Create formula with only one of the previous dependencies pkg = homebrew_formula( "foo", dependencies=["bar"], # only keep this dependency ) # Execute new_deps, removed_deps = diff.diff_deps(pkg) # Assert assert len(new_deps) == 0, "No new deps should be added" assert len(removed_deps) == 1, "One dep should be removed" # Verify removed dep is a build dep on baz removed_dep = removed_deps[0] assert removed_dep.package_id == package_ids["foo"] assert removed_dep.dependency_id == package_ids["baz"] assert removed_dep.dependency_type_id == mock_config.dependency_types.build def test_existing_package_changing_dependency_type( self, packages, package_ids, diff_instance, homebrew_formula, mock_config ): """ If the dependency types for a specific package to package 
relationship change, then Diff sees two changes: one removal and one addition. """ # Create existing runtime dependency existing_dep = LegacyDependency( id=uuid4(), package_id=package_ids["foo"], dependency_id=package_ids["bar"], dependency_type_id=mock_config.dependency_types.runtime, created_at=datetime.now(), updated_at=datetime.now(), ) # Create diff with existing dependency diff = diff_instance( package_map={"foo": packages["foo"], "bar": packages["bar"]}, dependencies={package_ids["foo"]: {existing_dep}}, ) # Create formula with same dependency but changed type pkg = homebrew_formula( "foo", build_dependencies=["bar"], # Changed from runtime to build ) # Execute new_deps, removed_deps = diff.diff_deps(pkg) # Assert assert len(new_deps) == 1, "One new dep should be added (new type)" assert len(removed_deps) == 1, "One dep should be removed (old type)" # Verify removed dep is runtime removed_dep = removed_deps[0] assert removed_dep.package_id == package_ids["foo"] assert removed_dep.dependency_id == package_ids["bar"] assert removed_dep.dependency_type_id == mock_config.dependency_types.runtime # Verify new dep is build new_dep = new_deps[0] assert new_dep.package_id == package_ids["foo"] assert new_dep.dependency_id == package_ids["bar"] assert new_dep.dependency_type_id == mock_config.dependency_types.build def test_existing_package_no_dependency_changes( self, packages, package_ids, diff_instance, homebrew_formula, mock_config ): """ Test a case where there's no changes to be made, because the database and Homebrew's JSON response indicate the same data. """ # Create existing dependency existing_dep = LegacyDependency( id=uuid4(), package_id=package_ids["foo"], dependency_id=package_ids["bar"], dependency_type_id=mock_config.dependency_types.runtime, created_at=datetime.now(), updated_at=datetime.now(), ) # Create diff with existing dependency diff = diff_instance( package_map={"foo": packages["foo"], "bar": packages["bar"]}, dependencies={package_ids["foo"]: {existing_dep}}, ) # Create formula with same dependency and type pkg = homebrew_formula( "foo", dependencies=["bar"], # same dependency with same type ) # Execute new_deps, removed_deps = diff.diff_deps(pkg) # Assert assert len(new_deps) == 0, "No new deps should be added" assert len(removed_deps) == 0, "No deps should be removed" def test_existing_package_same_dependency_multiple_times_no_changes( self, packages, package_ids, diff_instance, homebrew_formula, mock_config ): """ The case here is that the formula specifies a runtime and build dependency, and the db already captured the runtime dependency. Since the Diff class has a hierarchy of which dependency to choose, and runtime is on top, we should see no changes. 
""" # Create existing runtime dependency existing_dep = LegacyDependency( id=uuid4(), package_id=package_ids["foo"], dependency_id=package_ids["bar"], dependency_type_id=mock_config.dependency_types.runtime, created_at=datetime.now(), updated_at=datetime.now(), ) # Create diff with existing dependency diff = diff_instance( package_map={"foo": packages["foo"], "bar": packages["bar"]}, dependencies={package_ids["foo"]: {existing_dep}}, ) # Create formula with same dependency multiple times pkg = homebrew_formula("foo", dependencies=["bar"], build_dependencies=["bar"]) # Execute new_deps, removed_deps = diff.diff_deps(pkg) # Assert # Since runtime is encountered first and that's in the DB/cache, # we should see no new dependencies assert len(new_deps) == 0, "No new deps should be added" assert len(removed_deps) == 0, "No deps should be removed" def test_existing_package_same_dependency_multiple_times_yes_changes( self, packages, package_ids, diff_instance, homebrew_formula, mock_config ): """ In this case, suppose the DB maintained a build relationship between foo and bar and actually there is a runtime and build dependency according to Homebrew. Here CHAI updates this record to a runtime dependency. """ # Create existing build dependency existing_dep = LegacyDependency( id=uuid4(), package_id=package_ids["foo"], dependency_id=package_ids["bar"], dependency_type_id=mock_config.dependency_types.build, created_at=datetime.now(), updated_at=datetime.now(), ) # Create diff with existing dependency diff = diff_instance( package_map={"foo": packages["foo"], "bar": packages["bar"]}, dependencies={package_ids["foo"]: {existing_dep}}, ) # Create formula with same dependency multiple times pkg = homebrew_formula( "foo", dependencies=["bar"], # runtime has higher priority build_dependencies=["bar"], ) # Execute new_deps, removed_deps = diff.diff_deps(pkg) # Assert assert len(new_deps) == 1, "One new dependency should be added" assert new_deps[0].dependency_type_id == mock_config.dependency_types.runtime, ( "The new dependency should be runtime" ) assert len(removed_deps) == 1, "The build dependency should be removed" assert ( removed_deps[0].dependency_type_id == mock_config.dependency_types.build ), "The removed dependency should be build" ================================================ FILE: tests/package_managers/pkgx/test_pkgx_diff.py ================================================ #!/usr/bin/env pkgx uv run from unittest.mock import patch from uuid import uuid4 from core.models import URL, LegacyDependency, Package, PackageURL from core.structs import Cache, URLKey from package_managers.pkgx.diff import PkgxDiff from package_managers.pkgx.parser import ( Dependency, DependencyBlock, Distributable, PkgxPackage, Version, ) def create_pkgx_package( distributables: list[str] | None = None, dependencies: list[str] | None = None, build_deps: list[str] | None = None, test_deps: list[str] | None = None, ) -> PkgxPackage: """Helper to create PkgxPackage instances for testing""" # Create distributable blocks distributable_blocks = [] if distributables: for url in distributables: distributable_blocks.append(Distributable(url=url)) # Create dependency objects dep_objects = [ DependencyBlock( platform="all", dependencies=[ Dependency(name=dep, semver="*") for dep in (dependencies or []) ], ) ] build_dep_objects = [ DependencyBlock( platform="all", dependencies=[ Dependency(name=dep, semver="*") for dep in (build_deps or []) ], ) ] test_dep_objects = [ DependencyBlock( platform="all", dependencies=[ 
Dependency(name=dep, semver="*") for dep in (test_deps or []) ], ) ] # Create version object version = Version() return PkgxPackage( distributable=distributable_blocks, versions=version, dependencies=dep_objects, build=DependencyBlock(platform="linux", dependencies=build_dep_objects), test=DependencyBlock(platform="linux", dependencies=test_dep_objects), ) class TestPkgxDifferentialLoading: """Test cases for pkgx differential loading scenarios""" def test_package_exists_url_update(self, mock_config, mock_logger, mock_db): """Test scenario 2: Package existed in database and needed a URL update""" # Setup existing package and URL existing_pkg_id = uuid4() existing_url_id = uuid4() existing_package_url_id = uuid4() existing_package = Package( id=existing_pkg_id, derived_id="pkgx/url-pkg", name="url-pkg", package_manager_id=mock_config.pm_config.pm_id, import_id="url-pkg", readme="Test package", ) existing_url = URL( id=existing_url_id, url="https://old-source.com/file.tar.gz", url_type_id=mock_config.url_types.source, ) existing_package_url = PackageURL( id=existing_package_url_id, package_id=existing_pkg_id, url_id=existing_url_id, ) # Create cache cache = Cache( package_map={"url-pkg": existing_package}, url_map={ URLKey( "https://old-source.com/file.tar.gz", mock_config.url_types.source ): existing_url }, package_urls={existing_pkg_id: {existing_package_url}}, dependencies={}, ) # Create package data with new URL new_pkg_data = create_pkgx_package( distributables=["https://new-source.com/file.tar.gz"], ) new_generated_urls = [ URLKey("https://new-source.com/file.tar.gz", mock_config.url_types.source) ] new_urls = {} # this tracks all the new URLs we've created so far - # let it be empty for this test # Test the diff diff = PkgxDiff(mock_config, cache, mock_db, mock_logger) # Mock the URL retrieval step with ( patch( "package_managers.pkgx.diff.generate_chai_urls", return_value=new_generated_urls, ), ): resolved_urls = diff.diff_url("url-pkg", new_pkg_data, new_urls) new_links, _ = diff.diff_pkg_url(existing_pkg_id, resolved_urls) # Assertions assert len(new_links) == 1 # New URL should be created new_link = new_links[0] assert new_link.package_id == existing_pkg_id # The URL should be created in new_urls dict and the link should reference it assert len(new_urls) == 1 # One new URL should be created new_url_key = next(iter(new_urls.keys())) new_url = new_urls[new_url_key] assert new_link.url_id == new_url.id # Link should reference the new URL assert new_url_key.url == "https://new-source.com/file.tar.gz" assert new_url_key.url_type_id == mock_config.url_types.source def test_package_exists_dependency_change(self, mock_config, mock_logger, mock_db): """Test scenario 3: Package existed in database and changed its dependencies""" # Setup existing package and dependencies existing_pkg_id = uuid4() dep1_id = uuid4() dep2_id = uuid4() dep3_id = uuid4() existing_package = Package( id=existing_pkg_id, derived_id="pkgx/dep-pkg", name="dep-pkg", package_manager_id=mock_config.pm_config.pm_id, import_id="dep-pkg", readme="", ) # Create dependency packages dep1_pkg = Package( id=dep1_id, derived_id="pkgx/dep1", name="dep1", import_id="dep1" ) dep2_pkg = Package( id=dep2_id, derived_id="pkgx/dep2", name="dep2", import_id="dep2" ) dep3_pkg = Package( id=dep3_id, derived_id="pkgx/dep3", name="dep3", import_id="dep3" ) # Create existing dependencies (dep1 as runtime, dep2 as build) existing_dep1 = LegacyDependency( package_id=existing_pkg_id, dependency_id=dep1_id, 
dependency_type_id=mock_config.dependency_types.runtime, ) existing_dep2 = LegacyDependency( package_id=existing_pkg_id, dependency_id=dep2_id, dependency_type_id=mock_config.dependency_types.build, ) # Create cache cache = Cache( package_map={ "dep-pkg": existing_package, "dep1": dep1_pkg, "dep2": dep2_pkg, "dep3": dep3_pkg, }, url_map={}, package_urls={}, dependencies={existing_pkg_id: {existing_dep1, existing_dep2}}, ) # Create new package data with changed dependencies # Remove dep2, keep dep1, add dep3 as runtime new_pkg_data = create_pkgx_package( dependencies=["dep1", "dep3"], # runtime deps build_deps=[], # no build deps (removes dep2) ) # Test the diff diff = PkgxDiff(mock_config, cache, mock_db, mock_logger) new_deps, removed_deps = diff.diff_deps("dep-pkg", new_pkg_data) # Assertions assert len(new_deps) == 1 # dep3 should be added assert new_deps[0].dependency_id == dep3_id assert new_deps[0].dependency_type_id == mock_config.dependency_types.runtime assert len(removed_deps) == 1 # dep2 should be removed assert removed_deps[0].dependency_id == dep2_id assert removed_deps[0].dependency_type_id == mock_config.dependency_types.build def test_completely_new_package(self, mock_config, mock_logger, mock_db): """Test scenario 4: Package was completely new to the database""" # Create empty cache (no existing packages) cache = Cache(package_map={}, url_map={}, package_urls={}, dependencies={}) # Create new package data new_pkg_data = create_pkgx_package( distributables=["https://github.com/example/new-pkg/archive/v1.0.tar.gz"], dependencies=["some-dep"], build_deps=["build-tool"], ) # Test the diff diff = PkgxDiff(mock_config, cache, mock_db, mock_logger) pkg_id, pkg_obj, update_payload = diff.diff_pkg("new-pkg", new_pkg_data) # Assertions assert pkg_obj is not None # New package should be created assert pkg_obj.derived_id == "pkgx/new-pkg" assert pkg_obj.name == "new-pkg" assert pkg_obj.import_id == "new-pkg" assert pkg_obj.package_manager_id == mock_config.pm_config.pm_id assert update_payload == {} # No updates for new package # Test URL creation new_urls = {} # Mock generate_chai_urls to return predictable URLs mock_urls = [ URLKey( "https://github.com/example/new-pkg", mock_config.url_types.homepage ), URLKey( "https://github.com/example/new-pkg/archive/v1.0.tar.gz", mock_config.url_types.source, ), ] with patch( "package_managers.pkgx.diff.generate_chai_urls", return_value=mock_urls ): resolved_urls = diff.diff_url("new-pkg", new_pkg_data, new_urls) new_links, updated_links = diff.diff_pkg_url(pkg_id, resolved_urls) # Should create URLs for homepage, source, and repository (GitHub) assert len(new_urls) >= 2 # At least source and homepage assert len(new_links) >= 2 # At least source and homepage links assert len(updated_links) == 0 # No existing links to update def test_no_changes_scenario(self, mock_config, mock_logger, mock_db): """Test scenario where package exists but has no changes""" # Setup existing package existing_pkg_id = uuid4() existing_package = Package( id=existing_pkg_id, derived_id="pkgx/unchanged-pkg", name="unchanged-pkg", package_manager_id=mock_config.pm_config.pm_id, import_id="unchanged-pkg", readme="Unchanged description", ) cache = Cache( package_map={"unchanged-pkg": existing_package}, url_map={}, package_urls={}, dependencies={}, ) # Create package data with same description pkg_data = create_pkgx_package() # Test the diff diff = PkgxDiff(mock_config, cache, mock_db, mock_logger) pkg_id, pkg_obj, update_payload = diff.diff_pkg("unchanged-pkg", pkg_data) # 
Assertions assert pkg_id == existing_pkg_id assert pkg_obj is None # No new package assert update_payload is None # No changes def test_missing_dependency_handling(self, mock_config, mock_logger, mock_db): """Test how missing dependencies are handled""" existing_pkg_id = uuid4() existing_package = Package( id=existing_pkg_id, derived_id="pkgx/missing-dep-pkg", name="missing-dep-pkg", import_id="missing-dep-pkg", ) cache = Cache( package_map={"missing-dep-pkg": existing_package}, url_map={}, package_urls={}, dependencies={}, ) # Create package with dependency that doesn't exist in cache pkg_data = create_pkgx_package(dependencies=["non-existent-dep"]) diff = PkgxDiff(mock_config, cache, mock_db, mock_logger) new_deps, removed_deps = diff.diff_deps("missing-dep-pkg", pkg_data) # Should handle gracefully - no deps added for missing packages assert len(new_deps) == 0 assert len(removed_deps) == 0 def test_dependency_type_priority_no_change( self, mock_config, mock_logger, mock_db ): """Test case 1: p1 has runtime dependency to p2 in cache, p1 depends on p2 as both runtime and build in parsed data. Expect no change (runtime has priority).""" # Setup existing package and dependencies p1_id = uuid4() p2_id = uuid4() p1_pkg = Package(id=p1_id, derived_id="pkgx/p1", name="p1", import_id="p1") p2_pkg = Package(id=p2_id, derived_id="pkgx/p2", name="p2", import_id="p2") # Existing runtime dependency in cache existing_runtime_dep = LegacyDependency( package_id=p1_id, dependency_id=p2_id, dependency_type_id=mock_config.dependency_types.runtime, ) cache = Cache( package_map={"p1": p1_pkg, "p2": p2_pkg}, url_map={}, package_urls={}, dependencies={p1_id: {existing_runtime_dep}}, ) # Parsed data has p2 as both runtime and build dependency new_pkg_data = create_pkgx_package( dependencies=["p2"], # runtime build_deps=["p2"], # build ) diff = PkgxDiff(mock_config, cache, mock_db, mock_logger) new_deps, removed_deps = diff.diff_deps("p1", new_pkg_data) # Should have no changes - runtime priority means no change needed assert len(new_deps) == 0 assert len(removed_deps) == 0 def test_dependency_type_change_runtime_to_build( self, mock_config, mock_logger, mock_db ): """Test case 2: p1 has runtime dependency to p2 in cache, p1 has build dependency to p2 in parsed data. 
Expect removed runtime dependency and new build dependency.""" p1_id = uuid4() p2_id = uuid4() p1_pkg = Package(id=p1_id, derived_id="pkgx/p1", name="p1", import_id="p1") p2_pkg = Package(id=p2_id, derived_id="pkgx/p2", name="p2", import_id="p2") # Existing runtime dependency existing_runtime_dep = LegacyDependency( package_id=p1_id, dependency_id=p2_id, dependency_type_id=mock_config.dependency_types.runtime, ) cache = Cache( package_map={"p1": p1_pkg, "p2": p2_pkg}, url_map={}, package_urls={}, dependencies={p1_id: {existing_runtime_dep}}, ) # Parsed data only has build dependency new_pkg_data = create_pkgx_package( dependencies=[], # no runtime deps build_deps=["p2"], # only build ) diff = PkgxDiff(mock_config, cache, mock_db, mock_logger) new_deps, removed_deps = diff.diff_deps("p1", new_pkg_data) # Should remove runtime and add build assert len(removed_deps) == 1 assert removed_deps[0].dependency_id == p2_id assert ( removed_deps[0].dependency_type_id == mock_config.dependency_types.runtime ) assert len(new_deps) == 1 assert new_deps[0].dependency_id == p2_id assert new_deps[0].dependency_type_id == mock_config.dependency_types.build def test_dependency_type_change_build_to_runtime( self, mock_config, mock_logger, mock_db ): """Test case 3: p1 has build dependency to p2 in cache, p1 has runtime dependency to p2 in parsed data. Expect removed build dependency and new runtime dependency.""" p1_id = uuid4() p2_id = uuid4() p1_pkg = Package(id=p1_id, derived_id="pkgx/p1", name="p1", import_id="p1") p2_pkg = Package(id=p2_id, derived_id="pkgx/p2", name="p2", import_id="p2") # Existing build dependency existing_build_dep = LegacyDependency( package_id=p1_id, dependency_id=p2_id, dependency_type_id=mock_config.dependency_types.build, ) cache = Cache( package_map={"p1": p1_pkg, "p2": p2_pkg}, url_map={}, package_urls={}, dependencies={p1_id: {existing_build_dep}}, ) # Parsed data only has runtime dependency new_pkg_data = create_pkgx_package( dependencies=["p2"], # runtime build_deps=[], # no build deps ) diff = PkgxDiff(mock_config, cache, mock_db, mock_logger) new_deps, removed_deps = diff.diff_deps("p1", new_pkg_data) # Should remove build and add runtime assert len(removed_deps) == 1 assert removed_deps[0].dependency_id == p2_id assert removed_deps[0].dependency_type_id == mock_config.dependency_types.build assert len(new_deps) == 1 assert new_deps[0].dependency_id == p2_id assert new_deps[0].dependency_type_id == mock_config.dependency_types.runtime def test_dependency_type_priority_new_package( self, mock_config, mock_logger, mock_db ): """Test case 4: p1 has no dependencies to p2 in cache, p1 has both runtime and build dependencies to p2 in parsed data. 
Expect one new runtime dependency (priority over build).""" p1_id = uuid4() p2_id = uuid4() p1_pkg = Package(id=p1_id, derived_id="pkgx/p1", name="p1", import_id="p1") p2_pkg = Package(id=p2_id, derived_id="pkgx/p2", name="p2", import_id="p2") cache = Cache( package_map={"p1": p1_pkg, "p2": p2_pkg}, url_map={}, package_urls={}, dependencies={}, # No existing dependencies ) # Parsed data has both runtime and build dependencies to p2 new_pkg_data = create_pkgx_package( dependencies=["p2"], # runtime build_deps=["p2"], # build ) diff = PkgxDiff(mock_config, cache, mock_db, mock_logger) new_deps, removed_deps = diff.diff_deps("p1", new_pkg_data) # Should only create one new dependency - runtime (higher priority) assert len(removed_deps) == 0 assert len(new_deps) == 1 assert new_deps[0].dependency_id == p2_id assert new_deps[0].dependency_type_id == mock_config.dependency_types.runtime def test_dependency_type_priority_with_test( self, mock_config, mock_logger, mock_db ): """Test priority handling with test dependencies: Runtime > Build > Test""" p1_id = uuid4() p2_id = uuid4() p3_id = uuid4() p4_id = uuid4() p1_pkg = Package(id=p1_id, derived_id="pkgx/p1", name="p1", import_id="p1") p2_pkg = Package(id=p2_id, derived_id="pkgx/p2", name="p2", import_id="p2") p3_pkg = Package(id=p3_id, derived_id="pkgx/p3", name="p3", import_id="p3") p4_pkg = Package(id=p4_id, derived_id="pkgx/p4", name="p4", import_id="p4") cache = Cache( package_map={"p1": p1_pkg, "p2": p2_pkg, "p3": p3_pkg, "p4": p4_pkg}, url_map={}, package_urls={}, dependencies={}, ) # Parsed data with overlapping dependencies across different types new_pkg_data = create_pkgx_package( dependencies=["p2", "p3"], # runtime: p2, p3 build_deps=["p2", "p4"], # build: p2, p4 test_deps=["p2", "p3", "p4"], # test: p2, p3, p4 ) diff = PkgxDiff(mock_config, cache, mock_db, mock_logger) new_deps, removed_deps = diff.diff_deps("p1", new_pkg_data) # Should create dependencies based on priority: # p2: runtime (highest priority among runtime/build/test) # p3: runtime (highest priority among runtime/test) # p4: build (highest priority among build/test) assert len(removed_deps) == 0 assert len(new_deps) == 3 # Sort by dependency_id for consistent testing new_deps_sorted = sorted(new_deps, key=lambda d: str(d.dependency_id)) # p2 should be runtime (highest priority) p2_dep = next(d for d in new_deps_sorted if d.dependency_id == p2_id) assert p2_dep.dependency_type_id == mock_config.dependency_types.runtime # p3 should be runtime (highest priority) p3_dep = next(d for d in new_deps_sorted if d.dependency_id == p3_id) assert p3_dep.dependency_type_id == mock_config.dependency_types.runtime # p4 should be build (highest available priority) p4_dep = next(d for d in new_deps_sorted if d.dependency_id == p4_id) assert p4_dep.dependency_type_id == mock_config.dependency_types.build ================================================ FILE: tests/package_managers/pkgx/test_special_case.py ================================================ """ Test special case URL handling in PkgxTransformer. This module tests the special_case method which handles URL transformations for specific package sources like crates.io, x.org, and pkgx.sh. 
""" import pytest from package_managers.pkgx.url import special_case @pytest.mark.transformer class TestSpecialCase: """Test special case URL transformations.""" def test_special_case_crates_io(self, mock_logger): """Test that crates.io URLs are properly transformed.""" assert ( special_case("crates.io/pkgx", mock_logger) == "https://crates.io/crates/pkgx" ) def test_special_case_x_org(self, mock_logger): """Test that x.org URLs are normalized.""" assert special_case("x.org/ice", mock_logger) == "https://x.org" assert special_case("x.org/xxf86vm", mock_logger) == "https://x.org" def test_special_case_pkgx_sh(self, mock_logger): """Test that pkgx.sh URLs are redirected to GitHub.""" assert ( special_case("pkgx.sh/pkgx", mock_logger) == "https://github.com/pkgxdev/pkgx" ) def test_special_case_no_slashes(self, mock_logger): """Test that URLs without slashes are returned as-is.""" assert special_case("abseil.io", mock_logger) == "abseil.io" def test_special_case_double_slashes(self, mock_logger): """Test that URLs with double slashes are returned as-is.""" assert ( special_case("github.com/awslabs/llrt", mock_logger) == "github.com/awslabs/llrt" ) ================================================ FILE: tests/ranker/test_compute_canon_name.py ================================================ #!/usr/bin/env uv run --with pytest import pytest from ranker.naming import ( check_if_better, compute_canon_name, extract_repo_name_from_url, score_name, ) @pytest.mark.parametrize( "url, best_guess", [ ("github.com/user/repo", "repo"), ("gitlab.com/user/repo", "repo"), ("bitbucket.org/user/repo", "repo"), ("not-a-valid-url", "not-a-valid-url"), ("", ""), ], ) def test_extract_repo_name_from_url(url, best_guess): assert extract_repo_name_from_url(url) == best_guess @pytest.mark.parametrize( "name, best_guess, expected_score", [ ("@user/repo", "repo", 3), ("test3js", "web3.js", 8), ("web3", "web3.js", 11), ("@platonenterprise/web3", "web3.js", -3), ("eleventy-plugin-embed-everything", "embed-everything", 1), ("eleventy-plugin-embed-ted", "embed-everything", 0), ], ) def test_score_name(name, best_guess, expected_score): assert score_name(name, best_guess) == expected_score @pytest.mark.parametrize( "name, best_guess, package_name, expected", [ ( "web3.js", "test3js", "https://github.com/ethereum/web3.js#readmeweb3.js", "test3js", ), ("web3.js", "web3", "test3js", "web3"), ("web3.js", "@platonenterprise/web3", "web3", "web3"), ], ) def test_check_if_better(name, best_guess, package_name, expected): assert check_if_better(name, best_guess, package_name) == expected @pytest.mark.parametrize( "url, package_name, existing_name, expected", [ # new canon, we should always have the package_name ("github.com/user/repo", "repo", "", "repo"), ( "github.com/user/repo", "@scoped/random-name-123", "@scoped/random-name-123", "@scoped/random-name-123", ), ( "gfscott.com/embed-everything", "eleventy-plugin-embed-everything", "gfscott.com/embed-everything", "eleventy-plugin-embed-everything", ), ( "gfscott.com/embed-everything", "eleventy-plugin-embed-ted", "eleventy-plugin-embed-everything", "eleventy-plugin-embed-everything", ), ], ) def test_compute_canon_name(url, package_name, existing_name, expected): assert compute_canon_name(url, package_name, existing_name) == expected ================================================ FILE: tests/ranker/test_dedupe.py ================================================ """ Test the package deduplication functionality in the ranker. 
This module tests the dedupe.main function which handles deduplication of packages based on their homepage URLs, creating and managing canonical package representations. """ from datetime import datetime from unittest.mock import MagicMock, patch from uuid import uuid4 import pytest from core.models import URL, Canon, Package from ranker.config import DedupeConfig from ranker.dedupe import DedupeDB, main @pytest.fixture def ids(): """Fixture providing consistent IDs for testing.""" return { "homepage_url_type": uuid4(), "package_manager": uuid4(), "pkg1": uuid4(), "pkg2": uuid4(), "pkg3": uuid4(), "canon1": uuid4(), "canon2": uuid4(), "canon3": uuid4(), "url1": uuid4(), "url2": uuid4(), "url3": uuid4(), } @pytest.fixture def test_packages(ids): """Fixture providing test package objects.""" return { "package1": Package( id=ids["pkg1"], name="package1", package_manager_id=ids["package_manager"], import_id="pkg1", derived_id="npm/package1", created_at=datetime.now(), updated_at=datetime.now(), ), "package2": Package( id=ids["pkg2"], name="package2", package_manager_id=ids["package_manager"], import_id="pkg2", derived_id="npm/package2", created_at=datetime.now(), updated_at=datetime.now(), ), "package3": Package( id=ids["pkg3"], name="package3", package_manager_id=ids["package_manager"], import_id="pkg3", derived_id="npm/package3", created_at=datetime.now(), updated_at=datetime.now(), ), } @pytest.fixture def test_urls(ids): """Fixture providing test URL objects.""" canonical_url = "github.com/example/repo" non_canonical_url = "https://github.com/example/repo" different_url = "https://gitlab.com/example/repo" return { "canonical": URL( id=ids["url1"], url=canonical_url, url_type_id=ids["homepage_url_type"], created_at=datetime.now(), updated_at=datetime.now(), ), "non_canonical": URL( id=ids["url2"], url=non_canonical_url, url_type_id=ids["homepage_url_type"], created_at=datetime.now(), updated_at=datetime.now(), ), "different": URL( id=ids["url3"], url=different_url, url_type_id=ids["homepage_url_type"], created_at=datetime.now(), updated_at=datetime.now(), ), } @pytest.fixture def mock_dedupe_config(ids): """Fixture providing mock DedupeConfig.""" config = MagicMock(spec=DedupeConfig) config.load = True config.homepage_url_type_id = ids["homepage_url_type"] return config @pytest.fixture def mock_db(): """Fixture providing mock DedupeDB.""" return MagicMock(spec=DedupeDB) def capture_ingest_calls(mock_db): """Helper function to capture arguments passed to db.ingest.""" ingest_calls = [] def capture_ingest( new_canons, updated_canons, new_canon_packages, updated_canon_packages ): ingest_calls.append( (new_canons, updated_canons, new_canon_packages, updated_canon_packages) ) mock_db.ingest.side_effect = capture_ingest return ingest_calls @pytest.mark.ranker class TestDedupe: """Test the deduplication of packages - focused on different cases.""" def test_new_canon_new_mapping( self, ids, test_packages, test_urls, mock_dedupe_config, mock_db ): """ Test: URL has no canon AND package has no existing mapping Expected: Create new canon + create new mapping """ # Arrange package = test_packages["package1"] homepage_url = test_urls["canonical"] # Current state: no canons exist for this URL, no package mapping exists mock_db.get_current_canons.return_value = {} # URL has no canon mock_db.get_current_canon_packages.return_value = {} # Package has no mapping mock_db.get_packages_with_homepages.return_value = [(package, homepage_url)] mock_db.get_all_package_names.return_value = {ids["pkg1"]: "package1"} 
ingest_calls = capture_ingest_calls(mock_db) # Act with patch.dict("os.environ", {"LOAD": "true", "TEST": "false"}): main(mock_dedupe_config, mock_db) # Assert assert len(ingest_calls) == 1, "Should call ingest exactly once" new_canons, updated_canons, new_canon_packages, updated_canon_packages = ( ingest_calls[0] ) # Verify canon creation assert len(new_canons) == 1, "Should create exactly one new canon" assert len(updated_canons) == 0, "Should not update any canons" assert len(new_canon_packages) == 1, "Should create exactly one new mapping" assert len(updated_canon_packages) == 0, "Should not update any mappings" created_canon = new_canons[0] assert ( created_canon.url_id == ids["url1"] ), "Canon should reference correct URL ID" assert created_canon.name == "package1", "Canon name should be the package name" # Verify mapping creation created_mapping = new_canon_packages[0] assert created_mapping.package_id == ids["pkg1"], "Should map correct package" assert created_mapping.canon_id == created_canon.id, "Should map to new canon" def test_new_canon_update_mapping( self, ids, test_packages, test_urls, mock_dedupe_config, mock_db ): """ Test: URL has no canon AND package has existing mapping to different canon Expected: Create new canon + update existing mapping """ # Arrange package = test_packages["package1"] homepage_url = test_urls["canonical"] # Create existing canon for different URL existing_canon = Canon( id=ids["canon2"], url_id=ids["url2"], # Different URL name="old-canon", created_at=datetime.now(), updated_at=datetime.now(), ) # Current state: no canon for this URL, but package is mapped to different canon mock_db.get_current_canons.return_value = {ids["url2"]: existing_canon} mock_db.get_current_canon_packages.return_value = { ids["pkg1"]: {"id": uuid4(), "canon_id": existing_canon.id} } mock_db.get_packages_with_homepages.return_value = [(package, homepage_url)] mock_db.get_all_package_names.return_value = {ids["pkg1"]: "package1"} ingest_calls = capture_ingest_calls(mock_db) # Act with patch.dict("os.environ", {"LOAD": "true", "TEST": "false"}): main(mock_dedupe_config, mock_db) # Assert assert len(ingest_calls) == 1, "Should call ingest exactly once" new_canons, updated_canons, new_canon_packages, updated_canon_packages = ( ingest_calls[0] ) # Verify canon creation assert len(new_canons) == 1, "Should create exactly one new canon" assert len(updated_canons) == 0, "Should not update any canons" assert len(new_canon_packages) == 0, "Should not create new mappings" assert len(updated_canon_packages) == 1, "Should update exactly one mapping" created_canon = new_canons[0] assert ( created_canon.url_id == ids["url1"] ), "Canon should reference correct URL ID" assert created_canon.name == "package1", "Canon name should be the package name" # Verify mapping update (should point to NEW canon, not old one) updated_mapping = updated_canon_packages[0] assert "id" in updated_mapping, "Update should include canon package ID" assert ( updated_mapping["canon_id"] == created_canon.id ), "Should update to NEW canon" assert ( updated_mapping["canon_id"] != ids["canon2"] ), "Should NOT point to old canon" assert "updated_at" in updated_mapping, "Update should include timestamp" def test_no_changes_needed( self, ids, test_packages, test_urls, mock_dedupe_config, mock_db ): """ Test: URL has canon AND package already linked to that canon Expected: Do nothing (no changes) """ # Arrange package = test_packages["package1"] homepage_url = test_urls["canonical"] existing_canon = Canon( 
        existing_canon = Canon(
            id=ids["canon1"],
            url_id=ids["url1"],
            name="existing-canon",
            created_at=datetime.now(),
            updated_at=datetime.now(),
        )

        # Current state: URL has canon, package linked to that same canon
        mock_db.get_current_canons.return_value = {ids["url1"]: existing_canon}
        mock_db.get_current_canon_packages.return_value = {
            ids["pkg1"]: {"id": uuid4(), "canon_id": ids["canon1"]}
        }
        mock_db.get_packages_with_homepages.return_value = [(package, homepage_url)]
        mock_db.get_all_package_names.return_value = {ids["pkg1"]: "existing-canon"}

        ingest_calls = capture_ingest_calls(mock_db)

        # Act
        with patch.dict("os.environ", {"LOAD": "true", "TEST": "false"}):
            main(mock_dedupe_config, mock_db)

        # Assert - should call ingest with empty lists (no changes)
        assert len(ingest_calls) == 1, "Should call ingest exactly once"
        new_canons, updated_canons, new_canon_packages, updated_canon_packages = (
            ingest_calls[0]
        )
        assert len(new_canons) == 0, "Should not create any canons"
        assert len(updated_canons) == 0, "Should not update any canons"
        assert len(new_canon_packages) == 0, "Should not create any mappings"
        assert len(updated_canon_packages) == 0, "Should not update any mappings"

    def test_update_existing_mapping(
        self, ids, test_packages, test_urls, mock_dedupe_config, mock_db
    ):
        """
        Test: URL has canon AND package linked to different canon
        Expected: Update mapping to correct canon
        """
        # Arrange
        package = test_packages["package1"]
        homepage_url = test_urls["canonical"]

        correct_canon = Canon(
            id=ids["canon1"],
            url_id=ids["url1"],  # This URL's canon
            name="correct-name",
            created_at=datetime.now(),
            updated_at=datetime.now(),
        )
        wrong_canon = Canon(
            id=ids["canon2"],
            url_id=ids["url2"],  # Different URL's canon
            name="correct-name",
            created_at=datetime.now(),
            updated_at=datetime.now(),
        )

        # Current state: URL has canon, but package linked to wrong canon
        mock_db.get_current_canons.return_value = {
            ids["url1"]: correct_canon,
            ids["url2"]: wrong_canon,
        }
        mock_db.get_current_canon_packages.return_value = {
            ids["pkg1"]: {
                "id": uuid4(),
                "canon_id": ids["canon2"],
            }  # Linked to wrong canon
        }
        mock_db.get_packages_with_homepages.return_value = [(package, homepage_url)]
        mock_db.get_all_package_names.return_value = {ids["pkg1"]: "correct-name"}

        ingest_calls = capture_ingest_calls(mock_db)

        # Act
        with patch.dict("os.environ", {"LOAD": "true", "TEST": "false"}):
            main(mock_dedupe_config, mock_db)

        # Assert
        assert len(ingest_calls) == 1, "Should call ingest exactly once"
        new_canons, updated_canons, new_canon_packages, updated_canon_packages = (
            ingest_calls[0]
        )

        # Should only update mapping, no new creations
        assert len(new_canons) == 0, "Should not create any canons"
        assert len(updated_canons) == 0, "Should not update any canons"
        assert len(new_canon_packages) == 0, "Should not create any new mappings"
        assert len(updated_canon_packages) == 1, "Should update exactly one mapping"

        # Verify mapping update points to correct canon
        updated_mapping = updated_canon_packages[0]
        assert "id" in updated_mapping, "Update should include canon package ID"
        assert (
            updated_mapping["canon_id"] == ids["canon1"]
        ), "Should update to correct canon"
        assert (
            updated_mapping["canon_id"] != ids["canon2"]
        ), "Should NOT point to wrong canon"
        assert "updated_at" in updated_mapping, "Update should include timestamp"

    def test_create_new_mapping(
        self, ids, test_packages, test_urls, mock_dedupe_config, mock_db
    ):
        """
        Test: URL has canon AND package has no mapping
        Expected: Create new mapping to existing canon
        """
        # Arrange
        package = test_packages["package1"]
        homepage_url = test_urls["canonical"]
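
        # A canon for this URL already exists (constructed below); only the
        # package -> canon mapping is missing, so main() should emit exactly
        # one new mapping and nothing else.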
        existing_canon = Canon(
            id=ids["canon1"],
            url_id=ids["url1"],
            name="existing-canon",
            created_at=datetime.now(),
            updated_at=datetime.now(),
        )

        # Current state: URL has canon, but package has no mapping
        mock_db.get_current_canons.return_value = {ids["url1"]: existing_canon}
        mock_db.get_current_canon_packages.return_value = {}  # Package not linked
        mock_db.get_packages_with_homepages.return_value = [(package, homepage_url)]
        mock_db.get_all_package_names.return_value = {ids["pkg1"]: "existing-canon"}

        ingest_calls = capture_ingest_calls(mock_db)

        # Act
        with patch.dict("os.environ", {"LOAD": "true", "TEST": "false"}):
            main(mock_dedupe_config, mock_db)

        # Assert
        assert len(ingest_calls) == 1, "Should call ingest exactly once"
        new_canons, updated_canons, new_canon_packages, updated_canon_packages = (
            ingest_calls[0]
        )

        # Should only create new mapping, no updates or new canons
        assert len(new_canons) == 0, "Should not create any canons"
        assert len(updated_canons) == 0, "Should not update any canons"
        assert len(new_canon_packages) == 1, "Should create exactly one new mapping"
        assert len(updated_canon_packages) == 0, "Should not update any mappings"

        # Verify mapping creation points to existing canon
        created_mapping = new_canon_packages[0]
        assert created_mapping.package_id == ids["pkg1"], "Should map correct package"
        assert created_mapping.canon_id == ids["canon1"], "Should map to existing canon"

    def test_multiple_packages_same_homepage_creates_single_canon(
        self, ids, test_packages, test_urls, mock_dedupe_config, mock_db
    ):
        """
        Test deduplication: Multiple packages with same homepage URL should
        create only one canon

        This tests the core deduplication logic where:
        - Package 1 points to URL X (no existing canon)
        - Package 2 also points to URL X
        - Should create only ONE canon for URL X
        - Both packages should be linked to the same canon
        """
        # Arrange
        package1 = test_packages["package1"]
        package2 = test_packages["package2"]
        shared_homepage_url = test_urls["canonical"]

        # Current state: no canons exist for this URL, no package mappings exist
        mock_db.get_current_canons.return_value = {}  # URL has no canon
        mock_db.get_current_canon_packages.return_value = {}  # No mappings
        mock_db.get_packages_with_homepages.return_value = [
            (package1, shared_homepage_url),  # Both packages point to same URL
            (package2, shared_homepage_url),
        ]
        mock_db.get_all_package_names.return_value = {
            ids["pkg1"]: "package1",
            ids["pkg2"]: "package2",
        }

        ingest_calls = capture_ingest_calls(mock_db)

        # Act
        with patch.dict("os.environ", {"LOAD": "true", "TEST": "false"}):
            main(mock_dedupe_config, mock_db)

        # Assert
        assert len(ingest_calls) == 1, "Should call ingest exactly once"
        new_canons, updated_canons, new_canon_packages, updated_canon_packages = (
            ingest_calls[0]
        )

        # Should create only ONE canon for the shared URL
        assert len(new_canons) == 1, "Should create exactly one canon for shared URL"
        assert len(updated_canons) == 0, "Should not update any canons"
        assert len(new_canon_packages) == 2, "Should create mappings for both packages"
        assert len(updated_canon_packages) == 0, "Should not update any mappings"

        # Verify single canon creation
        created_canon = new_canons[0]
        assert created_canon.url_id == ids["url1"], "Canon should reference shared URL"

        # Verify both packages map to the same canon
        canon_ids = {mapping.canon_id for mapping in new_canon_packages}
        assert len(canon_ids) == 1, "Both packages should map to same canon"
        assert (
            canon_ids.pop() == created_canon.id
        ), "Both should map to the created canon"
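
        # Deduplication is keyed on the homepage URL's id: one shared URL
        # yields exactly one canon, no matter how many packages point at it.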
        # Verify package IDs
        package_ids = {mapping.package_id for mapping in new_canon_packages}
        assert package_ids == {ids["pkg1"], ids["pkg2"]}, "Should map both packages"

        # Verify the name. Heuristics wouldn't rank "package2" higher than "package1"
        assert created_canon.name == "package1", "Canon name should be the package name"

    def test_empty_urls_no_deduplication(
        self, ids, test_packages, test_urls, mock_dedupe_config, mock_db
    ):
        """
        Test that packages with empty URLs are not deduplicated with each other

        A lot of packages in CHAI have no URLs, and we should not deduplicate
        them with each other. This test case ensures that if two packages have
        no URLs, they do not get deduplicated.
        """
        # Arrange
        package1 = test_packages["package1"]
        package2 = test_packages["package2"]

        empty_url1 = URL(
            id=ids["url1"],
            url="",
            url_type_id=ids["homepage_url_type"],
            created_at=datetime.now(),
            updated_at=datetime.now(),
        )
        empty_url2 = URL(
            id=ids["url2"],
            url="",
            url_type_id=ids["homepage_url_type"],
            created_at=datetime.now(),
            updated_at=datetime.now(),
        )

        # Current state: no canons exist for these URLs, no package mappings exist
        mock_db.get_current_canons.return_value = {}  # No canons
        mock_db.get_current_canon_packages.return_value = {}  # No mappings
        mock_db.get_packages_with_homepages.return_value = [
            (package1, empty_url1),  # Both packages have empty URLs
            (package2, empty_url2),
        ]

        ingest_calls = capture_ingest_calls(mock_db)

        # Act
        with patch.dict("os.environ", {"LOAD": "true", "TEST": "false"}):
            main(mock_dedupe_config, mock_db)

        # Assert
        assert len(ingest_calls) == 1, "Should call ingest exactly once"
        new_canons, updated_canons, new_canon_packages, updated_canon_packages = (
            ingest_calls[0]
        )

        # Should not create any canons or mappings for empty URLs
        assert len(new_canons) == 0, "Should not create any canons for empty URLs"
        assert len(updated_canons) == 0, "Should not update any canons"
        assert (
            len(new_canon_packages) == 0
        ), "Should not create any mappings for empty URLs"
        assert len(updated_canon_packages) == 0, "Should not update any mappings"

    def test_canon_name_update_when_url_changes(
        self, ids, test_packages, test_urls, mock_dedupe_config, mock_db
    ):
        """
        Test that canon name gets updated when the canonical URL changes

        This tests canon update functionality when:
        - A canon exists with name "old-url"
        - The canonical URL for that canon changes to "new-url"
        - The canon name should be updated to match the new URL
        """
        # Arrange
        package = test_packages["package1"]

        # URL with updated canonical form
        updated_url = URL(
            id=ids["url1"],
            url="github.com/example/new-repo",  # Changed URL
            url_type_id=ids["homepage_url_type"],
            created_at=datetime.now(),
            updated_at=datetime.now(),
        )

        # Existing canon with old name
        existing_canon = Canon(
            id=ids["canon1"],
            url_id=ids["url1"],
            name="github.com/example/old-repo",  # Old name
            created_at=datetime.now(),
            updated_at=datetime.now(),
        )

        # Current state: canon exists but name doesn't match current URL
        mock_db.get_current_canons.return_value = {ids["url1"]: existing_canon}
        mock_db.get_current_canon_packages.return_value = {
            ids["pkg1"]: {"id": uuid4(), "canon_id": ids["canon1"]}
        }
        mock_db.get_packages_with_homepages.return_value = [(package, updated_url)]
        mock_db.get_all_package_names.return_value = {
            ids["pkg1"]: "github.com/example/new-repo"
        }

        ingest_calls = capture_ingest_calls(mock_db)

        # Act
        with patch.dict("os.environ", {"LOAD": "true", "TEST": "false"}):
            main(mock_dedupe_config, mock_db)

        # Assert
        assert len(ingest_calls) == 1, "Should call ingest exactly once"
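        # Only the canon rename should flow through this scenario; the existing
        # package -> canon mapping is already correct and must stay untouched.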
        new_canons, updated_canons, new_canon_packages, updated_canon_packages = (
            ingest_calls[0]
        )

        # Should update the existing canon name
        assert len(new_canons) == 0, "Should not create any new canons"
        assert len(updated_canons) == 1, "Should update exactly one canon"
        assert len(new_canon_packages) == 0, "Should not create any new mappings"
        assert len(updated_canon_packages) == 0, "Should not update any mappings"

        # Verify canon update
        updated_canon = updated_canons[0]
        assert "id" in updated_canon, "Update should include canon ID"
        assert updated_canon["id"] == ids["canon1"], "Should update correct canon"
        assert (
            updated_canon["name"] == "github.com/example/new-repo"
        ), "Should update name to new URL"
        assert "updated_at" in updated_canon, "Update should include timestamp"

    def test_canon_update_with_multiple_packages(
        self, ids, test_packages, test_urls, mock_dedupe_config, mock_db
    ):
        """
        Test canon update when multiple packages share the same canon

        This ensures that when a canon's URL changes, the canon is updated but
        all existing package mappings remain intact.
        """
        # Arrange
        package1 = test_packages["package1"]
        package2 = test_packages["package2"]

        # URL with updated canonical form
        updated_url = URL(
            id=ids["url1"],
            url="github.com/example/updated-repo",
            url_type_id=ids["homepage_url_type"],
            created_at=datetime.now(),
            updated_at=datetime.now(),
        )

        # Existing canon with old name
        existing_canon = Canon(
            id=ids["canon1"],
            url_id=ids["url1"],
            name="github.com/example/old-repo",  # Different from current URL
            created_at=datetime.now(),
            updated_at=datetime.now(),
        )

        # Current state: both packages mapped to same canon
        mock_db.get_current_canons.return_value = {ids["url1"]: existing_canon}
        mock_db.get_current_canon_packages.return_value = {
            ids["pkg1"]: {"id": uuid4(), "canon_id": ids["canon1"]},
            ids["pkg2"]: {"id": uuid4(), "canon_id": ids["canon1"]},
        }
        mock_db.get_packages_with_homepages.return_value = [
            (package1, updated_url),
            (package2, updated_url),
        ]
        mock_db.get_all_package_names.return_value = {
            ids["pkg1"]: "github.com/example/updated-repo",
            ids["pkg2"]: "github.com/example/updated-repo",
        }

        ingest_calls = capture_ingest_calls(mock_db)

        # Act
        with patch.dict("os.environ", {"LOAD": "true", "TEST": "false"}):
            main(mock_dedupe_config, mock_db)

        # Assert
        assert len(ingest_calls) == 1, "Should call ingest exactly once"
        new_canons, updated_canons, new_canon_packages, updated_canon_packages = (
            ingest_calls[0]
        )

        # Should only update the canon, no new mappings or updates to mappings
        assert len(new_canons) == 0, "Should not create any new canons"
        assert len(updated_canons) == 1, "Should update exactly one canon"
        assert len(new_canon_packages) == 0, "Should not create any new mappings"
        assert len(updated_canon_packages) == 0, "Should not update any mappings"

        # Verify canon update
        updated_canon = updated_canons[0]
        assert updated_canon["id"] == ids["canon1"], "Should update correct canon"
        assert (
            updated_canon["name"] == "github.com/example/updated-repo"
        ), "Should update name to new URL"

    def test_skip_when_load_disabled(self, mock_dedupe_config, mock_db):
        """
        Test that no processing occurs when load is disabled
        Expected: db.ingest should not be called
        """
        # Arrange
        mock_dedupe_config.load = False

        # Act
        with patch.dict("os.environ", {"LOAD": "false", "TEST": "false"}):
            main(mock_dedupe_config, mock_db)

        # Assert
        mock_db.ingest.assert_not_called()


================================================
FILE: tests/ranker/test_rx_graph.py
================================================
"""
Test the CHAI graph ranking algorithm.
This module tests the rx_graph module which implements a custom graph-based
ranking algorithm for packages. The tests focus on verifying that the
distribute function conserves weight appropriately.
"""

import random
import uuid
from decimal import Decimal

import pytest

from ranker.rx_graph import CHAI, PackageNode

# Constants for the test
NUM_NODES = 100000
EDGE_PROBABILITY = 0.001
SPLIT_RATIO = Decimal("0.85")
TOLERANCE = Decimal("1e-6")
MAX_ITER = 10000000


@pytest.fixture
def large_chai_graph() -> tuple[CHAI, dict[uuid.UUID, Decimal]]:
    """Creates a large CHAI graph with random edges and personalization."""
    G = CHAI()
    nodes = []
    initial_personalization_raw = {}

    # Create nodes
    for _i in range(NUM_NODES):
        canon_id = uuid.uuid4()
        node = PackageNode(canon_id=canon_id)
        node.index = G.add_node(node)
        nodes.append(node)
        # Assign random initial weight for personalization
        initial_personalization_raw[canon_id] = Decimal(random.random())

    # Normalize personalization to sum to 1
    total_weight = sum(initial_personalization_raw.values())
    personalization = {
        uid: weight / total_weight
        for uid, weight in initial_personalization_raw.items()
    }
    assert (
        abs(sum(personalization.values()) - Decimal("1.0")) <= TOLERANCE
    ), f"Initial personalization should sum to 1 within tolerance: {sum(personalization.values())}"

    # Add random edges (potential cycles)
    node_indices = list(G.node_indices())
    for u_idx in node_indices:
        for v_idx in node_indices:
            if u_idx != v_idx and random.random() < EDGE_PROBABILITY:
                G.add_edge(u_idx, v_idx, None)  # Edge data is not used in distribute

    return G, personalization


================================================
FILE: tests/scripts/upgrade_canons/test_analyze_packages_needing_canonicalization.py
================================================
#!/usr/bin/env pkgx uv run

from unittest.mock import call, patch
from uuid import UUID

import pytest

from scripts.upgrade_canons.main import analyze_packages_needing_canonicalization


class TestAnalyzePackagesNeedingCanonicalization:
    """Test the analyze_packages_needing_canonicalization function"""

    def setup_method(self):
        """Set up test fixtures"""
        self.package_id_1 = UUID("11111111-1111-1111-1111-111111111111")
        self.package_id_2 = UUID("22222222-2222-2222-2222-222222222222")
        self.package_id_3 = UUID("33333333-3333-3333-3333-333333333333")
        self.package_id_4 = UUID("44444444-4444-4444-4444-444444444444")

    @patch("scripts.upgrade_canons.main.is_canonical_url")
    @patch("scripts.upgrade_canons.main.normalize_url")
    def test_case_1_should_create_canonical_url(
        self, mock_normalize, mock_is_canonical
    ):
        """
        Test Case 1: Package has non-canonical URLs, canonical doesn't exist
        Expected: Should return this package in the result
        """
        # Setup mocks
        mock_is_canonical.return_value = False
        mock_normalize.return_value = "github.com/org/repo"

        # Test data
        package_url_map = {
            self.package_id_1: [
                "https://github.com/org/repo",
                "https://github.com/org/repo/tree/main",
                "https://github.com/org/repo/blob/main/README.md",
            ]
        }
        existing_homepages = {
            "https://github.com/org/repo",
            "https://github.com/org/repo/tree/main",
            "https://github.com/org/repo/blob/main/README.md",
        }  # no canon

        # Execute
        result = analyze_packages_needing_canonicalization(
            package_url_map, existing_homepages
        )

        # Verify
        assert len(result) == 1
        assert self.package_id_1 in result
        assert result[self.package_id_1] == "github.com/org/repo"

        # Verify mocks were called correctly
        # is_canonical should be called once for each URL until it finds a
        # canonical one (or all if none are canonical)
        expected_calls = [
            call("https://github.com/org/repo"),
            call("https://github.com/org/repo/tree/main"),
            call("https://github.com/org/repo/blob/main/README.md"),
        ]
        mock_is_canonical.assert_has_calls(expected_calls)
        assert mock_is_canonical.call_count == 3

        # normalize should only be called once with the first URL
        mock_normalize.assert_called_once_with("https://github.com/org/repo")

    @patch("scripts.upgrade_canons.main.is_canonical_url")
    @patch("scripts.upgrade_canons.main.normalize_url")
    def test_case_2_canonical_exists_in_database(
        self, mock_normalize, mock_is_canonical
    ):
        """
        Test Case 2: Package has non-canonical URLs, but canonical already exists in DB
        Expected: Should not return this package (skip it)
        """
        # Setup mocks
        mock_is_canonical.return_value = False
        mock_normalize.return_value = "https://example.com"

        # Test data
        package_url_map = {
            self.package_id_1: ["http://example.com", "https://www.example.com"]
        }
        existing_homepages = {"https://example.com"}  # Canonical already exists

        # Execute
        result = analyze_packages_needing_canonicalization(
            package_url_map, existing_homepages
        )

        # Verify
        assert len(result) == 0
        assert self.package_id_1 not in result

    @patch("scripts.upgrade_canons.main.is_canonical_url")
    @patch("scripts.upgrade_canons.main.normalize_url")
    def test_case_3_canonical_already_planned(self, mock_normalize, mock_is_canonical):
        """
        Test Case 3: Two packages would create the same canonical URL
        Expected: Only the first package should be included, second should be skipped
        """
        # Setup mocks
        mock_is_canonical.return_value = False
        mock_normalize.return_value = (
            "https://example.com"  # Both packages normalize to same URL
        )

        # Test data - both packages would create the same canonical URL
        package_url_map = {
            self.package_id_1: ["http://example.com"],
            self.package_id_2: [
                "https://www.example.com"
            ],  # Different input, same canonical
        }
        existing_homepages = set()  # Empty - canonical doesn't exist

        # Execute
        result = analyze_packages_needing_canonicalization(
            package_url_map, existing_homepages
        )

        # Verify - only one package should be included (whichever was processed first)
        assert len(result) == 1
        assert "https://example.com" in result.values()

        # Verify that exactly one of the packages was included
        included_packages = list(result.keys())
        assert len(included_packages) == 1
        assert included_packages[0] in [self.package_id_1, self.package_id_2]

    @patch("scripts.upgrade_canons.main.is_canonical_url")
    def test_case_4_package_already_has_canonical(self, mock_is_canonical):
        """
        Test Case 4: Package already has at least one canonical URL
        Expected: Should not return this package (skip it)
        """
        # Setup mocks - return True for canonical check
        mock_is_canonical.return_value = True

        # Test data
        package_url_map = {
            self.package_id_1: [
                "https://example.com",
                "http://example.com",
            ]  # First URL is canonical
        }
        existing_homepages = set()

        # Execute
        result = analyze_packages_needing_canonicalization(
            package_url_map, existing_homepages
        )

        # Verify
        assert len(result) == 0
        assert self.package_id_1 not in result

        # Verify that we never tried to normalize (because we skipped early)
        mock_is_canonical.assert_called_once_with("https://example.com")

    @patch("scripts.upgrade_canons.main.is_canonical_url")
    @patch("scripts.upgrade_canons.main.normalize_url")
    def test_mixed_scenarios(self, mock_normalize, mock_is_canonical):
        """
        Test with multiple packages covering different scenarios
        """

        # Setup mocks with side effects for different URLs
        def mock_is_canonical_side_effect(url):
            return url == "https://canonical.com"  # Only this URL is canonical
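
        # Stub normalization for this scenario only: collapse any example/test
        # host variant to a single https URL, and default everything else to an
        # https version of its host. This stands in for the patched
        # normalize_url, not the real implementation.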
        def mock_normalize_side_effect(url):
            if "example" in url:
                return "https://example.com"
            elif "test" in url:
                return "https://test.com"
            else:
                return f"https://{url.split('://')[1]}"

        mock_is_canonical.side_effect = mock_is_canonical_side_effect
        mock_normalize.side_effect = mock_normalize_side_effect

        # Test data
        package_url_map = {
            self.package_id_1: ["http://example.com"],  # Should create canonical
            self.package_id_2: ["https://canonical.com"],  # Already canonical - skip
            self.package_id_3: ["http://test.com"],  # Should create canonical
            self.package_id_4: [
                "https://www.example.com"
            ],  # Same canonical as package_id_1 - skip
        }
        existing_homepages = set()

        # Execute
        result = analyze_packages_needing_canonicalization(
            package_url_map, existing_homepages
        )

        # Verify
        assert len(result) == 2

        # Package 1 should be included (creates https://example.com)
        assert self.package_id_1 in result
        assert result[self.package_id_1] == "https://example.com"

        # Package 2 should be skipped (already canonical)
        assert self.package_id_2 not in result

        # Package 3 should be included (creates https://test.com)
        assert self.package_id_3 in result
        assert result[self.package_id_3] == "https://test.com"

        # Package 4 should be skipped (duplicate canonical URL)
        assert self.package_id_4 not in result

    def test_empty_inputs(self):
        """Test with empty inputs"""
        result = analyze_packages_needing_canonicalization({}, set())
        assert result == {}

    @patch("scripts.upgrade_canons.main.is_canonical_url")
    @patch("scripts.upgrade_canons.main.normalize_url")
    def test_edge_case_empty_url_list(self, mock_normalize, mock_is_canonical):
        """Test with package that has empty URL list"""
        # This shouldn't happen in practice, but let's handle it gracefully
        package_url_map = {
            self.package_id_1: []  # Empty URL list
        }
        existing_homepages = set()

        # This will raise an IndexError when trying to access urls[0] in
        # generate_canonical_url - let's verify this behavior is expected
        with pytest.raises(IndexError):
            analyze_packages_needing_canonicalization(
                package_url_map, existing_homepages
            )


if __name__ == "__main__":
    pytest.main([__file__])