Repository: teaxyz/chai Branch: main Commit: 5ba601243a93 Files: 142 Total size: 809.9 KB Directory structure: gitextract_5cor20ar/ ├── .dockerignore ├── .github/ │ ├── actions/ │ │ └── complain/ │ │ └── action.yml │ └── workflows/ │ ├── chai-api.ci.yml │ ├── ci.yml │ └── deploy.yml ├── .gitignore ├── .python-version ├── LICENSE ├── README.md ├── alembic/ │ ├── .pkgx.yaml │ ├── Dockerfile │ ├── README.md │ ├── alembic.ini │ ├── env.py │ ├── init-script.sql │ ├── load-values.sql │ ├── run_migrations.sh │ ├── script.py.mako │ └── versions/ │ ├── 20241028_1217-base_migration.py │ ├── 20250312_0045-add_legacy_dependency_table.py │ ├── 20250312_2244-canons.py │ ├── 20250416_0223-add_ranks.py │ ├── 20250422_0940-add_unique_package_to_canon_packages.py │ ├── 20250508_1752-add_trgm_indexes.py │ ├── 20250529_2341-rename_canons_table_and_recreate.py │ └── 20250529_2345-recreate_canon_foreign_keys.py ├── api/ │ ├── .dockerignore │ ├── .gitignore │ ├── Cargo.toml │ ├── Dockerfile │ ├── README.md │ └── src/ │ ├── app_state.rs │ ├── db.rs │ ├── handlers.rs │ ├── logging.rs │ ├── main.rs │ └── utils.rs ├── core/ │ ├── README.md │ ├── config.py │ ├── db.py │ ├── fetcher.py │ ├── logger.py │ ├── models/ │ │ └── __init__.py │ ├── requirements.txt │ ├── scheduler.py │ ├── structs.py │ ├── test.json │ ├── transformer.py │ └── utils.py ├── db/ │ ├── README.md │ └── queries.md ├── docker-compose.yml ├── examples/ │ ├── sbom-meta/ │ │ ├── README.md │ │ ├── go.mod │ │ ├── go.sum │ │ └── main.go │ └── visualizer/ │ ├── README.md │ ├── main.py │ └── monitor.py ├── package_managers/ │ ├── crates/ │ │ ├── Dockerfile │ │ ├── README.md │ │ ├── db.py │ │ ├── diff.py │ │ ├── main.py │ │ ├── structs.py │ │ └── transformer.py │ ├── debian/ │ │ ├── Dockerfile │ │ ├── README.md │ │ ├── db.py │ │ ├── debian_sources.py │ │ ├── diff.py │ │ ├── main.py │ │ ├── parser.py │ │ ├── scripts/ │ │ │ ├── investigate_sources.py │ │ │ └── test_investigate_sources.py │ │ └── structs.py │ ├── homebrew/ │ │ ├── Dockerfile │ │ ├── README.md │ │ ├── db.py │ │ ├── diff.py │ │ ├── formulae.py │ │ ├── main.py │ │ └── structs.py │ └── pkgx/ │ ├── Dockerfile │ ├── db.py │ ├── diff.py │ ├── loader.py │ ├── main.py │ ├── parser.py │ └── url.py ├── pkgx.yaml ├── pyproject.toml ├── ranker/ │ ├── .dockerignore │ ├── .gitignore │ ├── Dockerfile │ ├── README.md │ ├── config.py │ ├── db.py │ ├── dedupe.py │ ├── main.py │ ├── naming.py │ ├── requirements.txt │ ├── rx_graph.py │ └── utils/ │ ├── analyze_ranks.py │ └── parse_log.py ├── scripts/ │ ├── chai-legacy-loader/ │ │ ├── README.md │ │ ├── add_package_fields.py │ │ ├── batch_insert_package_urls.py │ │ ├── batch_insert_urls.py │ │ ├── copy_dependencies_no_thread.py │ │ ├── pkgx.yaml │ │ └── sql/ │ │ ├── dependencies.sql │ │ ├── packages.sql │ │ └── urls.sql │ ├── npm-singleton/ │ │ ├── README.md │ │ ├── pkgx.yaml │ │ └── single.py │ ├── package_to_package/ │ │ └── package_dependencies.py │ └── upgrade_canons/ │ ├── .gitignore │ ├── README.md │ ├── create_deleted_canons.py │ ├── db.py │ ├── delete_non_canonical_urls.py │ ├── main.py │ ├── registered_projects.py │ └── structs.py └── tests/ ├── README.md ├── conftest.py ├── package_managers/ │ ├── crates/ │ │ ├── conftest.py │ │ └── test_crates_diff_deps.py │ ├── debian/ │ │ ├── conftest.py │ │ ├── test_debian_diff.py │ │ ├── test_debian_parser.py │ │ └── test_debian_sources.py │ ├── homebrew/ │ │ ├── conftest.py │ │ └── test_homebrew_diff_deps.py │ └── pkgx/ │ ├── test_pkgx_diff.py │ └── test_special_case.py ├── ranker/ │ ├── test_compute_canon_name.py │ 
├── test_dedupe.py │ └── test_rx_graph.py └── scripts/ └── upgrade_canons/ └── test_analyze_packages_needing_canonicalization.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .dockerignore ================================================ # directories data/ .venv/ tests/ scripts/ logs/ db/ # other files .gitignore docker-compose.yml .DS_Store .git README.md LICENSE .cursorrules .coverage *.xml ================================================ FILE: .github/actions/complain/action.yml ================================================ name: teaxyz/chai/complain description: creates an issue for any failing tests inputs: test_function: description: test function to complain about required: true token: description: github token required: true default: ${{ github.token }} runs: using: composite steps: - name: Find Issue uses: actions-cool/issues-helper@v3 id: find with: actions: "find-issues" token: ${{ inputs.token }} issue-state: "open" title-includes: "❌ test failure" labels: "test-failure" - name: Create Issue uses: actions-cool/issues-helper@v3 id: create if: ${{ steps.find.outputs.issues == '[]' }} with: actions: "create-issue" token: ${{ inputs.token }} title: "❌ test failure" body: "Running log of test failure for ${{ inputs.test_function }}" labels: "test-failure" assignees: "sanchitram1" - name: Log Comment uses: actions-cool/issues-helper@v3 with: actions: "create-comment" token: ${{ inputs.token }} issue-number: ${{ steps.create.outputs.issue-number || fromJSON(steps.find.outputs.issues)[0].number }} body: | # Test failure ## ${{ inputs.test_function }} logs: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} ================================================ FILE: .github/workflows/chai-api.ci.yml ================================================ name: api.ci on: push: branches: [main] paths: - "api/**" pull_request: paths: - "api/**" env: CARGO_TERM_COLOR: always concurrency: group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: true jobs: test: name: test runs-on: ubuntu-latest services: postgres: image: postgres env: POSTGRES_USER: postgres POSTGRES_PASSWORD: s3cr3t POSTGRES_DB: chai options: >- --health-cmd pg_isready --health-interval 10s --health-timeout 5s --health-retries 5 ports: - 5435:5432 steps: - uses: actions/checkout@v5 - name: Install dependencies run: | sudo apt-get update sudo apt-get install -y postgresql-client - name: Run tests run: cargo test --verbose working-directory: api env: DATABASE_URL: postgresql://postgres:s3cr3t@localhost:5435/chai fmt: name: Rustfmt runs-on: ubuntu-latest steps: - uses: actions/checkout@v5 - uses: actions-rs/toolchain@v1 with: profile: minimal toolchain: stable override: true components: rustfmt - run: cargo fmt --all -- --check working-directory: api clippy: name: Clippy runs-on: ubuntu-latest steps: - uses: actions/checkout@v5 - uses: actions-rs/toolchain@v1 with: profile: minimal toolchain: stable override: true components: clippy - run: cargo clippy --all-targets --all-features -- -D warnings working-directory: api docker-build: name: Build Docker Image runs-on: ubuntu-latest steps: - uses: actions/checkout@v5 - name: Set up Docker Buildx uses: docker/setup-buildx-action@v2 - name: Build uses: docker/build-push-action@v6 with: context: ./api push: false # Set push to false to prevent pushing the image ================================================ FILE: 
.github/workflows/ci.yml ================================================ name: CI on: workflow_dispatch: inputs: env: description: "The environment to test against" required: false type: choice options: - dev - sepolia - mainnet default: "dev" push: branches: - main paths: - "**/*.py" - tests/** - core/** - package_managers/** - ranker/** pull_request: branches: - main paths: - "**/*.py" - tests/** - core/** - package_managers/** - ranker/** jobs: check: runs-on: ubuntu-latest steps: - name: Checkout uses: actions/checkout@v5 - name: Setup with pkgx uses: pkgxdev/setup@v4 with: +: astral.sh/uv astral.sh/ruff pytest.org - name: Set up Python uses: actions/setup-python@v5 with: python-version-file: ".python-version" - name: Install the project run: pkgx uv sync --locked --all-extras --dev --all-groups - name: Lint with Ruff uses: astral-sh/ruff-action@v3 with: src: . - name: Run tests with pytest id: pytest run: | pkgx uv run pytest tests/ complain: needs: check if: failure() runs-on: ubuntu-latest permissions: issues: write steps: - uses: actions/checkout@v5 - uses: ./.github/actions/complain with: test_function: "pytest" token: ${{ secrets.GITHUB_TOKEN }} ================================================ FILE: .github/workflows/deploy.yml ================================================ name: "Release Chai" run-name: Release Chai - ${{ inputs.env || 'auto' }} - ${{ inputs.ref || github.ref }} on: push: branches: - main workflow_dispatch: inputs: env: description: "The environment to deploy to" required: true type: choice options: - dev - sepolia - testnet - mainnet ref: description: "The git ref (SHA or tag) to deploy" required: false deploy_latest: description: "Force deploy all components from latest commit" required: false type: boolean permissions: id-token: write contents: read jobs: build: if: ${{ github.event_name == 'workflow_dispatch' && inputs.deploy_latest == true }} environment: ${{ inputs.env || 'dev' }} runs-on: ubuntu-latest steps: - uses: actions/checkout@v5 - name: Configure AWS credentials uses: aws-actions/configure-aws-credentials@v3 with: role-to-assume: ${{ secrets.TEA_AWS_ROLE }} aws-region: us-east-1 - name: Login to Amazon ECR id: login-ecr uses: aws-actions/amazon-ecr-login@v2 - name: Set deployment ref id: set-ref run: | DEPLOY_REF=${{ inputs.ref || github.sha }} echo "DEPLOY_REF=${DEPLOY_REF}" >> $GITHUB_ENV echo "deploy_ref=${DEPLOY_REF}" >> $GITHUB_OUTPUT - name: Set environment id: set-env run: | DEPLOY_ENV=${{ inputs.env || 'dev' }} echo "DEPLOY_ENV=${DEPLOY_ENV}" >> $GITHUB_ENV echo "deploy_env=${DEPLOY_ENV}" >> $GITHUB_OUTPUT - name: Build and push Crates indexer image if: ${{ github.event_name == 'workflow_dispatch' && inputs.deploy_latest == true }} uses: docker/build-push-action@v6 with: context: . file: ./package_managers/crates/Dockerfile push: true tags: | ${{ steps.login-ecr.outputs.registry }}/chai-v2/crates:${{ env.DEPLOY_REF }}-${{ env.DEPLOY_ENV }} ${{ steps.login-ecr.outputs.registry }}/chai-v2/crates:latest - name: Build and push Homebrew indexer image if: ${{ github.event_name == 'workflow_dispatch' && inputs.deploy_latest == true }} uses: docker/build-push-action@v6 with: context: . 
file: ./package_managers/homebrew/Dockerfile push: true tags: | ${{ steps.login-ecr.outputs.registry }}/chai-v2/homebrew:${{ env.DEPLOY_REF }}-${{ env.DEPLOY_ENV }} ${{ steps.login-ecr.outputs.registry }}/chai-v2/homebrew:latest - name: Build and push Debian indexer image if: ${{ github.event_name == 'workflow_dispatch' && inputs.deploy_latest == true }} uses: docker/build-push-action@v6 with: context: . file: ./package_managers/debian/Dockerfile push: true tags: | ${{ steps.login-ecr.outputs.registry }}/chai-v2/debian:${{ env.DEPLOY_REF }}-${{ env.DEPLOY_ENV }} ${{ steps.login-ecr.outputs.registry }}/chai-v2/debian:latest - name: Build and push Pkgx indexer image if: ${{ github.event_name == 'workflow_dispatch' && inputs.deploy_latest == true }} uses: docker/build-push-action@v6 with: context: . file: ./package_managers/pkgx/Dockerfile push: true tags: | ${{ steps.login-ecr.outputs.registry }}/chai-v2/pkgx:${{ env.DEPLOY_REF }}-${{ env.DEPLOY_ENV }} ${{ steps.login-ecr.outputs.registry }}/chai-v2/pkgx:latest - name: Build and push Alembic image if: ${{ github.event_name == 'workflow_dispatch' && inputs.deploy_latest == true }} uses: docker/build-push-action@v6 with: context: . file: ./alembic/Dockerfile push: true tags: | ${{ steps.login-ecr.outputs.registry }}/chai-v2/alembic:${{ env.DEPLOY_REF }}-${{ env.DEPLOY_ENV }} ${{ steps.login-ecr.outputs.registry }}/chai-v2/alembic:latest - name: Build and push chai-api image if: ${{ github.event_name == 'workflow_dispatch' && inputs.deploy_latest == true }} uses: docker/build-push-action@v6 with: context: ./api file: ./api/Dockerfile push: true tags: | ${{ steps.login-ecr.outputs.registry }}/chai-v2/chai-api:${{ env.DEPLOY_REF }}-${{ env.DEPLOY_ENV }} ${{ steps.login-ecr.outputs.registry }}/chai-v2/chai-api:latest - name: Build and push Ranker indexer image if: ${{ github.event_name == 'workflow_dispatch' && inputs.deploy_latest == true }} uses: docker/build-push-action@v6 with: context: .
file: ./ranker/Dockerfile push: true tags: | ${{ steps.login-ecr.outputs.registry }}/chai-v2/ranker:${{ env.DEPLOY_REF }}-${{ env.DEPLOY_ENV }} ${{ steps.login-ecr.outputs.registry }}/chai-v2/ranker:latest deploy: needs: build if: ${{ github.event_name == 'workflow_dispatch' }} runs-on: ubuntu-latest environment: ${{ inputs.env || 'dev' }} steps: - uses: actions/checkout@v5 - name: Configure AWS credentials uses: aws-actions/configure-aws-credentials@v3 with: role-to-assume: ${{ secrets.TEA_AWS_ROLE }} aws-region: us-east-1 - name: Set deployment ref id: set-ref run: | DEPLOY_REF=${{ inputs.ref }} echo "DEPLOY_REF=${DEPLOY_REF}" >> $GITHUB_ENV echo "deploy_ref=${DEPLOY_REF}" >> $GITHUB_OUTPUT - name: Set environment id: set-env run: | DEPLOY_ENV=${{ inputs.env }} echo "DEPLOY_ENV=${DEPLOY_ENV}" >> $GITHUB_ENV echo "deploy_env=${DEPLOY_ENV}" >> $GITHUB_OUTPUT - name: Deploy chai-api run: | aws ecs update-service --cluster chai-${{ env.DEPLOY_ENV }} \ --service ${{ env.DEPLOY_ENV }}-chai-api \ --force-new-deployment ================================================ FILE: .gitignore ================================================ # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # C extensions *.so # Distribution / packaging .Python build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ wheels/ share/python-wheels/ *.egg-info/ .installed.cfg *.egg MANIFEST # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. *.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .nox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *.cover *.py,cover .hypothesis/ .pytest_cache/ cover/ # Translations *.mo *.pot # Django stuff: *.log local_settings.py db.sqlite3 db.sqlite3-journal # Flask stuff: instance/ .webassets-cache # Scrapy stuff: .scrapy # Sphinx documentation docs/_build/ # PyBuilder .pybuilder/ target/ # Jupyter Notebook .ipynb_checkpoints # IPython profile_default/ ipython_config.py # pyenv # For a library or package, you might want to ignore these files since the code is # intended to run in multiple environments; otherwise, check them in: # .python-version # pipenv # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. # However, in case of collaboration, if having platform-specific dependencies or dependencies # having no cross-platform support, pipenv may install dependencies that don't work, or not # install all needed dependencies. #Pipfile.lock # poetry # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. # This is especially recommended for binary packages to ensure reproducibility, and is more # commonly ignored for libraries. # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control #poetry.lock # pdm # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. #pdm.lock # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it # in version control. # https://pdm.fming.dev/latest/usage/project/#working-with-version-control .pdm.toml .pdm-python .pdm-build/ # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm __pypackages__/ # Celery stuff celerybeat-schedule celerybeat.pid # SageMath parsed files *.sage.py # Environments .env .venv env/ venv/ ENV/ env.bak/ venv.bak/ # Spyder project settings .spyderproject .spyproject # Rope project settings .ropeproject # mkdocs documentation /site # mypy .mypy_cache/ .dmypy.json dmypy.json # Pyre type checker .pyre/ # pytype static type analyzer .pytype/ # Cython debug symbols cython_debug/ # PyCharm # JetBrains specific template is maintained in a separate JetBrains.gitignore that can # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. #.idea/ # data files data db/data # examples examples/sbom-meta/sbom-meta *.svg # cursor .cursorrules .cursor/ TASKS.md # DS Store .DS_Store # Profiling *.prof ================================================ FILE: .python-version ================================================ 3.11 ================================================ FILE: LICENSE ================================================ MIT License Copyright (c) 2024 tea protocol Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: README.md ================================================ # CHAI CHAI is an attempt at an open-source data pipeline for package managers. The goal is to have a pipeline that can use the data from any package manager and provide a normalized data source for a myriad of use cases. ## Getting Started Use [Docker](https://docker.com) 1. Install Docker 2. Clone the chai repository (https://docs.github.com/en/repositories/creating-and-managing-repositories/cloning-a-repository) 3. Using a terminal, navigate to the cloned repository directory 4. Run `docker compose build` to create the latest Docker images 5. Then, run `docker compose up` to launch. > [!NOTE] > > This will run CHAI for all package managers. As an example, crates by > itself will take over an hour and consume >5GB of storage. > > Currently, we support: > > - crates > - Homebrew > - Debian > - pkgx > > You can run a single package manager by running > `PACKAGE_MANAGER=<name> docker compose up` > > We are planning on supporting `NPM`, `PyPI`, and `rubygems` next. ### Arguments Specify these, e.g. `FOO=bar docker compose up`: - `ENABLE_SCHEDULER`: When true, the pipeline runs on a schedule set by `FREQUENCY`. - `FREQUENCY`: Sets how often (in hours) the pipeline should run. - `TEST`: Useful for testing code insertions. - `FETCH`: Determines whether to fetch new data or use whatever was saved locally. - `NO_CACHE`: When true, deletes temporary files after processing. > [!NOTE] > The flag `NO_CACHE` does not mean that files will not get downloaded to your local > storage (specifically, the ./data directory). It only means that we'll > delete these temporary files from ./data once we're done processing them. > If `FETCH` is false, the pipeline looks for source data in the cache, so a run > with `NO_CACHE` enabled followed by a run with `FETCH` disabled will fail. These arguments are all configurable in the `docker-compose.yml` file.
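To make the flags concrete, here is a minimal sketch of how an indexer might read them at startup. The variable names come from the list above, but the parsing logic and defaults are illustrative, not a copy of `core/config.py`:

```python
import os


def env_flag(name: str, default: bool) -> bool:
    """Read a boolean flag like FETCH or NO_CACHE from the environment."""
    return os.getenv(name, str(default)).lower() in ("1", "true", "yes")


enable_scheduler = env_flag("ENABLE_SCHEDULER", False)
frequency_hours = int(os.getenv("FREQUENCY", "24"))  # illustrative default
fetch = env_flag("FETCH", True)  # fetch fresh data vs. reuse the ./data cache
no_cache = env_flag("NO_CACHE", False)  # delete temp files after processing

if not fetch and no_cache:
    # echoes the note above: deleting the cache breaks later FETCH=false runs
    print("warning: FETCH=false cannot find source data after a NO_CACHE run")
```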
### Docker Services Overview 1. `db`: [PostgreSQL] database for the reduced package data 2. `alembic`: handles migrations 3. `package_managers`: fetches and writes data for each package manager 4. `api`: a simple REST API for reading from the db 5. `ranker`: deduplicates and ranks the packages ### Hard Reset Stuff happens. Start over: `rm -rf ./data`: removes all the data the fetcher has downloaded. ## Goals Our goal is to build a data schema that looks like this: ![db/CHAI_ERD.png](db/CHAI_ERD.png) You can read more about specific data models in the db's [readme](db/README.md). Our specific application extracts the dependency graph to understand which pieces of the open-source graph are critical. We also built a simple example that displays [sbom-metadata](examples/sbom-meta) for your repository. There are many other potential use cases for this data: - License compatibility checker - Developer publications - Package popularity - Dependency analysis vulnerability tool (requires translating semver) > [!TIP] > Help us add the above to the examples folder. ## FAQs / Common Issues 1. The database url is `postgresql://postgres:s3cr3t@localhost:5435/chai`, and is used as `CHAI_DATABASE_URL` in the environment. `psql $CHAI_DATABASE_URL` will connect you to the database. 2. If you're orchestrating via docker, swap `localhost` for `host.docker.internal`
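As a quick way to verify FAQ item 1 from Python rather than `psql`, a minimal sketch (assuming `psycopg2-binary` is installed; the `packages` table is part of the schema shown above):

```python
import os

import psycopg2

# FAQ item 1: the default local URL; swap localhost for host.docker.internal
# (FAQ item 2) when connecting from inside a container
url = os.getenv(
    "CHAI_DATABASE_URL", "postgresql://postgres:s3cr3t@localhost:5435/chai"
)

with psycopg2.connect(url) as conn:
    with conn.cursor() as cur:
        cur.execute("SELECT count(*) FROM packages")
        print("packages:", cur.fetchone()[0])
```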
## Managing Dependencies We use [`uv`](https://astral.sh/uv) to manage dependencies (and sometimes execution). All dependencies are listed in [`pyproject.toml`](./pyproject.toml), under the `dependency-groups` header. Each group helps us classify the service we're adding a dependency for. For example, if we're adding a new dependency for all the indexers: ```bash uv add --group indexer requests # use the --all-groups flag to sync your venv for all dependencies uv sync --all-groups uv pip compile --group indexer -o core/requirements.txt ``` The last step writes the updated dependencies to a requirements.txt file, which the Docker containers executing the individual services need in order to build correctly. Each indexer shares the same set of dependencies, and that requirements file is **generated by uv** and maintained in [core/requirements.txt](core/requirements.txt) > [!IMPORTANT] > DO NOT UPDATE ANY `requirements.txt` FILES DIRECTLY > `uv` provides a way to generate that automatically, based on the pyproject.toml > > Have an idea on a better way to do this? Open to input... ## Deployment ```sh export CHAI_DATABASE_URL=postgresql://<user>:<password>@host.docker.internal:<port>/chai export PGPASSWORD=<password> docker compose up alembic ``` ## Tasks These are tasks that can be run using [xcfile.dev]. If you use `pkgx`, typing `dev` loads the environment. Alternatively, run them manually. ### reset ```sh rm -rf db/data data .venv ``` ### build ```sh docker compose build ``` ### start-all Requires: build ```sh docker compose up -d ``` ### stop ```sh docker compose down ``` ### logs ```sh docker compose logs ``` ### db-start Runs migrations and starts up the database ```sh docker compose build --no-cache db alembic docker compose up alembic -d ``` ### db-reset Requires: stop ```sh rm -rf db/data ``` ### db-generate-migration Inputs: MIGRATION_NAME Env: CHAI_DATABASE_URL=postgresql://postgres:s3cr3t@localhost:5435/chai ```sh cd alembic alembic revision --autogenerate -m "$MIGRATION_NAME" ``` ### db-upgrade Env: CHAI_DATABASE_URL=postgresql://postgres:s3cr3t@localhost:5435/chai ```sh cd alembic alembic upgrade head ``` ### db-downgrade Inputs: STEP Env: CHAI_DATABASE_URL=postgresql://postgres:s3cr3t@localhost:5435/chai ```sh cd alembic alembic downgrade -$STEP ``` ### db ```sh psql "postgresql://postgres:s3cr3t@localhost:5435/chai" ``` ### restart-api Refreshes table knowledge from the db. ```sh docker compose restart api ``` ### remove-orphans ```sh docker compose down --remove-orphans ``` ### start-service Inputs: SERVICE Env: CHAI_DATABASE_URL=postgresql://postgres:s3cr3t@host.docker.internal:5435/chai ```sh docker compose up $SERVICE --build ``` ### check Inputs: FOLDER Environment: FOLDER=. ```sh pkgx +python@3.13 ty check $FOLDER ``` [PostgreSQL]: https://www.postgresql.org [`pkgx`]: https://pkgx.sh [xcfile.dev]: https://xcfile.dev ================================================ FILE: alembic/.pkgx.yaml ================================================ # this .pkgx.yaml file is only for alembic dependencies: postgresql.org: 16 alembic.sqlalchemy.org: 1 psycopg.org/psycopg2: 2 ================================================ FILE: alembic/Dockerfile ================================================ FROM ghcr.io/astral-sh/uv:python3.11-bookworm-slim RUN apt update && apt -y install postgresql RUN uv pip install alembic==1.13.2 psycopg2-binary==2.9.10 sqlalchemy==2.0.41 --system COPY . . WORKDIR /alembic RUN chmod +x /alembic/run_migrations.sh ENTRYPOINT ["/bin/sh", "/alembic/run_migrations.sh"] ================================================ FILE: alembic/README.md ================================================ # CHAI Data Migrations This directory contains the Alembic configuration and migration scripts for managing the database schema of the CHAI project. Alembic is used to handle database migrations, allowing for version control of our database schema. ### About Alembic Alembic is a database migration tool for SQLAlchemy. It allows us to: - Track changes to our database schema over time - Apply and revert these changes in a controlled manner - Generate migration scripts automatically based on model changes > [!NOTE] > It's important to note that while `alembic` serves our current needs, it may not be > our long-term solution. As the CHAI project evolves, we might explore other database > migration tools or strategies that better fit our growing requirements. We're open to > reassessing our approach to schema management as needed. ## Entrypoint The main entrypoint for running migrations is the [run migrations script](run_migrations.sh). This script orchestrates the initialization and migration process. ## Steps 1. [Initialize](init-script.sql) The initialization script creates the database `chai` and loads it with any extensions we need, so we've got a clean slate for our db structures. 2.
[Load](load-values.sql) The load script prepopulates some of the tables with `enum`-like values, specifically: - `url_types`: defines different types of URLs (e.g., source, homepage, documentation) - `depends_on_types`: defines different types of dependencies (e.g., runtime, development) - `sources` and `package_managers`: defines different package managers (e.g., npm, pypi) 3. Run Alembic Migrations After initialization and loading initial data, the script runs Alembic migrations to apply any pending database schema changes. ## Contributing To contribute to the database schema: 1. Make a change in the [models](../core/models/__init__.py) file 2. Generate a new migration script: `alembic revision --autogenerate -m "Description"` 3. Review the generated migration script in the [versions](versions/) directory. The auto-generation is powerful but not perfect, so please review the script carefully. 4. Test the migration by running `alembic upgrade head`.
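For example, a step-1 change might add a new model to the models file. The class below is purely illustrative (it is not part of the real schema) and is written self-contained; in CHAI, `Base` actually lives in [core/models/__init__.py](../core/models/__init__.py):

```python
# illustrative sketch; in CHAI, use the Base from core.models instead
from sqlalchemy import Column, String
from sqlalchemy.orm import declarative_base

Base = declarative_base()


class PackageNote(Base):
    """A hypothetical table, used only to demonstrate the workflow."""

    __tablename__ = "package_notes"

    id = Column(String, primary_key=True)
    note = Column(String, nullable=True)
```

With the model registered on `Base.metadata`, step 2's `alembic revision --autogenerate -m "add package notes"` diffs the metadata against the live database and emits a `create_table` migration for review.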
""" connectable = engine_from_config( config.get_section(config.config_ini_section, {}), prefix="sqlalchemy.", poolclass=pool.NullPool, ) with connectable.connect() as connection: context.configure(connection=connection, target_metadata=target_metadata) with context.begin_transaction(): context.run_migrations() if context.is_offline_mode(): run_migrations_offline() else: run_migrations_online() ================================================ FILE: alembic/init-script.sql ================================================ CREATE DATABASE chai; \c chai CREATE EXTENSION IF NOT EXISTS "pgcrypto"; CREATE EXTENSION IF NOT EXISTS "uuid-ossp"; CREATE EXTENSION IF NOT EXISTS pg_trgm; ================================================ FILE: alembic/load-values.sql ================================================ -- url types INSERT INTO "url_types" ("name") VALUES ('source'), ('homepage'), ('documentation'), ('repository') ON CONFLICT (name) DO NOTHING; -- dependency types INSERT INTO "depends_on_types" ("name") VALUES ('build'), ('development'), ('runtime'), ('test'), ('optional'), ('recommended'), ('uses_from_macos') ON CONFLICT (name) DO NOTHING; -- sources INSERT INTO "sources" ("type") VALUES ('crates'), ('npm'), ('pypi'), ('rubygems'), ('github'), ('homebrew'), ('debian'), ('pkgx') ON CONFLICT (type) DO NOTHING; INSERT INTO "package_managers" ("source_id") SELECT id FROM "sources" WHERE "type" IN ('crates', 'npm', 'pypi', 'rubygems', 'github', 'homebrew', 'debian', 'pkgx') ON CONFLICT (source_id) DO NOTHING; ================================================ FILE: alembic/run_migrations.sh ================================================ #!/bin/bash set -uo pipefail # This script sets up the database, runs migrations, and loads initial values # Check if the 'chai' database exists, create it if it doesn't if psql "$CHAI_DATABASE_ADMIN_URL" -tAc "SELECT 1 FROM pg_database WHERE datname='chai'" | grep -q 1 then echo "Database 'chai' already exists" else echo "Database 'chai' does not exist, creating..." psql "$CHAI_DATABASE_ADMIN_URL" -f init-script.sql -a fi # Run migrations and load data (uses 'chai' database) echo "Current database version: $(alembic current)" alembic upgrade head || { echo "Migration failed"; exit 1; } echo "Loading initial values into the database..." psql "$CHAI_DATABASE_URL" -f load-values.sql -a echo "Database setup and initialization complete" ================================================ FILE: alembic/script.py.mako ================================================ """${message} Revision ID: ${up_revision} Revises: ${down_revision | comma,n} Create Date: ${create_date} """ from typing import Sequence, Union from alembic import op import sqlalchemy as sa ${imports if imports else ""} # revision identifiers, used by Alembic. revision: str = ${repr(up_revision)} down_revision: Union[str, None] = ${repr(down_revision)} branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)} depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)} def upgrade() -> None: ${upgrades if upgrades else "pass"} def downgrade() -> None: ${downgrades if downgrades else "pass"} ================================================ FILE: alembic/versions/20241028_1217-base_migration.py ================================================ """base migration Revision ID: 238d591d5310 Revises: Create Date: 2024-10-28 12:17:43.762965 """ from collections.abc import Sequence import sqlalchemy as sa from alembic import op # revision identifiers, used by Alembic. 
revision: str = "238d591d5310" down_revision: str | None = None branch_labels: str | Sequence[str] | None = None depends_on: str | Sequence[str] | None = None def upgrade() -> None: # ### commands auto generated by Alembic - please adjust! ### op.create_table( "depends_on_types", sa.Column( "id", sa.UUID(), server_default=sa.text("uuid_generate_v4()"), nullable=False, ), sa.Column("name", sa.String(), nullable=False), sa.Column( "created_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False ), sa.Column( "updated_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False ), sa.PrimaryKeyConstraint("id", name=op.f("pk_depends_on_types")), ) op.create_index( op.f("ix_depends_on_types_name"), "depends_on_types", ["name"], unique=True ) op.create_table( "licenses", sa.Column( "id", sa.UUID(), server_default=sa.text("uuid_generate_v4()"), nullable=False, ), sa.Column("name", sa.String(), nullable=False), sa.Column( "created_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False ), sa.Column( "updated_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False ), sa.PrimaryKeyConstraint("id", name=op.f("pk_licenses")), ) op.create_index(op.f("ix_licenses_name"), "licenses", ["name"], unique=True) op.create_table( "sources", sa.Column( "id", sa.UUID(), server_default=sa.text("uuid_generate_v4()"), nullable=False, ), sa.Column("type", sa.String(), nullable=False), sa.Column( "created_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False ), sa.Column( "updated_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False ), sa.PrimaryKeyConstraint("id", name=op.f("pk_sources")), sa.UniqueConstraint("type", name=op.f("uq_sources_type")), ) op.create_table( "url_types", sa.Column( "id", sa.UUID(), server_default=sa.text("uuid_generate_v4()"), nullable=False, ), sa.Column("name", sa.String(), nullable=False), sa.Column( "created_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False ), sa.Column( "updated_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False ), sa.PrimaryKeyConstraint("id", name=op.f("pk_url_types")), sa.UniqueConstraint("name", name=op.f("uq_url_types_name")), ) op.create_table( "package_managers", sa.Column( "id", sa.UUID(), server_default=sa.text("uuid_generate_v4()"), nullable=False, ), sa.Column("source_id", sa.UUID(), nullable=False), sa.Column( "created_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False ), sa.Column( "updated_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False ), sa.ForeignKeyConstraint( ["source_id"], ["sources.id"], name=op.f("fk_package_managers_source_id_sources"), ), sa.PrimaryKeyConstraint("id", name=op.f("pk_package_managers")), sa.UniqueConstraint("source_id", name=op.f("uq_package_managers_source_id")), ) op.create_table( "urls", sa.Column( "id", sa.UUID(), server_default=sa.text("uuid_generate_v4()"), nullable=False, ), sa.Column("url", sa.String(), nullable=False), sa.Column("url_type_id", sa.UUID(), nullable=False), sa.Column( "created_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False ), sa.Column( "updated_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False ), sa.ForeignKeyConstraint( ["url_type_id"], ["url_types.id"], name=op.f("fk_urls_url_type_id_url_types"), ), sa.PrimaryKeyConstraint("id", name=op.f("pk_urls")), sa.UniqueConstraint("url_type_id", "url", name="uq_url_type_url"), ) op.create_index(op.f("ix_urls_url"), "urls", ["url"], unique=False) 
op.create_index(op.f("ix_urls_url_type_id"), "urls", ["url_type_id"], unique=False) op.create_table( "users", sa.Column( "id", sa.UUID(), server_default=sa.text("uuid_generate_v4()"), nullable=False, ), sa.Column("username", sa.String(), nullable=False), sa.Column("source_id", sa.UUID(), nullable=False), sa.Column("import_id", sa.String(), nullable=False), sa.Column( "created_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False ), sa.Column( "updated_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False ), sa.ForeignKeyConstraint( ["source_id"], ["sources.id"], name=op.f("fk_users_source_id_sources") ), sa.PrimaryKeyConstraint("id", name=op.f("pk_users")), sa.UniqueConstraint("source_id", "username", name="uq_source_username"), ) op.create_index(op.f("ix_users_import_id"), "users", ["import_id"], unique=False) op.create_index(op.f("ix_users_source_id"), "users", ["source_id"], unique=False) op.create_index(op.f("ix_users_username"), "users", ["username"], unique=False) op.create_table( "load_history", sa.Column( "id", sa.UUID(), server_default=sa.text("uuid_generate_v4()"), nullable=False, ), sa.Column("package_manager_id", sa.UUID(), nullable=False), sa.Column( "created_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False ), sa.Column( "updated_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False ), sa.ForeignKeyConstraint( ["package_manager_id"], ["package_managers.id"], name=op.f("fk_load_history_package_manager_id_package_managers"), ), sa.PrimaryKeyConstraint("id", name=op.f("pk_load_history")), ) op.create_table( "packages", sa.Column( "id", sa.UUID(), server_default=sa.text("uuid_generate_v4()"), nullable=False, ), sa.Column("derived_id", sa.String(), nullable=False), sa.Column("name", sa.String(), nullable=False), sa.Column("package_manager_id", sa.UUID(), nullable=False), sa.Column("import_id", sa.String(), nullable=False), sa.Column("readme", sa.String(), nullable=True), sa.Column( "created_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False ), sa.Column( "updated_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False ), sa.ForeignKeyConstraint( ["package_manager_id"], ["package_managers.id"], name=op.f("fk_packages_package_manager_id_package_managers"), ), sa.PrimaryKeyConstraint("id", name=op.f("pk_packages")), sa.UniqueConstraint("derived_id", name=op.f("uq_packages_derived_id")), sa.UniqueConstraint( "package_manager_id", "import_id", name="uq_package_manager_import_id" ), ) op.create_index( op.f("ix_packages_import_id"), "packages", ["import_id"], unique=False ) op.create_index(op.f("ix_packages_name"), "packages", ["name"], unique=False) op.create_index( op.f("ix_packages_package_manager_id"), "packages", ["package_manager_id"], unique=False, ) op.create_table( "package_urls", sa.Column( "id", sa.UUID(), server_default=sa.text("uuid_generate_v4()"), nullable=False, ), sa.Column("package_id", sa.UUID(), nullable=False), sa.Column("url_id", sa.UUID(), nullable=False), sa.Column( "created_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False ), sa.Column( "updated_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False ), sa.ForeignKeyConstraint( ["package_id"], ["packages.id"], name=op.f("fk_package_urls_package_id_packages"), ), sa.ForeignKeyConstraint( ["url_id"], ["urls.id"], name=op.f("fk_package_urls_url_id_urls") ), sa.PrimaryKeyConstraint("id", name=op.f("pk_package_urls")), sa.UniqueConstraint("package_id", "url_id", name="uq_package_url"), ) 
op.create_index( op.f("ix_package_urls_package_id"), "package_urls", ["package_id"], unique=False ) op.create_index( op.f("ix_package_urls_url_id"), "package_urls", ["url_id"], unique=False ) op.create_table( "user_packages", sa.Column( "id", sa.UUID(), server_default=sa.text("uuid_generate_v4()"), nullable=False, ), sa.Column("user_id", sa.UUID(), nullable=False), sa.Column("package_id", sa.UUID(), nullable=False), sa.Column( "created_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False ), sa.Column( "updated_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False ), sa.ForeignKeyConstraint( ["package_id"], ["packages.id"], name=op.f("fk_user_packages_package_id_packages"), ), sa.ForeignKeyConstraint( ["user_id"], ["users.id"], name=op.f("fk_user_packages_user_id_users") ), sa.PrimaryKeyConstraint("id", name=op.f("pk_user_packages")), sa.UniqueConstraint("user_id", "package_id", name="uq_user_package"), ) op.create_index( op.f("ix_user_packages_package_id"), "user_packages", ["package_id"], unique=False, ) op.create_index( op.f("ix_user_packages_user_id"), "user_packages", ["user_id"], unique=False ) op.create_table( "versions", sa.Column( "id", sa.UUID(), server_default=sa.text("uuid_generate_v4()"), nullable=False, ), sa.Column("package_id", sa.UUID(), nullable=False), sa.Column("version", sa.String(), nullable=False), sa.Column("import_id", sa.String(), nullable=False), sa.Column("size", sa.Integer(), nullable=True), sa.Column("published_at", sa.DateTime(), nullable=True), sa.Column("license_id", sa.UUID(), nullable=True), sa.Column("downloads", sa.Integer(), nullable=True), sa.Column("checksum", sa.String(), nullable=True), sa.Column( "created_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False ), sa.Column( "updated_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False ), sa.ForeignKeyConstraint( ["license_id"], ["licenses.id"], name=op.f("fk_versions_license_id_licenses"), ), sa.ForeignKeyConstraint( ["package_id"], ["packages.id"], name=op.f("fk_versions_package_id_packages"), ), sa.PrimaryKeyConstraint("id", name=op.f("pk_versions")), sa.UniqueConstraint("package_id", "version", name="uq_package_version"), ) op.create_index( op.f("ix_versions_downloads"), "versions", ["downloads"], unique=False ) op.create_index( op.f("ix_versions_import_id"), "versions", ["import_id"], unique=False ) op.create_index( op.f("ix_versions_license_id"), "versions", ["license_id"], unique=False ) op.create_index( op.f("ix_versions_package_id"), "versions", ["package_id"], unique=False ) op.create_index( op.f("ix_versions_published_at"), "versions", ["published_at"], unique=False ) op.create_index(op.f("ix_versions_size"), "versions", ["size"], unique=False) op.create_index(op.f("ix_versions_version"), "versions", ["version"], unique=False) op.create_table( "dependencies", sa.Column( "id", sa.UUID(), server_default=sa.text("uuid_generate_v4()"), nullable=False, ), sa.Column("version_id", sa.UUID(), nullable=False), sa.Column("dependency_id", sa.UUID(), nullable=False), sa.Column("dependency_type_id", sa.UUID(), nullable=True), sa.Column("semver_range", sa.String(), nullable=True), sa.Column( "created_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False ), sa.Column( "updated_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False ), sa.ForeignKeyConstraint( ["dependency_id"], ["packages.id"], name=op.f("fk_dependencies_dependency_id_packages"), ), sa.ForeignKeyConstraint( ["dependency_type_id"], ["depends_on_types.id"], 
name=op.f("fk_dependencies_dependency_type_id_depends_on_types"), ), sa.ForeignKeyConstraint( ["version_id"], ["versions.id"], name=op.f("fk_dependencies_version_id_versions"), ), sa.PrimaryKeyConstraint("id", name=op.f("pk_dependencies")), sa.UniqueConstraint( "version_id", "dependency_id", "dependency_type_id", name="uq_version_dependency_type", ), ) op.create_index( op.f("ix_dependencies_dependency_id"), "dependencies", ["dependency_id"], unique=False, ) op.create_index( op.f("ix_dependencies_dependency_type_id"), "dependencies", ["dependency_type_id"], unique=False, ) op.create_index( op.f("ix_dependencies_version_id"), "dependencies", ["version_id"], unique=False ) op.create_table( "user_versions", sa.Column( "id", sa.UUID(), server_default=sa.text("uuid_generate_v4()"), nullable=False, ), sa.Column("user_id", sa.UUID(), nullable=False), sa.Column("version_id", sa.UUID(), nullable=False), sa.Column( "created_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False ), sa.Column( "updated_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False ), sa.ForeignKeyConstraint( ["user_id"], ["users.id"], name=op.f("fk_user_versions_user_id_users") ), sa.ForeignKeyConstraint( ["version_id"], ["versions.id"], name=op.f("fk_user_versions_version_id_versions"), ), sa.PrimaryKeyConstraint("id", name=op.f("pk_user_versions")), sa.UniqueConstraint("user_id", "version_id", name="uq_user_version"), ) op.create_index( op.f("ix_user_versions_user_id"), "user_versions", ["user_id"], unique=False ) op.create_index( op.f("ix_user_versions_version_id"), "user_versions", ["version_id"], unique=False, ) # ### end Alembic commands ### def downgrade() -> None: # ### commands auto generated by Alembic - please adjust! ### op.drop_index(op.f("ix_user_versions_version_id"), table_name="user_versions") op.drop_index(op.f("ix_user_versions_user_id"), table_name="user_versions") op.drop_table("user_versions") op.drop_index(op.f("ix_dependencies_version_id"), table_name="dependencies") op.drop_index(op.f("ix_dependencies_dependency_type_id"), table_name="dependencies") op.drop_index(op.f("ix_dependencies_dependency_id"), table_name="dependencies") op.drop_table("dependencies") op.drop_index(op.f("ix_versions_version"), table_name="versions") op.drop_index(op.f("ix_versions_size"), table_name="versions") op.drop_index(op.f("ix_versions_published_at"), table_name="versions") op.drop_index(op.f("ix_versions_package_id"), table_name="versions") op.drop_index(op.f("ix_versions_license_id"), table_name="versions") op.drop_index(op.f("ix_versions_import_id"), table_name="versions") op.drop_index(op.f("ix_versions_downloads"), table_name="versions") op.drop_table("versions") op.drop_index(op.f("ix_user_packages_user_id"), table_name="user_packages") op.drop_index(op.f("ix_user_packages_package_id"), table_name="user_packages") op.drop_table("user_packages") op.drop_index(op.f("ix_package_urls_url_id"), table_name="package_urls") op.drop_index(op.f("ix_package_urls_package_id"), table_name="package_urls") op.drop_table("package_urls") op.drop_index(op.f("ix_packages_package_manager_id"), table_name="packages") op.drop_index(op.f("ix_packages_name"), table_name="packages") op.drop_index(op.f("ix_packages_import_id"), table_name="packages") op.drop_table("packages") op.drop_table("load_history") op.drop_index(op.f("ix_users_username"), table_name="users") op.drop_index(op.f("ix_users_source_id"), table_name="users") op.drop_index(op.f("ix_users_import_id"), table_name="users") op.drop_table("users") 
op.drop_index(op.f("ix_urls_url_type_id"), table_name="urls") op.drop_index(op.f("ix_urls_url"), table_name="urls") op.drop_table("urls") op.drop_table("package_managers") op.drop_table("url_types") op.drop_table("sources") op.drop_index(op.f("ix_licenses_name"), table_name="licenses") op.drop_table("licenses") op.drop_index(op.f("ix_depends_on_types_name"), table_name="depends_on_types") op.drop_table("depends_on_types") # ### end Alembic commands ### ================================================ FILE: alembic/versions/20250312_0045-add_legacy_dependency_table.py ================================================ """add-legacy-dependency-table Revision ID: 89af630dc946 Revises: 238d591d5310 Create Date: 2025-03-12 00:45:35.727521 """ from collections.abc import Sequence import sqlalchemy as sa from alembic import op # revision identifiers, used by Alembic. revision: str = "89af630dc946" down_revision: str | None = "238d591d5310" branch_labels: str | Sequence[str] | None = None depends_on: str | Sequence[str] | None = None def upgrade() -> None: op.create_table( "legacy_dependencies", sa.Column("id", sa.Integer(), nullable=False), sa.Column("package_id", sa.UUID(), nullable=False), sa.Column("dependency_id", sa.UUID(), nullable=False), sa.Column("dependency_type_id", sa.UUID(), nullable=False), sa.Column("semver_range", sa.String(), nullable=True), sa.Column( "created_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False ), sa.Column( "updated_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False ), sa.ForeignKeyConstraint( ["dependency_id"], ["packages.id"], name=op.f("fk_legacy_dependencies_dependency_id_packages"), ), sa.ForeignKeyConstraint( ["dependency_type_id"], ["depends_on_types.id"], name=op.f("fk_legacy_dependencies_dependency_type_id_depends_on_types"), ), sa.ForeignKeyConstraint( ["package_id"], ["packages.id"], name=op.f("fk_legacy_dependencies_package_id_packages"), ), sa.PrimaryKeyConstraint("id", name=op.f("pk_legacy_dependencies")), sa.UniqueConstraint( "package_id", "dependency_id", name="uq_package_dependency" ), ) op.create_index( op.f("ix_legacy_dependencies_dependency_id"), "legacy_dependencies", ["dependency_id"], unique=False, ) op.create_index( op.f("ix_legacy_dependencies_dependency_type_id"), "legacy_dependencies", ["dependency_type_id"], unique=False, ) op.create_index( op.f("ix_legacy_dependencies_package_id"), "legacy_dependencies", ["package_id"], unique=False, ) def downgrade() -> None: op.drop_index( op.f("ix_legacy_dependencies_package_id"), table_name="legacy_dependencies" ) op.drop_index( op.f("ix_legacy_dependencies_dependency_type_id"), table_name="legacy_dependencies", ) op.drop_index( op.f("ix_legacy_dependencies_dependency_id"), table_name="legacy_dependencies" ) op.drop_table("legacy_dependencies") ================================================ FILE: alembic/versions/20250312_2244-canons.py ================================================ """canons Revision ID: e7632ae1aff7 Revises: 89af630dc946 Create Date: 2025-03-12 22:44:45.272179 """ from collections.abc import Sequence import sqlalchemy as sa from alembic import op # revision identifiers, used by Alembic. 
revision: str = "e7632ae1aff7" down_revision: str | None = "89af630dc946" branch_labels: str | Sequence[str] | None = None depends_on: str | Sequence[str] | None = None def upgrade() -> None: op.create_table( "canons", sa.Column("id", sa.UUID(), nullable=False), sa.Column("url", sa.String(), nullable=False), sa.Column("name", sa.String(), nullable=False), sa.Column( "created_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False ), sa.Column( "updated_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False ), sa.PrimaryKeyConstraint("id", name=op.f("pk_canons")), ) op.create_index(op.f("ix_canons_name"), "canons", ["name"], unique=False) op.create_index(op.f("ix_canons_url"), "canons", ["url"], unique=True) op.create_table( "canon_packages", sa.Column("id", sa.UUID(), nullable=False), sa.Column("canon_id", sa.UUID(), nullable=False), sa.Column("package_id", sa.UUID(), nullable=False), sa.Column( "created_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False ), sa.Column( "updated_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False ), sa.ForeignKeyConstraint( ["canon_id"], ["canons.id"], name=op.f("fk_canon_packages_canon_id_canons") ), sa.ForeignKeyConstraint( ["package_id"], ["packages.id"], name=op.f("fk_canon_packages_package_id_packages"), ), sa.PrimaryKeyConstraint("id", name=op.f("pk_canon_packages")), ) op.create_index( op.f("ix_canon_packages_canon_id"), "canon_packages", ["canon_id"], unique=False ) op.create_index( op.f("ix_canon_packages_package_id"), "canon_packages", ["package_id"], unique=False, ) def downgrade() -> None: op.drop_index(op.f("ix_canon_packages_package_id"), table_name="canon_packages") op.drop_index(op.f("ix_canon_packages_canon_id"), table_name="canon_packages") op.drop_table("canon_packages") op.drop_index(op.f("ix_canons_url"), table_name="canons") op.drop_index(op.f("ix_canons_name"), table_name="canons") op.drop_table("canons") ================================================ FILE: alembic/versions/20250416_0223-add_ranks.py ================================================ """add-ranks Revision ID: 26e124131bf8 Revises: e7632ae1aff7 Create Date: 2025-04-16 02:23:33.665773 """ from collections.abc import Sequence import sqlalchemy as sa from alembic import op # revision identifiers, used by Alembic. 
revision: str = "26e124131bf8" down_revision: str | None = "e7632ae1aff7" branch_labels: str | Sequence[str] | None = None depends_on: str | Sequence[str] | None = None def upgrade() -> None: op.create_table( "tea_rank_runs", sa.Column( "id", sa.UUID(), server_default=sa.text("uuid_generate_v4()"), nullable=False, ), sa.Column("run", sa.Integer(), nullable=False), sa.Column("split_ratio", sa.String(), nullable=False), sa.Column( "created_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False ), sa.PrimaryKeyConstraint("id", name=op.f("pk_tea_rank_runs")), ) op.create_table( "tea_ranks", sa.Column( "id", sa.UUID(), server_default=sa.text("uuid_generate_v4()"), nullable=False, ), sa.Column("tea_rank_run", sa.Integer(), nullable=False), sa.Column("canon_id", sa.UUID(), nullable=False), sa.Column("rank", sa.String(), nullable=False), sa.Column( "created_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False ), sa.ForeignKeyConstraint( ["canon_id"], ["canons.id"], name=op.f("fk_tea_ranks_canon_id_canons") ), sa.PrimaryKeyConstraint("id", name=op.f("pk_tea_ranks")), ) op.create_index( op.f("ix_tea_ranks_canon_id"), "tea_ranks", ["canon_id"], unique=False ) op.create_index( op.f("ix_tea_ranks_tea_rank_run"), "tea_ranks", ["tea_rank_run"], unique=False ) def downgrade() -> None: op.drop_index(op.f("ix_tea_ranks_tea_rank_run"), table_name="tea_ranks") op.drop_index(op.f("ix_tea_ranks_canon_id"), table_name="tea_ranks") op.drop_table("tea_ranks") op.drop_table("tea_rank_runs") ================================================ FILE: alembic/versions/20250422_0940-add_unique_package_to_canon_packages.py ================================================ """add-unique-package-to-canon-packages Revision ID: a41236bd2340 Revises: 26e124131bf8 Create Date: 2025-04-22 09:40:22.901637 """ from collections.abc import Sequence from alembic import op # revision identifiers, used by Alembic. revision: str = "a41236bd2340" down_revision: str | None = "26e124131bf8" branch_labels: str | Sequence[str] | None = None depends_on: str | Sequence[str] | None = None def upgrade() -> None: op.drop_index("ix_canon_packages_package_id", table_name="canon_packages") op.create_index( op.f("ix_canon_packages_package_id"), "canon_packages", ["package_id"], unique=True, ) def downgrade() -> None: op.drop_index(op.f("ix_canon_packages_package_id"), table_name="canon_packages") op.create_index( "ix_canon_packages_package_id", "canon_packages", ["package_id"], unique=False ) ================================================ FILE: alembic/versions/20250508_1752-add_trgm_indexes.py ================================================ """add_trgm_indexes Revision ID: 7392d4d74ce2 Revises: a41236bd2340 Create Date: 2025-05-08 17:52:40.417822 """ from collections.abc import Sequence from alembic import op # revision identifiers, used by Alembic. 
revision: str = "7392d4d74ce2" down_revision: str | None = "a41236bd2340" branch_labels: str | Sequence[str] | None = None depends_on: str | Sequence[str] | None = None def upgrade() -> None: # Drop the existing indexes op.drop_index("ix_canons_name", table_name="canons") op.drop_index("ix_urls_url", table_name="urls") # Create trigram indexes # NOTE: this was added manually to this script (not auto-generated) op.create_index( "ix_urls_url_trgm", "urls", ["url"], unique=False, postgresql_using="gin", postgresql_ops={"url": "gin_trgm_ops"}, ) op.create_index( "ix_canons_name_trgm", "canons", ["name"], unique=False, postgresql_using="gin", postgresql_ops={"name": "gin_trgm_ops"}, ) def downgrade() -> None: # Drop the trigram indexes # NOTE: this was added manually to this script (not auto-generated) op.drop_index("ix_urls_url_trgm", table_name="urls") op.drop_index("ix_canons_name_trgm", table_name="canons") # Recreate the existing indexes (auto-generated) op.create_index("ix_urls_url", "urls", ["url"], unique=False) op.create_index("ix_canons_name", "canons", ["name"], unique=False) ================================================ FILE: alembic/versions/20250529_2341-rename_canons_table_and_recreate.py ================================================ """rename_canons_table_and_recreate Revision ID: 542d79f30fc9 Revises: 7392d4d74ce2 Create Date: 2025-05-29 23:41:38.465987 """ from collections.abc import Sequence import sqlalchemy as sa from sqlalchemy.dialects.postgresql import UUID from alembic import op # revision identifiers, used by Alembic. revision: str = "542d79f30fc9" down_revision: str | None = "7392d4d74ce2" branch_labels: str | Sequence[str] | None = None depends_on: str | Sequence[str] | None = None def upgrade() -> None: """ Rename existing canons table and create new one with proper url_id FK """ # Step 1: Rename existing table to preserve data as backup op.rename_table("canons", "canons_old") # Step 2: Drop FK constraints that pointed to old table (from other tables) op.drop_constraint( "fk_canon_packages_canon_id_canons", "canon_packages", type_="foreignkey" ) op.drop_constraint("fk_tea_ranks_canon_id_canons", "tea_ranks", type_="foreignkey") # Step 3: Drop indexes and constraints from old table to avoid naming conflicts op.drop_constraint("pk_canons", "canons_old", type_="primary") op.drop_index("ix_canons_url", table_name="canons_old") op.drop_index("ix_canons_name_trgm", table_name="canons_old") # Step 4: Create new canons table with proper schema op.create_table( "canons", sa.Column( "id", UUID(as_uuid=True), primary_key=True, server_default=sa.func.uuid_generate_v4(), ), sa.Column( "url_id", UUID(as_uuid=True), nullable=False, index=True, unique=True ), sa.Column("name", sa.String(), nullable=False), sa.Column( "created_at", sa.DateTime(), nullable=False, server_default=sa.func.now() ), sa.Column( "updated_at", sa.DateTime(), nullable=False, server_default=sa.func.now() ), # Constraints sa.ForeignKeyConstraint(["url_id"], ["urls.id"], name="fk_canons_url_id_urls"), sa.UniqueConstraint("url_id", name="uq_canons_url_id"), ) # Step 5: Create indexes op.create_index( "ix_canons_name_trgm", "canons", ["name"], postgresql_using="gin", postgresql_ops={"name": "gin_trgm_ops"}, ) # Note: FK constraints to this table will be recreated in a separate migration # after data population, since this table starts empty def downgrade() -> None: """ Restore original canons table with all its original indexes and constraints """ # FK constraints were dropped in upgrade and not recreated, 
so no need to drop them here # Drop new table op.drop_table("canons") # Restore old table op.rename_table("canons_old", "canons") # Recreate all original constraints and indexes on restored table op.create_primary_key("pk_canons", "canons", ["id"]) op.create_index("ix_canons_url", "canons", ["url"], unique=True) op.create_index( "ix_canons_name_trgm", "canons", ["name"], postgresql_using="gin", postgresql_ops={"name": "gin_trgm_ops"}, ) # Recreate FK constraints from other tables pointing to canons op.create_foreign_key( "fk_canon_packages_canon_id_canons", "canon_packages", "canons", ["canon_id"], ["id"], ) op.create_foreign_key( "fk_tea_ranks_canon_id_canons", "tea_ranks", "canons", ["canon_id"], ["id"] ) ================================================ FILE: alembic/versions/20250529_2345-recreate_canon_foreign_keys.py ================================================ """recreate_canon_foreign_keys Revision ID: 3de32bb99a71 Revises: 542d79f30fc9 Create Date: 2025-05-29 23:45:12.372951 """ from collections.abc import Sequence from alembic import op # revision identifiers, used by Alembic. revision: str = "3de32bb99a71" down_revision: str | None = "542d79f30fc9" branch_labels: str | Sequence[str] | None = None depends_on: str | Sequence[str] | None = None def upgrade() -> None: """ Recreate FK constraints pointing to canons table after data population Run this AFTER your canonicalization script has populated the canons table """ # First, clean up any orphaned records in referencing tables # (Optional: uncomment if you want to auto-clean orphaned data) # op.execute(""" # DELETE FROM canon_packages # WHERE canon_id NOT IN (SELECT id FROM canons) # """) # op.execute(""" # DELETE FROM tea_ranks # WHERE canon_id NOT IN (SELECT id FROM canons) # """) # Recreate FK constraints op.create_foreign_key( "fk_canon_packages_canon_id_canons", "canon_packages", "canons", ["canon_id"], ["id"], ) op.create_foreign_key( "fk_tea_ranks_canon_id_canons", "tea_ranks", "canons", ["canon_id"], ["id"] ) def downgrade() -> None: """ Drop FK constraints pointing to canons table """ op.drop_constraint( "fk_canon_packages_canon_id_canons", "canon_packages", type_="foreignkey" ) op.drop_constraint("fk_tea_ranks_canon_id_canons", "tea_ranks", type_="foreignkey") ================================================ FILE: api/.dockerignore ================================================ /target .git .gitignore README.md ================================================ FILE: api/.gitignore ================================================ /target **/*.rs.bk Cargo.lock .env ================================================ FILE: api/Cargo.toml ================================================ [package] name = "chai-api" version = "1.3.0" edition = "2021" authors = ["Jacob Heider "] description = "A simple REST API for the CHAI database" readme = "README.md" license = "MIT" repository = "https://github.com/teaxyz/chai-oss" [dependencies] uuid = { version = "1.11.0", features = ["serde", "v4"] } actix-web = "4.3" dotenv = "0.15" tokio = { version = "1", features = ["full"] } log = "0.4" env_logger = "0.10" serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" chrono = { version = "0.4", features = ["serde"] } tokio-postgres = { version = "0.7", features = [ "with-serde_json-1", "with-chrono-0_4", "with-uuid-1", ] } deadpool-postgres = "0.10.0" url = "2.5.2" dashmap = "6.1.0" ================================================ FILE: api/Dockerfile ================================================ FROM 
--platform=linux/amd64 lukemathwalker/cargo-chef:latest-rust-1.82.0 as chef WORKDIR /app FROM chef as planner COPY . . RUN cargo chef prepare --recipe-path recipe.json FROM chef as builder COPY --from=planner /app/recipe.json recipe.json RUN cargo chef cook --release --recipe-path recipe.json COPY . . RUN cargo build --release FROM debian:bookworm-slim as runtime WORKDIR /app RUN apt-get update && apt-get install -y curl openssl ca-certificates && rm -rf /var/lib/apt/lists/* COPY --from=builder /app/target/release/chai-api /usr/local/bin ENV DATABASE_URL=postgresql://postgres:s3cr3t@db:5432/chai EXPOSE 8080 CMD ["chai-api"]
================================================ FILE: api/README.md ================================================
# CHAI API CHAI API is a REST API service for accessing the CHAI database, which contains package manager data. ## Features - List all tables in the database - Fetch paginated data from any table - Heartbeat endpoint for health checks - Search deduplicated packages by name ## Requirements - Rust 1.67 or later - PostgreSQL database ## API Endpoints ### Health Check ``` GET /heartbeat ``` Returns the health status of the API and database connection. **Response (Success)** ```txt OK - Database connection is healthy ``` **Response (Failure - Database query failed):** ```txt Database query failed ``` **Response (Failure - Database connection failed):** ```txt Failed to get database connection ``` ### List Tables ``` GET /tables ``` Returns a paginated list of all available tables in the database. **Query Parameters** - `page` (optional): Page number (default: 1) - `limit` (optional): Number of items per page (default: 200) **Response** ```json { "data": [ "legacy_dependencies", "versions", "canons_old", "tea_rank_runs", "canons", "licenses", "canon_packages", "users", "load_history", "tea_ranks", "alembic_version", "sources", "package_managers", "url_types", "urls", "packages", "package_urls", "user_packages", "dependencies", "depends_on_types", "user_versions", "canon_packages_old", "tea_rank_old" ], "limit": 200, "page": 1, "total_count": 23, "total_pages": 1 } ``` ### Get Table Data ``` GET /tables/{table} ``` Returns paginated data from the specified table. **Path Parameters** - `table`: Name of the table to query (see available tables in List Tables response) **Query Parameters** - `page` (optional): Page number (default: 1) - `limit` (optional): Number of items per page (default: 200) **Response** ```json { "table": "packages", "total_count": 166459, "page": 1, "limit": 2, "total_pages": 83230, "columns": [ ... ], "data": [ { "created_at": "2024-12-27 08:04:03.991832", "derived_id": "...", "id": "...", "import_id": "...", "name": "...", "package_manager_id": "...", "readme": "...", "updated_at": "2024-12-27 08:04:03.991832" }, ... ] } ``` ### Get Table Row By ID ``` GET /tables/{table}/{id} ``` Returns a specific row from the table by its UUID. **Path Parameters** - `table`: Name of the table to query - `id`: UUID of the row to fetch **Response** ```json { "created_at": "2024-12-27 08:04:03.991832", "derived_id": "...", "id": "...", "import_id": "...", "name": "...", "package_manager_id": "...", "readme": "...", "updated_at": "2024-12-27 08:04:03.991832" } ``` ### Get Project ``` GET /project/{id} ``` Returns detailed information about a specific canon by its canonical ID.
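For example, assuming the API is running locally on the default port (the UUID below is illustrative):

```bash
curl http://localhost:8080/project/550e8400-e29b-41d4-a716-446655440000
```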
**Path Parameters** - `id`: UUID of the project (canon) to fetch **Response** ```json { "projectId": "550e8400-e29b-41d4-a716-446655440000", "homepage": "https://example.com", "name": "example-project", "source": "https://github.com/example/project", "teaRank": "150", "teaRankCalculatedAt": "2024-12-27T08:04:03.991832", "packageManagers": ["homebrew", "crates"] } ``` **Response (Not Found)** ```json { "error": "No row found with id '550e8400-e29b-41d4-a716-446655440000' in table canons" } ``` ### Get Projects Batch ``` POST /project/batch ``` Returns detailed information about multiple projects by their canonical IDs. **Request Body** ```json { "projectIds": ["uuid1", "uuid2", "..."] } ``` **Parameters** - `projectIds`: Array of project UUIDs to fetch (required, max 100) **Example Request** ```bash curl -X POST http://localhost:8080/project/batch \ -H "Content-Type: application/json" \ -d '{ "projectIds": [ "550e8400-e29b-41d4-a716-446655440000", "6ba7b810-9dad-11d1-80b4-00c04fd430c8" ] }' ``` **Response** ```json [ { "projectId": "550e8400-e29b-41d4-a716-446655440000", "homepage": "https://example.com", "name": "example-project", "source": "https://github.com/example/project", "teaRank": "150", "teaRankCalculatedAt": "2024-12-27T08:04:03.991832", "packageManagers": ["homebrew", "crates"] }, { "projectId": "6ba7b810-9dad-11d1-80b4-00c04fd430c8", "homepage": "https://another-example.com", "name": "another-project", "source": "https://github.com/another/project", "teaRank": "75", "teaRankCalculatedAt": "2024-12-26T10:15:22.123456", "packageManagers": ["debian", "pkgx"] } ] ``` **Response (Invalid UUIDs)** ```json { "error": "Invalid UUID format in project IDs" } ``` ### Search Projects ``` GET /project/search/{name} ``` Searches for projects by name using case-insensitive partial matching. Results are ordered by name length and limited to 10 items. **Path Parameters** - `name`: Project name to search for (partial matches supported) **Example** ``` GET /project/search/react ``` **Response** ```json [ { "projectId": "550e8400-e29b-41d4-a716-446655440000", "homepage": "https://reactjs.org", "name": "react", "source": "https://github.com/facebook/react", "packageManagers": ["homebrew", "npm"] }, { "projectId": "6ba7b810-9dad-11d1-80b4-00c04fd430c8", "homepage": "https://reactrouter.com", "name": "react-router", "source": "https://github.com/remix-run/react-router", "packageManagers": ["npm"] } ] ``` **Response (Empty Search)** ```json { "error": "Search name cannot be empty" } ``` ### Leaderboard ``` POST /leaderboard ``` Returns detailed information about specified projects, ordered by tea rank in descending order. This endpoint allows filtering by project IDs and limiting the number of results. **Request Body** ```json { "projectIds": ["uuid1", "uuid2", "..."], "limit": 10 } ``` **Parameters** - `projectIds`: Array of project UUIDs to include in the leaderboard (required, max 100) - `limit`: Maximum number of results to return (required, 1-100) **Example Request** ```bash curl -X POST http://localhost:8080/leaderboard \ -H "Content-Type: application/json" \ -d '{ "projectIds": [ "1e233f1b-2b49-4ada-9953-1763785fba2c", "2c24aa45-4fe2-4f2b-ae58-09d4b9a4ad28" ], "limit": 2 }' ``` **Response** ```json [ { "projectId": "1e233f1b-2b49-4ada-9953-1763785fba2c", "homepage": "https://example.com", "name": "example-project", "source": "https://github.com/example/project", "teaRank": "150", "teaRankCalculatedAt": "2024-12-27T08:04:03.991832", "packageManagers": ["homebrew", "crates"] }, { "projectId": "2c24aa45-4fe2-4f2b-ae58-09d4b9a4ad28", "homepage": "https://another-example.com", "name": "another-project", "source": "https://github.com/another/project", "teaRank": "75", "teaRankCalculatedAt": "2024-12-26T10:15:22.123456", "packageManagers": ["debian", "pkgx"] } ] ``` **Response (Validation Errors)** ```json { "error": "At least one project ID is required" } ``` ```json { "error": "Too many project IDs (maximum 100 allowed)" } ``` ```json { "error": "Invalid limit 150: must be between 1 and 100" } ``` ## Available Tables The database contains the following tables: | Table Name | Description | | ---------------- | ------------------------------------------------ | | alembic_version | Stores the current version of alembic | | dependencies | Package dependencies | | depends_on_types | Types of package dependencies | | licenses | Package licenses | | load_history | Load history | | package_managers | Package manager information | | package_urls | Relationship of packages to URLs | | packages | Package metadata | | sources | Package manager sources (homebrew, crates, etc.) | | url_types | Types of URLs (homepage, repository, etc.) | | urls | Actual URLs | | user_packages | User-package relationships | | user_versions | User-version relationships | | users | User (package owner) information | | versions | Package versions | By default, the API will be available at `http://localhost:8080`. ## Deployment The CHAI API is deployed using AWS services with the following stack: - **Amazon ECR (Elastic Container Registry)** - Container image storage - **Amazon ECS (Elastic Container Service)** - Container orchestration - **ECS Service** - Manages running tasks and load balancing - **ECS Task Definition** - Defines container configuration ### Prerequisites - AWS CLI configured with appropriate permissions - Docker installed locally - Access to the AWS account and ECR repository ### Building and Pushing Docker Image 1. **Get ECR login credentials:** ```bash aws ecr get-login-password --region <region> | docker login --username AWS --password-stdin <account-id>.dkr.ecr.<region>.amazonaws.com ``` 2. **Build the Docker image:** ```bash docker build -t chai-api . ``` 3. **Tag the image for ECR:** ```bash docker tag chai-api:latest <account-id>.dkr.ecr.<region>.amazonaws.com/chai-api:latest ``` 4. **Push the image to ECR:** ```bash docker push <account-id>.dkr.ecr.<region>.amazonaws.com/chai-api:latest ``` > **Note:** Replace `<account-id>` and `<region>` with your AWS account ID and region. You can find the exact commands in your ECR repository console under "View push commands". ### Updating Existing ECS Service If updating the ECS service, you first need to build and push the Docker image.
Then: ```bash aws ecs update-service --cluster chai-<env> --service <env>-chai-api --force-new-deployment ``` (replace `<env>` with the name of your environment) ### Environment Variables Ensure the following environment variables are configured in your task definition: - `DATABASE_URL`: PostgreSQL connection string - `HOST`: Host to bind to (default: "0.0.0.0") - `PORT`: Port to listen on (default: "8080") ### Useful AWS Documentation - [Amazon ECR User Guide](https://docs.aws.amazon.com/ecr/) - [Amazon ECS Developer Guide](https://docs.aws.amazon.com/ecs/) - [ECS Task Definitions](https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task_definitions.html) - [ECS Services](https://docs.aws.amazon.com/AmazonECS/latest/developerguide/ecs_services.html) - [AWS CLI ECS Commands](https://docs.aws.amazon.com/cli/latest/reference/ecs/) ## Tasks ### Format ```bash cargo fmt --all ``` ### Build ```bash cargo build --release ``` ### Validate ```bash cargo clippy --all-targets --all-features -- -D warnings ``` ### Run Env: DATABASE_URL=postgresql://postgres:s3cr3t@localhost:5435/chai ```bash target/release/chai-api ```
================================================ FILE: api/src/app_state.rs ================================================
use dashmap::DashMap; use deadpool_postgres::Pool; use serde_json::Value; use std::sync::Arc; use std::time::{Duration, Instant}; use uuid::Uuid; const TTL: Duration = Duration::from_secs(3600); // 1 hour #[derive(Clone)] pub struct ProjectCacheEntry { pub data: Arc<Value>, pub created_at: Instant, } impl ProjectCacheEntry { pub fn new(data: Value) -> Self { Self { data: Arc::new(data), created_at: Instant::now(), } } pub fn is_expired(&self) -> bool { self.created_at.elapsed() > TTL } } pub struct AppState { pub pool: Pool, pub tables: Arc<Vec<String>>, pub project_cache: Arc<DashMap<Uuid, ProjectCacheEntry>>, }
================================================ FILE: api/src/db.rs ================================================
use deadpool_postgres::{Config, Pool, Runtime}; use std::env; use std::sync::Arc; use tokio_postgres::{Client, NoTls}; use url::Url; pub async fn create_pool() -> Pool { let database_url = env::var("DATABASE_URL").expect("DATABASE_URL must be set"); let db_url = Url::parse(&database_url).expect("Invalid database URL"); let mut config = Config::new(); config.host = db_url.host_str().map(ToOwned::to_owned); config.port = db_url.port(); config.user = Some(db_url.username().to_owned()); config.password = db_url.password().map(ToOwned::to_owned); config.dbname = db_url.path().strip_prefix('/').map(ToOwned::to_owned); config .create_pool(Some(Runtime::Tokio1), NoTls) .expect("Failed to create pool") } pub async fn get_tables(client: &Client) -> Vec<String> { let rows = client .query( "SELECT table_name FROM information_schema.tables WHERE table_schema = 'public'", &[], ) .await .expect("Failed to fetch tables"); rows.into_iter() .map(|row| row.get::<_, String>("table_name")) .collect() } pub async fn initialize_db() -> (Pool, Arc<Vec<String>>) { let pool = create_pool().await; let client = pool.get().await.expect("Failed to get client from pool"); let tables = Arc::new(get_tables(&client).await); (pool, tables) }
================================================ FILE: api/src/handlers.rs ================================================
use actix_web::{get, post, web, HttpResponse, Responder}; use serde::{Deserialize, Serialize}; use serde_json::{json, Value}; use std::sync::Arc; use tokio_postgres::error::SqlState; use uuid::Uuid; use crate::app_state::AppState; use crate::utils::{get_cached_projects, get_column_names, rows_to_json, Pagination}; const RESPONSE_LIMIT: i64 = 1000; #[derive(Deserialize)] pub struct PaginationParams { pub page: Option<i64>, pub limit: Option<i64>, } #[derive(Serialize)] struct PaginatedResponse { table: String, total_count: i64, page: i64, limit: i64, total_pages: i64, columns: Vec<String>, data: Vec<Value>, } #[derive(Deserialize)] pub struct LeaderboardRequest { #[serde(rename = "projectIds")] pub project_ids: Option<Vec<Uuid>>, pub limit: i64, } #[derive(Deserialize)] pub struct ProjectBatchRequest { #[serde(rename = "projectIds")] pub project_ids: Vec<Uuid>, } pub fn check_table_exists(table: &str, tables: &[String]) -> Option<HttpResponse> { if !tables.contains(&table.to_string()) { Some(HttpResponse::NotFound().json(json!({ "error": format!("Table '{}' not found", table), "valid_tables": tables, "help": "Refer to the API documentation for valid table names." }))) } else { None } } #[get("/tables")] pub async fn list_tables( query: web::Query<PaginationParams>, data: web::Data<AppState>, ) -> impl Responder { let total_count = data.tables.len() as i64; let pagination = Pagination::new(query, total_count); let start = pagination.offset as usize; let end = (start + pagination.limit as usize).min(data.tables.len()); let paginated_tables = &data.tables[start..end]; HttpResponse::Ok().json(json!({ "total_count": total_count, "page": pagination.page, "limit": pagination.limit, "total_pages": pagination.total_pages, "data": paginated_tables, })) } #[get("/heartbeat")] pub async fn heartbeat(data: web::Data<AppState>) -> impl Responder { match data.pool.get().await { Ok(client) => match client.query_one("SELECT 1", &[]).await { Ok(_) => HttpResponse::Ok().body("OK - Database connection is healthy"), Err(e) => { log::error!("Database query failed: {e}"); HttpResponse::InternalServerError().body("Database query failed") } }, Err(e) => { log::error!("Failed to get database connection: {e}"); HttpResponse::InternalServerError().body("Failed to get database connection") } } } #[get("/tables/{table}")] pub async fn get_table( path: web::Path<String>, query: web::Query<PaginationParams>, data: web::Data<AppState>, ) -> impl Responder { let table = path.into_inner(); if let Some(response) = check_table_exists(&table, &data.tables) { return response; } let count_query = format!("SELECT COUNT(*) FROM {table}"); match data.pool.get().await { Ok(client) => match client.query_one(&count_query, &[]).await { Ok(count_row) => { let total_count: i64 = count_row.get(0); let pagination = Pagination::new(query, total_count); let data_query = format!("SELECT * FROM {table} LIMIT $1 OFFSET $2"); match client .query(&data_query, &[&pagination.limit, &pagination.offset]) .await { Ok(rows) => { let columns = get_column_names(&rows); let data = rows_to_json(&rows); let response = PaginatedResponse { table, total_count, page: pagination.page, limit: pagination.limit, total_pages: pagination.total_pages, columns, data, }; HttpResponse::Ok().json(response) } Err(e) => { log::error!("Database query error: {e}"); HttpResponse::InternalServerError().json(json!({ "error": "An error occurred while querying the database" })) } } } Err(e) => { log::error!("Database count query error: {e}"); HttpResponse::InternalServerError().json(json!({ "error": "An error occurred while counting rows in the database" })) } }, Err(e) => { log::error!("Failed to get database connection: {e}"); HttpResponse::InternalServerError().body("Failed to get database connection") } } } #[get("/tables/{table}/{id}")] pub async fn get_table_row( path: web::Path<(String, Uuid)>, data: web::Data<AppState>, ) -> impl Responder { let (table_name, id) = path.into_inner(); if let Some(response) = check_table_exists(&table_name, &data.tables) { return response; } let query = format!("SELECT * FROM {table_name} WHERE id = $1"); match data.pool.get().await { Ok(client) => match client.query_one(&query, &[&id]).await { Ok(row) => { let json = rows_to_json(&[row]); let value = json.first().unwrap(); HttpResponse::Ok().json(value) } Err(e) => { if e.as_db_error() .is_some_and(|db_err| db_err.code() == &SqlState::UNDEFINED_TABLE) { HttpResponse::NotFound().json(json!({ "error": format!("Table '{}' not found", table_name) })) } else if e .as_db_error() .is_some_and(|e| e.code() == &SqlState::NO_DATA_FOUND) { HttpResponse::NotFound().json(json!({ "error": format!("No row found with id '{}' in table '{}'", id, table_name) })) } else { HttpResponse::InternalServerError().json(json!({ "error": format!("Database error: {}", e) })) } } }, Err(e) => { log::error!("Failed to get database connection: {e}"); HttpResponse::InternalServerError().body("Failed to get database connection") } } } #[get("/project/{id}")] pub async fn get_project(path: web::Path<Uuid>, data: web::Data<AppState>) -> impl Responder { // Check if the table exists let id = path.into_inner(); // Construct the query let query = r#" WITH base AS MATERIALIZED ( SELECT c.id, u_homepage.url AS homepage, c.name, COALESCE(tr_latest.rank, '0') AS "teaRank", tr_latest.created_at AS "teaRankCalculatedAt", ( SELECT ARRAY_AGG(DISTINCT s.type) FROM canon_packages cp2 JOIN packages p2 ON cp2.package_id = p2.id JOIN package_managers pm2 ON p2.package_manager_id = pm2.id JOIN sources s ON pm2.source_id = s.id WHERE cp2.canon_id = c.id ) AS "packageManagers", ( SELECT COUNT(*)::bigint FROM legacy_dependencies ld JOIN canon_packages cp_out ON cp_out.package_id = ld.package_id WHERE cp_out.canon_id = c.id ) AS "dependenciesCount", ( SELECT COUNT(*)::bigint FROM legacy_dependencies ld JOIN canon_packages cp_in ON cp_in.package_id = ld.dependency_id WHERE cp_in.canon_id = c.id ) AS "dependentsCount" FROM canons c JOIN urls u_homepage ON c.url_id = u_homepage.id LEFT JOIN LATERAL ( SELECT tr.rank, tr.created_at FROM tea_ranks tr WHERE tr.canon_id = c.id ORDER BY tr.created_at DESC LIMIT 1 ) tr_latest ON TRUE WHERE c.id = $1 ) SELECT DISTINCT ON (b.id) b.id AS "projectId", b.homepage, b.name, u_source.url AS source, b."teaRank", b."teaRankCalculatedAt", b."packageManagers", b."dependenciesCount", b."dependentsCount" FROM base b JOIN canon_packages cp ON cp.canon_id = b.id JOIN package_urls pu ON pu.package_id = cp.package_id JOIN urls u_source ON pu.url_id = u_source.id JOIN url_types ut ON ut.id = u_source.url_type_id WHERE ut.name = 'source' ORDER BY b.id, b."teaRankCalculatedAt" DESC, u_source.url;"#; match data.pool.get().await { Ok(client) => match client.query_one(query, &[&id]).await { Ok(row) => { let json = rows_to_json(&[row]); let value = json.first().unwrap(); HttpResponse::Ok().json(value) } Err(e) => { if e.as_db_error() .is_some_and(|e| e.code() == &SqlState::NO_DATA_FOUND) { HttpResponse::NotFound().json(json!({ "error": format!("No row found with id '{:?}' in table canons", id) })) } else { HttpResponse::InternalServerError().json(json!({ "error": format!("Database error: {}", e) })) } } }, Err(e) => { log::error!("Failed to get database connection: {e}"); HttpResponse::InternalServerError().body("Failed to get database connection") } } } #[post("/project/batch")] pub async fn list_projects_by_id( req: web::Json<ProjectBatchRequest>, data: web::Data<AppState>, ) -> impl Responder { if req.project_ids.is_empty() { return HttpResponse::BadRequest().json(json!({ "error": "No project IDs provided" })); } // Construct the query let query = r#" SELECT DISTINCT ON (c.id) c.id AS "projectId", u_homepage.url AS homepage, c.name, u_source.url AS source, COALESCE(tr.rank,'0') AS "teaRank", tr.created_at AS "teaRankCalculatedAt", ( SELECT ARRAY_AGG(DISTINCT s.type) FROM canon_packages cp2 JOIN packages p2 ON cp2.package_id = p2.id JOIN package_managers pm2 ON p2.package_manager_id = pm2.id JOIN sources s ON pm2.source_id = s.id WHERE cp2.canon_id = c.id ) AS "packageManagers" FROM canons c JOIN urls u_homepage ON u_homepage.id = c.url_id JOIN canon_packages cp ON cp.canon_id = c.id JOIN package_urls pu ON pu.package_id = cp.package_id JOIN urls u_source ON pu.url_id = u_source.id JOIN url_types ut ON ut.id = u_source.url_type_id LEFT JOIN tea_ranks tr ON tr.canon_id = c.id WHERE c.id = ANY($1::uuid[]) AND ut.name = 'source' ORDER BY c.id, tr.created_at DESC, u_source.url;"#; match data.pool.get().await { Ok(client) => match client.query(query, &[&req.project_ids]).await { Ok(rows) => { let json = rows_to_json(&rows); HttpResponse::Ok().json(json) } Err(e) => { log::error!("Database query error: {e}"); HttpResponse::InternalServerError().json(json!({ "error": format!("Database error: {}", e) })) } }, Err(e) => { log::error!("Failed to get database connection: {e}"); HttpResponse::InternalServerError().body("Failed to get database connection") } } } #[get("/project/search/{name}")] pub async fn list_projects_by_name( path: web::Path<String>, data: web::Data<AppState>, ) -> impl Responder { let name = path.into_inner(); if name.trim().is_empty() { return HttpResponse::BadRequest().json(json!({ "error": "Search name cannot be empty" })); } let wildcard = format!("%{name}%"); // Construct the query let query = r#" SELECT * FROM ( SELECT DISTINCT ON (c.id) c.id AS "projectId", u_homepage.url AS homepage, c.name, u_source.url AS source, ( SELECT ARRAY_AGG(DISTINCT s.type) FROM canon_packages cp2 JOIN packages p2 ON cp2.package_id = p2.id JOIN package_managers pm2 ON p2.package_manager_id = pm2.id JOIN sources s ON pm2.source_id = s.id WHERE cp2.canon_id = c.id ) AS "packageManagers" FROM canons c JOIN urls u_homepage ON c.url_id = u_homepage.id JOIN canon_packages cp ON cp.canon_id = c.id JOIN package_urls pu ON pu.package_id = cp.package_id JOIN urls u_source ON pu.url_id = u_source.id JOIN url_types ut_source ON ut_source.id = u_source.url_type_id WHERE ut_source.name = 'source' AND (c.name ILIKE $1) ORDER BY c.id ) sub ORDER BY LENGTH(name), name LIMIT 10;"#; match data.pool.get().await { Ok(client) => match client.query(query, &[&wildcard]).await { Ok(rows) => { let json = rows_to_json(&rows); HttpResponse::Ok().json(json) } Err(e) => { log::error!("Database query error: {e}"); HttpResponse::InternalServerError().json(json!({ "error": format!("Database error: {e}") })) } }, Err(e) => { log::error!("Failed to get database connection: {e}"); HttpResponse::InternalServerError().body("Failed to get database connection") } } } #[post("/leaderboard")] pub async fn get_leaderboard( req: web::Json<LeaderboardRequest>, data: web::Data<AppState>, ) -> impl Responder { let limit = req.limit.clamp(1, RESPONSE_LIMIT); let Some(project_ids) = req.project_ids.as_deref() else { return get_top_projects(data, limit).await; }; if project_ids.len() > RESPONSE_LIMIT as usize { return HttpResponse::BadRequest().json(json!({ "error": format!("Too many project IDs (maximum {} allowed)", RESPONSE_LIMIT) })); } // Get cached projects and identify missing ones let (cached_projects, missing_ids) = get_cached_projects(data.project_cache.clone(), project_ids); // If we have all projects cached, return them sorted if missing_ids.is_empty() { return sort_truncate_and_return(cached_projects, limit); } // Query for missing projects let query = r#" SELECT * FROM ( SELECT DISTINCT ON (c.id) c.id AS "projectId", u_homepage.url AS homepage, c.name, u_source.url AS source, COALESCE(tr.rank,'0') AS "teaRank", tr.created_at AS "teaRankCalculatedAt", ( SELECT ARRAY_AGG(DISTINCT s.type) FROM canon_packages cp2 JOIN packages p2 ON cp2.package_id = p2.id JOIN package_managers pm2 ON p2.package_manager_id = pm2.id JOIN sources s ON pm2.source_id = s.id WHERE cp2.canon_id = c.id ) AS "packageManagers" FROM canons c JOIN urls u_homepage ON c.url_id = u_homepage.id JOIN canon_packages cp ON cp.canon_id = c.id JOIN package_urls pu ON pu.package_id = cp.package_id JOIN urls u_source ON pu.url_id = u_source.id JOIN url_types ut_source ON ut_source.id = u_source.url_type_id LEFT JOIN tea_ranks tr ON tr.canon_id = c.id WHERE c.id = ANY($1::uuid[]) AND ut_source.name = 'source' AND CAST(tr.rank AS NUMERIC) > 0 ORDER BY c.id, tr.created_at DESC, u_source.url ) sub ORDER BY CAST("teaRank" AS NUMERIC) DESC NULLS LAST LIMIT $2"#; match data.pool.get().await { Ok(client) => match client.query(query, &[&missing_ids, &limit]).await { Ok(rows) => { let fresh_projects = rows_to_json(&rows); // Cache the fresh projects for project in &fresh_projects { if let Some(project_id) = project.get("projectId").and_then(|v| v.as_str()) { if let Ok(uuid) = Uuid::parse_str(project_id) { data.project_cache.insert( uuid, crate::app_state::ProjectCacheEntry::new(project.clone()), ); } else { log::warn!("Failed to parse project ID as UUID: {}", project_id); } } else { log::warn!("No projectId found in project: {:?}", project); } } // Combine cached and fresh projects - keep Arc for cached ones let mut all_projects: Vec<Arc<Value>> = cached_projects; // Convert fresh projects to Arc to match the type let fresh_arcs: Vec<Arc<Value>> = fresh_projects.into_iter().map(Arc::new).collect(); all_projects.extend(fresh_arcs); sort_truncate_and_return(all_projects, limit) } Err(e) => { log::error!("Database query error: {e}"); HttpResponse::InternalServerError().json(json!({ "error": format!("Database error: {}", e) })) } }, Err(e) => { log::error!("Failed to get database connection: {e}"); HttpResponse::InternalServerError().body("Failed to get database connection") } } } // Helper function to sort, truncate, and return the final response fn sort_truncate_and_return(projects: Vec<Arc<Value>>, limit: i64) -> actix_web::HttpResponse { let mut projects = projects; // Sort projects by teaRank (descending) - Arc derefs to Value projects.sort_by(|a, b| { let rank_a = a .get("teaRank") .and_then(|v| v.as_str()) .and_then(|s| s.parse::<i64>().ok()) .unwrap_or(0); let rank_b = b .get("teaRank") .and_then(|v| v.as_str()) .and_then(|s| s.parse::<i64>().ok()) .unwrap_or(0); rank_b.cmp(&rank_a) }); // Apply limit projects.truncate(limit as usize); // Convert to Vec only for the final response - Arc doesn't implement Serialize let final_projects: Vec<Value> = projects .into_iter() .map(|arc_val| (*arc_val).clone()) .collect(); actix_web::HttpResponse::Ok().json(final_projects) } async fn get_top_projects(data: web::Data<AppState>, limit: i64) -> HttpResponse { // get client let Ok(client) = data.pool.get().await else { return HttpResponse::InternalServerError().body("Failed to get database connection"); }; // get latest run id let run_query = r#"SELECT MAX(run) from tea_rank_runs"#; let Ok(run_row) = client.query_one(run_query, &[]).await else { return HttpResponse::InternalServerError().body("Failed to get latest run"); }; let run: i32 = run_row.get(0); // get top projects (1-RESPONSE_LIMIT) // NOTE: the subquery is correlated with the outer tea_ranks row (an unqualified // canon_id would bind to cp2), and rank is stored as a string, so it is cast // before ordering let top_ranks_query = r#"SELECT canon_id as "projectId", name, rank as "teaRank", ( SELECT ARRAY_AGG(DISTINCT s.type) FROM canon_packages cp2 JOIN packages p2 ON cp2.package_id = p2.id JOIN package_managers pm2 ON p2.package_manager_id = pm2.id JOIN sources s ON pm2.source_id = s.id WHERE cp2.canon_id = tea_ranks.canon_id ) AS "packageManagers" FROM tea_ranks JOIN canons ON canon_id = canons.id WHERE tea_rank_run = $1 ORDER BY CAST(rank AS NUMERIC) DESC LIMIT $2"#; let Ok(top_ranks) = client .query(top_ranks_query, &[&run, &limit.clamp(1, RESPONSE_LIMIT)]) .await else { return HttpResponse::InternalServerError().json(json!({ "error": "Failed to fetch top ranks" })); }; let json = rows_to_json(&top_ranks); HttpResponse::Ok().json(json) }
================================================ FILE: api/src/logging.rs ================================================
use env_logger::Env; pub fn setup_logger() { env_logger::init_from_env(Env::default().default_filter_or("info")); } pub struct Logger; impl Logger { pub fn default() -> actix_web::middleware::Logger { actix_web::middleware::Logger::new("%a '%r' %s %b '%{Referer}i' '%{User-Agent}i' %T") } }
================================================ FILE: api/src/main.rs ================================================
mod app_state; mod db; mod handlers; mod logging; mod utils; use actix_web::{web, App, HttpServer}; use dashmap::DashMap; use dotenv::dotenv; use std::env; use std::sync::Arc; use crate::app_state::AppState; use crate::handlers::{ get_leaderboard, get_project, get_table, get_table_row, heartbeat, list_projects_by_id, list_projects_by_name, list_tables, }; use crate::logging::setup_logger; #[actix_web::main] async fn main() -> std::io::Result<()> { dotenv().ok(); setup_logger(); let host = env::var("HOST").unwrap_or_else(|_| "0.0.0.0".to_string()); let port = env::var("PORT").unwrap_or_else(|_| "8080".to_string()); let bind_address = format!("{host}:{port}"); let (pool, tables) = db::initialize_db().await; // Cache for project data to reduce database load on leaderboard routes let project_cache = Arc::new(DashMap::new()); log::info!("Available tables: {tables:?}"); log::info!("Starting server at http://{bind_address}"); HttpServer::new(move || { App::new() .wrap(logging::Logger::default()) .app_data(web::Data::new(AppState { pool: pool.clone(), tables: Arc::clone(&tables), project_cache: Arc::clone(&project_cache), })) // HEALTH .service(heartbeat) // SIMPLE CRUD OPERATIONS .service(list_tables) .service(get_table) .service(get_table_row) // BUSINESS LOGIC .service(get_leaderboard) .service(get_project) .service(list_projects_by_id) .service(list_projects_by_name) }) .bind(&bind_address)? .run() .await }
================================================ FILE: api/src/utils.rs ================================================
use actix_web::web::Query; use chrono::{DateTime, NaiveDate, NaiveDateTime, Utc}; use dashmap::DashMap; use serde_json::{json, Value}; use std::sync::Arc; use tokio_postgres::{types::Type, Row}; use uuid::Uuid; use crate::{app_state::ProjectCacheEntry, handlers::PaginationParams}; pub fn get_column_names(rows: &[Row]) -> Vec<String> { if let Some(row) = rows.first() { row.columns() .iter() .map(|col| col.name().to_string()) .collect() } else { vec![] } } pub fn convert_optional_to_json<T, E>(result: Result<Option<T>, E>) -> Value where T: serde::Serialize, { match result { Ok(Some(val)) => json!(val), _ => Value::Null, } } pub fn rows_to_json(rows: &[Row]) -> Vec<Value> { rows.iter() .map(|row| { let mut map = serde_json::Map::new(); for (i, column) in row.columns().iter().enumerate() { let value: Value = match *column.type_() { Type::INT2 => convert_optional_to_json(row.try_get::<_, Option<i16>>(i)), Type::INT4 => convert_optional_to_json(row.try_get::<_, Option<i32>>(i)), Type::INT8 => convert_optional_to_json(row.try_get::<_, Option<i64>>(i)), Type::FLOAT4 => convert_optional_to_json(row.try_get::<_, Option<f32>>(i)), Type::FLOAT8 => convert_optional_to_json(row.try_get::<_, Option<f64>>(i)), Type::BOOL => convert_optional_to_json(row.try_get::<_, Option<bool>>(i)), Type::VARCHAR | Type::TEXT | Type::BPCHAR => { convert_optional_to_json(row.try_get::<_, Option<String>>(i)) } Type::TIMESTAMP => { convert_optional_to_json(row.try_get::<_, Option<NaiveDateTime>>(i)) } Type::TIMESTAMPTZ => { convert_optional_to_json(row.try_get::<_, Option<DateTime<Utc>>>(i)) } Type::DATE => convert_optional_to_json(row.try_get::<_, Option<NaiveDate>>(i)), Type::JSON | Type::JSONB => { convert_optional_to_json(row.try_get::<_, Option<Value>>(i)) } Type::UUID => convert_optional_to_json(row.try_get::<_, Option<Uuid>>(i)), Type::TEXT_ARRAY | Type::VARCHAR_ARRAY => { convert_optional_to_json(row.try_get::<_, Option<Vec<String>>>(i)) } _ => { // For unsupported types, try to convert to string convert_optional_to_json(row.try_get::<_, Option<String>>(i)) } }; map.insert(column.name().to_string(), value); } Value::Object(map) }) .collect() } pub struct Pagination { pub page: i64, pub limit: i64, pub offset: i64, pub total_pages: i64, } impl Pagination { pub fn new(query: Query<PaginationParams>, total_count: i64) -> Self { let limit = query.limit.unwrap_or(200).clamp(1, 1000); // report at least one page, so the clamp below never sees min > max on empty tables let total_pages = ((total_count as f64 / limit as f64).ceil() as i64).max(1); let page = query.page.unwrap_or(1).clamp(1, total_pages); let offset = (page - 1) * limit; Self { page, limit, offset, total_pages, } } } // Helper function to get cached projects and return missing ones pub fn get_cached_projects( cache: Arc<DashMap<Uuid, ProjectCacheEntry>>, project_ids: &[Uuid], ) -> (Vec<Arc<Value>>, Vec<Uuid>) { let mut cached_projects = Vec::new(); let mut missing_ids = Vec::new(); for &project_id in project_ids { if let Some(entry) = cache.get(&project_id) { if !entry.is_expired() { cached_projects.push(entry.data.clone()); continue; } } missing_ids.push(project_id); } (cached_projects, missing_ids) }
================================================ FILE: core/README.md ================================================
# Core Tools for CHAI Python Loaders This directory contains a set of core tools and utilities to facilitate loading the CHAI database with package manager data, using Python helpers. These tools provide a common foundation for fetching, transforming, and loading data from various package managers into the database. In general, the flow of an indexer is: 1. Fetch data from source 2. Fetch data from CHAI 3.
Do a giant diff 4. Create new and updated entries for each package model in the db The best example is [Homebrew's](../package_managers/homebrew/main.py). ## Key Components ### 1. [Config](config.py) Entrypoint for all loaders; generally has all the information needed for the pipeline to start. Includes: - Execution flags: - `FETCH` determines whether we request the data from source - `TEST` enables a test mode, to test specific portions of the pipeline - `NO_CACHE` determines whether we save the intermediate pipeline files - Package Manager flags: - `pm_id` is the package manager id from the db that the pipeline runs for - `source` is the data source for that package manager. `SOURCES` defines the map. The next four configuration classes retrieve the IDs for URL types (homepage, documentation, etc.), dependency types (build, runtime, etc.), user types (crates user, GitHub user), and the package managers themselves. ### 2. [Database](db.py) The DB class offers a set of methods for interacting with the database, including: - Running queries to build a cache for the current state of the graph for a package manager - Batching utilities - Some load functions ### 3. [Fetcher](fetcher.py) The Fetcher class provides functionality for downloading and extracting data from package manager sources. It supports: - Downloading tarball / GZIP / Git files - Extracting contents to a specified directory - Maintaining a "latest" symlink so we always know where to look ### 4. [Logger](logger.py) A custom logging utility that provides consistent logging across all loaders. ### 5. [Models](models/__init__.py) SQLAlchemy models representing the database schema, including: - Package, Version, User, License, DependsOn, and other relevant tables > [!NOTE] > > This is currently used to actually generate the migrations as well ### 6. [Scheduler](scheduler.py) A scheduling utility that allows loaders to run at specified intervals. ### 7. [Transformer](transformer.py) The Transformer class provides a base for creating package manager-specific transformers. It includes: - Methods for locating and reading input files - Placeholder methods for transforming data into the required format ## Usage To create a new loader for a package manager: 1. Create a new directory under `package_managers/` for your package manager. 1. Implement a fetcher that inherits from the base Fetcher and is able to fetch the raw data from the package manager's source. 1. Implement a custom Transformer class that inherits from the base Transformer and figures out how to map the raw data provided by the package managers into the data model described in the [models](models/__init__.py) module. 1. Load the cache for data currently in CHAI for that package manager 1. Implement a diff to compare them 1. Pass diff objects (lists of new / updated data points) to `db.ingest` 1. Orchestrate via a `main.py`. Example usage can be found in the [crates](../package_managers/crates) loader; a minimal sketch is also included at the bottom of this README. ## TODOs - [ ] `Diff` currently has separate implementations for Homebrew and Crates, and could be centralized - open to help here!
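## Sketch

To make the flow above concrete, here is a minimal, hypothetical `main.py` skeleton wired together from the pieces described in this README. It uses the crates source purely for illustration, and the diff step is stubbed out with empty lists; a real loader computes those from its transformer output:

```python
"""Illustrative loader skeleton, not a real indexer."""

from core.config import Config, PackageManager
from core.db import DB
from core.fetcher import TarballFetcher


def main() -> None:
    # resolves pm_id, URL type IDs, dependency type IDs, etc. from CHAI
    config = Config(PackageManager.CRATES)
    db = DB("example_loader")  # logger name is illustrative

    # 1. fetch data from source (skipped when FETCH=false)
    if config.exec_config.fetch:
        fetcher = TarballFetcher(
            "crates",
            config.pm_config.source,
            config.exec_config.no_cache,
            config.exec_config.test,
        )
        fetcher.write(fetcher.fetch())

    # 2. fetch the current state of the graph from CHAI
    current = db.current_graph(config.pm_config.pm_id)

    # 3. diff source data against `current` -- package-manager specific,
    #    so this sketch just pretends everything is already up to date
    new_packages, new_urls, new_package_urls = [], [], []
    new_deps, removed_deps = [], []
    updated_packages, updated_package_urls = [], []

    # 4. load the diff into CHAI in one transaction
    db.ingest(
        new_packages,
        new_urls,
        new_package_urls,
        new_deps,
        removed_deps,
        updated_packages,
        updated_package_urls,
    )
    db.insert_load_history(config.pm_config.pm_id)
    db.close()


if __name__ == "__main__":
    main()
```

Note that `db.ingest` commits once and rolls back on error, so a loader either lands its whole diff or nothing.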
================================================ FILE: core/config.py ================================================ from enum import Enum from sqlalchemy import UUID from core.db import ConfigDB from core.logger import Logger from core.utils import env_vars logger = Logger("config") class PackageManager(Enum): CRATES = "crates" HOMEBREW = "homebrew" DEBIAN = "debian" NPM = "npm" PKGX = "pkgx" TEST = env_vars("TEST", "false") FETCH = env_vars("FETCH", "true") NO_CACHE = env_vars("NO_CACHE", "true") SOURCES = { PackageManager.CRATES: "https://static.crates.io/db-dump.tar.gz", PackageManager.HOMEBREW: "https://formulae.brew.sh/api/formula.json", # for debian, sources contains the urls, packages is tied to the linux distribution PackageManager.DEBIAN: [ "https://ftp.debian.org/debian/dists/stable/main/binary-amd64/Packages.gz", "https://ftp.debian.org/debian/dists/stable/main/source/Sources.gz", ], PackageManager.NPM: "https://registry.npmjs.org/-/all", # fake for now PackageManager.PKGX: "https://github.com/pkgxdev/pantry.git", } # The three configuration values URLTypes, DependencyTypes, and UserTypes will query the # DB to get the respective IDs. If the values don't exist in the database, they will # raise an AttributeError (None has no attribute id) at the start class ExecConf: test: bool fetch: bool no_cache: bool def __init__(self) -> None: self.test = TEST self.fetch = FETCH self.no_cache = NO_CACHE def __str__(self): return f"ExecConf(test={self.test},fetch={self.fetch},no_cache={self.no_cache})" class PMConf: package_manager: PackageManager pm_id: str source: str | list[str] def __init__(self, pm: PackageManager, db: ConfigDB): self.package_manager = pm self.pm_id = db.select_package_manager_by_name(pm.value).id self.source = SOURCES[pm] def __str__(self): return f"PMConf(pm_id={self.pm_id},source={self.source})" class URLTypes: homepage: UUID repository: UUID documentation: UUID source: UUID def __init__(self, db: ConfigDB): self.load_url_types(db) def load_url_types(self, db: ConfigDB) -> None: self.homepage = db.select_url_types_by_name("homepage").id self.repository = db.select_url_types_by_name("repository").id self.documentation = db.select_url_types_by_name("documentation").id self.source = db.select_url_types_by_name("source").id def __str__(self) -> str: return f"URLs(homepage={self.homepage},repo={self.repository},docs={self.documentation},src={self.source})" class UserTypes: crates: UUID github: UUID def __init__(self, db: ConfigDB): self.crates = db.select_source_by_name("crates").id self.github = db.select_source_by_name("github").id def __str__(self) -> str: return f"UserTypes(crates={self.crates},github={self.github})" class DependencyTypes: build: UUID development: UUID runtime: UUID test: UUID optional: UUID recommended: UUID def __init__(self, db: ConfigDB): self.build = db.select_dependency_type_by_name("build").id self.development = db.select_dependency_type_by_name("development").id self.runtime = db.select_dependency_type_by_name("runtime").id self.test = db.select_dependency_type_by_name("test").id self.optional = db.select_dependency_type_by_name("optional").id self.recommended = db.select_dependency_type_by_name("recommended").id def __str__(self) -> str: return f"DependencyTypes(build={self.build},development={self.development},runtime={self.runtime},test={self.test},optional={self.optional},recommended={self.recommended})" class PackageManagers: crates: UUID homebrew: UUID debian: UUID npm: UUID pkgx: UUID def __init__(self, db: ConfigDB): self.crates = 
db.select_package_manager_by_name("crates").id self.homebrew = db.select_package_manager_by_name("homebrew").id self.debian = db.select_package_manager_by_name("debian").id self.npm = db.select_package_manager_by_name("npm").id self.pkgx = db.select_package_manager_by_name("pkgx").id class Config: exec_config: ExecConf pm_config: PMConf url_types: URLTypes user_types: UserTypes dependency_types: DependencyTypes package_managers: PackageManagers def __init__(self, pm: PackageManager) -> None: db = ConfigDB() self.exec_config = ExecConf() self.pm_config = PMConf(pm, db) self.url_types = URLTypes(db) self.user_types = UserTypes(db) self.dependency_types = DependencyTypes(db) self.package_managers = PackageManagers(db) def __str__(self): return f"Config(exec_config={self.exec_config}, pm_config={self.pm_config}, url_types={self.url_types}, user_types={self.user_types}, dependency_types={self.dependency_types}, package_managers={self.package_managers})" ================================================ FILE: core/db.py ================================================ import os from collections import defaultdict from datetime import datetime from typing import Any from uuid import UUID from sqlalchemy import Insert, Result, Update, create_engine, select, update from sqlalchemy.dialects import postgresql from sqlalchemy.orm import Session, sessionmaker from core.logger import Logger from core.models import ( URL, BaseModel, DependsOnType, LegacyDependency, LoadHistory, Package, PackageManager, PackageURL, Source, URLType, ) from core.structs import CurrentGraph, CurrentURLs, URLKey CHAI_DATABASE_URL = os.getenv("CHAI_DATABASE_URL") DEFAULT_BATCH_SIZE = 10000 class DB: def __init__(self, logger_name: str): self.logger = Logger(logger_name) self.engine = create_engine(CHAI_DATABASE_URL) self.session = sessionmaker(self.engine) self.logger.debug("connected") self.now: datetime = datetime.now() def insert_load_history(self, package_manager_id: str): with self.session() as session: session.add(LoadHistory(package_manager_id=package_manager_id)) session.commit() def print_statement(self, stmt): dialect = postgresql.dialect() compiled_stmt = stmt.compile( dialect=dialect, compile_kwargs={"literal_binds": True} ) self.logger.log(str(compiled_stmt)) def close(self): self.logger.debug("closing") self.engine.dispose() def search_names( self, package_names: list[str], package_managers: list[UUID] ) -> list[str]: """Return Homepage URLs for packages with these names""" with self.session() as session: results = ( session.query(Package, URL) .join(PackageURL, PackageURL.package_id == Package.id) .join(URL, PackageURL.url_id == URL.id) .join(URLType, URL.url_type_id == URLType.id) .filter(URLType.name == "homepage") .filter(Package.name.in_(package_names)) .filter(Package.package_manager_id.in_(package_managers)) .all() ) # build a mapping name_to_url = {result.Package.name: result.URL.url for result in results} # return in the order preserved by the input (bc its relevant) # and account for the fact that some names might not have a URL return [ name_to_url.get(name) for name in package_names if name in name_to_url ] def current_graph(self, package_manager_id: UUID) -> CurrentGraph: """Get the packages and dependencies for a specific package manager""" package_map: dict[str, Package] = defaultdict(Package) dependencies: dict[UUID, set[LegacyDependency]] = defaultdict(set) stmt = ( select(Package, LegacyDependency) .select_from(Package) .join( LegacyDependency, onclause=Package.id == LegacyDependency.package_id, 
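# note: isouter=True below makes this a LEFT JOIN, so packages with no
# dependency rows still land in the package_map cache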
isouter=True, ) .where(Package.package_manager_id == package_manager_id) ) with self.session() as session: result: Result[tuple[Package, LegacyDependency]] = session.execute(stmt) for pkg, dep in result: # add to the package map, by import_id, which is usually name package_map[pkg.import_id] = pkg # and add to the dependencies map as well if dep: # check because it's an outer join, so might be None dependencies[pkg.id].add(dep) self.logger.debug(f"Cached {len(package_map)} packages") return CurrentGraph(package_map, dependencies) def _build_current_urls( self, result: Result[tuple[Package, PackageURL, URL]] ) -> CurrentURLs: """Build the CurrentURLs result based on a query of Package, PackageURL, URL""" url_map: dict[URLKey, URL] = {} package_urls: dict[UUID, set[PackageURL]] = {} for pkg, pkg_url, url in result: url_key = URLKey(url.url, url.url_type_id) url_map[url_key] = url # since it's a left join, we need to check if pkg is None if pkg is not None: if pkg.id not in package_urls: package_urls[pkg.id] = set() package_urls[pkg.id].add(pkg_url) self.logger.debug(f"Cached {len(url_map)} URLs") self.logger.debug(f"Cached {len(package_urls)} package URLs") return CurrentURLs(url_map=url_map, package_urls=package_urls) def current_urls(self, urls: set[str]) -> CurrentURLs: """Get the Package URL Relationships for a given set of URLs""" stmt = ( select(Package, PackageURL, URL) .select_from(URL) .join(PackageURL, PackageURL.url_id == URL.id, isouter=True) .join(Package, Package.id == PackageURL.package_id, isouter=True) .where(URL.url.in_(urls)) ) with self.session() as session: result: Result[tuple[Package, PackageURL, URL]] = session.execute(stmt) return self._build_current_urls(result) def all_current_urls(self) -> CurrentURLs: """Get all the URLs and the Packages they are tied to from CHAI""" stmt = ( select(Package, PackageURL, URL) .select_from(URL) .join(PackageURL, PackageURL.url_id == URL.id, isouter=True) .join(Package, Package.id == PackageURL.package_id, isouter=True) ) with self.session() as session: result: Result[tuple[Package, PackageURL, URL]] = session.execute(stmt) return self._build_current_urls(result) def load( self, session: Session, data: list[BaseModel], stmt: Insert | Update ) -> None: """Smart batching utility""" if data: values: list[dict[str, str | UUID | datetime]] = [ obj.to_dict_v2() for obj in data ] self.batch(session, stmt, values, DEFAULT_BATCH_SIZE) def batch( self, session: Session, stmt: Insert | Update, values: list[dict[str, str | UUID | datetime]], batch_size: int = DEFAULT_BATCH_SIZE, ): """ Utility to batch insert or update, but doesn't commit! 
Inputs: - session: the sessionmaker object, so create it before you use it - stmt: the insert or update statement to construct, without the values - values: the values to insert or update - generally using to_dict_v2() - batch_size: the batch size, defaults to 10000 """ for i in range(0, len(values), batch_size): batch = values[i : i + batch_size] self.logger.log( f"Processing batch {i // batch_size + 1}/{(len(values) - 1) // batch_size + 1} ({len(batch)})" ) value_stmt = stmt.values(batch) session.execute(value_stmt) def ingest( self, new_packages: list[Package], new_urls: list[URL], new_package_urls: list[PackageURL], new_deps: list[LegacyDependency], removed_deps: list[LegacyDependency], updated_packages: list[dict[str, UUID | str | datetime]], updated_package_urls: list[dict[str, UUID | datetime]], ) -> None: """ Ingests a list of new, updated, and deleted objects into the database. It flushes after each insert, to ensure that the database is in a valid state prior to the next batch of ingestions. TODO: if pkey is set in the values provided, then sqlalchemy will use psycopg2.executemany(...), which is quicker, but still the slowest of all execution options provided by psycopg2. The best one is execute_values, which is **only** available for inserts. It looks like sqlalchemy^2 has native support for insertmanyvalues, but **I think** we need to pass the data in as a list[dict] instead of objects. See: https://docs.sqlalchemy.org/en/20/core/connections.html#engine-insertmanyvalues Inputs: - new_packages: a list of new Package objects - new_urls: a list of new URL objects - new_package_urls: a list of new PackageURL objects - updated_packages: a list of updated Package objects - updated_package_urls: a list of updated PackageURL objects - new_deps: a list of new LegacyDependency objects - removed_deps: a list of removed LegacyDependency objects """ self.logger.log("-" * 100) self.logger.log("Going to load") self.logger.log(f"New packages: {len(new_packages)}") self.logger.log(f"New URLs: {len(new_urls)}") self.logger.log(f"New package URLs: {len(new_package_urls)}") self.logger.log(f"Updated packages: {len(updated_packages)}") self.logger.log(f"Updated package URLs: {len(updated_package_urls)}") self.logger.log(f"New dependencies: {len(new_deps)}") self.logger.log(f"Removed dependencies: {len(removed_deps)}") self.logger.log("-" * 100) with self.session() as session: try: # 1. Add all new objects with granular flushes self.execute(session, new_packages, "add", "new packages") self.execute(session, new_urls, "add", "new urls") self.execute(session, new_package_urls, "add", "new package urls") self.execute(session, removed_deps, "delete", "removed dependencies") self.execute(session, new_deps, "add", "new dependencies") # 2. Perform updates (these will now operate on a flushed state) if updated_packages: session.execute(update(Package), updated_packages) if updated_package_urls: session.execute(update(PackageURL), updated_package_urls) # 3.
Commit all changes session.commit() self.logger.log("✅ Successfully ingested") except Exception as e: self.logger.error(f"Error during batched ingest: {e}") session.rollback() raise e def execute(self, session: Session, data: list[Any], method: str, log: str) -> None: if method not in ["add", "delete"]: raise ValueError(f"db.execute({method}) is unknown") if data: match method: case "add": session.add_all(data) case "delete": self.remove_all(session, data) case _: pass session.flush() self.logger.log(f"✅ {len(data):,} {log}") def remove_all(self, session: Session, data: list[Any]) -> None: for item in data: session.delete(item) class ConfigDB(DB): def __init__(self): super().__init__("ConfigDB") def select_package_manager_by_name(self, package_manager: str) -> PackageManager: with self.session() as session: result = ( session.query(PackageManager) .join(Source, PackageManager.source_id == Source.id) .filter(Source.type == package_manager) .first() ) if result: return result raise ValueError(f"Package manager {package_manager} not found") def select_url_types_by_name(self, name: str) -> URLType: with self.session() as session: return session.query(URLType).filter(URLType.name == name).first() def select_source_by_name(self, name: str) -> Source: with self.session() as session: return session.query(Source).filter(Source.type == name).first() def select_dependency_type_by_name(self, name: str) -> DependsOnType: with self.session() as session: return ( session.query(DependsOnType).filter(DependsOnType.name == name).first() ) if __name__ == "__main__": db = ConfigDB() print(db.search_names(["elfutils.org", "elfutils"])) ================================================ FILE: core/fetcher.py ================================================ import gzip import json import os import tarfile from dataclasses import dataclass from datetime import datetime from io import BytesIO from shutil import rmtree from typing import Any import git from requests import get from core.logger import Logger @dataclass class Data: file_path: str file_name: str content: Any # json or bytes class Fetcher: def __init__(self, name: str, source: str, no_cache: bool, test: bool): self.name = name self.source = source self.output = f"data/{name}" self.logger = Logger(f"{name}_fetcher") self.no_cache = no_cache self.test = test def write(self, files: list[Data]): """generic write function for some collection of files""" # prep the file location now = datetime.now().strftime("%Y-%m-%d") root_path = f"{self.output}/{now}" # write # it can be anything - json, tarball, etc. 
for item in files: self.logger.debug(f"writing {item.file_path}/{item.file_name}") file_path = item.file_path file_name = item.file_name file_content = item.content full_path = os.path.join(root_path, file_path) # make sure the path exists os.makedirs(full_path, exist_ok=True) with open(os.path.join(full_path, file_name), "wb") as f: if isinstance(file_content, list | dict): # Convert JSON-serializable objects to string file_content = json.dumps(file_content) # Ensure content is bytes before writing if isinstance(file_content, str): file_content = file_content.encode("utf-8") f.write(file_content) # update the latest symlink self.update_symlink(now) def update_symlink(self, latest_path: str): latest_symlink = f"{self.output}/latest" if os.path.islink(latest_symlink): self.logger.debug(f"removing existing symlink {latest_symlink}") os.remove(latest_symlink) self.logger.debug(f"creating symlink {latest_symlink} -> {latest_path}") os.symlink(latest_path, latest_symlink) def fetch(self) -> bytes: if not self.source: raise ValueError("source is not set") response = get(self.source) try: response.raise_for_status() except Exception as e: self.logger.error(f"error fetching {self.source}: {e}") raise e return response.content def cleanup(self): if self.no_cache: # TODO: it's deleting everything here rmtree(self.output, ignore_errors=True) os.makedirs(self.output, exist_ok=True) class TarballFetcher(Fetcher): def __init__(self, name: str, source: str, no_cache: bool, test: bool): super().__init__(name, source, no_cache, test) def fetch(self) -> list[Data]: content = super().fetch() bytes_io_object = BytesIO(content) bytes_io_object.seek(0) files = [] with tarfile.open(fileobj=bytes_io_object, mode="r:gz") as tar: for member in tar.getmembers(): if member.isfile(): bytes_io_file = BytesIO(tar.extractfile(member).read()) destination_key = member.name file_name = destination_key.split("/")[-1] file_path = "/".join(destination_key.split("/")[:-1]) self.logger.debug(f"file_path/file_name: {file_path}/{file_name}") files.append(Data(file_path, file_name, bytes_io_file.read())) return files # GZip compresses only one file, so file_path and file_name are not used class GZipFetcher(Fetcher): def __init__( self, name: str, source: str, no_cache: bool, test: bool, file_path: str, file_name: str, ): super().__init__(name, source, no_cache, test) self.file_path = file_path self.file_name = file_name def fetch(self) -> list[Data]: content = super().fetch() files = [] decompressed = gzip.decompress(content).decode("utf-8") files.append(Data(self.file_path, self.file_name, decompressed.encode("utf-8"))) return files class GitFetcher(Fetcher): def __init__(self, name: str, source: str, no_cache: bool, test: bool): super().__init__(name, source, no_cache, test) def fetch(self) -> str: # assume that source is a git repo whose main branch needs to be cloned # we'll first prep the output directory, then clone, then update the symlinks # NOTE: this is what the main Fetcher does, but slightly modified for this case now = datetime.now().strftime("%Y-%m-%d") root_dir = f"{self.output}/{now}" os.makedirs(root_dir, exist_ok=True) # now, clone the repo here self.logger.debug(f"Cloning {self.source} into {root_dir}...") _ = git.Repo.clone_from(self.source, root_dir, depth=1, branch="main") self.logger.debug("Repository cloned successfully.") # update the symlinks self.update_symlink(now) return root_dir ================================================ FILE: core/logger.py ================================================ import 
sys import time import traceback from core.utils import env_vars DEBUG = env_vars("DEBUG", "false") def as_minutes(seconds: float) -> float: return seconds / 60 class Logger: SILENT = 0 NORMAL = 1 VERBOSE = 2 def __init__( self, name: str, mode: int = NORMAL, start: float | None = None ) -> None: self.name = name self.start = start or time.time() self.mode = Logger.VERBOSE if DEBUG else mode def print(self, msg: str): print(f"{self.time_diff():.2f}: [{self.name}]: {msg}", flush=True) def error(self, message): self.print(f"[ERROR]: {message}") def log(self, message): if self.mode >= Logger.NORMAL: self.print(f"{message}") def debug(self, message): if self.mode >= Logger.VERBOSE: self.print(f"[DEBUG]: {message}") def warn(self, message): if self.mode >= Logger.NORMAL: self.print(f"[WARN]: {message}") def is_verbose(self): return self.mode >= Logger.VERBOSE def time_diff(self): return time.time() - self.start def exception(self): exc_type, exc_value, exc_traceback = sys.exc_info() self.print(f"{exc_type.__name__}: {exc_value}") self.print("***** TRACEBACK *****") print(f"{''.join(traceback.format_tb(exc_traceback))}") def info(self, message): self.log(message) def warning(self, message): self.warn(message) ================================================ FILE: core/models/__init__.py ================================================ # __init__.py from __future__ import annotations from datetime import datetime from sqlalchemy import ( Column, DateTime, ForeignKey, Index, Integer, MetaData, String, UniqueConstraint, func, ) from sqlalchemy.dialects.postgresql import UUID from sqlalchemy.orm import Mapped, declarative_base, relationship naming_convention = { "ix": "ix_%(column_0_label)s", "uq": "uq_%(table_name)s_%(column_0_name)s", "ck": "ck_%(table_name)s_%(constraint_name)s", "fk": "fk_%(table_name)s_%(column_0_name)s_%(referred_table_name)s", "pk": "pk_%(table_name)s", } metadata = MetaData(naming_convention=naming_convention) class BaseModel: # we have UUIDs, strings, datetimes, ints, and floats def to_dict_v2(self) -> dict[str, str | UUID | datetime | int | float]: """Return a dictionary of all non-None attributes.""" return { attr: getattr(self, attr) for attr in self.__table__.columns.keys() # noqa: SIM118 if getattr(self, attr) is not None } Base = declarative_base(metadata=metadata, cls=BaseModel) class Package(Base): __tablename__ = "packages" __table_args__ = ( UniqueConstraint( "package_manager_id", "import_id", name="uq_package_manager_import_id" ), ) id = Column( UUID(as_uuid=True), primary_key=True, default=func.uuid_generate_v4(), server_default=func.uuid_generate_v4(), ) derived_id = Column(String, nullable=False, unique=True) # package_manager/name name = Column(String, nullable=False, index=True) package_manager_id = Column( UUID(as_uuid=True), ForeignKey("package_managers.id"), nullable=False, index=True, ) import_id = Column(String, nullable=False, index=True) readme = Column(String, nullable=True) created_at = Column( DateTime, nullable=False, default=func.now(), server_default=func.now() ) updated_at = Column( DateTime, nullable=False, default=func.now(), server_default=func.now() ) def to_dict(self): return { "derived_id": self.derived_id, "name": self.name, "package_manager_id": self.package_manager_id, "import_id": self.import_id, "readme": self.readme, } class PackageManager(Base): __tablename__ = "package_managers" id = Column( UUID(as_uuid=True), primary_key=True, default=func.uuid_generate_v4(), server_default=func.uuid_generate_v4(), ) source_id = Column( 
UUID(as_uuid=True), ForeignKey("sources.id"), nullable=False, unique=True ) created_at = Column( DateTime, nullable=False, default=func.now(), server_default=func.now() ) updated_at = Column( DateTime, nullable=False, default=func.now(), server_default=func.now() ) class Version(Base): __tablename__ = "versions" __table_args__ = ( UniqueConstraint("package_id", "version", name="uq_package_version"), ) id = Column( UUID(as_uuid=True), primary_key=True, default=func.uuid_generate_v4(), server_default=func.uuid_generate_v4(), ) package_id = Column( UUID(as_uuid=True), ForeignKey("packages.id"), nullable=False, index=True ) version = Column(String, nullable=False, index=True) import_id = Column(String, nullable=False, index=True) # size, published_at, license_id, downloads, checksum # are nullable bc not all sources provide them size = Column(Integer, nullable=True, index=True) published_at = Column(DateTime, nullable=True, index=True) license_id = Column( UUID(as_uuid=True), ForeignKey("licenses.id"), nullable=True, index=True ) downloads = Column(Integer, nullable=True, index=True) checksum = Column(String, nullable=True) created_at = Column( DateTime, nullable=False, default=func.now(), server_default=func.now() ) updated_at = Column( DateTime, nullable=False, default=func.now(), server_default=func.now() ) package: Mapped[Package] = relationship() license: Mapped[License] = relationship() def to_dict(self): return { "package_id": self.package_id, "version": self.version, "import_id": self.import_id, "size": self.size, "published_at": self.published_at, "license_id": self.license_id, "downloads": self.downloads, "checksum": self.checksum, } class License(Base): __tablename__ = "licenses" id = Column( UUID(as_uuid=True), primary_key=True, default=func.uuid_generate_v4(), server_default=func.uuid_generate_v4(), ) name = Column(String, nullable=False, unique=True, index=True) created_at = Column( DateTime, nullable=False, default=func.now(), server_default=func.now() ) updated_at = Column( DateTime, nullable=False, default=func.now(), server_default=func.now() ) class DependsOn(Base): __tablename__ = "dependencies" __table_args__ = ( UniqueConstraint( "version_id", "dependency_id", "dependency_type_id", name="uq_version_dependency_type", ), ) id = Column( UUID(as_uuid=True), primary_key=True, default=func.uuid_generate_v4(), server_default=func.uuid_generate_v4(), ) version_id = Column( UUID(as_uuid=True), ForeignKey("versions.id"), nullable=False, index=True ) dependency_id = Column( UUID(as_uuid=True), ForeignKey("packages.id"), nullable=False, index=True ) # ideally, these are non-nullable but diff package managers are picky about this dependency_type_id = Column( UUID(as_uuid=True), ForeignKey("depends_on_types.id"), nullable=True, index=True ) semver_range = Column(String, nullable=True) created_at = Column( DateTime, nullable=False, default=func.now(), server_default=func.now() ) updated_at = Column( DateTime, nullable=False, default=func.now(), server_default=func.now() ) version: Mapped[Version] = relationship() dependency: Mapped[Package] = relationship() dependency_type: Mapped[DependsOnType] = relationship() def to_dict(self): return { "version_id": self.version_id, "dependency_id": self.dependency_id, # "dependency_type_id": self.dependency_type_id, "semver_range": self.semver_range, } class DependsOnType(Base): __tablename__ = "depends_on_types" id = Column( UUID(as_uuid=True), primary_key=True, default=func.uuid_generate_v4(), server_default=func.uuid_generate_v4(), ) name = 
Column(String, nullable=False, unique=True, index=True) created_at = Column( DateTime, nullable=False, default=func.now(), server_default=func.now() ) updated_at = Column( DateTime, nullable=False, default=func.now(), server_default=func.now() ) class LoadHistory(Base): __tablename__ = "load_history" id = Column( UUID(as_uuid=True), primary_key=True, default=func.uuid_generate_v4(), server_default=func.uuid_generate_v4(), ) package_manager_id = Column( UUID(as_uuid=True), ForeignKey("package_managers.id"), nullable=False ) created_at = Column( DateTime, nullable=False, default=func.now(), server_default=func.now() ) updated_at = Column( DateTime, nullable=False, default=func.now(), server_default=func.now() ) # authoritative source of truth for all our sources class Source(Base): __tablename__ = "sources" id = Column( UUID(as_uuid=True), primary_key=True, default=func.uuid_generate_v4(), server_default=func.uuid_generate_v4(), ) type = Column(String, nullable=False, unique=True) created_at = Column( DateTime, nullable=False, default=func.now(), server_default=func.now() ) updated_at = Column( DateTime, nullable=False, default=func.now(), server_default=func.now() ) # this is a collection of all the different type of URLs class URL(Base): __tablename__ = "urls" __table_args__ = (UniqueConstraint("url_type_id", "url", name="uq_url_type_url"),) id = Column( UUID(as_uuid=True), primary_key=True, default=func.uuid_generate_v4(), server_default=func.uuid_generate_v4(), ) url_trgm_idx = Index( "ix_urls_url_trgm", "url", postgresql_using="gin", postgresql_ops={"url": "gin_trgm_ops"}, ) url = Column(String, nullable=False) url_type_id = Column( UUID(as_uuid=True), ForeignKey("url_types.id"), nullable=False, index=True ) created_at = Column( DateTime, nullable=False, default=func.now(), server_default=func.now() ) updated_at = Column( DateTime, nullable=False, default=func.now(), server_default=func.now() ) def to_dict(self): return {"url": self.url, "url_type_id": self.url_type_id} # homepage, repository, documentation, etc. 
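# (the URLType rows themselves are seeded by alembic's load-values.sql: source, homepage, documentation, repository)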
class URLType(Base): __tablename__ = "url_types" id = Column( UUID(as_uuid=True), primary_key=True, default=func.uuid_generate_v4(), server_default=func.uuid_generate_v4(), ) name = Column(String, nullable=False, unique=True) created_at = Column( DateTime, nullable=False, default=func.now(), server_default=func.now() ) updated_at = Column( DateTime, nullable=False, default=func.now(), server_default=func.now() ) class User(Base): __tablename__ = "users" __table_args__ = ( UniqueConstraint("source_id", "import_id", name="uq_source_import_id"), ) id = Column( UUID(as_uuid=True), primary_key=True, default=func.uuid_generate_v4(), server_default=func.uuid_generate_v4(), ) username = Column(String, nullable=False, index=True) source_id = Column( UUID(as_uuid=True), ForeignKey("sources.id"), nullable=False, index=True ) import_id = Column(String, nullable=False, unique=False, index=True) created_at = Column( DateTime, nullable=False, default=func.now(), server_default=func.now() ) updated_at = Column( DateTime, nullable=False, default=func.now(), server_default=func.now() ) def to_dict(self): return { "username": self.username, "source_id": self.source_id, "import_id": self.import_id, } class UserVersion(Base): __tablename__ = "user_versions" __table_args__ = ( UniqueConstraint("user_id", "version_id", name="uq_user_version"), ) id = Column( UUID(as_uuid=True), primary_key=True, default=func.uuid_generate_v4(), server_default=func.uuid_generate_v4(), ) user_id = Column( UUID(as_uuid=True), ForeignKey("users.id"), nullable=False, index=True ) version_id = Column( UUID(as_uuid=True), ForeignKey("versions.id"), nullable=False, index=True ) created_at = Column( DateTime, nullable=False, default=func.now(), server_default=func.now() ) updated_at = Column( DateTime, nullable=False, default=func.now(), server_default=func.now() ) def to_dict(self): return { "user_id": self.user_id, "version_id": self.version_id, } class UserPackage(Base): __tablename__ = "user_packages" __table_args__ = ( UniqueConstraint("user_id", "package_id", name="uq_user_package"), ) id = Column( UUID(as_uuid=True), primary_key=True, default=func.uuid_generate_v4(), server_default=func.uuid_generate_v4(), ) user_id = Column( UUID(as_uuid=True), ForeignKey("users.id"), nullable=False, index=True ) package_id = Column( UUID(as_uuid=True), ForeignKey("packages.id"), nullable=False, index=True ) created_at = Column( DateTime, nullable=False, default=func.now(), server_default=func.now() ) updated_at = Column( DateTime, nullable=False, default=func.now(), server_default=func.now() ) def to_dict(self): return { "user_id": self.user_id, "package_id": self.package_id, } class PackageURL(Base): __tablename__ = "package_urls" __table_args__ = (UniqueConstraint("package_id", "url_id", name="uq_package_url"),) id = Column( UUID(as_uuid=True), primary_key=True, default=func.uuid_generate_v4(), server_default=func.uuid_generate_v4(), ) package_id = Column( UUID(as_uuid=True), ForeignKey("packages.id"), nullable=False, index=True ) url_id = Column( UUID(as_uuid=True), ForeignKey("urls.id"), nullable=False, index=True ) created_at = Column( DateTime, nullable=False, default=func.now(), server_default=func.now() ) updated_at = Column( DateTime, nullable=False, default=func.now(), server_default=func.now() ) # TODO: deprecated def to_dict(self): return { "package_id": self.package_id, "url_id": self.url_id, } class LegacyDependency(Base): __tablename__ = "legacy_dependencies" __table_args__ = ( UniqueConstraint("package_id", "dependency_id", 
name="uq_package_dependency"), ) id = Column(Integer, primary_key=True) package_id = Column( UUID(as_uuid=True), ForeignKey("packages.id"), nullable=False, index=True ) dependency_id = Column( UUID(as_uuid=True), ForeignKey("packages.id"), nullable=False, index=True ) dependency_type_id = Column( UUID(as_uuid=True), ForeignKey("depends_on_types.id"), nullable=False, index=True, ) semver_range = Column(String, nullable=True) created_at = Column( DateTime, nullable=False, default=func.now(), server_default=func.now() ) updated_at = Column( DateTime, nullable=False, default=func.now(), server_default=func.now() ) class Canon(Base): __tablename__ = "canons" id = Column( UUID(as_uuid=True), primary_key=True, default=func.uuid_generate_v4(), server_default=func.uuid_generate_v4(), ) url_id = Column( UUID(as_uuid=True), ForeignKey("urls.id"), nullable=False, index=True, unique=True, ) name_trgm_idx = Index( "ix_canons_name_trgm", "name", postgresql_using="gin", postgresql_ops={"name": "gin_trgm_ops"}, ) name = Column(String, nullable=False) created_at = Column( DateTime, nullable=False, default=func.now(), server_default=func.now() ) updated_at = Column( DateTime, nullable=False, default=func.now(), server_default=func.now() ) url: Mapped[URL] = relationship() class CanonPackage(Base): __tablename__ = "canon_packages" id = Column(UUID(as_uuid=True), primary_key=True) canon_id = Column( UUID(as_uuid=True), ForeignKey("canons.id"), nullable=False, index=True ) package_id = Column( UUID(as_uuid=True), ForeignKey("packages.id"), nullable=False, index=True, unique=True, ) created_at = Column( DateTime, nullable=False, default=func.now(), server_default=func.now() ) updated_at = Column( DateTime, nullable=False, default=func.now(), server_default=func.now() ) class TeaRankRun(Base): __tablename__ = "tea_rank_runs" id = Column( UUID(as_uuid=True), primary_key=True, default=func.uuid_generate_v4(), server_default=func.uuid_generate_v4(), ) run = Column(Integer, nullable=False) split_ratio = Column(String, nullable=False) created_at = Column( DateTime, nullable=False, default=func.now(), server_default=func.now() ) class TeaRank(Base): __tablename__ = "tea_ranks" id = Column( UUID(as_uuid=True), primary_key=True, default=func.uuid_generate_v4(), server_default=func.uuid_generate_v4(), ) tea_rank_run = Column(Integer, nullable=False, index=True) canon_id = Column( UUID(as_uuid=True), ForeignKey("canons.id"), nullable=False, index=True ) rank = Column(String, nullable=False) created_at = Column( DateTime, nullable=False, default=func.now(), server_default=func.now() ) ================================================ FILE: core/requirements.txt ================================================ # This file was autogenerated by uv via the following command: # uv pip compile --group indexers -o core/requirements.txt alembic==1.13.2 # via chai (pyproject.toml:indexers) certifi==2025.4.26 # via # chai (pyproject.toml:indexers) # requests charset-normalizer==3.4.2 # via # chai (pyproject.toml:indexers) # requests filelock==3.18.0 # via tldextract gitdb==4.0.12 # via gitpython gitpython==3.1.44 # via chai (pyproject.toml:indexers) idna==3.10 # via # chai (pyproject.toml:indexers) # requests # tldextract mako==1.3.10 # via alembic markupsafe==3.0.2 # via mako permalint==0.1.15 # via chai (pyproject.toml:indexers) psycopg2-binary==2.9.10 # via chai (pyproject.toml:indexers) pyyaml==6.0.2 # via chai (pyproject.toml:indexers) requests==2.32.4 # via # chai (pyproject.toml:indexers) # requests-file # tldextract 
requests-file==2.1.0 # via tldextract ruff==0.11.13 # via permalint schedule==1.2.2 # via chai (pyproject.toml:indexers) smmap==5.0.2 # via gitdb sqlalchemy==2.0.41 # via # chai (pyproject.toml:indexers) # alembic tldextract==5.3.0 # via permalint typing-extensions==4.14.0 # via # alembic # sqlalchemy urllib3==2.4.0 # via # chai (pyproject.toml:indexers) # requests ================================================ FILE: core/scheduler.py ================================================ import time from collections.abc import Callable from os import getenv from threading import Thread import schedule from core.logger import Logger FREQUENCY = int(getenv("FREQUENCY", 24)) class Scheduler: def __init__(self, name: str, frequency: int = FREQUENCY): self.name = name self.frequency = frequency self.logger = Logger(f"{name}_scheduler") self.job = None self.is_running = False def start(self, task: Callable, *args): self.job = schedule.every(self.frequency).hours.do(task, *args) self.is_running = True self.logger.log(f"scheduled {self.name} to run every {self.frequency} hours") def run_schedule(): while self.is_running: schedule.run_pending() time.sleep(1) Thread(target=run_schedule, daemon=True).start() def stop(self): if self.job: schedule.cancel_job(self.job) self.is_running = False self.logger.log(f"stopped {self.name} scheduler") def run_now(self, task: Callable, *args): self.logger.log(f"running {self.name} now") task(*args) ================================================ FILE: core/structs.py ================================================ from dataclasses import dataclass from datetime import datetime from uuid import UUID from core.models import URL, LegacyDependency, Package, PackageURL @dataclass class CurrentGraph: package_map: dict[str, Package] dependencies: dict[UUID, set[LegacyDependency]] @dataclass(frozen=True) class URLKey: url: str url_type_id: UUID @dataclass class CurrentURLs: url_map: dict[URLKey, URL] # URL and URL Type ID to URL object package_urls: dict[UUID, set[PackageURL]] # Package ID to PackageURL rows @dataclass class Cache: package_map: dict[str, Package] url_map: dict[URLKey, URL] package_urls: dict[UUID, set[PackageURL]] dependencies: dict[UUID, set[LegacyDependency]] @dataclass class DiffResult: new_packages: list[Package] new_urls: dict[URLKey, URL] new_package_urls: list[PackageURL] updated_packages: list[dict[str, UUID | str | datetime]] updated_package_urls: list[dict[str, UUID | datetime]] new_deps: list[LegacyDependency] removed_deps: list[LegacyDependency] ================================================ FILE: core/test.json ================================================ [ { 'id': UUID('b3133e5e-6d6b-458b-bd83-bf31032875a4'), 'package_id': UUID('7d6c7a3f-2c75-425f-8674-12efd7ce1ca4'), 'url_id': UUID('736acfdc-c3c2-4b53-ae6e-102fdd4f375a'), 'created_at': datetime.datetime(2025, 5, 19, 17, 5, 10, 255947), 'updated_at': datetime.datetime(2025, 5, 19, 17, 5, 10, 255947) }, {'id': UUID('a274bb96-a443-46a7-86ed-71c6ee87a89b'), 'package_id': UUID('506f5abc-f385-4cbf-9fb1-cd34053397f4'), 'url_id': UUID('d0346cef-80b0-456c-8de3-eb1b95481bac'), 'created_at': datetime.datetime(2025, 5, 19, 17, 5, 10, 255947), 'updated_at': datetime.datetime(2025, 5, 19, 17, 5, 10, 255947)}, {'id': UUID('17fe8d3a-78d8-42f5-a9f6-7b7abaa37d53'), 'package_id': UUID('a08b41eb-723d-4800-929d-cb6c6d3aeca4'), 'url_id': UUID('334ec74b-dda3-4bb0-99c5-f39abc132f5a'), 'created_at': datetime.datetime(2025, 5, 19, 17, 5, 10, 255947), 'updated_at': datetime.datetime(2025, 5, 19, 17, 5, 10, 
255947)}, {'id': UUID('5dd47edf-bc5d-43b5-9acd-d099ae9a22f0'), 'package_id': UUID('624c333c-e303-4d9b-a66e-c499bb3b4806'), 'url_id': UUID('6d866142-e2a9-4da0-96de-b5bfadc7cee9'), 'created_at': datetime.datetime(2025, 5, 19, 17, 5, 10, 255947), 'updated_at': datetime.datetime(2025, 5, 19, 17, 5, 10, 255947)}, {'id': UUID('c924c668-c6cb-4b6b-bac2-b588377a695d'), 'package_id': UUID('2d182e7a-1960-4376-8272-5ce401c369fd'), 'url_id': UUID('359015d5-8807-4cdc-b1c8-a4771b0069fe'), 'created_at': datetime.datetime(2025, 5, 19, 17, 5, 10, 255947), 'updated_at': datetime.datetime(2025, 5, 19, 17, 5, 10, 255947)}] ================================================ FILE: core/transformer.py ================================================ import csv import os from permalint import normalize_url, possible_names from sqlalchemy import UUID from core.db import DB from core.logger import Logger # this is a temporary fix, but sometimes the raw files have weird characters # and lots of data within certain fields # this fix allows us to read the files with no hassles csv.field_size_limit(10000000) # the transformer class knows what files to open, and provides a generic wrapper # for the data within the files # each package manager will have its own transformer that knows what data needs to be # extracted for our data model class Transformer: def __init__(self, name: str): self.name = name self.input = f"data/{name}/latest" self.logger = Logger(f"{name}_transformer") self.files: dict[str, str] = { "projects": "", "versions": "", "dependencies": "", "users": "", "urls": "", } self.url_types: dict[str, UUID] = {} def finder(self, file_name: str) -> str: input_dir = os.path.realpath(self.input) for root, _, files in os.walk(input_dir): if file_name in files: return os.path.join(root, file_name) else: self.logger.error(f"{file_name} not found in {input_dir}") raise FileNotFoundError(f"Missing {file_name} file") def open(self, file_name: str) -> str: file_path = self.finder(file_name) with open(file_path) as file: return file.read() def canonicalize(self, url: str) -> str: return normalize_url(url) def guess(self, db_client: DB, url: str, package_managers: list[UUID]) -> list[str]: names = possible_names(url) urls = db_client.search_names(names, package_managers) return urls ================================================ FILE: core/utils.py ================================================ from os import getenv from os.path import exists, join from typing import Any def safe_int(val: str) -> int | None: if val == "": return None return int(val) def build_query_params( items: list[dict[str, str]], cache: dict, attr: str ) -> list[str]: """Collect the distinct values of `attr` across `items` that are not already keyed in `cache` -- i.e. the values that still need to be queried for.""" params = set() for item in items: if item[attr] not in cache: params.add(item[attr]) return list(params) # env vars could be true or 1, or anything else -- here's a centralized location to # handle that def env_vars(env_var: str, default: str) -> bool: var = getenv(env_var, default).lower() return var == "true" or var == "1" # convert keys to snake case def convert_keys_to_snake_case(data: dict[str, Any]) -> dict[str, Any]: """Recursively converts dictionary keys from hyphen-case to snake_case.""" if isinstance(data, dict): new_dict = {} for key, value in data.items(): new_key = key.replace("-", "_") new_dict[new_key] = convert_keys_to_snake_case(value) # handle nested return new_dict elif isinstance(data, list): return [convert_keys_to_snake_case(item) for item in data] else: return data def is_github_url(url: str) -> bool: """Assumes the
url has been canonicalized by permalint""" def file_exists(*args) -> str: """Join *args into a path and return it, raising FileNotFoundError if it does not exist.""" file_path = join(*args) if not exists(file_path): raise FileNotFoundError(f"{file_path} not found") return file_path ================================================ FILE: db/README.md ================================================ # CHAI Data Model The CHAI data model is designed to represent package manager data in a unified and consistent form. The model's goal is _standardization_: smoothing over the complexities and idiosyncrasies of each individual package manager to provide one consistent shape for analysis, querying, and whatever your use case might be. ## Definitions We use certain nomenclature throughout the codebase: - `derived_id`: A unique identifier combining the package manager and package name. Like `crates/serde`, or `homebrew/a2ps`, or `npm/lodash`. - `import_id`: The original identifier from the source system. Like the `crate_id` integers provided by crates, or the package name provided by Homebrew. ## Core Entities ### Packages The Package model is a fundamental unit in our system. Each package is uniquely identified and associated with a specific package manager. Key fields: - `derived_id`: The unique `package_manager/name` identifier described above. - `name`: The package's name. - `package_manager_id`: Reference to the associated package manager. - `import_id`: The original identifier from the source system. - `readme`: Optional field for package documentation. ### Versions Each version is a different release of a package, and **must** be associated with a package. Key fields: - `package_id`: Reference to the associated package. - `version`: The version string. - `import_id`: The original identifier from the source system. - `size`, `published_at`, `license_id`, `downloads`, `checksum`: Optional metadata fields. ### Users The User model represents individuals or entities associated with packages. This data is not always available, but when it is, it's interesting. Key fields: - `username`: The user's name or identifier. - `source_id`: Reference to the data source (e.g., GitHub, npm, crates). - `import_id`: The original identifier from the source system. ### URLs The URL model is populated with all the URLs provided by the package manager source data: documentation, repository, source, issues, and other URL types. Each URL is associated with a URL type. The relationships between a URL and a Package are captured in the PackageURL model. Key fields: - `url`: The URL. - `url_type_id`: Reference to the type of URL (e.g., homepage, repository). ## Type Models These models define categorizations and types used across the system. All these values are loaded from the alembic service, specifically in the [load-values.sql](../alembic/load-values.sql) script. ### URLType Represents different types of URLs associated with packages. Predefined types (from load-values.sql): - `source` - `homepage` - `documentation` - `repository` ### DependsOnType Categorizes different types of dependencies between packages. Predefined types (from load-values.sql): - `build` - `development` - `runtime` - `test` - `optional` - `recommended` - `uses_from_macos` (Homebrew only) ### Source Represents the authoritative sources of package data. - `crates` - `homebrew` The following are not yet supported: - `npm` - `pypi` - `rubygems` - `github` ## Relationship Models These models establish connections between core entities.
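For a concrete picture before diving into each model, here is a sketch query (against the tables defined in `core/models/__init__.py`; `crates/serde` is just an illustrative `derived_id`) that uses the PackageURL relationship to resolve every typed URL recorded for a package:

```sql
-- walk package -> package_urls -> urls -> url_types
SELECT p.name, ut.name AS url_type, u.url
FROM packages AS p
JOIN package_urls AS pu ON pu.package_id = p.id
JOIN urls AS u ON u.id = pu.url_id
JOIN url_types AS ut ON ut.id = u.url_type_id
WHERE p.derived_id = 'crates/serde';
```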
### DependsOn In our data model, a specific release depends on a specific package. We include a field `semver_range`, which represents the range of dependency versions compatible with that release. > [!NOTE] > Not all package managers provide semantic versions. Homebrew does not, for example. > This is why `semver_range` is optional. > > On the other hand, the dependency type is non-optional, and the combination of > `version_id`, `dependency_id`, and `dependency_type_id` must be unique. Key fields: - `version_id`: The version that has the dependency. - `dependency_id`: The package that is depended upon. - `dependency_type_id`: The type of dependency. - `semver_range`: The version range for the dependency (optional). ### UserVersion and UserPackage These models associate users with specific versions and packages, respectively. ### PackageURL Associates packages with their various URLs. ## Caveats ### `Source` and `PackageManager` Relationship We've chosen to separate `Source` and `PackageManager` into distinct entities: - `Source`: Represents data sources that can provide information about packages, users, or both. - `PackageManager`: Specifically represents sources that are package managers. For example, 'crates' functions both as a package manager and as a source of user data. By keeping these concepts separate, we can accurately represent such systems and have a single place to modify any information about 'crates'. ## Additional Models ### License Represents software licenses associated with package versions. A great place to start contributing! ### LoadHistory Tracks the history of data loads for each package manager, useful for auditing and incremental updates. ================================================ FILE: db/queries.md ================================================ # Chai Data Exploration ```sql -- Packages with the longest lifetime SELECT p.name, SUM(v.downloads) AS "downloads", count(v.package_id) AS versions, min(v.published_at) AS "first published", max(v.published_at) AS "last published", max(v.published_at) - min(v.published_at) AS lifetime FROM packages AS p JOIN versions v ON v.package_id = p.id GROUP BY p.name ORDER BY lifetime DESC LIMIT 100; -- Packages sorted by dependents SELECT p.name, count(d.id) AS dependents FROM packages AS p JOIN dependencies AS d ON d.dependency_id = p.id GROUP BY p.name ORDER BY count(d.id) DESC LIMIT 100; -- Packages sorted by dependents with lifetime -- (DISTINCT guards against row fan-out from the versions join) SELECT p.name, count(DISTINCT d.id) AS dependents, min(v.published_at) AS "first published", max(v.published_at) AS "last published", max(v.published_at) - min(v.published_at) AS lifetime FROM packages AS p JOIN dependencies AS d ON d.dependency_id = p.id JOIN versions v ON v.package_id = p.id GROUP BY p.name ORDER BY count(DISTINCT d.id) DESC LIMIT 100; -- Packages sorted by dependents with downloads -- (note: sum(v.downloads) is inflated by the dependencies join; treat as approximate) SELECT p.name, count(DISTINCT d.id) AS dependents, sum(v.downloads) AS downloads FROM packages AS p JOIN dependencies AS d ON d.dependency_id = p.id JOIN versions v ON v.package_id = p.id GROUP BY p.name ORDER BY count(DISTINCT d.id) DESC LIMIT 100; -- Packages with most dependents sorted by download/dependent ratio SELECT name, dependents, downloads, (downloads::numeric / dependents) AS ratio FROM (SELECT p.name, count(DISTINCT d.id) AS dependents, sum(v.downloads) AS downloads FROM packages AS p JOIN dependencies AS d ON d.dependency_id = p.id JOIN versions v ON v.package_id = p.id GROUP BY p.name ORDER BY count(DISTINCT d.id) DESC LIMIT 1000) AS t ORDER BY ratio DESC; ``` ================================================ FILE:
docker-compose.yml ================================================ services: db: image: postgres shm_size: 256m environment: - POSTGRES_USER=postgres - POSTGRES_PASSWORD=s3cr3t ports: - "5435:5432" volumes: - ./data/db/data:/var/lib/postgresql healthcheck: test: ["CMD-SHELL", "pg_isready -U postgres"] interval: 5s timeout: 5s retries: 5 alembic: build: context: . dockerfile: ./alembic/Dockerfile environment: - CHAI_DATABASE_URL=${CHAI_DATABASE_URL:-postgresql://postgres:s3cr3t@db:5432/chai} - CHAI_DATABASE_ADMIN_URL=${CHAI_DATABASE_ADMIN_URL:-postgresql://postgres:s3cr3t@db:5432/postgres} - PGPASSWORD=${PGPASSWORD:-s3cr3t} depends_on: db: condition: service_healthy working_dir: /alembic entrypoint: ["./run_migrations.sh"] crates: build: context: . dockerfile: ./package_managers/crates/Dockerfile environment: - CHAI_DATABASE_URL=${CHAI_DATABASE_URL:-postgresql://postgres:s3cr3t@db:5432/chai} - NO_CACHE=${NO_CACHE:-true} - PYTHONPATH=/ - DEBUG=${DEBUG:-false} - TEST=${TEST:-false} - FETCH=${FETCH:-true} - FREQUENCY=${FREQUENCY:-24} - ENABLE_SCHEDULER=${ENABLE_SCHEDULER:-true} volumes: - ./data/crates:/data/crates depends_on: db: condition: service_healthy alembic: condition: service_completed_successfully homebrew: build: context: . dockerfile: ./package_managers/homebrew/Dockerfile environment: - CHAI_DATABASE_URL=${CHAI_DATABASE_URL:-postgresql://postgres:s3cr3t@db:5432/chai} - NO_CACHE=${NO_CACHE:-false} - DEBUG=${DEBUG:-false} - TEST=${TEST:-false} - FETCH=${FETCH:-true} - FREQUENCY=${FREQUENCY:-1} - ENABLE_SCHEDULER=${ENABLE_SCHEDULER:-true} - PYTHONPATH=/ volumes: - ./data/homebrew:/data/homebrew depends_on: db: condition: service_healthy alembic: condition: service_completed_successfully api: build: context: ./api dockerfile: Dockerfile environment: - DATABASE_URL=postgresql://postgres:s3cr3t@db:5432/chai - HOST=0.0.0.0 - PORT=8080 ports: - "8080:8080" depends_on: db: condition: service_healthy alembic: condition: service_completed_successfully restart: unless-stopped healthcheck: test: ["CMD", "curl", "-f", "http://localhost:8080/heartbeat"] interval: 30s timeout: 10s retries: 3 start_period: 5s debian: build: context: . dockerfile: ./package_managers/debian/Dockerfile environment: - CHAI_DATABASE_URL=${CHAI_DATABASE_URL:-postgresql://postgres:s3cr3t@db:5432/chai} - NO_CACHE=${NO_CACHE:-false} - PYTHONPATH=/ - DEBUG=${DEBUG:-false} - TEST=${TEST:-false} - FETCH=${FETCH:-true} - FREQUENCY=${FREQUENCY:-24} - ENABLE_SCHEDULER=${ENABLE_SCHEDULER:-true} volumes: - ./data/debian:/data/debian depends_on: db: condition: service_healthy alembic: condition: service_completed_successfully pkgx: build: context: . dockerfile: ./package_managers/pkgx/Dockerfile environment: - CHAI_DATABASE_URL=${CHAI_DATABASE_URL:-postgresql://postgres:s3cr3t@db:5432/chai} - NO_CACHE=${NO_CACHE:-false} - PYTHONPATH=/ - DEBUG=${DEBUG:-false} - TEST=${TEST:-false} - FETCH=${FETCH:-true} - FREQUENCY=${FREQUENCY:-24} - ENABLE_SCHEDULER=${ENABLE_SCHEDULER:-true} volumes: - ./data/pkgx:/data/pkgx depends_on: db: condition: service_healthy alembic: condition: service_completed_successfully ranker: build: context: . 
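# note: built from the repo root (context: .), matching the other services built from package-specific Dockerfiles, so shared code such as core/ is available to the image build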
dockerfile: ./ranker/Dockerfile environment: - CHAI_DATABASE_URL=${CHAI_DATABASE_URL:-postgresql://postgres:s3cr3t@db:5432/chai} - PYTHONPATH=/ - LOAD=${LOAD:-false} - DEBUG=${DEBUG:-false} depends_on: db: condition: service_healthy alembic: condition: service_completed_successfully deploy: resources: limits: memory: 8G ================================================ FILE: examples/sbom-meta/README.md ================================================ # SBOM-Meta An example Chai application that displays package metadata for [SBOMs](https://github.com/anchore/syft) (software bill of materials). ## Installation 1. Start the [Chai DB](https://github.com/teaxyz/chai-oss) with `docker compose up`. 2. Run `go install` or `go build` to generate a binary. ## Usage Run `sbom-meta` in the root directory of any repository to get a list of dependencies with metadata. ```bash git clone git@github.com:starship/starship.git cd starship sbom-meta ``` You can sort any of the fields, ascending or descending: ```bash sbom-meta --sort downloads,desc sbom-meta --sort published,asc ``` Use the `--json` flag to output JSON: ```bash sbom-meta --json | jq .[1].name ``` ================================================ FILE: examples/sbom-meta/go.mod ================================================ module sbom-meta go 1.23.2 require ( github.com/anchore/syft v1.14.0 github.com/caarlos0/env v3.5.0+incompatible github.com/dustin/go-humanize v1.0.1 github.com/fatih/color v1.17.1-0.20241003070628-1c8d8706604e github.com/jedib0t/go-pretty/v6 v6.6.0 github.com/jmoiron/sqlx v1.4.0 github.com/lib/pq v1.10.9 ) require ( dario.cat/mergo v1.0.1 // indirect github.com/AdaLogics/go-fuzz-headers v0.0.0-20230811130428-ced1acdcaa24 // indirect github.com/AdamKorcz/go-118-fuzz-build v0.0.0-20230306123547-8075edf89bb0 // indirect github.com/BurntSushi/toml v1.4.0 // indirect github.com/CycloneDX/cyclonedx-go v0.9.1 // indirect github.com/DataDog/zstd v1.5.5 // indirect github.com/Masterminds/goutils v1.1.1 // indirect github.com/Masterminds/semver v1.5.0 // indirect github.com/Masterminds/semver/v3 v3.3.0 // indirect github.com/Masterminds/sprig/v3 v3.3.0 // indirect github.com/Microsoft/go-winio v0.6.2 // indirect github.com/Microsoft/hcsshim v0.11.7 // indirect github.com/ProtonMail/go-crypto v1.1.6 // indirect github.com/acobaugh/osrelease v0.1.0 // indirect github.com/adrg/xdg v0.5.0 // indirect github.com/anchore/clio v0.0.0-20240522144804-d81e109008aa // indirect github.com/anchore/fangs v0.0.0-20240903175602-e716ef12c23d // indirect github.com/anchore/go-collections v0.0.0-20240216171411-9321230ce537 // indirect github.com/anchore/go-logger v0.0.0-20230725134548-c21dafa1ec5a // indirect github.com/anchore/go-macholibre v0.0.0-20220308212642-53e6d0aaf6fb // indirect github.com/anchore/go-struct-converter v0.0.0-20221118182256-c68fdcfa2092 // indirect github.com/anchore/go-version v1.2.2-0.20200701162849-18adb9c92b9b // indirect github.com/anchore/packageurl-go v0.1.1-0.20240507183024-848e011fc24f // indirect github.com/anchore/stereoscope v0.0.4-0.20241005180410-efa76446cc1c // indirect github.com/andybalholm/brotli v1.0.4 // indirect github.com/aquasecurity/go-pep440-version v0.0.0-20210121094942-22b2f8951d46 // indirect github.com/aquasecurity/go-version v0.0.0-20210121072130-637058cfe492 // indirect github.com/aymanbagabas/go-osc52/v2 v2.0.1 // indirect github.com/becheran/wildmatch-go v1.0.0 // indirect github.com/bmatcuk/doublestar/v4 v4.6.1 // indirect github.com/charmbracelet/lipgloss v0.13.0 // indirect 
github.com/charmbracelet/x/ansi v0.2.3 // indirect github.com/cloudflare/circl v1.6.1 // indirect github.com/containerd/cgroups v1.1.0 // indirect github.com/containerd/containerd v1.7.28 // indirect github.com/containerd/containerd/api v1.8.0 // indirect github.com/containerd/continuity v0.4.4 // indirect github.com/containerd/errdefs v0.3.0 // indirect github.com/containerd/fifo v1.1.0 // indirect github.com/containerd/log v0.1.0 // indirect github.com/containerd/platforms v0.2.1 // indirect github.com/containerd/stargz-snapshotter/estargz v0.14.3 // indirect github.com/containerd/ttrpc v1.2.7 // indirect github.com/containerd/typeurl/v2 v2.1.1 // indirect github.com/cyphar/filepath-securejoin v0.4.1 // indirect github.com/deitch/magic v0.0.0-20230404182410-1ff89d7342da // indirect github.com/distribution/reference v0.6.0 // indirect github.com/docker/cli v27.1.1+incompatible // indirect github.com/docker/distribution v2.8.3+incompatible // indirect github.com/docker/docker v27.3.1+incompatible // indirect github.com/docker/docker-credential-helpers v0.7.0 // indirect github.com/docker/go-connections v0.4.0 // indirect github.com/docker/go-events v0.0.0-20190806004212-e31b211e4f1c // indirect github.com/docker/go-units v0.5.0 // indirect github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5 // indirect github.com/edsrzf/mmap-go v1.1.0 // indirect github.com/elliotchance/phpserialize v1.4.0 // indirect github.com/emirpasic/gods v1.18.1 // indirect github.com/facebookincubator/nvdtools v0.1.5 // indirect github.com/felixge/fgprof v0.9.3 // indirect github.com/felixge/httpsnoop v1.0.4 // indirect github.com/fsnotify/fsnotify v1.7.0 // indirect github.com/gabriel-vasile/mimetype v1.4.6 // indirect github.com/github/go-spdx/v2 v2.3.2 // indirect github.com/go-git/gcfg v1.5.1-0.20230307220236-3a3c6141e376 // indirect github.com/go-git/go-billy/v5 v5.6.2 // indirect github.com/go-git/go-git/v5 v5.16.2 // indirect github.com/go-logr/logr v1.4.2 // indirect github.com/go-logr/stdr v1.2.2 // indirect github.com/go-restruct/restruct v1.2.0-alpha // indirect github.com/gogo/protobuf v1.3.2 // indirect github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8 // indirect github.com/golang/protobuf v1.5.4 // indirect github.com/golang/snappy v0.0.4 // indirect github.com/google/go-cmp v0.7.0 // indirect github.com/google/go-containerregistry v0.20.2 // indirect github.com/google/licensecheck v0.3.1 // indirect github.com/google/pprof v0.0.0-20240409012703-83162a5b38cd // indirect github.com/google/uuid v1.6.0 // indirect github.com/gookit/color v1.5.4 // indirect github.com/hashicorp/errwrap v1.1.0 // indirect github.com/hashicorp/go-multierror v1.1.1 // indirect github.com/hashicorp/hcl v1.0.0 // indirect github.com/huandu/xstrings v1.5.0 // indirect github.com/iancoleman/strcase v0.3.0 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99 // indirect github.com/jinzhu/copier v0.4.0 // indirect github.com/kastenhq/goversion v0.0.0-20230811215019-93b2f8823953 // indirect github.com/kevinburke/ssh_config v1.2.0 // indirect github.com/klauspost/compress v1.17.8 // indirect github.com/klauspost/pgzip v1.2.5 // indirect github.com/knqyf263/go-rpmdb v0.1.1 // indirect github.com/lucasb-eyer/go-colorful v1.2.0 // indirect github.com/magiconair/properties v1.8.7 // indirect github.com/mattn/go-colorable v0.1.13 // indirect github.com/mattn/go-isatty v0.0.20 // indirect github.com/mattn/go-runewidth v0.0.16 // 
indirect github.com/mgutz/ansi v0.0.0-20200706080929-d51e80ef957d // indirect github.com/mholt/archiver/v3 v3.5.1 // indirect github.com/microsoft/go-rustaudit v0.0.0-20220730194248-4b17361d90a5 // indirect github.com/mitchellh/copystructure v1.2.0 // indirect github.com/mitchellh/go-homedir v1.1.0 // indirect github.com/mitchellh/hashstructure/v2 v2.0.2 // indirect github.com/mitchellh/mapstructure v1.5.0 // indirect github.com/mitchellh/reflectwalk v1.0.2 // indirect github.com/moby/docker-image-spec v1.3.1 // indirect github.com/moby/locker v1.0.1 // indirect github.com/moby/sys/mountinfo v0.7.2 // indirect github.com/moby/sys/sequential v0.5.0 // indirect github.com/moby/sys/signal v0.7.0 // indirect github.com/moby/sys/user v0.3.0 // indirect github.com/moby/sys/userns v0.1.0 // indirect github.com/muesli/termenv v0.15.2 // indirect github.com/nwaples/rardecode v1.1.0 // indirect github.com/olekukonko/tablewriter v0.0.5 // indirect github.com/opencontainers/go-digest v1.0.0 // indirect github.com/opencontainers/image-spec v1.1.0 // indirect github.com/opencontainers/runtime-spec v1.1.0 // indirect github.com/opencontainers/selinux v1.11.0 // indirect github.com/pborman/indent v1.2.1 // indirect github.com/pelletier/go-toml v1.9.5 // indirect github.com/pelletier/go-toml/v2 v2.2.2 // indirect github.com/pierrec/lz4/v4 v4.1.19 // indirect github.com/pjbgf/sha1cd v0.3.2 // indirect github.com/pkg/errors v0.9.1 // indirect github.com/pkg/profile v1.7.0 // indirect github.com/rivo/uniseg v0.4.7 // indirect github.com/saferwall/pe v1.5.4 // indirect github.com/sagikazarmark/locafero v0.4.0 // indirect github.com/sagikazarmark/slog-shim v0.1.0 // indirect github.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d // indirect github.com/sassoftware/go-rpmutils v0.4.0 // indirect github.com/scylladb/go-set v1.0.3-0.20200225121959-cc7b2070d91e // indirect github.com/secDre4mer/pkcs7 v0.0.0-20240322103146-665324a4461d // indirect github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3 // indirect github.com/shopspring/decimal v1.4.0 // indirect github.com/sirupsen/logrus v1.9.3 // indirect github.com/skeema/knownhosts v1.3.1 // indirect github.com/sourcegraph/conc v0.3.0 // indirect github.com/spdx/tools-golang v0.5.5 // indirect github.com/spf13/afero v1.11.0 // indirect github.com/spf13/cast v1.7.0 // indirect github.com/spf13/cobra v1.8.1 // indirect github.com/spf13/pflag v1.0.5 // indirect github.com/spf13/viper v1.19.0 // indirect github.com/subosito/gotenv v1.6.0 // indirect github.com/sylabs/sif/v2 v2.17.1 // indirect github.com/sylabs/squashfs v1.0.0 // indirect github.com/therootcompany/xz v1.0.1 // indirect github.com/ulikunitz/xz v0.5.12 // indirect github.com/vbatts/go-mtree v0.5.4 // indirect github.com/vbatts/tar-split v0.11.3 // indirect github.com/vifraa/gopom v1.0.0 // indirect github.com/wagoodman/go-partybus v0.0.0-20230516145632-8ccac152c651 // indirect github.com/wagoodman/go-progress v0.0.0-20230925121702-07e42b3cdba0 // indirect github.com/xanzy/ssh-agent v0.3.3 // indirect github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 // indirect github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e // indirect go.opencensus.io v0.24.0 // indirect go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.49.0 // indirect go.opentelemetry.io/otel v1.24.0 // indirect go.opentelemetry.io/otel/metric v1.24.0 // indirect go.opentelemetry.io/otel/trace v1.24.0 // indirect go.uber.org/atomic v1.9.0 // indirect go.uber.org/multierr v1.9.0 // indirect 
golang.org/x/crypto v0.40.0 // indirect golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 // indirect golang.org/x/mod v0.26.0 // indirect golang.org/x/net v0.42.0 // indirect golang.org/x/sync v0.16.0 // indirect golang.org/x/sys v0.34.0 // indirect golang.org/x/term v0.33.0 // indirect golang.org/x/text v0.27.0 // indirect golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 // indirect google.golang.org/genproto v0.0.0-20240213162025-012b6fc9bca9 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20240401170217-c3f982113cda // indirect google.golang.org/grpc v1.62.1 // indirect google.golang.org/protobuf v1.35.2 // indirect gopkg.in/ini.v1 v1.67.0 // indirect gopkg.in/warnings.v0 v0.1.2 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect ) ================================================ FILE: examples/sbom-meta/go.sum ================================================ cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= cloud.google.com/go v0.38.0/go.mod h1:990N+gfupTy94rShfmMCWGDn0LpTmnzTp2qbd1dvSRU= cloud.google.com/go v0.44.1/go.mod h1:iSa0KzasP4Uvy3f1mN/7PiObzGgflwredwwASm/v6AU= cloud.google.com/go v0.44.2/go.mod h1:60680Gw3Yr4ikxnPRS/oxxkBccT6SA1yMk63TGekxKY= cloud.google.com/go v0.45.1/go.mod h1:RpBamKRgapWJb87xiFSdk4g1CME7QZg3uwTez+TSTjc= cloud.google.com/go v0.46.3/go.mod h1:a6bKKbmY7er1mI7TEI4lsAkts/mkhTSZK8w33B4RAg0= cloud.google.com/go v0.50.0/go.mod h1:r9sluTvynVuxRIOHXQEHMFffphuXHOMZMycpNR5e6To= cloud.google.com/go v0.52.0/go.mod h1:pXajvRH/6o3+F9jDHZWQ5PbGhn+o8w9qiu/CffaVdO4= cloud.google.com/go v0.53.0/go.mod h1:fp/UouUEsRkN6ryDKNW/Upv/JBKnv6WDthjR6+vze6M= cloud.google.com/go v0.54.0/go.mod h1:1rq2OEkV3YMf6n/9ZvGWI3GWw0VoqH/1x2nd8Is/bPc= cloud.google.com/go v0.56.0/go.mod h1:jr7tqZxxKOVYizybht9+26Z/gUq7tiRzu+ACVAMbKVk= cloud.google.com/go v0.57.0/go.mod h1:oXiQ6Rzq3RAkkY7N6t3TcE6jE+CIBBbA36lwQ1JyzZs= cloud.google.com/go v0.62.0/go.mod h1:jmCYTdRCQuc1PHIIJ/maLInMho30T/Y0M4hTdTShOYc= cloud.google.com/go v0.65.0/go.mod h1:O5N8zS7uWy9vkA9vayVHs65eM1ubvY4h553ofrNHObY= cloud.google.com/go v0.72.0/go.mod h1:M+5Vjvlc2wnp6tjzE102Dw08nGShTscUx2nZMufOKPI= cloud.google.com/go v0.74.0/go.mod h1:VV1xSbzvo+9QJOxLDaJfTjx5e+MePCpCWwvftOeQmWk= cloud.google.com/go v0.78.0/go.mod h1:QjdrLG0uq+YwhjoVOLsS1t7TW8fs36kLs4XO5R5ECHg= cloud.google.com/go v0.79.0/go.mod h1:3bzgcEeQlzbuEAYu4mrWhKqWjmpprinYgKJLgKHnbb8= cloud.google.com/go v0.81.0/go.mod h1:mk/AM35KwGk/Nm2YSeZbxXdrNK3KZOYHmLkOqC2V6E0= cloud.google.com/go v0.83.0/go.mod h1:Z7MJUsANfY0pYPdw0lbnivPx4/vhy/e2FEkSkF7vAVY= cloud.google.com/go v0.84.0/go.mod h1:RazrYuxIK6Kb7YrzzhPoLmCVzl7Sup4NrbKPg8KHSUM= cloud.google.com/go v0.87.0/go.mod h1:TpDYlFy7vuLzZMMZ+B6iRiELaY7z/gJPaqbMx6mlWcY= cloud.google.com/go v0.90.0/go.mod h1:kRX0mNRHe0e2rC6oNakvwQqzyDmg57xJ+SZU1eT2aDQ= cloud.google.com/go v0.93.3/go.mod h1:8utlLll2EF5XMAV15woO4lSbWQlk8rer9aLOfLh7+YI= cloud.google.com/go v0.94.1/go.mod h1:qAlAugsXlC+JWO+Bke5vCtc9ONxjQT3drlTTnAplMW4= cloud.google.com/go v0.97.0/go.mod h1:GF7l59pYBVlXQIBLx3a761cZ41F9bBH3JUlihCt2Udc= cloud.google.com/go v0.98.0/go.mod h1:ua6Ush4NALrHk5QXDWnjvZHN93OuF0HfuEPq9I1X0cM= cloud.google.com/go v0.99.0/go.mod h1:w0Xx2nLzqWJPuozYQX+hFfCSI8WioryfRDzkoI/Y2ZA= cloud.google.com/go/bigquery v1.0.1/go.mod h1:i/xbL2UlR5RvWAURpBYZTtm/cXjCha9lbfbpx4poX+o= cloud.google.com/go/bigquery v1.3.0/go.mod h1:PjpwJnslEMmckchkHFfq+HTD2DmtT67aNFKH1/VBDHE= cloud.google.com/go/bigquery v1.4.0/go.mod 
h1:S8dzgnTigyfTmLBfrtrhyYhwRxG72rYxvftPBK2Dvzc= cloud.google.com/go/bigquery v1.5.0/go.mod h1:snEHRnqQbz117VIFhE8bmtwIDY80NLUZUMb4Nv6dBIg= cloud.google.com/go/bigquery v1.7.0/go.mod h1://okPTzCYNXSlb24MZs83e2Do+h+VXtc4gLoIoXIAPc= cloud.google.com/go/bigquery v1.8.0/go.mod h1:J5hqkt3O0uAFnINi6JXValWIb1v0goeZM77hZzJN/fQ= cloud.google.com/go/datastore v1.0.0/go.mod h1:LXYbyblFSglQ5pkeyhO+Qmw7ukd3C+pD7TKLgZqpHYE= cloud.google.com/go/datastore v1.1.0/go.mod h1:umbIZjpQpHh4hmRpGhH4tLFup+FVzqBi1b3c64qFpCk= cloud.google.com/go/firestore v1.6.1/go.mod h1:asNXNOzBdyVQmEU+ggO8UPodTkEVFW5Qx+rwHnAz+EY= cloud.google.com/go/pubsub v1.0.1/go.mod h1:R0Gpsv3s54REJCy4fxDixWD93lHJMoZTyQ2kNxGRt3I= cloud.google.com/go/pubsub v1.1.0/go.mod h1:EwwdRX2sKPjnvnqCa270oGRyludottCI76h+R3AArQw= cloud.google.com/go/pubsub v1.2.0/go.mod h1:jhfEVHT8odbXTkndysNHCcx0awwzvfOlguIAii9o8iA= cloud.google.com/go/pubsub v1.3.1/go.mod h1:i+ucay31+CNRpDW4Lu78I4xXG+O1r/MAHgjpRVR+TSU= cloud.google.com/go/storage v1.0.0/go.mod h1:IhtSnM/ZTZV8YYJWCY8RULGVqBDmpoyjwiyrjsg+URw= cloud.google.com/go/storage v1.5.0/go.mod h1:tpKbwo567HUNpVclU5sGELwQWBDZ8gh0ZeosJ0Rtdos= cloud.google.com/go/storage v1.6.0/go.mod h1:N7U0C8pVQ/+NIKOBQyamJIeKQKkZ+mxpohlUTyfDhBk= cloud.google.com/go/storage v1.8.0/go.mod h1:Wv1Oy7z6Yz3DshWRJFhqM/UCfaWIRTdp0RXyy7KQOVs= cloud.google.com/go/storage v1.10.0/go.mod h1:FLPqc6j+Ki4BU591ie1oL6qBQGu2Bl/tZ9ullr3+Kg0= dario.cat/mergo v1.0.1 h1:Ra4+bf83h2ztPIQYNP99R6m+Y7KfnARDfID+a+vLl4s= dario.cat/mergo v1.0.1/go.mod h1:uNxQE+84aUszobStD9th8a29P2fMDhsBdgRYvZOxGmk= dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU= filippo.io/edwards25519 v1.1.0 h1:FNf4tywRC1HmFuKW5xopWpigGjJKiJSV0Cqo0cJWDaA= filippo.io/edwards25519 v1.1.0/go.mod h1:BxyFTGdWcka3PhytdK4V28tE5sGfRvvvRV7EaN4VDT4= github.com/AdaLogics/go-fuzz-headers v0.0.0-20230811130428-ced1acdcaa24 h1:bvDV9vkmnHYOMsOr4WLk+Vo07yKIzd94sVoIqshQ4bU= github.com/AdaLogics/go-fuzz-headers v0.0.0-20230811130428-ced1acdcaa24/go.mod h1:8o94RPi1/7XTJvwPpRSzSUedZrtlirdB3r9Z20bi2f8= github.com/AdamKorcz/go-118-fuzz-build v0.0.0-20230306123547-8075edf89bb0 h1:59MxjQVfjXsBpLy+dbd2/ELV5ofnUkUZBvWSC85sheA= github.com/AdamKorcz/go-118-fuzz-build v0.0.0-20230306123547-8075edf89bb0/go.mod h1:OahwfttHWG6eJ0clwcfBAHoDI6X/LV/15hx/wlMZSrU= github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 h1:L/gRVlceqvL25UVaW/CKtUDjefjrs0SPonmDGUVOYP0= github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/BurntSushi/toml v0.4.1/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ= github.com/BurntSushi/toml v1.2.1/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ= github.com/BurntSushi/toml v1.4.0 h1:kuoIxZQy2WRRk1pttg9asf+WVv6tWQuBNVmK8+nqPr0= github.com/BurntSushi/toml v1.4.0/go.mod h1:ukJfTF/6rtPPRCnwkur4qwRxa8vTRFBF0uk2lLoLwho= github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= github.com/CycloneDX/cyclonedx-go v0.9.1 h1:yffaWOZsv77oTJa/SdVZYdgAgFioCeycBUKkqS2qzQM= github.com/CycloneDX/cyclonedx-go v0.9.1/go.mod h1:NE/EWvzELOFlG6+ljX/QeMlVt9VKcTwu8u0ccsACEsw= github.com/DataDog/datadog-go v3.2.0+incompatible/go.mod h1:LButxg5PwREeZtORoXG3tL4fMGNddJ+vMq1mwgfaqoQ= github.com/DataDog/zstd v1.5.5 h1:oWf5W7GtOLgp6bciQYDmhHHjdhYkALu6S/5Ni9ZgSvQ= github.com/DataDog/zstd v1.5.5/go.mod 
h1:g4AWEaM3yOg3HYfnJ3YIawPnVdXJh9QME85blwSAmyw= github.com/Masterminds/goutils v1.1.1 h1:5nUrii3FMTL5diU80unEVvNevw1nH4+ZV4DSLVJLSYI= github.com/Masterminds/goutils v1.1.1/go.mod h1:8cTjp+g8YejhMuvIA5y2vz3BpJxksy863GQaJW2MFNU= github.com/Masterminds/semver v1.5.0 h1:H65muMkzWKEuNDnfl9d70GUjFniHKHRbFPGBuZ3QEww= github.com/Masterminds/semver v1.5.0/go.mod h1:MB6lktGJrhw8PrUyiEoblNEGEQ+RzHPF078ddwwvV3Y= github.com/Masterminds/semver/v3 v3.3.0 h1:B8LGeaivUe71a5qox1ICM/JLl0NqZSW5CHyL+hmvYS0= github.com/Masterminds/semver/v3 v3.3.0/go.mod h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM= github.com/Masterminds/sprig/v3 v3.3.0 h1:mQh0Yrg1XPo6vjYXgtf5OtijNAKJRNcTdOOGZe3tPhs= github.com/Masterminds/sprig/v3 v3.3.0/go.mod h1:Zy1iXRYNqNLUolqCpL4uhk6SHUMAOSCzdgBfDb35Lz0= github.com/Microsoft/go-winio v0.5.2/go.mod h1:WpS1mjBmmwHBEWmogvA2mj8546UReBk4v8QkMxJ6pZY= github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY= github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU= github.com/Microsoft/hcsshim v0.11.7 h1:vl/nj3Bar/CvJSYo7gIQPyRWc9f3c6IeSNavBTSZNZQ= github.com/Microsoft/hcsshim v0.11.7/go.mod h1:MV8xMfmECjl5HdO7U/3/hFVnkmSBjAjmA09d4bExKcU= github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU= github.com/OneOfOne/xxhash v1.2.8 h1:31czK/TI9sNkxIKfaUfGlU47BAxQ0ztGgd9vPyqimf8= github.com/OneOfOne/xxhash v1.2.8/go.mod h1:eZbhyaAYD41SGSSsnmcpxVoRiQ/MPUTjUdIIOT9Um7Q= github.com/ProtonMail/go-crypto v1.1.6 h1:ZcV+Ropw6Qn0AX9brlQLAUXfqLBc7Bl+f/DmNxpLfdw= github.com/ProtonMail/go-crypto v1.1.6/go.mod h1:rA3QumHc/FZ8pAHreoekgiAbzpNsfQAosU5td4SnOrE= github.com/acobaugh/osrelease v0.1.0 h1:Yb59HQDGGNhCj4suHaFQQfBps5wyoKLSSX/J/+UifRE= github.com/acobaugh/osrelease v0.1.0/go.mod h1:4bFEs0MtgHNHBrmHCt67gNisnabCRAlzdVasCEGHTWY= github.com/adrg/xdg v0.5.0 h1:dDaZvhMXatArP1NPHhnfaQUqWBLBsmx1h1HXQdMoFCY= github.com/adrg/xdg v0.5.0/go.mod h1:dDdY4M4DF9Rjy4kHPeNL+ilVF+p2lK8IdM9/rTSGcI4= github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= github.com/anchore/clio v0.0.0-20240522144804-d81e109008aa h1:pwlAn4O9SBUnlgfa69YcqIynbUyobLVFYu8HxSoCffA= github.com/anchore/clio v0.0.0-20240522144804-d81e109008aa/go.mod h1:nD3H5uIvjxlfmakOBgtyFQbk5Zjp3l538kxfpHPslzI= github.com/anchore/fangs v0.0.0-20240903175602-e716ef12c23d h1:ZD4wdCBgJJzJybjTUIEiiupLF7B9H3WLuBTjspBO2Mc= github.com/anchore/fangs v0.0.0-20240903175602-e716ef12c23d/go.mod h1:Xh4ObY3fmoMzOEVXwDtS1uK44JC7+nRD0n29/1KYFYg= github.com/anchore/go-collections v0.0.0-20240216171411-9321230ce537 h1:GjNGuwK5jWjJMyVppBjYS54eOiiSNv4Ba869k4wh72Q= github.com/anchore/go-collections v0.0.0-20240216171411-9321230ce537/go.mod h1:1aiktV46ATCkuVg0O573ZrH56BUawTECPETbZyBcqT8= github.com/anchore/go-logger v0.0.0-20230725134548-c21dafa1ec5a h1:nJ2G8zWKASyVClGVgG7sfM5mwoZlZ2zYpIzN2OhjWkw= github.com/anchore/go-logger v0.0.0-20230725134548-c21dafa1ec5a/go.mod h1:ubLFmlsv8/DFUQrZwY5syT5/8Er3ugSr4rDFwHsE3hg= github.com/anchore/go-macholibre v0.0.0-20220308212642-53e6d0aaf6fb h1:iDMnx6LIjtjZ46C0akqveX83WFzhpTD3eqOthawb5vU= github.com/anchore/go-macholibre 
v0.0.0-20220308212642-53e6d0aaf6fb/go.mod h1:DmTY2Mfcv38hsHbG78xMiTDdxFtkHpgYNVDPsF2TgHk= github.com/anchore/go-struct-converter v0.0.0-20221118182256-c68fdcfa2092 h1:aM1rlcoLz8y5B2r4tTLMiVTrMtpfY0O8EScKJxaSaEc= github.com/anchore/go-struct-converter v0.0.0-20221118182256-c68fdcfa2092/go.mod h1:rYqSE9HbjzpHTI74vwPvae4ZVYZd1lue2ta6xHPdblA= github.com/anchore/go-testutils v0.0.0-20200925183923-d5f45b0d3c04 h1:VzprUTpc0vW0nnNKJfJieyH/TZ9UYAnTZs5/gHTdAe8= github.com/anchore/go-testutils v0.0.0-20200925183923-d5f45b0d3c04/go.mod h1:6dK64g27Qi1qGQZ67gFmBFvEHScy0/C8qhQhNe5B5pQ= github.com/anchore/go-version v1.2.2-0.20200701162849-18adb9c92b9b h1:e1bmaoJfZVsCYMrIZBpFxwV26CbsuoEh5muXD5I1Ods= github.com/anchore/go-version v1.2.2-0.20200701162849-18adb9c92b9b/go.mod h1:Bkc+JYWjMCF8OyZ340IMSIi2Ebf3uwByOk6ho4wne1E= github.com/anchore/packageurl-go v0.1.1-0.20240507183024-848e011fc24f h1:B/E9ixKNCasntpoch61NDaQyGPDXLEJlL+B9B/PbdbA= github.com/anchore/packageurl-go v0.1.1-0.20240507183024-848e011fc24f/go.mod h1:Blo6OgJNiYF41ufcgHKkbCKF2MDOMlrqhXv/ij6ocR4= github.com/anchore/stereoscope v0.0.4-0.20241005180410-efa76446cc1c h1:JXezMk8fF5ns4AgRGW49SGfoRgDjJHsDmcpNw272jkU= github.com/anchore/stereoscope v0.0.4-0.20241005180410-efa76446cc1c/go.mod h1:GMupz2FoBhy5RTTmawU06c2pZxgVTceahLWiwJef2uI= github.com/anchore/syft v1.14.0 h1:BeMmc3a9d/63O+nPM8QfV1Olh3r+pYf95JOqbfN4gQg= github.com/anchore/syft v1.14.0/go.mod h1:8bN2W/Tr4Mmm42h2XB9LPiPOps+NzCFIaQOKLBGb2b8= github.com/andreyvit/diff v0.0.0-20170406064948-c7f18ee00883/go.mod h1:rCTlJbsFo29Kk6CurOXKm700vrz8f0KW0JNfpkRJY/8= github.com/andybalholm/brotli v1.0.1/go.mod h1:loMXtMfwqflxFJPmdbJO0a3KNoPuLBgiu3qAvBg8x/Y= github.com/andybalholm/brotli v1.0.4 h1:V7DdXeJtZscaqfNuAdSRuRFzuiKlHSC/Zh3zl9qY3JY= github.com/andybalholm/brotli v1.0.4/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig= github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be h1:9AeTilPcZAjCFIImctFaOjnTIavg87rW78vTPkQqLI8= github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be/go.mod h1:ySMOLuWl6zY27l47sB3qLNK6tF2fkHG55UZxx8oIVo4= github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY= github.com/aquasecurity/go-pep440-version v0.0.0-20210121094942-22b2f8951d46 h1:vmXNl+HDfqqXgr0uY1UgK1GAhps8nbAAtqHNBcgyf+4= github.com/aquasecurity/go-pep440-version v0.0.0-20210121094942-22b2f8951d46/go.mod h1:olhPNdiiAAMiSujemd1O/sc6GcyePr23f/6uGKtthNg= github.com/aquasecurity/go-version v0.0.0-20210121072130-637058cfe492 h1:rcEG5HI490FF0a7zuvxOxen52ddygCfNVjP0XOCMl+M= github.com/aquasecurity/go-version v0.0.0-20210121072130-637058cfe492/go.mod h1:9Beu8XsUNNfzml7WBf3QmyPToP1wm1Gj/Vc5UJKqTzU= github.com/armon/circbuf v0.0.0-20150827004946-bbbad097214e/go.mod h1:3U/XgcO3hCbHZ8TKRvWD2dDTCfh9M9ya+I9JpbB7O8o= github.com/armon/go-metrics v0.0.0-20180917152333-f0300d1749da/go.mod h1:Q73ZrmVTwzkszR9V5SSuryQ31EELlFMUz1kKyl939pY= github.com/armon/go-metrics v0.3.10/go.mod h1:4O98XIr/9W0sxpJ8UaYkvjk10Iff7SnFrb4QAOwNTFc= github.com/armon/go-radix v0.0.0-20180808171621-7fddfc383310/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8= github.com/armon/go-radix v1.0.0/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8= github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio= github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs= github.com/atotto/clipboard v0.1.4 h1:EH0zSVneZPSuFR11BlR9YppQTVDbh5+16AmcJi4g1z4= github.com/atotto/clipboard v0.1.4/go.mod 
h1:ZY9tmq7sm5xIbd9bOK4onWV4S6X0u6GY7Vn0Yu86PYI= github.com/aymanbagabas/go-osc52/v2 v2.0.1 h1:HwpRHbFMcZLEVr42D4p7XBqjyuxQH5SMiErDT4WkJ2k= github.com/aymanbagabas/go-osc52/v2 v2.0.1/go.mod h1:uYgXzlJ7ZpABp8OJ+exZzJJhRNQ2ASbcXHWsFqH8hp8= github.com/becheran/wildmatch-go v1.0.0 h1:mE3dGGkTmpKtT4Z+88t8RStG40yN9T+kFEGj2PZFSzA= github.com/becheran/wildmatch-go v1.0.0/go.mod h1:gbMvj0NtVdJ15Mg/mH9uxk2R1QCistMyU7d9KFzroX4= github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs= github.com/bmatcuk/doublestar/v4 v4.6.1 h1:FH9SifrbvJhnlQpztAx++wlkk70QBf0iBWDwNy7PA4I= github.com/bmatcuk/doublestar/v4 v4.6.1/go.mod h1:xBQ8jztBU6kakFMg+8WGxn0c6z1fTSPVIjEY1Wr7jzc= github.com/bradleyjkemp/cupaloy/v2 v2.8.0 h1:any4BmKE+jGIaMpnU8YgH/I2LPiLBufr6oMMlVBbn9M= github.com/bradleyjkemp/cupaloy/v2 v2.8.0/go.mod h1:bm7JXdkRd4BHJk9HpwqAI8BoAY1lps46Enkdqw6aRX0= github.com/caarlos0/env v3.5.0+incompatible h1:Yy0UN8o9Wtr/jGHZDpCBLpNrzcFLLM2yixi/rBrKyJs= github.com/caarlos0/env v3.5.0+incompatible/go.mod h1:tdCsowwCzMLdkqRYDlHpZCp2UooDD3MspDBjZ2AD02Y= github.com/cenkalti/backoff/v4 v4.2.1 h1:y4OZtCnogmCPw98Zjyt5a6+QwPLGkiQsYW5oUqylYbM= github.com/cenkalti/backoff/v4 v4.2.1/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= github.com/census-instrumentation/opencensus-proto v0.3.0/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc= github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/cespare/xxhash/v2 v2.1.2/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/charmbracelet/bubbles v0.20.0 h1:jSZu6qD8cRQ6k9OMfR1WlM+ruM8fkPWkHvQWD9LIutE= github.com/charmbracelet/bubbles v0.20.0/go.mod h1:39slydyswPy+uVOHZ5x/GjwVAFkCsV8IIVy+4MhzwwU= github.com/charmbracelet/bubbletea v1.1.1 h1:KJ2/DnmpfqFtDNVTvYZ6zpPFL9iRCRr0qqKOCvppbPY= github.com/charmbracelet/bubbletea v1.1.1/go.mod h1:9Ogk0HrdbHolIKHdjfFpyXJmiCzGwy+FesYkZr7hYU4= github.com/charmbracelet/lipgloss v0.13.0 h1:4X3PPeoWEDCMvzDvGmTajSyYPcZM4+y8sCA/SsA3cjw= github.com/charmbracelet/lipgloss v0.13.0/go.mod h1:nw4zy0SBX/F/eAO1cWdcvy6qnkDUxr8Lw7dvFrAIbbY= github.com/charmbracelet/x/ansi v0.2.3 h1:VfFN0NUpcjBRd4DnKfRaIRo53KRgey/nhOoEqosGDEY= github.com/charmbracelet/x/ansi v0.2.3/go.mod h1:dk73KoMTT5AX5BsX0KrqhsTqAnhZZoCBjs7dGWp4Ktw= github.com/charmbracelet/x/term v0.2.0 h1:cNB9Ot9q8I711MyZ7myUR5HFWL/lc3OpU8jZ4hwm0x0= github.com/charmbracelet/x/term v0.2.0/go.mod h1:GVxgxAbjUrmpvIINHIQnJJKpMlHiZ4cktEQCN6GWyF0= github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= github.com/circonus-labs/circonus-gometrics v2.3.1+incompatible/go.mod h1:nmEj6Dob7S7YxXgwXpfOuvO54S+tGdZdw9fuRZt25Ag= github.com/circonus-labs/circonusllhist v0.1.3/go.mod h1:kMXHVDlOchFAehlya5ePtbp5jckzBHf4XRpQvBOLI+I= github.com/client9/misspell v0.3.4/go.mod 
h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= github.com/cloudflare/circl v1.6.1 h1:zqIqSPIndyBh1bjLVVDHMPpVKqp8Su/V+6MeDzzQBQ0= github.com/cloudflare/circl v1.6.1/go.mod h1:uddAzsPgqdMAYatqJ0lsjX1oECcQLIlRpzZh3pJrofs= github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= github.com/cncf/udpa/go v0.0.0-20200629203442-efcf912fb354/go.mod h1:WmhPx2Nbnhtbo57+VJT5O0JRkEi1Wbu0z5j0R8u5Hbk= github.com/cncf/udpa/go v0.0.0-20201120205902-5459f2c99403/go.mod h1:WmhPx2Nbnhtbo57+VJT5O0JRkEi1Wbu0z5j0R8u5Hbk= github.com/cncf/udpa/go v0.0.0-20210930031921-04548b0d99d4/go.mod h1:6pvJx4me5XPnfI9Z40ddWsdw2W/uZgQLFXToKeRcDiI= github.com/cncf/xds/go v0.0.0-20210312221358-fbca930ec8ed/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= github.com/cncf/xds/go v0.0.0-20210805033703-aa0b78936158/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= github.com/cncf/xds/go v0.0.0-20210922020428-25de7278fc84/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= github.com/cncf/xds/go v0.0.0-20211001041855-01bcc9b48dfe/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= github.com/cncf/xds/go v0.0.0-20211011173535-cb28da3451f1/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= github.com/cncf/xds/go v0.0.0-20211130200136-a8f946100490/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= github.com/containerd/cgroups v1.1.0 h1:v8rEWFl6EoqHB+swVNjVoCJE8o3jX7e8nqBGPLaDFBM= github.com/containerd/cgroups v1.1.0/go.mod h1:6ppBcbh/NOOUU+dMKrykgaBnK9lCIBxHqJDGwsa1mIw= github.com/containerd/containerd v1.7.28 h1:Nsgm1AtcmEh4AHAJ4gGlNSaKgXiNccU270Dnf81FQ3c= github.com/containerd/containerd v1.7.28/go.mod h1:azUkWcOvHrWvaiUjSQH0fjzuHIwSPg1WL5PshGP4Szs= github.com/containerd/containerd/api v1.8.0 h1:hVTNJKR8fMc/2Tiw60ZRijntNMd1U+JVMyTRdsD2bS0= github.com/containerd/containerd/api v1.8.0/go.mod h1:dFv4lt6S20wTu/hMcP4350RL87qPWLVa/OHOwmmdnYc= github.com/containerd/continuity v0.4.4 h1:/fNVfTJ7wIl/YPMHjf+5H32uFhl63JucB34PlCpMKII= github.com/containerd/continuity v0.4.4/go.mod h1:/lNJvtJKUQStBzpVQ1+rasXO1LAWtUQssk28EZvJ3nE= github.com/containerd/errdefs v0.3.0 h1:FSZgGOeK4yuT/+DnF07/Olde/q4KBoMsaamhXxIMDp4= github.com/containerd/errdefs v0.3.0/go.mod h1:+YBYIdtsnF4Iw6nWZhJcqGSg/dwvV7tyJ/kCkyJ2k+M= github.com/containerd/fifo v1.1.0 h1:4I2mbh5stb1u6ycIABlBw9zgtlK8viPI9QkQNRQEEmY= github.com/containerd/fifo v1.1.0/go.mod h1:bmC4NWMbXlt2EZ0Hc7Fx7QzTFxgPID13eH0Qu+MAb2o= github.com/containerd/log v0.1.0 h1:TCJt7ioM2cr/tfR8GPbGf9/VRAX8D2B4PjzCpfX540I= github.com/containerd/log v0.1.0/go.mod h1:VRRf09a7mHDIRezVKTRCrOq78v577GXq3bSa3EhrzVo= github.com/containerd/platforms v0.2.1 h1:zvwtM3rz2YHPQsF2CHYM8+KtB5dvhISiXh5ZpSBQv6A= github.com/containerd/platforms v0.2.1/go.mod h1:XHCb+2/hzowdiut9rkudds9bE5yJ7npe7dG/wG+uFPw= github.com/containerd/stargz-snapshotter/estargz v0.14.3 h1:OqlDCK3ZVUO6C3B/5FSkDwbkEETK84kQgEeFwDC+62k= github.com/containerd/stargz-snapshotter/estargz v0.14.3/go.mod h1:KY//uOCIkSuNAHhJogcZtrNHdKrA99/FCCRjE3HD36o= github.com/containerd/ttrpc v1.2.7 h1:qIrroQvuOL9HQ1X6KHe2ohc7p+HP/0VE6XPU7elJRqQ= github.com/containerd/ttrpc v1.2.7/go.mod h1:YCXHsb32f+Sq5/72xHubdiJRQY9inL4a4ZQrAbN1q9o= github.com/containerd/typeurl/v2 v2.1.1 h1:3Q4Pt7i8nYwy2KmQWIw2+1hTvwTE/6w9FqcttATPO/4= github.com/containerd/typeurl/v2 v2.1.1/go.mod h1:IDp2JFvbwZ31H8dQbEIY7sDl2L3o3HZj1hsSQlywkQ0= github.com/coreos/go-semver v0.3.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk= github.com/coreos/go-systemd/v22 v22.3.2/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= 
github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= github.com/cpuguy83/go-md2man/v2 v2.0.1/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/cyphar/filepath-securejoin v0.4.1 h1:JyxxyPEaktOD+GAnqIqTf9A8tHyAG22rowi7HkoSU1s= github.com/cyphar/filepath-securejoin v0.4.1/go.mod h1:Sdj7gXlvMcPZsbhwhQ33GguGLDGQL7h7bg04C/+u9jI= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/deitch/magic v0.0.0-20230404182410-1ff89d7342da h1:ZOjWpVsFZ06eIhnh4mkaceTiVoktdU67+M7KDHJ268M= github.com/deitch/magic v0.0.0-20230404182410-1ff89d7342da/go.mod h1:B3tI9iGHi4imdLi4Asdha1Sc6feLMTfPLXh9IUYmysk= github.com/dgrijalva/jwt-go/v4 v4.0.0-preview1/go.mod h1:+hnT3ywWDTAFrW5aE+u2Sa/wT555ZqwoCS+pk3p6ry4= github.com/distribution/reference v0.6.0 h1:0IXCQ5g4/QMHHkarYzh5l+u8T3t73zM5QvfrDyIgxBk= github.com/distribution/reference v0.6.0/go.mod h1:BbU0aIcezP1/5jX/8MP0YiH4SdvB5Y4f/wlDRiLyi3E= github.com/docker/cli v27.1.1+incompatible h1:goaZxOqs4QKxznZjjBWKONQci/MywhtRv2oNn0GkeZE= github.com/docker/cli v27.1.1+incompatible/go.mod h1:JLrzqnKDaYBop7H2jaqPtU4hHvMKP+vjCwu2uszcLI8= github.com/docker/distribution v2.8.3+incompatible h1:AtKxIZ36LoNK51+Z6RpzLpddBirtxJnzDrHLEKxTAYk= github.com/docker/distribution v2.8.3+incompatible/go.mod h1:J2gT2udsDAN96Uj4KfcMRqY0/ypR+oyYUYmja8H+y+w= github.com/docker/docker v27.3.1+incompatible h1:KttF0XoteNTicmUtBO0L2tP+J7FGRFTjaEF4k6WdhfI= github.com/docker/docker v27.3.1+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk= github.com/docker/docker-credential-helpers v0.7.0 h1:xtCHsjxogADNZcdv1pKUHXryefjlVRqWqIhk/uXJp0A= github.com/docker/docker-credential-helpers v0.7.0/go.mod h1:rETQfLdHNT3foU5kuNkFR1R1V12OJRRO5lzt2D1b5X0= github.com/docker/go-connections v0.4.0 h1:El9xVISelRB7BuFusrZozjnkIM5YnzCViNKohAFqRJQ= github.com/docker/go-connections v0.4.0/go.mod h1:Gbd7IOopHjR8Iph03tsViu4nIes5XhDvyHbTtUxmeec= github.com/docker/go-events v0.0.0-20190806004212-e31b211e4f1c h1:+pKlWGMw7gf6bQ+oDZB4KHQFypsfjYlq/C4rfL7D3g8= github.com/docker/go-events v0.0.0-20190806004212-e31b211e4f1c/go.mod h1:Uw6UezgYA44ePAFQYUehOuCzmy5zmg/+nl2ZfMWGkpA= github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4= github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5 h1:iFaUwBSo5Svw6L7HYpRu/0lE3e0BaElwnNO1qkNQxBY= github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5/go.mod h1:qssHWj60/X5sZFNxpG4HBPDHVqxNm4DfnCKgrbZOT+s= github.com/dsnet/golib v0.0.0-20171103203638-1ea166775780/go.mod h1:Lj+Z9rebOhdfkVLjJ8T6VcRQv3SXugXy999NBtR9aFY= github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= github.com/edsrzf/mmap-go v1.1.0 h1:6EUwBLQ/Mcr1EYLE4Tn1VdW1A4ckqCQWZBw8Hr0kjpQ= github.com/edsrzf/mmap-go v1.1.0/go.mod 
h1:19H/e8pUPLicwkyNgOykDXkJ9F0MHE+Z52B8EIth78Q= github.com/elazarl/goproxy v1.7.2 h1:Y2o6urb7Eule09PjlhQRGNsqRfPmYI3KKQLFpCAV3+o= github.com/elazarl/goproxy v1.7.2/go.mod h1:82vkLNir0ALaW14Rc399OTTjyNREgmdL2cVoIbS6XaE= github.com/elliotchance/phpserialize v1.4.0 h1:cAp/9+KSnEbUC8oYCE32n2n84BeW8HOY3HMDI8hG2OY= github.com/elliotchance/phpserialize v1.4.0/go.mod h1:gt7XX9+ETUcLXbtTKEuyrqW3lcLUAeS/AnGZ2e49TZs= github.com/emirpasic/gods v1.18.1 h1:FXtiHYKDGKCW2KzwZKx0iC0PQmdlorYgdFG9jPXJ1Bc= github.com/emirpasic/gods v1.18.1/go.mod h1:8tpGGwCnJ5H4r6BWwaV6OrWmMoPhUl5jm/FMNAnJvWQ= github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= github.com/envoyproxy/go-control-plane v0.9.7/go.mod h1:cwu0lG7PUMfa9snN8LXBig5ynNVH9qI8YYLbd1fK2po= github.com/envoyproxy/go-control-plane v0.9.9-0.20201210154907-fd9021fe5dad/go.mod h1:cXg6YxExXjJnVBQHBLXeUAgxn2UodCpnH306RInaBQk= github.com/envoyproxy/go-control-plane v0.9.9-0.20210217033140-668b12f5399d/go.mod h1:cXg6YxExXjJnVBQHBLXeUAgxn2UodCpnH306RInaBQk= github.com/envoyproxy/go-control-plane v0.9.9-0.20210512163311-63b5d3c536b0/go.mod h1:hliV/p42l8fGbc6Y9bQ70uLwIvmJyVE5k4iMKlh8wCQ= github.com/envoyproxy/go-control-plane v0.9.10-0.20210907150352-cf90f659a021/go.mod h1:AFq3mo9L8Lqqiid3OhADV3RfLJnjiw63cSpi+fDTRC0= github.com/envoyproxy/go-control-plane v0.10.1/go.mod h1:AY7fTTXNdv/aJ2O5jwpxAPOWUZ7hQAEvzN5Pf27BkQQ= github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= github.com/envoyproxy/protoc-gen-validate v0.6.2/go.mod h1:2t7qjJNvHPx8IjnBOzl9E9/baC+qXE/TeeyBRzgJDws= github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f h1:Y/CXytFA4m6baUTXGLOoWe4PQhGxaX0KpnayAqC48p4= github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f/go.mod h1:vw97MGsxSvLiUE2X8qFplwetxpGLQrlU1Q9AUEIzCaM= github.com/facebookincubator/flog v0.0.0-20190930132826-d2511d0ce33c/go.mod h1:QGzNH9ujQ2ZUr/CjDGZGWeDAVStrWNjHeEcjJL96Nuk= github.com/facebookincubator/nvdtools v0.1.5 h1:jbmDT1nd6+k+rlvKhnkgMokrCAzHoASWE5LtHbX2qFQ= github.com/facebookincubator/nvdtools v0.1.5/go.mod h1:Kh55SAWnjckS96TBSrXI99KrEKH4iB0OJby3N8GRJO4= github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4= github.com/fatih/color v1.9.0/go.mod h1:eQcE1qtQxscV5RaZvpXrrb8Drkc3/DdQ+uUYCNjL+zU= github.com/fatih/color v1.13.0/go.mod h1:kLAiJbzzSOZDVNGyDpeOxJ47H46qBXwg5ILebYFFOfk= github.com/fatih/color v1.17.1-0.20241003070628-1c8d8706604e h1:43jO1Ogdyp9HrUaSFfg1v8fsKxciHMlmK7lAUCHa0SE= github.com/fatih/color v1.17.1-0.20241003070628-1c8d8706604e/go.mod h1:4FelSpRwEGDpQ12mAdzqdOukCy4u8WUtOY6lkT/6HfU= github.com/fatih/set v0.2.1 h1:nn2CaJyknWE/6txyUDGwysr3G5QC6xWB/PtVjPBbeaA= github.com/fatih/set v0.2.1/go.mod h1:+RKtMCH+favT2+3YecHGxcc0b4KyVWA1QWWJUs4E0CI= github.com/felixge/fgprof v0.9.3 h1:VvyZxILNuCiUCSXtPtYmmtGvb65nqXh2QFWc0Wpf2/g= github.com/felixge/fgprof v0.9.3/go.mod h1:RdbpDgzqYVh/T9fPELJyV7EYJuHB55UTEULNun8eiPw= github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg= github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8= github.com/frankban/quicktest v1.14.6/go.mod 
h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0= github.com/fsnotify/fsnotify v1.5.1/go.mod h1:T3375wBYaZdLLcVNkcVbzGHY7f1l/uK5T5Ai1i3InKU= github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA= github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM= github.com/gabriel-vasile/mimetype v1.4.6 h1:3+PzJTKLkvgjeTbts6msPJt4DixhT4YtFNf1gtGe3zc= github.com/gabriel-vasile/mimetype v1.4.6/go.mod h1:JX1qVKqZd40hUPpAfiNTe0Sne7hdfKSbOqqmkq8GCXc= github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= github.com/github/go-spdx/v2 v2.3.2 h1:IfdyNHTqzs4zAJjXdVQfRnxt1XMfycXoHBE2Vsm1bjs= github.com/github/go-spdx/v2 v2.3.2/go.mod h1:2ZxKsOhvBp+OYBDlsGnUMcchLeo2mrpEBn2L1C+U3IQ= github.com/glebarez/go-sqlite v1.20.3 h1:89BkqGOXR9oRmG58ZrzgoY/Fhy5x0M+/WV48U5zVrZ4= github.com/glebarez/go-sqlite v1.20.3/go.mod h1:u3N6D/wftiAzIOJtZl6BmedqxmmkDfH3q+ihjqxC9u0= github.com/gliderlabs/ssh v0.3.8 h1:a4YXD1V7xMF9g5nTkdfnja3Sxy1PVDCj1Zg4Wb8vY6c= github.com/gliderlabs/ssh v0.3.8/go.mod h1:xYoytBv1sV0aL3CavoDuJIQNURXkkfPA/wxQ1pL1fAU= github.com/go-git/gcfg v1.5.1-0.20230307220236-3a3c6141e376 h1:+zs/tPmkDkHx3U66DAb0lQFJrpS6731Oaa12ikc+DiI= github.com/go-git/gcfg v1.5.1-0.20230307220236-3a3c6141e376/go.mod h1:an3vInlBmSxCcxctByoQdvwPiA7DTK7jaaFDBTtu0ic= github.com/go-git/go-billy/v5 v5.6.2 h1:6Q86EsPXMa7c3YZ3aLAQsMA0VlWmy43r6FHqa/UNbRM= github.com/go-git/go-billy/v5 v5.6.2/go.mod h1:rcFC2rAsp/erv7CMz9GczHcuD0D32fWzH+MJAU+jaUU= github.com/go-git/go-git-fixtures/v4 v4.3.2-0.20231010084843-55a94097c399 h1:eMje31YglSBqCdIqdhKBW8lokaMrL3uTkpGYlE2OOT4= github.com/go-git/go-git-fixtures/v4 v4.3.2-0.20231010084843-55a94097c399/go.mod h1:1OCfN199q1Jm3HZlxleg+Dw/mwps2Wbk9frAWm+4FII= github.com/go-git/go-git/v5 v5.16.2 h1:fT6ZIOjE5iEnkzKyxTHK1W4HGAsPhqEqiSAssSO77hM= github.com/go-git/go-git/v5 v5.16.2/go.mod h1:4Ge4alE/5gPs30F2H1esi2gPd69R0C39lolkucHBOp8= github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU= github.com/go-gl/glfw/v3.3/glfw v0.0.0-20191125211704-12ad95a8df72/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= github.com/go-gl/glfw/v3.3/glfw v0.0.0-20200222043503-6f7a984d4dc4/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= github.com/go-kit/kit v0.9.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE= github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk= github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= github.com/go-restruct/restruct v1.2.0-alpha h1:2Lp474S/9660+SJjpVxoKuWX09JsXHSrdV7Nv3/gkvc= github.com/go-restruct/restruct v1.2.0-alpha/go.mod h1:KqrpKpn4M8OLznErihXTGLlsXFGeLxHUrLRRI/1YjGk= github.com/go-sql-driver/mysql v1.6.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg= github.com/go-sql-driver/mysql v1.8.1 h1:LedoTUt/eveggdHS9qUFC1EFSa8bU2+1pZjSRpvNJ1Y= github.com/go-sql-driver/mysql v1.8.1/go.mod h1:wEBSXgmK//2ZFJyE+qWnIsVGmvmEKlqwuVSjsCm7DZg= github.com/go-stack/stack 
v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= github.com/go-test/deep v1.1.1 h1:0r/53hagsehfO4bzD2Pgr/+RgHqhmf+k1Bpse2cTu1U= github.com/go-test/deep v1.1.1/go.mod h1:5C2ZWiW0ErCdrYzpqxLbTX7MG14M9iiw8DgHncVwcsE= github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20191227052852-215e87163ea7/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8 h1:f+oWsMOmNPc8JmEHVZIycC7hBoQxHH9pNKQORJNozsQ= github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8/go.mod h1:wcDNUvekVysuuOpQKo3191zZyTpiI6se1N1ULghS0sw= github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= github.com/golang/mock v1.2.0/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= github.com/golang/mock v1.3.1/go.mod h1:sBzyDLLjw3U8JLTeZvSv8jJB+tU5PVekmnlKIyFUx0Y= github.com/golang/mock v1.4.0/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw= github.com/golang/mock v1.4.1/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw= github.com/golang/mock v1.4.3/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw= github.com/golang/mock v1.4.4/go.mod h1:l3mdAwkq5BuhzHwde/uurv3sEJeZMXNpwsxVWU71h+4= github.com/golang/mock v1.5.0/go.mod h1:CWnOUgYIOo4TcNZ0wHX3YZCqsaM1I1Jvs6v3mP3KVu8= github.com/golang/mock v1.6.0/go.mod h1:p6yTPP+5HYm5mzsMV8JkE6ZKdX+/wYM6Hr+LicevLPs= github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.3/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw= github.com/golang/protobuf v1.3.4/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw= github.com/golang/protobuf v1.3.5/go.mod h1:6O5/vntMXwX2lRkT1hjjk0nAC1IDOTvTlVgjlRvqsdk= github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8= github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA= github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs= github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w= github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0= github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8= github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= github.com/golang/protobuf v1.5.0/go.mod 
h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= github.com/golang/protobuf v1.5.1/go.mod h1:DopwsBzvsk0Fs44TXzsVbJyPhcCPeIwnvohx4u74HPM= github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= github.com/golang/snappy v0.0.2/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/golang/snappy v0.0.3/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM= github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.4.1/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.1/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= github.com/google/go-containerregistry v0.20.2 h1:B1wPJ1SN/S7pB+ZAimcciVD+r+yV/l/DSArMxlbwseo= github.com/google/go-containerregistry v0.20.2/go.mod h1:z38EKdKh4h7IP2gSfUUqEvalZBqs6AoLeWfUy34nQC8= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/licensecheck v0.3.1 h1:QoxgoDkaeC4nFrtGN1jV7IPmDCHFNIVh54e5hSt6sPs= github.com/google/licensecheck v0.3.1/go.mod h1:ORkR35t/JjW+emNKtfJDII0zlciG9JgbT7SmsohlHmY= github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs= github.com/google/martian/v3 v3.0.0/go.mod h1:y5Zk1BBys9G+gd6Jrk0W3cC1+ELVxBWuIGO+w/tUAp0= github.com/google/martian/v3 v3.1.0/go.mod h1:y5Zk1BBys9G+gd6Jrk0W3cC1+ELVxBWuIGO+w/tUAp0= github.com/google/martian/v3 v3.2.1/go.mod h1:oBOf6HBosgwRXnUGWUB05QECsc6uvmMiJ3+6W4l/CUk= github.com/google/pprof v0.0.0-20181206194817-3ea8567a2e57/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc= github.com/google/pprof v0.0.0-20190515194954-54271f7e092f/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc= github.com/google/pprof v0.0.0-20191218002539-d4f498aebedc/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= github.com/google/pprof 
v0.0.0-20200212024743-f11f1df84d12/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= github.com/google/pprof v0.0.0-20200229191704-1ebb73c60ed3/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= github.com/google/pprof v0.0.0-20200430221834-fc25d7d30c6d/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= github.com/google/pprof v0.0.0-20200708004538-1a94d8640e99/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= github.com/google/pprof v0.0.0-20201023163331-3e6fc7fc9c4c/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= github.com/google/pprof v0.0.0-20201203190320-1bf35d6f28c2/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= github.com/google/pprof v0.0.0-20210122040257-d980be63207e/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= github.com/google/pprof v0.0.0-20210226084205-cbba55b83ad5/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= github.com/google/pprof v0.0.0-20210601050228-01bbb1931b22/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= github.com/google/pprof v0.0.0-20210609004039-a478d1d731e9/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= github.com/google/pprof v0.0.0-20211214055906-6f57359322fd/go.mod h1:KgnwoLYCZ8IQu3XUZ8Nc/bM9CCZFOyjUNOSygVozoDg= github.com/google/pprof v0.0.0-20240409012703-83162a5b38cd h1:gbpYu9NMq8jhDVbvlGkMFWCjLFlqqEZjEmObmhUy6Vo= github.com/google/pprof v0.0.0-20240409012703-83162a5b38cd/go.mod h1:kf6iHlnVGwgKolg33glAes7Yg/8iWP8ukqeldJSO7jw= github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg= github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk= github.com/googleapis/gax-go/v2 v2.1.0/go.mod h1:Q3nei7sK6ybPYH7twZdmQpAd1MKb7pfu6SK+H1/DsU0= github.com/googleapis/gax-go/v2 v2.1.1/go.mod h1:hddJymUZASv3XPyGkUpKj8pPO47Rmb0eJc8R6ouapiM= github.com/gookit/color v1.2.5/go.mod h1:AhIE+pS6D4Ql0SQWbBeXPHw7gY0/sjHoA4s/n1KB7xg= github.com/gookit/color v1.5.4 h1:FZmqs7XOyGgCAxmWyPslpiok1k05wmY3SJTytgvYFs0= github.com/gookit/color v1.5.4/go.mod h1:pZJOeOS8DM43rXbp4AZo1n9zCU2qjpcRko0b6/QJi9w= github.com/grpc-ecosystem/grpc-gateway v1.16.0 h1:gmcG1KaJ57LophUzW0Hy8NmPhnMZb4M0+kPpLofRdBo= github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw= github.com/grpc-ecosystem/grpc-gateway/v2 v2.16.0 h1:YBftPWNWd4WwGqtY2yeZL2ef8rHAxPBD8KFhJpmcqms= github.com/grpc-ecosystem/grpc-gateway/v2 v2.16.0/go.mod h1:YN5jB8ie0yfIUg6VvR9Kz84aCaG7AsGZnLjhHbUqwPg= github.com/hashicorp/consul/api v1.11.0/go.mod h1:XjsvQN+RJGWI2TWy1/kqaE16HrR2J/FWgkYjdZQsX9M= github.com/hashicorp/consul/sdk v0.8.0/go.mod h1:GBvyrGALthsZObzUGsfgHZQDXjg4lOjagTIwIR1vPms= github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= github.com/hashicorp/errwrap v1.1.0 h1:OxrOeh75EUXMY8TBjag2fzXGZ40LB6IKw45YeGUDY2I= github.com/hashicorp/errwrap v1.1.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= github.com/hashicorp/go-cleanhttp v0.5.0/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80= github.com/hashicorp/go-cleanhttp v0.5.1/go.mod 
h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80= github.com/hashicorp/go-cleanhttp v0.5.2/go.mod h1:kO/YDlP8L1346E6Sodw+PrpBSV4/SoxCXGY6BqNFT48= github.com/hashicorp/go-hclog v0.12.0/go.mod h1:whpDNt7SSdeAju8AWKIWsul05p54N/39EeqMAyrmvFQ= github.com/hashicorp/go-hclog v1.0.0/go.mod h1:whpDNt7SSdeAju8AWKIWsul05p54N/39EeqMAyrmvFQ= github.com/hashicorp/go-immutable-radix v1.0.0/go.mod h1:0y9vanUI8NX6FsYoO3zeMjhV/C5i9g4Q3DwcSNZ4P60= github.com/hashicorp/go-immutable-radix v1.3.1/go.mod h1:0y9vanUI8NX6FsYoO3zeMjhV/C5i9g4Q3DwcSNZ4P60= github.com/hashicorp/go-msgpack v0.5.3/go.mod h1:ahLV/dePpqEmjfWmKiqvPkv/twdG7iPBM1vqhUKIvfM= github.com/hashicorp/go-multierror v1.0.0/go.mod h1:dHtQlpGsu+cZNNAkkCN/P3hoUDHhCYQXV3UM06sGGrk= github.com/hashicorp/go-multierror v1.1.0/go.mod h1:spPvp8C1qA32ftKqdAHm4hHTbPw+vmowP0z+KUhOZdA= github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo= github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM= github.com/hashicorp/go-retryablehttp v0.5.3/go.mod h1:9B5zBasrRhHXnJnui7y6sL7es7NDiJgTc6Er0maI1Xs= github.com/hashicorp/go-rootcerts v1.0.2/go.mod h1:pqUvnprVnM5bf7AOirdbb01K4ccR319Vf4pU3K5EGc8= github.com/hashicorp/go-sockaddr v1.0.0/go.mod h1:7Xibr9yA9JjQq1JpNB2Vw7kxv8xerXegt+ozgdvDeDU= github.com/hashicorp/go-syslog v1.0.0/go.mod h1:qPfqrKkXGihmCqbJM2mZgkZGvKG1dFdvsLplgctolz4= github.com/hashicorp/go-uuid v1.0.0/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= github.com/hashicorp/go-uuid v1.0.1/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= github.com/hashicorp/golang-lru v0.5.4 h1:YDjusn29QI/Das2iO9M0BHnIbxPeyuCHsjMW+lJfyTc= github.com/hashicorp/golang-lru v0.5.4/go.mod h1:iADmTwqILo4mZ8BN3D2Q6+9jd8WM5uGBxy+E8yxSoD4= github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k= github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM= github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4= github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= github.com/hashicorp/logutils v1.0.0/go.mod h1:QIAnNjmIWmVIIkWDTG1z5v++HQmx9WQRO+LraFDTW64= github.com/hashicorp/mdns v1.0.1/go.mod h1:4gW7WsVCke5TE7EPeYliwHlRUyBtfCwuFwuMg2DmyNY= github.com/hashicorp/mdns v1.0.4/go.mod h1:mtBihi+LeNXGtG8L9dX59gAEa12BDtBQSp4v/YAJqrc= github.com/hashicorp/memberlist v0.2.2/go.mod h1:MS2lj3INKhZjWNqd3N0m3J+Jxf3DAOnAH9VT3Sh9MUE= github.com/hashicorp/memberlist v0.3.0/go.mod h1:MS2lj3INKhZjWNqd3N0m3J+Jxf3DAOnAH9VT3Sh9MUE= github.com/hashicorp/serf v0.9.5/go.mod h1:UWDWwZeL5cuWDJdl0C6wrvrUwEqtQ4ZKBKKENpqIUyk= github.com/hashicorp/serf v0.9.6/go.mod h1:TXZNMjZQijwlDvp+r0b63xZ45H7JmCmgg4gpTwn9UV4= github.com/huandu/xstrings v1.5.0 h1:2ag3IFq9ZDANvthTwTiqSSZLjDc+BedvHPAp5tJy2TI= github.com/huandu/xstrings v1.5.0/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE= github.com/iancoleman/strcase v0.2.0/go.mod h1:iwCmte+B7n89clKwxIoIXy/HfoL7AsD47ZCWhYzw7ho= github.com/iancoleman/strcase v0.3.0 h1:nTXanmYxhfFAMjZL34Ov6gkzEsSJZ5DbhxWjvSASxEI= github.com/iancoleman/strcase v0.3.0/go.mod h1:iwCmte+B7n89clKwxIoIXy/HfoL7AsD47ZCWhYzw7ho= github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= github.com/ianlancetaylor/demangle 
v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= github.com/ianlancetaylor/demangle v0.0.0-20210905161508-09a460cdf81d/go.mod h1:aYm2/VgdVmcIU8iMfdMvDMsRAQjcfZSKFby6HOFvi/w= github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99 h1:BQSFePA1RWJOlocH6Fxy8MmwDt+yVQYULKfN0RoTN8A= github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99/go.mod h1:1lJo3i6rXxKeerYnT8Nvf0QmHCRC1n8sfWVwXF2Frvo= github.com/jedib0t/go-pretty/v6 v6.6.0 h1:wmZVuAcEkZRT+Aq1xXpE8IGat4vE5WXOMmBpbQqERXw= github.com/jedib0t/go-pretty/v6 v6.6.0/go.mod h1:zbn98qrYlh95FIhwwsbIip0LYpwSG8SUOScs+v9/t0E= github.com/jinzhu/copier v0.4.0 h1:w3ciUoD19shMCRargcpm0cm91ytaBhDvuRpz1ODO/U8= github.com/jinzhu/copier v0.4.0/go.mod h1:DfbEm0FYsaqBcKcFuvmOZb218JkPGtvSHsKg8S8hyyg= github.com/jmoiron/sqlx v1.4.0 h1:1PLqN7S1UYp5t4SrVVnt4nUVNemrDAtxlulVe+Qgm3o= github.com/jmoiron/sqlx v1.4.0/go.mod h1:ZrZ7UsYB/weZdl2Bxg6jCRO9c3YHl8r3ahlKmRT4JLY= github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= github.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= github.com/json-iterator/go v1.1.11/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU= github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk= github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w= github.com/kastenhq/goversion v0.0.0-20230811215019-93b2f8823953 h1:WdAeg/imY2JFPc/9CST4bZ80nNJbiBFCAdSZCSgrS5Y= github.com/kastenhq/goversion v0.0.0-20230811215019-93b2f8823953/go.mod h1:6o+UrvuZWc4UTyBhQf0LGjW9Ld7qJxLz/OqvSOWWlEc= github.com/kevinburke/ssh_config v1.2.0 h1:x584FjTGwHzMwvHx18PXxbBVzfnxogHaAReU4gf13a4= github.com/kevinburke/ssh_config v1.2.0/go.mod h1:CT57kijsi8u/K/BOFA39wgDQJ9CxiF4nAY/ojJ6r6mM= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/klauspost/compress v1.4.1/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A= github.com/klauspost/compress v1.11.4/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= github.com/klauspost/compress v1.17.8 h1:YcnTYrq7MikUT7k0Yb5eceMmALQPYBW/Xltxn0NAMnU= github.com/klauspost/compress v1.17.8/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw= github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek= github.com/klauspost/pgzip v1.2.5 h1:qnWYvvKqedOF2ulHpMG72XQol4ILEJ8k2wwRl/Km8oE= github.com/klauspost/pgzip v1.2.5/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs= github.com/knqyf263/go-rpmdb v0.1.1 h1:oh68mTCvp1XzxdU7EfafcWzzfstUZAEa3MW0IJye584= github.com/knqyf263/go-rpmdb v0.1.1/go.mod h1:9LQcoMCMQ9vrF7HcDtXfvqGO4+ddxFQ8+YF/0CVGDww= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/kr/fs v0.1.0/go.mod h1:FFnZGqtBN9Gxj7eW1uZ42v5BccTP0vu6NEaFoC2HwRg= github.com/kr/logfmt 
v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pretty v0.2.0/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw= github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o= github.com/logrusorgru/aurora v2.0.3+incompatible h1:tOpm7WcpBTn4fjmVfgpQq0EfczGlG91VSDkswnjF5A8= github.com/logrusorgru/aurora v2.0.3+incompatible/go.mod h1:7rIyQOR62GCctdiQpZ/zOJlFyk6y+94wXzv6RNZgaR4= github.com/lucasb-eyer/go-colorful v1.2.0 h1:1nnpGOrhyZZuNyfu1QjKiUICQ74+3FNCN69Aj6K7nkY= github.com/lucasb-eyer/go-colorful v1.2.0/go.mod h1:R4dSotOR9KMtayYi1e77YzuveK+i7ruzyGqttikkLy0= github.com/lyft/protoc-gen-star v0.5.3/go.mod h1:V0xaHgaf5oCCqmcxYcWiDfTiKsZsRc87/1qhoTACD8w= github.com/magiconair/properties v1.8.5/go.mod h1:y3VJvCyxH9uVvJTWEGAELF3aiYNyPKd5NZ3oSwXrF60= github.com/magiconair/properties v1.8.7 h1:IeQXZAiQcpL9mgcAe1Nu6cX9LLw6ExEHKjN0VQdvPDY= github.com/magiconair/properties v1.8.7/go.mod h1:Dhd985XPs7jluiymwWYZ0G4Z61jb3vdS329zhj2hYo0= github.com/mattn/go-colorable v0.0.9/go.mod h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaOChaDxuIBZU= github.com/mattn/go-colorable v0.1.4/go.mod h1:U0ppj6V5qS13XJ6of8GYAs25YV2eR4EVcfRqFIhoBtE= github.com/mattn/go-colorable v0.1.6/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc= github.com/mattn/go-colorable v0.1.9/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc= github.com/mattn/go-colorable v0.1.12/go.mod h1:u5H1YNBxpqRaxsYJYSkiCWKzEfiAb1Gb520KVy5xxl4= github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA= github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= github.com/mattn/go-isatty v0.0.3/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4= github.com/mattn/go-isatty v0.0.8/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s= github.com/mattn/go-isatty v0.0.10/go.mod h1:qgIWMr58cqv1PHHyhnkY9lrL7etaEgOFcMEpPG5Rm84= github.com/mattn/go-isatty v0.0.11/go.mod h1:PhnuNfih5lzO57/f3n+odYbM4JtupLOxQOAqxQCu2WE= github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU= github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94= github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= github.com/mattn/go-localereader v0.0.2-0.20220822084749-2491eb6c1c75 h1:P8UmIzZMYDR+NGImiFvErt6VWfIRPuGM+vyjiEdkmIw= github.com/mattn/go-localereader v0.0.2-0.20220822084749-2491eb6c1c75/go.mod h1:8fBrzywKY7BI3czFoHkuzRoWE9C+EiG4R1k4Cjx5p88= github.com/mattn/go-runewidth v0.0.9/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI= github.com/mattn/go-runewidth v0.0.16 h1:E5ScNMtiwvlvB5paMFdw9p4kSQzbXFikJ5SQO6TULQc= github.com/mattn/go-runewidth v0.0.16/go.mod 
h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= github.com/mattn/go-sqlite3 v1.14.22 h1:2gZY6PC6kBnID23Tichd1K+Z0oS6nE/XwU+Vz/5o4kU= github.com/mattn/go-sqlite3 v1.14.22/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y= github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= github.com/mgutz/ansi v0.0.0-20200706080929-d51e80ef957d h1:5PJl274Y63IEHC+7izoQE9x6ikvDFZS2mDVS3drnohI= github.com/mgutz/ansi v0.0.0-20200706080929-d51e80ef957d/go.mod h1:01TrycV0kFyexm33Z7vhZRXopbI8J3TDReVlkTgMUxE= github.com/mholt/archiver/v3 v3.5.1 h1:rDjOBX9JSF5BvoJGvjqK479aL70qh9DIpZCl+k7Clwo= github.com/mholt/archiver/v3 v3.5.1/go.mod h1:e3dqJ7H78uzsRSEACH1joayhuSyhnonssnDhppzS1L4= github.com/microsoft/go-rustaudit v0.0.0-20220730194248-4b17361d90a5 h1:tQRHcLQwnwrPq2j2Qra/NnyjyESBGwdeBeVdAE9kXYg= github.com/microsoft/go-rustaudit v0.0.0-20220730194248-4b17361d90a5/go.mod h1:vYT9HE7WCvL64iVeZylKmCsWKfE+JZ8105iuh2Trk8g= github.com/miekg/dns v1.0.14/go.mod h1:W1PPwlIAgtquWBMBEV9nkV9Cazfe8ScdGz/Lj7v3Nrg= github.com/miekg/dns v1.1.26/go.mod h1:bPDLeHnStXmXAq1m/Ch/hvfNHr14JKNPMBo3VZKjuso= github.com/miekg/dns v1.1.41/go.mod h1:p6aan82bvRIyn+zDIv9xYNUpwa73JcSh9BKwknJysuI= github.com/mitchellh/cli v1.1.0/go.mod h1:xcISNoH86gajksDmfB23e/pu+B+GeFRMYmoHXxx3xhI= github.com/mitchellh/copystructure v1.2.0 h1:vpKXTN4ewci03Vljg/q9QvCGUDttBOGBIa15WveJJGw= github.com/mitchellh/copystructure v1.2.0/go.mod h1:qLl+cE2AmVv+CoeAwDPye/v+N2HKCj9FbZEVFJRxO9s= github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y= github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= github.com/mitchellh/go-testing-interface v1.0.0/go.mod h1:kRemZodwjscx+RGhAo8eIhFbs2+BFgRtFPeD/KE+zxI= github.com/mitchellh/hashstructure/v2 v2.0.2 h1:vGKWl0YJqUNxE8d+h8f6NJLcCJrgbhC4NcD46KavDd4= github.com/mitchellh/hashstructure/v2 v2.0.2/go.mod h1:MG3aRVU/N29oo/V/IhBX8GR/zz4kQkprJgF2EVszyDE= github.com/mitchellh/mapstructure v0.0.0-20160808181253-ca63d7c062ee/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= github.com/mitchellh/mapstructure v1.4.3/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY= github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= github.com/mitchellh/reflectwalk v1.0.2 h1:G2LzWKi524PWgd3mLHV8Y5k7s6XUvT0Gef6zxSIeXaQ= github.com/mitchellh/reflectwalk v1.0.2/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw= github.com/moby/docker-image-spec v1.3.1 h1:jMKff3w6PgbfSa69GfNg+zN/XLhfXJGnEx3Nl2EsFP0= github.com/moby/docker-image-spec v1.3.1/go.mod h1:eKmb5VW8vQEh/BAr2yvVNvuiJuY6UIocYsFu/DxxRpo= github.com/moby/locker v1.0.1 h1:fOXqR41zeveg4fFODix+1Ch4mj/gT0NE1XJbp/epuBg= github.com/moby/locker v1.0.1/go.mod h1:S7SDdo5zpBK84bzzVlKr2V0hz+7x9hWbYC/kq7oQppc= github.com/moby/sys/mountinfo v0.7.2 h1:1shs6aH5s4o5H2zQLn796ADW1wMrIwHsyJ2v9KouLrg= github.com/moby/sys/mountinfo v0.7.2/go.mod h1:1YOa8w8Ih7uW0wALDUgT1dTTSBrZ+HiBLGws92L2RU4= github.com/moby/sys/sequential v0.5.0 h1:OPvI35Lzn9K04PBbCLW0g4LcFAJgHsvXsRyewg5lXtc= github.com/moby/sys/sequential v0.5.0/go.mod h1:tH2cOOs5V9MlPiXcQzRC+eEyab644PWKGRYaaV5ZZlo= github.com/moby/sys/signal v0.7.0 h1:25RW3d5TnQEoKvRbEKUGay6DCQ46IxAVTT9CUMgmsSI= github.com/moby/sys/signal v0.7.0/go.mod h1:GQ6ObYZfqacOwTtlXvcmh9A26dVRul/hbOZn88Kg8Tg= 
github.com/moby/sys/user v0.3.0 h1:9ni5DlcW5an3SvRSx4MouotOygvzaXbaSrc/wGDFWPo= github.com/moby/sys/user v0.3.0/go.mod h1:bG+tYYYJgaMtRKgEmuueC0hJEAZWwtIbZTB+85uoHjs= github.com/moby/sys/userns v0.1.0 h1:tVLXkFOxVu9A64/yh59slHVv9ahO9UIev4JZusOLG/g= github.com/moby/sys/userns v0.1.0/go.mod h1:IHUYgu/kao6N8YZlp9Cf444ySSvCmDlmzUcYfDHOl28= github.com/moby/term v0.0.0-20221205130635-1aeaba878587 h1:HfkjXDfhgVaN5rmueG8cL8KKeFNecRCXFhaJ2qZ5SKA= github.com/moby/term v0.0.0-20221205130635-1aeaba878587/go.mod h1:8FzsFHVUBGZdbDsJw/ot+X+d5HLUbvklYLJ9uGfcI3Y= github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= github.com/morikuni/aec v1.0.0 h1:nP9CBfwrvYnBRgY6qfDQkygYDmYwOilePFkwzv4dU8A= github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7PXmsc= github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6 h1:ZK8zHtRHOkbHy6Mmr5D264iyp3TiX5OmNcI5cIARiQI= github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6/go.mod h1:CJlz5H+gyd6CUWT45Oy4q24RdLyn7Md9Vj2/ldJBSIo= github.com/muesli/cancelreader v0.2.2 h1:3I4Kt4BQjOR54NavqnDogx/MIoWBFa0StPA8ELUXHmA= github.com/muesli/cancelreader v0.2.2/go.mod h1:3XuTXfFS2VjM+HTLZY9Ak0l6eUKfijIfMUZ4EgX0QYo= github.com/muesli/termenv v0.15.2 h1:GohcuySI0QmI3wN8Ok9PtKGkgkFIk7y6Vpb5PvrY+Wo= github.com/muesli/termenv v0.15.2/go.mod h1:Epx+iuz8sNs7mNKhxzH4fWXGNpZwUaJKRS1noLXviQ8= github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= github.com/ncruces/go-strftime v0.1.9 h1:bY0MQC28UADQmHmaF5dgpLmImcShSi2kHU9XLdhx/f4= github.com/ncruces/go-strftime v0.1.9/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls= github.com/nwaples/rardecode v1.1.0 h1:vSxaY8vQhOcVr4mm5e8XllHWTiM4JF507A0Katqw7MQ= github.com/nwaples/rardecode v1.1.0/go.mod h1:5DzqNKiOdpKKBH87u8VlvAnPZMXcGRhxWkRpHbbfGS0= github.com/olekukonko/tablewriter v0.0.5 h1:P2Ga83D34wi1o9J6Wh1mRuqd4mF/x/lgBS7N7AbDhec= github.com/olekukonko/tablewriter v0.0.5/go.mod h1:hPp6KlRPjbx+hW8ykQs1w3UBbZlj6HuIJcUGPhkA7kY= github.com/onsi/gomega v1.34.1 h1:EUMJIKUjM8sKjYbtxQI9A4z2o+rruxnzNvpknOXie6k= github.com/onsi/gomega v1.34.1/go.mod h1:kU1QgUvBDLXBJq618Xvm2LUX6rSAfRaFRTcdOeDLwwY= github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U= github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM= github.com/opencontainers/image-spec v1.1.0 h1:8SG7/vwALn54lVB/0yZ/MMwhFrPYtpEHQb2IpWsCzug= github.com/opencontainers/image-spec v1.1.0/go.mod h1:W4s4sFTMaBeK1BQLXbG4AdM2szdn85PY75RI83NrTrM= github.com/opencontainers/runtime-spec v1.1.0 h1:HHUyrt9mwHUjtasSbXSMvs4cyFxh+Bll4AjJ9odEGpg= github.com/opencontainers/runtime-spec v1.1.0/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= github.com/opencontainers/selinux v1.11.0 h1:+5Zbo97w3Lbmb3PeqQtpmTkMwsW5nRI3YaLpt7tQ7oU= github.com/opencontainers/selinux v1.11.0/go.mod h1:E5dMC3VPuVvVHDYmi78qvhJp8+M586T4DlDRYpFkyec= github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc= 
github.com/pascaldekloe/goe v0.1.0/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc= github.com/pborman/indent v1.2.1 h1:lFiviAbISHv3Rf0jcuh489bi06hj98JsVMtIDZQb9yM= github.com/pborman/indent v1.2.1/go.mod h1:FitS+t35kIYtB5xWTZAPhnmrxcciEEOdbyrrpz5K6Vw= github.com/pelletier/go-toml v1.9.4/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c= github.com/pelletier/go-toml v1.9.5 h1:4yBQzkHv+7BHq2PQUZF3Mx0IYxG7LsP222s7Agd3ve8= github.com/pelletier/go-toml v1.9.5/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c= github.com/pelletier/go-toml/v2 v2.2.2 h1:aYUidT7k73Pcl9nb2gScu7NSrKCSHIDE89b3+6Wq+LM= github.com/pelletier/go-toml/v2 v2.2.2/go.mod h1:1t835xjRzz80PqgE6HHgN2JOsmgYu/h4qDAS4n929Rs= github.com/pierrec/lz4/v4 v4.1.2/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= github.com/pierrec/lz4/v4 v4.1.19 h1:tYLzDnjDXh9qIxSTKHwXwOYmm9d887Y7Y1ZkyXYHAN4= github.com/pierrec/lz4/v4 v4.1.19/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= github.com/pjbgf/sha1cd v0.3.2 h1:a9wb0bp1oC2TGwStyn0Umc/IGKQnEgF0vVaZ8QF8eo4= github.com/pjbgf/sha1cd v0.3.2/go.mod h1:zQWigSxVmsHEZow5qaLtPYxpcKMMQpa09ixqBxuCS6A= github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/profile v1.7.0 h1:hnbDkaNWPCLMO9wGLdBFTIZvzDrDfBM2072E1S9gJkA= github.com/pkg/profile v1.7.0/go.mod h1:8Uer0jas47ZQMJ7VD+OHknK4YDY07LPUC6dEvqDjvNo= github.com/pkg/sftp v1.10.1/go.mod h1:lYOWFsE0bwd1+KfKJaKeuokY15vzFx25BLbzYYoAxZI= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/posener/complete v1.1.1/go.mod h1:em0nMJCgc9GFtwrmVmEMR/ZL6WyhyjMBndrE9hABlRI= github.com/posener/complete v1.2.3/go.mod h1:WZIdtGGp+qx0sLrYKtIRAruyNpv6hFCicSgv7Sy7s/s= github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo= github.com/prometheus/client_golang v1.4.0/go.mod h1:e9GMxYsXl05ICDXkRhurwBS4Q3OK1iX/F2sw+iXX5zU= github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/client_model v0.2.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= github.com/prometheus/common v0.9.1/go.mod h1:yhUN8i9wzaXS3w1O07YhxHEBxD+W35wd8bs7vj7HSQ4= github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA= github.com/prometheus/procfs v0.0.8/go.mod h1:7Qr8sr6344vo1JqZ6HhLceV9o3AJ1Ff+GxbHq6oeK9A= github.com/prometheus/procfs v0.10.1 h1:kYK1Va/YMlutzCGazswoHKo//tZVlFpKYh+PymziUAg= 
github.com/prometheus/procfs v0.10.1/go.mod h1:nwNm2aOCAYw8uTR/9bWRREkZFxAUcWzPHWJq+XBB/FM= github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE= github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo= github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ= github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88= github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ= github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ= github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc= github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/ryanuber/columnize v0.0.0-20160712163229-9b3edd62028f/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts= github.com/saferwall/pe v1.5.4 h1:tLmMggEMUfeqrpJ25zS/okUQmyFdD5xWKL2+z9njCqg= github.com/saferwall/pe v1.5.4/go.mod h1:mJx+PuptmNpoPFBNhWs/uDMFL/kTHVZIkg0d4OUJFbQ= github.com/sagikazarmark/crypt v0.3.0/go.mod h1:uD/D+6UF4SrIR1uGEv7bBNkNqLGqUr43MRiaGWX1Nig= github.com/sagikazarmark/locafero v0.4.0 h1:HApY1R9zGo4DBgr7dqsTH/JJxLTTsOt7u6keLGt6kNQ= github.com/sagikazarmark/locafero v0.4.0/go.mod h1:Pe1W6UlPYUk/+wc/6KFhbORCfqzgYEpgQ3O5fPuL3H4= github.com/sagikazarmark/slog-shim v0.1.0 h1:diDBnUNK9N/354PgrxMywXnAwEr1QZcOr6gto+ugjYE= github.com/sagikazarmark/slog-shim v0.1.0/go.mod h1:SrcSrq8aKtyuqEI1uvTDTK1arOWRIczQRv+GVI1AkeQ= github.com/sahilm/fuzzy v0.1.1 h1:ceu5RHF8DGgoi+/dR5PsECjCDH1BE3Fnmpo7aVXOdRA= github.com/sahilm/fuzzy v0.1.1/go.mod h1:VFvziUEIMCrT6A6tw2RFIXPXXmzXbOsSHF0DOI8ZK9Y= github.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d h1:hrujxIzL1woJ7AwssoOcM/tq5JjjG2yYOc8odClEiXA= github.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d/go.mod h1:uugorj2VCxiV1x+LzaIdVa9b4S4qGAcH6cbhh4qVxOU= github.com/sanity-io/litter v1.5.5 h1:iE+sBxPBzoK6uaEP5Lt3fHNgpKcHXc/A2HGETy0uJQo= github.com/sanity-io/litter v1.5.5/go.mod h1:9gzJgR2i4ZpjZHsKvUXIRQVk7P+yM3e+jAF7bU2UI5U= github.com/sassoftware/go-rpmutils v0.4.0 h1:ojND82NYBxgwrV+mX1CWsd5QJvvEZTKddtCdFLPWhpg= github.com/sassoftware/go-rpmutils v0.4.0/go.mod h1:3goNWi7PGAT3/dlql2lv3+MSN5jNYPjT5mVcQcIsYzI= github.com/scylladb/go-set v1.0.3-0.20200225121959-cc7b2070d91e h1:7q6NSFZDeGfvvtIRwBrU/aegEYJYmvev0cHAwo17zZQ= github.com/scylladb/go-set v1.0.3-0.20200225121959-cc7b2070d91e/go.mod h1:DkpGd78rljTxKAnTDPFqXSGxvETQnJyuSOQwsHycqfs= github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc= github.com/sebdah/goldie/v2 v2.5.3 h1:9ES/mNN+HNUbNWpVAlrzuZ7jE+Nrczbj8uFRjM7624Y= github.com/sebdah/goldie/v2 v2.5.3/go.mod h1:oZ9fp0+se1eapSRjfYbsV/0Hqhbuu3bJVvKI/NNtssI= github.com/secDre4mer/pkcs7 v0.0.0-20240322103146-665324a4461d h1:RQqyEogx5J6wPdoxqL132b100j8KjcVHO1c0KLRoIhc= github.com/secDre4mer/pkcs7 v0.0.0-20240322103146-665324a4461d/go.mod h1:PegD7EVqlN88z7TpCqH92hHP+GBpfomGCCnw1PFtNOA= github.com/sergi/go-diff v1.1.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM= github.com/sergi/go-diff v1.2.0/go.mod 
h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM= github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3 h1:n661drycOFuPLCN3Uc8sB6B/s6Z4t2xvBgU1htSHuq8= github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3/go.mod h1:A0bzQcvG0E7Rwjx0REVgAGH58e96+X0MeOfepqsbeW4= github.com/shopspring/decimal v1.4.0 h1:bxl37RwXBklmTi0C79JfXCEBD1cqqHt0bbgBAGFp81k= github.com/shopspring/decimal v1.4.0/go.mod h1:gawqmDU56v4yIKSwfBSFip1HdCCXN8/+DMd9qYNcwME= github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc= github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo= github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= github.com/sirupsen/logrus v1.9.0/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= github.com/skeema/knownhosts v1.3.1 h1:X2osQ+RAjK76shCbvhHHHVl3ZlgDm8apHEHFqRjnBY8= github.com/skeema/knownhosts v1.3.1/go.mod h1:r7KTdC8l4uxWRyK2TpQZ/1o5HaSzh06ePQNxPwTcfiY= github.com/sourcegraph/conc v0.3.0 h1:OQTbbt6P72L20UqAkXXuLOj79LfEanQ+YQFNpLA9ySo= github.com/sourcegraph/conc v0.3.0/go.mod h1:Sdozi7LEKbFPqYX2/J+iBAM6HpqSLTASQIKqDmF7Mt0= github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= github.com/spdx/gordf v0.0.0-20201111095634-7098f93598fb/go.mod h1:uKWaldnbMnjsSAXRurWqqrdyZen1R7kxl8TkmWk2OyM= github.com/spdx/tools-golang v0.5.5 h1:61c0KLfAcNqAjlg6UNMdkwpMernhw3zVRwDZ2x9XOmk= github.com/spdx/tools-golang v0.5.5/go.mod h1:MVIsXx8ZZzaRWNQpUDhC4Dud34edUYJYecciXgrw5vE= github.com/spf13/afero v1.3.3/go.mod h1:5KUK8ByomD5Ti5Artl0RtHeI5pTF7MIDuXL3yY520V4= github.com/spf13/afero v1.6.0/go.mod h1:Ai8FlHk4v/PARR026UzYexafAt9roJ7LcLMAmO6Z93I= github.com/spf13/afero v1.11.0 h1:WJQKhtpdm3v2IzqG8VMqrr6Rf3UYpEF239Jy9wNepM8= github.com/spf13/afero v1.11.0/go.mod h1:GH9Y3pIexgf1MTIWtNGyogA5MwRIDXGUr+hbWNoBjkY= github.com/spf13/cast v1.4.1/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE= github.com/spf13/cast v1.7.0 h1:ntdiHjuueXFgm5nzDRdOS4yfT43P5Fnud6DH50rz/7w= github.com/spf13/cast v1.7.0/go.mod h1:ancEpBxwJDODSW/UG4rDrAqiKolqNNh2DX3mk86cAdo= github.com/spf13/cobra v1.3.0/go.mod h1:BrRVncBjOJa/eUcVVm9CE+oC6as8k+VYr4NY7WCi9V4= github.com/spf13/cobra v1.8.1 h1:e5/vxKd/rZsfSJMUX1agtjeTDf+qv1/JdBF8gg5k9ZM= github.com/spf13/cobra v1.8.1/go.mod h1:wHxEcudfqmLYa8iTfL+OuZPbBZkmvliBWKIezN3kD9Y= github.com/spf13/jwalterweatherman v1.1.0/go.mod h1:aNWZUN0dPAAO/Ljvb5BEdw96iTZ0EXowPYD95IqWIGo= github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/spf13/viper v1.10.0/go.mod h1:SoyBPwAtKDzypXNDFKN5kzH7ppppbGZtls1UpIy5AsM= github.com/spf13/viper v1.19.0 h1:RWq5SEjt8o25SROyN3z2OrDB9l7RPd3lwTWU8EcEdcI= github.com/spf13/viper v1.19.0/go.mod h1:GQUN9bilAbhU/jgc1bKs99f/suXKeUMct8Adx5+Ntkg= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= github.com/stretchr/objx 
v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY= github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/subosito/gotenv v1.2.0/go.mod h1:N0PQaV/YGNqwC0u51sEeR/aUtSLEXKX9iv69rRypqCw= github.com/subosito/gotenv v1.6.0 h1:9NlTDc1FTs4qu0DDq7AEtTPNw6SVm7uBMsUCUjABIf8= github.com/subosito/gotenv v1.6.0/go.mod h1:Dk4QP5c2W3ibzajGcXpNraDfq2IrhjMIvMSWPKKo0FU= github.com/sylabs/sif/v2 v2.17.1 h1:p6Sl0LWyShXBj2SBsS1dMOMIMrZHe8pwBnBrYt6uo4M= github.com/sylabs/sif/v2 v2.17.1/go.mod h1:XUGB6AQUXGkms3qPOPdevctT3lBLRLWZNWHVnt5HMKE= github.com/sylabs/squashfs v1.0.0 h1:xAyMS21ogglkuR5HaY55PCfqY3H32ma9GkasTYo28Zg= github.com/sylabs/squashfs v1.0.0/go.mod h1:rhWzvgefq1X+R+LZdts10hfMsTg3g74OfGunW8tvg/4= github.com/terminalstatic/go-xsd-validate v0.1.5 h1:RqpJnf6HGE2CB/lZB1A8BYguk8uRtcvYAPLCF15qguo= github.com/terminalstatic/go-xsd-validate v0.1.5/go.mod h1:18lsvYFofBflqCrvo1umpABZ99+GneNTw2kEEc8UPJw= github.com/therootcompany/xz v1.0.1 h1:CmOtsn1CbtmyYiusbfmhmkpAAETj0wBIH6kCYaX+xzw= github.com/therootcompany/xz v1.0.1/go.mod h1:3K3UH1yCKgBneZYhuQUvJ9HPD19UEXEI0BWbMn8qNMY= github.com/tv42/httpunix v0.0.0-20150427012821-b75d8614f926/go.mod h1:9ESjWnEqriFuLhtthL60Sar/7RFoluCcXsuvEwTV5KM= github.com/ulikunitz/xz v0.5.8/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14= github.com/ulikunitz/xz v0.5.9/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14= github.com/ulikunitz/xz v0.5.12 h1:37Nm15o69RwBkXM0J6A5OlE67RZTfzUxTj8fB3dfcsc= github.com/ulikunitz/xz v0.5.12/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14= github.com/urfave/cli v1.22.12/go.mod h1:sSBEIC79qR6OvcmsD4U3KABeOTxDqQtdDnaFuUN30b8= github.com/urfave/cli/v2 v2.3.0/go.mod h1:LJmUH05zAU44vOAcrfzZQKsZbVcdbOG8rtL3/XcUArI= github.com/vbatts/go-mtree v0.5.4 h1:OMAb8jaCyiFA7zXj0Zc/oARcxBDBoeu2LizjB8BVJl0= github.com/vbatts/go-mtree v0.5.4/go.mod h1:5GqJbVhm9BBiCc4K5uc/c42FPgXulHaQs4sFUEfIWMo= github.com/vbatts/tar-split v0.11.3 h1:hLFqsOLQ1SsppQNTMpkpPXClLDfC2A3Zgy9OUU+RVck= github.com/vbatts/tar-split v0.11.3/go.mod h1:9QlHN18E+fEH7RdG+QAJJcuya3rqT7eXSTY7wGrAokY= github.com/vifraa/gopom v1.0.0 h1:L9XlKbyvid8PAIK8nr0lihMApJQg/12OBvMA28BcWh0= github.com/vifraa/gopom v1.0.0/go.mod h1:oPa1dcrGrtlO37WPDBm5SqHAT+wTgF8An1Q71Z6Vv4o= github.com/wagoodman/go-partybus v0.0.0-20230516145632-8ccac152c651 h1:jIVmlAFIqV3d+DOxazTR9v+zgj8+VYuQBzPgBZvWBHA= 
github.com/wagoodman/go-partybus v0.0.0-20230516145632-8ccac152c651/go.mod h1:b26F2tHLqaoRQf8DywqzVaV1MQ9yvjb0OMcNl7Nxu20= github.com/wagoodman/go-progress v0.0.0-20230925121702-07e42b3cdba0 h1:0KGbf+0SMg+UFy4e1A/CPVvXn21f1qtWdeJwxZFoQG8= github.com/wagoodman/go-progress v0.0.0-20230925121702-07e42b3cdba0/go.mod h1:jLXFoL31zFaHKAAyZUh+sxiTDFe1L1ZHrcK2T1itVKA= github.com/xanzy/ssh-agent v0.3.3 h1:+/15pJfg/RsTxqYcX6fHqOXZwwMP+2VyYWJeWM2qQFM= github.com/xanzy/ssh-agent v0.3.3/go.mod h1:6dzNDKs0J9rVPHPhaGCukekBHKqfl+L3KghI1Bc68Uw= github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb h1:zGWFAtiMcyryUHoUjUJX0/lt1H2+i2Ka2n+D3DImSNo= github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU= github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 h1:EzJWgHovont7NscjpAxXsDA8S8BMYve8Y5+7cuRE7R0= github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415/go.mod h1:GwrjFmJcFw6At/Gs6z4yjiIwzuJ1/+UwLxMQDVQXShQ= github.com/xeipuuv/gojsonschema v1.2.0 h1:LhYJRs+L4fBtjZUfuSZIKGeVu0QRy8e5Xi7D17UxZ74= github.com/xeipuuv/gojsonschema v1.2.0/go.mod h1:anYRn/JVcOK2ZgGU+IjEV4nwlhoK5sQluxsYJ78Id3Y= github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 h1:nIPpBwaJSVYIxUFsDv3M8ofmx9yWTog9BfvIu0q41lo= github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8/go.mod h1:HUYIGzjTL3rfEspMxjDjgmT5uz5wzYJKVo23qUhYTos= github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e h1:JVG44RsyaB9T2KIHavMF/ppJZNG9ZpyihvCd0w101no= github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e/go.mod h1:RbqR21r5mrJuqunuUZ/Dhy/avygyECGrLceyNeo4LiM= github.com/yuin/goldmark v1.1.25/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.1.32/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= go.etcd.io/etcd/api/v3 v3.5.1/go.mod h1:cbVKeC6lCfl7j/8jBhAK6aIYO9XOjdptoxU/nLQcPvs= go.etcd.io/etcd/client/pkg/v3 v3.5.1/go.mod h1:IJHfcCEKxYu1Os13ZdwCwIUTUVGYTSAM3YSwc9/Ac1g= go.etcd.io/etcd/client/v2 v2.305.1/go.mod h1:pMEacxZW7o8pg4CrFE7pquyCJJzZvkvdD2RibOCCCGs= go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU= go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8= go.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= go.opencensus.io v0.22.3/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= go.opencensus.io v0.22.4/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= go.opencensus.io v0.22.5/go.mod h1:5pWMHQbX5EPX2/62yrJeAkowc+lfs/XD7Uxpq3pI6kk= go.opencensus.io v0.23.0/go.mod h1:XItmlyltB5F7CS4xOC1DcqMoFqwtC6OG2xF7mCv7P7E= go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0= go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.49.0 h1:jq9TW8u3so/bN+JPT166wjOI6/vQPF6Xe7nMNIltagk= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.49.0/go.mod h1:p8pYQP+m5XfbZm9fxtSKAbM6oIllS7s2AfxrChvc7iw= go.opentelemetry.io/otel v1.24.0 h1:0LAOdjNmQeSTzGBzduGe/rU4tZhMwL5rWgtp9Ku5Jfo= go.opentelemetry.io/otel v1.24.0/go.mod h1:W7b9Ozg4nkF5tWI5zsXkaKKDjdVjpD4oAt9Qi/MArHo= 
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.19.0 h1:Mne5On7VWdx7omSrSSZvM4Kw7cS7NQkOOmLcgscI51U= go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.19.0/go.mod h1:IPtUMKL4O3tH5y+iXVyAXqpAwMuzC1IrxVS81rummfE= go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.19.0 h1:IeMeyr1aBvBiPVYihXIaeIZba6b8E1bYp7lbdxK8CQg= go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.19.0/go.mod h1:oVdCUtjq9MK9BlS7TtucsQwUcXcymNiEDjgDD2jMtZU= go.opentelemetry.io/otel/metric v1.24.0 h1:6EhoGWWK28x1fbpA4tYTOWBkPefTDQnb8WSGXlc88kI= go.opentelemetry.io/otel/metric v1.24.0/go.mod h1:VYhLe1rFfxuTXLgj4CBiyz+9WYBA8pNGJgDcSFRKBco= go.opentelemetry.io/otel/sdk v1.21.0 h1:FTt8qirL1EysG6sTQRZ5TokkU8d0ugCj8htOgThZXQ8= go.opentelemetry.io/otel/sdk v1.21.0/go.mod h1:Nna6Yv7PWTdgJHVRD9hIYywQBRx7pbox6nwBnZIxl/E= go.opentelemetry.io/otel/trace v1.24.0 h1:CsKnnL4dUAr/0llH9FKuc698G04IrpWV0MQA/Y1YELI= go.opentelemetry.io/otel/trace v1.24.0/go.mod h1:HPc3Xr/cOApsBI154IU0OI0HJexz+aw5uPdbs3UCjNU= go.opentelemetry.io/proto/otlp v0.7.0/go.mod h1:PqfVotwruBrMGOCsRd/89rSnXhoiJIqeYNgFYFoEGnI= go.opentelemetry.io/proto/otlp v1.0.0 h1:T0TX0tmXU8a3CbNXzEKGeU5mIVOdf0oykP+u2lIVU/I= go.opentelemetry.io/proto/otlp v1.0.0/go.mod h1:Sy6pihPLfYHkr3NkUbEhGHFhINUSI/v80hjKIs5JXpM= go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= go.uber.org/atomic v1.9.0 h1:ECmE8Bn/WFTYwEW/bpKD3M8VtR/zQVbavAoalC1PYyE= go.uber.org/atomic v1.9.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= go.uber.org/multierr v1.6.0/go.mod h1:cdWPpRnG4AhwMwsgIHip0KRBQjJy5kYEpYjJxpXp9iU= go.uber.org/multierr v1.9.0 h1:7fIwc/ZtS0q++VgcfqFDxSBZVv/Xo49/SYnDFupUwlI= go.uber.org/multierr v1.9.0/go.mod h1:X2jQV1h+kxSjClGpnseKVIxpmcjrj7MNnI0bnlfKTVQ= go.uber.org/zap v1.17.0/go.mod h1:MXVU+bhUf/A7Xi2HNOnopQOrmycQ5Ih87HtOu4q5SSo= golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20181029021203-45a5f77698d3/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20190820162420-60c769a6c586/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20190923035154-9ee001bba392/go.mod h1:/lpIB1dKB+9EgE3H3cr1v9wB50oz8l4C4h62xy7jSTY= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20210817164053-32db794688a5/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= golang.org/x/crypto v0.40.0 h1:r4x+VvoG5Fm+eJcxMaY8CQM7Lb0l1lsmjGBQ6s8BfKM= golang.org/x/crypto v0.40.0/go.mod h1:Qr1vMER5WyS2dfPHAlsOj01wgLbsyWtFn/aY+5+ZdxY= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod 
h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8= golang.org/x/exp v0.0.0-20190829153037-c13cbed26979/go.mod h1:86+5VVa7VpoJ4kLfm080zCjGlMRFzhUhsZKEZO7MGek= golang.org/x/exp v0.0.0-20191030013958-a1ab85dbe136/go.mod h1:JXzH8nQsPlswgeRAPE3MuO9GYsAcnJvJ4vnMwN/5qkY= golang.org/x/exp v0.0.0-20191129062945-2f5052295587/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4= golang.org/x/exp v0.0.0-20191227195350-da58074b4299/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4= golang.org/x/exp v0.0.0-20200119233911-0405dc783f0a/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4= golang.org/x/exp v0.0.0-20200207192155-f17229e696bd/go.mod h1:J/WKrq2StrnmMY6+EHIKF9dgMWnmCNThgcyBT1FY9mM= golang.org/x/exp v0.0.0-20200224162631-6cc2880d07d6/go.mod h1:3jZMyOhIsHpP37uCMkUooju7aAi5cS1Q23tOzKc+0MU= golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 h1:2dVuKD2vS7b0QIHQbpyTISPd0LeHDbnYEryqj5Q1ug8= golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56/go.mod h1:M4RDyNAINzryxdtnbRXRL/OHtkFuWGRjvuhBJpk2IlY= golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js= golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= golang.org/x/lint v0.0.0-20190301231843-5614ed5bae6f/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= golang.org/x/lint v0.0.0-20190409202823-959b441ac422/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= golang.org/x/lint v0.0.0-20190909230951-414d861bb4ac/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= golang.org/x/lint v0.0.0-20191125180803-fdd1cda4f05f/go.mod h1:5qLYkcX4OjUUV8bRuDixDT3tpyyb+LUpUlRWLxfhWrs= golang.org/x/lint v0.0.0-20200130185559-910be7a94367/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= golang.org/x/lint v0.0.0-20200302205851-738671d3881b/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= golang.org/x/lint v0.0.0-20201208152925-83fdc39ff7b5/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= golang.org/x/lint v0.0.0-20210508222113-6edffad5e616/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= golang.org/x/mobile v0.0.0-20190312151609-d3739f865fa6/go.mod h1:z+o9i4GpDbdi3rU15maQ/Ox0txvL9dWGYEHz965HBQE= golang.org/x/mobile v0.0.0-20190719004257-d2bd2a29d028/go.mod h1:E/iHnbuqvinMTCcRqshq8CkpyQDoeVncDDYHnLhea+o= golang.org/x/mod v0.0.0-20190513183733-4bf6d317e70e/go.mod h1:mXi4GBBbnImb6dmsKGUJ2LatrhH/nqhxcFungHvyanc= golang.org/x/mod v0.1.0/go.mod h1:0QHyrYULN0/3qlju5TqG8bIK38QM8yzMo5ekMj3DlcY= golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg= golang.org/x/mod v0.1.1-0.20191107180719-034126e5016b/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg= golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.4.0/go.mod 
h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.4.1/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.5.0/go.mod h1:5OXOZSfqPIIbmVBIIKWRFfZjPR0E5r58TLhUjH0a2Ro= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= golang.org/x/mod v0.26.0 h1:EGMPT//Ezu+ylkCijjPc+f4Aih7sZvaAr+O3EHBxvZg= golang.org/x/mod v0.26.0/go.mod h1:/j6NAhSk8iQ723BGAUyoAcn7SlD7s15Dp9Nd/SfeaFQ= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20181023162649-9b4f9f5ad519/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190501004415-9ce7a6920f09/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190503192946-f4e77d36d62c/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= golang.org/x/net v0.0.0-20190613194153-d28f0bde5980/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20190628185345-da137c7871d7/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20190724013045-ca1201d0de80/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20190923162816-aa69164e4478/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20191209160850-c0dbc17a3553/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200114155413-6afb5195e5aa/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200222125558-5a598a2470a0/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200301022130-244492dfa37a/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/net v0.0.0-20200501053045-e0ff5e5a1de5/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/net v0.0.0-20200506145744-7e3656a0809f/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/net v0.0.0-20200513185701-a91f0712d120/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/net v0.0.0-20200520182314-0ba52f642ac2/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/net v0.0.0-20200625001655-4c5254603344/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= 
golang.org/x/net v0.0.0-20200707034311-ab3426394381/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20201031054903-ff519b6c9102/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20201209123823-ac852fbbde11/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20210119194325-5f4716e94777/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20210316092652-d523dce5a7f4/go.mod h1:RBQZq4jEuRlivfhVLdyRGr576XBO4/greRjx4P4O3yc= golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM= golang.org/x/net v0.0.0-20210410081132-afb366fc7cd1/go.mod h1:9tjilg8BloeKEkVJvy7fQ90B1CfIiPueXVOjqfkSzI8= golang.org/x/net v0.0.0-20210503060351-7fd8e65b6420/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20210813160813-60bc85c4be6d/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= golang.org/x/net v0.42.0 h1:jzkYrhi3YQWD6MLBJcsklgQsoAcw89EcZbJw8Z614hs= golang.org/x/net v0.42.0/go.mod h1:FF1RA5d3u7nAYA4z2TkclSCKh68eSXtiFwcWQpPXdt8= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20191202225959-858c2ad4c8b6/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20200902213428-5d25da1a8d43/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= golang.org/x/oauth2 v0.0.0-20201109201403-9fd604954f58/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= golang.org/x/oauth2 v0.0.0-20201208152858-08078c50e5b5/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= golang.org/x/oauth2 v0.0.0-20210218202405-ba52d332ba99/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= golang.org/x/oauth2 v0.0.0-20210220000619-9bb904979d93/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= golang.org/x/oauth2 v0.0.0-20210313182246-cd4f82c27b84/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= golang.org/x/oauth2 v0.0.0-20210514164344-f6687ab2804c/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= golang.org/x/oauth2 v0.0.0-20210628180205-a41e5a781914/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= golang.org/x/oauth2 v0.0.0-20210805134026-6f1e6394065a/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= golang.org/x/oauth2 v0.0.0-20210819190943-2bc19b11175f/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= golang.org/x/oauth2 v0.0.0-20211005180243-6b3c2da341f1/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= golang.org/x/oauth2 
v0.0.0-20211104180415-d3ed0bb246c8/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20200317015054-43a5402ce75a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20200625203802-6e8e738ad208/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.16.0 h1:ycBJEhp9p4vXvUZNszeOq0kGTPghopOL8q0fq3vstxw= golang.org/x/sync v0.16.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= golang.org/x/sys v0.0.0-20180823144017-11551d06cbcc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20181026203630-95b1ffbd15a5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190502145724-3ef323f4f1fd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190507160741-ecd444e8653b/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190606165138-5da285871e9c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190624142023-c5567b49c5d0/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190726091711-fc99dfbffb4e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190922100055-0a153f010e69/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190924154521-2837fb4f24fe/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191001151750-bb3f8db39f24/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191008105621-543471e840be/go.mod 
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191228213918-04cbcbbfeed8/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200113162924-86b910548bc1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200122134326-e047566fdf82/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200124204421-9fbb57f87de9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200202164722-d101bd2416d5/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200212091648-12a6c2dcc1e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200302150141-5c8b2ff67527/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200331124033-c3d80250170d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200501052902-10377860bb8e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200511232937-7e40ca221e25/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200515095857-1151b9dac4a9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200523222454-059865788121/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200803210538-64077c9b5642/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200905004654-be1d3432aa8f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201201145000-ef89a241ccb3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210104204734-6f8348627aad/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210119212857-b64e53b001e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210220050731-9a76102bfb43/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210303074136-134d130e1a04/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210305230114-8fe3ee5dd75b/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210315160823-c6e025ad8005/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210320140829-1e4c9ba3b0c4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210403161142-5e06dd20ab57/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod 
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210514084401-e8d321eab015/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210603125802-9665404d3644/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210616094352-59db8d763f22/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210806184541-e5e7981a1069/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210816183151-1e6c022a8912/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210823070655-63515b42dcdf/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210908233432-aa78b53d3365/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210927094055-39ccf1dd6fa6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211025201205-69cdffdb9359/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211124211545-fe61309f8881/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211205182925-97ca703d548d/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220906165534-d0df966e6959/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.34.0 h1:H5Y5sJ2L2JRdyv7ROF1he/lPdvFsd0mJHFw2ThKHxLA= golang.org/x/sys v0.34.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.33.0 h1:NuFncQrRcaRvVmgRkvM3j/F00gWIAlcmlB8ACEKmGIg= golang.org/x/term v0.33.0/go.mod h1:s18+ql9tYWp1IfpV9DmCtQDDSRBUjKaw9M1eAv5UeF0= golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.4/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod 
h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.27.0 h1:4fGWRpyh641NLlecmyl4LOe6yDdfaYNrGb2zdfo4JV4= golang.org/x/text v0.27.0/go.mod h1:1D28KMCvyooCX9hBiosv5Tz/+YLxj0j7XhWjpSUF7CU= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.12.0 h1:ScB/8o8olJvc+CQPWrK3fPZNfh7qgwCrY0zJmoEQLSE= golang.org/x/time v0.12.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20190312151545-0bb0c0a6e846/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20190312170243-e65039ee4138/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20190425150028-36563e24a262/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= golang.org/x/tools v0.0.0-20190506145303-2d16b83fe98c/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= golang.org/x/tools v0.0.0-20190606124116-d0a3d012864b/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= golang.org/x/tools v0.0.0-20190621195816-6e04913cbbac/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= golang.org/x/tools v0.0.0-20190628153133-6cdbf07be9d0/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= golang.org/x/tools v0.0.0-20190816200558-6889da9d5479/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20190907020128-2ca718005c18/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20190911174233-4f2ddba30aff/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191012152004-8de300cfc20a/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191113191852-77e3bb0ad9e7/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191115202509-3a792d9c32b2/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191125144606-a911d9008d1f/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191130070609-6e064ea0cf2d/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191216173652-a0e659d51361/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= golang.org/x/tools v0.0.0-20191227053925-7b8e75db28f4/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= golang.org/x/tools v0.0.0-20200117161641-43d50277825c/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= golang.org/x/tools v0.0.0-20200122220014-bf1340f18c4a/go.mod 
h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= golang.org/x/tools v0.0.0-20200204074204-1cc6d1ef6c74/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= golang.org/x/tools v0.0.0-20200207183749-b753a1ba74fa/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= golang.org/x/tools v0.0.0-20200212150539-ea181f53ac56/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= golang.org/x/tools v0.0.0-20200224181240-023911ca70b2/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= golang.org/x/tools v0.0.0-20200227222343-706bc42d1f0d/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= golang.org/x/tools v0.0.0-20200304193943-95d2e580d8eb/go.mod h1:o4KQGtdN14AW+yjsvvwRTJJuXz8XRtIHtEnmAXLyFUw= golang.org/x/tools v0.0.0-20200312045724-11d5b4c81c7d/go.mod h1:o4KQGtdN14AW+yjsvvwRTJJuXz8XRtIHtEnmAXLyFUw= golang.org/x/tools v0.0.0-20200331025713-a30bf2db82d4/go.mod h1:Sl4aGygMT6LrqrWclx+PTx3U+LnKx/seiNR+3G19Ar8= golang.org/x/tools v0.0.0-20200501065659-ab2804fb9c9d/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20200512131952-2bc93b1c0c88/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20200515010526-7d3b6ebf133d/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20200618134242-20370b0cb4b2/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20200729194436-6467de6f59a7/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA= golang.org/x/tools v0.0.0-20200804011535-6c149bb5ef0d/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA= golang.org/x/tools v0.0.0-20200825202427-b303f430e36d/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA= golang.org/x/tools v0.0.0-20200904185747-39188db58858/go.mod h1:Cj7w3i3Rnn0Xh82ur9kSqwfTHTeVxaDqrfMjpcNT6bE= golang.org/x/tools v0.0.0-20201110124207-079ba7bd75cd/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.0.0-20201201161351-ac6f37ff4c2a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.0.0-20201208233053-a543418bbed2/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.0.0-20210105154028-b0ab187a4818/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.1.0/go.mod h1:xkSsbof2nBLbhDlRMhhhyNLN/zl3eTqcnHD5viDpcZ0= golang.org/x/tools v0.1.1/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/tools v0.1.2/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/tools v0.1.3/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/tools v0.1.4/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/tools v0.1.5/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod 
h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 h1:H2TDz8ibqkAF6YGhCdN3jS9O0/s90v0rJh3X/OLHEUk= golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2/go.mod h1:K8+ghG5WaK9qNqU5K3HdILfMLy1f3aNYFI/wnl100a8= google.golang.org/api v0.4.0/go.mod h1:8k5glujaEP+g9n7WNsDg8QP6cUVNI86fCNMcbazEtwE= google.golang.org/api v0.7.0/go.mod h1:WtwebWUNSVBH/HAw79HIFXZNqEvBhG+Ra+ax0hx3E3M= google.golang.org/api v0.8.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg= google.golang.org/api v0.9.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg= google.golang.org/api v0.13.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI= google.golang.org/api v0.14.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI= google.golang.org/api v0.15.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI= google.golang.org/api v0.17.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= google.golang.org/api v0.18.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= google.golang.org/api v0.19.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= google.golang.org/api v0.20.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= google.golang.org/api v0.22.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= google.golang.org/api v0.24.0/go.mod h1:lIXQywCXRcnZPGlsd8NbLnOjtAoL6em04bJ9+z0MncE= google.golang.org/api v0.28.0/go.mod h1:lIXQywCXRcnZPGlsd8NbLnOjtAoL6em04bJ9+z0MncE= google.golang.org/api v0.29.0/go.mod h1:Lcubydp8VUV7KeIHD9z2Bys/sm/vGKnG1UHuDBSrHWM= google.golang.org/api v0.30.0/go.mod h1:QGmEvQ87FHZNiUVJkT14jQNYJ4ZJjdRF23ZXz5138Fc= google.golang.org/api v0.35.0/go.mod h1:/XrVsuzM0rZmrsbjJutiuftIzeuTQcEeaYcSk/mQ1dg= google.golang.org/api v0.36.0/go.mod h1:+z5ficQTmoYpPn8LCUNVpK5I7hwkpjbcgqA7I34qYtE= google.golang.org/api v0.40.0/go.mod h1:fYKFpnQN0DsDSKRVRcQSDQNtqWPfM9i+zNPxepjRCQ8= google.golang.org/api v0.41.0/go.mod h1:RkxM5lITDfTzmyKFPt+wGrCJbVfniCr2ool8kTBzRTU= google.golang.org/api v0.43.0/go.mod h1:nQsDGjRXMo4lvh5hP0TKqF244gqhGcr/YSIykhUk/94= google.golang.org/api v0.47.0/go.mod h1:Wbvgpq1HddcWVtzsVLyfLp8lDg6AA241LmgIL59tHXo= google.golang.org/api v0.48.0/go.mod h1:71Pr1vy+TAZRPkPs/xlCf5SsU8WjuAWv1Pfjbtukyy4= google.golang.org/api v0.50.0/go.mod h1:4bNT5pAuq5ji4SRZm+5QIkjny9JAyVD/3gaSihNefaw= google.golang.org/api v0.51.0/go.mod h1:t4HdrdoNgyN5cbEfm7Lum0lcLDLiise1F8qDKX00sOU= google.golang.org/api v0.54.0/go.mod h1:7C4bFFOvVDGXjfDTAsgGwDgAxRDeQ4X8NvUedIt6z3k= google.golang.org/api v0.55.0/go.mod h1:38yMfeP1kfjsl8isn0tliTjIb1rJXcQi4UXlbqivdVE= google.golang.org/api v0.56.0/go.mod h1:38yMfeP1kfjsl8isn0tliTjIb1rJXcQi4UXlbqivdVE= google.golang.org/api v0.57.0/go.mod h1:dVPlbZyBo2/OjBpmvNdpn2GRm6rPy75jyU7bmhdrMgI= google.golang.org/api v0.59.0/go.mod h1:sT2boj7M9YJxZzgeZqXogmhfmRWDtPzT31xkieUbuZU= google.golang.org/api v0.61.0/go.mod h1:xQRti5UdCmoCEqFxcz93fTl338AVqDgyaDRuOZ3hg9I= google.golang.org/api v0.62.0/go.mod h1:dKmwPCydfsad4qCH08MSdgWjfHOyfpd4VtDGgRFdavw= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= google.golang.org/appengine v1.5.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= google.golang.org/appengine v1.6.1/go.mod h1:i06prIuMbXzDqacNJfV5OdTW448YApPu5ww/cMBSeb0= google.golang.org/appengine v1.6.5/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= google.golang.org/appengine v1.6.6/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= google.golang.org/appengine 
v1.6.7/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= google.golang.org/genproto v0.0.0-20190307195333-5fe7a883aa19/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= google.golang.org/genproto v0.0.0-20190418145605-e7d98fc518a7/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= google.golang.org/genproto v0.0.0-20190425155659-357c62f0e4bb/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= google.golang.org/genproto v0.0.0-20190502173448-54afdca5d873/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= google.golang.org/genproto v0.0.0-20190801165951-fa694d86fc64/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= google.golang.org/genproto v0.0.0-20190911173649-1774047e7e51/go.mod h1:IbNlFCBrqXvoKpeg0TB2l7cyZUmoaFKYIwrEpbDKLA8= google.golang.org/genproto v0.0.0-20191108220845-16a3f7862a1a/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= google.golang.org/genproto v0.0.0-20191115194625-c23dd37a84c9/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= google.golang.org/genproto v0.0.0-20191216164720-4f79533eabd1/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= google.golang.org/genproto v0.0.0-20191230161307-f3c370f40bfb/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= google.golang.org/genproto v0.0.0-20200115191322-ca5a22157cba/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= google.golang.org/genproto v0.0.0-20200122232147-0452cf42e150/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= google.golang.org/genproto v0.0.0-20200204135345-fa8e72b47b90/go.mod h1:GmwEX6Z4W5gMy59cAlVYjN9JhxgbQH6Gn+gFDQe2lzA= google.golang.org/genproto v0.0.0-20200212174721-66ed5ce911ce/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= google.golang.org/genproto v0.0.0-20200224152610-e50cd9704f63/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= google.golang.org/genproto v0.0.0-20200228133532-8c2c7df3a383/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= google.golang.org/genproto v0.0.0-20200305110556-506484158171/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= google.golang.org/genproto v0.0.0-20200312145019-da6875a35672/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= google.golang.org/genproto v0.0.0-20200331122359-1ee6d9798940/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= google.golang.org/genproto v0.0.0-20200430143042-b979b6f78d84/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= google.golang.org/genproto v0.0.0-20200511104702-f5ebc3bea380/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= google.golang.org/genproto v0.0.0-20200513103714-09dca8ec2884/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= google.golang.org/genproto v0.0.0-20200515170657-fc4c6c6a6587/go.mod h1:YsZOwe1myG/8QRHRsmBRE1LrgQY60beZKjly0O1fX9U= google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= google.golang.org/genproto v0.0.0-20200618031413-b414f8b61790/go.mod h1:jDfRM7FcilCzHH/e9qn6dsT145K34l5v+OpcnNgKAAA= google.golang.org/genproto v0.0.0-20200729003335-053ba62fc06f/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= google.golang.org/genproto v0.0.0-20200804131852-c06518451d9c/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= google.golang.org/genproto v0.0.0-20200825200019-8632dd797987/go.mod 
h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= google.golang.org/genproto v0.0.0-20200904004341-0bd0a958aa1d/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= google.golang.org/genproto v0.0.0-20201109203340-2640f1f9cdfb/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= google.golang.org/genproto v0.0.0-20201201144952-b05cb90ed32e/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= google.golang.org/genproto v0.0.0-20201210142538-e3217bee35cc/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= google.golang.org/genproto v0.0.0-20201214200347-8c77b98c765d/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= google.golang.org/genproto v0.0.0-20210222152913-aa3ee6e6a81c/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= google.golang.org/genproto v0.0.0-20210303154014-9728d6b83eeb/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= google.golang.org/genproto v0.0.0-20210310155132-4ce2db91004e/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= google.golang.org/genproto v0.0.0-20210319143718-93e7006c17a6/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= google.golang.org/genproto v0.0.0-20210402141018-6c239bbf2bb1/go.mod h1:9lPAdzaEmUacj36I+k7YKbEc5CXzPIeORRgDAUOu28A= google.golang.org/genproto v0.0.0-20210513213006-bf773b8c8384/go.mod h1:P3QM42oQyzQSnHPnZ/vqoCdDmzH28fzWByN9asMeM8A= google.golang.org/genproto v0.0.0-20210602131652-f16073e35f0c/go.mod h1:UODoCrxHCcBojKKwX1terBiRUaqAsFqJiF615XL43r0= google.golang.org/genproto v0.0.0-20210604141403-392c879c8b08/go.mod h1:UODoCrxHCcBojKKwX1terBiRUaqAsFqJiF615XL43r0= google.golang.org/genproto v0.0.0-20210608205507-b6d2f5bf0d7d/go.mod h1:UODoCrxHCcBojKKwX1terBiRUaqAsFqJiF615XL43r0= google.golang.org/genproto v0.0.0-20210624195500-8bfb893ecb84/go.mod h1:SzzZ/N+nwJDaO1kznhnlzqS8ocJICar6hYhVyhi++24= google.golang.org/genproto v0.0.0-20210713002101-d411969a0d9a/go.mod h1:AxrInvYm1dci+enl5hChSFPOmmUF1+uAa/UsgNRWd7k= google.golang.org/genproto v0.0.0-20210716133855-ce7ef5c701ea/go.mod h1:AxrInvYm1dci+enl5hChSFPOmmUF1+uAa/UsgNRWd7k= google.golang.org/genproto v0.0.0-20210728212813-7823e685a01f/go.mod h1:ob2IJxKrgPT52GcgX759i1sleT07tiKowYBGbczaW48= google.golang.org/genproto v0.0.0-20210805201207-89edb61ffb67/go.mod h1:ob2IJxKrgPT52GcgX759i1sleT07tiKowYBGbczaW48= google.golang.org/genproto v0.0.0-20210813162853-db860fec028c/go.mod h1:cFeNkxwySK631ADgubI+/XFU/xp8FD5KIVV4rj8UC5w= google.golang.org/genproto v0.0.0-20210821163610-241b8fcbd6c8/go.mod h1:eFjDcFEctNawg4eG61bRv87N7iHBWyVhJu7u1kqDUXY= google.golang.org/genproto v0.0.0-20210828152312-66f60bf46e71/go.mod h1:eFjDcFEctNawg4eG61bRv87N7iHBWyVhJu7u1kqDUXY= google.golang.org/genproto v0.0.0-20210831024726-fe130286e0e2/go.mod h1:eFjDcFEctNawg4eG61bRv87N7iHBWyVhJu7u1kqDUXY= google.golang.org/genproto v0.0.0-20210903162649-d08c68adba83/go.mod h1:eFjDcFEctNawg4eG61bRv87N7iHBWyVhJu7u1kqDUXY= google.golang.org/genproto v0.0.0-20210909211513-a8c4777a87af/go.mod h1:eFjDcFEctNawg4eG61bRv87N7iHBWyVhJu7u1kqDUXY= google.golang.org/genproto v0.0.0-20210924002016-3dee208752a0/go.mod h1:5CzLGKJ67TSI2B9POpiiyGha0AjJvZIUgRMt1dSmuhc= google.golang.org/genproto v0.0.0-20211008145708-270636b82663/go.mod h1:5CzLGKJ67TSI2B9POpiiyGha0AjJvZIUgRMt1dSmuhc= google.golang.org/genproto v0.0.0-20211028162531-8db9c33dc351/go.mod h1:5CzLGKJ67TSI2B9POpiiyGha0AjJvZIUgRMt1dSmuhc= google.golang.org/genproto v0.0.0-20211118181313-81c1377c94b1/go.mod h1:5CzLGKJ67TSI2B9POpiiyGha0AjJvZIUgRMt1dSmuhc= google.golang.org/genproto v0.0.0-20211129164237-f09f9a12af12/go.mod 
h1:5CzLGKJ67TSI2B9POpiiyGha0AjJvZIUgRMt1dSmuhc= google.golang.org/genproto v0.0.0-20211203200212-54befc351ae9/go.mod h1:5CzLGKJ67TSI2B9POpiiyGha0AjJvZIUgRMt1dSmuhc= google.golang.org/genproto v0.0.0-20211206160659-862468c7d6e0/go.mod h1:5CzLGKJ67TSI2B9POpiiyGha0AjJvZIUgRMt1dSmuhc= google.golang.org/genproto v0.0.0-20211208223120-3a66f561d7aa/go.mod h1:5CzLGKJ67TSI2B9POpiiyGha0AjJvZIUgRMt1dSmuhc= google.golang.org/genproto v0.0.0-20240213162025-012b6fc9bca9 h1:9+tzLLstTlPTRyJTh+ah5wIMsBW5c4tQwGTN3thOW9Y= google.golang.org/genproto v0.0.0-20240213162025-012b6fc9bca9/go.mod h1:mqHbVIp48Muh7Ywss/AD6I5kNVKZMmAa/QEW58Gxp2s= google.golang.org/genproto/googleapis/api v0.0.0-20240311132316-a219d84964c2 h1:rIo7ocm2roD9DcFIX67Ym8icoGCKSARAiPljFhh5suQ= google.golang.org/genproto/googleapis/api v0.0.0-20240311132316-a219d84964c2/go.mod h1:O1cOfN1Cy6QEYr7VxtjOyP5AdAuR0aJ/MYZaaof623Y= google.golang.org/genproto/googleapis/rpc v0.0.0-20240401170217-c3f982113cda h1:LI5DOvAxUPMv/50agcLLoo+AdWc1irS9Rzz4vPuD1V4= google.golang.org/genproto/googleapis/rpc v0.0.0-20240401170217-c3f982113cda/go.mod h1:WtryC6hu0hhx87FDGxWCDptyssuo68sk10vYjF+T9fY= google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38= google.golang.org/grpc v1.21.1/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM= google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY= google.golang.org/grpc v1.26.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= google.golang.org/grpc v1.27.1/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= google.golang.org/grpc v1.28.0/go.mod h1:rpkK4SK4GF4Ach/+MFLZUBavHOvF2JJB5uozKKal+60= google.golang.org/grpc v1.29.1/go.mod h1:itym6AZVZYACWQqET3MqgPpjcuV5QH3BxFS3IjizoKk= google.golang.org/grpc v1.30.0/go.mod h1:N36X2cJ7JwdamYAgDz+s+rVMFjt3numwzf/HckM8pak= google.golang.org/grpc v1.31.0/go.mod h1:N36X2cJ7JwdamYAgDz+s+rVMFjt3numwzf/HckM8pak= google.golang.org/grpc v1.31.1/go.mod h1:N36X2cJ7JwdamYAgDz+s+rVMFjt3numwzf/HckM8pak= google.golang.org/grpc v1.33.1/go.mod h1:fr5YgcSWrqhRRxogOsw7RzIpsmvOZ6IcH4kBYTpR3n0= google.golang.org/grpc v1.33.2/go.mod h1:JMHMWHQWaTccqQQlmk3MJZS+GWXOdAesneDmEnv2fbc= google.golang.org/grpc v1.34.0/go.mod h1:WotjhfgOW/POjDeRt8vscBtXq+2VjORFy659qA51WJ8= google.golang.org/grpc v1.35.0/go.mod h1:qjiiYl8FncCW8feJPdyg3v6XW24KsRHe+dy9BAGRRjU= google.golang.org/grpc v1.36.0/go.mod h1:qjiiYl8FncCW8feJPdyg3v6XW24KsRHe+dy9BAGRRjU= google.golang.org/grpc v1.36.1/go.mod h1:qjiiYl8FncCW8feJPdyg3v6XW24KsRHe+dy9BAGRRjU= google.golang.org/grpc v1.37.0/go.mod h1:NREThFqKR1f3iQ6oBuvc5LadQuXVGo9rkm5ZGrQdJfM= google.golang.org/grpc v1.37.1/go.mod h1:NREThFqKR1f3iQ6oBuvc5LadQuXVGo9rkm5ZGrQdJfM= google.golang.org/grpc v1.38.0/go.mod h1:NREThFqKR1f3iQ6oBuvc5LadQuXVGo9rkm5ZGrQdJfM= google.golang.org/grpc v1.39.0/go.mod h1:PImNr+rS9TWYb2O4/emRugxiyHZ5JyHW5F+RPnDzfrE= google.golang.org/grpc v1.39.1/go.mod h1:PImNr+rS9TWYb2O4/emRugxiyHZ5JyHW5F+RPnDzfrE= google.golang.org/grpc v1.40.0/go.mod h1:ogyxbiOoUXAkP+4+xa6PZSE9DZgIHtSpzjDTB9KAK34= google.golang.org/grpc v1.40.1/go.mod h1:ogyxbiOoUXAkP+4+xa6PZSE9DZgIHtSpzjDTB9KAK34= google.golang.org/grpc v1.42.0/go.mod h1:k+4IHHFw41K8+bbowsex27ge2rCb65oeWqe4jJ590SU= google.golang.org/grpc v1.62.1 h1:B4n+nfKzOICUXMgyrNd19h/I9oH0L1pizfk1d4zSgTk= 
google.golang.org/grpc v1.62.1/go.mod h1:IWTG0VlJLCh1SkC58F7np9ka9mx/WNkjl4PGJaiq+QE= google.golang.org/grpc/cmd/protoc-gen-go-grpc v1.1.0/go.mod h1:6Kw0yEErY5E/yWrBtf03jp27GLLJujG4z/JK95pnjjw= google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE= google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo= google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= google.golang.org/protobuf v1.24.0/go.mod h1:r/3tXBNzIEhYS9I1OUVjXDlt8tc493IdKGjtUeSXeh4= google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= google.golang.org/protobuf v1.35.2 h1:8Ar7bF+apOIoThw1EdZl0p1oWvMqTHmpA2fRTyZO8io= google.golang.org/protobuf v1.35.2/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= gopkg.in/ini.v1 v1.66.2/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k= gopkg.in/ini.v1 v1.67.0 h1:Dgnx+6+nfE+IfzjUEISNeydPJh9AXNNsWbGP9KzCsOA= gopkg.in/ini.v1 v1.67.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k= gopkg.in/warnings.v0 v0.1.2 h1:wFXVbFY8DY5/xOe1ECiWdKCzZlxgshcYVNkBHstARME= gopkg.in/warnings.v0 v0.1.2/go.mod h1:jksf8JmL6Qr/oQM2OXTHunEvvTAsrWBLb6OOjuVWRNI= gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.3/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.5/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gotest.tools/v3 v3.0.3 h1:4AuOwCGf4lLR9u3YOe2awrHygurzhO/HeQ6laiA6Sx0=
gotest.tools/v3 v3.0.3/go.mod h1:Z7Lb0S5l+klDB31fvDQX8ss/FlKDxtlFlw3Oa8Ymbl8=
honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
honnef.co/go/tools v0.0.0-20190106161140-3f1c8253044a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
honnef.co/go/tools v0.0.0-20190418001031-e561f6794a2a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg=
honnef.co/go/tools v0.0.1-2020.1.3/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k=
honnef.co/go/tools v0.0.1-2020.1.4/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k=
modernc.org/gc/v3 v3.0.0-20240107210532-573471604cb6 h1:5D53IMaUuA5InSeMu9eJtlQXS2NxAhyWQvkKEgXZhHI=
modernc.org/gc/v3 v3.0.0-20240107210532-573471604cb6/go.mod h1:Qz0X07sNOR1jWYCrJMEnbW/X55x206Q7Vt4mz6/wHp4=
modernc.org/libc v1.55.3 h1:AzcW1mhlPNrRtjS5sS+eW2ISCgSOLLNyFzRh/V3Qj/U=
modernc.org/libc v1.55.3/go.mod h1:qFXepLhz+JjFThQ4kzwzOjA/y/artDeg+pcYnY+Q83w=
modernc.org/mathutil v1.6.0 h1:fRe9+AmYlaej+64JsEEhoWuAYBkOtQiMEU7n/XgfYi4=
modernc.org/mathutil v1.6.0/go.mod h1:Ui5Q9q1TR2gFm0AQRqQUaBWFLAhQpCwNcuhBOSedWPo=
modernc.org/memory v1.8.0 h1:IqGTL6eFMaDZZhEWwcREgeMXYwmW83LYW8cROZYkg+E=
modernc.org/memory v1.8.0/go.mod h1:XPZ936zp5OMKGWPqbD3JShgd/ZoQ7899TUuQqxY+peU=
modernc.org/sqlite v1.33.1 h1:trb6Z3YYoeM9eDL1O8do81kP+0ejv+YzgyFo+Gwy0nM=
modernc.org/sqlite v1.33.1/go.mod h1:pXV2xHxhzXZsgT/RtTFAPY6JJDEvOTcTdwADQCCWD4k=
modernc.org/strutil v1.2.0 h1:agBi9dp1I+eOnxXeiZawM8F4LawKv4NzGWSaLfyeNZA=
modernc.org/strutil v1.2.0/go.mod h1:/mdcBmfOibveCTBxUl5B5l6W+TTH1FXPLHZE6bTosX0=
modernc.org/token v1.1.0 h1:Xl7Ap9dKaEs5kLoOQeQmPWevfnk/DM5qcLcYlA8ys6Y=
modernc.org/token v1.1.0/go.mod h1:UGzOrNV1mAFSEB63lOFHIpNRUVMvYTc6yu1SMY/XTDM=
rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8=
rsc.io/quote/v3 v3.1.0/go.mod h1:yEA65RcK8LyAZtP9Kv3t0HmxON59tX3rD+tICJqUlj0=
rsc.io/sampler v1.3.0/go.mod h1:T1hPZKmBbMNahiBKFy5HrXp6adAjACjK9JXDnKaTXpA=
sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY=

================================================
FILE: examples/sbom-meta/main.go
================================================
package main

import (
    "context"
    "encoding/json"
    "flag"
    "fmt"
    "os"
    "sort"
    "strings"
    "time"

    "github.com/dustin/go-humanize"
    "github.com/fatih/color"
    "github.com/jedib0t/go-pretty/v6/table"
    "github.com/jmoiron/sqlx"
    _ "github.com/lib/pq"

    "github.com/anchore/syft/syft"
    "github.com/caarlos0/env"
)

type config struct {
    Host     string `env:"CHAI_DB_HOST" envDefault:"localhost"`
    User     string `env:"CHAI_DB_USER" envDefault:"postgres"`
    Password string `env:"CHAI_DB_PASSWORD" envDefault:"s3cr3t"`
    Port     int    `env:"CHAI_DB_PORT" envDefault:"5435"`
}

type packageMeta struct {
    Name           string    `db:"name" json:"name"`
    Downloads      int64     `db:"downloads" json:"downloads"`
    Dependents     int64     `db:"dependents" json:"dependents,omitempty"`
    URL            string    `db:"url" json:"url"`
    FirstPublished time.Time `db:"first_published" json:"firstPublished"`
    LastPublished  time.Time `db:"last_published" json:"lastPublished"`
}

const packageMetaFullSQL = `
SELECT p.name,
    count(d.id) AS dependents,
    sum(v.downloads) AS downloads,
    min(u.url) AS url,
    min(v.published_at) AS "first_published",
    max(v.published_at) AS "last_published"
FROM packages AS p
JOIN dependencies AS d ON d.dependency_id = p.id
JOIN versions v ON v.package_id = p.id
JOIN package_urls pu ON pu.package_id = p.id
JOIN urls u ON u.id = pu.url_id
JOIN url_types ut ON u.url_type_id = ut.id
WHERE ut.name = 'repository' AND p.name = $1
GROUP BY p.name`

const packageMetaSQL = `
SELECT p.name,
    sum(v.downloads) AS downloads,
    min(u.url) AS url,
    min(v.published_at) AS "first_published",
    max(v.published_at) AS "last_published"
FROM packages AS p
JOIN versions v ON v.package_id = p.id
JOIN package_urls pu ON pu.package_id = p.id
JOIN urls u ON u.id = pu.url_id
JOIN url_types ut ON u.url_type_id = ut.id
WHERE ut.name = 'repository' AND p.name = $1
GROUP BY p.name`
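// NOTE: packageMetaFullSQL also counts dependents for a package via the
// dependencies join; main() below issues packageMetaSQL, which skips that
// join, so packageMeta.Dependents keeps its zero value (and is dropped from
// JSON output via `omitempty`).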
func main() {
    var sourcePath string
    var cfg config
    var jsonFlag = flag.Bool("json", false, "Output JSON")
    var sortFlag = flag.String("sort", "published,asc", "Sort by field,asc|desc")
    flag.Usage = usage
    flag.Parse()
    args := flag.Args()

    err := env.Parse(&cfg)
    if err != nil {
        panic(err)
    }

    // use the current directory if no source path is specified
    switch len(args) {
    case 0:
        sourcePath = "."
    case 1:
        sourcePath = args[0]
    default:
        usage()
        os.Exit(1)
    }
    sortArg := strings.ToLower(*sortFlag)

    // connect to the chai db, defaulting to the docker-compose setup
    connStr := fmt.Sprintf("postgresql://%s:%s@%s:%d/chai?sslmode=disable",
        cfg.User, cfg.Password, cfg.Host, cfg.Port)
    // fmt.Printf("connecting to: %s\n", connStr)
    db, err := sqlx.Open("postgres", connStr)
    if err != nil {
        panic(err)
    }

    // use syft to get the sbom
    src, err := syft.GetSource(context.Background(), sourcePath, nil)
    if err != nil {
        panic(err)
    }
    sbom, err := syft.CreateSBOM(context.Background(), src, nil)
    if err != nil {
        panic(err)
    }

    pms := []packageMeta{}
    for p := range sbom.Artifacts.Packages.Enumerate() {
        rs := []packageMeta{}
        err = db.Select(&rs, packageMetaSQL, p.Name)
        if err != nil {
            panic(err)
        }
        pms = append(pms, rs...)
    }
    pms = dedupePackages(pms)

    sort.Slice(pms, func(i, j int) bool {
        switch sortArg {
        case "package", "package,asc":
            return pms[i].Name < pms[j].Name
        case "package,desc":
            return pms[i].Name > pms[j].Name
        case "repository", "repository,asc":
            return pms[i].URL < pms[j].URL
        case "repository,desc":
            return pms[i].URL > pms[j].URL
        case "published", "published,asc":
            return pms[i].LastPublished.After(pms[j].LastPublished)
        case "published,desc":
            return pms[i].LastPublished.Before(pms[j].LastPublished)
        case "downloads", "downloads,asc":
            return pms[i].Downloads < pms[j].Downloads
        case "downloads,desc":
            return pms[i].Downloads > pms[j].Downloads
        default:
            return pms[i].Name < pms[j].Name
        }
    })

    if *jsonFlag {
        js, err := json.Marshal(pms)
        if err != nil {
            panic(err)
        }
        fmt.Printf("%s", js)
    } else {
        printPackagesMeta(pms)
    }
}

func printPackagesMeta(pms []packageMeta) {
    t := table.NewWriter()
    t.SetOutputMirror(os.Stdout)
    t.AppendHeader(table.Row{"Package", "Repository", "Published", "Downloads"})
    t.SetColumnConfigs([]table.ColumnConfig{
        {Name: "Package"},
        {Name: "Repository"},
        {Name: "Published", Transformer: formatTime},
        {Name: "Downloads", Transformer: formatNumber},
    })
    for _, pm := range pms {
        p := color.New(color.FgHiGreen).Sprint(pm.Name)
        u := pm.URL
        t.Style().Options.DrawBorder = false
        t.AppendRow(table.Row{p, u, pm.LastPublished, pm.Downloads})
    }
    t.Render()
}

func formatTime(val interface{}) string {
    if t, ok := val.(time.Time); ok {
        return humanize.Time(t)
    }
    return "Bad time format"
}

func formatNumber(val interface{}) string {
    if n, ok := val.(int64); ok {
        return humanize.Comma(n)
    }
    return "NaN"
}

func dedupePackages(pms []packageMeta) []packageMeta {
    pns := make(map[string]bool)
    dd := []packageMeta{}
    for _, pm := range pms {
        if _, seen := pns[pm.Name]; !seen {
            pns[pm.Name] = true
            dd = append(dd, pm)
        }
    }
    return dd
}

func usage() {
    fmt.Println("sbom-meta [SOURCE]")
    flag.PrintDefaults()
}
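Given the flags registered in `main` above (`-json` and `-sort`) and the `usage` banner, an invocation might look like the sketch below. This assumes it is run from `examples/sbom-meta/` with the CHAI database reachable via the `CHAI_DB_*` environment variables; the project path is a placeholder:

```sh
# scan a project, sort results by download count, and emit JSON
go run . -json -sort downloads,desc /path/to/project
```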
formatNumber(val interface{}) string { if n, ok := val.(int64); ok { return humanize.Comma(n) } return "NaN" } func dedupePackages(pms []packageMeta) []packageMeta { pns := make(map[string]bool) dd := []packageMeta{} for _, pm := range pms { if _, ok := pns[pm.Name]; !ok { pns[pm.Name] = true dd = append(dd, pm) } } return dd } func usage() { fmt.Println("sbom-meta [SOURCE]") flag.PrintDefaults() } ================================================ FILE: examples/visualizer/README.md ================================================ # Visualizer An example Chai application that displays a graphical representation of a specific package. ## Requirements 1. [python]: version 3.11 2. [pip]: Ensure you have pip installed 3. [virtualenv]: It's recommended to use a virtual environment to manage dependencies ## Getting Started 1. Set up a virtual environment ```sh python -m venv venv source venv/bin/activate ``` 2. Install required packages ```sh pip install -r requirements.txt ``` 3. Ensure `CHAI_DATABASE_URL` is available as an environment variable. The default value from our docker config is below: ```sh export CHAI_DATABASE_URL=postgresql://postgres:s3cr3t@localhost:5432/chai ``` ## Usage 1. Start the [Chai DB](https://github.com/teaxyz/chai-oss) with `docker compose up`. 1. Run the visualizer, passing the package you want to graph: ```sh python main.py <package> ``` ### Arguments - `package` (required): The package to visualize - `--depth`: Maximum depth to go to. Default is `9999`, meaning all possible depths - `--profile`: Enable performance profiling. Default is `False`. - `--image-type`: File type for the saved image, `svg` or `png`. Default is `svg`. ## Share your visuals If you create interesting visuals, share them on our [Discord]. Feel free to mess around and create alternate ways to generate them. [python]: https://www.python.org [pip]: https://pip.pypa.io/en/stable/installation/ [virtualenv]: https://virtualenv.pypa.io/en/latest/ [Discord]: https://discord.com/invite/tea-906608167901876256 ================================================ FILE: examples/visualizer/main.py ================================================ import argparse import cProfile import pstats from os import getenv from pstats import SortKey import psycopg2 import rustworkx as rx from rustworkx.visualization import graphviz_draw from tabulate import tabulate CHAI_DATABASE_URL = getenv("CHAI_DATABASE_URL") class Package: id: str name: str pagerank: float depth: int | None def __init__(self, id: str): self.id = id self.name = "" self.pagerank = 0 self.depth = None def __str__(self): return self.name class Graph(rx.PyDiGraph): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.node_index_map: dict[Package, int] = {} self._package_cache: dict[str, Package] = {} # The data model has IDs, but rustworkx uses indexes # Good news - it can index by object.
So, we're just keeping track of that def _get_or_create_package(self, pkg_id: str) -> Package: """A cache to avoid creating the same package multiple times""" if pkg_id not in self._package_cache: pkg = Package(pkg_id) self._package_cache[pkg_id] = pkg return self._package_cache[pkg_id] def safely_add_node(self, pkg_id: str) -> int: """Adds a node to the graph if it doesn't already exist""" pkg = self._get_or_create_package(pkg_id) if pkg not in self.node_index_map: index = super().add_node(pkg) self.node_index_map[pkg] = index return index return self.node_index_map[pkg] def safely_add_nodes(self, nodes: list[str]) -> list[int]: return [self.safely_add_node(node) for node in nodes] def pagerank(self) -> None: pageranks = rx.pagerank(self) for index in self.node_indexes(): self[index].pagerank = pageranks[index] def nameless_nodes(self) -> list[str]: return [self[i].id for i in self.node_indexes() if self[i].name == ""] def max_depth(self) -> int: return max([self[i].depth for i in self.node_indexes()]) class DB: """Prepares the sql statements and connects to the database""" def __init__(self): self.connect() self.cursor.execute( "PREPARE select_id AS SELECT id FROM packages WHERE name = $1" ) self.cursor.execute( "PREPARE select_name AS SELECT id, name FROM packages WHERE id = ANY($1)" ) self.cursor.execute( "PREPARE select_deps AS \ SELECT DISTINCT p.id, p.name, d.dependency_id FROM packages p \ JOIN versions v ON p.id = v.package_id \ JOIN dependencies d ON v.id = d.version_id \ WHERE p.id = ANY($1)" ) def connect(self) -> None: if not CHAI_DATABASE_URL: raise RuntimeError("Environment variable CHAI_DATABASE_URL is not set.") try: self.conn = psycopg2.connect(CHAI_DATABASE_URL) self.cursor = self.conn.cursor() except psycopg2.OperationalError as e: raise RuntimeError(f"Failed to connect to the database: {e}") from e def select_id(self, package: str) -> int: self.cursor.execute("EXECUTE select_id (%s)", (package,)) return self.cursor.fetchone()[0] def select_deps(self, ids: list[str]) -> dict[str, dict[str, str | set[str]]]: # NOTE: this might be intense for larger package managers # NOTE: I have to cast the list to a uuid[] for psycopg2 to correctly handle it self.cursor.execute("EXECUTE select_deps (%s::uuid[])", (ids,)) flat = self.cursor.fetchall() # now, return this as a map capturing the package name and its dependencies result = {} for pkg_id, pkg_name, dep_id in flat: # add the package if it doesn't already exist in result if pkg_id not in result: result[pkg_id] = {"name": pkg_name, "dependencies": set()} # add the dependency to the dependencies set result[pkg_id]["dependencies"].add(dep_id) return result def select_name(self, ids: list[str]) -> list[tuple[str, str]]: self.cursor.execute("EXECUTE select_name (%s::uuid[])", (ids,)) return self.cursor.fetchall() def larger_query(db: DB, root_package: str, max_depth: int) -> Graph: graph = Graph() visited = set() leafs = set() # above sets will use the id of the package root_id = db.select_id(root_package) leafs.add(root_id) depth = 0 while leafs - visited: query = list(leafs - visited) dependencies = db.select_deps(query) # Increment the depth, and get out if too much depth += 1 if depth > max_depth: # Set the depth for the remaining leafs for pkg_id in query: i = graph.safely_add_node(pkg_id) graph[i].depth = depth break for pkg_id in query: i = graph.safely_add_node(pkg_id) # Have we encountered this node before? 
If not, set the depth if graph[i].depth is None: graph[i].depth = depth if pkg_id in dependencies: graph[i].name = dependencies[pkg_id]["name"] js = graph.safely_add_nodes(dependencies[pkg_id]["dependencies"]) edges = [(i, j, None) for j in js] graph.add_edges_from(edges) leafs.update(dependencies[pkg_id]["dependencies"]) visited.update(query) # Add the names for the packages that don't have dependencies nameless_nodes = graph.nameless_nodes() names = db.select_name(nameless_nodes) for pkg_id, pkg_name in names: i = graph.safely_add_node(pkg_id) graph[i].name = pkg_name return graph def display(graph: Graph): sorted_nodes = sorted(graph.node_indexes(), key=lambda x: graph[x].depth) headers = ["Package", "First Depth", "Dependencies", "Dependents", "Pagerank"] data = [] for node in sorted_nodes: data.append( [ graph[node], graph[node].depth, graph.out_degree(node), graph.in_degree(node), graph[node].pagerank, ] ) print(tabulate(data, headers=headers, floatfmt=".8f", intfmt=",")) def draw(graph: Graph, package: str, img_type: str = "svg"): ALLOWABLE_FILE_TYPES = ["svg", "png"] if img_type not in ALLOWABLE_FILE_TYPES: raise ValueError(f"file type must be one of {ALLOWABLE_FILE_TYPES}") max_depth = graph.max_depth() total_nodes = graph.num_nodes() total_edges = graph.num_edges() def depth_to_grayscale(depth: int) -> str: """Convert depth to a grayscale color.""" if depth == 1: return "red" return f"gray{depth + 10 + (depth - 1) // 9}" # Unused because I don't visualize edges def color_edge(edge): out_dict = { "color": "lightgrey", "fillcolor": "lightgrey", "penwidth": "0.05", "arrowsize": "0.05", "arrowhead": "tee", } return out_dict def color_node(node: Package): scale = 20 def label_nodes(node: Package): if node.pagerank > 0.01: return f"{node.name}" return "" def size_center_node(node: Package): if node.depth == 1: return "1" return str(node.pagerank * scale) out_dict = { "label": label_nodes(node), "fontsize": "5", "fontcolor": "gray", "fontname": "Menlo", "color": depth_to_grayscale(node.depth), "shape": "circle", "style": "filled", "fixedsize": "True", "width": size_center_node(node), "height": size_center_node(node), } return out_dict label = f"<{package} (big red dot)
depth: {max_depth}
nodes: {total_nodes!s}
edges: {total_edges!s}>" graph_attr = { "beautify": "True", "splines": "none", "overlap": "0", "label": label, "labelloc": "t", "labeljust": "l", "fontname": "Menlo", } graphviz_draw( graph, node_attr_fn=color_node, edge_attr_fn=color_edge, graph_attr=graph_attr, method="twopi", # NOTE: sfdp works as well filename=f"{package}.{img_type}", image_type=img_type, ) def latest(db: DB, package: str, depth: int, img_type: str): G = larger_query(db, package, depth) G.pagerank() display(G) draw(G, package, img_type) print("✅ Saved image") if __name__ == "__main__": db = DB() parser = argparse.ArgumentParser() parser.add_argument("package", help="The package to visualize", type=str) parser.add_argument( "--depth", help="Maximum depth to go to", type=int, default=9999 ) parser.add_argument( "--profile", help="Performance!", action="store_true", default=False ) parser.add_argument( "--image-type", help="The file type to save the image as", type=str, default="svg", ) args = parser.parse_args() package = args.package depth = args.depth profile = args.profile img_type = args.image_type if profile: profiler = cProfile.Profile() profiler.enable() latest(db, package, depth, img_type) if profile: profiler.disable() stats = pstats.Stats(profiler).sort_stats(SortKey.TIME) stats.print_stats() ================================================ FILE: examples/visualizer/monitor.py ================================================ import argparse import time from collections import defaultdict from collections.abc import Callable from functools import wraps from main import DB, latest METRICS: list[str] = [ "total_execution_time", "query_count", "total_query_time", "non_query_time", ] class Result: METRICS = METRICS # bind the module-level metric names as a class attribute, so self.METRICS and Result.METRICS resolve def __init__(self, **kwargs): for metric in self.METRICS: setattr(self, metric, kwargs[metric]) def __str__(self): return "\n".join( f"{metric}: {getattr(self, metric):.3f}s" if metric != "query_count" # I don't like this else f"{metric}: {getattr(self, metric)}" for metric in self.METRICS ) class MonitoredDB(DB): """Base monitoring wrapper for DB classes""" def __init__(self): self.query_count = 0 self.total_query_time = 0 super().__init__() def _monitor_query(self, func: Callable) -> Callable: @wraps(func) def wrapper(*args, **kwargs): self.query_count += 1 start_time = time.perf_counter() result = func(*args, **kwargs) self.total_query_time += time.perf_counter() - start_time return result return wrapper def connect(self): super().connect() # and wrap all the methods with monitoring for name in dir(self): if name.startswith("select_"): setattr(self, name, self._monitor_query(getattr(self, name))) def run_monitored(func: Callable, package: str) -> Result: """Run the main program with monitoring""" db = MonitoredDB() start_time = time.perf_counter() func(db, package, 9999, "svg") # latest() also takes depth and image type; pass main.py's defaults total_time = time.perf_counter() - start_time return Result( total_execution_time=total_time, query_count=db.query_count, total_query_time=db.total_query_time, non_query_time=total_time - db.total_query_time, ) def compare_implementations(package: str, runs: int = 3) -> dict[str, list[Result]]: """Compare old and new implementations""" implementations = [latest] results: dict[str, list[Result]] = defaultdict(list) for i in range(runs): print(f"\nRun {i + 1}/{runs}") for func in implementations: func_name = func.__name__ print(f"Running {func_name}...") result = run_monitored(func, package) results[func_name].append(result) return results def compare_results(results: dict[str, list[Result]], runs: int) -> None: implementations = list(results.keys())
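# Render a fixed-width comparison table: a header of implementation names, one row per metric, then the relative improvement vs the first implementation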
print("\nResults Comparison:") print("-" * (25 + 20 * len(implementations))) # Header row with implementation names print(f"{'Metric':<25}", end="") for impl in implementations: print(f"{impl:>20}", end="") print() print("-" * (25 + 20 * len(implementations))) # Data rows for metric in Result.METRICS: print(f"{metric:<25}", end="") for impl in implementations: avg = sum(getattr(r, metric) for r in results[impl]) / runs if metric == "query_count": print(f"{avg:>20.0f}", end="") else: print(f"{avg:>20.3f}s", end="") print() # Calculate improvements relative to first implementation print("-" * (25 + 20 * len(implementations))) base_time = sum(r.total_execution_time for r in results[implementations[0]]) / runs for impl in implementations[1:]: new_time = sum(r.total_execution_time for r in results[impl]) / runs improvement = ((base_time - new_time) / base_time) * 100 print(f"Improvement ({impl} vs {implementations[0]}): {improvement:>+.1f}%") if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--package", help="The package to visualize") parser.add_argument( "--profile", help="Whether to profile the code", action="store_true", default=False, ) parser.add_argument("--runs", type=int, default=3, help="Number of runs to average") args = parser.parse_args() results = compare_implementations(args.package, args.runs) compare_results(results, args.runs) ================================================ FILE: package_managers/crates/Dockerfile ================================================ FROM ghcr.io/astral-sh/uv:python3.11-bookworm-slim # Copy everything from the root directory (build context) COPY . . # Install core requirements using uv WORKDIR /core RUN uv pip install --system -r requirements.txt WORKDIR / # Run the main application CMD ["python", "/package_managers/crates/main.py"] ================================================ FILE: package_managers/crates/README.md ================================================ # crates The crates service uses the database dump provided by crates.io and coerces their data model into CHAI's. It's containerized using Docker for easy deployment and consistency. It's also written in `python` as a first draft, and uses a lot of the [core tools](../../core/). ## Getting Started To just run the crates service, you can use the following commands: ```bash docker compose build crates docker compose run crates ``` ## Execution Steps The crates loader goes through the following steps when executed: 1. **Initialization**: The loader starts by initializing the configuration and database connection using `Config` and `CratesDB`. 2. **Fetching**: If the `FETCH` flag is set to true, the loader downloads the latest cargo data from the source using `TarballFetcher`. If needed, it saves to disk. 3. **Transformation**: The downloaded data is parsed and transformed using `CratesTransformer.parse()` into a format compatible with the CHAI database schema. 4. **Deletion**: The loader identifies crates that exist in the database but are no longer in the registry (crates.io allows deletion _sometimes_). 5. **Cache Building**: The loader builds a cache by setting the current graph and URLs from the database, then creates a `Cache` object for efficient diffing. 6. **Diff Process**: The loader performs a diff operation to categorize data into: - New packages vs updated packages - New URLs vs existing URLs - New package URLs vs updated package URLs - New dependencies vs removed dependencies 7. 
**Data Ingestion**: All categorized data is loaded into the database via a single `db.ingest()` call. The main execution logic is in the `main` function in [main.py](main.py): ```python def main(config: Config, db: CratesDB): logger = Logger("crates_main") logger.log("Starting crates_main") # fetch, write, transform if config.exec_config.fetch: fetcher = TarballFetcher(...) files = fetcher.fetch() if not config.exec_config.no_cache: fetcher.write(files) transformer = CratesTransformer(config) transformer.parse() # identify and handle deletions deletions = identify_deletions(transformer, db) if deletions: db.delete_packages_by_import_id(deletions) # build cache and diff db.set_current_graph() db.set_current_urls(crates_urls) cache = Cache(...) # perform diff and ingest diff = Diff(config, cache) # ... diff process ... db.ingest(new_packages, final_new_urls, new_package_urls, new_deps, removed_deps, updated_packages, updated_package_urls) ``` ### Configuration Flags The crates loader supports several configuration flags: - `DEBUG`: Enables debug logging when set to true. - `TEST`: Runs the loader in test mode when set to true, skipping certain data insertions. - `FETCH`: Determines whether to fetch new data from the source when set to true. - `FREQUENCY`: Sets how often (in hours) the pipeline should run. - `NO_CACHE`: When set to true, deletes temporary files after processing. These flags can be set in the `docker-compose.yml` file: ```yaml crates: build: context: . dockerfile: ./package_managers/crates/Dockerfile environment: - CHAI_DATABASE_URL=postgresql://postgres:s3cr3t@db:5432/chai - PYTHONPATH=/ - DEBUG=${DEBUG:-false} - TEST=${TEST:-false} - FETCH=${FETCH:-true} - FREQUENCY=${FREQUENCY:-24} - NO_CACHE=${NO_CACHE:-false} ``` ## TODOs - [ ] `versions.csv` contains all the `published_by` ids, who are the users, who'd need to be loaded as well - [ ] `versions.csv` also contains licenses ================================================ FILE: package_managers/crates/db.py ================================================ from uuid import UUID from sqlalchemy import select from core.config import Config from core.db import DB from core.models import ( CanonPackage, DependsOn, LegacyDependency, Package, PackageURL, UserPackage, UserVersion, Version, ) from core.structs import CurrentGraph, CurrentURLs class CratesDB(DB): def __init__(self, config: Config): super().__init__("crates_db") self.config = config # self.set_current_graph() def set_current_graph(self) -> None: self.graph: CurrentGraph = self.current_graph(self.config.pm_config.pm_id) def set_current_urls(self, urls: set[str]) -> None: self.urls: CurrentURLs = self.current_urls(urls) def delete_packages_by_import_id(self, import_ids: set[int]) -> None: """ Delete packages identified by import_ids and all their dependent records. This is a DB class method to handle the cascade deletion properly. """ # Convert import_ids to package_ids using the cache package_ids: list[UUID] = [] for import_id in import_ids: pkg_id = self.import_id_map.get(str(import_id)) if pkg_id: package_ids.append(pkg_id) if not package_ids: self.logger.debug("No packages found to delete") return self.logger.debug(f"Deleting {len(package_ids)} crates completely") # Delete records in reverse dependency order with self.session() as session: try: # 1. Delete PackageURLs package_urls_deleted = ( session.query(PackageURL) .filter(PackageURL.package_id.in_(package_ids)) .delete(synchronize_session=False) ) # 2. 
Delete CanonPackages canon_packages_deleted = ( session.query(CanonPackage) .filter(CanonPackage.package_id.in_(package_ids)) .delete(synchronize_session=False) ) # 3. Delete UserPackages user_packages_deleted = ( session.query(UserPackage) .filter(UserPackage.package_id.in_(package_ids)) .delete(synchronize_session=False) ) # 4. Delete LegacyDependencies (both package_id and dependency_id) legacy_deps_package_deleted = ( session.query(LegacyDependency) .filter(LegacyDependency.package_id.in_(package_ids)) .delete(synchronize_session=False) ) legacy_deps_dependency_deleted = ( session.query(LegacyDependency) .filter(LegacyDependency.dependency_id.in_(package_ids)) .delete(synchronize_session=False) ) # TODO: this table is deprecated, but still contains records # we can remove this line, once all indexers use LegacyDependencies # 5. Delete DependsOn where dependency_id is in package_ids depends_on_deleted = ( session.query(DependsOn) .filter(DependsOn.dependency_id.in_(package_ids)) .delete(synchronize_session=False) ) # 6. Delete Versions and their dependencies # TODO: remove this line once all indexers stop using Versions and # we can truncate this table # First get all version ids for these packages version_ids = [ vid for (vid,) in session.query(Version.id).filter( Version.package_id.in_(package_ids) ) ] # Delete dependencies attached to these versions version_deps_deleted = 0 user_versions_deleted = 0 if version_ids: version_deps_deleted = ( session.query(DependsOn) .filter(DependsOn.version_id.in_(version_ids)) .delete(synchronize_session=False) ) user_versions_deleted = ( session.query(UserVersion) .filter(UserVersion.version_id.in_(version_ids)) .delete(synchronize_session=False) ) # Now delete the versions versions_deleted = ( session.query(Version) .filter(Version.package_id.in_(package_ids)) .delete(synchronize_session=False) ) # 7. 
Finally delete the packages packages_deleted = ( session.query(Package) .filter(Package.id.in_(package_ids)) .delete(synchronize_session=False) ) self.logger.debug("-" * 100) self.logger.debug("Going to commit delete for") self.logger.debug(f"{packages_deleted} packages") self.logger.debug(f"{versions_deleted} versions") self.logger.debug(f"{version_deps_deleted} version dependencies") self.logger.debug(f"{user_versions_deleted} user versions") self.logger.debug(f"{depends_on_deleted} direct dependencies") self.logger.debug( f"{legacy_deps_package_deleted + legacy_deps_dependency_deleted} legacy deps" # E501 ) self.logger.debug(f"{user_packages_deleted} user packages") self.logger.debug(f"{canon_packages_deleted} canon packages") self.logger.debug(f"{package_urls_deleted} package URLs") self.logger.debug("-" * 100) # Commit the transaction session.commit() except Exception as e: session.rollback() self.logger.error(f"Error deleting packages: {e}") raise def get_cargo_id_to_chai_id(self) -> dict[str, UUID]: """ Returns a map of cargo import_ids to chai_ids """ with self.session() as session: stmt = select(Package.import_id, Package.id).where( Package.package_manager_id == self.config.pm_config.pm_id ) self.import_id_map: dict[str, UUID] = { row[0]: row[1] for row in session.execute(stmt).all() } return self.import_id_map ================================================ FILE: package_managers/crates/diff.py ================================================ from datetime import datetime from uuid import UUID, uuid4 from core.config import Config from core.logger import Logger from core.models import URL, LegacyDependency, Package, PackageURL from core.structs import Cache, URLKey from package_managers.crates.structs import Crate, DependencyType class Diff: def __init__(self, config: Config, caches: Cache): self.config = config self.now = datetime.now() self.caches = caches self.logger = Logger("crates_diff") def diff_pkg(self, pkg: Crate) -> tuple[UUID, Package | None, dict | None]: """ Checks if the given pkg is in the package_cache. 
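The cache key is the crate's import_id, i.e. the crates.io numeric id stored as a string, so a crate is treated as new only when that id has never been ingested before.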
Returns: pkg_id: UUID, the id of the package in the db pkg_obj: Package | None, the package object if it's new update_payload: dict | None, the update payload if it's an update """ pkg_id: UUID crate_id: str = str(pkg.id) # import_ids are strings in the db if crate_id not in self.caches.package_map: # new package p = Package( id=uuid4(), derived_id=f"crates/{pkg.name}", name=pkg.name, package_manager_id=self.config.pm_config.pm_id, import_id=crate_id, readme=pkg.readme, created_at=self.now, updated_at=self.now, ) pkg_id = p.id return pkg_id, p, {} else: # it's in the cache, so check for changes p = self.caches.package_map[crate_id] pkg_id = p.id # check for changes # right now, that's just the readme if p.readme != pkg.readme: return ( pkg_id, None, {"id": p.id, "readme": pkg.readme, "updated_at": self.now}, ) else: # existing package, no change return pkg_id, None, None def diff_url(self, pkg: Crate, new_urls: dict[URLKey, URL]) -> dict[UUID, UUID]: """ Identifies the correct URL for this crate, based on fetched data and all URL strings collected so far Returns: resolved_urls: dict[UUID, UUID], the resolved URL for this crate """ resolved_urls: dict[UUID, UUID] = {} urls: list[URLKey] = [ URLKey(pkg.homepage, self.config.url_types.homepage), URLKey(pkg.repository, self.config.url_types.repository), URLKey(pkg.documentation, self.config.url_types.documentation), ] + ([URLKey(pkg.source, self.config.url_types.source)] if pkg.source else []) for url_key in urls: url = url_key.url url_type = url_key.url_type_id # guard: no URL if not url: continue resolved_url_id: UUID if url_key in new_urls: # if we've already tried to create this URL, use that one resolved_url_id = new_urls[url_key].id elif url_key in self.caches.url_map: # if it's already in the database, let's use that one resolved_url_id = self.caches.url_map[url_key].id else: # most will be here because it's the first run of clean data new_url = URL( id=uuid4(), url=url, url_type_id=url_type, created_at=self.now, updated_at=self.now, ) resolved_url_id = new_url.id # NOTE: THIS IS SUPER IMPORTANT # we're adding to new_urls here, not just in main new_urls[url_key] = new_url resolved_urls[url_type] = resolved_url_id return resolved_urls def diff_pkg_url( self, pkg_id: UUID, resolved_urls: dict[UUID, UUID] ) -> tuple[list[PackageURL], list[dict]]: """Takes in a package_id and resolved URLs from diff_url, and generates new PackageURL objects as well as a list of changes to existing ones Inputs: - pkg_id: the id of the package - resolved_urls: a map of url types to final URL ID for this pkg Outputs: - new_package_urls: a list of new PackageURL objects - updated_package_urls: a list of changes to existing PackageURL objects TODO: - We're updating every single package_url entity, which takes time. We should check if the latest URL has changed, and if so, only update that one. """ new_links: list[PackageURL] = [] updates: list[dict] = [] # what are the existing links? existing: set[UUID] = { pu.url_id for pu in self.caches.package_urls.get(pkg_id, set()) } # for the correct URL type / URL for this package: for _url_type, url_id in resolved_urls.items(): if url_id not in existing: # new link! 
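# no PackageURL row currently links this package to this URL id, so create one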
new_links.append( PackageURL( id=uuid4(), package_id=pkg_id, url_id=url_id, created_at=self.now, updated_at=self.now, ) ) else: # TODO: this should only happen for `latest` URLs # there is an existing link between this URL and this package # let's find it existing_pu = next( pu for pu in self.caches.package_urls[pkg_id] if pu.url_id == url_id ) existing_pu.updated_at = self.now updates.append({"id": existing_pu.id, "updated_at": self.now}) return new_links, updates def diff_deps( self, pkg: Crate ) -> tuple[list[LegacyDependency], list[LegacyDependency]]: """ Identifies new and removed dependencies for a given crate The process is: 1. Build a view of what the package's dependencies are according to the crates.io database. 2. Get this crate's Package ID from CHAI 3. Get this crate's existing dependencies from CHAI 4. Compare the two sets, and identify new and removed dependencies Note: The database has a unique constraint on (package_id, dependency_id), so if a package depends on the same dependency with multiple types (e.g., both runtime and build), we choose the highest priority type: NORMAL (runtime) > BUILD > DEV Returns: new_deps: list[LegacyDependency], the new dependencies removed_deps: list[LegacyDependency], the removed dependencies """ new_deps: list[LegacyDependency] = [] removed_deps: list[LegacyDependency] = [] # First, collect all dependencies and deduplicate by (package_id, dependency_id) # choosing the highest priority dependency type for each unique dependency dependency_map: dict[UUID, DependencyType] = {} # Priority order: NORMAL (runtime) > BUILD > DEV priority_order = { DependencyType.NORMAL: 1, DependencyType.BUILD: 2, DependencyType.DEV: 3, } # Build the map of dependencies, keeping only the highest priority type if pkg.latest_version: for dependency in pkg.latest_version.dependencies: dep_crate_id: str = str(dependency.dependency_id) dep_type: DependencyType = dependency.dependency_type # guard: no dep_id if not dep_crate_id: raise ValueError(f"No dep_id for {dependency}") # guard: no dep_type if dep_type is None: raise ValueError(f"No dep_type for {dependency}") # get the ID from the cache dependency_pkg = self.caches.package_map.get(dep_crate_id) # if we don't have the dependency, skip it for now if not dependency_pkg: self.logger.debug( f"{dep_crate_id}, dependency of {pkg.name} is new" ) continue dependency_id = dependency_pkg.id # If this dependency already exists in our map, choose higher priority if dependency_id in dependency_map: existing_priority = priority_order.get( dependency_map[dependency_id], 999 ) new_priority = priority_order.get(dep_type, 999) if ( new_priority < existing_priority ): # Lower number = higher priority old_type = dependency_map[dependency_id] dependency_map[dependency_id] = dep_type self.logger.debug( f"Updated dependency type for {dep_crate_id} from " f"{old_type} to {dep_type} (higher priority)" ) else: dependency_map[dependency_id] = dep_type # Now build the actual set of dependencies with resolved types actual: set[tuple[UUID, UUID]] = set() for dependency_id, dep_type in dependency_map.items(): # figure out the dependency type UUID dependency_type = self._resolve_dep_type(dep_type) # add it to the set of actual dependencies actual.add((dependency_id, dependency_type)) # establish the package that we are working with crate_id: str = str(pkg.id) package = self.caches.package_map.get(crate_id) if not package: # TODO: handle this case, though it fixes itself on the next run self.logger.debug(f"New package {pkg.name}, will grab its 
deps next time") return [], [] pkg_id: UUID = package.id # what are its existing dependencies? # specifically, existing dependencies IN THE SAME STRUCTURE as `actual`, # so we can do an easy comparison existing: set[tuple[UUID, UUID]] = { (dep.dependency_id, dep.dependency_type_id) for dep in self.caches.dependencies.get(pkg_id, set()) } # we have two sets! # actual minus existing = new_deps # existing minus actual = removed_deps new = actual - existing removed = existing - actual new_deps: list[LegacyDependency] = [ LegacyDependency( # don't include the ID because it's a sequence for this table package_id=pkg_id, dependency_id=dep[0], dependency_type_id=dep[1], created_at=self.now, updated_at=self.now, ) for dep in new ] # get the existing legacy dependency, and add it to removed_deps removed_deps: list[LegacyDependency] = [] cache_deps: set[LegacyDependency] = self.caches.dependencies.get(pkg_id, set()) for removed_dep_id, removed_dep_type in removed: try: existing_dep = next( dep for dep in cache_deps if dep.dependency_id == removed_dep_id and dep.dependency_type_id == removed_dep_type ) removed_deps.append(existing_dep) except StopIteration as exc: cache_deps_str = "\n".join( [ f"{dep.dependency_id} / {dep.dependency_type_id}" for dep in cache_deps ] ) raise ValueError( f"Removing {removed_dep_id} / {removed_dep_type} for {pkg_id} but not in Cache: \n{cache_deps_str}" ) from exc return new_deps, removed_deps def _resolve_dep_type(self, dep_type: DependencyType) -> UUID: """ Resolves the dependency type UUID from the config """ if dep_type == DependencyType.NORMAL: return self.config.dependency_types.runtime elif dep_type == DependencyType.BUILD: return self.config.dependency_types.build elif dep_type == DependencyType.DEV: return self.config.dependency_types.development else: raise ValueError(f"Unknown dependency type: {dep_type}") ================================================ FILE: package_managers/crates/main.py ================================================ from uuid import UUID from core.config import Config, PackageManager from core.fetcher import TarballFetcher from core.logger import Logger from core.models import ( URL, LegacyDependency, Package, PackageURL, ) from core.structs import Cache, URLKey from package_managers.crates.db import CratesDB from package_managers.crates.diff import Diff from package_managers.crates.transformer import CratesTransformer def identify_deletions(transformer: CratesTransformer, db: CratesDB) -> set[int]: """ Identifies crates that are in the db but not in the transformer Cargo enables deletion of crates from the registry, if: - the crate has been published for less than 72 hours - the crate only has a single owner - the crate has been downloaded less than 500 times for each month it has been - the crate is not depended upon by any other crate on crates.io The risk is that the namespace for an invalid import_id is now available, and might be taken by a new crate, which would violate our uniqueness constraint on derived_id Returns: - a set of import_ids that are in the db but not in the transformer References: - https://crates.io/policies - https://rurust.github.io/cargo-docs-ru/policies.html """ logger = Logger("crates_identify_deletions") # db needs to know the cargo id to chai id cargo_id_to_chai_id: dict[str, UUID] = db.get_cargo_id_to_chai_id() transformer_import_ids: set[int] = {int(c.id) for c in transformer.crates.values()} db_import_ids: set[int] = {int(p) for p in cargo_id_to_chai_id} # calculate deletions deletions: set[int] = 
db_import_ids - transformer_import_ids if deletions: logger.warn( f"There are {len(deletions)} crates in the db but not in the registry" ) return deletions def main(config: Config, db: CratesDB): logger = Logger("crates_main") logger.log("Starting crates_main") # fetch the files from cargo if config.exec_config.fetch: fetcher: TarballFetcher = TarballFetcher( "crates", str(config.pm_config.source), config.exec_config.no_cache, config.exec_config.test, ) files = fetcher.fetch() logger.log(f"Fetched {len(files)} files") # write the files to disk if not config.exec_config.fetch and not config.exec_config.no_cache: fetcher.write(files) logger.log("Wrote files to disk") # transform the files into a list of crates transformer = CratesTransformer(config) transformer.parse() logger.log(f"Parsed {len(transformer.crates)} crates") # identify crates we need to delete from CHAI because they are no longer on cargo deletions = identify_deletions(transformer, db) logger.log(f"Identified {len(deletions)} crates to delete") if deletions: db.delete_packages_by_import_id(deletions) logger.log(f"Deleted {len(deletions)} crates") # to build the cache, we need the graph object from the db and the URLs db.set_current_graph() crates_urls: set[str] = set() for crate in transformer.crates.values(): crates_urls.add(crate.homepage) crates_urls.add(crate.repository) crates_urls.add(crate.documentation) db.set_current_urls(crates_urls) cache = Cache( db.graph.package_map, db.urls.url_map, db.urls.package_urls, db.graph.dependencies, ) logger.log("Built cache") # now, we can do the diff new_packages: list[Package] = [] updated_packages: list[dict] = [] new_urls: dict[URLKey, URL] = {} new_package_urls: list[PackageURL] = [] updated_package_urls: list[dict] = [] new_deps: list[LegacyDependency] = [] removed_deps: list[LegacyDependency] = [] diff = Diff(config, cache) for pkg in transformer.crates.values(): pkg_id, pkg_obj, update_payload = diff.diff_pkg(pkg) if pkg_obj: new_packages.append(pkg_obj) if update_payload: updated_packages.append(update_payload) # URLs resolved_urls = diff.diff_url(pkg, new_urls) # package URLs new_links, updated_links = diff.diff_pkg_url(pkg_id, resolved_urls) if new_links: new_package_urls.extend(new_links) if updated_links: updated_package_urls.extend(updated_links) # finally, dependencies new_dependencies, removed_dependencies = diff.diff_deps(pkg) if new_dependencies: new_deps.extend(new_dependencies) if removed_dependencies: removed_deps.extend(removed_dependencies) logger.log(f"Diffed {len(transformer.crates)} crates!") # make new_urls a list of new URLs final_new_urls = list(new_urls.values()) db.ingest( new_packages, final_new_urls, new_package_urls, new_deps, removed_deps, updated_packages, updated_package_urls, ) logger.log("✅ Done") if __name__ == "__main__": config = Config(PackageManager.CRATES) db = CratesDB(config) main(config, db) ================================================ FILE: package_managers/crates/structs.py ================================================ from dataclasses import dataclass, field from datetime import datetime from enum import IntEnum from typing import TypedDict from uuid import UUID class DependencyType(IntEnum): """ The kind of dependency from the crates.io database - NORMAL: normal dependency (default) - BUILD: build dependency (used for build scripts) - DEV: dev dependency (used for testing or benchmarking) Resources: - https://doc.rust-lang.org/cargo/reference/specifying-dependencies.html """ NORMAL = 0 BUILD = 1 # used for build scripts DEV = 
2 # used for testing or benchmarking OPTIONAL = 3 def __str__(self): return self.name.lower() @dataclass class CrateDependency: crate_id: int dependency_id: int dependency_type: DependencyType # kind semver_range: str # req @dataclass class CrateUser: # from users.csv or teams.csv id: int name: str | None = None github_username: str | None = None @dataclass class CrateLatestVersion: # latest version ID is from default_versions.csv # data is from versions.csv id: int checksum: str downloads: int license: str num: str published_at: datetime published_by: CrateUser | None = None # dependencies.csv dependencies: list[CrateDependency] = field(default_factory=list) @dataclass class Crate: # from crates.csv id: int name: str readme: str homepage: str repository: str documentation: str source: str | None = None # from versions.csv latest_version: CrateLatestVersion | None = None class CanonUpdatePayload(TypedDict): """Type-safe structure for canon update operations.""" id: UUID name: str updated_at: datetime class CanonPackageUpdatePayload(TypedDict): """Type-safe structure for canon package update operations.""" id: UUID canon_id: UUID updated_at: datetime ================================================ FILE: package_managers/crates/transformer.py ================================================ import csv from collections.abc import Generator from core.config import Config from core.transformer import Transformer from core.utils import is_github_url from package_managers.crates.structs import ( Crate, CrateDependency, CrateLatestVersion, CrateUser, DependencyType, ) class CratesTransformer(Transformer): def __init__(self, config: Config): super().__init__("crates") self.config = config self.crates: dict[int, Crate] = {} # files we need to parse self.files: dict[str, str] = { "crates": "crates.csv", "latest_versions": "default_versions.csv", "versions": "versions.csv", "dependencies": "dependencies.csv", "users": "users.csv", "teams": "teams.csv", } def _open_csv(self, file_name: str) -> Generator[dict[str, str], None, None]: try: file_path = self.finder(self.files[file_name]) with open(file_path, newline="", encoding="utf-8") as f: reader = csv.DictReader(f) yield from reader except KeyError as exc: raise KeyError( f"Missing {file_name} from self.files: {self.files}" ) from exc except FileNotFoundError as exc: self.logger.error(f"Missing {file_path} from data directory") raise FileNotFoundError(f"Missing {file_path} file") from exc except Exception as e: self.logger.error(f"Error reading {file_path}: {e}") raise e def parse(self) -> None: # first go through crates.csv to # here, we can get the import_id, name, homepage, documentation, repository # and also source, from repo if it is like GitHub for row in self._open_csv("crates"): crate_id = int(row["id"]) name = row["name"] readme = row["readme"] # URLs: homepage = self.canonicalize(row["homepage"]) documentation = self.canonicalize(row["documentation"]) repository = self.canonicalize(row["repository"]) source: str | None = None if is_github_url(repository): source = repository crate = Crate( crate_id, name, readme, homepage, repository, documentation, source ) self.crates[crate_id] = crate self.logger.log(f"Parsed {len(self.crates)} crates") # populate the map of crate_id to latest_version_id & all latest_version_ids latest_versions: set[int] latest_versions_map: dict[int, int] latest_versions, latest_versions_map = self._load_latest_versions() self.logger.log(f"Loaded {len(latest_versions)} latest versions") # also build the map of user_id 
to CrateUser object users: dict[int, CrateUser] = self._load_users() self.logger.log(f"Loaded {len(users)} users") # now, iterate through the versions.csv, and populate LatestVersion objects, # only if the version_id is in the latest_versions set for row in self._open_csv("versions"): version_id = int(row["id"]) crate_id = int(row["crate_id"]) # ignore if this version is not the latest if version_id not in latest_versions: continue checksum = row["checksum"] downloads = int(row["downloads"]) license = row["license"] num = row["num"] published_at = row["created_at"] # make a CrateUser object from the published_by published_by = row["published_by"] published_by_user: CrateUser | None = ( users[int(published_by)] if published_by else None ) latest_version = CrateLatestVersion( version_id, checksum, downloads, license, num, published_at, published_by_user, ) # map this LatestVersion to the crate in self.crates self.crates[crate_id].latest_version = latest_version self.logger.log("Parsed the latest versions for each crate") # finally, parse through the dependencies.csv # again, we only care about the dependencies for the latest version for row in self._open_csv("dependencies"): start_id = int(row["version_id"]) # ignore if this version is not the latest if start_id not in latest_versions: continue # map both ids to crates end_crate_id = int(row["crate_id"]) start_crate_id = int(latest_versions_map[start_id]) # guard if start_crate_id not in self.crates: raise ValueError(f"Crate {start_crate_id} not found in self.crates") kind = int(row["kind"]) # guard if kind not in [0, 1, 2]: raise ValueError(f"Unknown dependency kind: {kind}") dependency_type = DependencyType(kind) semver = row["req"] dependency = CrateDependency( start_crate_id, end_crate_id, dependency_type, semver ) # add this dependency to the crate self.crates[start_crate_id].latest_version.dependencies.append(dependency) self.logger.log("Parsed the dependencies for each crate") def _load_latest_versions(self) -> tuple[set[int], dict[int, int]]: latest_versions: set[int] = set() latest_versions_map: dict[int, int] = {} for row in self._open_csv("latest_versions"): crate_id = int(row["crate_id"]) version_id = int(row["version_id"]) latest_versions.add(version_id) latest_versions_map[version_id] = crate_id return latest_versions, latest_versions_map def _load_users(self) -> dict[int, CrateUser]: users: dict[int, CrateUser] = {} for row in self._open_csv("users"): user_id = int(row["id"]) name = row["name"] github_username = row["gh_login"] user = CrateUser(user_id, name, github_username) users[user_id] = user return users ================================================ FILE: package_managers/debian/Dockerfile ================================================ FROM ghcr.io/astral-sh/uv:python3.11-bookworm-slim # Copy everything COPY . . 
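# NOTE: this assumes the build context is the repository root (as configured in docker-compose.yml),
# so the COPY . . above brings /core and /package_managers into the image together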
# Install core requirements using uv WORKDIR /core RUN uv pip install --system -r requirements.txt WORKDIR / # Run the main application CMD ["python", "/package_managers/debian/main.py"] ================================================ FILE: package_managers/debian/README.md ================================================ # Debian ## Data Structure - Source represents the original upstream project, as Debian receives it - Package is a binary that users can install - Sources can specify multiple binaries - Not every package specifies a source (e.g. transitional or virtual packages) ## Scripts - `investigate_sources.py` can be run on the downloaded data dump from Debian, and prints information about the data integrity ## Approach There is a many-to-one mapping between Packages and Sources. During the load step, we populate the map between Packages and Sources (as in `investigate_sources.py`), because information about a Debian package can be fetched from both data sources. While the parser currently captures all the information for each Package and Source (keep as-is), we only end up loading the following information for a package from each source: Source: - Vcs-Browser => URL, PackageURL - Vcs-Git => URL, PackageURL - Build-Depends => LegacyDependency - Maintainer => User, UserPackage - Uploaders => User, UserPackage Package: - Depends => LegacyDependency - Pre-Depends => LegacyDependency - Description => Package - Homepage => URL, PackageURL ================================================ FILE: package_managers/debian/db.py ================================================ #!/usr/bin/env pkgx uv run from core.config import Config from core.db import DB, CurrentURLs from core.structs import CurrentGraph, DiffResult class DebianDB(DB): def __init__(self, logger_name: str, config: Config): super().__init__(logger_name) self.config = config def set_current_graph(self) -> None: """Get the debian packages and dependencies""" self.graph: CurrentGraph = self.current_graph(self.config.pm_config.pm_id) def set_current_urls(self, urls: set[str]) -> None: """Getting all the URLs and Package URLs from the database""" self.urls: CurrentURLs = self.current_urls(urls) def ingest_wrapper(self, diff_result: DiffResult) -> None: """Wrapper for the main ingest function to handle DiffResult""" final_new_urls = list(diff_result.new_urls.values()) self.ingest( diff_result.new_packages, final_new_urls, diff_result.new_package_urls, diff_result.new_deps, diff_result.removed_deps, diff_result.updated_packages, diff_result.updated_package_urls, ) ================================================ FILE: package_managers/debian/debian_sources.py ================================================ from core.logger import Logger from package_managers.debian.parser import DebianParser from package_managers.debian.structs import DebianData def build_package_to_source_mapping( sources_file_path: str, logger: Logger ) -> dict[str, DebianData]: """ Build a mapping from binary package names to their source information.
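For example, a single source such as glibc declares several binaries (libc6, libc-bin, ...), and each of those binary names maps back to the same source record.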
Args: sources_file_path: Path to the sources file logger: Logger for progress and warning messages Returns: Dictionary mapping binary package names to source DebianData objects """ # Parse sources file with open(sources_file_path) as f: sources_content = f.read() sources_parser = DebianParser(sources_content) # Build mapping: binary_package_name -> source_debian_data package_to_source: dict[str, DebianData] = {} for source_data in sources_parser.parse(): # Each source may produce multiple binary packages if source_data.binary: # Source has explicit binary list for binary_name in source_data.binary: binary_name = binary_name.strip() if binary_name: package_to_source[binary_name] = source_data else: # No explicit binary list, assume source name == binary name if source_data.package: package_to_source[source_data.package] = source_data logger.log( f"Built mapping for {len(package_to_source)} binary packages from sources" ) return package_to_source def enrich_package_with_source( package_data: DebianData, source_mapping: dict[str, DebianData], logger: Logger ) -> DebianData: """ Enrich a package with its corresponding source information. Args: package_data: The package data from packages file source_mapping: Mapping from package names to source data logger: Logger used to warn when no source is found Returns: Enriched DebianData with both package and source information """ # Start with the package data enriched = package_data # The lookup key is the binary package name binary_name = package_data.package # Look up source information if binary_name in source_mapping: source_data = source_mapping[binary_name] # Enrich package with source information # Only add source fields that aren't already populated if not enriched.vcs_browser and source_data.vcs_browser: enriched.vcs_browser = source_data.vcs_browser if not enriched.vcs_git and source_data.vcs_git: enriched.vcs_git = source_data.vcs_git if not enriched.directory and source_data.directory: enriched.directory = source_data.directory if not enriched.build_depends and source_data.build_depends: enriched.build_depends = source_data.build_depends if not enriched.homepage and source_data.homepage: enriched.homepage = source_data.homepage else: # Log warning for missing source source_name = package_data.source or package_data.package logger.warn( f"Binary '{binary_name}' of source '{source_name}' was not found in sources file" ) return enriched ================================================ FILE: package_managers/debian/diff.py ================================================ #!/usr/bin/env pkgx uv run from datetime import datetime from uuid import UUID, uuid4 from core.config import Config from core.logger import Logger from core.models import URL, LegacyDependency, Package, PackageURL from core.structs import Cache, URLKey from core.utils import is_github_url from package_managers.debian.db import DebianDB from package_managers.debian.parser import DebianData from package_managers.debian.structs import Depends class DebianDiff: def __init__(self, config: Config, caches: Cache, db: DebianDB, logger: Logger): self.config = config self.now = datetime.now() self.caches = caches self.db = db self.logger = logger def diff_pkg( self, import_id: str, debian_data: DebianData ) -> tuple[UUID, Package | None, dict | None]: """ Checks if the given package is in the package_cache. Returns: - pkg_id: the id of the package - package: If new, returns a new package object.
If existing, returns None - changes: a dictionary of changes (description updates) """ self.logger.debug(f"Diffing package: {import_id}") if import_id not in self.caches.package_map: # new package name = import_id.split("/")[1] p = Package( id=uuid4(), derived_id=import_id, name=name, package_manager_id=self.config.pm_config.pm_id, import_id=import_id, readme=debian_data.description, created_at=self.now, updated_at=self.now, ) pkg_id: UUID = p.id return pkg_id, p, {} else: # the package exists, check if description has changed existing_pkg = self.caches.package_map[import_id] pkg_id = existing_pkg.id # Check if description (readme) has changed if existing_pkg.readme != debian_data.description: update_payload = { "id": pkg_id, "readme": debian_data.description, "updated_at": self.now, } return pkg_id, None, update_payload else: return pkg_id, None, None def diff_url( self, import_id: str, debian_data: DebianData, new_urls: dict[URLKey, URL] ) -> dict[UUID, UUID]: """Given a package's URLs, returns the resolved URL for this specific package""" resolved_urls: dict[UUID, UUID] = {} # Generate the URLs for this package urls = self._generate_chai_urls(debian_data) # Process each URL for url_key in urls: # guard: _generate_chai_urls could be None for a url type if url_key is None: continue resolved_url_id: UUID if url_key in new_urls: resolved_url_id = new_urls[url_key].id elif url_key in self.caches.url_map: resolved_url_id = self.caches.url_map[url_key].id else: self.logger.debug( f"URL {url_key.url} as {url_key.url_type_id} is entirely new" ) new_url = URL( id=uuid4(), url=url_key.url, url_type_id=url_key.url_type_id, created_at=self.now, updated_at=self.now, ) resolved_url_id = new_url.id new_urls[url_key] = new_url resolved_urls[url_key.url_type_id] = resolved_url_id return resolved_urls def diff_pkg_url( self, pkg_id: UUID, resolved_urls: dict[UUID, UUID] ) -> tuple[list[PackageURL], list[dict]]: """Takes in a package_id and resolved URLs from diff_url, and generates new PackageURL objects as well as a list of changes to existing ones""" new_links: list[PackageURL] = [] updates: list[dict] = [] # what are the existing links? existing: set[UUID] = { pu.url_id for pu in self.caches.package_urls.get(pkg_id, set()) } # for each URL type/URL for this package: for _url_type, url_id in resolved_urls.items(): if url_id not in existing: # new link! new_links.append( PackageURL( id=uuid4(), package_id=pkg_id, url_id=url_id, created_at=self.now, updated_at=self.now, ) ) else: # existing link - update timestamp existing_pu = next( pu for pu in self.caches.package_urls[pkg_id] if pu.url_id == url_id ) existing_pu.updated_at = self.now updates.append({"id": existing_pu.id, "updated_at": self.now}) return new_links, updates def diff_deps( self, import_id: str, debian_data: DebianData ) -> tuple[list[LegacyDependency], list[LegacyDependency]]: """ Takes in a debian package and figures out what dependencies have changed. The process is: 1. Build a view of what the package's dependencies are according to the parsed debian data, using priority-based deduplication 2. Get this package's ID from CHAI 3. Get this package's existing dependencies from CHAI 4. 
Compare the two sets, and identify new and removed dependencies Note: The database has a unique constraint on (package_id, dependency_id), so if a package depends on the same dependency with multiple types (e.g., both runtime and build), we choose the highest priority type: Runtime > Build > Test Returns: - new_deps: a list of new dependencies - removed_deps: a list of removed dependencies """ # First, collect all dependencies and deduplicate by dependency name # choosing the highest priority dependency type for each unique dependency dependency_map: dict[str, UUID] = {} # Priority order: Runtime > Build > Test priority_order = { self.config.dependency_types.runtime: 1, self.config.dependency_types.build: 2, self.config.dependency_types.test: 3, } def process_deps(dependencies: list[Depends], dep_type: UUID) -> None: """Helper to process dependencies of a given type with priority""" for dep in dependencies: dep_name = f"debian/{dep.package}" # bc the map is by import_id # Get the dependency package from cache dependency = self.caches.package_map.get(dep_name) # try debian/dependency if not dependency: self.logger.debug(f"{dep_name} not loaded, will catch next time") continue # If this dependency already exists in our map, choose higher priority if dep_name in dependency_map: existing_priority = priority_order.get( dependency_map[dep_name], 999 ) new_priority = priority_order.get(dep_type, 999) if new_priority < existing_priority: # Lower is better! old_type_id = dependency_map[dep_name] dependency_map[dep_name] = dep_type self.logger.debug( f"Updated dependency type for {dep_name} from " f"{old_type_id} to {dep_type} (higher priority)" ) else: dependency_map[dep_name] = dep_type # Process different types of dependencies with priority handling # Debian has: depends (runtime), build_depends (build), recommends, suggests, etc. process_deps(debian_data.depends, self.config.dependency_types.runtime) process_deps(debian_data.build_depends, self.config.dependency_types.build) # Map recommends and suggests to runtime for simplicity process_deps(debian_data.recommends, self.config.dependency_types.runtime) process_deps(debian_data.suggests, self.config.dependency_types.runtime) # Now build the actual set of dependencies with resolved types actual: set[tuple[UUID, UUID]] = set() for dep_name, dep_type in dependency_map.items(): dependency = self.caches.package_map.get(dep_name) if dependency: # Double-check it still exists actual.add((dependency.id, dep_type)) # get the package ID for what we are working with package = self.caches.package_map.get(import_id) if not package: self.logger.debug(f"New package {import_id}, will grab its deps next time") return [], [] pkg_id: UUID = package.id # what are its existing dependencies? # specifically, existing dependencies IN THE SAME STRUCTURE as `actual`, # so we can do an easy comparison existing: set[tuple[UUID, UUID]] = { (dep.dependency_id, dep.dependency_type_id) for dep in self.caches.dependencies.get(pkg_id, set()) } # we have two sets! 
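# e.g. with hypothetical ids a and b: actual = {(a, runtime)}, existing = {(a, runtime), (b, build)}
# -> new = {} and removed = {(b, build)}, so the (b, build) edge is marked removed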
# actual minus existing = new_deps # existing minus actual = removed_deps new = actual - existing removed = existing - actual new_deps: list[LegacyDependency] = [ LegacyDependency( package_id=pkg_id, dependency_id=dep[0], dependency_type_id=dep[1], created_at=self.now, updated_at=self.now, ) for dep in new ] # get the existing legacy dependency, and add it to removed_deps removed_deps: list[LegacyDependency] = [] cache_deps: set[LegacyDependency] = self.caches.dependencies.get(pkg_id, set()) for removed_dep_id, removed_dep_type in removed: try: existing_dep = next( dep for dep in cache_deps if dep.dependency_id == removed_dep_id and dep.dependency_type_id == removed_dep_type ) removed_deps.append(existing_dep) except StopIteration as exc: cache_deps_str = "\n".join( [ f"{dep.dependency_id} / {dep.dependency_type_id}" for dep in cache_deps ] ) raise ValueError( f"Removing {removed_dep_id} / {removed_dep_type} for {pkg_id} but not in Cache: \n{cache_deps_str}" ) from exc return new_deps, removed_deps def _generate_chai_urls(self, debian_data: DebianData) -> list[URLKey]: """Generate URLs for a debian package""" urls = [] # Homepage URL if debian_data.homepage: urls.append(URLKey(debian_data.homepage, self.config.url_types.homepage)) # Source URL source_url = ( debian_data.vcs_git if debian_data.vcs_git else debian_data.vcs_browser ) if source_url: urls.append(URLKey(source_url, self.config.url_types.source)) # Repository URL if is_github_url(source_url): urls.append(URLKey(source_url, self.config.url_types.repository)) return urls ================================================ FILE: package_managers/debian/main.py ================================================ #!/usr/bin/env pkgx uv run import os import time from datetime import datetime from uuid import UUID from core.config import Config, PackageManager from core.fetcher import GZipFetcher from core.logger import Logger from core.models import URL, LegacyDependency, Package, PackageURL from core.scheduler import Scheduler from core.structs import Cache, DiffResult, URLKey from core.utils import file_exists from package_managers.debian.db import DebianDB from package_managers.debian.debian_sources import ( build_package_to_source_mapping, enrich_package_with_source, ) from package_managers.debian.diff import DebianDiff from package_managers.debian.parser import DebianData, DebianParser SCHEDULER_ENABLED = os.getenv("ENABLE_SCHEDULER", "true").lower() == "true" def fetch(config: Config, logger: Logger) -> tuple[GZipFetcher, GZipFetcher]: """Fetches the Debian packages & sources manifest files""" package_source = config.pm_config.source[0] sources_source = config.pm_config.source[1] no_cache = config.exec_config.no_cache test = config.exec_config.test package_fetcher = GZipFetcher( name="debian", source=package_source, no_cache=no_cache, test=test, file_path="", # will autosave in data/debian/latest file_name="packages", ) sources_fetcher = GZipFetcher( name="debian", source=sources_source, no_cache=no_cache, test=test, file_path="", # will autosave in data/debian/latest file_name="sources", ) # Fetch should_fetch = config.exec_config.fetch if should_fetch: package_files = package_fetcher.fetch() package_fetcher.write(package_files) logger.log(f"Fetched {len(package_files)} package files") sources_files = sources_fetcher.fetch() sources_fetcher.write(sources_files) logger.log(f"Fetched {len(sources_files)} sources files") return package_fetcher, sources_fetcher def diff( data: list[DebianData], config: Config, cache: Cache, db: DebianDB, 
================================================
FILE: package_managers/debian/main.py
================================================
#!/usr/bin/env pkgx uv run

import os
import time
from datetime import datetime
from uuid import UUID

from core.config import Config, PackageManager
from core.fetcher import GZipFetcher
from core.logger import Logger
from core.models import URL, LegacyDependency, Package, PackageURL
from core.scheduler import Scheduler
from core.structs import Cache, DiffResult, URLKey
from core.utils import file_exists
from package_managers.debian.db import DebianDB
from package_managers.debian.debian_sources import (
    build_package_to_source_mapping,
    enrich_package_with_source,
)
from package_managers.debian.diff import DebianDiff
from package_managers.debian.parser import DebianData, DebianParser

SCHEDULER_ENABLED = os.getenv("ENABLE_SCHEDULER", "true").lower() == "true"


def fetch(config: Config, logger: Logger) -> tuple[GZipFetcher, GZipFetcher]:
    """Fetches the Debian packages & sources manifest files"""
    package_source = config.pm_config.source[0]
    sources_source = config.pm_config.source[1]
    no_cache = config.exec_config.no_cache
    test = config.exec_config.test

    package_fetcher = GZipFetcher(
        name="debian",
        source=package_source,
        no_cache=no_cache,
        test=test,
        file_path="",  # will autosave in data/debian/latest
        file_name="packages",
    )
    sources_fetcher = GZipFetcher(
        name="debian",
        source=sources_source,
        no_cache=no_cache,
        test=test,
        file_path="",  # will autosave in data/debian/latest
        file_name="sources",
    )

    # Fetch
    should_fetch = config.exec_config.fetch
    if should_fetch:
        package_files = package_fetcher.fetch()
        package_fetcher.write(package_files)
        logger.log(f"Fetched {len(package_files)} package files")

        sources_files = sources_fetcher.fetch()
        sources_fetcher.write(sources_files)
        logger.log(f"Fetched {len(sources_files)} sources files")

    return package_fetcher, sources_fetcher


def diff(
    data: list[DebianData],
    config: Config,
    cache: Cache,
    db: DebianDB,
    logger: Logger,
) -> DiffResult:
    # Keeps track of all the new packages we're adding
    seen: dict[str, UUID] = {}
    seen_new_pkg_urls: set[tuple[UUID, UUID]] = set()

    # Objects that we will return
    new_packages: list[Package] = []
    new_urls: dict[URLKey, URL] = {}
    new_package_urls: list[PackageURL] = []
    updated_packages: list[dict[str, UUID | str | datetime]] = []
    updated_package_urls: list[dict[str, UUID | datetime]] = []
    new_deps: list[LegacyDependency] = []
    removed_deps: list[LegacyDependency] = []

    # Create diff processor
    diff = DebianDiff(config, cache, db, logger)

    # Process each enriched package
    for i, debian_data in enumerate(data):
        # Guard: skip entries with an empty package name. Note the check must
        # be on debian_data.package: the import_id f-string always contains
        # the "debian/" prefix, so it can never be empty itself.
        if not debian_data.package:
            logger.warn(f"Skipping package with empty name at index {i}")
            continue
        import_id = f"debian/{debian_data.package}"

        # Diff the package
        pkg_id, pkg_obj, update_payload = diff.diff_pkg(import_id, debian_data)

        # Guard: if pkg_obj is not None, that means it's a new package.
        # If it's new, **and** we have seen it before, set the ID to what is
        # seen. So, duplicates absorb all URLs & Dependencies under one umbrella.
        resolved_pkg_id = seen.get(pkg_obj.import_id, pkg_id) if pkg_obj else pkg_id
        if pkg_obj and pkg_obj.import_id not in seen:
            logger.debug(f"New package: {pkg_obj.name}")
            new_packages.append(pkg_obj)
            seen[pkg_obj.import_id] = resolved_pkg_id

        if update_payload:
            logger.debug(f"Updated package: {update_payload['id']}")
            updated_packages.append(update_payload)

        # Diff URLs (resolved_urls is a map of URL types to final URL IDs)
        resolved_urls = diff.diff_url(import_id, debian_data, new_urls)

        # Diff package URLs
        new_links, updated_links = diff.diff_pkg_url(resolved_pkg_id, resolved_urls)
        if new_links:
            logger.debug(f"New package URLs: {len(new_links)}")
            # guard: only add truly new links
            for link in new_links:
                if (link.package_id, link.url_id) not in seen_new_pkg_urls:
                    new_package_urls.append(link)
                    seen_new_pkg_urls.add((link.package_id, link.url_id))
        if updated_links:
            updated_package_urls.extend(updated_links)

        # Diff dependencies
        new_dependencies, removed_dependencies = diff.diff_deps(import_id, debian_data)
        if new_dependencies:
            logger.debug(f"New dependencies: {len(new_dependencies)}")
            new_deps.extend(new_dependencies)
        if removed_dependencies:
            logger.debug(f"Removed dependencies: {len(removed_dependencies)}")
            removed_deps.extend(removed_dependencies)

        # In test mode, limit processing to the first 3 packages to reduce
        # runtime and resource usage.
        if config.exec_config.test and i > 2:
            break

    return DiffResult(
        new_packages,
        new_urls,
        new_package_urls,
        updated_packages,
        updated_package_urls,
        new_deps,
        removed_deps,
    )
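The `seen` map is what makes duplicate stanzas for the same import_id collapse onto a single package ID, so their URLs and dependencies accumulate under one record. A toy illustration of that first-writer-wins resolution (hypothetical string IDs in place of freshly minted UUIDs):

    # Toy model of the `seen` resolution above; "id-a"/"id-b" are hypothetical.
    seen: dict[str, str] = {}
    for import_id, candidate_id in [("debian/foo", "id-a"), ("debian/foo", "id-b")]:
        resolved = seen.get(import_id, candidate_id)  # duplicate resolves to "id-a"
        seen.setdefault(import_id, resolved)

    assert seen == {"debian/foo": "id-a"}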
def run_pipeline(config: Config, db: DebianDB, logger: Logger):
    """The Debian Indexer"""
    package_fetcher, sources_fetcher = fetch(config, logger)
    input_dir = f"{sources_fetcher.output}/latest"

    # Build package-to-source mapping first
    sources_file_path = file_exists(input_dir, "sources")
    source_mapping = build_package_to_source_mapping(sources_file_path, logger)

    # Parse packages file
    packages_file_path = file_exists(input_dir, "packages")
    with open(packages_file_path) as f:
        packages_content = f.read()
    packages_parser = DebianParser(packages_content)

    # Process each package and enrich with source information
    enriched_packages: list[DebianData] = []
    for package_data in packages_parser.parse():
        enriched_package = enrich_package_with_source(
            package_data, source_mapping, logger
        )
        enriched_packages.append(enriched_package)
    logger.log(f"Processed {len(enriched_packages)} enriched packages")

    # Grab all the URLs from enriched packages
    all_urls: set[str] = set()
    for package in enriched_packages:
        all_urls.add(package.homepage)
        all_urls.add(package.vcs_browser)
        all_urls.add(package.vcs_git)
    logger.log(f"Found {len(all_urls)} URLs to load")

    # Set up cache
    db.set_current_graph()
    db.set_current_urls(all_urls)
    cache = Cache(
        db.graph.package_map,
        db.urls.url_map,
        db.urls.package_urls,
        db.graph.dependencies,
    )
    logger.log("Setup cache")

    # Perform the diff
    result = diff(enriched_packages, config, cache, db, logger)

    # Ingest all diffs
    db.ingest_wrapper(result)

    if config.exec_config.no_cache:
        package_fetcher.cleanup()
        sources_fetcher.cleanup()


def main(config: Config, db: DebianDB, logger: Logger):
    logger.log("Initializing Debian package manager")
    logger.debug(f"Config: {config}")

    if SCHEDULER_ENABLED:
        logger.log("Scheduler enabled. Starting schedule.")
        scheduler = Scheduler("debian_scheduler")
        scheduler.start(run_pipeline, config, db, logger)

        # run immediately as well when scheduling
        scheduler.run_now(run_pipeline, config, db, logger)

        # keep the main thread alive for the scheduler
        try:
            while True:
                time.sleep(3600)
        except KeyboardInterrupt:
            scheduler.stop()
            logger.log("Scheduler stopped.")
    else:
        logger.log("Scheduler disabled. Running pipeline once.")
        run_pipeline(config, db, logger)
        logger.log("Pipeline finished.")


if __name__ == "__main__":
    config = Config(PackageManager.DEBIAN)
    db = DebianDB("debian_db", config)
    logger = Logger("debian")
    main(config, db, logger)
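For orientation before the parser below: Debian Packages and Sources manifests are RFC-822-style stanzas, i.e. blank-line-separated paragraphs of `Key: value` fields where continuation lines begin with a space or tab. A small hypothetical stanza, trimmed to fields the parser's mapper handles:

    Package: example-pkg
    Version: 1.2.3-1
    Architecture: amd64
    Depends: libexample1 (>= 1.2), example-data
    Homepage: https://example.org
    Description: an illustrative entry
     continuation lines are indented with a leading space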
================================================
FILE: package_managers/debian/parser.py
================================================
import re
from collections.abc import Iterator

from permalint import normalize_url

from package_managers.debian.structs import DebianData, Depends, Maintainer

# NOTE: The DebianParser is the one which normalizes all the URLs!


class DebianParser:
    def __init__(self, content: str):
        # content is the Packages or Sources file
        self.content = content

    def parse(self) -> Iterator[DebianData]:
        """Yield packages and sources from the Packages and Sources files."""
        paragraphs = self.content.split("\n\n")

        # iterate over the paragraphs
        for paragraph in paragraphs:
            # if the paragraph is empty, then move on
            if not paragraph.strip():
                continue

            # each paragraph represents one object
            obj = DebianData()

            # State for handling multiline fields
            current_field = None
            current_value = ""

            # populate the object
            for line in paragraph.split("\n"):
                # if the line is empty, then move on
                if not line.strip():
                    continue

                # if the line starts with a tab or space, then it's a
                # continuation of the previous field
                if line[0] == " " or line[0] == "\t":
                    # Append continuation line to current field value
                    if current_field is not None:
                        current_value += " " + line.strip()
                    continue

                # Process any accumulated field before starting a new one
                if current_field is not None:
                    self.mapper(obj, current_field, current_value)

                # Start new field
                if ":" not in line:
                    continue
                key, value = line.split(":", 1)
                current_field = key.strip()
                current_value = value.strip()

            # Process the final accumulated field
            if current_field is not None:
                self.mapper(obj, current_field, current_value)

            if obj.package:
                yield obj
            else:
                raise ValueError(f"Invalid package: {paragraph}")

    def handle_line(self, obj: DebianData, line: str) -> None:
        key, value = line.split(":", 1)
        self.mapper(obj, key, value)

    def mapper(self, obj: DebianData, key: str, value: str) -> None:
        """Map fields from Debian package/source files to DebianData object."""
        match key:
            case "Package":
                obj.package = value.strip()
            case "Source":
                obj.source = value.strip()
            case "Version":
                obj.version = value.strip()
            case "Installed-Size":
                obj.installed_size = int(value.strip())
            case "Architecture":
                obj.architecture = value.strip()
            case "Description":
                obj.description = value.strip()
            case "Homepage":
                obj.homepage = normalize_url(value.strip())
            case "Description-md5":
                obj.description_md5 = value.strip()
            case "Tag":
                obj.tag = value.strip()
            case "Section":
                obj.section = value.strip()
            case "Priority":
                obj.priority = value.strip()
            case "Filename":
                obj.filename = value.strip()
            case "Size":
                obj.size = int(value.strip())
            case "MD5sum":
                obj.md5sum = value.strip()
            case "SHA256":
                obj.sha256 = value.strip()
            case "Standards-Version":
                obj.standards_version = value.strip()
            case "Format":
                obj.format = value.strip()
            case "Vcs-Browser":
                obj.vcs_browser = normalize_url(value.strip())
            case "Vcs-Git":
                obj.vcs_git = normalize_url(value.strip())
            case "Directory":
                obj.directory = value.strip()
            case "Testsuite":
                obj.testsuite = value.strip()
            case "Testsuite-Triggers":
                obj.testsuite_triggers = value.strip()
            case "Binary":
                obj.binary = [bin.strip() for bin in value.split(",") if bin.strip()]
            case "Package-List":
                obj.package_list = [pkg.strip() for pkg in value.split(",")]
            # Dependency Fields
            case "Depends":
                for dependency in value.split(", "):
                    obj.depends.append(handle_depends(dependency.strip()))
            case "Pre-Depends":
                for dependency in value.split(", "):
                    obj.pre_depends.append(handle_depends(dependency.strip()))
            case "Replaces":
                for dependency in value.split(", "):
                    obj.replaces.append(handle_depends(dependency.strip()))
            case "Provides":
                for dependency in value.split(", "):
                    obj.provides.append(handle_depends(dependency.strip()))
            case "Recommends":
                for dependency in value.split(", "):
                    obj.recommends.append(handle_depends(dependency.strip()))
            case "Suggests":
                for dependency in value.split(", "):
                    obj.suggests.append(handle_depends(dependency.strip()))
            case "Breaks":
                for dependency in value.split(", "):
                    obj.breaks.append(handle_depends(dependency.strip()))
            case "Conflicts":
                for dependency in value.split(", "):
                    obj.conflicts.append(handle_depends(dependency.strip()))
            case "Build-Depends":
                for build_depends in value.split(", "):
                    obj.build_depends.append(handle_depends(build_depends.strip()))
            # Maintainer fields
            case "Uploaders":
                # Split by comma but respect quoted sections
                uploaders = []
                in_quotes = False
                current = ""
                for char in value:
                    if char == '"':
                        in_quotes = not in_quotes
                        current += char
                    elif char == "," and not in_quotes:
                        if current.strip():
                            uploaders.append(current.strip())
                        current = ""
                    else:
                        current += char
                if current.strip():
                    uploaders.append(current.strip())
                for uploader in uploaders:
                    obj.uploaders.append(handle_maintainer(uploader.strip()))
            case "Maintainer":
                obj.maintainer = handle_maintainer(value.strip())
            # TODO: File Fields
            case _:
                pass


# Helpers for handling specific fields in the mapper
def handle_depends(dependency: str) -> Depends:
    # Handle various dependency formats:
    #   0ad-data (>= 0.0.26)
    #   lib32gcc1-amd64-cross [amd64 arm64 i386 ppc64el x32]
    #   gm2-11 [!powerpc !ppc64 !x32]
    #   debhelper-compat (= 13)
    #   gcc-11-source (>= 11.3.0-11~)

    # First, strip platform specifications in square brackets, e.g.
    # [amd64 arm64 i386 ppc64el x32] or [!powerpc !ppc64 !x32]
    platform_match = re.search(r"\s*\[[^\]]+\]", dependency)
    if platform_match:
        dependency = dependency.replace(platform_match.group(0), "").strip()

    # Now handle version constraints in parentheses
    match = re.match(r"^(.*?)(\s*\((.*)\))?$", dependency)
    if match:
        dep = match.group(1).strip()
        if match.group(2):
            semver = match.group(3)
            return Depends(package=dep, semver=semver)
        else:
            return Depends(package=dep, semver="*")

    raise ValueError(f"Invalid dependency: {dependency}")


def handle_maintainer(value: str) -> Maintainer:
    # Remove trailing comma if present
    value = value.rstrip(",")

    # For names with quotes like "Adam C. Powell, IV" <email>
    if '"' in value:
        match = re.match(r'^"([^"]*)" <([^>]*)>$', value)
        if match:
            return Maintainer(name=match.group(1), email=match.group(2))

    # Standard format: Name <email>
    match = re.match(r"^(.*) <([^>]*)>$", value)
    if match:
        return Maintainer(name=match.group(1), email=match.group(2))

    raise ValueError(f"Invalid maintainer: {value}")
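A quick sanity check of handle_depends against the formats its comment documents; a sketch rather than part of the repo's test suite, and it assumes Depends is an eq-comparable dataclass:

    # Sketch exercising handle_depends on its documented input formats.
    assert handle_depends("0ad-data (>= 0.0.26)") == Depends(
        package="0ad-data", semver=">= 0.0.26"
    )
    # platform specs in brackets are stripped before matching
    assert handle_depends("lib32gcc1-amd64-cross [amd64 arm64 i386]") == Depends(
        package="lib32gcc1-amd64-cross", semver="*"
    )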
================================================
FILE: package_managers/debian/scripts/investigate_sources.py
================================================
#!/usr/bin/env pkgx uv run
"""
Script to investigate the relationship between Debian sources and packages files.

This helps understand the data structure before modifying the parser.
"""

import os
import sys

from core.logger import Logger

logger = Logger("debian_investigator")


def parse_sources_file(file_path: str) -> dict[str, set[str]]:
    """
    Parse the sources file and return a mapping of
    source_name -> set of binary packages.

    Args:
        file_path: Path to the sources file

    Returns:
        Dictionary mapping source package names to sets of binary package
        names they produce
    """
    source_binary_map = {}

    with open(file_path, encoding="utf-8") as f:
        current_package = None
        current_binaries = set()
        in_binary_field = False

        for line in f:
            original_line = line
            line = line.strip()

            if line.startswith("Package: "):
                # Save previous package if exists
                if current_package:
                    if current_package in source_binary_map:
                        # Merge with existing binaries for this source name
                        source_binary_map[current_package].update(current_binaries)
                    else:
                        source_binary_map[current_package] = current_binaries

                # Start new package
                current_package = line[9:].strip()
                current_binaries = set()
                in_binary_field = False

            elif line.startswith("Binary: "):
                # Parse binary packages (comma-separated, may continue on
                # the next lines)
                binaries_str = line[8:].strip()
                binaries = [b.strip() for b in binaries_str.split(",") if b.strip()]
                current_binaries.update(binaries)
                in_binary_field = True

            elif current_package and original_line.startswith(" "):
                # Continuation line (starts with space)
                if in_binary_field:
                    # Continue parsing the Binary field
                    binaries_str = line.strip()
                    binaries = [
                        b.strip() for b in binaries_str.split(",") if b.strip()
                    ]
                    current_binaries.update(binaries)
                # If not in binary field, it's some other field continuation - ignore

            elif line == "" and current_package:
                # End of current package entry
                if current_package in source_binary_map:
                    # Merge with existing binaries for this source name
                    source_binary_map[current_package].update(current_binaries)
                else:
                    source_binary_map[current_package] = current_binaries
                current_package = None
                current_binaries = set()
                in_binary_field = False

            else:
                # Any other field (not Package, not Binary, not continuation);
                # this includes new fields that don't start with a space
                in_binary_field = False

        # Handle last package if file doesn't end with a blank line
        if current_package:
            if current_package in source_binary_map:
                # Merge with existing binaries for this source name
                source_binary_map[current_package].update(current_binaries)
            else:
                source_binary_map[current_package] = current_binaries

    return source_binary_map
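For reference, a hypothetical, heavily trimmed Sources stanza of the shape this function walks; the `Binary:` field frequently wraps onto indented continuation lines, which is exactly the state the in_binary_field flag tracks (compare the binutils and linux fixtures in the test file below):

    Package: binutils
    Binary: binutils-for-host, binutils-for-build,
     binutils-m68k-linux-gnu, binutils-mips64el-linux-gnuabin32-dbg
    Version: 2.40-2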
def parse_packages_file(file_path: str) -> dict[str, str | None]:
    """
    Parse the packages file and return a mapping of package_name -> source_name.

    Args:
        file_path: Path to the packages file

    Returns:
        Dictionary mapping package names to their source package names
        (None if not specified)
    """
    package_source_map = {}

    with open(file_path, encoding="utf-8") as f:
        current_package = None
        current_source = None

        for line in f:
            line = line.strip()

            if line.startswith("Package: "):
                # Save previous package if exists
                if current_package:
                    package_source_map[current_package] = current_source

                # Start new package
                current_package = line[9:].strip()
                current_source = None

            elif line.startswith("Source: "):
                # Extract source name (may include version info in parentheses)
                source_str = line[8:].strip()
                # Remove version info if present: "source (version)" -> "source"
                if "(" in source_str:
                    current_source = source_str.split("(")[0].strip()
                else:
                    current_source = source_str

            elif line == "" and current_package:
                # End of current package entry
                package_source_map[current_package] = current_source
                current_package = None
                current_source = None

        # Handle last package if file doesn't end with a blank line
        if current_package:
            package_source_map[current_package] = current_source

    return package_source_map


def investigate_mapping(sources_file: str, packages_file: str) -> None:
    """
    Investigate the mapping between sources and packages files.

    Args:
        sources_file: Path to the sources file
        packages_file: Path to the packages file
    """
    logger.log("Parsing sources file...")
    source_binary_map = parse_sources_file(sources_file)
    logger.log(f"Found {len(source_binary_map)} source packages")

    logger.log("Parsing packages file...")
    package_source_map = parse_packages_file(packages_file)
    logger.log(f"Found {len(package_source_map)} binary packages")

    # Validate mappings
    orphaned_packages = []

    logger.log("\nValidating package -> source mappings...")
    for package_name, source_name in package_source_map.items():
        if source_name:
            # Package has an explicit source reference
            if source_name not in source_binary_map:
                logger.log(
                    f"WARNING: Package '{package_name}' references unknown source '{source_name}'"
                )
                orphaned_packages.append((package_name, source_name, "unknown_source"))
            elif package_name not in source_binary_map[source_name]:
                logger.log(
                    f"WARNING: Package '{package_name}' not listed in source '{source_name}' binaries"
                )
                orphaned_packages.append((package_name, source_name, "not_in_binaries"))
        else:
            # Package has no explicit source, assume source name == package name
            if package_name not in source_binary_map:
                logger.log(
                    f"WARNING: Package '{package_name}' has no source reference and no matching source package"
                )
                orphaned_packages.append(
                    (package_name, package_name, "no_matching_source")
                )
            elif package_name not in source_binary_map[package_name]:
                logger.log(
                    f"WARNING: Package '{package_name}' not listed in its own source binaries"
                )
                orphaned_packages.append(
                    (package_name, package_name, "not_self_listed")
                )

    # Summary
    logger.log("\n=== SUMMARY ===")
    logger.log(f"Total sources: {len(source_binary_map)}")
    logger.log(f"Total packages: {len(package_source_map)}")
    logger.log(f"Orphaned packages: {len(orphaned_packages)}")

    if orphaned_packages:
        logger.log("\nOrphaned packages by category:")
        categories = {}
        for pkg, src, reason in orphaned_packages:
            if reason not in categories:
                categories[reason] = []
            categories[reason].append((pkg, src))

        for reason, items in categories.items():
            logger.log(f"  {reason}: {len(items)} packages")
            for pkg, src in items[:5]:  # Show first 5 examples
                logger.log(f"    {pkg} -> {src}")
            if len(items) > 5:
                logger.log(f"    ...
and {len(items) - 5} more") def main(): data_dir = "data/debian/latest" # Check if data files exist sources_file = os.path.join(data_dir, "sources") packages_file = os.path.join(data_dir, "packages") if not os.path.exists(sources_file): logger.log(f"ERROR: Sources file not found at {sources_file}") logger.log("Use --fetch to download the latest data") return 1 if not os.path.exists(packages_file): logger.log(f"ERROR: Packages file not found at {packages_file}") logger.log("Use --fetch to download the latest data") return 1 logger.log(f"Using sources file: {sources_file}") logger.log(f"Using packages file: {packages_file}") investigate_mapping(sources_file, packages_file) return 0 if __name__ == "__main__": sys.exit(main()) ================================================ FILE: package_managers/debian/scripts/test_investigate_sources.py ================================================ from unittest.mock import mock_open, patch import pytest from package_managers.debian.scripts.investigate_sources import parse_sources_file @pytest.fixture def binutils(): return """ Package: binutils Binary: binutils-for-host, binutils-for-build, binutils-ia64-linux-gnu-dbg, binutils-m68k-linux-gnu, binutils-mips64el-linux-gnuabin32-dbg, binutils-mipsisa64r6-linux-gnuabin32, binutils-mipsisa64r6el-linux-gnuabi64-dbg """ @pytest.fixture def linux(): return """ Package: linux Binary: linux-support-6.1.0-32, linux-doc-6.1, linux-doc, linux-source-6.1, linux-source, linux-headers-6.1.0-32-common, linux-headers-6.1.0-32-common-rt, kernel-image-6.1.0-32-alpha-generic-di, nic-modules-6.1.0-32-alpha-generic-di, nic-wireless-modules-6.1.0-32-alpha-generic-di, nic-shared-modules-6.1.0-32-alpha-generic-di, serial-modules-6.1.0-32-alpha-generic-di, usb-serial-modules-6.1.0-32-alpha-generic-di, ppp-modules-6.1.0-32-alpha-generic-di, pata-modules-6.1.0-32-alpha-generic-di, cdrom-core-modules-6.1.0-32-alpha-generic-di, scsi-core-modules-6.1.0-32-alpha-generic-di, scsi-modules-6.1.0-32-alpha-generic-di, scsi-nic-modules-6.1.0-32-alpha-generic-di, loop-modules-6.1.0-32-alpha-generic-di, btrfs-modules-6.1.0-32-alpha-generic-di, ext4-modules-6.1.0-32-alpha-generic-di, isofs-modules-6.1.0-32-alpha-generic-di, jfs-modules-6.1.0-32-alpha-generic-di, xfs-modules-6.1.0-32-alpha-generic-di, fat-modules-6.1.0-32-alpha-generic-di, squashfs-modules-6.1.0-32-alpha-generic-di, fuse-modules-6.1.0-32-alpha-generic-di, f2fs-modules-6.1.0-32-alpha-generic-di, md-modules-6.1.0-32-alpha-generic-di, multipath-modules-6.1.0-32-alpha-generic-di, usb-modules-6.1.0-32-alpha-generic-di, usb-storage-modules-6.1.0-32-alpha-generic-di, fb-modules-6.1.0-32-alpha-generic-di, input-modules-6.1.0-32-alpha-generic-di, event-modules-6.1.0-32-alpha-generic-di, mouse-modules-6.1.0-32-alpha-generic-di, nic-pcmcia-modules-6.1.0-32-alpha-generic-di, pcmcia-modules-6.1.0-32-alpha-generic-di, nic-usb-modules-6.1.0-32-alpha-generic-di, sata-modules-6.1.0-32-alpha-generic-di, i2c-modules-6.1.0-32-alpha-generic-di, crc-modules-6.1.0-32-alpha-generic-di, crypto-modules-6.1.0-32-alpha-generic-di, crypto-dm-modules-6.1.0-32-alpha-generic-di, ata-modules-6.1.0-32-alpha-generic-di, nbd-modules-6.1.0-32-alpha-generic-di, srm-modules-6.1.0-32-alpha-generic-di, linux-libc-dev, linux-config-6.1, bpftool, linux-cpupower, libcpupower1, libcpupower-dev, linux-perf, usbip, hyperv-daemons, rtla, linux-kbuild-6.1, linux-bootwrapper-6.1.0-32, linux-headers-6.1.0-32-alpha-generic, linux-image-6.1.0-32-alpha-generic, linux-image-alpha-generic, linux-headers-alpha-generic, 
linux-image-6.1.0-32-alpha-generic-dbg, linux-image-alpha-generic-dbg, linux-headers-6.1.0-32-alpha-smp, linux-image-6.1.0-32-alpha-smp, linux-image-alpha-smp, linux-headers-alpha-smp, linux-image-6.1.0-32-alpha-smp-dbg, linux-image-alpha-smp-dbg, kernel-image-6.1.0-32-amd64-di, nic-modules-6.1.0-32-amd64-di, nic-wireless-modules-6.1.0-32-amd64-di, nic-shared-modules-6.1.0-32-amd64-di, serial-modules-6.1.0-32-amd64-di, usb-serial-modules-6.1.0-32-amd64-di, ppp-modules-6.1.0-32-amd64-di, pata-modules-6.1.0-32-amd64-di, cdrom-core-modules-6.1.0-32-amd64-di, firewire-core-modules-6.1.0-32-amd64-di, scsi-core-modules-6.1.0-32-amd64-di, scsi-modules-6.1.0-32-amd64-di, scsi-nic-modules-6.1.0-32-amd64-di, loop-modules-6.1.0-32-amd64-di, btrfs-modules-6.1.0-32-amd64-di, ext4-modules-6.1.0-32-amd64-di, isofs-modules-6.1.0-32-amd64-di, jfs-modules-6.1.0-32-amd64-di, xfs-modules-6.1.0-32-amd64-di, fat-modules-6.1.0-32-amd64-di, squashfs-modules-6.1.0-32-amd64-di, udf-modules-6.1.0-32-amd64-di, fuse-modules-6.1.0-32-amd64-di, f2fs-modules-6.1.0-32-amd64-di, md-modules-6.1.0-32-amd64-di, multipath-modules-6.1.0-32-amd64-di, usb-modules-6.1.0-32-amd64-di, usb-storage-modules-6.1.0-32-amd64-di, pcmcia-storage-modules-6.1.0-32-amd64-di, fb-modules-6.1.0-32-amd64-di, input-modules-6.1.0-32-amd64-di, event-modules-6.1.0-32-amd64-di, mouse-modules-6.1.0-32-amd64-di, nic-pcmcia-modules-6.1.0-32-amd64-di, pcmcia-modules-6.1.0-32-amd64-di, nic-usb-modules-6.1.0-32-amd64-di, sata-modules-6.1.0-32-amd64-di, acpi-modules-6.1.0-32-amd64-di, i2c-modules-6.1.0-32-amd64-di, crc-modules-6.1.0-32-amd64-di, crypto-modules-6.1.0-32-amd64-di, crypto-dm-modules-6.1.0-32-amd64-di, efi-modules-6.1.0-32-amd64-di, ata-modules-6.1.0-32-amd64-di, mmc-core-modules-6.1.0-32-amd64-di, mmc-modules-6.1.0-32-amd64-di, nbd-modules-6.1.0-32-amd64-di, speakup-modules-6.1.0-32-amd64-di, uinput-modules-6.1.0-32-amd64-di, sound-modules-6.1.0-32-amd64-di, mtd-core-modules-6.1.0-32-amd64-di, rfkill-modules-6.1.0-32-amd64-di, linux-image-amd64-signed-template, linux-headers-6.1.0-32-amd64, linux-image-6.1.0-32-amd64-unsigned, linux-image-6.1.0-32-amd64-dbg, linux-image-amd64-dbg, linux-headers-6.1.0-32-cloud-amd64, linux-image-6.1.0-32-cloud-amd64-unsigned, linux-image-6.1.0-32-cloud-amd64-dbg, linux-image-cloud-amd64-dbg, linux-headers-6.1.0-32-rt-amd64, linux-image-6.1.0-32-rt-amd64-unsigned, linux-image-6.1.0-32-rt-amd64-dbg, linux-image-rt-amd64-dbg, kernel-image-6.1.0-32-arm64-di, nic-modules-6.1.0-32-arm64-di, nic-wireless-modules-6.1.0-32-arm64-di, nic-shared-modules-6.1.0-32-arm64-di, usb-serial-modules-6.1.0-32-arm64-di, ppp-modules-6.1.0-32-arm64-di, cdrom-core-modules-6.1.0-32-arm64-di, scsi-core-modules-6.1.0-32-arm64-di, scsi-modules-6.1.0-32-arm64-di, scsi-nic-modules-6.1.0-32-arm64-di, loop-modules-6.1.0-32-arm64-di, btrfs-modules-6.1.0-32-arm64-di, ext4-modules-6.1.0-32-arm64-di, isofs-modules-6.1.0-32-arm64-di, jfs-modules-6.1.0-32-arm64-di, xfs-modules-6.1.0-32-arm64-di, fat-modules-6.1.0-32-arm64-di, squashfs-modules-6.1.0-32-arm64-di, udf-modules-6.1.0-32-arm64-di, fuse-modules-6.1.0-32-arm64-di, f2fs-modules-6.1.0-32-arm64-di, md-modules-6.1.0-32-arm64-di, multipath-modules-6.1.0-32-arm64-di, usb-modules-6.1.0-32-arm64-di, usb-storage-modules-6.1.0-32-arm64-di, fb-modules-6.1.0-32-arm64-di, input-modules-6.1.0-32-arm64-di, event-modules-6.1.0-32-arm64-di, nic-usb-modules-6.1.0-32-arm64-di, sata-modules-6.1.0-32-arm64-di, i2c-modules-6.1.0-32-arm64-di, crc-modules-6.1.0-32-arm64-di, crypto-modules-6.1.0-32-arm64-di, 
crypto-dm-modules-6.1.0-32-arm64-di, efi-modules-6.1.0-32-arm64-di, ata-modules-6.1.0-32-arm64-di, mmc-modules-6.1.0-32-arm64-di, nbd-modules-6.1.0-32-arm64-di, speakup-modules-6.1.0-32-arm64-di, uinput-modules-6.1.0-32-arm64-di, sound-modules-6.1.0-32-arm64-di, leds-modules-6.1.0-32-arm64-di, mtd-core-modules-6.1.0-32-arm64-di, linux-image-arm64-signed-template, linux-headers-6.1.0-32-arm64, linux-image-6.1.0-32-arm64-unsigned, linux-image-6.1.0-32-arm64-dbg, linux-image-arm64-dbg, linux-headers-6.1.0-32-cloud-arm64, linux-image-6.1.0-32-cloud-arm64-unsigned, linux-image-6.1.0-32-cloud-arm64-dbg, linux-image-cloud-arm64-dbg, linux-headers-6.1.0-32-rt-arm64, linux-image-6.1.0-32-rt-arm64-unsigned, linux-image-6.1.0-32-rt-arm64-dbg, linux-image-rt-arm64-dbg, kernel-image-6.1.0-32-marvell-di, nic-modules-6.1.0-32-marvell-di, nic-shared-modules-6.1.0-32-marvell-di, usb-serial-modules-6.1.0-32-marvell-di, ppp-modules-6.1.0-32-marvell-di, cdrom-core-modules-6.1.0-32-marvell-di, scsi-core-modules-6.1.0-32-marvell-di, loop-modules-6.1.0-32-marvell-di, ipv6-modules-6.1.0-32-marvell-di, btrfs-modules-6.1.0-32-marvell-di, ext4-modules-6.1.0-32-marvell-di, isofs-modules-6.1.0-32-marvell-di, jffs2-modules-6.1.0-32-marvell-di, jfs-modules-6.1.0-32-marvell-di, fat-modules-6.1.0-32-marvell-di, minix-modules-6.1.0-32-marvell-di, squashfs-modules-6.1.0-32-marvell-di, udf-modules-6.1.0-32-marvell-di, fuse-modules-6.1.0-32-marvell-di, f2fs-modules-6.1.0-32-marvell-di, md-modules-6.1.0-32-marvell-di, multipath-modules-6.1.0-32-marvell-di, usb-modules-6.1.0-32-marvell-di, usb-storage-modules-6.1.0-32-marvell-di, fb-modules-6.1.0-32-marvell-di, input-modules-6.1.0-32-marvell-di, event-modules-6.1.0-32-marvell-di, mouse-modules-6.1.0-32-marvell-di, nic-usb-modules-6.1.0-32-marvell-di, sata-modules-6.1.0-32-marvell-di, crc-modules-6.1.0-32-marvell-di, crypto-modules-6.1.0-32-marvell-di, crypto-dm-modules-6.1.0-32-marvell-di, mmc-core-modules-6.1.0-32-marvell-di, mmc-modules-6.1.0-32-marvell-di, nbd-modules-6.1.0-32-marvell-di, uinput-modules-6.1.0-32-marvell-di, leds-modules-6.1.0-32-marvell-di, mtd-modules-6.1.0-32-marvell-di, mtd-core-modules-6.1.0-32-marvell-di, linux-headers-6.1.0-32-marvell, linux-image-6.1.0-32-marvell, linux-image-marvell, linux-headers-marvell, linux-image-6.1.0-32-marvell-dbg, linux-image-marvell-dbg, linux-headers-6.1.0-32-rpi, linux-image-6.1.0-32-rpi, linux-image-rpi, linux-headers-rpi, linux-image-6.1.0-32-rpi-dbg, linux-image-rpi-dbg, kernel-image-6.1.0-32-armmp-di, nic-modules-6.1.0-32-armmp-di, nic-wireless-modules-6.1.0-32-armmp-di, nic-shared-modules-6.1.0-32-armmp-di, usb-serial-modules-6.1.0-32-armmp-di, ppp-modules-6.1.0-32-armmp-di, pata-modules-6.1.0-32-armmp-di, cdrom-core-modules-6.1.0-32-armmp-di, scsi-core-modules-6.1.0-32-armmp-di, scsi-modules-6.1.0-32-armmp-di, scsi-nic-modules-6.1.0-32-armmp-di, loop-modules-6.1.0-32-armmp-di, btrfs-modules-6.1.0-32-armmp-di, ext4-modules-6.1.0-32-armmp-di, isofs-modules-6.1.0-32-armmp-di, jfs-modules-6.1.0-32-armmp-di, fat-modules-6.1.0-32-armmp-di, squashfs-modules-6.1.0-32-armmp-di, udf-modules-6.1.0-32-armmp-di, fuse-modules-6.1.0-32-armmp-di, f2fs-modules-6.1.0-32-armmp-di, md-modules-6.1.0-32-armmp-di, multipath-modules-6.1.0-32-armmp-di, usb-modules-6.1.0-32-armmp-di, usb-storage-modules-6.1.0-32-armmp-di, fb-modules-6.1.0-32-armmp-di, input-modules-6.1.0-32-armmp-di, event-modules-6.1.0-32-armmp-di, nic-usb-modules-6.1.0-32-armmp-di, sata-modules-6.1.0-32-armmp-di, i2c-modules-6.1.0-32-armmp-di, 
crc-modules-6.1.0-32-armmp-di, crypto-modules-6.1.0-32-armmp-di, crypto-dm-modules-6.1.0-32-armmp-di, efi-modules-6.1.0-32-armmp-di, ata-modules-6.1.0-32-armmp-di, mmc-modules-6.1.0-32-armmp-di, nbd-modules-6.1.0-32-armmp-di, speakup-modules-6.1.0-32-armmp-di, uinput-modules-6.1.0-32-armmp-di, sound-modules-6.1.0-32-armmp-di, leds-modules-6.1.0-32-armmp-di, mtd-modules-6.1.0-32-armmp-di, linux-headers-6.1.0-32-armmp, linux-image-6.1.0-32-armmp, linux-image-armmp, linux-headers-armmp, linux-image-6.1.0-32-armmp-dbg, linux-image-armmp-dbg, linux-headers-6.1.0-32-armmp-lpae, linux-image-6.1.0-32-armmp-lpae, linux-image-armmp-lpae, linux-headers-armmp-lpae, linux-image-6.1.0-32-armmp-lpae-dbg, linux-image-armmp-lpae-dbg, linux-headers-6.1.0-32-rt-armmp, linux-image-6.1.0-32-rt-armmp, linux-image-rt-armmp, linux-headers-rt-armmp, linux-image-6.1.0-32-rt-armmp-dbg, linux-image-rt-armmp-dbg, kernel-image-6.1.0-32-parisc-di, nic-modules-6.1.0-32-parisc-di, nic-shared-modules-6.1.0-32-parisc-di, serial-modules-6.1.0-32-parisc-di, usb-serial-modules-6.1.0-32-parisc-di, ppp-modules-6.1.0-32-parisc-di, pata-modules-6.1.0-32-parisc-di, cdrom-core-modules-6.1.0-32-parisc-di, scsi-core-modules-6.1.0-32-parisc-di, scsi-modules-6.1.0-32-parisc-di, loop-modules-6.1.0-32-parisc-di, btrfs-modules-6.1.0-32-parisc-di, ext4-modules-6.1.0-32-parisc-di, isofs-modules-6.1.0-32-parisc-di, jfs-modules-6.1.0-32-parisc-di, xfs-modules-6.1.0-32-parisc-di, fat-modules-6.1.0-32-parisc-di, squashfs-modules-6.1.0-32-parisc-di, fuse-modules-6.1.0-32-parisc-di, f2fs-modules-6.1.0-32-parisc-di, md-modules-6.1.0-32-parisc-di, multipath-modules-6.1.0-32-parisc-di, usb-modules-6.1.0-32-parisc-di, usb-storage-modules-6.1.0-32-parisc-di, input-modules-6.1.0-32-parisc-di, event-modules-6.1.0-32-parisc-di, mouse-modules-6.1.0-32-parisc-di, nic-usb-modules-6.1.0-32-parisc-di, sata-modules-6.1.0-32-parisc-di, i2c-modules-6.1.0-32-parisc-di, crc-modules-6.1.0-32-parisc-di, crypto-modules-6.1.0-32-parisc-di, crypto-dm-modules-6.1.0-32-parisc-di, ata-modules-6.1.0-32-parisc-di, nbd-modules-6.1.0-32-parisc-di, kernel-image-6.1.0-32-parisc64-di, nic-modules-6.1.0-32-parisc64-di, nic-shared-modules-6.1.0-32-parisc64-di, serial-modules-6.1.0-32-parisc64-di, usb-serial-modules-6.1.0-32-parisc64-di, ppp-modules-6.1.0-32-parisc64-di, pata-modules-6.1.0-32-parisc64-di, cdrom-core-modules-6.1.0-32-parisc64-di, scsi-core-modules-6.1.0-32-parisc64-di, scsi-modules-6.1.0-32-parisc64-di, loop-modules-6.1.0-32-parisc64-di, btrfs-modules-6.1.0-32-parisc64-di, ext4-modules-6.1.0-32-parisc64-di, isofs-modules-6.1.0-32-parisc64-di, jfs-modules-6.1.0-32-parisc64-di, xfs-modules-6.1.0-32-parisc64-di, fat-modules-6.1.0-32-parisc64-di, squashfs-modules-6.1.0-32-parisc64-di, fuse-modules-6.1.0-32-parisc64-di, f2fs-modules-6.1.0-32-parisc64-di, md-modules-6.1.0-32-parisc64-di, multipath-modules-6.1.0-32-parisc64-di, usb-modules-6.1.0-32-parisc64-di, usb-storage-modules-6.1.0-32-parisc64-di, fb-modules-6.1.0-32-parisc64-di, input-modules-6.1.0-32-parisc64-di, event-modules-6.1.0-32-parisc64-di, mouse-modules-6.1.0-32-parisc64-di, nic-usb-modules-6.1.0-32-parisc64-di, sata-modules-6.1.0-32-parisc64-di, crc-modules-6.1.0-32-parisc64-di, crypto-modules-6.1.0-32-parisc64-di, crypto-dm-modules-6.1.0-32-parisc64-di, ata-modules-6.1.0-32-parisc64-di, nbd-modules-6.1.0-32-parisc64-di, linux-headers-6.1.0-32-parisc, linux-image-6.1.0-32-parisc, linux-image-parisc, linux-headers-parisc, linux-image-6.1.0-32-parisc-dbg, linux-image-parisc-dbg, 
linux-headers-6.1.0-32-parisc64, linux-image-6.1.0-32-parisc64, linux-image-parisc64, linux-headers-parisc64, linux-image-6.1.0-32-parisc64-dbg, linux-image-parisc64-dbg, kernel-image-6.1.0-32-686-di, nic-modules-6.1.0-32-686-di, nic-wireless-modules-6.1.0-32-686-di, nic-shared-modules-6.1.0-32-686-di, serial-modules-6.1.0-32-686-di, usb-serial-modules-6.1.0-32-686-di, ppp-modules-6.1.0-32-686-di, pata-modules-6.1.0-32-686-di, cdrom-core-modules-6.1.0-32-686-di, firewire-core-modules-6.1.0-32-686-di, scsi-core-modules-6.1.0-32-686-di, scsi-modules-6.1.0-32-686-di, scsi-nic-modules-6.1.0-32-686-di, loop-modules-6.1.0-32-686-di, btrfs-modules-6.1.0-32-686-di, ext4-modules-6.1.0-32-686-di, isofs-modules-6.1.0-32-686-di, jfs-modules-6.1.0-32-686-di, xfs-modules-6.1.0-32-686-di, fat-modules-6.1.0-32-686-di, squashfs-modules-6.1.0-32-686-di, udf-modules-6.1.0-32-686-di, fuse-modules-6.1.0-32-686-di, f2fs-modules-6.1.0-32-686-di, md-modules-6.1.0-32-686-di, multipath-modules-6.1.0-32-686-di, usb-modules-6.1.0-32-686-di, usb-storage-modules-6.1.0-32-686-di, pcmcia-storage-modules-6.1.0-32-686-di, fb-modules-6.1.0-32-686-di, input-modules-6.1.0-32-686-di, event-modules-6.1.0-32-686-di, mouse-modules-6.1.0-32-686-di, nic-pcmcia-modules-6.1.0-32-686-di, pcmcia-modules-6.1.0-32-686-di, nic-usb-modules-6.1.0-32-686-di, sata-modules-6.1.0-32-686-di, acpi-modules-6.1.0-32-686-di, i2c-modules-6.1.0-32-686-di, crc-modules-6.1.0-32-686-di, crypto-modules-6.1.0-32-686-di, crypto-dm-modules-6.1.0-32-686-di, efi-modules-6.1.0-32-686-di, ata-modules-6.1.0-32-686-di, mmc-core-modules-6.1.0-32-686-di, mmc-modules-6.1.0-32-686-di, nbd-modules-6.1.0-32-686-di, speakup-modules-6.1.0-32-686-di, uinput-modules-6.1.0-32-686-di, sound-modules-6.1.0-32-686-di, mtd-core-modules-6.1.0-32-686-di, rfkill-modules-6.1.0-32-686-di, kernel-image-6.1.0-32-686-pae-di, nic-modules-6.1.0-32-686-pae-di, nic-wireless-modules-6.1.0-32-686-pae-di, nic-shared-modules-6.1.0-32-686-pae-di, serial-modules-6.1.0-32-686-pae-di, usb-serial-modules-6.1.0-32-686-pae-di, ppp-modules-6.1.0-32-686-pae-di, pata-modules-6.1.0-32-686-pae-di, cdrom-core-modules-6.1.0-32-686-pae-di, firewire-core-modules-6.1.0-32-686-pae-di, scsi-core-modules-6.1.0-32-686-pae-di, scsi-modules-6.1.0-32-686-pae-di, scsi-nic-modules-6.1.0-32-686-pae-di, loop-modules-6.1.0-32-686-pae-di, btrfs-modules-6.1.0-32-686-pae-di, ext4-modules-6.1.0-32-686-pae-di, isofs-modules-6.1.0-32-686-pae-di, jfs-modules-6.1.0-32-686-pae-di, xfs-modules-6.1.0-32-686-pae-di, fat-modules-6.1.0-32-686-pae-di, squashfs-modules-6.1.0-32-686-pae-di, udf-modules-6.1.0-32-686-pae-di, fuse-modules-6.1.0-32-686-pae-di, f2fs-modules-6.1.0-32-686-pae-di, md-modules-6.1.0-32-686-pae-di, multipath-modules-6.1.0-32-686-pae-di, usb-modules-6.1.0-32-686-pae-di, usb-storage-modules-6.1.0-32-686-pae-di, pcmcia-storage-modules-6.1.0-32-686-pae-di, fb-modules-6.1.0-32-686-pae-di, input-modules-6.1.0-32-686-pae-di, event-modules-6.1.0-32-686-pae-di, mouse-modules-6.1.0-32-686-pae-di, nic-pcmcia-modules-6.1.0-32-686-pae-di, pcmcia-modules-6.1.0-32-686-pae-di, nic-usb-modules-6.1.0-32-686-pae-di, sata-modules-6.1.0-32-686-pae-di, acpi-modules-6.1.0-32-686-pae-di, i2c-modules-6.1.0-32-686-pae-di, crc-modules-6.1.0-32-686-pae-di, crypto-modules-6.1.0-32-686-pae-di, crypto-dm-modules-6.1.0-32-686-pae-di, efi-modules-6.1.0-32-686-pae-di, ata-modules-6.1.0-32-686-pae-di, mmc-core-modules-6.1.0-32-686-pae-di, mmc-modules-6.1.0-32-686-pae-di, nbd-modules-6.1.0-32-686-pae-di, speakup-modules-6.1.0-32-686-pae-di, 
uinput-modules-6.1.0-32-686-pae-di, sound-modules-6.1.0-32-686-pae-di, mtd-core-modules-6.1.0-32-686-pae-di, rfkill-modules-6.1.0-32-686-pae-di, linux-image-i386-signed-template, linux-headers-6.1.0-32-686, linux-image-6.1.0-32-686-unsigned, linux-image-6.1.0-32-686-dbg, linux-image-686-dbg, linux-headers-6.1.0-32-686-pae, linux-image-6.1.0-32-686-pae-unsigned, linux-image-6.1.0-32-686-pae-dbg, linux-image-686-pae-dbg, linux-headers-6.1.0-32-rt-686-pae, linux-image-6.1.0-32-rt-686-pae-unsigned, linux-image-6.1.0-32-rt-686-pae-dbg, linux-image-rt-686-pae-dbg, kernel-image-6.1.0-32-itanium-di, nic-modules-6.1.0-32-itanium-di, nic-shared-modules-6.1.0-32-itanium-di, serial-modules-6.1.0-32-itanium-di, usb-serial-modules-6.1.0-32-itanium-di, ppp-modules-6.1.0-32-itanium-di, pata-modules-6.1.0-32-itanium-di, cdrom-core-modules-6.1.0-32-itanium-di, firewire-core-modules-6.1.0-32-itanium-di, scsi-core-modules-6.1.0-32-itanium-di, scsi-modules-6.1.0-32-itanium-di, scsi-nic-modules-6.1.0-32-itanium-di, loop-modules-6.1.0-32-itanium-di, btrfs-modules-6.1.0-32-itanium-di, ext4-modules-6.1.0-32-itanium-di, isofs-modules-6.1.0-32-itanium-di, jfs-modules-6.1.0-32-itanium-di, xfs-modules-6.1.0-32-itanium-di, fat-modules-6.1.0-32-itanium-di, squashfs-modules-6.1.0-32-itanium-di, udf-modules-6.1.0-32-itanium-di, fuse-modules-6.1.0-32-itanium-di, f2fs-modules-6.1.0-32-itanium-di, md-modules-6.1.0-32-itanium-di, multipath-modules-6.1.0-32-itanium-di, usb-modules-6.1.0-32-itanium-di, usb-storage-modules-6.1.0-32-itanium-di, fb-modules-6.1.0-32-itanium-di, input-modules-6.1.0-32-itanium-di, event-modules-6.1.0-32-itanium-di, mouse-modules-6.1.0-32-itanium-di, pcmcia-modules-6.1.0-32-itanium-di, nic-usb-modules-6.1.0-32-itanium-di, sata-modules-6.1.0-32-itanium-di, i2c-modules-6.1.0-32-itanium-di, crc-modules-6.1.0-32-itanium-di, crypto-modules-6.1.0-32-itanium-di, crypto-dm-modules-6.1.0-32-itanium-di, ata-modules-6.1.0-32-itanium-di, nbd-modules-6.1.0-32-itanium-di, uinput-modules-6.1.0-32-itanium-di, mtd-core-modules-6.1.0-32-itanium-di, linux-headers-6.1.0-32-itanium, linux-image-6.1.0-32-itanium, linux-image-itanium, linux-headers-itanium, linux-image-6.1.0-32-itanium-dbg, linux-image-itanium-dbg, linux-headers-6.1.0-32-mckinley, linux-image-6.1.0-32-mckinley, linux-image-mckinley, linux-headers-mckinley, linux-image-6.1.0-32-mckinley-dbg, linux-image-mckinley-dbg, kernel-image-6.1.0-32-m68k-di, nic-modules-6.1.0-32-m68k-di, nic-shared-modules-6.1.0-32-m68k-di, ppp-modules-6.1.0-32-m68k-di, pata-modules-6.1.0-32-m68k-di, cdrom-core-modules-6.1.0-32-m68k-di, scsi-core-modules-6.1.0-32-m68k-di, scsi-modules-6.1.0-32-m68k-di, loop-modules-6.1.0-32-m68k-di, btrfs-modules-6.1.0-32-m68k-di, ext4-modules-6.1.0-32-m68k-di, isofs-modules-6.1.0-32-m68k-di, fat-modules-6.1.0-32-m68k-di, hfs-modules-6.1.0-32-m68k-di, affs-modules-6.1.0-32-m68k-di, squashfs-modules-6.1.0-32-m68k-di, udf-modules-6.1.0-32-m68k-di, fuse-modules-6.1.0-32-m68k-di, md-modules-6.1.0-32-m68k-di, crc-modules-6.1.0-32-m68k-di, crypto-modules-6.1.0-32-m68k-di, ata-modules-6.1.0-32-m68k-di, nbd-modules-6.1.0-32-m68k-di, linux-headers-6.1.0-32-m68k, linux-image-6.1.0-32-m68k, linux-image-m68k, linux-headers-m68k, linux-image-6.1.0-32-m68k-dbg, linux-image-m68k-dbg, kernel-image-6.1.0-32-4kc-malta-di, nic-modules-6.1.0-32-4kc-malta-di, nic-wireless-modules-6.1.0-32-4kc-malta-di, nic-shared-modules-6.1.0-32-4kc-malta-di, usb-serial-modules-6.1.0-32-4kc-malta-di, ppp-modules-6.1.0-32-4kc-malta-di, pata-modules-6.1.0-32-4kc-malta-di, 
cdrom-core-modules-6.1.0-32-4kc-malta-di, firewire-core-modules-6.1.0-32-4kc-malta-di, scsi-core-modules-6.1.0-32-4kc-malta-di, scsi-modules-6.1.0-32-4kc-malta-di, scsi-nic-modules-6.1.0-32-4kc-malta-di, loop-modules-6.1.0-32-4kc-malta-di, btrfs-modules-6.1.0-32-4kc-malta-di, ext4-modules-6.1.0-32-4kc-malta-di, isofs-modules-6.1.0-32-4kc-malta-di, jfs-modules-6.1.0-32-4kc-malta-di, xfs-modules-6.1.0-32-4kc-malta-di, fat-modules-6.1.0-32-4kc-malta-di, affs-modules-6.1.0-32-4kc-malta-di, minix-modules-6.1.0-32-4kc-malta-di, nfs-modules-6.1.0-32-4kc-malta-di, squashfs-modules-6.1.0-32-4kc-malta-di, udf-modules-6.1.0-32-4kc-malta-di, fuse-modules-6.1.0-32-4kc-malta-di, f2fs-modules-6.1.0-32-4kc-malta-di, md-modules-6.1.0-32-4kc-malta-di, multipath-modules-6.1.0-32-4kc-malta-di, usb-modules-6.1.0-32-4kc-malta-di, usb-storage-modules-6.1.0-32-4kc-malta-di, fb-modules-6.1.0-32-4kc-malta-di, input-modules-6.1.0-32-4kc-malta-di, event-modules-6.1.0-32-4kc-malta-di, mouse-modules-6.1.0-32-4kc-malta-di, nic-usb-modules-6.1.0-32-4kc-malta-di, sata-modules-6.1.0-32-4kc-malta-di, crc-modules-6.1.0-32-4kc-malta-di, crypto-modules-6.1.0-32-4kc-malta-di, crypto-dm-modules-6.1.0-32-4kc-malta-di, ata-modules-6.1.0-32-4kc-malta-di, mmc-core-modules-6.1.0-32-4kc-malta-di, mmc-modules-6.1.0-32-4kc-malta-di, nbd-modules-6.1.0-32-4kc-malta-di, speakup-modules-6.1.0-32-4kc-malta-di, sound-modules-6.1.0-32-4kc-malta-di, kernel-image-6.1.0-32-mips32r2eb-di, nic-modules-6.1.0-32-mips32r2eb-di, nic-wireless-modules-6.1.0-32-mips32r2eb-di, nic-shared-modules-6.1.0-32-mips32r2eb-di, usb-serial-modules-6.1.0-32-mips32r2eb-di, ppp-modules-6.1.0-32-mips32r2eb-di, pata-modules-6.1.0-32-mips32r2eb-di, cdrom-core-modules-6.1.0-32-mips32r2eb-di, firewire-core-modules-6.1.0-32-mips32r2eb-di, scsi-core-modules-6.1.0-32-mips32r2eb-di, scsi-modules-6.1.0-32-mips32r2eb-di, scsi-nic-modules-6.1.0-32-mips32r2eb-di, loop-modules-6.1.0-32-mips32r2eb-di, btrfs-modules-6.1.0-32-mips32r2eb-di, ext4-modules-6.1.0-32-mips32r2eb-di, isofs-modules-6.1.0-32-mips32r2eb-di, jfs-modules-6.1.0-32-mips32r2eb-di, xfs-modules-6.1.0-32-mips32r2eb-di, fat-modules-6.1.0-32-mips32r2eb-di, affs-modules-6.1.0-32-mips32r2eb-di, minix-modules-6.1.0-32-mips32r2eb-di, nfs-modules-6.1.0-32-mips32r2eb-di, squashfs-modules-6.1.0-32-mips32r2eb-di, udf-modules-6.1.0-32-mips32r2eb-di, fuse-modules-6.1.0-32-mips32r2eb-di, f2fs-modules-6.1.0-32-mips32r2eb-di, md-modules-6.1.0-32-mips32r2eb-di, multipath-modules-6.1.0-32-mips32r2eb-di, usb-modules-6.1.0-32-mips32r2eb-di, usb-storage-modules-6.1.0-32-mips32r2eb-di, fb-modules-6.1.0-32-mips32r2eb-di, input-modules-6.1.0-32-mips32r2eb-di, event-modules-6.1.0-32-mips32r2eb-di, mouse-modules-6.1.0-32-mips32r2eb-di, nic-usb-modules-6.1.0-32-mips32r2eb-di, sata-modules-6.1.0-32-mips32r2eb-di, crc-modules-6.1.0-32-mips32r2eb-di, crypto-modules-6.1.0-32-mips32r2eb-di, crypto-dm-modules-6.1.0-32-mips32r2eb-di, ata-modules-6.1.0-32-mips32r2eb-di, mmc-core-modules-6.1.0-32-mips32r2eb-di, mmc-modules-6.1.0-32-mips32r2eb-di, nbd-modules-6.1.0-32-mips32r2eb-di, speakup-modules-6.1.0-32-mips32r2eb-di, sound-modules-6.1.0-32-mips32r2eb-di, kernel-image-6.1.0-32-octeon-di, nic-modules-6.1.0-32-octeon-di, nic-wireless-modules-6.1.0-32-octeon-di, nic-shared-modules-6.1.0-32-octeon-di, usb-serial-modules-6.1.0-32-octeon-di, ppp-modules-6.1.0-32-octeon-di, pata-modules-6.1.0-32-octeon-di, cdrom-core-modules-6.1.0-32-octeon-di, firewire-core-modules-6.1.0-32-octeon-di, scsi-core-modules-6.1.0-32-octeon-di, scsi-modules-6.1.0-32-octeon-di, 
scsi-nic-modules-6.1.0-32-octeon-di, loop-modules-6.1.0-32-octeon-di, btrfs-modules-6.1.0-32-octeon-di, ext4-modules-6.1.0-32-octeon-di, isofs-modules-6.1.0-32-octeon-di, jfs-modules-6.1.0-32-octeon-di, xfs-modules-6.1.0-32-octeon-di, fat-modules-6.1.0-32-octeon-di, affs-modules-6.1.0-32-octeon-di, minix-modules-6.1.0-32-octeon-di, nfs-modules-6.1.0-32-octeon-di, squashfs-modules-6.1.0-32-octeon-di, udf-modules-6.1.0-32-octeon-di, fuse-modules-6.1.0-32-octeon-di, f2fs-modules-6.1.0-32-octeon-di, md-modules-6.1.0-32-octeon-di, multipath-modules-6.1.0-32-octeon-di, usb-modules-6.1.0-32-octeon-di, usb-storage-modules-6.1.0-32-octeon-di, fb-modules-6.1.0-32-octeon-di, input-modules-6.1.0-32-octeon-di, event-modules-6.1.0-32-octeon-di, mouse-modules-6.1.0-32-octeon-di, nic-usb-modules-6.1.0-32-octeon-di, sata-modules-6.1.0-32-octeon-di, crc-modules-6.1.0-32-octeon-di, crypto-modules-6.1.0-32-octeon-di, crypto-dm-modules-6.1.0-32-octeon-di, ata-modules-6.1.0-32-octeon-di, mmc-core-modules-6.1.0-32-octeon-di, mmc-modules-6.1.0-32-octeon-di, nbd-modules-6.1.0-32-octeon-di, speakup-modules-6.1.0-32-octeon-di, sound-modules-6.1.0-32-octeon-di, linux-headers-6.1.0-32-4kc-malta, linux-image-6.1.0-32-4kc-malta, linux-image-4kc-malta, linux-headers-4kc-malta, linux-image-6.1.0-32-4kc-malta-dbg, linux-image-4kc-malta-dbg, linux-headers-6.1.0-32-mips32r2eb, linux-image-6.1.0-32-mips32r2eb, linux-image-mips32r2eb, linux-headers-mips32r2eb, linux-image-6.1.0-32-mips32r2eb-dbg, linux-image-mips32r2eb-dbg, linux-headers-6.1.0-32-octeon, linux-image-6.1.0-32-octeon, linux-image-octeon, linux-headers-octeon, linux-image-6.1.0-32-octeon-dbg, linux-image-octeon-dbg, kernel-image-6.1.0-32-5kc-malta-di, nic-modules-6.1.0-32-5kc-malta-di, nic-wireless-modules-6.1.0-32-5kc-malta-di, nic-shared-modules-6.1.0-32-5kc-malta-di, usb-serial-modules-6.1.0-32-5kc-malta-di, ppp-modules-6.1.0-32-5kc-malta-di, pata-modules-6.1.0-32-5kc-malta-di, cdrom-core-modules-6.1.0-32-5kc-malta-di, firewire-core-modules-6.1.0-32-5kc-malta-di, scsi-core-modules-6.1.0-32-5kc-malta-di, scsi-modules-6.1.0-32-5kc-malta-di, scsi-nic-modules-6.1.0-32-5kc-malta-di, loop-modules-6.1.0-32-5kc-malta-di, btrfs-modules-6.1.0-32-5kc-malta-di, ext4-modules-6.1.0-32-5kc-malta-di, isofs-modules-6.1.0-32-5kc-malta-di, jfs-modules-6.1.0-32-5kc-malta-di, xfs-modules-6.1.0-32-5kc-malta-di, fat-modules-6.1.0-32-5kc-malta-di, affs-modules-6.1.0-32-5kc-malta-di, minix-modules-6.1.0-32-5kc-malta-di, nfs-modules-6.1.0-32-5kc-malta-di, squashfs-modules-6.1.0-32-5kc-malta-di, udf-modules-6.1.0-32-5kc-malta-di, fuse-modules-6.1.0-32-5kc-malta-di, f2fs-modules-6.1.0-32-5kc-malta-di, md-modules-6.1.0-32-5kc-malta-di, multipath-modules-6.1.0-32-5kc-malta-di, usb-modules-6.1.0-32-5kc-malta-di, usb-storage-modules-6.1.0-32-5kc-malta-di, fb-modules-6.1.0-32-5kc-malta-di, input-modules-6.1.0-32-5kc-malta-di, event-modules-6.1.0-32-5kc-malta-di, mouse-modules-6.1.0-32-5kc-malta-di, nic-usb-modules-6.1.0-32-5kc-malta-di, sata-modules-6.1.0-32-5kc-malta-di, crc-modules-6.1.0-32-5kc-malta-di, crypto-modules-6.1.0-32-5kc-malta-di, crypto-dm-modules-6.1.0-32-5kc-malta-di, ata-modules-6.1.0-32-5kc-malta-di, mmc-core-modules-6.1.0-32-5kc-malta-di, mmc-modules-6.1.0-32-5kc-malta-di, nbd-modules-6.1.0-32-5kc-malta-di, speakup-modules-6.1.0-32-5kc-malta-di, sound-modules-6.1.0-32-5kc-malta-di, kernel-image-6.1.0-32-mips64r2eb-di, nic-modules-6.1.0-32-mips64r2eb-di, nic-wireless-modules-6.1.0-32-mips64r2eb-di, nic-shared-modules-6.1.0-32-mips64r2eb-di, 
usb-serial-modules-6.1.0-32-mips64r2eb-di, ppp-modules-6.1.0-32-mips64r2eb-di, pata-modules-6.1.0-32-mips64r2eb-di, cdrom-core-modules-6.1.0-32-mips64r2eb-di, firewire-core-modules-6.1.0-32-mips64r2eb-di, scsi-core-modules-6.1.0-32-mips64r2eb-di, scsi-modules-6.1.0-32-mips64r2eb-di, scsi-nic-modules-6.1.0-32-mips64r2eb-di, loop-modules-6.1.0-32-mips64r2eb-di, btrfs-modules-6.1.0-32-mips64r2eb-di, ext4-modules-6.1.0-32-mips64r2eb-di, isofs-modules-6.1.0-32-mips64r2eb-di, jfs-modules-6.1.0-32-mips64r2eb-di, xfs-modules-6.1.0-32-mips64r2eb-di, fat-modules-6.1.0-32-mips64r2eb-di, affs-modules-6.1.0-32-mips64r2eb-di, minix-modules-6.1.0-32-mips64r2eb-di, nfs-modules-6.1.0-32-mips64r2eb-di, squashfs-modules-6.1.0-32-mips64r2eb-di, udf-modules-6.1.0-32-mips64r2eb-di, fuse-modules-6.1.0-32-mips64r2eb-di, f2fs-modules-6.1.0-32-mips64r2eb-di, md-modules-6.1.0-32-mips64r2eb-di, multipath-modules-6.1.0-32-mips64r2eb-di, usb-modules-6.1.0-32-mips64r2eb-di, usb-storage-modules-6.1.0-32-mips64r2eb-di, fb-modules-6.1.0-32-mips64r2eb-di, input-modules-6.1.0-32-mips64r2eb-di, event-modules-6.1.0-32-mips64r2eb-di, mouse-modules-6.1.0-32-mips64r2eb-di, nic-usb-modules-6.1.0-32-mips64r2eb-di, sata-modules-6.1.0-32-mips64r2eb-di, crc-modules-6.1.0-32-mips64r2eb-di, crypto-modules-6.1.0-32-mips64r2eb-di, crypto-dm-modules-6.1.0-32-mips64r2eb-di, ata-modules-6.1.0-32-mips64r2eb-di, mmc-core-modules-6.1.0-32-mips64r2eb-di, mmc-modules-6.1.0-32-mips64r2eb-di, nbd-modules-6.1.0-32-mips64r2eb-di, speakup-modules-6.1.0-32-mips64r2eb-di, sound-modules-6.1.0-32-mips64r2eb-di, linux-headers-6.1.0-32-5kc-malta, linux-image-6.1.0-32-5kc-malta, linux-image-5kc-malta, linux-headers-5kc-malta, linux-image-6.1.0-32-5kc-malta-dbg, linux-image-5kc-malta-dbg, linux-headers-6.1.0-32-mips64r2eb, linux-image-6.1.0-32-mips64r2eb, linux-image-mips64r2eb, linux-headers-mips64r2eb, linux-image-6.1.0-32-mips64r2eb-dbg, linux-image-mips64r2eb-dbg, kernel-image-6.1.0-32-loongson-3-di, nic-modules-6.1.0-32-loongson-3-di, nic-wireless-modules-6.1.0-32-loongson-3-di, nic-shared-modules-6.1.0-32-loongson-3-di, usb-serial-modules-6.1.0-32-loongson-3-di, ppp-modules-6.1.0-32-loongson-3-di, pata-modules-6.1.0-32-loongson-3-di, cdrom-core-modules-6.1.0-32-loongson-3-di, firewire-core-modules-6.1.0-32-loongson-3-di, scsi-core-modules-6.1.0-32-loongson-3-di, scsi-modules-6.1.0-32-loongson-3-di, scsi-nic-modules-6.1.0-32-loongson-3-di, loop-modules-6.1.0-32-loongson-3-di, btrfs-modules-6.1.0-32-loongson-3-di, ext4-modules-6.1.0-32-loongson-3-di, isofs-modules-6.1.0-32-loongson-3-di, jfs-modules-6.1.0-32-loongson-3-di, xfs-modules-6.1.0-32-loongson-3-di, fat-modules-6.1.0-32-loongson-3-di, affs-modules-6.1.0-32-loongson-3-di, minix-modules-6.1.0-32-loongson-3-di, nfs-modules-6.1.0-32-loongson-3-di, squashfs-modules-6.1.0-32-loongson-3-di, udf-modules-6.1.0-32-loongson-3-di, fuse-modules-6.1.0-32-loongson-3-di, f2fs-modules-6.1.0-32-loongson-3-di, md-modules-6.1.0-32-loongson-3-di, multipath-modules-6.1.0-32-loongson-3-di, usb-modules-6.1.0-32-loongson-3-di, usb-storage-modules-6.1.0-32-loongson-3-di, fb-modules-6.1.0-32-loongson-3-di, input-modules-6.1.0-32-loongson-3-di, event-modules-6.1.0-32-loongson-3-di, mouse-modules-6.1.0-32-loongson-3-di, nic-usb-modules-6.1.0-32-loongson-3-di, sata-modules-6.1.0-32-loongson-3-di, crc-modules-6.1.0-32-loongson-3-di, crypto-modules-6.1.0-32-loongson-3-di, crypto-dm-modules-6.1.0-32-loongson-3-di, ata-modules-6.1.0-32-loongson-3-di, mmc-core-modules-6.1.0-32-loongson-3-di, mmc-modules-6.1.0-32-loongson-3-di, 
nbd-modules-6.1.0-32-loongson-3-di, speakup-modules-6.1.0-32-loongson-3-di, sound-modules-6.1.0-32-loongson-3-di, kernel-image-6.1.0-32-mips64r2el-di, nic-modules-6.1.0-32-mips64r2el-di, nic-wireless-modules-6.1.0-32-mips64r2el-di, nic-shared-modules-6.1.0-32-mips64r2el-di, usb-serial-modules-6.1.0-32-mips64r2el-di, ppp-modules-6.1.0-32-mips64r2el-di, pata-modules-6.1.0-32-mips64r2el-di, cdrom-core-modules-6.1.0-32-mips64r2el-di, firewire-core-modules-6.1.0-32-mips64r2el-di, scsi-core-modules-6.1.0-32-mips64r2el-di, scsi-modules-6.1.0-32-mips64r2el-di, scsi-nic-modules-6.1.0-32-mips64r2el-di, loop-modules-6.1.0-32-mips64r2el-di, btrfs-modules-6.1.0-32-mips64r2el-di, ext4-modules-6.1.0-32-mips64r2el-di, isofs-modules-6.1.0-32-mips64r2el-di, jfs-modules-6.1.0-32-mips64r2el-di, xfs-modules-6.1.0-32-mips64r2el-di, fat-modules-6.1.0-32-mips64r2el-di, affs-modules-6.1.0-32-mips64r2el-di, minix-modules-6.1.0-32-mips64r2el-di, nfs-modules-6.1.0-32-mips64r2el-di, squashfs-modules-6.1.0-32-mips64r2el-di, udf-modules-6.1.0-32-mips64r2el-di, fuse-modules-6.1.0-32-mips64r2el-di, f2fs-modules-6.1.0-32-mips64r2el-di, md-modules-6.1.0-32-mips64r2el-di, multipath-modules-6.1.0-32-mips64r2el-di, usb-modules-6.1.0-32-mips64r2el-di, usb-storage-modules-6.1.0-32-mips64r2el-di, fb-modules-6.1.0-32-mips64r2el-di, input-modules-6.1.0-32-mips64r2el-di, event-modules-6.1.0-32-mips64r2el-di, mouse-modules-6.1.0-32-mips64r2el-di, nic-usb-modules-6.1.0-32-mips64r2el-di, sata-modules-6.1.0-32-mips64r2el-di, crc-modules-6.1.0-32-mips64r2el-di, crypto-modules-6.1.0-32-mips64r2el-di, crypto-dm-modules-6.1.0-32-mips64r2el-di, ata-modules-6.1.0-32-mips64r2el-di, mmc-core-modules-6.1.0-32-mips64r2el-di, mmc-modules-6.1.0-32-mips64r2el-di, nbd-modules-6.1.0-32-mips64r2el-di, speakup-modules-6.1.0-32-mips64r2el-di, sound-modules-6.1.0-32-mips64r2el-di, linux-headers-6.1.0-32-mips64r2el, linux-image-6.1.0-32-mips64r2el, linux-image-mips64r2el, linux-headers-mips64r2el, linux-image-6.1.0-32-mips64r2el-dbg, linux-image-mips64r2el-dbg, linux-headers-6.1.0-32-loongson-3, linux-image-6.1.0-32-loongson-3, linux-image-loongson-3, linux-headers-loongson-3, linux-image-6.1.0-32-loongson-3-dbg, linux-image-loongson-3-dbg, kernel-image-6.1.0-32-mips64r6eb-di, nic-modules-6.1.0-32-mips64r6eb-di, nic-wireless-modules-6.1.0-32-mips64r6eb-di, nic-shared-modules-6.1.0-32-mips64r6eb-di, usb-serial-modules-6.1.0-32-mips64r6eb-di, ppp-modules-6.1.0-32-mips64r6eb-di, pata-modules-6.1.0-32-mips64r6eb-di, cdrom-core-modules-6.1.0-32-mips64r6eb-di, firewire-core-modules-6.1.0-32-mips64r6eb-di, scsi-core-modules-6.1.0-32-mips64r6eb-di, scsi-modules-6.1.0-32-mips64r6eb-di, scsi-nic-modules-6.1.0-32-mips64r6eb-di, loop-modules-6.1.0-32-mips64r6eb-di, btrfs-modules-6.1.0-32-mips64r6eb-di, ext4-modules-6.1.0-32-mips64r6eb-di, isofs-modules-6.1.0-32-mips64r6eb-di, jfs-modules-6.1.0-32-mips64r6eb-di, xfs-modules-6.1.0-32-mips64r6eb-di, fat-modules-6.1.0-32-mips64r6eb-di, affs-modules-6.1.0-32-mips64r6eb-di, minix-modules-6.1.0-32-mips64r6eb-di, nfs-modules-6.1.0-32-mips64r6eb-di, squashfs-modules-6.1.0-32-mips64r6eb-di, udf-modules-6.1.0-32-mips64r6eb-di, fuse-modules-6.1.0-32-mips64r6eb-di, f2fs-modules-6.1.0-32-mips64r6eb-di, md-modules-6.1.0-32-mips64r6eb-di, multipath-modules-6.1.0-32-mips64r6eb-di, usb-modules-6.1.0-32-mips64r6eb-di, usb-storage-modules-6.1.0-32-mips64r6eb-di, fb-modules-6.1.0-32-mips64r6eb-di, input-modules-6.1.0-32-mips64r6eb-di, event-modules-6.1.0-32-mips64r6eb-di, mouse-modules-6.1.0-32-mips64r6eb-di, 
nic-usb-modules-6.1.0-32-mips64r6eb-di, sata-modules-6.1.0-32-mips64r6eb-di, crc-modules-6.1.0-32-mips64r6eb-di, crypto-modules-6.1.0-32-mips64r6eb-di, crypto-dm-modules-6.1.0-32-mips64r6eb-di, ata-modules-6.1.0-32-mips64r6eb-di, mmc-core-modules-6.1.0-32-mips64r6eb-di, mmc-modules-6.1.0-32-mips64r6eb-di, nbd-modules-6.1.0-32-mips64r6eb-di, speakup-modules-6.1.0-32-mips64r6eb-di, sound-modules-6.1.0-32-mips64r6eb-di, linux-headers-6.1.0-32-mips64r6eb, linux-image-6.1.0-32-mips64r6eb, linux-image-mips64r6eb, linux-headers-mips64r6eb, linux-image-6.1.0-32-mips64r6eb-dbg, linux-image-mips64r6eb-dbg, kernel-image-6.1.0-32-mips64r6el-di, nic-modules-6.1.0-32-mips64r6el-di, nic-wireless-modules-6.1.0-32-mips64r6el-di, nic-shared-modules-6.1.0-32-mips64r6el-di, usb-serial-modules-6.1.0-32-mips64r6el-di, ppp-modules-6.1.0-32-mips64r6el-di, pata-modules-6.1.0-32-mips64r6el-di, cdrom-core-modules-6.1.0-32-mips64r6el-di, firewire-core-modules-6.1.0-32-mips64r6el-di, scsi-core-modules-6.1.0-32-mips64r6el-di, scsi-modules-6.1.0-32-mips64r6el-di, scsi-nic-modules-6.1.0-32-mips64r6el-di, loop-modules-6.1.0-32-mips64r6el-di, btrfs-modules-6.1.0-32-mips64r6el-di, ext4-modules-6.1.0-32-mips64r6el-di, isofs-modules-6.1.0-32-mips64r6el-di, jfs-modules-6.1.0-32-mips64r6el-di, xfs-modules-6.1.0-32-mips64r6el-di, fat-modules-6.1.0-32-mips64r6el-di, affs-modules-6.1.0-32-mips64r6el-di, minix-modules-6.1.0-32-mips64r6el-di, nfs-modules-6.1.0-32-mips64r6el-di, squashfs-modules-6.1.0-32-mips64r6el-di, udf-modules-6.1.0-32-mips64r6el-di, fuse-modules-6.1.0-32-mips64r6el-di, f2fs-modules-6.1.0-32-mips64r6el-di, md-modules-6.1.0-32-mips64r6el-di, multipath-modules-6.1.0-32-mips64r6el-di, usb-modules-6.1.0-32-mips64r6el-di, usb-storage-modules-6.1.0-32-mips64r6el-di, fb-modules-6.1.0-32-mips64r6el-di, input-modules-6.1.0-32-mips64r6el-di, event-modules-6.1.0-32-mips64r6el-di, mouse-modules-6.1.0-32-mips64r6el-di, nic-usb-modules-6.1.0-32-mips64r6el-di, sata-modules-6.1.0-32-mips64r6el-di, crc-modules-6.1.0-32-mips64r6el-di, crypto-modules-6.1.0-32-mips64r6el-di, crypto-dm-modules-6.1.0-32-mips64r6el-di, ata-modules-6.1.0-32-mips64r6el-di, mmc-core-modules-6.1.0-32-mips64r6el-di, mmc-modules-6.1.0-32-mips64r6el-di, nbd-modules-6.1.0-32-mips64r6el-di, speakup-modules-6.1.0-32-mips64r6el-di, sound-modules-6.1.0-32-mips64r6el-di, linux-headers-6.1.0-32-mips64r6el, linux-image-6.1.0-32-mips64r6el, linux-image-mips64r6el, linux-headers-mips64r6el, linux-image-6.1.0-32-mips64r6el-dbg, linux-image-mips64r6el-dbg, kernel-image-6.1.0-32-mips32r2el-di, nic-modules-6.1.0-32-mips32r2el-di, nic-wireless-modules-6.1.0-32-mips32r2el-di, nic-shared-modules-6.1.0-32-mips32r2el-di, usb-serial-modules-6.1.0-32-mips32r2el-di, ppp-modules-6.1.0-32-mips32r2el-di, pata-modules-6.1.0-32-mips32r2el-di, cdrom-core-modules-6.1.0-32-mips32r2el-di, firewire-core-modules-6.1.0-32-mips32r2el-di, scsi-core-modules-6.1.0-32-mips32r2el-di, scsi-modules-6.1.0-32-mips32r2el-di, scsi-nic-modules-6.1.0-32-mips32r2el-di, loop-modules-6.1.0-32-mips32r2el-di, btrfs-modules-6.1.0-32-mips32r2el-di, ext4-modules-6.1.0-32-mips32r2el-di, isofs-modules-6.1.0-32-mips32r2el-di, jfs-modules-6.1.0-32-mips32r2el-di, xfs-modules-6.1.0-32-mips32r2el-di, fat-modules-6.1.0-32-mips32r2el-di, affs-modules-6.1.0-32-mips32r2el-di, minix-modules-6.1.0-32-mips32r2el-di, nfs-modules-6.1.0-32-mips32r2el-di, squashfs-modules-6.1.0-32-mips32r2el-di, udf-modules-6.1.0-32-mips32r2el-di, fuse-modules-6.1.0-32-mips32r2el-di, f2fs-modules-6.1.0-32-mips32r2el-di, 
md-modules-6.1.0-32-mips32r2el-di, multipath-modules-6.1.0-32-mips32r2el-di, usb-modules-6.1.0-32-mips32r2el-di, usb-storage-modules-6.1.0-32-mips32r2el-di, fb-modules-6.1.0-32-mips32r2el-di, input-modules-6.1.0-32-mips32r2el-di, event-modules-6.1.0-32-mips32r2el-di, mouse-modules-6.1.0-32-mips32r2el-di, nic-usb-modules-6.1.0-32-mips32r2el-di, sata-modules-6.1.0-32-mips32r2el-di, crc-modules-6.1.0-32-mips32r2el-di, crypto-modules-6.1.0-32-mips32r2el-di, crypto-dm-modules-6.1.0-32-mips32r2el-di, ata-modules-6.1.0-32-mips32r2el-di, mmc-core-modules-6.1.0-32-mips32r2el-di, mmc-modules-6.1.0-32-mips32r2el-di, nbd-modules-6.1.0-32-mips32r2el-di, speakup-modules-6.1.0-32-mips32r2el-di, sound-modules-6.1.0-32-mips32r2el-di, linux-headers-6.1.0-32-mips32r2el, linux-image-6.1.0-32-mips32r2el, linux-image-mips32r2el, linux-headers-mips32r2el, linux-image-6.1.0-32-mips32r2el-dbg, linux-image-mips32r2el-dbg, kernel-image-6.1.0-32-mips32r6eb-di, nic-modules-6.1.0-32-mips32r6eb-di, nic-wireless-modules-6.1.0-32-mips32r6eb-di, nic-shared-modules-6.1.0-32-mips32r6eb-di, usb-serial-modules-6.1.0-32-mips32r6eb-di, ppp-modules-6.1.0-32-mips32r6eb-di, pata-modules-6.1.0-32-mips32r6eb-di, cdrom-core-modules-6.1.0-32-mips32r6eb-di, firewire-core-modules-6.1.0-32-mips32r6eb-di, scsi-core-modules-6.1.0-32-mips32r6eb-di, scsi-modules-6.1.0-32-mips32r6eb-di, scsi-nic-modules-6.1.0-32-mips32r6eb-di, loop-modules-6.1.0-32-mips32r6eb-di, btrfs-modules-6.1.0-32-mips32r6eb-di, ext4-modules-6.1.0-32-mips32r6eb-di, isofs-modules-6.1.0-32-mips32r6eb-di, jfs-modules-6.1.0-32-mips32r6eb-di, xfs-modules-6.1.0-32-mips32r6eb-di, fat-modules-6.1.0-32-mips32r6eb-di, affs-modules-6.1.0-32-mips32r6eb-di, minix-modules-6.1.0-32-mips32r6eb-di, nfs-modules-6.1.0-32-mips32r6eb-di, squashfs-modules-6.1.0-32-mips32r6eb-di, udf-modules-6.1.0-32-mips32r6eb-di, fuse-modules-6.1.0-32-mips32r6eb-di, f2fs-modules-6.1.0-32-mips32r6eb-di, md-modules-6.1.0-32-mips32r6eb-di, multipath-modules-6.1.0-32-mips32r6eb-di, usb-modules-6.1.0-32-mips32r6eb-di, usb-storage-modules-6.1.0-32-mips32r6eb-di, fb-modules-6.1.0-32-mips32r6eb-di, input-modules-6.1.0-32-mips32r6eb-di, event-modules-6.1.0-32-mips32r6eb-di, mouse-modules-6.1.0-32-mips32r6eb-di, nic-usb-modules-6.1.0-32-mips32r6eb-di, sata-modules-6.1.0-32-mips32r6eb-di, crc-modules-6.1.0-32-mips32r6eb-di, crypto-modules-6.1.0-32-mips32r6eb-di, crypto-dm-modules-6.1.0-32-mips32r6eb-di, ata-modules-6.1.0-32-mips32r6eb-di, mmc-core-modules-6.1.0-32-mips32r6eb-di, mmc-modules-6.1.0-32-mips32r6eb-di, nbd-modules-6.1.0-32-mips32r6eb-di, speakup-modules-6.1.0-32-mips32r6eb-di, sound-modules-6.1.0-32-mips32r6eb-di, linux-headers-6.1.0-32-mips32r6eb, linux-image-6.1.0-32-mips32r6eb, linux-image-mips32r6eb, linux-headers-mips32r6eb, linux-image-6.1.0-32-mips32r6eb-dbg, linux-image-mips32r6eb-dbg, kernel-image-6.1.0-32-mips32r6el-di, nic-modules-6.1.0-32-mips32r6el-di, nic-wireless-modules-6.1.0-32-mips32r6el-di, nic-shared-modules-6.1.0-32-mips32r6el-di, usb-serial-modules-6.1.0-32-mips32r6el-di, ppp-modules-6.1.0-32-mips32r6el-di, pata-modules-6.1.0-32-mips32r6el-di, cdrom-core-modules-6.1.0-32-mips32r6el-di, firewire-core-modules-6.1.0-32-mips32r6el-di, scsi-core-modules-6.1.0-32-mips32r6el-di, scsi-modules-6.1.0-32-mips32r6el-di, scsi-nic-modules-6.1.0-32-mips32r6el-di, loop-modules-6.1.0-32-mips32r6el-di, btrfs-modules-6.1.0-32-mips32r6el-di, ext4-modules-6.1.0-32-mips32r6el-di, isofs-modules-6.1.0-32-mips32r6el-di, jfs-modules-6.1.0-32-mips32r6el-di, xfs-modules-6.1.0-32-mips32r6el-di, 
fat-modules-6.1.0-32-mips32r6el-di, affs-modules-6.1.0-32-mips32r6el-di, minix-modules-6.1.0-32-mips32r6el-di, nfs-modules-6.1.0-32-mips32r6el-di, squashfs-modules-6.1.0-32-mips32r6el-di, udf-modules-6.1.0-32-mips32r6el-di, fuse-modules-6.1.0-32-mips32r6el-di, f2fs-modules-6.1.0-32-mips32r6el-di, md-modules-6.1.0-32-mips32r6el-di, multipath-modules-6.1.0-32-mips32r6el-di, usb-modules-6.1.0-32-mips32r6el-di, usb-storage-modules-6.1.0-32-mips32r6el-di, fb-modules-6.1.0-32-mips32r6el-di, input-modules-6.1.0-32-mips32r6el-di, event-modules-6.1.0-32-mips32r6el-di, mouse-modules-6.1.0-32-mips32r6el-di, nic-usb-modules-6.1.0-32-mips32r6el-di, sata-modules-6.1.0-32-mips32r6el-di, crc-modules-6.1.0-32-mips32r6el-di, crypto-modules-6.1.0-32-mips32r6el-di, crypto-dm-modules-6.1.0-32-mips32r6el-di, ata-modules-6.1.0-32-mips32r6el-di, mmc-core-modules-6.1.0-32-mips32r6el-di, mmc-modules-6.1.0-32-mips32r6el-di, nbd-modules-6.1.0-32-mips32r6el-di, speakup-modules-6.1.0-32-mips32r6el-di, sound-modules-6.1.0-32-mips32r6el-di, linux-headers-6.1.0-32-mips32r6el, linux-image-6.1.0-32-mips32r6el, linux-image-mips32r6el, linux-headers-mips32r6el, linux-image-6.1.0-32-mips32r6el-dbg, linux-image-mips32r6el-dbg, kernel-image-6.1.0-32-powerpc-di, nic-modules-6.1.0-32-powerpc-di, nic-wireless-modules-6.1.0-32-powerpc-di, nic-shared-modules-6.1.0-32-powerpc-di, serial-modules-6.1.0-32-powerpc-di, usb-serial-modules-6.1.0-32-powerpc-di, ppp-modules-6.1.0-32-powerpc-di, pata-modules-6.1.0-32-powerpc-di, cdrom-core-modules-6.1.0-32-powerpc-di, firewire-core-modules-6.1.0-32-powerpc-di, scsi-core-modules-6.1.0-32-powerpc-di, scsi-modules-6.1.0-32-powerpc-di, scsi-nic-modules-6.1.0-32-powerpc-di, loop-modules-6.1.0-32-powerpc-di, btrfs-modules-6.1.0-32-powerpc-di, ext4-modules-6.1.0-32-powerpc-di, isofs-modules-6.1.0-32-powerpc-di, jfs-modules-6.1.0-32-powerpc-di, xfs-modules-6.1.0-32-powerpc-di, fat-modules-6.1.0-32-powerpc-di, hfs-modules-6.1.0-32-powerpc-di, affs-modules-6.1.0-32-powerpc-di, squashfs-modules-6.1.0-32-powerpc-di, udf-modules-6.1.0-32-powerpc-di, fuse-modules-6.1.0-32-powerpc-di, f2fs-modules-6.1.0-32-powerpc-di, md-modules-6.1.0-32-powerpc-di, multipath-modules-6.1.0-32-powerpc-di, usb-modules-6.1.0-32-powerpc-di, usb-storage-modules-6.1.0-32-powerpc-di, pcmcia-storage-modules-6.1.0-32-powerpc-di, fb-modules-6.1.0-32-powerpc-di, input-modules-6.1.0-32-powerpc-di, event-modules-6.1.0-32-powerpc-di, mouse-modules-6.1.0-32-powerpc-di, nic-pcmcia-modules-6.1.0-32-powerpc-di, pcmcia-modules-6.1.0-32-powerpc-di, nic-usb-modules-6.1.0-32-powerpc-di, sata-modules-6.1.0-32-powerpc-di, crc-modules-6.1.0-32-powerpc-di, crypto-modules-6.1.0-32-powerpc-di, crypto-dm-modules-6.1.0-32-powerpc-di, ata-modules-6.1.0-32-powerpc-di, mmc-core-modules-6.1.0-32-powerpc-di, nbd-modules-6.1.0-32-powerpc-di, uinput-modules-6.1.0-32-powerpc-di, kernel-image-6.1.0-32-powerpc64-di, nic-modules-6.1.0-32-powerpc64-di, nic-wireless-modules-6.1.0-32-powerpc64-di, nic-shared-modules-6.1.0-32-powerpc64-di, serial-modules-6.1.0-32-powerpc64-di, usb-serial-modules-6.1.0-32-powerpc64-di, ppp-modules-6.1.0-32-powerpc64-di, pata-modules-6.1.0-32-powerpc64-di, cdrom-core-modules-6.1.0-32-powerpc64-di, firewire-core-modules-6.1.0-32-powerpc64-di, scsi-core-modules-6.1.0-32-powerpc64-di, scsi-modules-6.1.0-32-powerpc64-di, scsi-nic-modules-6.1.0-32-powerpc64-di, loop-modules-6.1.0-32-powerpc64-di, btrfs-modules-6.1.0-32-powerpc64-di, ext4-modules-6.1.0-32-powerpc64-di, isofs-modules-6.1.0-32-powerpc64-di, jfs-modules-6.1.0-32-powerpc64-di, 
xfs-modules-6.1.0-32-powerpc64-di, fat-modules-6.1.0-32-powerpc64-di, hfs-modules-6.1.0-32-powerpc64-di, affs-modules-6.1.0-32-powerpc64-di, squashfs-modules-6.1.0-32-powerpc64-di, udf-modules-6.1.0-32-powerpc64-di, fuse-modules-6.1.0-32-powerpc64-di, f2fs-modules-6.1.0-32-powerpc64-di, md-modules-6.1.0-32-powerpc64-di, multipath-modules-6.1.0-32-powerpc64-di, usb-modules-6.1.0-32-powerpc64-di, usb-storage-modules-6.1.0-32-powerpc64-di, pcmcia-storage-modules-6.1.0-32-powerpc64-di, fb-modules-6.1.0-32-powerpc64-di, input-modules-6.1.0-32-powerpc64-di, event-modules-6.1.0-32-powerpc64-di, mouse-modules-6.1.0-32-powerpc64-di, nic-pcmcia-modules-6.1.0-32-powerpc64-di, pcmcia-modules-6.1.0-32-powerpc64-di, nic-usb-modules-6.1.0-32-powerpc64-di, sata-modules-6.1.0-32-powerpc64-di, i2c-modules-6.1.0-32-powerpc64-di, crc-modules-6.1.0-32-powerpc64-di, crypto-modules-6.1.0-32-powerpc64-di, crypto-dm-modules-6.1.0-32-powerpc64-di, ata-modules-6.1.0-32-powerpc64-di, mmc-core-modules-6.1.0-32-powerpc64-di, nbd-modules-6.1.0-32-powerpc64-di, uinput-modules-6.1.0-32-powerpc64-di, mtd-core-modules-6.1.0-32-powerpc64-di, hypervisor-modules-6.1.0-32-powerpc64-di, fancontrol-modules-6.1.0-32-powerpc64-di, linux-headers-6.1.0-32-powerpc, linux-image-6.1.0-32-powerpc, linux-image-powerpc, linux-headers-powerpc, linux-image-6.1.0-32-powerpc-dbg, linux-image-powerpc-dbg, linux-headers-6.1.0-32-powerpc-smp, linux-image-6.1.0-32-powerpc-smp, linux-image-powerpc-smp, linux-headers-powerpc-smp, linux-image-6.1.0-32-powerpc-smp-dbg, linux-image-powerpc-smp-dbg, linux-headers-6.1.0-32-powerpc64, linux-image-6.1.0-32-powerpc64, linux-image-powerpc64, linux-headers-powerpc64, linux-image-6.1.0-32-powerpc64-dbg, linux-image-powerpc64-dbg, kernel-image-6.1.0-32-powerpc64le-di, nic-modules-6.1.0-32-powerpc64le-di, nic-wireless-modules-6.1.0-32-powerpc64le-di, nic-shared-modules-6.1.0-32-powerpc64le-di, serial-modules-6.1.0-32-powerpc64le-di, usb-serial-modules-6.1.0-32-powerpc64le-di, ppp-modules-6.1.0-32-powerpc64le-di, cdrom-core-modules-6.1.0-32-powerpc64le-di, firewire-core-modules-6.1.0-32-powerpc64le-di, scsi-core-modules-6.1.0-32-powerpc64le-di, scsi-modules-6.1.0-32-powerpc64le-di, scsi-nic-modules-6.1.0-32-powerpc64le-di, loop-modules-6.1.0-32-powerpc64le-di, btrfs-modules-6.1.0-32-powerpc64le-di, ext4-modules-6.1.0-32-powerpc64le-di, isofs-modules-6.1.0-32-powerpc64le-di, jfs-modules-6.1.0-32-powerpc64le-di, xfs-modules-6.1.0-32-powerpc64le-di, fat-modules-6.1.0-32-powerpc64le-di, squashfs-modules-6.1.0-32-powerpc64le-di, udf-modules-6.1.0-32-powerpc64le-di, fuse-modules-6.1.0-32-powerpc64le-di, f2fs-modules-6.1.0-32-powerpc64le-di, md-modules-6.1.0-32-powerpc64le-di, multipath-modules-6.1.0-32-powerpc64le-di, usb-modules-6.1.0-32-powerpc64le-di, usb-storage-modules-6.1.0-32-powerpc64le-di, fb-modules-6.1.0-32-powerpc64le-di, input-modules-6.1.0-32-powerpc64le-di, event-modules-6.1.0-32-powerpc64le-di, mouse-modules-6.1.0-32-powerpc64le-di, nic-usb-modules-6.1.0-32-powerpc64le-di, sata-modules-6.1.0-32-powerpc64le-di, i2c-modules-6.1.0-32-powerpc64le-di, crc-modules-6.1.0-32-powerpc64le-di, crypto-modules-6.1.0-32-powerpc64le-di, crypto-dm-modules-6.1.0-32-powerpc64le-di, ata-modules-6.1.0-32-powerpc64le-di, nbd-modules-6.1.0-32-powerpc64le-di, uinput-modules-6.1.0-32-powerpc64le-di, mtd-core-modules-6.1.0-32-powerpc64le-di, hypervisor-modules-6.1.0-32-powerpc64le-di, fancontrol-modules-6.1.0-32-powerpc64le-di, linux-headers-6.1.0-32-powerpc64le, linux-image-6.1.0-32-powerpc64le, linux-image-powerpc64le, 
linux-headers-powerpc64le, linux-image-6.1.0-32-powerpc64le-dbg, linux-image-powerpc64le-dbg, kernel-image-6.1.0-32-riscv64-di, nic-modules-6.1.0-32-riscv64-di, nic-wireless-modules-6.1.0-32-riscv64-di, nic-shared-modules-6.1.0-32-riscv64-di, usb-serial-modules-6.1.0-32-riscv64-di, ppp-modules-6.1.0-32-riscv64-di, pata-modules-6.1.0-32-riscv64-di, cdrom-core-modules-6.1.0-32-riscv64-di, scsi-core-modules-6.1.0-32-riscv64-di, scsi-modules-6.1.0-32-riscv64-di, scsi-nic-modules-6.1.0-32-riscv64-di, loop-modules-6.1.0-32-riscv64-di, btrfs-modules-6.1.0-32-riscv64-di, ext4-modules-6.1.0-32-riscv64-di, isofs-modules-6.1.0-32-riscv64-di, jfs-modules-6.1.0-32-riscv64-di, fat-modules-6.1.0-32-riscv64-di, squashfs-modules-6.1.0-32-riscv64-di, udf-modules-6.1.0-32-riscv64-di, fuse-modules-6.1.0-32-riscv64-di, f2fs-modules-6.1.0-32-riscv64-di, md-modules-6.1.0-32-riscv64-di, multipath-modules-6.1.0-32-riscv64-di, usb-modules-6.1.0-32-riscv64-di, usb-storage-modules-6.1.0-32-riscv64-di, fb-modules-6.1.0-32-riscv64-di, input-modules-6.1.0-32-riscv64-di, event-modules-6.1.0-32-riscv64-di, nic-usb-modules-6.1.0-32-riscv64-di, sata-modules-6.1.0-32-riscv64-di, i2c-modules-6.1.0-32-riscv64-di, crc-modules-6.1.0-32-riscv64-di, crypto-modules-6.1.0-32-riscv64-di, crypto-dm-modules-6.1.0-32-riscv64-di, ata-modules-6.1.0-32-riscv64-di, mmc-core-modules-6.1.0-32-riscv64-di, mmc-modules-6.1.0-32-riscv64-di, nbd-modules-6.1.0-32-riscv64-di, mtd-modules-6.1.0-32-riscv64-di, mtd-core-modules-6.1.0-32-riscv64-di, linux-headers-6.1.0-32-riscv64, linux-image-6.1.0-32-riscv64, linux-image-riscv64, linux-headers-riscv64, linux-image-6.1.0-32-riscv64-dbg, linux-image-riscv64-dbg, kernel-image-6.1.0-32-s390x-di, nic-modules-6.1.0-32-s390x-di, cdrom-core-modules-6.1.0-32-s390x-di, scsi-core-modules-6.1.0-32-s390x-di, scsi-modules-6.1.0-32-s390x-di, loop-modules-6.1.0-32-s390x-di, btrfs-modules-6.1.0-32-s390x-di, ext4-modules-6.1.0-32-s390x-di, isofs-modules-6.1.0-32-s390x-di, xfs-modules-6.1.0-32-s390x-di, fat-modules-6.1.0-32-s390x-di, udf-modules-6.1.0-32-s390x-di, fuse-modules-6.1.0-32-s390x-di, f2fs-modules-6.1.0-32-s390x-di, md-modules-6.1.0-32-s390x-di, multipath-modules-6.1.0-32-s390x-di, crc-modules-6.1.0-32-s390x-di, crypto-modules-6.1.0-32-s390x-di, crypto-dm-modules-6.1.0-32-s390x-di, nbd-modules-6.1.0-32-s390x-di, mtd-core-modules-6.1.0-32-s390x-di, dasd-modules-6.1.0-32-s390x-di, dasd-extra-modules-6.1.0-32-s390x-di, linux-headers-6.1.0-32-s390x, linux-image-6.1.0-32-s390x, linux-image-s390x, linux-headers-s390x, linux-image-6.1.0-32-s390x-dbg, linux-image-s390x-dbg, kernel-image-6.1.0-32-sh7751r-di, nic-modules-6.1.0-32-sh7751r-di, nic-shared-modules-6.1.0-32-sh7751r-di, usb-serial-modules-6.1.0-32-sh7751r-di, ppp-modules-6.1.0-32-sh7751r-di, pata-modules-6.1.0-32-sh7751r-di, cdrom-core-modules-6.1.0-32-sh7751r-di, firewire-core-modules-6.1.0-32-sh7751r-di, loop-modules-6.1.0-32-sh7751r-di, btrfs-modules-6.1.0-32-sh7751r-di, ext4-modules-6.1.0-32-sh7751r-di, isofs-modules-6.1.0-32-sh7751r-di, jfs-modules-6.1.0-32-sh7751r-di, xfs-modules-6.1.0-32-sh7751r-di, fat-modules-6.1.0-32-sh7751r-di, minix-modules-6.1.0-32-sh7751r-di, squashfs-modules-6.1.0-32-sh7751r-di, udf-modules-6.1.0-32-sh7751r-di, fuse-modules-6.1.0-32-sh7751r-di, f2fs-modules-6.1.0-32-sh7751r-di, md-modules-6.1.0-32-sh7751r-di, multipath-modules-6.1.0-32-sh7751r-di, usb-storage-modules-6.1.0-32-sh7751r-di, nic-usb-modules-6.1.0-32-sh7751r-di, sata-modules-6.1.0-32-sh7751r-di, i2c-modules-6.1.0-32-sh7751r-di, crc-modules-6.1.0-32-sh7751r-di, 
crypto-modules-6.1.0-32-sh7751r-di, crypto-dm-modules-6.1.0-32-sh7751r-di, nbd-modules-6.1.0-32-sh7751r-di, speakup-modules-6.1.0-32-sh7751r-di, sound-modules-6.1.0-32-sh7751r-di, kernel-image-6.1.0-32-sh7785lcr-di, nic-modules-6.1.0-32-sh7785lcr-di, nic-shared-modules-6.1.0-32-sh7785lcr-di, usb-serial-modules-6.1.0-32-sh7785lcr-di, ppp-modules-6.1.0-32-sh7785lcr-di, pata-modules-6.1.0-32-sh7785lcr-di, cdrom-core-modules-6.1.0-32-sh7785lcr-di, firewire-core-modules-6.1.0-32-sh7785lcr-di, loop-modules-6.1.0-32-sh7785lcr-di, btrfs-modules-6.1.0-32-sh7785lcr-di, ext4-modules-6.1.0-32-sh7785lcr-di, isofs-modules-6.1.0-32-sh7785lcr-di, jfs-modules-6.1.0-32-sh7785lcr-di, xfs-modules-6.1.0-32-sh7785lcr-di, fat-modules-6.1.0-32-sh7785lcr-di, minix-modules-6.1.0-32-sh7785lcr-di, squashfs-modules-6.1.0-32-sh7785lcr-di, udf-modules-6.1.0-32-sh7785lcr-di, fuse-modules-6.1.0-32-sh7785lcr-di, f2fs-modules-6.1.0-32-sh7785lcr-di, md-modules-6.1.0-32-sh7785lcr-di, multipath-modules-6.1.0-32-sh7785lcr-di, nic-usb-modules-6.1.0-32-sh7785lcr-di, sata-modules-6.1.0-32-sh7785lcr-di, crc-modules-6.1.0-32-sh7785lcr-di, crypto-modules-6.1.0-32-sh7785lcr-di, crypto-dm-modules-6.1.0-32-sh7785lcr-di, nbd-modules-6.1.0-32-sh7785lcr-di, speakup-modules-6.1.0-32-sh7785lcr-di, sound-modules-6.1.0-32-sh7785lcr-di, linux-headers-6.1.0-32-sh7751r, linux-image-6.1.0-32-sh7751r, linux-image-sh7751r, linux-headers-sh7751r, linux-image-6.1.0-32-sh7751r-dbg, linux-image-sh7751r-dbg, linux-headers-6.1.0-32-sh7785lcr, linux-image-6.1.0-32-sh7785lcr, linux-image-sh7785lcr, linux-headers-sh7785lcr, linux-image-6.1.0-32-sh7785lcr-dbg, linux-image-sh7785lcr-dbg, kernel-image-6.1.0-32-sparc64-di, nic-modules-6.1.0-32-sparc64-di, nic-shared-modules-6.1.0-32-sparc64-di, usb-serial-modules-6.1.0-32-sparc64-di, ppp-modules-6.1.0-32-sparc64-di, pata-modules-6.1.0-32-sparc64-di, cdrom-core-modules-6.1.0-32-sparc64-di, scsi-core-modules-6.1.0-32-sparc64-di, scsi-modules-6.1.0-32-sparc64-di, btrfs-modules-6.1.0-32-sparc64-di, ext4-modules-6.1.0-32-sparc64-di, isofs-modules-6.1.0-32-sparc64-di, jfs-modules-6.1.0-32-sparc64-di, ufs-modules-6.1.0-32-sparc64-di, xfs-modules-6.1.0-32-sparc64-di, fat-modules-6.1.0-32-sparc64-di, squashfs-modules-6.1.0-32-sparc64-di, udf-modules-6.1.0-32-sparc64-di, fuse-modules-6.1.0-32-sparc64-di, f2fs-modules-6.1.0-32-sparc64-di, md-modules-6.1.0-32-sparc64-di, multipath-modules-6.1.0-32-sparc64-di, usb-modules-6.1.0-32-sparc64-di, usb-storage-modules-6.1.0-32-sparc64-di, fb-modules-6.1.0-32-sparc64-di, input-modules-6.1.0-32-sparc64-di, nic-usb-modules-6.1.0-32-sparc64-di, sata-modules-6.1.0-32-sparc64-di, i2c-modules-6.1.0-32-sparc64-di, crc-modules-6.1.0-32-sparc64-di, crypto-modules-6.1.0-32-sparc64-di, crypto-dm-modules-6.1.0-32-sparc64-di, ata-modules-6.1.0-32-sparc64-di, nbd-modules-6.1.0-32-sparc64-di, linux-headers-6.1.0-32-sparc64, linux-image-6.1.0-32-sparc64, linux-image-sparc64, linux-headers-sparc64, linux-image-6.1.0-32-sparc64-dbg, linux-image-sparc64-dbg, linux-headers-6.1.0-32-sparc64-smp, linux-image-6.1.0-32-sparc64-smp, linux-image-sparc64-smp, linux-headers-sparc64-smp, linux-image-6.1.0-32-sparc64-smp-dbg, linux-image-sparc64-smp-dbg, linux-compiler-gcc-12-arm, linux-compiler-gcc-12-s390, linux-compiler-gcc-12-x86, linux-image-parisc64-smp, linux-image-parisc-smp """ def test_binutils(binutils): m = mock_open(read_data=binutils) with patch("builtins.open", m): result = parse_sources_file("dummy") assert result == { "binutils": { "binutils-for-host", "binutils-for-build", 
"binutils-ia64-linux-gnu-dbg", "binutils-m68k-linux-gnu", "binutils-mips64el-linux-gnuabin32-dbg", "binutils-mipsisa64r6-linux-gnuabin32", "binutils-mipsisa64r6el-linux-gnuabi64-dbg", } } def test_linux(linux): m = mock_open(read_data=linux) with patch("builtins.open", m): result = parse_sources_file("dummy") assert "linux-headers-6.1.0-32-amd64" in result["linux"] assert "linux-headers-6.1.0-32-cloud-amd64" in result["linux"] ================================================ FILE: package_managers/debian/structs.py ================================================ from dataclasses import dataclass, field # structures @dataclass class Maintainer: name: str = field(default_factory=str) email: str = field(default_factory=str) @dataclass class File: hash: str = field(default_factory=str) size: int = field(default_factory=int) filename: str = field(default_factory=str) @dataclass class Depends: package: str = field(default_factory=str) semver: str = field(default_factory=str) @dataclass class Tag: name: str = field(default_factory=str) value: str = field(default_factory=str) # this represents whatever we might get from Debian...either packages or sources # it's immaterial what it is, we just need to know how to parse it @dataclass class DebianData: # Package fields package: str = field(default_factory=str) source: str = field(default_factory=str) version: str = field(default_factory=str) installed_size: int = field(default_factory=int) maintainer: Maintainer = field(default_factory=Maintainer) architecture: str = field(default_factory=str) description: str = field(default_factory=str) homepage: str = field(default_factory=str) description_md5: str = field(default_factory=str) tag: str = field(default_factory=str) section: str = field(default_factory=str) priority: str = field(default_factory=str) filename: str = field(default_factory=str) size: int = field(default_factory=int) md5sum: str = field(default_factory=str) sha256: str = field(default_factory=str) # Dependency fields replaces: list[Depends] = field(default_factory=list) provides: list[Depends] = field(default_factory=list) depends: list[Depends] = field(default_factory=list) pre_depends: list[Depends] = field(default_factory=list) recommends: list[Depends] = field(default_factory=list) suggests: list[Depends] = field(default_factory=list) breaks: list[Depends] = field(default_factory=list) conflicts: list[Depends] = field(default_factory=list) build_depends: list[Depends] = field(default_factory=list) # source only # Source fields binary: list[str] = field(default_factory=list) uploaders: list[Maintainer] = field(default_factory=list) standards_version: str = field(default_factory=str) format: str = field(default_factory=str) files: list[File] = field(default_factory=list) vcs_browser: str = field(default_factory=str) vcs_git: str = field(default_factory=str) checksums_sha256: list[File] = field(default_factory=list) package_list: list[str] = field(default_factory=list) directory: str = field(default_factory=str) testsuite: str = field(default_factory=str) testsuite_triggers: str = field(default_factory=str) ================================================ FILE: package_managers/homebrew/Dockerfile ================================================ FROM ghcr.io/astral-sh/uv:python3.11-bookworm-slim # Copy everything from the root directory (build context) COPY . . 
# Install core requirements using uv WORKDIR /core RUN uv pip install --system -r requirements.txt WORKDIR / # Run the main application CMD ["python", "/package_managers/homebrew/main.py"] ================================================ FILE: package_managers/homebrew/README.md ================================================ # Homebrew The Homebrew service uses Homebrew's JSON API to build the Homebrew data model, taking a diff-based approach. ## Getting Started To run just the Homebrew service, use the following commands: ```bash docker compose build homebrew docker compose run homebrew ``` ## Pipeline Overview The Homebrew pipeline consists of one main script: `main.py`. It fetches two things from CHAI: 1. Homebrew's graph, based on the packages and legacy dependencies tables 2. The URL records in CHAI that match Homebrew's cleaned URLs It then diffs each object for each package against that state, and applies the changes to CHAI accordingly. ## Notes - Homebrew's dependencies are not restricted to the `{build,test,...}_dependencies` fields listed in the JSON API: formulae also use system-level packages denoted in `uses_from_macos` and `variations` (for Linux). The pipeline currently does NOT consider those dependencies - This job ignores the versions table entirely, and instead populates the legacy dependencies table, which maintains a package-to-package relationship - Versioned formulae (like `python`, `postgresql`) are ones where the Homebrew package name specifies a version. The pipeline treats each of these as an individual package, and so creates new records in the `packages` table. - The data source for Homebrew does not retrieve the analytics information that is available via each package's individual JSON API endpoint. ================================================ FILE: package_managers/homebrew/db.py ================================================ from core.config import Config from core.db import DB, CurrentURLs from core.structs import CurrentGraph class HomebrewDB(DB): def __init__(self, logger_name: str, config: Config): super().__init__(logger_name) self.config = config self.set_current_graph() def set_current_graph(self) -> None: """Get the Homebrew packages and dependencies""" self.graph: CurrentGraph = self.current_graph(self.config.pm_config.pm_id) self.logger.log(f"Loaded {len(self.graph.package_map)} Homebrew packages") def set_current_urls(self, urls: set[str]) -> None: """Wrapper for setting current urls""" self.urls: CurrentURLs = self.current_urls(urls) self.logger.log(f"Found {len(self.urls.url_map)} Homebrew URLs") ================================================ FILE: package_managers/homebrew/diff.py ================================================ from datetime import datetime from uuid import UUID, uuid4 from core.config import Config from core.logger import Logger from core.models import URL, LegacyDependency, Package, PackageURL from core.structs import Cache, URLKey from package_managers.homebrew.structs import Actual class Diff: def __init__(self, config: Config, caches: Cache): self.config = config self.now = datetime.now() self.caches = caches self.logger = Logger("homebrew_diff") def diff_pkg(self, pkg: Actual) -> tuple[UUID, Package | None, dict | None]: """ Checks if the given pkg is in the package_cache. Returns: - pkg_id: the id of the package - package: If new, returns a new package object.
If existing, returns None - changes: a dictionary of changes """ self.logger.debug(f"Diffing package: {pkg.formula}") pkg_id: UUID if pkg.formula not in self.caches.package_map: # new package p = Package( id=uuid4(), derived_id=f"homebrew/{pkg.formula}", name=pkg.formula, package_manager_id=self.config.pm_config.pm_id, import_id=pkg.formula, readme=pkg.description, created_at=self.now, updated_at=self.now, ) pkg_id: UUID = p.id # no update payload, so that's empty return pkg_id, p, {} else: p = self.caches.package_map[pkg.formula] pkg_id = p.id # check for changes # right now, that's just the readme if p.readme != pkg.description: self.logger.debug(f"Description changed for {pkg.formula}") return ( pkg_id, None, {"id": p.id, "readme": pkg.description, "updated_at": self.now}, ) else: # existing package, no change return pkg_id, None, None def diff_url( self, pkg: Actual, new_urls: dict[URLKey, URL] ) -> dict[UUID, UUID]: """Given a package's URLs, returns the resolved URLs for this specific formula""" resolved_urls: dict[UUID, UUID] = {} # we need to check if (a) URLs are in our cache, or (b) if we've already handled # them before. if so, we should use that urls = ( (pkg.homepage, self.config.url_types.homepage), (pkg.source, self.config.url_types.source), (pkg.repository, self.config.url_types.repository), ) for url, url_type in urls: # guard: no URL if not url: continue url_key = URLKey(url, url_type) resolved_url_id: UUID if url_key in new_urls: resolved_url_id = new_urls[url_key].id elif url_key in self.caches.url_map: resolved_url_id = self.caches.url_map[url_key].id else: self.logger.debug(f"URL {url} for {url_type} is entirely new") new_url = URL( id=uuid4(), url=url, url_type_id=url_type, created_at=self.now, updated_at=self.now, ) resolved_url_id = new_url.id # NOTE: THIS IS SUPER IMPORTANT # we're not just borrowing this value, we're mutating it as well new_urls[url_key] = new_url resolved_urls[url_type] = resolved_url_id return resolved_urls def diff_pkg_url( self, pkg_id: UUID, resolved_urls: dict[UUID, UUID] ) -> tuple[list[PackageURL], list[dict]]: """Takes in a package_id and resolved URLs from diff_url, and generates new PackageURL objects as well as a list of changes to existing ones Inputs: - pkg_id: the id of the package - resolved_urls: a map of url types to final URL ID for this pkg Outputs: - new_package_urls: a list of new PackageURL objects - updated_package_urls: a list of changes to existing PackageURL objects TODO: - We're updating every single package_url entity, which takes time. We should check if the latest URL has changed, and if so, only update that one. """ new_links: list[PackageURL] = [] updates: list[dict] = [] # what are the existing links? existing: set[UUID] = { pu.url_id for pu in self.caches.package_urls.get(pkg_id, set()) } # for the correct URL type / URL for this package: for _url_type, url_id in resolved_urls.items(): if url_id not in existing: # new link!
new_links.append( PackageURL( id=uuid4(), package_id=pkg_id, url_id=url_id, created_at=self.now, updated_at=self.now, ) ) else: # TODO: this should only happen for `latest` URLs # here is an existing link between this URL and this package # let's find it existing_pu = next( pu for pu in self.caches.package_urls[pkg_id] if pu.url_id == url_id ) existing_pu.updated_at = self.now updates.append({"id": existing_pu.id, "updated_at": self.now}) return new_links, updates def diff_deps( self, pkg: Actual ) -> tuple[list[LegacyDependency], list[LegacyDependency]]: """ Takes in a Homebrew formula and figures out what dependencies have changed. Also uses the LegacyDependency table, because that is package to package. Warnings: - Updates show up as removed + new - This is Homebrew specific, since LegacyDependency mandates uniqueness from package_id -> dependency_id, but Homebrew allows duplicate dependencies across multiple dependency types. So we've got a process helper that handles this. Returns: - new_deps: a list of new dependencies - removed_deps: a list of removed dependencies """ new_deps: list[LegacyDependency] = [] removed_deps: list[LegacyDependency] = [] # serialize the actual dependencies into a set of tuples actual: set[tuple[UUID, UUID]] = set() processed: set[str] = set() def process(dep_names: list[str] | None, dep_type: UUID) -> None: """Helper to process dependencies of a given type""" # guard: no dependencies if not dep_names: return for name in dep_names: # guard: no dependency name / empty name if not name: continue # means one dependency is build and test, for example # see https://formulae.brew.sh/api/formula/abook.json for example # gettext is both a build and runtime dependency if name in processed: continue dependency = self.caches.package_map.get(name) # guard: no dependency if not dependency: # TODO: handle this case, though it fixes itself on the next run self.logger.warn(f"{name}, dep of {pkg.formula} is new") continue actual.add((dependency.id, dep_type)) processed.add(name) # alright, let's do it if hasattr(pkg, "dependencies"): process(pkg.dependencies, self.config.dependency_types.runtime) if hasattr(pkg, "build_dependencies"): process(pkg.build_dependencies, self.config.dependency_types.build) if hasattr(pkg, "test_dependencies"): process(pkg.test_dependencies, self.config.dependency_types.test) if hasattr(pkg, "recommended_dependencies"): process( pkg.recommended_dependencies, self.config.dependency_types.recommended ) if hasattr(pkg, "optional_dependencies"): process(pkg.optional_dependencies, self.config.dependency_types.optional) # get the package ID for what we are working with package = self.caches.package_map.get(pkg.formula) if not package: # TODO: handle this case, though it fixes itself on the next run self.logger.warn(f"New package {pkg.formula}, will grab its deps next time") return [], [] pkg_id: UUID = package.id # now, we need to figure out what's new / removed # we need: # 1. something in that same structure as `actual`, to track what's in CHAI existing: set[tuple[UUID, UUID]] = set() # 2. set of LegacyDependency objects legacy_links: set[LegacyDependency] = self.caches.dependencies.get( pkg_id, set() ) # 3. 
easy look up to get to legacy_links to go from 1 to 2 existing_legacy_map: dict[tuple[UUID, UUID], LegacyDependency] = {} for legacy in legacy_links: key = (legacy.dependency_id, legacy.dependency_type_id) existing_legacy_map[key] = legacy existing.add(key) # calculate our diffs added_tuples: set[tuple[UUID, UUID]] = actual - existing removed_tuples: set[tuple[UUID, UUID]] = existing - actual # convert these to LegacyDependency objects for dep_id, type_id in added_tuples: new_dep = LegacyDependency( package_id=pkg_id, dependency_id=dep_id, dependency_type_id=type_id, created_at=self.now, updated_at=self.now, ) new_deps.append(new_dep) for dep_id, type_id in removed_tuples: removed_dep = existing_legacy_map.get((dep_id, type_id)) if removed_dep: removed_deps.append(removed_dep) return new_deps, removed_deps ================================================ FILE: package_managers/homebrew/formulae.py ================================================ import re from typing import Any from permalint import normalize_url from requests import get from core.config import Config from core.fetcher import Data, Fetcher from core.logger import Logger from package_managers.homebrew.structs import Actual logger = Logger("homebrew_formulae") class HomebrewFetcher(Fetcher): def __init__(self, config: Config): super().__init__( name="homebrew", source=config.pm_config.source, no_cache=config.exec_config.no_cache, test=config.exec_config.test, ) def fetch(self) -> list[Actual]: """Get the current state of Homebrew""" response = get(self.source) try: response.raise_for_status() except Exception as e: logger.error(f"Error fetching Homebrew formulae: {e}") raise e # make json data: list[dict[str, Any]] = response.json() # prep results results: list[Actual] = [] for formula in data: # check if deprecated # TODO: should we delete deprecated = formula.get("deprecated", False) if deprecated: continue # create temp vars for stuff we transform...basically URL homepage = normalize_url(formula["homepage"]) # try urls.head.url, because that generally points to GitHub / git # use urls.stable.url as a backstop source = normalize_url( formula["urls"].get("head", formula["urls"]["stable"]).get("url", "") ) # collect github / gitlab repos if re.search(r"^github\.com", source) or re.search(r"^gitlab\.com", source): repository = source else: repository = None # create the actual actual = Actual( formula=formula["name"], description=formula["desc"], license=formula["license"], homepage=homepage, source=source, repository=repository, build_dependencies=formula["build_dependencies"], dependencies=formula["dependencies"], test_dependencies=formula["test_dependencies"], recommended_dependencies=formula["recommended_dependencies"], optional_dependencies=formula["optional_dependencies"], # TODO: anything else? ) results.append(actual) if self.no_cache: logger.log("No cache, so not saving to file") else: write = Data(".", "homebrew_formulae.json", data) self.write([write]) return results ================================================ FILE: package_managers/homebrew/main.py ================================================ #!
/usr/bin/env pkgx +python@3.11 uv run from datetime import datetime from uuid import UUID from core.config import Config, PackageManager from core.logger import Logger from core.models import URL, LegacyDependency, Package, PackageURL from core.structs import Cache, URLKey from package_managers.homebrew.db import HomebrewDB from package_managers.homebrew.diff import Diff from package_managers.homebrew.formulae import HomebrewFetcher def main(config: Config, db: HomebrewDB) -> None: """A diff-based attempt at loading into CHAI""" logger = Logger("homebrew_main") fetcher = HomebrewFetcher(config) brew = fetcher.fetch() # get the URLs & set that brew_urls = {b.source for b in brew} | {b.homepage for b in brew} db.set_current_urls(brew_urls) logger.log("Set current URLs") # get the caches here cache = Cache( db.graph.package_map, db.urls.url_map, db.urls.package_urls, db.graph.dependencies, ) # total set of updates we'll make are: new_packages: list[Package] = [] new_urls: dict[URLKey, URL] = {} # we'll convert this later new_package_urls: list[PackageURL] = [] updated_packages: list[dict[str, UUID | str | datetime]] = [] updated_package_urls: list[dict[str, UUID | datetime]] = [] new_deps: list[LegacyDependency] = [] removed_deps: list[LegacyDependency] = [] diff = Diff(config, cache) for i, pkg in enumerate(brew): pkg_id, pkg_obj, update_payload = diff.diff_pkg(pkg) if pkg_obj: logger.debug(f"New package: {pkg_obj.name}") new_packages.append(pkg_obj) if update_payload: logger.debug(f"Updated package: {update_payload['id']}") updated_packages.append(update_payload) # NOTE: resolved urls is a map of url types to final URL ID for this pkg # also, &new_urls gets passed in AND mutated resolved_urls = diff.diff_url(pkg, new_urls) # now, new package urls new_links, updated_links = diff.diff_pkg_url(pkg_id, resolved_urls) if new_links: logger.debug(f"New package URLs: {len(new_links)}") new_package_urls.extend(new_links) if updated_links: logger.debug(f"Updated package URLs: {len(updated_links)}") updated_package_urls.extend(updated_links) # finally, dependencies new_dependencies, removed_dependencies = diff.diff_deps(pkg) if new_dependencies: logger.debug(f"New dependencies: {len(new_dependencies)}") new_deps.extend(new_dependencies) if removed_dependencies: logger.debug(f"Removed dependencies: {len(removed_dependencies)}") removed_deps.extend(removed_dependencies) if config.exec_config.test and i > 100: break # final cleanup is to replace the new_urls map with a list final_new_urls = list(new_urls.values()) # send to loader db.ingest( new_packages, final_new_urls, new_package_urls, new_deps, removed_deps, updated_packages, updated_package_urls, ) if __name__ == "__main__": config = Config(PackageManager.HOMEBREW) db = HomebrewDB("homebrew_db_main", config) main(config, db) ================================================ FILE: package_managers/homebrew/structs.py ================================================ from dataclasses import dataclass @dataclass class Actual: formula: str description: str license: str homepage: str source: str repository: str | None build_dependencies: list[str] | None dependencies: list[str] | None test_dependencies: list[str] | None recommended_dependencies: list[str] | None optional_dependencies: list[str] | None ================================================ FILE: package_managers/pkgx/Dockerfile ================================================ FROM ghcr.io/astral-sh/uv:python3.11-bookworm-slim # Copy everything COPY . . 
# Install core requirements using uv WORKDIR /core RUN uv pip install --system -r requirements.txt WORKDIR / # Run the main application CMD ["python", "/package_managers/pkgx/main.py"] ================================================ FILE: package_managers/pkgx/db.py ================================================ #!/usr/bin/env pkgx uv run from core.config import Config from core.db import DB, CurrentURLs from core.structs import CurrentGraph class PkgxDB(DB): def __init__(self, logger_name: str, config: Config): super().__init__(logger_name) self.config = config def set_current_graph(self) -> None: """Get the pkgx packages and dependencies""" self.graph: CurrentGraph = self.current_graph(self.config.pm_config.pm_id) self.logger.log(f"Loaded {len(self.graph.package_map)} pkgx packages") def set_current_urls(self) -> None: """Getting all the URLs and Package URLs from the database""" self.urls: CurrentURLs = self.all_current_urls() self.logger.log(f"Loaded {len(self.urls.url_map)} URLs") ================================================ FILE: package_managers/pkgx/diff.py ================================================ #!/usr/bin/env pkgx uv run from datetime import datetime from uuid import UUID, uuid4 from core.config import Config from core.logger import Logger from core.models import URL, LegacyDependency, Package, PackageURL from core.structs import Cache, URLKey from package_managers.pkgx.db import DB from package_managers.pkgx.parser import DependencyBlock, PkgxPackage from package_managers.pkgx.url import generate_chai_urls class PkgxDiff: def __init__(self, config: Config, caches: Cache, db: DB, logger: Logger): self.config = config self.now = datetime.now() self.caches = caches self.db = db self.logger = logger def diff_pkg( self, import_id: str, pkg: PkgxPackage ) -> tuple[UUID, Package | None, dict | None]: """ Checks if the given pkg is in the package_cache. Returns: - pkg_id: the id of the package - package: If new, returns a new package object. 
If existing, returns None - changes: a dictionary of changes """ self.logger.debug(f"Diffing package: {import_id}") if import_id not in self.caches.package_map: # new package p = Package( id=uuid4(), derived_id=f"pkgx/{import_id}", name=import_id, package_manager_id=self.config.pm_config.pm_id, import_id=import_id, readme="", # NOTE: pkgx doesn't have a description field created_at=self.now, updated_at=self.now, ) pkg_id: UUID = p.id return pkg_id, p, {} else: # the package exists, but since pkgx doesn't maintain a readme or # description field, we can just return pkg_id = self.caches.package_map[import_id].id return pkg_id, None, None def diff_url( self, import_id: str, pkg: PkgxPackage, new_urls: dict[URLKey, URL] ) -> dict[UUID, UUID]: """Given a package's URLs, returns the resolved URL for this specific package""" resolved_urls: dict[UUID, UUID] = {} # Generate the URLs for this package urls = generate_chai_urls( self.config, self.db, import_id, pkg.distributable[0].url, self.logger ) # Process each URL for url_key in urls: # guard: generate_chai_urls could be None for a url type if url_key is None: continue resolved_url_id: UUID if url_key in new_urls: resolved_url_id = new_urls[url_key].id elif url_key in self.caches.url_map: resolved_url_id = self.caches.url_map[url_key].id else: self.logger.debug( f"URL {url_key.url} as {url_key.url_type_id} is entirely new" ) new_url = URL( id=uuid4(), url=url_key.url, url_type_id=url_key.url_type_id, created_at=self.now, updated_at=self.now, ) resolved_url_id = new_url.id new_urls[url_key] = new_url resolved_urls[url_key.url_type_id] = resolved_url_id return resolved_urls def diff_pkg_url( self, pkg_id: UUID, resolved_urls: dict[UUID, UUID] ) -> tuple[list[PackageURL], list[dict]]: """Takes in a package_id and resolved URLs from diff_url, and generates new PackageURL objects as well as a list of changes to existing ones""" new_links: list[PackageURL] = [] updates: list[dict] = [] # what are the existing links? existing: set[UUID] = { pu.url_id for pu in self.caches.package_urls.get(pkg_id, set()) } # for each URL type/URL for this package: for _url_type, url_id in resolved_urls.items(): if url_id not in existing: # new link! new_links.append( PackageURL( id=uuid4(), package_id=pkg_id, url_id=url_id, created_at=self.now, updated_at=self.now, ) ) else: # existing link - update timestamp existing_pu = next( pu for pu in self.caches.package_urls[pkg_id] if pu.url_id == url_id ) existing_pu.updated_at = self.now updates.append({"id": existing_pu.id, "updated_at": self.now}) return new_links, updates def diff_deps( self, import_id: str, pkg: PkgxPackage ) -> tuple[list[LegacyDependency], list[LegacyDependency]]: """ Takes in a pkgx package and figures out what dependencies have changed. The process is: 1. Build a view of what the package's dependencies are according to the parsed pkgx data, using priority-based deduplication 2. Get this package's ID from CHAI 3. Get this package's existing dependencies from CHAI 4. 
Compare the two sets, and identify new and removed dependencies Note: The database has a unique constraint on (package_id, dependency_id), so if a package depends on the same dependency with multiple types (e.g., both runtime and build), we choose the highest priority type: Runtime > Build > Test Returns: - new_deps: a list of new dependencies - removed_deps: a list of removed dependencies """ new_deps: list[LegacyDependency] = [] removed_deps: list[LegacyDependency] = [] # First, collect all dependencies and deduplicate by dependency name # choosing the highest priority dependency type for each unique dependency dependency_map: dict[str, UUID] = {} # Priority order: Runtime > Build > Test priority_order = { self.config.dependency_types.runtime: 1, self.config.dependency_types.build: 2, self.config.dependency_types.test: 3, } def process_deps(dependencies: list[DependencyBlock], dep_type: UUID) -> None: """Helper to process dependencies of a given type with priority""" for dep in dependencies: for dep_obj in dep.dependencies: if not dep_obj.name: continue # Get the dependency package from cache dependency = self.caches.package_map.get(dep_obj.name) if not dependency: self.logger.warn( f"{dep_obj.name}, dep of {import_id} is not in cache" ) continue # If this dependency already exists in our map, choose higher priority if dep_obj.name in dependency_map: existing_priority = priority_order.get( dependency_map[dep_obj.name], 999 ) new_priority = priority_order.get(dep_type, 999) if ( new_priority < existing_priority ): # Lower number = higher priority old_type_id = dependency_map[dep_obj.name] dependency_map[dep_obj.name] = dep_type self.logger.debug( f"Updated dependency type for {dep_obj.name} from " f"{old_type_id} to {dep_type} (higher priority)" ) else: dependency_map[dep_obj.name] = dep_type # Process different types of dependencies with priority handling process_deps(pkg.dependencies, self.config.dependency_types.runtime) process_deps(pkg.build.dependencies, self.config.dependency_types.build) process_deps(pkg.test.dependencies, self.config.dependency_types.test) # Now build the actual set of dependencies with resolved types actual: set[tuple[UUID, UUID]] = set() for dep_name, dep_type in dependency_map.items(): dependency = self.caches.package_map.get(dep_name) if dependency: # Double-check it still exists actual.add((dependency.id, dep_type)) # get the package ID for what we are working with package = self.caches.package_map.get(import_id) if not package: self.logger.warn(f"New package {import_id}, will grab its deps next time") return [], [] pkg_id: UUID = package.id # what are its existing dependencies? # specifically, existing dependencies IN THE SAME STRUCTURE as `actual`, # so we can do an easy comparison existing: set[tuple[UUID, UUID]] = { (dep.dependency_id, dep.dependency_type_id) for dep in self.caches.dependencies.get(pkg_id, set()) } # we have two sets! 
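# a quick sketch with hypothetical IDs: if actual = {(A, runtime), (B, build)} and
# existing = {(A, runtime), (B, test)}, then B's type change surfaces as one added
# tuple (B, build) plus one removed tuple (B, test), while A is left untouched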
# actual minus existing = new_deps # existing minus actual = removed_deps new = actual - existing removed = existing - actual new_deps: list[LegacyDependency] = [ LegacyDependency( package_id=pkg_id, dependency_id=dep[0], dependency_type_id=dep[1], created_at=self.now, updated_at=self.now, ) for dep in new ] # get the existing legacy dependency, and add it to removed_deps removed_deps: list[LegacyDependency] = [] cache_deps: set[LegacyDependency] = self.caches.dependencies.get(pkg_id, set()) for removed_dep_id, removed_dep_type in removed: try: existing_dep = next( dep for dep in cache_deps if dep.dependency_id == removed_dep_id and dep.dependency_type_id == removed_dep_type ) removed_deps.append(existing_dep) except StopIteration as exc: cache_deps_str = "\n".join( [ f"{dep.dependency_id} / {dep.dependency_type_id}" for dep in cache_deps ] ) raise ValueError( f"Removing {removed_dep_id} / {removed_dep_type} for {pkg_id} but not in Cache: \n{cache_deps_str}" ) from exc return new_deps, removed_deps ================================================ FILE: package_managers/pkgx/loader.py ================================================ from sqlalchemy import select from sqlalchemy.dialects.postgresql import insert as pg_insert from core.config import Config from core.db import DB from core.models import ( LegacyDependency, Package, ) from package_managers.pkgx.parser import DependencyBlock from package_managers.pkgx.transformer import Cache BATCH_SIZE = 10000 # NOTE: this is a separate instance of the db that is used in main class PkgxLoader(DB): def __init__(self, config: Config, data: dict[str, Cache]): super().__init__("pkgx_db") self.config = config self.data = data self.debug = config.exec_config.test self.logger.debug(f"Initialized PkgxLoader with {len(data)} packages") def load_packages(self) -> None: """ Efficiently load all unique packages from the cache map into the database using bulk insertion and returning inserted IDs. 
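Rows whose derived_id already exists are skipped via ON CONFLICT DO NOTHING; their IDs are then back-filled with batched SELECTs on derived_id, so every cached package ends up holding a database ID.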
""" unique_packages = {} for key, cache in self.data.items(): package = cache.package if not isinstance(package, Package): self.logger.error( f"Invalid package object for key {key}: {type(package)}" ) continue if package.derived_id not in unique_packages: unique_packages[package.derived_id] = package self.logger.log(f"Found {len(unique_packages)} unique packages to insert") package_dicts = [] for pkg in unique_packages.values(): try: package_dicts.append(pkg.to_dict()) except Exception as e: self.logger.error(f"Error in to_dict for package {pkg.name}: {e!s}") if not package_dicts: self.logger.log("No packages to insert") return with self.session() as session: try: stmt = pg_insert(Package).values(package_dicts).on_conflict_do_nothing() # TODO: can just generate the UUID myself and provide it, so no need to # return stmt = stmt.returning(Package.id, Package.derived_id) self.logger.log("About to execute insert statement for packages") result = session.execute(stmt) inserted_packages = {row.derived_id: row.id for row in result} session.commit() self.logger.log( f"Successfully inserted {len(inserted_packages)} packages" ) missing_derived_ids = [ derived_id for derived_id in unique_packages if derived_id not in inserted_packages ] self.logger.log( f"Fetching {len(missing_derived_ids)} IDs for conflicting packages" ) if missing_derived_ids: # Fetch missing IDs in batches for i in range(0, len(missing_derived_ids), BATCH_SIZE): batch_ids = missing_derived_ids[i : i + BATCH_SIZE] stmt = select(Package.id, Package.derived_id).where( Package.derived_id.in_(batch_ids) ) result = session.execute(stmt) for row in result: inserted_packages[row.derived_id] = row.id updated_count = 0 for cache in self.data.values(): if cache.package.derived_id in inserted_packages: cache.package.id = inserted_packages[cache.package.derived_id] updated_count += 1 self.logger.log(f"Updated cache with IDs for {updated_count} packages") except Exception as e: self.logger.error(f"Error inserting packages: {e!s}") self.logger.error(f"Error type: {type(e)}") raise def load_dependencies(self) -> None: """ Load all dependencies into the LegacyDependency table. This requires package IDs to be loaded first. # FIXME: legacy dependencies are package to package relationships. # A migration is needed to move all dependencies to the LegacyDependency structure. """ self.logger.log("Starting to load legacy dependencies") legacy_dependency_dicts = [] missing = set() for key, cache in self.data.items(): # Ensure the main package has an ID if not hasattr(cache.package, "id") or cache.package.id is None: self.logger.warn( f"Package {key} has no ID when loading dependencies, skipping" ) continue package_id = cache.package.id # Helper to process a list of dependency names for a given type def process_deps( dep_blocks: list[DependencyBlock], dep_type_id: str, key=key, package_id=package_id, ): for dep_block in dep_blocks: # TODO: do we need to use this? 
for dep in dep_block.dependencies: dep_name = dep.name dep_semver = dep.semver # Find the dependency package in our cache dep_cache = self.data.get(dep_name) if not dep_cache: missing.add(dep_name) continue # Checks: has to have an ID if ( not hasattr(dep_cache.package, "id") or dep_cache.package.id is None ): self.logger.warn( f"Dependency package '{dep_name}' has no ID, skipping linkage for '{key}'" ) continue dependency_id = dep_cache.package.id # Append data for bulk insert legacy_dependency_dicts.append( { "package_id": package_id, "dependency_id": dependency_id, "dependency_type_id": dep_type_id, "semver_range": dep_semver, } ) # Process each dependency type process_deps(cache.dependencies.build, self.config.dependency_types.build) process_deps(cache.dependencies.test, self.config.dependency_types.test) process_deps( cache.dependencies.dependencies, self.config.dependency_types.runtime ) self.logger.log( f"Found {len(legacy_dependency_dicts)} legacy dependencies to insert" ) if missing: self.logger.warn(f"{len(missing)} pkgs are deps, but have no pkgx.yaml") self.logger.warn(f"Missing pkgs: {missing}") if not legacy_dependency_dicts: self.logger.log("No legacy dependencies to insert") return # Bulk insert legacy dependencies with self.session() as session: try: for i in range(0, len(legacy_dependency_dicts), BATCH_SIZE): batch = legacy_dependency_dicts[i : i + BATCH_SIZE] self.logger.log( f"Processing LegacyDependency batch {i // BATCH_SIZE + 1}/{(len(legacy_dependency_dicts) - 1) // BATCH_SIZE + 1} ({len(batch)} links)" ) stmt = ( pg_insert(LegacyDependency) .values(batch) .on_conflict_do_nothing() ) session.execute(stmt) session.commit() self.logger.log("Successfully inserted all pkgx dependencies") except Exception as e: self.logger.error(f"Error inserting legacy dependencies: {e!s}") self.logger.error(f"Error type: {type(e)}") raise ================================================ FILE: package_managers/pkgx/main.py ================================================ #!/usr/bin/env pkgx +python@3.11 uv run import os import time from datetime import datetime from uuid import UUID from core.config import Config, PackageManager from core.fetcher import GitFetcher from core.logger import Logger from core.models import URL, LegacyDependency, Package, PackageURL from core.scheduler import Scheduler from core.structs import Cache, URLKey from package_managers.pkgx.db import PkgxDB from package_managers.pkgx.diff import PkgxDiff from package_managers.pkgx.parser import PkgxParser logger = Logger("pkgx") SCHEDULER_ENABLED = os.getenv("ENABLE_SCHEDULER", "true").lower() == "true" BATCH_SIZE = 500 PROJECTS_DIR = "projects" PACKAGE_FILE = "package.yml" def fetch(config: Config) -> GitFetcher: should_fetch = config.exec_config.fetch fetcher = GitFetcher( "pkgx", config.pm_config.source, config.exec_config.no_cache, config.exec_config.test, ) if should_fetch: logger.debug("Starting Pkgx package fetch") fetcher.fetch() else: # symlink would still be updated logger.log("Fetching disabled, skipping fetch") # if no_cache is on, we'll delete stuff from here return fetcher def run_pipeline(config: Config, db: PkgxDB): """A diff-based approach to loading pkgx data into CHAI""" fetcher = fetch(config) output_dir = f"{fetcher.output}/latest" # Parse all packages pkgx_parser = PkgxParser(output_dir) packages = list(pkgx_parser.parse_packages()) logger.log(f"Parsed {len(packages)} packages") # Set up cache db.set_current_graph() db.set_current_urls() logger.log("Set current URLs") # Build cache for 
differential loading cache = Cache( db.graph.package_map, db.urls.url_map, db.urls.package_urls, db.graph.dependencies, ) # Initialize differential loading collections new_packages: list[Package] = [] new_urls: dict[URLKey, URL] = {} new_package_urls: list[PackageURL] = [] updated_packages: list[dict[str, UUID | str | datetime]] = [] updated_package_urls: list[dict[str, UUID | datetime]] = [] new_deps: list[LegacyDependency] = [] removed_deps: list[LegacyDependency] = [] # Create diff processor diff = PkgxDiff(config, cache, db, logger) # Process each package for i, (pkg_data, import_id) in enumerate(packages): # Diff the package pkg_id, pkg_obj, update_payload = diff.diff_pkg(import_id, pkg_data) if pkg_obj: logger.debug(f"New package: {pkg_obj.name}") new_packages.append(pkg_obj) if update_payload: logger.debug(f"Updated package: {update_payload['id']}") updated_packages.append(update_payload) # Diff URLs (resolved_urls is map of url types to final URL ID) resolved_urls = diff.diff_url(import_id, pkg_data, new_urls) # Diff package URLs new_links, updated_links = diff.diff_pkg_url(pkg_id, resolved_urls) if new_links: logger.debug(f"New package URLs: {len(new_links)}") new_package_urls.extend(new_links) if updated_links: updated_package_urls.extend(updated_links) # Diff dependencies new_dependencies, removed_dependencies = diff.diff_deps(import_id, pkg_data) if new_dependencies: logger.debug(f"New dependencies: {len(new_dependencies)}") new_deps.extend(new_dependencies) if removed_dependencies: logger.debug(f"Removed dependencies: {len(removed_dependencies)}") removed_deps.extend(removed_dependencies) if config.exec_config.test and i > 10: break # Convert new_urls dict to list for ingestion final_new_urls = list(new_urls.values()) # Ingest all diffs db.ingest( new_packages, final_new_urls, new_package_urls, new_deps, removed_deps, updated_packages, updated_package_urls, ) if config.exec_config.no_cache: fetcher.cleanup() def main(): logger.log("Initializing Pkgx package manager") config = Config(PackageManager.PKGX) db = PkgxDB("pkgx_main_db_logger", config) logger.debug(f"Using config: {config}") if SCHEDULER_ENABLED: logger.log("Scheduler enabled. Starting schedule.") scheduler = Scheduler("pkgx") scheduler.start(run_pipeline, config) # run immediately as well when scheduling scheduler.run_now(run_pipeline, config, db) # keep the main thread alive for scheduler try: while True: time.sleep(3600) except KeyboardInterrupt: scheduler.stop() logger.log("Scheduler stopped.") else: logger.log("Scheduler disabled. 
Running pipeline once.") run_pipeline(config, db) logger.log("Pipeline finished.") if __name__ == "__main__": main() ================================================ FILE: package_managers/pkgx/parser.py ================================================ from collections.abc import Iterator from dataclasses import dataclass, field from pathlib import Path from typing import Any import yaml from core.logger import Logger from core.utils import convert_keys_to_snake_case logger = Logger("pkgx") PROJECTS_DIR = "projects" PACKAGE_FILE = "package.yml" # IMPORTANT: # the package.yml maintains a warnings list, which sometimes contain "vendored" # this correlates to Homebrew's casks, and CHAI ignores them # structures # this enables everything, but we don't need all of it right now @dataclass class Distributable: url: str strip_components: int | None = field(default=None) ref: str | None = field(default=None) sig: str | None = field(default=None) sha: str | None = field(default=None) @dataclass class Version: github: str | None = field(default=None) # (user)?(/tags/releases) gitlab: str | None = field(default=None) # (user|project)?(/tags/releases) url: str | None = field(default=None) # for non github projects match: str | None = field(default=None) # regex to match the version strip: str | None = field(default=None) # regex to strip the version ignore: str | None = field(default=None) # regex to ignore the version versions: list[str] | None = field(default=None) # list of versions npm: str | None = field(default=None) # npm package name transform: str | None = field(default=None) # regex to transform the version stripe: str | None = field(default=None) # not sure what this is @dataclass class Dependency: name: str semver: str @dataclass class EnvironmentVariable: name: str value: str | list[str] @dataclass class DependencyBlock: platform: str # 'all', 'linux', 'darwin', etc. dependencies: list[Dependency] @dataclass class Build: script: str dependencies: list[DependencyBlock] = field(default_factory=list) env: list[EnvironmentVariable] = field(default_factory=list) working_directory: str | None = field(default=None) @dataclass class Test: script: str dependencies: list[DependencyBlock] = field(default_factory=list) env: list[EnvironmentVariable] = field(default_factory=list) fixture: str | None = field(default=None) @dataclass class PkgxPackage: distributable: list[Distributable] versions: Version build: Build | None = field(default=None) test: Test | None = field(default=None) # provides: list[str] = field(default_factory=list) # all cli commands provided # platforms: list[str] = field( # default_factory=list # ) # darwin, linux/x64, linux/arm64, etc. 
# Store a list of dependency blocks, each specifying a platform and its deps dependencies: list[DependencyBlock] = field(default_factory=list) # Pkgx Parser can look at the pantry and yield a dictionary of information in the YAML class PkgxParser: def __init__(self, repo_path: str): self.repo_path = repo_path def find_package_yamls(self) -> Iterator[tuple[Path, str]]: """Finds all package.yml files within the projects directory.""" projects_path = Path(self.repo_path) / PROJECTS_DIR if not projects_path.is_dir(): logger.error(f"Projects directory not found at: {projects_path}") return logger.debug(f"Searching for {PACKAGE_FILE} in {projects_path}...") count = 0 for yaml_path in projects_path.rglob(PACKAGE_FILE): if yaml_path.is_file(): # Calculate relative path for project identifier relative_path = yaml_path.parent.relative_to(projects_path) project_identifier = str(relative_path) yield yaml_path, project_identifier count += 1 logger.debug(f"Found {count} {PACKAGE_FILE} files.") def is_vendored(self, data: dict[str, Any]) -> bool: """Checks if the package is vendored.""" if "warnings" in data: warnings = data.get("warnings", []) if "vendored" in warnings: return True return False def parse_package_yaml(self, file_path: Path) -> PkgxPackage | None: """Parses a single package.yml file.""" try: with open(file_path) as f: data = yaml.safe_load(f) if not isinstance(data, dict): logger.warn( f"Expected dict, got {type(data).__name__} in {file_path}" ) return None # check if the package is vendored if self.is_vendored(data): return None pkgx_package = self.map_package_yaml_to_pkgx_package( data, str(file_path) ) return pkgx_package except yaml.YAMLError as e: logger.error(f"Error parsing YAML file {file_path}: {e}") return None except Exception as e: logger.error(f"Error reading file {file_path}: {e}") raise e def parse_packages(self) -> Iterator[tuple[PkgxPackage, str]]: """Parses all package.yml files found in the repository.""" for yaml_path, project_identifier in self.find_package_yamls(): parsed_data = self.parse_package_yaml(yaml_path) if parsed_data: yield parsed_data, project_identifier def _parse_dependency_list( self, deps_data: Any, context: str ) -> list[DependencyBlock]: """Parses a dependency dictionary into a list of DependencyBlock objects.""" if not isinstance(deps_data, dict): # For now, assume empty dict means no deps, but non-dict is error.
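# illustrative input shape (package names here are hypothetical): a direct entry
# like "openssl.org: ^1.1" is a name -> semver pair, while a platform entry like
# "linux: {gnu.org/make: '*'}" nests a name -> semver map under the platform key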
if deps_data is None or deps_data == {}: return [] dep_type = type(deps_data).__name__ raise TypeError( f"Expected dependencies to be a dict in {context}, got {dep_type}" ) dependency_blocks = [] direct_deps = [] for key, value in deps_data.items(): # Platform-specific block if isinstance(value, dict): platform = key platform_deps = [] for dep_name, semver in value.items(): if isinstance(semver, str): platform_deps.append(Dependency(name=dep_name, semver=semver)) elif isinstance(semver, int | float): platform_deps.append( Dependency(name=dep_name, semver=str(semver)) ) else: raise TypeError( f"Unexpected semver type for {dep_name} under platform {platform} in {context}: {type(semver).__name__}" ) if platform_deps: dependency_blocks.append( DependencyBlock(platform=platform, dependencies=platform_deps) ) # else: empty platform block is ignored # Direct dependency declaration elif isinstance(value, str): dep_name = key semver = value direct_deps.append(Dependency(name=dep_name, semver=semver)) # Direct declaration, but sometimes the semvers are exact elif isinstance(value, int | float): dep_name = key semver = str(value) direct_deps.append(Dependency(name=dep_name, semver=semver)) # Invalid structure else: raise TypeError( f"Unexpected dependency value type for key '{key}' in {context}: {type(value).__name__}. Expected dict or str or float." ) # Add all direct dependencies under the 'all' platform if direct_deps: dependency_blocks.append( DependencyBlock(platform="all", dependencies=direct_deps) ) return dependency_blocks def _parse_build_section(self, build_data: Any, file_path_str: str) -> Build: """Parses the build section if its a dict, list, or str""" if isinstance(build_data, dict): # Pass original dependencies dict, don't convert keys here build_deps_list = self._parse_dependency_list( build_data.get("dependencies"), f"build section of {file_path_str}" ) # Convert env var keys just before instantiation build_env = [ EnvironmentVariable(**convert_keys_to_snake_case(env)) for env in build_data.get("env", []) if isinstance(env, dict) ] # Convert build_data keys just before creating Build object build_kwargs = convert_keys_to_snake_case(build_data) return Build( script=build_kwargs.get("script", ""), dependencies=build_deps_list, # Use the originally parsed list env=build_env, working_directory=build_kwargs.get("working_directory"), ) elif isinstance(build_data, list): # Generally, it's a list of build commands, so we only have script info # TODO: Potentially improve handling of list-based build data script = ( build_data[0] if build_data and isinstance(build_data[0], str) else "" ) return Build( script=script, dependencies=[], env=[], working_directory=None, ) elif isinstance(build_data, str): return Build( script=build_data, dependencies=[], env=[], working_directory=None, ) else: build_type = type(build_data).__name__ raise TypeError(f"Build in {file_path_str} is {build_type}") def _parse_test_section(self, test_data: Any, file_path_str: str) -> Test: """Parses the test section if its a dict, list, or str""" if isinstance(test_data, dict): # Pass original dependencies dict test_deps_list = self._parse_dependency_list( test_data.get("dependencies"), f"test section of {file_path_str}" ) # Convert env var keys just before instantiation test_env = [ EnvironmentVariable(**convert_keys_to_snake_case(env)) for env in test_data.get("env", []) if isinstance(env, dict) ] # Convert test_data keys just before creating Test object test_kwargs = convert_keys_to_snake_case(test_data) return Test( 
script=test_kwargs.get("script", ""), dependencies=test_deps_list, # Use the originally parsed list env=test_env, fixture=test_kwargs.get("fixture"), ) elif isinstance(test_data, list): # TODO: Clarify how to handle list-based test data. Assuming empty for now. return Test(script="", dependencies=[], env=[], fixture=None) elif isinstance(test_data, str): # Assuming string directly means the script return Test(script=test_data, dependencies=[], env=[], fixture=None) elif isinstance(test_data, bool): # bad tests are sometimes just true/false return Test(script=str(test_data), dependencies=[], env=[], fixture=None) else: test_type = type(test_data).__name__ raise TypeError(f"Test for {file_path_str} is {test_type}") def _parse_versions_section( self, versions_data: Any, file_path_str: str ) -> Version: """Parses the versions section if its a list, dict, or None""" if isinstance(versions_data, list): # list of version strings (nums) return Version(versions=versions_data) elif isinstance(versions_data, dict): # github or gitlab...something useful # Convert keys just before creating Version object return Version(**convert_keys_to_snake_case(versions_data)) elif versions_data is None: # Handle case where versions might be missing, return default empty logger.warn(f"Missing 'versions' section in {file_path_str} using default.") return Version() else: version_type = type(versions_data).__name__ raise TypeError(f"Versions in {file_path_str} is {version_type}") def _parse_distributable_section( self, distributable_data: Any, file_path_str: str ) -> Distributable | list[Distributable]: """Parses the distributable section from the package data.""" if isinstance(distributable_data, list): # Convert keys for each dict in the list before creating Distributable return [ Distributable(**convert_keys_to_snake_case(d)) for d in distributable_data if isinstance(d, dict) ] elif isinstance(distributable_data, dict): # Convert keys just before creating Distributable object return [Distributable(**convert_keys_to_snake_case(distributable_data))] elif distributable_data is None: return [Distributable(url="~")] else: distributable_type = type(distributable_data).__name__ raise TypeError(f"Distributable in {file_path_str} is {distributable_type}") def map_package_yaml_to_pkgx_package( self, data: dict[str, Any], file_path_str: str ) -> PkgxPackage: """Maps a package.yml to a PkgxPackage.""" # Keep the original data, do not normalize globally here # normalized_data = convert_keys_to_snake_case(data) # Parse sections using helper functions, passing original data segments build_data = data.get("build") build_obj = self._parse_build_section(build_data, file_path_str) test_data = data.get("test") test_obj = self._parse_test_section(test_data, file_path_str) versions_data = data.get("versions") versions_obj = self._parse_versions_section(versions_data, file_path_str) distributable_data = data.get("distributable") distributable_obj = self._parse_distributable_section( distributable_data, file_path_str ) # Parse top-level dependencies using original keys dependencies_data = data.get("dependencies") top_level_deps_list = self._parse_dependency_list( dependencies_data, f"top-level of {file_path_str}" ) # TODO: Implement parsing for 'provides' list # would be useful because we have the set of "names" / "commands" for it! 
# provides_data = data.get("provides") # provides_obj = self._parse_provides_section(provides_data, file_path_str) # TODO: Implement parsing for 'platforms' list # platforms_data = data.get("platforms") # platforms_obj = self._parse_platforms_section(platforms_data, file_path_str) # Note: PkgxPackage itself doesn't directly take snake_case kwargs from top level # Its arguments are constructed from the parsed objects. return PkgxPackage( distributable=distributable_obj, versions=versions_obj, dependencies=top_level_deps_list, build=build_obj, test=test_obj, # provides=provides, # platforms=platforms, ) ================================================ FILE: package_managers/pkgx/url.py ================================================ import re from uuid import UUID from permalint import normalize_url, possible_names from requests import Response, get from core.config import Config from core.logger import Logger from core.structs import URLKey from core.utils import is_github_url from package_managers.pkgx.db import DB HOMEPAGE_URL = "https://pkgx.dev/pkgs/{name}.json" def canonicalize(url: str) -> str: return normalize_url(url) def guess(db_client: DB, package_managers: list[UUID], url: str) -> list[str]: names = possible_names(url) urls = db_client.search_names(names, package_managers) return urls def ask_pkgx(import_id: str) -> str | None: """ ask max's scraping work for the homepage of a package Homepage comes from the pkgxdev/www repo The API https://pkgx.dev/pkgs/{name}.json returns a blob which may contain the homepage field """ response: Response = get(HOMEPAGE_URL.format(name=import_id)) if response.status_code == 200: data: dict[str, str] = response.json() if "homepage" in data: return data["homepage"] def special_case(import_id: str, logger: Logger) -> str | None: homepage: str | None = None # if no slashes, then pkgx used the homepage as the name # if two slashes, then probably github / gitlab if not re.search(r"/", import_id) or re.search(r"/.+/", import_id): homepage = import_id # if it's a crates.io package, then we can use the crates URL elif re.search(r"^crates.io", import_id): if "/" in import_id: name = import_id.split("/")[1] homepage = f"https://crates.io/crates/{name}" else: logger.warn(f"Invalid format for crates.io import_id: {import_id}") # if it's part of the x.org family elif re.search(r"^x.org", import_id): homepage = "https://x.org" # if it's part of the pkgx family elif re.search("^pkgx.sh", import_id): tool = import_id.split("/")[1] homepage = f"https://github.com/pkgxdev/{tool}" # python.org/typing_extensions elif import_id == "python.org/typing_extensions": homepage = "https://github.com/python/typing_extensions" # thrysoee.dk/editline elif import_id == "thrysoee.dk/editline": homepage = "https://thrysoee.dk/editline" # gen-ir is a Homebrew Tap, which lists this as its homepage elif import_id == "veracode.com/gen-ir": homepage = "https://github.com/veracode/gen-ir" else: logger.warn(f"no homepage in pkgx for {import_id}") return homepage def generate_chai_urls( config: Config, db: DB, import_id: str, distributable_url: str, logger: Logger ) -> list[URLKey]: """For a pkgx import_id, generate a list of URLs it could have""" urls: list[URLKey] = [] # homepage similar = [config.package_managers.debian, config.package_managers.homebrew] maybe: list[str] = guess(db, similar, import_id) if maybe: homepage = maybe[0] else: homepage = ask_pkgx(import_id) if not homepage: homepage = special_case(import_id, logger) if homepage: canonical_homepage = canonicalize(homepage) 
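        # canonicalize() defers to permalint's normalize_url; the assumption is
        # that it yields one stable form per homepage, so packages sharing a
        # homepage map to the same URL row downstream.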
urls.append(URLKey(canonical_homepage, config.url_types.homepage)) # source # NOTE: for non-GitHub source URLs, pkgx tells you where the version string for the # downloadable tarball is...right now, we don't do anything about that canonical_distributable = canonicalize(distributable_url) urls.append(URLKey(canonical_distributable, config.url_types.source)) if is_github_url(canonical_distributable): urls.append(URLKey(canonical_distributable, config.url_types.repository)) return urls ================================================ FILE: pkgx.yaml ================================================ # this is the pkgx config across all the services covered by docker-compose dependencies: python.org: ~3.11 xcfile.dev: 0 cli.github.com: 2 astral.sh/uv: 0 postgresql.org: 16 docker.com/compose: 2 alembic.sqlalchemy.org: 1 psycopg.org/psycopg2: 2 ================================================ FILE: pyproject.toml ================================================ [project] name = "chai" version = "1.0.0" description = "An open-source data pipeline for all package managers" authors = [ { name = "Sanchit Ram Arvind", email = "sanchitram@gmail.com" }, { name = "Jacob Heider", email = "jhheider@pkgx.dev" }, ] keywords = ["data", "pipeline"] readme = "README.md" requires-python = ">= 3.11" dependencies = [] [project.urls] Homepage = "https://github.com/teaxyz/chai" Repository = "https://github.com/teaxyz/chai" Source = "https://github.com/teaxyz/chai" [tool.uv] managed = true [tool.pytest.ini_options] pythonpath = ["."] minversion = "8.0" python_files = ["test_*.py"] python_classes = ["Test*"] python_functions = ["test_"] addopts = ["-ra", "--strict-markers", "--disable-warnings", "--tb=short"] markers = ["unit"] [tool.ruff] line-length = 88 exclude = ["__pycache__", ".venv", ".git", ".pytest_cache"] [tool.ruff.lint] select = [ "E", # pycodestyle (error) "F", # pyflakes "B", # bugbear "B9", "C4", # flake8-comprehensions "SIM", # flake8-simplify "I", # isort "UP", # pyupgrade "PIE", # flake8-pie "PGH", # pygrep-hooks "PYI", # flake8-pyi "RUF", ] ignore = [ # leave it to the formatter to split long lines and # the judgement of all of us. "E501", ] fixable = ["ALL"] [tool.ruff.format] quote-style = "double" indent-style = "space" [dependency-groups] dev = [ "pytest>=8.4.0", "pytest-cov>=6.2.1", "ruff>=0.11.13", "testing-postgresql>=1.3.0", ] indexers = [ "alembic==1.13.2", "certifi>=2025.4.26", "charset-normalizer>=3.4.2", "gitpython>=3.1.44", "idna>=3.10", "permalint>=0.1.15", "psycopg2-binary==2.9.10", "pyyaml>=6.0.2", "requests>=2.32.4", "schedule>=1.2.2", "sqlalchemy>=2.0.41", "urllib3>=2.4.0", ] ranker = ["numpy>=2.3.0", "rustworkx>=0.16.0"] ================================================ FILE: ranker/.dockerignore ================================================ prompts/ ================================================ FILE: ranker/.gitignore ================================================ prompts/ ================================================ FILE: ranker/Dockerfile ================================================ FROM python:3.11 # Copy everything COPY . . 
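# NOTE: the build context must be the repository root (see README) so that
# both core/ and ranker/ are copied for the two installs below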
# Install core requirements
WORKDIR /core
RUN pip install --no-cache-dir -r requirements.txt

# Install ranker requirements
WORKDIR /ranker
RUN pip install --no-cache-dir -r requirements.txt

# Command to run the application
CMD ["python", "/ranker/main.py"]


================================================
FILE: ranker/README.md
================================================
# ranker

generates a deduplicated graph across all CHAI package managers by URL, and publishes a tea_rank

## Requirements

1. [pkgx](https://pkgx.sh)
2. [uv](https://astral.sh/uv)

## Deduplication (`dedupe.py`)

`dedupe.py` handles the deduplication of packages based on their homepage URLs. It ensures that packages sharing the same canonical homepage URL are grouped together.

**Process:**

1. **Fetch Existing State:** Retrieves all current canonical homepage URLs and their associated packages from the `canons`, `canon_packages`, and `package_urls` tables
2. **Determine Latest URLs:** Identifies the most recent homepage URL for each package
3. **Diff:** Identifies new canons, new canon_packages, and canon_packages to update
4. **Ingest:** Creates new canons and new links if necessary, and updates existing ones

This process is idempotent: running it multiple times converges to the same correct state based on the latest available package URL data.

### Getting started

1. You need `CHAI_DATABASE_URL` set up, and the CHAI db running
2. With pkgx:

```bash
chmod +x ranker/dedupe.py
PYTHONPATH=. LOAD=0 ranker/dedupe.py
```

You can toggle `LOAD` to do a dry run, where it reports what it's about to do without loading any information

## Ranking

`main.py` first runs deduplication, then builds a graph with one node per canon and one edge per dependency, personalizes it using the per-package-manager favorites in `config.py`, distributes weight across the graph (`rx_graph.py`), and stores the results as a new tea_rank run.

## Usage

### With pkgx

```bash
chmod +x main.py
./main.py
```

### Without pkgx

```bash
uv run main.py
```

## Docker

This service can be run inside a Docker container. The container assumes that the `core` library is available and that the `CHAI_DATABASE_URL` environment variable is set to point to the database.

**Building the Image:**

From the root of the `chai-oss` repository:

```bash
docker build -t chai-ranker -f ranker/Dockerfile .
```

**Running the Container:**

Make sure to provide the database connection string via the `CHAI_DATABASE_URL` environment variable:

```bash
docker run --rm -e CHAI_DATABASE_URL=postgresql://postgres:s3cr3t@localhost:5435/chai chai-ranker
```

The container runs `main.py`, which executes the deduplication step before ranking, and exits with code 0 on success or a non-zero code on failure.
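## Utilities

- `utils/analyze_ranks.py`: joins a rank JSON dump with canon and package data from the database and writes a formatted CSV. A sketch invocation, assuming `CHAI_DATABASE_URL` is set and you run from the repo root:

  ```bash
  PYTHONPATH=. python ranker/utils/analyze_ranks.py --file data/ranker/ranks/ranks_37_0.7.json
  ```

- `utils/parse_log.py`: computes packages-per-second metrics from a ranker run log, either from a file or piped in via `tmux capture-pane -p`.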
================================================ FILE: ranker/config.py ================================================ from dataclasses import dataclass from decimal import Decimal, getcontext from uuid import UUID from sqlalchemy import func from core.db import DB from core.logger import Logger from core.models import Canon, CanonPackage, Package, PackageManager, Source, URLType from core.utils import env_vars logger = Logger("graph.config") SYSTEM_PACKAGE_MANAGERS = ["homebrew", "debian", "pkgx"] # setup decimal getcontext().prec = 9 getcontext().rounding = "ROUND_HALF_UP" class ConfigDB(DB): def __init__(self): super().__init__("graph.config::db") def get_homepage_url_type_id(self) -> UUID: with self.session() as session: result = ( session.query(URLType.id).filter(URLType.name == "homepage").scalar() ) if result is None: raise ValueError("homepage url type not found") return result def get_npm_pm_id(self) -> UUID: return self.get_pm_id_by_name("npm")[0][0] def get_canons_with_source_types( self, source_types: list[str] ) -> list[tuple[UUID, list[str]]]: with self.session() as session: return ( session.query( Canon.id, func.array_agg(Source.type).label("source_types") ) .join(CanonPackage, Canon.id == CanonPackage.canon_id) .join(Package, CanonPackage.package_id == Package.id) .join(PackageManager, Package.package_manager_id == PackageManager.id) .join(Source, PackageManager.source_id == Source.id) .filter(Source.type.in_(source_types)) .group_by(Canon.id) .all() ) def get_pm_id_by_name(self, name: str | list[str]) -> UUID: if isinstance(name, str): name = [name] with self.session() as session: result = ( session.query(PackageManager.id) .join(Source, PackageManager.source_id == Source.id) .filter(Source.type.in_(name)) .all() ) if result is None: raise ValueError(f"package manager {name} not found") return result class TeaRankConfig: def __init__(self, db: ConfigDB) -> None: self.db = db self.favorites: dict[str, Decimal] = {} self.weights: dict[UUID, Decimal] = {} self.personalization: dict[UUID, Decimal] = {} self.map_favorites(SYSTEM_PACKAGE_MANAGERS) alpha: Decimal = Decimal("0.85") split_ratio: Decimal = Decimal("0.5") tol: Decimal = Decimal("1e-6") max_iter: int = 1000000 def map_favorites(self, package_managers: list[str]) -> None: for pm in package_managers: match pm: case "homebrew": pm_id = self.db.get_pm_id_by_name("homebrew")[0][0] self.favorites[pm_id] = Decimal("0.3") case "debian": pm_id = self.db.get_pm_id_by_name("debian")[0][0] self.favorites[pm_id] = Decimal("0.6") case "pkgx": pm_id = self.db.get_pm_id_by_name("pkgx")[0][0] self.favorites[pm_id] = Decimal("0.1") case _: raise ValueError(f"Unknown system package manager: {pm}") def personalize( self, canons_with_source_types: list[tuple[UUID, list[str]]] ) -> None: """Adjust canon weights proportionally to the sum of `favorites` in their associated package managers, normalized to total 1.""" def coefficient(source_types: list[str]) -> Decimal: return sum(self.favorites[source_type] for source_type in source_types) # calculate raw weights for each canon based on favorites raw_weights = {} total = Decimal(0) for canon_id, package_manager_ids in canons_with_source_types: # make source_types a set to deduplicate source_types = set(package_manager_ids) # sum the weights for all package managers this canon appears in weight = coefficient(source_types) raw_weights[canon_id] = weight total += weight constant = Decimal(1) / total for canon_id, weight in raw_weights.items(): self.personalization[canon_id] = weight * constant 
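        # Worked example (hypothetical): a canon seen only in debian has raw
        # weight 0.6 and one seen only in homebrew has 0.3; total = 0.9, so
        # their personalization values become 2/3 and 1/3, summing to 1.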
logger.debug(f"Personalization sum: {sum(self.personalization.values())}") def __str__(self) -> str: return f"TeaRankConfig(alpha={self.alpha}, favorites={self.favorites}, weights={len(self.weights)}, personalization={len(self.personalization)})" # E501 class PMConfig: def __init__(self, db: ConfigDB) -> None: self.db = db self.npm_pm_id = self.db.get_npm_pm_id() self.system_pm_ids = [ id[0] for id in self.db.get_pm_id_by_name(SYSTEM_PACKAGE_MANAGERS) ] # TODO: we'll add PyPI, rubygems from when we load with legacy data def __str__(self) -> str: return ( f"PMConfig(npm_pm_id={self.npm_pm_id}, system_pm_ids={self.system_pm_ids})" ) class URLTypes: def __init__(self, db: ConfigDB) -> None: self.db = db self.homepage_url_type_id = self.db.get_homepage_url_type_id() def __str__(self) -> str: return f"URLTypes(homepage_url_type_id={self.homepage_url_type_id})" class DedupeConfig: def __init__(self, db: ConfigDB) -> None: self.homepage_url_type_id = db.get_homepage_url_type_id() self.load = env_vars("LOAD", "true") def __str__(self) -> str: return f"DedupeConfig(homepage_url_type_id={self.homepage_url_type_id}, load={self.load})" # E501 @dataclass class Config: def __init__(self, db: ConfigDB) -> None: self.db = db self.tearank_config = TeaRankConfig(db=db) self.pm_config = PMConfig(db=db) self.url_types = URLTypes(db=db) def __str__(self) -> str: return f"Config(tearank_config={self.tearank_config}, pm_config={self.pm_config}, url_types={self.url_types})" # E501 def load_config() -> Config: logger.debug("Loading config") return Config(db=ConfigDB()) def load_dedupe_config() -> DedupeConfig: return DedupeConfig(db=ConfigDB()) ================================================ FILE: ranker/db.py ================================================ from uuid import UUID from sqlalchemy.dialects.postgresql import insert as pg_insert from core.db import DB from core.models import ( URL, Canon, CanonPackage, DependsOn, LegacyDependency, Package, PackageURL, TeaRank, TeaRankRun, URLType, Version, ) BATCH_SIZE = 20000 class GraphDB(DB): def __init__(self, legacy_pm_id: UUID, system_pm_ids: list[UUID]): super().__init__("graph.db") self.legacy_pm_id = legacy_pm_id self.system_pm_ids = system_pm_ids def is_canon_populated(self) -> bool: with self.session() as session: return session.query(Canon).count() > 0 def is_canon_package_populated(self) -> bool: with self.session() as session: return session.query(CanonPackage).count() > 0 def get_all_canons(self) -> dict[str, UUID]: """Fetch all existing canons as a map from URL to Canon ID.""" with self.session() as session: results = session.query(Canon.url, Canon.id).all() return dict(results) def get_packages_with_urls(self) -> list[tuple[UUID, str, str, str]]: """ Retrieve packages with their associated URLs and URL types. Returns: List of tuples containing id, name, and url """ with self.session() as session: return ( session.query(Package.id, Package.name, URL.url, URL.created_at) .join(PackageURL, Package.id == PackageURL.package_id) .join(URL, PackageURL.url_id == URL.id) .join(URLType, URL.url_type_id == URLType.id) .where(URLType.name == "homepage") # we're deduplicating on homepage .order_by(URL.created_at.desc()) .all() ) def load_canonical_packages(self, data: list[Canon]) -> None: """ Load canonical packages into the database in batches, handling conflicts. Args: data: List of Canon objects. 
""" with self.session() as session: for i in range(0, len(data), BATCH_SIZE): batch = data[i : i + BATCH_SIZE] if not batch: continue # Convert batch objects to dictionaries for insert statement insert_data = [ {"id": item.id, "url": item.url, "name": item.name} for item in batch ] stmt = pg_insert(Canon).values(insert_data) stmt = stmt.on_conflict_do_nothing(index_elements=["url"]) if stmt is not None: session.execute(stmt) # log batch_number = (i // BATCH_SIZE) + 1 total_batches = (len(data) + BATCH_SIZE - 1) // BATCH_SIZE self.logger.log( f"Processed Canon batch {batch_number} of {total_batches}" ) session.commit() def load_canonical_package_mappings(self, data: list[CanonPackage]) -> None: """ Load canonical package mappings into the database in batches, updating on conflict. Args: data: List of CanonPackage objects. """ with self.session() as session: for i in range(0, len(data), BATCH_SIZE): batch = data[i : i + BATCH_SIZE] if not batch: continue # Convert batch objects to dictionaries insert_data = [ { "id": item.id, "canon_id": item.canon_id, "package_id": item.package_id, } for item in batch ] stmt = pg_insert(CanonPackage).values(insert_data) update_dict = {"canon_id": stmt.excluded.canon_id} # this is the unique constraint on canon_packages -> if its violated, # that means that the package has changed its URL, and the dedupe # logic has corrected the correct canon for this package stmt = stmt.on_conflict_do_update( index_elements=["package_id"], set_=update_dict ) if stmt is not None: session.execute(stmt) # log batch_number = (i // BATCH_SIZE) + 1 total_batches = (len(data) + BATCH_SIZE - 1) // BATCH_SIZE self.logger.log( f"Processed CanonPackage batch {batch_number} of {total_batches}" ) session.commit() def get_packages(self) -> list[tuple[UUID, UUID]]: """Gets all packages for the run""" self.logger.debug(f"Getting packages for {self.system_pm_ids} package managers") with self.session() as session: return ( session.query(Package.id, Package.package_manager_id) .where(Package.package_manager_id.in_(self.system_pm_ids)) .all() ) def get_dependencies(self, package_id: UUID) -> list[tuple[UUID]]: """Gets all the dependencies based on the CHAI data model""" with self.session() as session: return ( session.query(DependsOn.dependency_id) .join(Version, DependsOn.version_id == Version.id) .join(Package, Version.package_id == Package.id) .filter(Package.id == package_id) .all() ) def get_package_to_canon_mapping(self) -> dict[UUID, UUID]: with self.session() as session: return { canon_package.package_id: canon.id for canon, canon_package in session.query(Canon, CanonPackage) .join(CanonPackage, Canon.id == CanonPackage.canon_id) .join(Package, CanonPackage.package_id == Package.id) .where(Package.package_manager_id != self.legacy_pm_id) } def get_legacy_dependencies(self, package_id: UUID) -> list[tuple[UUID]]: """Gets all the legacy dependencies based on the legacy CHAI data model""" with self.session() as session: return ( session.query(LegacyDependency.dependency_id) .filter(LegacyDependency.package_id == package_id) .filter(LegacyDependency.dependency_id != package_id) .all() ) def load_tea_ranks(self, data: list[TeaRank]) -> None: """Loads tea ranks into the database""" with self.session() as session: session.add_all(data) session.commit() def load_tea_rank_runs(self, data: list[TeaRankRun]) -> None: """Loads tea rank runs into the database""" with self.session() as session: session.add_all(data) session.commit() def get_current_tea_rank_run(self) -> TeaRankRun | None: """Gets 
the current tea rank run""" with self.session() as session: return ( session.query(TeaRankRun).order_by(TeaRankRun.created_at.desc()).first() ) ================================================ FILE: ranker/dedupe.py ================================================ #!/usr/bin/env uv run --with sqlalchemy==2.0.34 --with permalint==0.1.12 from datetime import datetime from uuid import UUID, uuid4 from permalint import is_canonical_url from sqlalchemy import update from sqlalchemy.orm import Session from core.db import DB from core.logger import Logger from core.models import URL, BaseModel, Canon, CanonPackage, Package, PackageURL from package_managers.crates.structs import ( CanonPackageUpdatePayload, CanonUpdatePayload, ) from ranker.config import DedupeConfig, load_dedupe_config from ranker.naming import compute_canon_name, get_effective_canon_name class DedupeDB(DB): def __init__(self, config: DedupeConfig): super().__init__("ranker.db") self.config: DedupeConfig = config def get_current_canons(self) -> dict[UUID, Canon]: """Get current canons as a mapping from URL ID to Canon object.""" with self.session() as session: canons = session.query(Canon).all() return {canon.url_id: canon for canon in canons} def get_current_canon_packages(self) -> dict[UUID, dict[str, UUID]]: """Get current canon-package mappings as dict[package_id -> canon_id].""" with self.session() as session: canon_packages = session.query(CanonPackage).all() return { cp.package_id: {"id": cp.id, "canon_id": cp.canon_id} for cp in canon_packages } def get_packages_with_homepages(self) -> list[tuple[Package, URL]]: with self.session() as session: return ( session.query(Package, URL) .join(PackageURL, Package.id == PackageURL.package_id) .join(URL, PackageURL.url_id == URL.id) .where(URL.url_type_id == self.config.homepage_url_type_id) .order_by(Package.id, URL.created_at.desc()) # Latest URL / package .all() ) def get_all_package_names(self) -> dict[UUID, str]: with self.session() as session: return {pkg.id: pkg.name for pkg in session.query(Package).all()} # TODO: first to be optimized def ingest( self, new_canons: list[Canon], canon_updates: list[CanonUpdatePayload], new_canon_packages: list[CanonPackage], updated_canon_packages: list[CanonPackageUpdatePayload], ) -> None: with self.session() as session: if new_canons: self.add_with_flush(session, new_canons) if canon_updates: session.execute(update(Canon), canon_updates) if new_canon_packages: self.add_with_flush(session, new_canon_packages) if updated_canon_packages: session.execute(update(CanonPackage), updated_canon_packages) session.commit() def add_with_flush(self, session: Session, rows: list[BaseModel]) -> None: session.add_all(rows) session.flush() def get_latest_homepage_per_package( packages_with_homepages: list[tuple[Package, URL]], logger: Logger ) -> tuple[dict[UUID, URL], list[URL]]: """Get the latest homepage URL for each package.""" latest_homepages: dict[UUID, URL] = {} non_canonical_urls: list[URL] = [] for pkg, url in packages_with_homepages: # Since we ordered by Package.id, URL.created_at.desc(), # the first URL we see for each package is the latest if pkg.id not in latest_homepages: # skip empty or whitespace-only urls if not url.url or url.url.strip() == "": continue # guard against non-canonicalized URLs try: if not is_canonical_url(url.url): non_canonical_urls.append(url) else: latest_homepages[pkg.id] = url except Exception as e: logger.warn(f"Error checking if {url.url} is canonical: {e}") non_canonical_urls.append(url) if non_canonical_urls: 
logger.warn(f"Found {len(non_canonical_urls)} non-canonicalized URLs in URLs") return latest_homepages, non_canonical_urls def build_canon_update_payload( canon: Canon, new_name: str, now: datetime ) -> CanonUpdatePayload: """Build update payload for a canon with a new name.""" return CanonUpdatePayload(id=canon.id, name=new_name, updated_at=now) def build_canon_package_update_payload( current_canon_packages: dict[UUID, dict[str, UUID]], pkg_id: UUID, new_canon_id: UUID, now: datetime, ) -> CanonPackageUpdatePayload: """Build an update payload for a canon package.""" canon_package_data = current_canon_packages.get(pkg_id) if canon_package_data is None: raise ValueError(f"No canon package mappings for {pkg_id}") current_canon_package_id = canon_package_data.get("id") if current_canon_package_id is None: raise ValueError(f"{pkg_id} has no canon package ID but canon: {new_canon_id}") return CanonPackageUpdatePayload( id=current_canon_package_id, canon_id=new_canon_id, updated_at=now ) def process_deduplication_changes( latest_homepages: dict[UUID, URL], current_canons: dict[UUID, Canon], current_canon_packages: dict[UUID, dict[str, UUID]], name_map: dict[UUID, str], logger: Logger, ) -> tuple[ list[Canon], list[CanonUpdatePayload], list[CanonPackage], list[CanonPackageUpdatePayload], ]: """ Process deduplication changes based on current state. Returns: tuple of (canons_to_create, canons_to_update, mappings_to_create, mappings_to_update) """ now = datetime.now() canons_to_create: dict[UUID, Canon] = {} # indexed by url_id for deduplication canons_to_update: dict[UUID, CanonUpdatePayload] = {} # indexed by canon_id mappings_to_create: list[CanonPackage] = [] mappings_to_update: list[CanonPackageUpdatePayload] = [] for pkg_id, url in latest_homepages.items(): # Check if the URL has an existing canon existing_canon: Canon | None = current_canons.get(url.id) # If no existing canon, check if we're creating one for this URL if existing_canon is None: existing_canon = canons_to_create.get(url.id) existing_canon_id: UUID | None = existing_canon.id if existing_canon else None # Check if the package is already linked to a canon linked_canon_id: UUID | None = current_canon_packages.get(pkg_id, {}).get( "canon_id" ) if existing_canon_id is None: # No canon exists for this URL - create a new one pkg_name = name_map.get(pkg_id) # Compute the name for the new canon name = compute_canon_name(url.url, pkg_name) new_canon = Canon( id=uuid4(), url_id=url.id, name=name, created_at=now, updated_at=now, ) canons_to_create[url.id] = new_canon # Handle package-to-canon mapping if linked_canon_id is None: # Create new canon package mapping new_canon_package = CanonPackage( id=uuid4(), canon_id=new_canon.id, package_id=pkg_id, created_at=now, updated_at=now, ) mappings_to_create.append(new_canon_package) else: # Update existing mapping to point to new canon update_payload = build_canon_package_update_payload( current_canon_packages, pkg_id, new_canon.id, now ) mappings_to_update.append(update_payload) else: # Canon exists - check if name needs updating # Get the most current name (considering pending updates) current_name = get_effective_canon_name(existing_canon, canons_to_update) pkg_name = name_map.get(pkg_id) desired_name = compute_canon_name(url.url, pkg_name, current_name) # Update canon name if it's different if desired_name != current_name: update_payload = build_canon_update_payload( existing_canon, desired_name, now ) canons_to_update[existing_canon.id] = update_payload # Handle package-to-canon mapping if 
linked_canon_id is None: # Create new canon package mapping new_canon_package = CanonPackage( id=uuid4(), canon_id=existing_canon_id, package_id=pkg_id, created_at=now, updated_at=now, ) mappings_to_create.append(new_canon_package) elif linked_canon_id != existing_canon_id: # Update existing mapping to correct canon update_payload = build_canon_package_update_payload( current_canon_packages, pkg_id, existing_canon_id, now ) mappings_to_update.append(update_payload) # else: mapping is already correct, no action needed return ( list(canons_to_create.values()), list(canons_to_update.values()), mappings_to_create, mappings_to_update, ) def main(config: DedupeConfig, db: DedupeDB): logger = Logger("ranker.dedupe") now = datetime.now() logger.log(f"Starting deduplication process at {now}") # 1. Get current state current_canons: dict[UUID, Canon] = db.get_current_canons() logger.debug(f"Found {len(current_canons)} current canons") current_canon_packages: dict[UUID, dict[str, UUID]] = ( db.get_current_canon_packages() ) logger.debug(f"Found {len(current_canon_packages)} current canon packages") packages_with_homepages: list[tuple[Package, URL]] = ( db.get_packages_with_homepages() ) logger.debug(f"Found {len(packages_with_homepages)} packages with homepages") name_map: dict[UUID, str] = db.get_all_package_names() # 2. Get latest homepage per package latest_homepages, non_canonical_urls = get_latest_homepage_per_package( packages_with_homepages, logger ) logger.debug(f"Found {len(latest_homepages)} packages with latest homepages") # 3. Process changes differentially (canons_to_create, canons_to_update, mappings_to_create, mappings_to_update) = ( process_deduplication_changes( latest_homepages, current_canons, current_canon_packages, name_map, logger ) ) # 4. Apply changes logger.log("-" * 100) logger.log("Changes to apply:") logger.log(f" Canons to create: {len(canons_to_create)}") logger.log(f" Canons to update: {len(canons_to_update)}") logger.log(f" Mappings to create: {len(mappings_to_create)}") logger.log(f" Mappings to update: {len(mappings_to_update)}") logger.log("-" * 100) if not config.load: logger.log("Skipping changes because LOAD is not set") return db.ingest( canons_to_create, canons_to_update, mappings_to_create, mappings_to_update ) logger.log("✅ Deduplication process completed") if non_canonical_urls: logger.warn(f"Found {len(non_canonical_urls)} non-canonical URLs") if __name__ == "__main__": config: DedupeConfig = load_dedupe_config() db: DedupeDB = DedupeDB(config) try: main(config, db) finally: db.close() ================================================ FILE: ranker/main.py ================================================ #! 
/usr/bin/env pkgx +python@3.11 uv run # /// script # dependencies = [ # "permalint==0.1.12", # "sqlalchemy==2.0.34", # "numpy==2.2.3", # "rustworkx==0.16.0", # "psycopg2-binary==2.9.10", # ] # /// from dataclasses import dataclass from uuid import UUID from core.logger import Logger from core.models import TeaRank, TeaRankRun from ranker.config import Config, DedupeConfig, load_config, load_dedupe_config from ranker.db import GraphDB from ranker.dedupe import DedupeDB from ranker.dedupe import main as dedupe from ranker.rx_graph import CHAI, PackageNode logger = Logger("ranker.main") @dataclass class PackageInfo: id: UUID package_manager_id: UUID def load_graph( config: Config, db: GraphDB, package_to_canon_mapping: dict[UUID, UUID], packages: list[PackageInfo], stop: int | None = None, ) -> CHAI: chai = CHAI() missing: set[tuple[UUID, UUID]] = set() npm_pm_id = config.pm_config.npm_pm_id for i, package in enumerate(packages): # add this package's canon to the graph try: canon_id = package_to_canon_mapping[package.id] except KeyError: missing.add((str(package.id), str(package.package_manager_id))) continue # grab the object from the graph if it exists if canon_id in chai.canon_to_index: node = chai[chai.canon_to_index[canon_id]] else: # otherwise, create a new one node = PackageNode(canon_id=canon_id) node.index = chai.add_node(node) # add the package manager id to the node node.package_manager_ids.append(package.package_manager_id) # now grab its dependencies # there are two cases: legacy CHAI or new CHAI # the db helps us these two distinctions with two different helpers # TODO: eventually, CHAI will be at package to package, so everything will # "get_legacy_dependencies" if package.package_manager_id == npm_pm_id: dependencies = db.get_legacy_dependencies(package.id) else: dependencies = db.get_dependencies(package.id) # for each dependency, add the corresponding canon to the graph # and set the edge for dependency in dependencies: dep = dependency[0] try: dep_canon_id = package_to_canon_mapping[dep] except KeyError: missing.add((str(dep), str(package.package_manager_id))) continue dep_node = PackageNode(canon_id=dep_canon_id) dep_node.index = chai.add_node(dep_node) chai.add_edge(node.index, dep_node.index, {}) if stop is not None and i >= stop: break if i % 1000 == 0: logger.debug(f"Processing package {i+1}/{len(packages)} (ID: {package.id})") logger.log(f"Missing {len(missing)} packages") # TODO: should we save the missing packages? 
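    # At this point the graph holds one node per canon and one edge per
    # (canon -> dependency canon) pair; packages without a canon mapping were
    # recorded in `missing` and skipped.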
return chai def main(config: Config, db: GraphDB) -> None: # get the map of package_id -> canon_id package_to_canon: dict[UUID, UUID] = db.get_package_to_canon_mapping() logger.log(f"{len(package_to_canon)} package to canon mappings") # get the list of packages packages = [ PackageInfo(id=id, package_manager_id=pm_id) for id, pm_id in db.get_packages() ] logger.log(f"{len(packages)} packages") # load the graph chai = load_graph(config, db, package_to_canon, packages) logger.log(f"CHAI has {len(chai)} nodes and {len(chai.edge_to_index)} edges") # now, I need to generate the personalization vector canons_with_source_types: list[tuple[UUID, list[UUID]]] = [] for idx in chai.node_indexes(): node = chai[idx] canons_with_source_types.append((node.canon_id, node.package_manager_ids)) config.tearank_config.personalize(canons_with_source_types) # generate tea_ranks ranks = chai.distribute( config.tearank_config.personalization, config.tearank_config.split_ratio, config.tearank_config.tol, config.tearank_config.max_iter, ) str_ranks = {str(chai[id].canon_id): f"{rank}" for id, rank in ranks.items()} # Determine the next run ID latest_run = db.get_current_tea_rank_run() current_run = latest_run.run + 1 if latest_run else 1 logger.log(f"Starting TeaRank run number: {current_run}") # Prepare TeaRank objects with the *next* run ID tea_ranks = [ TeaRank(canon_id=UUID(canon_id), tea_rank_run=current_run, rank=rank) for canon_id, rank in str_ranks.items() ] # Load all ranks first db.load_tea_ranks(tea_ranks) # Only after successfully loading ranks, load the corresponding run entry tea_rank_run = TeaRankRun( run=current_run, split_ratio=config.tearank_config.split_ratio ) db.load_tea_rank_runs([tea_rank_run]) logger.log("Done!") if __name__ == "__main__": # first deduplicate dedupe_config: DedupeConfig = load_dedupe_config() dedupe_db: DedupeDB = DedupeDB(dedupe_config) try: dedupe(dedupe_config, dedupe_db) except Exception as e: logger.error(f"Some error occurred when deduplicating: {e}") raise # then rank ranker_config = load_config() ranker_db = GraphDB( ranker_config.pm_config.npm_pm_id, ranker_config.pm_config.system_pm_ids ) try: main(ranker_config, ranker_db) except Exception as e: logger.error(f"Some error occurred when ranking: {e}") raise ================================================ FILE: ranker/naming.py ================================================ #!/usr/bin/env uv run --with permalint==0.1.12 from uuid import UUID from permalint import possible_names from core.models import Canon from package_managers.crates.structs import CanonUpdatePayload def compute_canon_name(url: str, package_name: str, existing_name: str = "") -> str: """ Determines the name of the canon, based on the package name, URL, and canon name Notes: - the logic for determining whether it's an update or not, is left to the caller - this function does not do anything for monorepos - as a fallback, the original package name is always returned """ if not url or not package_name: raise ValueError(f"Missing one of url={url} | package_name={package_name}") best_guess = extract_repo_name_from_url(url) if existing_name: # guard if url == existing_name: return package_name return check_if_better(best_guess, package_name, existing_name) return package_name def check_if_better(best_guess: str, package_name: str, existing_name: str) -> str: """Check if we have a better name than the existing name.""" if best_guess == package_name: # boom, this is the ideal case. the repo and the package share a name! 
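        # Worked example for the fallback below (hypothetical names): with
        # best_guess="react", score_name("react", "react") = 1 + (15 - 5) = 11,
        # while score_name("@types/react", "react") = 1 + (15 - 12) - 3 = 1,
        # so the shorter, unscoped name wins.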
return package_name package_name_score = score_name(package_name, best_guess) existing_name_score = score_name(existing_name, best_guess) if package_name_score > existing_name_score: return package_name return existing_name def extract_repo_name_from_url(url: str) -> str: """ Extract a reasonable name from a URL, typically the repository name. We're trusting permalint's rules for guessing a package's name based on the homepage URL here. Note that the fallback is always to retrieve the full URL name, which will be the only element in the result """ if not url: return url names: list[str] = possible_names(url) if len(names) > 1: return names[1].lower() else: return names[0].lower() def score_name(name: str, best_guess: str) -> int: """ Score a package name based on some rules 1. Prefer shorter, simpler names 2. Prefer names without prefixes/suffixes that suggest forks or variations """ if not name and not best_guess: raise ValueError(f"Missing one of name={name} | guess={best_guess}") score = 0 clean = name.lower() if best_guess in clean: score += 1 # Prefer shorter names score += max(0, 15 - len(clean)) # Penalize scoped packages if clean.startswith("@"): score -= 3 return score def get_effective_canon_name( canon: Canon, pending_updates: dict[UUID, CanonUpdatePayload] ) -> str: """ Get the effective name for a canon, considering both current and pending updates. This ensures we always work with the most up-to-date name when processing multiple canons that might reference each other. """ if canon.id in pending_updates: return pending_updates[canon.id]["name"] return canon.name ================================================ FILE: ranker/requirements.txt ================================================ # This file was autogenerated by uv via the following command: # uv pip compile --group ranker -o ranker/requirements.txt numpy==2.3.0 # via # chai (pyproject.toml:ranker) # rustworkx rustworkx==0.16.0 # via chai (pyproject.toml:ranker) ================================================ FILE: ranker/rx_graph.py ================================================ #!/usr/bin/env pkgx +python@3.11 uv run from collections import defaultdict, deque from dataclasses import dataclass, field from decimal import Decimal from typing import Any from uuid import UUID import rustworkx as rx from core.logger import Logger logger = Logger("ranker.chai_graph") @dataclass class PackageNode: """Note that this is different from PackageInfo in main.py! This is based on canons!""" canon_id: UUID package_manager_ids: list[UUID] = field(default_factory=list) weight: Decimal = field(default_factory=Decimal) index: int = field(default_factory=lambda: -1) class CHAI(rx.PyDiGraph): def __init__(self): super().__init__() self.canon_to_index: dict[UUID, int] = {} self.edge_to_index: dict[tuple[int, int], int] = {} def add_node(self, node: PackageNode) -> int: """Safely add a node to the graph. If exists, return the index""" if node.canon_id not in self.canon_to_index: index = super().add_node(node) self.canon_to_index[node.canon_id] = index return self.canon_to_index[node.canon_id] def add_edge(self, u: int, v: int, edge_data: Any) -> None: """Safely add an edge to the graph. 
If exists, return the index""" if (u, v) not in self.edge_to_index: index = super().add_edge(u, v, edge_data) self.edge_to_index[(u, v)] = index return self.edge_to_index[(u, v)] def generate_personalization( self, personalization: dict[UUID, Decimal] ) -> dict[int, float]: result = {} for id, weight in personalization.items(): if id not in self.canon_to_index: continue result[self.canon_to_index[id]] = float(weight) return result def pagerank( self, alpha: Decimal, personalization: dict[UUID, Decimal] ) -> rx.CentralityMapping: return rx.pagerank( self, alpha=float(alpha), personalization=self.generate_personalization(personalization), ) def distribute( self, personalization: dict[UUID, Decimal], split_ratio: Decimal, tol: Decimal, max_iter: int = 100, ) -> dict[int, Decimal]: """Distribute values across the graph based on dependencies.""" if not personalization: raise ValueError("Personalization is empty") # Convert personalization to index-based dict result = defaultdict(Decimal) q: deque[tuple[int, Decimal]] = deque() for id, weight in personalization.items(): if id not in self.canon_to_index: logger.log(f"{id} is type {type(id)}") raise ValueError(f"Canon ID {id} not found in CHAI") q.append((self.canon_to_index[id], weight)) iterations: int = 0 while q: iterations += 1 node_id, weight = q.popleft() # Ensure iteration count check happens regardless of other logic if iterations > max_iter: logger.warn(f"Max iterations reached: {max_iter}") break dependencies = self.successors(node_id) num_dependencies = len(dependencies) # If the weight arriving is already below tolerance, or if it's a terminal # node, add the entire weight to the result and stop distributing from # this node in this path. if num_dependencies == 0 or weight < tol: result[node_id] += weight continue # Handle non-terminal nodes with significant weight (weight >= tol) # Calculate the portion of weight the current node keeps. keep = weight * split_ratio # Always add the 'keep' amount to the node's result. # The tolerance check below is only for preventing further distribution # of insignificant amounts, not for deciding if the current node's # share is worth keeping. result[node_id] += keep # Calculate the total amount to be split among dependencies. split = weight - keep # Equivalent to weight * (1 - split_ratio) # Calculate split per dependency. split_per_dep = split / num_dependencies # Use tolerance to gate further distribution: Only queue dependencies # if the amount they would receive individually is significant enough. if split_per_dep >= tol: for dep in dependencies: q.append((dep.index, split_per_dep)) # If split_per_dep < tol, the remaining 'split' amount is effectively # dropped from this distribution path, as it's deemed too small # to continue propagating. This helps prune the calculation. logger.log(f"Iterations: {iterations}. Ranks sum to {sum(result.values()):.9f}") return dict(result) ================================================ FILE: ranker/utils/analyze_ranks.py ================================================ #!/usr/bin/env pkgx +python@3.11 uv run --with pandas --with sqlalchemy """Script to analyze rank data and generate formatted CSV output. 
Usage: python analyze_ranks.py [--file PATH_TO_RANK_FILE] """ from __future__ import annotations import argparse import json import os from pathlib import Path import pandas as pd from sqlalchemy import create_engine, distinct, func, select from sqlalchemy.orm import Session from core.models import ( Canon, CanonPackage, Package, PackageManager, Source, ) def get_latest_rank_file() -> Path: """Get the path to the latest rank file.""" data_dir = Path("data/ranker/ranks") latest_symlink = data_dir / "latest.json" return latest_symlink.resolve() def get_rank_file(filename: str | None = None) -> Path: """Get the path to the rank file. Args: filename: Optional path to a specific rank file. Returns: Path to the rank file. Raises: FileNotFoundError: If the specified file doesn't exist. """ if filename: file_path = Path(filename) if not file_path.exists(): raise FileNotFoundError(f"Rank file not found: {filename}") return file_path return get_latest_rank_file() def load_rank_data(file_path: Path) -> dict[str, float]: """Load rank data from JSON file.""" with open(file_path) as f: return json.load(f) def get_output_filename(input_path: Path) -> Path: """Generate output filename based on input filename.""" # Extract the rank number from filenames like "ranks_37_0.7" parts = input_path.stem.split("_") rank_num = "_".join(parts[1:]) if len(parts) >= 2 else input_path.stem output_dir = Path("data/ranker/analysis") output_dir.mkdir(parents=True, exist_ok=True) return output_dir / f"formatted_ranks_{rank_num}.csv" def get_package_data(ranks: dict[str, float], db_session: Session) -> pd.DataFrame: """Query database for package information and combine with ranks.""" # Query for package data including URLs and aggregated package info query = ( select( Canon.id.label("canon_id"), Canon.url.label("homepage_url"), Canon.name.label("package_name"), func.array_agg(distinct(Source.type)).label("package_managers"), func.array_agg(distinct(Package.name)).label("package_names"), ) .join(CanonPackage, Canon.id == CanonPackage.canon_id) .join(Package, CanonPackage.package_id == Package.id) .join(PackageManager, Package.package_manager_id == PackageManager.id) .join(Source, PackageManager.source_id == Source.id) .group_by(Canon.id, Canon.url, Canon.name) ) results = pd.DataFrame(db_session.execute(query)) # Convert UUID objects to strings in results DataFrame results["canon_id"] = results["canon_id"].astype(str) # Convert ranks to DataFrame and merge ranks_df = pd.DataFrame.from_dict(ranks, orient="index", columns=["tea_rank"]) ranks_df.index.name = "canon_id" ranks_df.reset_index(inplace=True) # Merge and sort final_df = pd.merge(ranks_df, results, on="canon_id") if final_df.empty: raise ValueError( "No data to process - no matching canon_ids between ranks and database results" ) final_df.sort_values(["tea_rank"], ascending=[False], inplace=True) return final_df[ [ "canon_id", "package_name", "tea_rank", "homepage_url", "package_managers", "package_names", ] ] def parse_args() -> argparse.Namespace: """Parse command-line arguments.""" parser = argparse.ArgumentParser( description="Analyze rank data and generate formatted CSV output" ) parser.add_argument( "--file", type=str, default=None, help="Path to a specific rank file. 
If not provided, the latest rank file will be used.", ) return parser.parse_args() def main() -> None: """Main function to process rank data and generate CSV.""" # Parse command-line arguments args = parse_args() # Setup database connection engine = create_engine(os.environ["CHAI_DATABASE_URL"]) # Get input and output paths rank_file = get_rank_file(args.file) output_file = get_output_filename(rank_file) print(f"Output will be saved to: {output_file}") # Process data ranks = load_rank_data(rank_file) with Session(engine) as session: result_df = get_package_data(ranks, session) # Save output result_df.to_csv(output_file, index=False) if __name__ == "__main__": main() ================================================ FILE: ranker/utils/parse_log.py ================================================ #!/usr/bin/env pkgx +python@3.11 uv run """ Parse graph run log to calculate processing metrics. This script analyzes a log file to compute: 1. Average time to process 1,000 packages 2. Average packages processed per second Usage: From file: ./parse_log.py log_file From tmux: tmux capture-pane -p | ./parse_log.py """ import re import sys from statistics import mean def parse_log_line(line: str) -> tuple[float, int]: """ Extract timestamp and package count from a log line. Args: line: A line from the log file Returns: Tuple of (timestamp, package_count) """ pattern = r"^(\d+\.\d+): \[graph\.main\]: (\d+):" match = re.match(pattern, line) if match: timestamp = float(match.group(1)) package_count = int(match.group(2)) return timestamp, package_count return None def calculate_metrics(log_lines: list[str]) -> tuple[float, float]: """ Calculate processing metrics from log lines. Args: log_lines: List of log file lines Returns: Tuple of (avg_time_per_1000, packages_per_second) """ data_points = [] previous_timestamp = None previous_count = None for line in log_lines: result = parse_log_line(line) if not result: continue timestamp, count = result if previous_timestamp is not None and previous_count is not None: time_diff = timestamp - previous_timestamp count_diff = count - previous_count # Only process if we're looking at approximately 1000 package difference if 900 <= count_diff <= 1100: data_points.append((time_diff, count_diff)) previous_timestamp = timestamp previous_count = count if not data_points: return 0.0, 0.0 # Calculate average time for processing 1000 packages time_diffs = [time for time, _ in data_points] avg_time_per_1000 = mean(time_diffs) # Calculate average packages per second packages_per_second = 1000 / avg_time_per_1000 return avg_time_per_1000, packages_per_second def main(): """Process the log data and display metrics.""" log_lines = [] # Read from file if specified, otherwise from stdin if len(sys.argv) == 2: log_file = sys.argv[1] try: with open(log_file) as f: log_lines = f.readlines() except OSError as e: print(f"Error reading log file: {e}") sys.exit(1) else: # Read from stdin (for piping from tmux) log_lines = sys.stdin.readlines() if not log_lines: print(f"Usage: {sys.argv[0]} [log_file]") print(f" or: tmux capture-pane -p | {sys.argv[0]}") sys.exit(1) avg_time, pkg_per_second = calculate_metrics(log_lines) print(f"Average time to process 1,000 packages: {avg_time:.2f} seconds") print(f"Average packages processed per second: {pkg_per_second:.2f}") if __name__ == "__main__": main() ================================================ FILE: scripts/chai-legacy-loader/README.md ================================================ # CHAI Legacy Data Loader Tools for loading legacy CHAI 
data into the current CHAI database framework.

> [!NOTE]
> This can only be executed if you have access to the Legacy CHAI database. If not,
> you can ignore everything inside this folder.

## Requirements

- pkgx.sh

## Overview

This is a set of utility Python scripts to efficiently transfer data from the legacy CHAI database into the current CHAI schema.

## Loader Scripts

- `add_package_fields.py`: enriches package data dumps from Legacy CHAI with fields required by CHAI
- `copy_dependencies_no_thread.py`: fetches dependency data from `public.sources` for a given package manager and uses psycopg2's `copy_expert` function to load it in batches into CHAI
- `batch_insert_urls.py` / `batch_insert_package_urls.py`: add urls and package_urls relationships from Legacy CHAI

## Usage

1. Set up environment variables (or use defaults):

   ```bash
   export LEGACY_CHAI_DATABASE_URL=credentials_from_itn
   export CHAI_DATABASE_URL=postgresql://postgres:postgres@localhost:5435/chai
   ```

2. Loading packages

   1. `psql $LEGACY_CHAI_DATABASE_URL -t -A -F',' -f sql/packages.sql -o /path/to/output.csv`
   1. Run `add_package_fields.py /file/from/step/1.csv /path/to/output package_manager_id` to enrich it with additional fields
   1. `psql $CHAI_DATABASE_URL -c "CREATE TABLE temp_import (LIKE packages);"`
   1. `psql $CHAI_DATABASE_URL -c "\COPY temp_import (id, derived_id, name, package_manager_id, import_id, created_at, updated_at) FROM '/path/to/csv/from/step/2' WITH (FORMAT csv, HEADER true, DELIMITER ',');"`
   1. `psql $CHAI_DATABASE_URL -c "INSERT INTO packages SELECT * FROM temp_import ON CONFLICT DO NOTHING;"`
   1. `psql $CHAI_DATABASE_URL -c "DROP TABLE temp_import;"`

3. Loading dependencies

   With pkgx, just invoke the script from the root directory of chai:

   ```bash
   cd ../..
   PYTHONPATH=. scripts/chai-legacy-loader/copy_dependencies_no_thread.py
   ```

4. Loading URLs

   1. Run [urls.sql](sql/urls.sql), which generates a csv
   1. Run `batch_insert_urls.py /path/to/step/1 -d` to insert the raw URLs, and get a dump of the loaded IDs and the URL
   1. Run `batch_insert_package_urls.py /path/to/step/1 --urls /path/to/step/2` to insert the package_url relationships. If no cache is provided, it'll try to read all loaded URLs and their IDs from the db (long)

```bash
pkgx psql -h localhost -U gardener -p 5430 temp_chai < dev_chai_fixed.sql
```

================================================
FILE: scripts/chai-legacy-loader/add_package_fields.py
================================================
#!/usr/bin/env pkgx +python@3.11 uv run
"""
For a csv generated from legacy chai, this script adds the id, created_at, and
updated_at fields to the csv.

The input CSV must have a header row: "derived_id,name,import_id".
The package_manager_id argument must be a valid UUID.

Usage:
    chmod +x add_package_fields.py
    ./add_package_fields.py input.csv output.csv <package_manager_id>
"""

import csv
import sys
import uuid
from datetime import UTC, datetime


def validate_uuid(uuid_string: str) -> None:
    """Raises ValueError if the string is not a valid UUID."""
    try:
        uuid.UUID(uuid_string)
    except ValueError as exc:
        raise ValueError(f"Invalid UUID format: {uuid_string}") from exc


def process_csv(input_file: str, output_file: str, package_manager_id: str) -> None:
    """
    Processes the input CSV, validates headers, adds new fields, and writes to
    the output CSV.

    Args:
        input_file: Path to the input CSV file.
        output_file: Path to the output CSV file.
        package_manager_id: The UUID of the package manager.

    Raises:
        ValueError: If the input CSV header is missing or incorrect.
""" now = datetime.now(UTC).isoformat() expected_header: list[str] = ["derived_id", "name", "import_id"] output_header: list[str] = [ "id", "derived_id", "name", "package_manager_id", "import_id", "created_at", "updated_at", ] with ( open(input_file, newline="") as infile, open(output_file, "w", newline="") as outfile, ): reader: csv._reader = csv.reader(infile) writer: csv._writer = csv.writer(outfile) # 1. Validate header row header: list[str] | None = next(reader, None) if header is None: raise ValueError(f"Input file '{input_file}' is missing a header row.") if header != expected_header: raise ValueError( f"Input file '{input_file}' header mismatch. " f"Expected: {expected_header}, Got: {header}" ) # Write output header writer.writerow(output_header) # Process data rows row_count = 0 for row in reader: if len(row) != len(expected_header): msg = f"Warning: Skipping row {reader.line_num} due to incorrect \ column count ({len(row)} instead of {len(expected_header)}): {row}" print(msg, file=sys.stderr) continue row_uuid: str = str(uuid.uuid4()) derived_id, name, import_id = row output_row: list[str] = [ row_uuid, derived_id, name, package_manager_id, import_id, now, now, ] writer.writerow(output_row) row_count += 1 print(f"Processed {row_count} rows from {input_file} -> {output_file}") if __name__ == "__main__": if len(sys.argv) != 4: print( f"Usage: {sys.argv[0]} input.csv output.csv ", file=sys.stderr, ) sys.exit(1) input_csv_path: str = sys.argv[1] output_csv_path: str = sys.argv[2] pm_uuid: str = sys.argv[3] try: # 6. Validate package_manager argument is a UUID validate_uuid(pm_uuid) process_csv(input_csv_path, output_csv_path, pm_uuid) except FileNotFoundError as e: print(f"Error: Input file not found - {e}", file=sys.stderr) sys.exit(1) except ValueError as e: print(f"Error: {e}", file=sys.stderr) sys.exit(1) except Exception as e: print(f"An unexpected error occurred: {e}", file=sys.stderr) sys.exit(1) ================================================ FILE: scripts/chai-legacy-loader/batch_insert_package_urls.py ================================================ #!/usr/bin/env pkgx +python@3.11 uv run --with psycopg2==2.9.9 import argparse import csv import os import uuid from datetime import datetime import psycopg2 import psycopg2.extras from core.config import Config, PackageManager from core.logger import Logger CHAI_DATABASE_URL = os.environ.get("CHAI_DATABASE_URL") DEFAULT_BATCH_SIZE = 20000 class ChaiPackageUrlsDB: """Handles DB interactions for batch package_urls insertion.""" def __init__(self, logger: Logger): self.logger = logger if not CHAI_DATABASE_URL: self.logger.error("CHAI_DATABASE_URL environment variable not set.") raise ValueError("CHAI_DATABASE_URL not set") self.conn = None self.cursor = None try: self.conn = psycopg2.connect(CHAI_DATABASE_URL) self.cursor = self.conn.cursor() self.logger.log("CHAI database connection established for PackageUrlsDB") except psycopg2.Error as e: self.logger.error(f"PackageUrlsDB connection error: {e}") raise def load_package_id_cache(self) -> dict[str, uuid.UUID]: """Load all packages (import_id -> id) into a cache.""" self.logger.log("Loading package_id cache from database...") query = "SELECT import_id, id FROM packages" try: self.cursor.execute(query) cache = {str(row[0]): row[1] for row in self.cursor.fetchall() if row[0]} self.logger.log(f"Loaded {len(cache)} packages into package_id cache.") return cache except psycopg2.Error as e: self.logger.error(f"Error loading package_id cache: {e}") raise def load_url_id_cache_from_db( 
self, ) -> dict[tuple[str, uuid.UUID], uuid.UUID]: """Load all URLs ( (url, url_type_id) -> id ) into a cache from DB.""" self.logger.log("Loading url_id cache from database (fallback)...") query = "SELECT id, url, url_type_id FROM urls" cache: dict[tuple[str, uuid.UUID], uuid.UUID] = {} try: self.cursor.execute(query) for row in self.cursor.fetchall(): url_id, url_str, url_type_id = row[0], row[1], row[2] if url_str and url_type_id: cache[(url_str, url_type_id)] = url_id self.logger.log(f"Loaded {len(cache)} URLs into url_id cache from DB.") return cache except psycopg2.Error as e: self.logger.error(f"Error loading url_id cache from DB: {e}") raise def batch_insert_package_urls( self, data_tuples: list[tuple[uuid.UUID, uuid.UUID, uuid.UUID, datetime, datetime]], ) -> None: """Batch insert into package_urls table.""" if not data_tuples: return query = """ INSERT INTO package_urls (id, package_id, url_id, created_at, updated_at) VALUES %s ON CONFLICT (package_id, url_id) DO UPDATE SET updated_at = EXCLUDED.updated_at """ try: psycopg2.extras.execute_values( self.cursor, query, data_tuples, page_size=len(data_tuples) ) self.conn.commit() self.logger.log( f"Successfully inserted/updated {len(data_tuples)} package_urls" ) except psycopg2.Error as e: self.logger.error(f"Error during batch insert into package_urls: {e}") self.logger.log( f"Failed data sample: {data_tuples[0] if data_tuples else 'N/A'}" ) self.conn.rollback() raise except Exception as e: self.logger.error(f"Unexpected error during package_urls batch insert: {e}") self.conn.rollback() raise def close(self): if self.cursor: self.cursor.close() if self.conn: self.conn.close() self.logger.log("PackageUrlsDB connection closed.") def load_url_id_cache_from_file( cache_file_path: str, logger: Logger ) -> dict[tuple[str, uuid.UUID], uuid.UUID]: """Load URL ID cache from the CSV file generated by batch_insert_urls.py.""" logger.log(f"Loading url_id cache from file: {cache_file_path}...") cache: dict[tuple[str, uuid.UUID], uuid.UUID] = {} try: with open(cache_file_path, newline="", encoding="utf-8") as csvfile: reader = csv.reader(csvfile) header = next(reader, None) # Skip header if not header or header != ["id", "url", "url_type_id"]: logger.error( f"Invalid or missing header in URL cache file: {cache_file_path}. 
Expected ['id', 'url', 'url_type_id']" ) raise ValueError("Invalid URL cache file format") for i, row in enumerate(reader): if len(row) == 3: try: url_id_str, url_str, url_type_id_str = row[0], row[1], row[2] if url_str and url_type_id_str: # Ensure no empty strings cache[(url_str, uuid.UUID(url_type_id_str))] = uuid.UUID( url_id_str ) except ValueError as ve: logger.warn( f"Invalid UUID in URL cache file at row {i+2}: {row} - {ve}" ) continue else: logger.warn( f"Skipping malformed row in URL cache file at row {i+2}: {row}" ) logger.log(f"Loaded {len(cache)} URLs into url_id cache from file.") return cache except FileNotFoundError: logger.error(f"URL cache file not found: {cache_file_path}") raise except Exception as e: logger.error(f"Error loading URL cache file {cache_file_path}: {e}") raise def process_package_url_associations( input_csv_path: str, batch_size: int, script_execution_time: datetime, url_cache_csv_path: str | None, stop_at: int | None, main_logger: Logger, ) -> None: """Main processing logic for associating packages with URLs.""" main_logger.log(f"Starting package-URL association for: {input_csv_path}") main_logger.log( f"Batch size: {batch_size}, URL cache: {url_cache_csv_path}, Stop at: {stop_at}" ) try: config = Config(PackageManager.NPM) url_type_homepage_id = config.url_types.homepage url_type_source_id = config.url_types.source except Exception as e: main_logger.error(f"Error initializing config: {e}") return db_handler = None package_id_cache: dict[str, uuid.UUID] = {} url_id_cache: dict[tuple[str, uuid.UUID], uuid.UUID] = {} try: db_handler = ChaiPackageUrlsDB(main_logger) package_id_cache = db_handler.load_package_id_cache() if url_cache_csv_path: url_id_cache = load_url_id_cache_from_file(url_cache_csv_path, main_logger) else: main_logger.log( "No URL cache file provided, loading all URLs from database..." ) url_id_cache = db_handler.load_url_id_cache_from_db() except Exception as e: main_logger.error(f"Failed during setup (DB or cache loading): {e}") if db_handler: db_handler.close() return package_urls_to_insert: list[ tuple[uuid.UUID, uuid.UUID, uuid.UUID, datetime, datetime] ] = [] processed_csv_rows = 0 total_associations_prepared = 0 processed_pairs: set[tuple[uuid.UUID, uuid.UUID]] = ( set() ) # To avoid duplicates in a single batch try: with open(input_csv_path, newline="", encoding="utf-8") as infile: reader = csv.reader(infile) header = next(reader, None) if not header: main_logger.warn( f"Input CSV file {input_csv_path} is empty or has no header." 
) return main_logger.log(f"Input CSV Header: {header}") for row_num, row in enumerate(reader): processed_csv_rows += 1 current_csv_line = row_num + 2 # 1 for header, 1 for 0-indexing if not (len(row) >= 3): main_logger.warn( f"Skipping row {current_csv_line} (length < 3): {row}" ) continue import_id, source_url_str, homepage_url_str = row[0], row[1], row[2] if not import_id: main_logger.warn( f"Skipping row {current_csv_line} due to missing import_id: {row}" ) continue package_id = package_id_cache.get(import_id) if not package_id: # We didn't load all the packages from ITN, so this is expected continue urls_to_link = [] if source_url_str and source_url_str.lower() != "null": source_key = (source_url_str.strip(), url_type_source_id) source_url_id = url_id_cache.get(source_key) if source_url_id: urls_to_link.append(source_url_id) else: main_logger.warn( f"Source URL for import_id '{import_id}' not found in URL cache: '{source_url_str}' (row {current_csv_line})" ) if homepage_url_str and homepage_url_str.lower() != "null": homepage_key = (homepage_url_str.strip(), url_type_homepage_id) homepage_url_id = url_id_cache.get(homepage_key) if homepage_url_id: urls_to_link.append(homepage_url_id) else: main_logger.warn( f"Homepage URL for import_id '{import_id}' not found in URL cache: '{homepage_url_str}' (row {current_csv_line})" ) for url_id_to_link in urls_to_link: if (package_id, url_id_to_link) not in processed_pairs: package_urls_to_insert.append( ( uuid.uuid4(), package_id, url_id_to_link, script_execution_time, script_execution_time, ) ) processed_pairs.add((package_id, url_id_to_link)) total_associations_prepared += 1 if len(package_urls_to_insert) >= batch_size: db_handler.batch_insert_package_urls(package_urls_to_insert) package_urls_to_insert = [] processed_pairs.clear() # Clear after batch insert main_logger.log( f"Processed batch. CSV rows: {processed_csv_rows}, Associations: {total_associations_prepared}" ) if stop_at and processed_csv_rows >= stop_at: main_logger.log(f"Reached stop limit of {stop_at} CSV rows.") break if package_urls_to_insert: # Process remaining db_handler.batch_insert_package_urls(package_urls_to_insert) main_logger.log( f"Processed final batch. CSV rows: {processed_csv_rows}, Associations: {total_associations_prepared}" ) main_logger.log( f"Package-URL association processing complete. Total CSV rows: {processed_csv_rows}. Associations prepared: {total_associations_prepared}." ) except FileNotFoundError: main_logger.error(f"Input CSV file not found: {input_csv_path}") except csv.Error as e: main_logger.error( f"CSV reading error in {input_csv_path} near line {reader.line_num if 'reader' in locals() else 'unknown'}: {e}" ) except psycopg2.Error as e: main_logger.error(f"A database error occurred: {e}") main_logger.exception() except Exception as e: main_logger.error(f"An unexpected error occurred: {e}") main_logger.exception() finally: if db_handler: db_handler.close() if __name__ == "__main__": parser = argparse.ArgumentParser( description="Batch insert package-URL relationships from a CSV file." 
) parser.add_argument( "file_path", help="Path to the input CSV file (import_id, source_url, homepage_url).", ) parser.add_argument( "--cache", metavar="URL_CACHE_CSV_PATH", help="Optional path to the CSV file containing URL IDs (output of batch_insert_urls.py).", ) parser.add_argument( "--batch-size", "-b", type=int, default=DEFAULT_BATCH_SIZE, help=f"Number of records to insert per batch (default: {DEFAULT_BATCH_SIZE}).", ) parser.add_argument( "--stop", "-s", type=int, help="Optional: stop processing after this many CSV rows.", ) args = parser.parse_args() script_start_time = datetime.now() logger = Logger("main_pkg_url_assoc_loader") logger.log(f"Script started at {script_start_time.isoformat()}") process_package_url_associations( input_csv_path=args.file_path, batch_size=args.batch_size, script_execution_time=script_start_time, url_cache_csv_path=args.cache, stop_at=args.stop, main_logger=logger, ) logger.log( f"Script finished. Total execution time: {datetime.now() - script_start_time}" ) ================================================ FILE: scripts/chai-legacy-loader/batch_insert_urls.py ================================================ #!/usr/bin/env pkgx +python@3.11 uv run --with psycopg2==2.9.9 import argparse import csv import os import uuid from datetime import datetime import psycopg2 import psycopg2.extras from core.config import Config, PackageManager from core.logger import Logger CHAI_DATABASE_URL = os.environ.get("CHAI_DATABASE_URL") DEFAULT_BATCH_SIZE = 20000 OUTPUT_CSV_FILENAME = "inserted_urls.csv" class ChaiDB: """Handles interactions with the CHAI database for batch URL insertion.""" def __init__(self): """Initialize connection to the CHAI database.""" self.logger = Logger("batch_url_db") if not CHAI_DATABASE_URL: self.logger.error("CHAI_DATABASE_URL environment variable not set.") raise ValueError("CHAI_DATABASE_URL not set") self.conn = None self.cursor = None try: self.conn = psycopg2.connect(CHAI_DATABASE_URL) self.cursor = self.conn.cursor() self.logger.log("CHAI database connection established") except psycopg2.Error as e: self.logger.error(f"Database connection error: {e}") raise def batch_insert_urls( self, url_data_tuples: list[tuple[str, uuid.UUID, datetime, datetime]], dump_output: bool, ) -> list[tuple[uuid.UUID, str, uuid.UUID]] | None: """ Batch insert URLs into the database. Args: url_data_tuples: A list of tuples, each containing (url, url_type_id, created_at_ts, updated_at_ts). dump_output: If True, return the inserted/updated rows. Returns: A list of (id, url, url_type_id) tuples if dump_output is True, else None. """ if not url_data_tuples: return [] if dump_output else None query_base = """ INSERT INTO urls (url, url_type_id, created_at, updated_at) VALUES %s ON CONFLICT (url_type_id, url) DO UPDATE SET updated_at = EXCLUDED.updated_at """ if dump_output: query = query_base + " RETURNING id, url, url_type_id" else: query = query_base try: psycopg2.extras.execute_values( self.cursor, query, url_data_tuples, page_size=len(url_data_tuples) ) self.conn.commit() self.logger.log( f"Successfully inserted/updated {len(url_data_tuples)} URL records." 
) if dump_output: return self.cursor.fetchall() return None except psycopg2.Error as e: self.logger.error(f"Error during batch insert: {e}") self.logger.log(url_data_tuples) self.conn.rollback() raise e except Exception as e: self.logger.error(f"An unexpected error occurred during batch insert: {e}") self.conn.rollback() raise e def close(self): """Close the database connection.""" if self.cursor: self.cursor.close() if self.conn: self.conn.close() self.logger.log("CHAI database connection closed") def process_urls_for_batch_insert( file_path: str, batch_size: int, script_execution_time: datetime, dump_output: bool, stop_at: int | None = None, ) -> None: """ Reads URLs from a CSV file, prepares them, and batch inserts them into the database. Args: file_path: Path to the input CSV file. batch_size: Number of records to insert per batch. script_execution_time: Timestamp for created_at/updated_at. dump_output: Whether to dump inserted data to a CSV file. stop_at: Optional number of CSV rows to process. """ logger = Logger("url_batch_processor") logger.log(f"Starting URL batch processing for file: {file_path}") logger.log( f"Batch size: {batch_size}, Dump output: {dump_output}, Stop at: {stop_at}" ) cache: set[tuple[str, uuid.UUID]] = set() try: config = Config(PackageManager.NPM) url_type_homepage_id = config.url_types.homepage url_type_source_id = config.url_types.source except AttributeError as e: logger.error( f"Could not load URL types from config. Ensure DB contains these types: {e}" ) return except Exception as e: logger.error(f"Error initializing config: {e}") return chai_db = None try: chai_db = ChaiDB() except Exception as e: logger.error(f"Failed to initialize ChaiDB: {e}") return # Exit if DB connection fails url_data_to_insert: list[tuple[str, uuid.UUID, datetime, datetime]] = [] all_inserted_data_for_dump: list[tuple[uuid.UUID, str, uuid.UUID]] = [] processed_csv_rows = 0 total_urls_prepared = 0 try: with open(file_path, newline="", encoding="utf-8") as csvfile: reader = csv.reader(csvfile) header = next(reader, None) # Skip header if not header: logger.warn("CSV file is empty or has no header.") return logger.log(f"CSV Header: {header}") # Log the header for context for row in reader: processed_csv_rows += 1 if not (len(row) >= 3): logger.warn(f">3 cols at L{processed_csv_rows + 1}: {row}") continue # Assuming import_id is row[0], source is row[1], homepage is row[2] # set the source data source_url = row[1].strip() if row[1] else None source_data = (source_url, url_type_source_id) # set the homepage data homepage_url = row[2].strip() if row[2] else None homepage_data = (homepage_url, url_type_homepage_id) # add to url_data_to_insert if valid and not in cache # also, update the cache urls_to_process = [] if ( source_url and source_url.lower() != "null" and source_data not in cache ): urls_to_process.append(source_data) cache.add(source_data) if ( homepage_url and homepage_url.lower() != "null" and homepage_data not in cache ): urls_to_process.append(homepage_data) cache.add(homepage_data) for url_str, url_type_id in urls_to_process: url_data_to_insert.append( ( url_str, url_type_id, script_execution_time, script_execution_time, ) ) total_urls_prepared += 1 # insert the data in batches if len(url_data_to_insert) >= batch_size: results = chai_db.batch_insert_urls(url_data_to_insert, dump_output) if dump_output and results: all_inserted_data_for_dump.extend(results) url_data_to_insert = [] logger.log( f"Processed batch. 
Total CSV rows read: {processed_csv_rows}, Total URLs prepared: {total_urls_prepared}" ) if stop_at and processed_csv_rows >= stop_at: logger.log(f"Reached stop limit of {stop_at} CSV rows.") break # Process any remaining URLs in the buffer if url_data_to_insert: results = chai_db.batch_insert_urls(url_data_to_insert, dump_output) if dump_output and results: all_inserted_data_for_dump.extend(results) logger.log( f"Processed final batch. Total CSV rows read: {processed_csv_rows}, Total URLs prepared: {total_urls_prepared}" ) if dump_output: with open( OUTPUT_CSV_FILENAME, "w", newline="", encoding="utf-8" ) as outfile: writer = csv.writer(outfile) writer.writerow(["id", "url", "url_type_id"]) # Header for output CSV writer.writerows(all_inserted_data_for_dump) logger.log( f"Dumped {len(all_inserted_data_for_dump)} records to {OUTPUT_CSV_FILENAME}" ) logger.log( f"URL batch processing complete. Total CSV rows processed: {processed_csv_rows}. Total URLs prepared/processed: {total_urls_prepared}." ) except FileNotFoundError: logger.error(f"Input CSV file not found: {file_path}") except csv.Error as e: logger.error( f"CSV reading error in {file_path} near line {reader.line_num}: {e}" ) except psycopg2.Error as e: logger.error(f"A database error occurred: {e}") logger.exception() except Exception as e: logger.error(f"An unexpected error occurred: {e}") logger.exception() finally: if chai_db: chai_db.close() if __name__ == "__main__": parser = argparse.ArgumentParser( description="Batch insert URLs from a CSV file into the CHAI database." ) parser.add_argument("file_path", help="Path to the input CSV file (e.g., data.csv)") parser.add_argument( "--batch-size", "-b", type=int, default=DEFAULT_BATCH_SIZE, help=f"Number of records to insert per batch (default: {DEFAULT_BATCH_SIZE})", ) parser.add_argument( "--stop", "-s", type=int, help="Optional: stop processing after this many CSV rows.", ) parser.add_argument( "--dump-output", "-d", action="store_true", help=f"If set, dump all inserted/updated (id, url, url_type_id) to {OUTPUT_CSV_FILENAME}", ) args = parser.parse_args() script_start_time = datetime.now() main_logger = Logger("main_batch_url_loader") main_logger.log(f"Script started at {script_start_time.isoformat()}") process_urls_for_batch_insert( file_path=args.file_path, batch_size=args.batch_size, script_execution_time=script_start_time, # Use a consistent time for the whole run dump_output=args.dump_output, stop_at=args.stop, ) main_logger.log( f"Script finished. 
Total execution time: {datetime.now() - script_start_time}" ) ================================================ FILE: scripts/chai-legacy-loader/copy_dependencies_no_thread.py ================================================ #!/usr/bin/env pkgx +python@3.11 uv run import argparse import io import os import uuid import psycopg2 import psycopg2.errors from core.config import Config, PackageManager from core.logger import Logger LEGACY_CHAI_DATABASE_URL = os.environ.get("LEGACY_CHAI_DATABASE_URL") CHAI_DATABASE_URL = os.environ.get("CHAI_DATABASE_URL") BATCH_SIZE = 20000 LEGACY_CHAI_PACKAGE_MANAGER_MAP: dict[PackageManager, str] = { PackageManager.NPM: "npm", PackageManager.CRATES: "crates", PackageManager.HOMEBREW: "brew", PackageManager.DEBIAN: "apt", PackageManager.PKGX: "pkgx", } class LegacyDB: """Handles all interactions with the legacy CHAI database.""" def __init__(self, input_package_manager: PackageManager): """Initialize connection to the legacy database.""" self.conn = psycopg2.connect(LEGACY_CHAI_DATABASE_URL) # Set autocommit to False for server-side cursors self.conn.set_session(autocommit=False) self.logger = Logger("legacy_db") self.logger.debug("Legacy database connection established") self.package_manager_name = LEGACY_CHAI_PACKAGE_MANAGER_MAP[ input_package_manager ] def __del__(self): """Close connection when object is destroyed.""" if hasattr(self, "conn") and self.conn: self.conn.close() def get_sql_content(self, filename: str) -> str: """Load SQL content from a file.""" sql_file_path = os.path.join( os.path.dirname(os.path.abspath(__file__)), "sql", filename ) with open(sql_file_path) as f: return f.read() def create_server_cursor(self, sql_file: str, cursor_name: str) -> None: """Create a server-side cursor for efficient data fetching. 
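
        The flow, sketched here with placeholder names, is the standard
        PostgreSQL server-side cursor pattern that the methods below implement:

            DECLARE my_cursor CURSOR FOR <query>;
            FETCH 20000 FROM my_cursor;  -- repeated by fetch_batch until empty
            CLOSE my_cursor;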
Inputs: sql_file: The name of the SQL file to load cursor_name: The name of the cursor to create package_manager_name: The name of the package manager whose legacy data we are fetching """ query = self.get_sql_content(sql_file) # substitute $1 with self.package_manager_name query = query.replace("$1", f"'{self.package_manager_name}'") self.logger.debug(f"Query: {query}") # create a named server side cursor for retrieving data declare_stmt = f"DECLARE {cursor_name} CURSOR FOR {query}" # create a cursor to execute the declare statement with self.conn.cursor() as cursor: cursor.execute(declare_stmt) self.logger.debug( f"Created server-side cursor '{cursor_name}' for {sql_file}" ) def fetch_batch(self, cursor_name: str, batch_size: int) -> list[tuple]: """Fetch a batch of records using the server-side cursor.""" cursor = self.conn.cursor() cursor.execute(f"FETCH {batch_size} FROM {cursor_name}") batch = cursor.fetchall() self.logger.log(f"Fetched {len(batch)} records from cursor '{cursor_name}'") cursor.close() return batch def close_cursor(self, cursor_name: str) -> None: """Close a server-side cursor.""" cursor = self.conn.cursor() cursor.execute(f"CLOSE {cursor_name}") self.logger.log(f"Closed server-side cursor '{cursor_name}'") cursor.close() class ChaiDB: """Handles all interactions with the CHAI database.""" def __init__(self, config: Config): """Initialize connection to the CHAI database.""" self.logger = Logger("chai_db") self.config = config # connect to the database self.conn = psycopg2.connect(CHAI_DATABASE_URL) # Use autocommit=False for server-side cursors if needed within a transaction # self.conn.set_session(autocommit=False) self.logger.debug("CHAI database connection established") # create the cursor for general operations self.cursor = self.conn.cursor() self.logger.debug("CHAI database cursor created") # configure some variables self.legacy_dependency_columns = [ "package_id", "dependency_id", # the below two are not available from the sources table in the legacy db # assuming everything is a runtime dependency and use the semver range * "dependency_type_id", "semver_range", ] # initialize package map self.package_map = self._get_package_map() self.logger.debug( f"{len(self.package_map)} {self.config.pm_config.package_manager} packages in CHAI" ) # Load existing legacy dependencies to avoid duplicates self.processed_pairs = set() self._load_existing_dependencies() def _get_package_map(self) -> dict[str, uuid.UUID]: """Get a map of package import_ids to their UUIDs for the configured package manager""" query = """SELECT import_id, id FROM packages WHERE package_manager_id = %(pm_id)s AND import_id IS NOT NULL""" self.cursor.execute(query, {"pm_id": self.config.pm_config.pm_id}) rows = self.cursor.fetchall() # check that we actually loaded packages for the specified manager if len(rows) == 0: raise ValueError( f"{self.config.pm_config.package_manager} packages not found in DB" ) return {row[0]: row[1] for row in rows} def _load_existing_dependencies(self, batch_size: int = BATCH_SIZE) -> None: """ Loads existing (package_id, dependency_id) pairs from the legacy_dependencies table into self.processed_pairs using a server-side cursor to handle potentially large datasets efficiently. 
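
        With the default batch_size (BATCH_SIZE = 20000), each fetchmany() call
        streams one batch of (package_id, dependency_id) tuples into
        self.processed_pairs instead of materializing the whole table at once.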
""" self.logger.log("Loading existing legacy dependencies...") query = "SELECT package_id, dependency_id FROM legacy_dependencies" cursor_name = "existing_deps_cursor" total_loaded = 0 # Use a transaction context for the server-side cursor with self.conn, self.conn.cursor(name=cursor_name) as named_cursor: named_cursor.execute(query) while True: batch = named_cursor.fetchmany(batch_size) if not batch: break # Convert batch of tuples to set for efficient update self.processed_pairs.update(batch) total_loaded += len(batch) if total_loaded % (batch_size * 20000) == 0: self.logger.debug( f"Loaded {total_loaded} existing dependency pairs..." ) self.logger.log( f"Finished loading {total_loaded} existing dependency pairs into memory." ) def init_copy_expert(self) -> None: """Initialize a StringIO object to collect CSV data for copy operation""" self.csv_data = io.StringIO() self.columns_str = ", ".join(self.legacy_dependency_columns) self.logger.debug("Copy buffer initialized") def add_rows_to_copy_expert(self, rows: list[tuple]) -> int: """Add rows to the StringIO buffer for later COPY operation""" rows_added = 0 for row in rows: package_id = self.package_map.get(row[0]) dependency_id = self.package_map.get(row[1]) # if package or dependency are not found, skip the row if not package_id or not dependency_id: # skipping because maybe the package or dependency is # not in legacy chai # marked as spam continue # if the pair has already been processed, skip the row if (package_id, dependency_id) in self.processed_pairs: continue # add the pair to the processed pairs self.processed_pairs.add((package_id, dependency_id)) # get the dependency type and semver range # not available from the sources table in the legacy db # assume everything is a runtime dependency, and use the semver range * dependency_type_id = self.config.dependency_types.runtime semver_range = "*" csv_line = ( f"{package_id},{dependency_id},{dependency_type_id},{semver_range}" ) self.csv_data.write(csv_line + "\n") rows_added += 1 return rows_added def add_rows_with_flush(self, rows: list[tuple], max_buffer_size=100000) -> int: """Add rows to the StringIO buffer for later COPY operation""" rows_added = self.add_rows_to_copy_expert(rows) self.logger.log(f"Added {rows_added} rows to the copy expert") # if the buffer is too large, flush it if self.csv_data.tell() > max_buffer_size: self.complete_copy_expert() # reinitialize the buffer self.init_copy_expert() return rows_added def complete_copy_expert(self): """Execute the COPY operation with collected data""" # Reset buffer position to start self.csv_data.seek(0) # Execute the COPY FROM operation try: self.cursor.copy_expert( f"COPY legacy_dependencies ({self.columns_str}) FROM STDIN WITH CSV", self.csv_data, ) self.conn.commit() self.logger.log(f"{len(self.processed_pairs)} total rows copied") except psycopg2.errors.BadCopyFileFormat as e: self.logger.log(f"Error copying data to database: {e}") # write the csv data to a file with open("bad_copy_file.csv", "w") as f: f.write(self.csv_data.getvalue()) self.conn.rollback() raise e def main( logger: Logger, config: Config, input_package_manager: PackageManager, stop: int | None, ) -> None: legacy_db = LegacyDB(input_package_manager) chai_db = ChaiDB(config) # initialize the copy expert chai_db.init_copy_expert() # set up the legacy db cursor_name = "legacy_dependencies_cursor" legacy_db.create_server_cursor("dependencies.sql", cursor_name) logger.log("Starting dependency loop process") total_rows = 0 try: while True: rows = 
legacy_db.fetch_batch(cursor_name, BATCH_SIZE) # break if we have no more rows if not rows: break # keep adding the rows to the copy expert rows_added = chai_db.add_rows_with_flush(rows) # update the total rows processed total_rows += rows_added # break if we have processed the stop number of rows if stop and total_rows >= stop: break # complete the copy expert logger.log("Completing copy expert for the last batch") chai_db.complete_copy_expert() except KeyboardInterrupt: logger.log("Keyboard interrupt detected") chai_db.complete_copy_expert() logger.log(f"Total rows processed: {total_rows}") finally: logger.log(f"Total rows processed: {total_rows}") legacy_db.close_cursor(cursor_name) legacy_db.conn.close() chai_db.cursor.close() chai_db.conn.close() logger.log("Database connections closed") if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument( "--package-manager", type=PackageManager, choices=list(PackageManager), required=True, ) parser.add_argument( "--stop", type=int, default=None, help="Stop after processing a certain number of rows", ) args = parser.parse_args() input_package_manager: PackageManager = args.package_manager stop: int | None = args.stop logger = Logger("chai_legacy_loader") config = Config(input_package_manager) logger.log(f"Importing legacy dependencies for {args.package_manager}") main( logger, config, input_package_manager, stop, ) ================================================ FILE: scripts/chai-legacy-loader/pkgx.yaml ================================================ dependencies: - python@3.11 - postgresql.org@16 - astral.sh/uv ================================================ FILE: scripts/chai-legacy-loader/sql/dependencies.sql ================================================ -- from old CHAI's structure, the sources table stores dependencies from package to -- package -- the projects tables stores the package managers themselves, which is where we apply -- the where clause select s.start_id, s.end_id from public.sources s join public.projects p on s.start_id = p.id and $1 = any(p.package_managers) ; ================================================ FILE: scripts/chai-legacy-loader/sql/packages.sql ================================================ -- TODO: swap npm for $1, and update the scripts select concat('npm', '/', project_name) as "derived_id", project_name as "name", id as "import_id" from projects where 'npm' = any(package_managers) and created_at < '2024-01-01'::timestamp -- before ITN and is_spam is false -- use legacy spam filter ; ================================================ FILE: scripts/chai-legacy-loader/sql/urls.sql ================================================ select id as import_id, "source", homepage from projects where 'npm' = any(package_managers) and created_at < '2024-01-01'::timestamp -- before ITN and is_spam is false -- use legacy spam filter ================================================ FILE: scripts/npm-singleton/README.md ================================================ # NPM Singleton Package Loader A utility script for loading a single NPM package and its metadata into the CHAI database. ## Purpose This script allows you to: 1. Check if an NPM package exists in the CHAI database 2. Fetch package metadata from the NPM registry 3. Verify package URLs (homepage, repository, source) 4. Check dependencies and their existence in CHAI 5. Add the package to the CHAI database ## Usage 1. 
You must either run this script from the project root directory or specify
   `PYTHONPATH` to point to the root directory, since it imports modules from the
   `core` library.
2. You must also specify a `CHAI_DATABASE_URL` connection string

### Method 1: Using pkgx (recommended)

```bash
# Make the script executable
chmod +x scripts/npm-singleton/single.py

# Run with PYTHONPATH set
PYTHONPATH=. scripts/npm-singleton/single.py <package_name> [--dry-run]
```

### Method 2: Using [uv](https://github.com/astral-sh/uv) directly

```bash
PYTHONPATH=. uv run scripts/npm-singleton/single.py <package_name> [--dry-run]
```

## Arguments

- `package_name`: Name of the NPM package to load (required)
- `--dry-run`: Run in read-only mode without committing to the database

> [!NOTE]
> Strongly recommend running with the `--dry-run` flag first, to see what changes
> you're about to implement.

The output looks like:

```bash
---------------------------------------------
Package: @types/jest
---------------------------------------------
✅ @types/jest doesn't exist on CHAI
---------------------------------------------
✅ OK from NPM
---------------------------------------------
✅ has homepage: github.com/DefinitelyTyped/DefinitelyTyped
✅ has repository: github.com/DefinitelyTyped/DefinitelyTyped.git
✅ has source: github.com/DefinitelyTyped/DefinitelyTyped.git
---------------------------------------------
Runtime Dependencies:
✅ expect / ^29.0.0 on CHAI
✅ pretty-format / ^29.0.0 on CHAI
---------------------------------------------
Dev Dependencies:
(none)
---------------------------------------------
DRY RUN: Would create the following rows:
 - 1 Package
 - 3 URLs
 - 3 PackageURLs
 - 2 Runtime Dependencies
 - 0 Dev Dependencies
---------------------------------------------
🌵 Dry run: No changes committed to database
```

> If a dependency doesn't exist on CHAI, you can just run the script for that
> dependency, and then run it for your main package

## Output

The script provides detailed status information about the package:

```
---------------------------------------------
Package: <package_name>
---------------------------------------------
❌ Exiting bc <package_name> exists on CHAI | ✅ <package_name> doesn't exist on CHAI
---------------------------------------------
❌ Exiting bc response error from registry | ✅ OK from NPM
---------------------------------------------
✅ has homepage: <url> | ❌ no homepage
✅ has repository: <url> | ❌ no repository
✅ has source: <url> | ❌ no source
---------------------------------------------
✅ <dep> / <semver> on CHAI | ❌ <dep> / <semver> not on CHAI
... for each dependency
---------------------------------------------
```

In dry-run mode, the script will show what changes would be made without committing
them to the database.

## Examples

Check a package without adding it to the database:

```bash
PYTHONPATH=. ./scripts/npm-singleton/single.py react --dry-run
```

Add a package to the database:

```bash
PYTHONPATH=. ./scripts/npm-singleton/single.py lodash
```

## Tasks

The below can be run using [xc](https://xcfile.dev), from within the
scripts/npm-singleton directory

### check

Env: PYTHONPATH=../..
Inputs: PACKAGE

```bash
./single.py $PACKAGE --dry-run
```

### add

Env: PYTHONPATH=../..
Inputs: PACKAGE ```bash ./single.py $PACKAGE ``` ================================================ FILE: scripts/npm-singleton/pkgx.yaml ================================================ dependencies: xcfile.dev: "*" python.org: ^3.11 ================================================ FILE: scripts/npm-singleton/single.py ================================================ #!/usr/bin/env pkgx +python@3.11 uv run --with requests==2.31.0 --with permalint==0.1.15 import argparse import sys from uuid import UUID, uuid4 import requests from permalint import normalize_url from core.config import Config, PackageManager from core.db import DB from core.models import URL, LegacyDependency, Package, PackageURL NPM_API_URL = "https://registry.npmjs.org/{name}" class ChaiDB(DB): def __init__(self): super().__init__("chai-singleton") def check_package_exists(self, derived_id: str) -> bool: with self.session() as session: return ( session.query(Package).filter(Package.derived_id == derived_id).first() is not None ) def get_package_by_derived_id(self, derived_id: str) -> Package: with self.session() as session: return ( session.query(Package).filter(Package.derived_id == derived_id).first() ) def load( self, pkg: Package, urls: list[URL], runtime_deps: list[LegacyDependency], dev_deps: list[LegacyDependency], ) -> None: """Load a package and its URLs into the database. Uses the same session to avoid transactional inconsistencies. Args: pkg: The package to load. urls: The URLs to load. """ with self.session() as session: # Load the package first session.add(pkg) session.flush() # to create the id pkg_id = pkg.id # Load the URLs for url in urls: session.add(url) session.flush() # to create the id url_ids = [url.id for url in urls] # Create the package URL relationships for url_id in url_ids: session.add(PackageURL(package_id=pkg_id, url_id=url_id)) # Create the legacy dependencies for dep in runtime_deps: session.add(dep) for dep in dev_deps: session.add(dep) session.commit() def get_package_info(npm_package: str) -> tuple[bool, dict, str | None]: url = NPM_API_URL.format(name=npm_package) try: response = requests.get(url) if response.status_code != 200: return ( False, {}, f"Failed with status {response.status_code}: {response.text}", ) return True, response.json(), None except Exception as e: return False, {}, f"Request failed: {e!s}" def get_homepage(package_info: dict) -> tuple[bool, str | None]: try: return True, canonicalize(package_info["homepage"]) except KeyError: return False, None except Exception as e: return False, str(e) def get_repository_url(package_info: dict) -> tuple[bool, str | None]: try: return True, canonicalize(package_info["repository"]["url"]) except KeyError: return False, None except Exception as e: return False, str(e) def get_source_url(package_info: dict) -> tuple[bool, str | None]: try: repository_obj = package_info["repository"] if repository_obj["type"] == "git": return True, canonicalize(repository_obj["url"]) else: return False, f"Repository is not a git URL: {repository_obj}" except KeyError: return False, None except Exception as e: return False, str(e) def canonicalize(url: str) -> str: return normalize_url(url) def get_latest_version(package_info: dict) -> tuple[bool, str | None]: try: dist_tags = package_info["dist-tags"] return True, dist_tags["latest"] except KeyError: return False, None def get_version_info(package_info: dict, version: str) -> tuple[bool, dict | None]: try: return True, package_info["versions"][version] except KeyError: return False, None def 
get_latest_version_dependencies( latest_version: dict, ) -> tuple[bool, dict[str, str]]: """Gets the dependencies from a version object from NPM's Registry API Returns: - a tuple of (success, dependencies) where dependencies is a dictionary keyed by dependency, with semver range as the value """ try: deps = latest_version.get("dependencies", {}) return True, deps except Exception: return False, {} def get_latest_version_dev_dependencies( latest_version: dict, ) -> tuple[bool, dict[str, str]]: """Gets the development dependencies from a version object from NPM's Registry API Returns: - a tuple of (success, dependencies) where dependencies is a dictionary keyed by dependency, with semver range as the value """ try: deps = latest_version.get("devDependencies", {}) return True, deps except Exception: return False, {} def check_dependencies_on_chai( db: ChaiDB, deps: dict[str, str] ) -> list[tuple[str, str, bool]]: """Check if dependencies exist on CHAI Args: db: ChaiDB instance deps: Dependencies to check Returns: List of tuples (dependency_name, semver_range, exists_on_chai) """ results = [] for dep_name, dep_range in deps.items(): derived_id = f"npm/{dep_name}" exists = db.get_package_by_derived_id(derived_id) is not None results.append((dep_name, dep_range, exists)) return results def generate_url(url_type_id: UUID, url: str) -> URL: return URL(id=uuid4(), url=url, url_type_id=url_type_id) def generate_legacy_dependencies( db: ChaiDB, pkg: Package, deps: dict[str, str], dependency_type_id: UUID ) -> tuple[list[LegacyDependency], list[tuple[str, str, bool]]]: legacy_deps: list[LegacyDependency] = [] dep_status: list[tuple[str, str, bool]] = [] for dep_name, dep_range in deps.items(): derived_id = f"npm/{dep_name}" chai_dep: Package | None = db.get_package_by_derived_id(derived_id) exists = chai_dep is not None dep_status.append((dep_name, dep_range, exists)) if not exists: continue dependency = LegacyDependency( package_id=pkg.id, dependency_id=chai_dep.id, dependency_type_id=dependency_type_id, semver_range=dep_range, ) legacy_deps.append(dependency) return legacy_deps, dep_status def print_status_report( package_name: str, exists_on_chai: bool, npm_response_ok: bool, npm_error: str | None, homepage_result: tuple[bool, str | None], repository_result: tuple[bool, str | None], source_result: tuple[bool, str | None], runtime_deps: list[tuple[str, str, bool]], dev_deps: list[tuple[str, str, bool]], changes_summary: dict[str, int] | None = None, dry_run: bool = False, ): """Print a formatted status report of the package processing""" divider = "-" * 45 print(divider) print(f"Package: {package_name}") print(divider) if exists_on_chai: print(f"❌ Exiting bc {package_name} exists on CHAI") else: print(f"✅ {package_name} doesn't exist on CHAI") print(divider) if npm_response_ok: print("✅ OK from NPM") else: print(f"❌ Exiting bc response error from registry: {npm_error}") print(divider) homepage_ok, homepage = homepage_result if homepage_ok: print(f"✅ has homepage: {homepage}") else: print("❌ no homepage") repository_ok, repository = repository_result if repository_ok: print(f"✅ has repository: {repository}") else: print("❌ no repository") source_ok, source = source_result if source_ok: print(f"✅ has source: {source}") else: print("❌ no source") print(divider) print("Runtime Dependencies:") if not runtime_deps: print("(none)") else: for dep, semver, exists in runtime_deps: if exists: print(f"✅ {dep} / {semver} on CHAI") else: print(f"❌ {dep} / {semver} not on CHAI") print(divider) print("Dev 
Dependencies:") if not dev_deps: print("(none)") else: for dep, semver, exists in dev_deps: if exists: print(f"✅ {dep} / {semver} on CHAI") else: print(f"❌ {dep} / {semver} not on CHAI") print(divider) if changes_summary: if dry_run: print("DRY RUN: Would create the following rows:") else: print("Created the following rows:") for entity_type, count in changes_summary.items(): print(f" - {count} {entity_type}") else: print("Won't even create any rows") print(divider) def process_package(package_name: str, dry_run: bool = False) -> bool: """Process a package and return True if successful, False otherwise""" config = Config(PackageManager.NPM) chai_db = ChaiDB() # Check if package exists derived_id = f"npm/{package_name}" exists_on_chai = chai_db.check_package_exists(derived_id) # Get Package Info from NPM npm_response_ok, package_info, npm_error = get_package_info(package_name) # Check URLs homepage_result = get_homepage(package_info) if npm_response_ok else (False, None) repository_result = ( get_repository_url(package_info) if npm_response_ok else (False, None) ) source_result = get_source_url(package_info) if npm_response_ok else (False, None) # Check latest version latest_version_result = ( get_latest_version(package_info) if npm_response_ok else (False, None) ) # Get version info version_info_result = (False, None) if npm_response_ok and latest_version_result[0]: version_info_result = get_version_info(package_info, latest_version_result[1]) # Get dependencies runtime_deps_result = (False, {}) dev_deps_result = (False, {}) if npm_response_ok and version_info_result[0]: runtime_deps_result = get_latest_version_dependencies(version_info_result[1]) dev_deps_result = get_latest_version_dev_dependencies(version_info_result[1]) # Check dependencies on CHAI runtime_deps_status = check_dependencies_on_chai(chai_db, runtime_deps_result[1]) dev_deps_status = check_dependencies_on_chai(chai_db, dev_deps_result[1]) # Create entities to add to database if not in dry run mode and all checks pass changes_summary = { "Package": 1, "URLs": 0, "PackageURLs": 0, "Runtime Dependencies": 0, "Dev Dependencies": 0, } # Early exit if necessary conditions aren't met if exists_on_chai or not npm_response_ok: print_status_report( package_name, exists_on_chai, npm_response_ok, npm_error, homepage_result, repository_result, source_result, runtime_deps_status, dev_deps_status, None, dry_run, ) return False # Create Package derived_id = f"npm/{package_name}" package_manager_id = config.pm_config.pm_id import_id = f"npm-singleton/{package_name}" readme = package_info.get("readme", "") pkg = Package( id=uuid4(), name=package_name, derived_id=derived_id, package_manager_id=package_manager_id, import_id=import_id, readme=readme, ) # URLs urls = [] if homepage_result[0]: urls.append( generate_url(config.url_types.homepage, normalize_url(homepage_result[1])) ) if repository_result[0]: urls.append( generate_url( config.url_types.repository, normalize_url(repository_result[1]) ) ) if source_result[0]: urls.append( generate_url(config.url_types.source, normalize_url(source_result[1])) ) changes_summary["URLs"] = len(urls) changes_summary["PackageURLs"] = len(urls) # Dependencies runtime_deps, _ = generate_legacy_dependencies( chai_db, pkg, runtime_deps_result[1], config.dependency_types.runtime ) dev_deps, _ = generate_legacy_dependencies( chai_db, pkg, dev_deps_result[1], config.dependency_types.development ) changes_summary["Runtime Dependencies"] = len(runtime_deps) changes_summary["Dev Dependencies"] = len(dev_deps) # 
Print status report
    print_status_report(
        package_name,
        exists_on_chai,
        npm_response_ok,
        npm_error,
        homepage_result,
        repository_result,
        source_result,
        runtime_deps_status,
        dev_deps_status,
        changes_summary,
        dry_run,
    )

    # Load the package into the database (unless in dry run mode)
    if not dry_run:
        chai_db.load(pkg, urls, runtime_deps, dev_deps)
        print("✅ Successfully committed changes to database")
    else:
        print("🌵 Dry run: No changes committed to database")

    return True


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Load a single NPM package by name into CHAI"
    )
    parser.add_argument("name", help="Name of the NPM package")
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Check package without committing to database",
    )
    args = parser.parse_args()

    success = process_package(args.name, args.dry_run)
    if not success:
        sys.exit(1)

================================================
FILE: scripts/package_to_package/package_dependencies.py
================================================
#! /usr/bin/env pkgx +python@3.11 uv run

import argparse
import re
import sys
from typing import Any

from packaging import version as packaging_version
from sqlalchemy.dialects.postgresql import insert as pg_insert
from sqlalchemy.exc import IntegrityError
from sqlalchemy.orm import Session, sessionmaker

from core.config import Config, PackageManager
from core.db import DB
from core.logger import Logger
from core.models import DependsOn, LegacyDependency, Package, Version

# --- Constants ---
INSERT_BATCH_SIZE = 5000
DEFAULT_SEMVER_RANGE = "*"

logger = Logger("package_dependency_migration")


# --- Helper Functions ---
def preprocess_version_string(version_str: str) -> str:
    """
    Transforms known non-PEP440 version strings into a parseable format.

    Handles specific date formats, build tags, and common non-standard separators.
    """
    # Replace underscores between digits or letters/digits
    version_str = re.sub(r"(?<=[a-zA-Z\d])_(?=[a-zA-Z\d])", ".", version_str)

    # === Pattern Matching & Transformation (Order Matters!) ===

    # --- Specific Patterns First ---
    # Handle X.Y.Z-M<N> -> X.Y.Z+M<N> (Milestone)
    match_milestone = re.fullmatch(r"(\d+(\.\d+)*)-M(\d+)", version_str)
    if match_milestone:
        return f"{match_milestone.group(1)}+M{match_milestone.group(3)}"

    # Handle X.Y.Z-<word>.<N> -> X.Y.Z+<word>.<N> (Vendor Build)
    match_vendor_build = re.fullmatch(r"(\d+(\.\d+)+)-([a-zA-Z]+)\.(\d+)", version_str)
    if match_vendor_build:
        return f"{match_vendor_build.group(1)}+{match_vendor_build.group(3)}.{match_vendor_build.group(4)}"

    # Handle X.Y.Z-git<hash> -> X.Y.Z+git<hash>
    match_git_build = re.fullmatch(r"(\d+(\.\d+)+)-(git[\da-zA-Z]+)", version_str)
    if match_git_build:
        return f"{match_git_build.group(1)}+{match_git_build.group(3)}"

    # Handle X.Y.Z-p<N> / X.Y.Zp<N> -> X.Y.Z+p<N>
    match_p_patch1 = re.fullmatch(r"(\d+(\.\d+)+)-p(\d+)", version_str)
    if match_p_patch1:
        return f"{match_p_patch1.group(1)}+p{match_p_patch1.group(3)}"
    match_p_patch2 = re.fullmatch(r"(\d+(\.\d+)+)p(\d+)", version_str)
    if match_p_patch2:
        return f"{match_p_patch2.group(1)}+p{match_p_patch2.group(3)}"

    # --- Date Formats ---
    # YYYY-MM-DD -> YYYY.MM.DD
    if re.fullmatch(r"\d{4}-\d{2}-\d{2}", version_str):
        return version_str.replace("-", ".")
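    # Worked examples for the transforms above (hypothetical version strings):
    #   "1.2.3-M4"     -> "1.2.3+M4"      (milestone)
    #   "1.2.3-beta.2" -> "1.2.3+beta.2"  (vendor build)
    #   "1.2.3-p7"     -> "1.2.3+p7"      (patch)
    #   "2024-01-31"   -> "2024.01.31"    (date)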
    # YYYY.MM.DD.<commit_hash> -> YYYY.MM.DD+<commit_hash>
    # TODO: Hashes on the same date are compared lexicographically, which might not
    # reflect actual order.
    match_dot_date_hash = re.fullmatch(
        r"(\d{4}\.\d{2}\.\d{2})\.([a-zA-Z0-9]+)", version_str
    )
    if match_dot_date_hash:
        # Ensure the suffix isn't just a standard version number or time-like
        suffix = match_dot_date_hash.group(2)
        try:
            # If packaging can parse "0.<suffix>", it's likely not a hash
            packaging_version.parse(f"0.{suffix}")
            # Also check if it looks like HH.MM.SS
            if not re.fullmatch(r"\d{2}\.\d{2}\.\d{2}", suffix):
                return f"{match_dot_date_hash.group(1)}+{suffix}"  # Treat as hash
        except packaging_version.InvalidVersion:
            return f"{match_dot_date_hash.group(1)}+{suffix}"  # Treat as hash
        except Exception:
            return f"{match_dot_date_hash.group(1)}+{suffix}"  # Treat as hash

    # YYYYMMDDTHHMMSS -> YYYYMMDD.HHMMSS
    match_ymdt_compact = re.fullmatch(r"(\d{8})T(\d{6})", version_str)
    if match_ymdt_compact:
        return f"{match_ymdt_compact.group(1)}.{match_ymdt_compact.group(2)}"

    # YYYY.MM.DD-HH.MM.SS -> YYYY.MM.DD+HHMMSS
    match_ymd_time_hyphen = re.fullmatch(
        r"(\d{4}\.\d{2}\.\d{2})-(\d{2}\.\d{2}\.\d{2})", version_str
    )
    if match_ymd_time_hyphen:
        time_part = match_ymd_time_hyphen.group(2).replace(".", "")
        return f"{match_ymd_time_hyphen.group(1)}+{time_part}"

    # ISO 8601 subset: YYYY-MM-DDTHH-MM-SSZ -> YYYY.MM.DD+HHMMSSZ
    match_iso_subset = re.fullmatch(
        r"(\d{4})-(\d{2})-(\d{2})T(\d{2})-(\d{2})-(\d{2})Z", version_str
    )
    if match_iso_subset:
        date_part = f"{match_iso_subset.group(1)}.{match_iso_subset.group(2)}.{match_iso_subset.group(3)}"
        time_part = f"{match_iso_subset.group(4)}{match_iso_subset.group(5)}{match_iso_subset.group(6)}Z"
        return f"{date_part}+{time_part}"

    # YYYY_MM_DD.<commit_hash> -> YYYY.MM.DD+<commit_hash>
    match_commit_hash = re.fullmatch(
        r"(\d{4}_\d{2}_\d{2})\.([a-zA-Z0-9]+)", version_str
    )
    if match_commit_hash:
        return f"{match_commit_hash.group(1)}+{match_commit_hash.group(2)}"

    # YYYYMMDD-<suffix> -> YYYYMMDD+<suffix>
    match_date_suffix = re.fullmatch(r"(\d{8})-?(.*)", version_str)
    if match_date_suffix and match_date_suffix.group(2):  # Ensure there is a suffix
        # Check if suffix looks like a simple version number itself,
        # otherwise treat as string
        suffix = match_date_suffix.group(2)
        # Normalize suffix by removing dots if it looks like a version part
        # This helps comparison e.g., update1 vs 3.1 -> update1 vs 31
        normalized_suffix = suffix.replace(".", "")
        return f"{match_date_suffix.group(1)}+{normalized_suffix}"

    # --- More General Build/Patch Identifiers ---
    # Handle X.Y.Z.v<N> -> X.Y.Z+v<N>
    match_v_build = re.fullmatch(r"(\d+(\.\d+)+)\.v(.*)", version_str)
    if match_v_build:
        return f"{match_v_build.group(1)}+v{match_v_build.group(3)}"

    # Handle X.Yrel.<N> -> X.Y+rel.<N>
    match_rel_build = re.fullmatch(r"(\d+(\.\d+)+)rel\.(.*)", version_str)
    if match_rel_build:
        return f"{match_rel_build.group(1)}+rel.{match_rel_build.group(3)}"

    # Handle X.Yga<N> -> X.Y+ga<N>
    match_ga_build = re.fullmatch(r"(\d+(\.\d+)+)ga(\d+)", version_str)
    if match_ga_build:
        return f"{match_ga_build.group(1)}+ga{match_ga_build.group(3)}"

    # Handle <major>-<build> (comes after more specific hyphenated patterns)
    match_major_build = re.fullmatch(r"(\d+)-([\da-zA-Z]+)", version_str)
    if match_major_build:
        return f"{match_major_build.group(1)}+{match_major_build.group(2)}"

    # Handle r<N> -> 0+r<N>
    match_revision = re.fullmatch(r"r(\d+)", version_str)
    if match_revision:
        return f"0+r{match_revision.group(1)}"

    # Handle X.Y.Z...
-> X.Y.Z...+suffix (openssl@1.1.1w) match_version_letter_suffix = re.fullmatch(r"(\d+(\.\d+)+)([a-zA-Z]+)", version_str) if match_version_letter_suffix: base_version_part = match_version_letter_suffix.group(1) if base_version_part.count(".") > 0: # Ensures at least X.Y.Z format return f"{match_version_letter_suffix.group(1)}+{match_version_letter_suffix.group(3)}" # Handle X.Y / X.Y -> X.Y+suffix match_letter_suffix = re.fullmatch(r"(\d+\.\d+)([a-zA-Z]{1,2})", version_str) if match_letter_suffix: return f"{match_letter_suffix.group(1)}+{match_letter_suffix.group(2)}" # Handle leading 'p' if it looks like p if version_str.startswith("p") and re.match(r"p\d", version_str): potential_version = version_str[1:] try: packaging_version.parse(potential_version) return potential_version except packaging_version.InvalidVersion: pass # --- Fallback --- return version_str def get_latest_version_info(versions: list[Version]) -> Version | None: """ Identifies the latest version from a list using packaging.version for robust parsing unless there is only one version provided. Args: versions: A list of Version objects for a single package. Returns: - None if the list is empty, or; - The single Version object if only one is provided, or; - The Version object corresponding to the latest parseable version. """ # Handle empty list if not versions: return None # If there's only one version, return it directly without parsing if len(versions) == 1: return versions[0] # Proceed with parsing and comparison if more than one version exists latest_parsed_version = None latest_version_obj = None for version_obj in versions: original_version_str = version_obj.version preprocessed_str = preprocess_version_string(original_version_str) try: current_parsed_version = packaging_version.parse(preprocessed_str) if ( latest_parsed_version is None or current_parsed_version > latest_parsed_version ): latest_parsed_version = current_parsed_version latest_version_obj = version_obj except packaging_version.InvalidVersion as e_invalid: logger.warn( f"Invalid version: '{original_version_str}' -> '{preprocessed_str}' -> {e_invalid}" ) continue except Exception as e_general: logger.error( f"Unexpected error: '{original_version_str}' -> '{preprocessed_str}' -> {e_general}" ) continue # If no versions were successfully processed if latest_version_obj is None: import_id = versions[0].import_id versions_str = ", ".join([v.version for v in versions]) logger.warn(f"No versions for {import_id}: {versions_str}") return latest_version_obj def insert_legacy_dependencies( session: Session, data_batch: list[dict[str, Any]] ) -> None: """ Inserts a batch of legacy dependency records into the database, ignoring duplicates based on the (package_id, dependency_id) unique constraint. Args: session: The SQLAlchemy session object. data_batch: A list of dictionaries, each representing a LegacyDependency row. 
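
    Note: duplicate pairs are skipped by the INSERT ... ON CONFLICT
    (package_id, dependency_id) DO NOTHING statement below, so re-running the
    migration does not double-insert existing dependency rows.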
""" if not data_batch: return try: # Get the target table object legacy_table = LegacyDependency.__table__ # Construct the PostgreSQL INSERT...ON CONFLICT DO NOTHING statement stmt = pg_insert(legacy_table).values(data_batch) # Specify the columns involved in the unique constraint # The constraint name 'uq_package_dependency' is defined in the model stmt = stmt.on_conflict_do_nothing( index_elements=["package_id", "dependency_id"] ) # Execute the statement session.execute(stmt) session.commit() except IntegrityError as e: logger.error(f"Database Integrity Error during insert: {e}") session.rollback() raise e except Exception as e: logger.error(f"An unexpected error occurred during bulk insert: {e}") session.rollback() raise e def process_package_dependencies(config: Config, session: Session) -> None: legacy_deps_to_insert: list[dict[str, Any]] = [] total_packages_processed = 0 total_dependencies_found = 0 default_dependency_type_id = config.dependency_types.runtime logger.log(f"Starting migration for package manager ID: {config.pm_config.pm_id}") # --- Fetch ALL packages for the manager --- logger.log("Fetching all packages for the specified manager...") all_packages: list[Package] = ( session.query(Package) .filter(Package.package_manager_id == config.pm_config.pm_id) .all() ) logger.log(f"Fetched {len(all_packages)} packages.") # --- Process all fetched packages --- for pkg in all_packages: total_packages_processed += 1 # debug if total_packages_processed % 1000 == 0: logger.debug( f"Processed {total_packages_processed}/{len(all_packages)} packages..." ) versions = session.query(Version).filter(Version.package_id == pkg.id).all() # skip if no versions if not versions: continue # grab the latest version latest_version = get_latest_version_info(versions) if latest_version is None: continue # grab the dependencies for the latest version dependencies = ( session.query(DependsOn) .filter(DependsOn.version_id == latest_version.id) .all() ) # construct the load object for dependency in dependencies: dep_data = { "package_id": pkg.id, "dependency_id": dependency.dependency_id, "dependency_type_id": dependency.dependency_type_id or default_dependency_type_id, "semver_range": dependency.semver_range or DEFAULT_SEMVER_RANGE, } legacy_deps_to_insert.append(dep_data) total_dependencies_found += 1 # --- Insert if batch is full --- if len(legacy_deps_to_insert) >= INSERT_BATCH_SIZE: logger.log(f"Reached insert batch size ({INSERT_BATCH_SIZE}). Inserting...") insert_legacy_dependencies(session, legacy_deps_to_insert) legacy_deps_to_insert = [] # --- Final Insert --- if legacy_deps_to_insert: logger.log( f"Inserting final batch of {len(legacy_deps_to_insert)} dependency records." 
) insert_legacy_dependencies(session, legacy_deps_to_insert) logger.log("--- Migration Summary ---") logger.log(f"Total packages processed: {total_packages_processed}") logger.log(f"Total dependencies found: {total_dependencies_found}") logger.log("Migration process completed.") # --- Main Execution --- if __name__ == "__main__": desc = """Migrate version-specific dependencies to package-level dependencies based on the latest version.""" parser = argparse.ArgumentParser(description=desc) parser.add_argument( "--package-manager", type=lambda pm: PackageManager[pm.upper()], choices=list(PackageManager), required=True, help="The package manager to process (e.g., NPM, CRATES).", ) args = parser.parse_args() logger.log( f"Starting package dependency migration for: {args.package_manager.name}" ) SessionLocal = None try: config = Config(args.package_manager) db = DB("db_logger") SessionLocal = sessionmaker(bind=db.engine) with SessionLocal() as session: process_package_dependencies(config, session) except Exception as e: logger.error(f"A critical error occurred: {e}") sys.exit(1) finally: logger.log("Script finished.") ================================================ FILE: scripts/upgrade_canons/.gitignore ================================================ *.csv ================================================ FILE: scripts/upgrade_canons/README.md ================================================ # Upgrade Canons Scripts A collection of scripts for managing canonical URLs and canon IDs in the CHAI database. ## Scripts Overview | Script | Purpose | Usage | Sample Output | | -------------------------- | ------------------------------------------------------------------ | ------------------------------------------------------------- | ------------------------------------------- | | `main.py` | Creates canonical URL entries for packages whose URLs of a given type are not canonical | `./main.py --url-type homepage --url-type-id <uuid> [--dry-run]` | `✅ Inserted 45678 URLs, 52341 PackageURLs` | | `registered_projects.py` | Updates Canon IDs for registered projects to restore old canon IDs | `cat canon_ids.txt \| ./registered_projects.py [--dry-run]` | `✅ Success: 150`
`❌ Failure: 25` | | `create_deleted_canons.py` | Creates canons for registered projects that were deleted | `./create_deleted_canons.py --csv-file input.csv [--dry-run]` | `✅ Success: 75`
`❌ Failure: 12` | ## Requirements - pkgx (or uv) - CHAI_DATABASE_URL environment variable - Python dependencies: `psycopg2==2.9.10`, `permalint==0.1.14` ## Common Options - `--dry-run`: Show what would be done without making changes - Input failures are written to CSV files for review ## Database Schema Dependencies Scripts interact with these tables: - `urls`, `url_types`, `package_urls` - `canons`, `canon_packages`, `canon_packages_old` - `tea_ranks`, `packages` ================================================ FILE: scripts/upgrade_canons/create_deleted_canons.py ================================================ #!/usr/bin/env pkgx uv run import argparse import csv import sys from uuid import UUID from scripts.upgrade_canons.db import DB def read_package_data_from_csv(filename: str) -> list[tuple[str, UUID]]: """Read package names and canon IDs from CSV file and return as list of tuples.""" package_data = [] try: with open(filename, newline="") as csvfile: reader = csv.DictReader(csvfile) for row in reader: try: package_name = row["package_name"] canon_id = UUID(row["canon_id"]) package_data.append((package_name, canon_id)) except (ValueError, KeyError) as e: print( f"Warning: Invalid row in CSV '{row}': {e}", file=sys.stderr, ) except FileNotFoundError: print(f"Error: File '{filename}' not found", file=sys.stderr) sys.exit(1) return package_data def process_deleted_package( db: DB, package_name: str, target_canon_id: UUID, dry_run: bool ) -> tuple[bool, str]: """ Process a single package name for deleted registered projects. Returns (success, reason) tuple. """ # Step 1: Prepend 'npm/' to the name to create derived_id derived_id = f"npm/{package_name}" # Step 2: Search by derived_id to get the package_id db.cursor.execute( """ SELECT id FROM packages WHERE derived_id = %s """, (derived_id,), ) package_result = db.cursor.fetchone() if not package_result: return False, "could not find derived_id" package_id = package_result[0] # Step 3: Join to canon_packages to retrieve the current canon_id db.cursor.execute( """ SELECT canon_id FROM canon_packages WHERE package_id = %s """, (package_id,), ) current_result = db.cursor.fetchone() if not current_result: return False, "could not find current canon_id" current_canon_id = current_result[0] if dry_run: print( f"DRY RUN: Would update canon_id {current_canon_id} to {target_canon_id} for package {derived_id} (package_id: {package_id})" ) return True, "" try: # Run the three update statements using target_canon_id from CSV # 1. Update canons table db.cursor.execute( """ UPDATE canons SET id = %s WHERE id = %s """, (target_canon_id, current_canon_id), ) # 2. Update canon_packages table db.cursor.execute( """ UPDATE canon_packages SET canon_id = %s WHERE canon_id = %s """, (target_canon_id, current_canon_id), ) # 3. 
Update tea_ranks table db.cursor.execute( """ UPDATE tea_ranks SET canon_id = %s WHERE canon_id = %s """, (target_canon_id, current_canon_id), ) return True, "" except Exception as e: print( f"Error updating canon_id for package {package_name}: {e}", file=sys.stderr ) return False, f"database error: {e!s}" def write_failures_csv( failures: list[tuple[str, str]], filename: str = "deleted_canons_failures.csv" ): """Write failures to a CSV file.""" with open(filename, "w", newline="") as csvfile: writer = csv.writer(csvfile) writer.writerow(["package_name", "reason"]) for package_name, reason in failures: writer.writerow([package_name, reason]) def main(): parser = argparse.ArgumentParser( description="Create canons for registered projects that were deleted" ) parser.add_argument( "--dry-run", action="store_true", help="Show what would be done without making changes", ) parser.add_argument( "--csv-file", type=str, required=True, help="CSV file containing package_name and canon_id columns", ) args = parser.parse_args() # Read package data from CSV package_data = read_package_data_from_csv(args.csv_file) if not package_data: print("No package data provided", file=sys.stderr) sys.exit(1) print( f"Processing {len(package_data)} package records for deleted registered projects..." ) # Initialize database connection db = DB() success_count = 0 failure_count = 0 failures = [] try: for package_name, target_canon_id in package_data: success, reason = process_deleted_package( db, package_name, target_canon_id, args.dry_run ) if success: success_count += 1 else: failure_count += 1 failures.append((package_name, reason)) print(f"Warning: Failed to process package {package_name}: {reason}") # Commit changes if not dry run if not args.dry_run and success_count > 0: db.conn.commit() print("Database changes committed.") # Write failures to CSV if any if failures: write_failures_csv(failures) print("Failures written to deleted_canons_failures.csv") finally: db.close() # Print final summary print("--------------------------------------------------") print(f"✅ Success: {success_count}") print(f"❌ Failure: {failure_count}") print("--------------------------------------------------") if __name__ == "__main__": main() ================================================ FILE: scripts/upgrade_canons/db.py ================================================ from collections import defaultdict from os import getenv from uuid import UUID import psycopg2 from psycopg2.extras import execute_values, register_uuid from psycopg2.sql import SQL, Identifier from scripts.upgrade_canons.structs import URL, PackageURL CHAI_DATABASE_URL = getenv("CHAI_DATABASE_URL") class DB: def __init__(self): if not CHAI_DATABASE_URL: raise Exception("CHAI_DATABASE_URL is not set") self.conn = psycopg2.connect(CHAI_DATABASE_URL) self.cursor = self.conn.cursor() register_uuid(conn_or_curs=self.conn) def get_urls_by_type( self, url_type_name: str ) -> tuple[set[str], dict[UUID, list[str]]]: """ Returns a set of ALL URLs of the specified type (including orphans), and a map of package ID to list of URL strings for URLs that are attached to packages. Args: url_type_name: The name of the URL type to filter by (e.g., 'homepage', 'repository', 'source') """ self.cursor.execute( """ SELECT u.url, pu.package_id FROM urls u JOIN url_types ut ON ut.id = u.url_type_id LEFT JOIN package_urls pu ON pu.url_id = u.id WHERE ut.name = %s;""", (url_type_name,), ) package_url_map: dict[UUID, list[str]] = defaultdict(list) all_urls: set[str] = set() for url, package_id in
self.cursor.fetchall(): all_urls.add(url) # Add all URLs (including orphans) if ( package_id is not None ): # Only add to package map if attached to a package package_url_map[package_id].append(url) return all_urls, package_url_map def db_execute_values( self, table_name: str, columns: list[str], values: list[tuple] ): query = ( SQL("INSERT INTO {table_name} ({columns}) VALUES %s") .format( table_name=Identifier(table_name), columns=SQL(", ").join(Identifier(column) for column in columns), ) .as_string(self.conn) ) try: execute_values(self.cursor, query, values) print(f"Inserted {len(values)} rows into {table_name}") except Exception as e: print(f"Error inserting {table_name}: {e}") raise def ingest( self, urls_to_add: list[URL], package_urls_to_add: list[PackageURL], dry_run: bool, ): """ Inserts into the DB using psycopg2's execute_values, which expects the data to be formatted as a list of tuples. """ if urls_to_add: table_name = "urls" columns = ["id", "url", "url_type_id", "created_at", "updated_at"] values = [ (url.id, url.url, url.url_type_id, url.created_at, url.updated_at) for url in urls_to_add ] self.db_execute_values(table_name, columns, values) if package_urls_to_add: table_name = "package_urls" columns = ["id", "package_id", "url_id", "created_at", "updated_at"] values = [ ( package_url.id, package_url.package_id, package_url.url_id, package_url.created_at, package_url.updated_at, ) for package_url in package_urls_to_add ] self.db_execute_values(table_name, columns, values) if not dry_run: self.conn.commit() def close(self): self.cursor.close() self.conn.close() def get_canons_by_url_ids(self, url_ids: list[UUID]) -> list[tuple[UUID, UUID]]: if not url_ids: return [] # Expand the UUID list into one placeholder per value for the IN clause placeholders = ",".join(["%s"] * len(url_ids)) self.cursor.execute( f""" SELECT id, url_id FROM canons WHERE url_id IN ({placeholders}); """, url_ids, ) return self.cursor.fetchall() ================================================ FILE: scripts/upgrade_canons/delete_non_canonical_urls.py ================================================ #!/usr/bin/env pkgx uv run import argparse import csv import sys import warnings from uuid import UUID from permalint import is_canonical_url from scripts.upgrade_canons.db import DB def write_to_csv(filename: str, headers: list[str], data: list[tuple]): with open(filename, "w", newline="") as f: writer = csv.writer(f) writer.writerow(headers) writer.writerows(data) def get_all_urls(db: DB) -> list[tuple[UUID, str]]: """ Query all URLs from the urls table. Returns list of tuples (url_id, url_string). """ db.cursor.execute( """ SELECT id, url FROM urls ORDER BY id """ ) return db.cursor.fetchall() def identify_non_canonical_urls(urls: list[tuple[UUID, str]]) -> list[UUID]: """ Check each URL for canonicality using permalint. Returns list of URL IDs that are not canonical. """ non_canonical_ids = [] for url_id, url_string in urls: try: if not is_canonical_url(url_string): non_canonical_ids.append(url_id) except Exception as e: print(f"Warning: Error checking URL {url_string}: {e}") # Treat URLs that can't be checked as non-canonical non_canonical_ids.append(url_id) return non_canonical_ids def delete_urls_from_database(db: DB, url_ids: list[UUID], dry_run: bool) -> None: """ Delete URLs and their package_urls entries from the database.
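Deletion proceeds child-first to satisfy foreign keys: canons rows referencing the URLs, then package_urls links, then the urls rows themselves.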
""" if not url_ids: print("No URLs to delete.") return if dry_run: print( f"DRY RUN: Would delete {len(url_ids)} URLs and their package_urls entries" ) return # Batch delete operations for efficiency placeholders = ",".join(["%s"] * len(url_ids)) # Delete from canons first (if any exist) db.cursor.execute(f"DELETE FROM canons WHERE url_id IN ({placeholders})", url_ids) # Delete from package_urls (foreign key constraint) db.cursor.execute( f"DELETE FROM package_urls WHERE url_id IN ({placeholders})", url_ids ) # Then delete from urls db.cursor.execute(f"DELETE FROM urls WHERE id IN ({placeholders})", url_ids) # Commit the transaction db.conn.commit() print(f"Successfully deleted {len(url_ids)} URLs and their package_urls entries") def main(dry_run: bool = False): """Main function to delete non-canonical URLs.""" print("Starting deletion of non-canonical URLs...") db = DB() try: # Get all URLs from database print("Fetching all URLs from database...") all_urls = get_all_urls(db) print(f"Found {len(all_urls)} total URLs") # Identify non-canonical URLs print("Checking URLs for canonicality...") non_canonical_ids = identify_non_canonical_urls(all_urls) canonical_count = len(all_urls) - len(non_canonical_ids) print(f"Found {len(non_canonical_ids)} non-canonical URLs") print(f"Found {canonical_count} canonical URLs") # Delete non-canonical URLs if non_canonical_ids: canons = db.get_canons_by_url_ids(non_canonical_ids) if canons: print(f"WARNING: Found {len(canons)} - delete them urself") write_to_csv( "non_canonical_urls_that_have_canons.csv", ["canon_id", "url_id"], canons, ) sys.exit(1) print("Deleting non-canonical URLs...") delete_urls_from_database(db, non_canonical_ids, dry_run) else: print("No non-canonical URLs found to delete.") # Summary print("-" * 50) if dry_run: print("DRY RUN SUMMARY:") print(f"Would delete: {len(non_canonical_ids)} URLs") print(f"Would keep: {canonical_count} URLs") else: print("DELETION SUMMARY:") print(f"✅ Deleted: {len(non_canonical_ids)} URLs") print(f"✅ Kept: {canonical_count} URLs") print("-" * 50) finally: db.close() if __name__ == "__main__": parser = argparse.ArgumentParser( description="Delete non-canonical URLs from the database" ) parser.add_argument( "--dry-run", action="store_true", help="Run in dry-run mode without making database changes", ) args = parser.parse_args() with warnings.catch_warnings(action="ignore"): main(args.dry_run) ================================================ FILE: scripts/upgrade_canons/main.py ================================================ #!/usr/bin/env uv run --with psycopg2==2.9.10 --with permalint==0.1.14 import argparse import warnings from datetime import datetime from uuid import UUID, uuid4 from permalint import is_canonical_url, normalize_url from scripts.upgrade_canons.db import DB from scripts.upgrade_canons.structs import URL, PackageURL # Valid URL types that can be canonicalized VALID_URL_TYPES = {"homepage", "repository", "source"} def is_one_url_canonical(urls: list[str]) -> bool: """Returns True if at least one of the URLs is canonical""" return any(is_canonical_url(url) for url in urls) def generate_canonical_url(urls: list[str]) -> str: """ Returns the canonical URL for the given list of URLs - TODO: we should be smart about which one to pick, like most recent perhaps? 
""" return normalize_url(urls[0]) def generate_new_url(url: str, url_type_id: UUID, now: datetime) -> URL: """Creates a new URL object for the given URL.""" return URL(uuid4(), url, url_type_id, now, now) def generate_new_package_url( package_id: UUID, url_id: UUID, now: datetime ) -> PackageURL: """Creates a new PackageURL object for the given package and URL""" return PackageURL(uuid4(), package_id, url_id, now, now) # Pure functions for business logic - highly testable def analyze_packages_needing_canonicalization( package_url_map: dict[UUID, list[str]], existing_urls: set[str], ) -> dict[UUID, str]: """ Analyze which packages need canonical URLs created. Returns a mapping of package_id to the canonical URL that should be created. """ packages_needing_canon: dict[UUID, str] = {} canonical_urls_to_create: set[str] = set() for package_id, urls in package_url_map.items(): # Skip if package already has at least one canonical URL if is_one_url_canonical(urls): continue canonical_url = generate_canonical_url(urls) # Skip if canonical URL already exists in database if canonical_url in existing_urls: continue # Skip if we're already planning to create this canonical URL if canonical_url in canonical_urls_to_create: continue # This package needs a canonical URL created packages_needing_canon[package_id] = canonical_url canonical_urls_to_create.add(canonical_url) return packages_needing_canon def create_url_and_package_url_objects( packages_needing_canon: dict[UUID, str], url_type_id: UUID, now: datetime, ) -> tuple[list[URL], list[PackageURL]]: """ Create URL and PackageURL objects for the packages that need canonicalization. """ new_urls: list[URL] = [] new_package_urls: list[PackageURL] = [] for package_id, canonical_url in packages_needing_canon.items(): new_url = generate_new_url(canonical_url, url_type_id, now) new_package_url = generate_new_package_url(package_id, new_url.id, now) new_urls.append(new_url) new_package_urls.append(new_package_url) return new_urls, new_package_urls def main(db: DB, url_type: str, url_type_id: UUID, dry_run: bool): now = datetime.now() print(f"Starting main for URL type '{url_type}': {now}") # Get data from database all_urls, package_url_map = db.get_urls_by_type(url_type) print(f"Found {len(all_urls)} {url_type} URLs") print(f"Found {len(package_url_map)} packages with {url_type} URLs") # Analyze which packages need canonicalization packages_needing_canon = analyze_packages_needing_canonicalization( package_url_map, all_urls ) # Create objects new_urls, new_package_urls = create_url_and_package_url_objects( packages_needing_canon, url_type_id, now ) print("-" * 100) print("Going to insert:") print(f" {len(new_urls)} URLs") print(f" {len(new_package_urls)} PackageURLs") print("-" * 100) # Ingest to database db.ingest(new_urls, new_package_urls, dry_run) if __name__ == "__main__": parser = argparse.ArgumentParser(description="Canonicalize URLs of specified type") parser.add_argument( "--url-type", type=str, required=True, choices=VALID_URL_TYPES, help=f"Type of URLs to canonicalize. 
Valid options: {', '.join(VALID_URL_TYPES)}", ) parser.add_argument( "--url-type-id", type=UUID, required=True, help="UUID of the URL type in the database", ) parser.add_argument( "--dry-run", action="store_true", help="Run in dry-run mode without making database changes", ) args = parser.parse_args() db = DB() try: with warnings.catch_warnings(action="ignore"): main(db, args.url_type, args.url_type_id, args.dry_run) finally: db.close() ================================================ FILE: scripts/upgrade_canons/registered_projects.py ================================================ #!/usr/bin/env pkgx uv run import argparse import csv import sys from uuid import UUID from scripts.upgrade_canons.db import DB def read_canon_ids_from_stdin() -> list[UUID]: """Read canon IDs from stdin and return as list of UUIDs.""" canon_ids = [] for line in sys.stdin: line = line.strip() if line: try: canon_ids.append(UUID(line)) except ValueError as e: print(f"Warning: Invalid UUID format '{line}': {e}", file=sys.stderr) return canon_ids def process_canon_id(db: DB, canon_id: UUID, dry_run: bool) -> tuple[bool, str]: """ Process a single canon ID and perform the updates. Returns (success, reason) tuple. """ # First, join to canon_packages_old to get package_id db.cursor.execute( """ SELECT package_id FROM canon_packages_old WHERE canon_id = %s """, (canon_id,), ) old_result = db.cursor.fetchone() if not old_result: return False, "could not find package_id" package_id = old_result[0] # Next, join to canon_packages to get current canon_id db.cursor.execute( """ SELECT canon_id FROM canon_packages WHERE package_id = %s """, (package_id,), ) current_result = db.cursor.fetchone() if not current_result: return False, "could not find new canon_id" new_canon_id = current_result[0] if dry_run: print( f"DRY RUN: Would update canon_id {new_canon_id} to {canon_id} for package {package_id}" ) return True, "" try: # Run the three update statements # 1. Update canons table db.cursor.execute( """ UPDATE canons SET id = %s WHERE id = %s """, (canon_id, new_canon_id), ) # 2. Update canon_packages table db.cursor.execute( """ UPDATE canon_packages SET canon_id = %s WHERE canon_id = %s """, (canon_id, new_canon_id), ) # 3. 
Update tea_ranks table db.cursor.execute( """ UPDATE tea_ranks SET canon_id = %s WHERE canon_id = %s """, (canon_id, new_canon_id), ) return True, "" except Exception as e: print(f"Error updating canon_id {canon_id}: {e}", file=sys.stderr) return False, f"database error: {e!s}" def write_failures_csv( failures: list[tuple[UUID, str]], filename: str = "canon_update_failures.csv" ): """Write failures to a CSV file.""" with open(filename, "w", newline="") as csvfile: writer = csv.writer(csvfile) writer.writerow(["canon_id", "reason"]) for canon_id, reason in failures: writer.writerow([str(canon_id), reason]) def main(): parser = argparse.ArgumentParser( description="Update Canon IDs for registered projects" ) parser.add_argument( "--dry-run", action="store_true", help="Show what would be done without making changes", ) args = parser.parse_args() # Read canon IDs from stdin canon_ids = read_canon_ids_from_stdin() if not canon_ids: print("No canon IDs provided via stdin", file=sys.stderr) sys.exit(1) print(f"Processing {len(canon_ids)} canon IDs...") # Initialize database connection db = DB() success_count = 0 failure_count = 0 failures = [] try: for canon_id in canon_ids: success, reason = process_canon_id(db, canon_id, args.dry_run) if success: success_count += 1 else: failure_count += 1 failures.append((canon_id, reason)) print(f"Warning: Failed to process canon_id {canon_id}: {reason}") # Commit changes if not dry run if not args.dry_run and success_count > 0: db.conn.commit() print("Database changes committed.") # Write failures to CSV if any if failures: write_failures_csv(failures) print("Failures written to canon_update_failures.csv") finally: db.close() # Print final summary print("--------------------------------------------------") print(f"✅ Success: {success_count}") print(f"❌ Failure: {failure_count}") print("--------------------------------------------------") if __name__ == "__main__": main() ================================================ FILE: scripts/upgrade_canons/structs.py ================================================ from dataclasses import dataclass from datetime import datetime from uuid import UUID # let's make classes defining the data models, since scripts can't really access ./core @dataclass class URL: id: UUID url: str url_type_id: UUID created_at: datetime updated_at: datetime @dataclass class PackageURL: id: UUID package_id: UUID url_id: UUID created_at: datetime updated_at: datetime ================================================ FILE: tests/README.md ================================================ # CHAI Test Suite This directory contains the test suite for the CHAI package indexer. All tests are written using [pytest](https://docs.pytest.org/) and follow a consistent structure to ensure maintainability and ease of use. 
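For orientation, a minimal test in this suite's style might look like the following (illustrative; the `mock_config` fixture is provided by `tests/conftest.py`):

```python
import pytest


@pytest.mark.unit
def test_mock_config_has_pm_id(mock_config):
    """The shared mock_config fixture should expose a package manager ID."""
    assert mock_config.pm_config.pm_id is not None
```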
## Table of Contents - [Directory Structure](#directory-structure) - [Running Tests](#running-tests) - [Adding New Tests](#adding-new-tests) - [Test Coverage](#test-coverage) - [Fixtures and Mocking](#fixtures-and-mocking) - [Test Markers](#test-markers) - [Best Practices](#best-practices) - [Troubleshooting](#troubleshooting) ## Directory Structure The test suite is organized to mirror the main project structure: ``` tests/ ├── conftest.py # Common fixtures and configuration ├── package_managers/ # Tests for package manager indexers │ ├── crates/ │ │ ├── conftest.py │ │ └── test_crates_diff_deps.py │ ├── debian/ │ │ ├── conftest.py │ │ ├── test_debian_diff.py │ │ ├── test_debian_parser.py │ │ └── test_debian_sources.py │ ├── homebrew/ │ │ ├── conftest.py │ │ └── test_homebrew_diff_deps.py │ └── pkgx/ │ ├── test_pkgx_diff.py │ └── test_special_case.py ├── ranker/ # Tests for ranking functionality │ ├── test_compute_canon_name.py │ ├── test_dedupe.py │ └── test_rx_graph.py └── scripts/ # Tests for maintenance scripts └── upgrade_canons/ └── test_analyze_packages_needing_canonicalization.py ``` ## Running Tests ### Prerequisites Install test dependencies: ```bash uv sync --all-groups source .venv/bin/activate ``` ### Running All Tests To run all tests: ```bash pytest tests/ ``` ### Running Specific Tests Run tests for a specific module: ```bash # Run all tests in a directory pytest tests/package_managers/crates/ # Run a specific test file pytest tests/package_managers/debian/test_debian_parser.py # Run a specific test class pytest tests/package_managers/debian/test_debian_parser.py::TestDebianParser # Run a specific test method pytest tests/package_managers/debian/test_debian_parser.py::TestDebianParser::test_parse_package_data ``` ### Running Tests by Marker Tests are categorized with markers. To run tests for a specific category: ```bash # Run only unit tests pytest -m unit # Run only parser tests pytest -m parser # Run only transformer tests pytest -m transformer # Run only ranker tests pytest -m ranker # Run all tests except slow ones pytest -m "not slow" ``` ### Verbose Output For more detailed output: ```bash pytest -v tests/ # Show captured print statements pytest -s tests/ # Show local variables in tracebacks pytest -l tests/ ``` ## Adding New Tests ### 1. Create a Test File Test files should: - Be placed in the appropriate directory based on what's being tested - Follow the naming convention `test_*.py` - Include a module docstring explaining what's being tested Example: ```python """ Test the package parsing functionality for NewPackageManager. This module tests the Parser class which extracts package information from the package manager's data format. """ import pytest from package_managers.newpm.parser import Parser ``` ### 2. Use Fixtures for Common Setup Instead of setUp/tearDown methods, use pytest fixtures: ```python @pytest.fixture def sample_package_data(): """Provides sample package data for testing.""" return { "name": "example-package", "version": "1.0.0", "dependencies": ["dep1", "dep2"], } def test_parse_package(sample_package_data): """Test parsing a package with valid data.""" parser = Parser() result = parser.parse(sample_package_data) assert result.name == "example-package" ``` ### 3. Use Markers for Test Categories Apply appropriate markers to your tests: ```python @pytest.mark.parser @pytest.mark.unit class TestNewParser: """Test the new package manager parser.""" def test_parse_valid_data(self): """Test parsing valid package data.""" # test implementation ``` ### 4. Mock External Dependencies Use the fixtures from `conftest.py` or create specific mocks: ```python def test_with_mocked_config(mock_config): """Test using the common mock_config fixture.""" # mock_config is automatically injected from conftest.py transformer = Transformer(mock_config) # test implementation ``` ### 5.
Write Clear Assertions Use clear, descriptive assertions: ```python # Good assert len(packages) == 3, "Should parse exactly 3 packages from the data" # Less clear assert len(packages) == 3 ``` ## Test Coverage ### Running Tests with Coverage To generate a coverage report: ```bash # Run with coverage and generate terminal report pytest --cov=. --cov-report=term tests/ # Generate HTML coverage report pytest --cov=. --cov-report=html tests/ # Open htmlcov/index.html in a browser # Generate coverage for specific modules pytest --cov=package_managers.crates --cov=ranker tests/ # Show missing lines in terminal pytest --cov=. --cov-report=term-missing tests/ ``` ### Coverage by Docker Service To check coverage for specific Docker services defined in `docker-compose.yml`: ```bash # Coverage for crates indexer pytest --cov=package_managers.crates --cov-report=term-missing tests/package_managers/crates/ # Coverage for homebrew indexer pytest --cov=package_managers.homebrew --cov-report=term-missing tests/package_managers/homebrew/ # Coverage for debian indexer pytest --cov=package_managers.debian --cov-report=term-missing tests/package_managers/debian/ # Coverage for pkgx indexer pytest --cov=package_managers.pkgx --cov-report=term-missing tests/package_managers/pkgx/ # Coverage for ranker pytest --cov=ranker --cov-report=term-missing tests/ranker/ ``` ### Setting Coverage Thresholds To fail tests if coverage drops below a threshold: ```bash pytest --cov=. --cov-fail-under=80 tests/ ``` ## Fixtures and Mocking ### Common Fixtures The `conftest.py` file provides several reusable fixtures: - **`mock_config`**: A mocked Config object with all sub-configurations - **`mock_url_types`**: Mocked URL types (homepage, repository, etc.) - **`mock_dependency_types`**: Mocked dependency types (runtime, build, dev, test) - **`mock_package_managers`**: Mocked package manager configurations - **`sample_package_data`**: Sample data for different package managers ### Using Fixtures Fixtures are automatically injected into test functions: ```python def test_example(mock_config, sample_package_data): """Example test using multiple fixtures.""" # mock_config and sample_package_data are automatically available crates_data = sample_package_data["crates"] # test implementation ``` ### Creating Test-Specific Fixtures For test-specific setup, create local fixtures: ```python @pytest.fixture def special_cache(): """Create a cache with specific test data.""" return Cache( package_map={"test": Package(id=uuid4(), name="test")}, url_map={}, package_urls={}, dependencies={}, ) def test_with_special_cache(special_cache): """Test using the special cache.""" # test implementation ``` ## Test Markers Available markers (defined in `conftest.py`): - **`@pytest.mark.unit`**: Unit tests - **`@pytest.mark.integration`**: Integration tests - **`@pytest.mark.slow`**: Slow-running tests - **`@pytest.mark.parser`**: Parser tests - **`@pytest.mark.transformer`**: Transformer tests - **`@pytest.mark.loader`**: Loader tests - **`@pytest.mark.ranker`**: Ranker tests To list all available markers: ```bash pytest --markers ``` ## Best Practices 1. **Test One Thing**: Each test should verify a single behavior 2. **Use Descriptive Names**: Test names should clearly indicate what they test 3. **Keep Tests Independent**: Tests should not depend on each other 4. **Use Fixtures**: Leverage fixtures for common setup instead of duplicating code 5. **Mock External Dependencies**: Don't make actual database or network calls 6.
**Test Edge Cases**: Include tests for error conditions and edge cases 7. **Document Complex Tests**: Add docstrings explaining complex test scenarios ## Troubleshooting ### Common Issues 1. **Import Errors**: Ensure `PYTHONPATH` includes the project root: ```bash export PYTHONPATH=/workspace:$PYTHONPATH ``` 2. **Missing Dependencies**: Install test requirements: ```bash uv sync --all-groups source .venv/bin/activate ``` 3. **Database Connection Errors**: Tests should not require `CHAI_DATABASE_URL`. If a test fails due to database issues, it likely needs better mocking. ### Debugging Tests To debug a failing test: ```bash # Drop into debugger on failure pytest --pdb tests/failing_test.py # Show local variables in traceback pytest -l tests/failing_test.py # Increase verbosity pytest -vv tests/failing_test.py ``` ================================================ FILE: tests/conftest.py ================================================ """ Common test fixtures and configurations for pytest. This module provides reusable fixtures for testing the CHAI package indexer. Instead of mocking database operations, these fixtures focus on providing test data and mock objects for testing the core logic of transformers, parsers, and other components. """ import uuid from unittest.mock import MagicMock, Mock import pytest from core.config import ( Config, DependencyTypes, PackageManagers, PMConf, URLTypes, UserTypes, ) from core.db import DB from core.logger import Logger @pytest.fixture def mock_logger(): """Mock logger for testing.""" logger = MagicMock(spec=Logger) logger.debug.side_effect = lambda x: print(f"DEBUG: {x}") logger.warn.side_effect = lambda x: print(f"WARN: {x}") logger.log.side_effect = lambda x: print(x) return logger @pytest.fixture def mock_url_types(): """ Mock URL types with consistent UUIDs for testing. Returns a mock URLTypes object that returns consistent URL type objects for common URL type names. """ url_types = MagicMock(spec=URLTypes) # Set up URL type attributes directly url_types.homepage = uuid.UUID("00000000-0000-0000-0000-000000000001") url_types.repository = uuid.UUID("00000000-0000-0000-0000-000000000002") url_types.documentation = uuid.UUID("00000000-0000-0000-0000-000000000003") url_types.source = uuid.UUID("00000000-0000-0000-0000-000000000004") return url_types @pytest.fixture def mock_dependency_types(): """ Mock dependency types for testing. Returns a mock DependencyTypes object with common dependency types. """ dep_types = MagicMock(spec=DependencyTypes) # Set up dependency type attributes directly dep_types.runtime = uuid.UUID("00000000-0000-0000-0000-000000000010") dep_types.build = uuid.UUID("00000000-0000-0000-0000-000000000011") dep_types.dev = uuid.UUID("00000000-0000-0000-0000-000000000012") dep_types.test = uuid.UUID("00000000-0000-0000-0000-000000000013") dep_types.development = dep_types.dev # Alias for development dep_types.recommended = uuid.UUID("00000000-0000-0000-0000-000000000014") dep_types.optional = uuid.UUID("00000000-0000-0000-0000-000000000015") return dep_types @pytest.fixture def mock_sources(): """ Mock sources with consistent UUIDs for testing. Returns a dict mapping source names to mock Source objects. 
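(Note: the values here are plain UUIDs standing in for source IDs rather than full Source objects.)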
""" return { "github": uuid.UUID("00000000-0000-0000-0000-000000000020"), "crates": uuid.UUID("00000000-0000-0000-0000-000000000021"), "homebrew": uuid.UUID("00000000-0000-0000-0000-000000000022"), "debian": uuid.UUID("00000000-0000-0000-0000-000000000023"), "pkgx": uuid.UUID("00000000-0000-0000-0000-000000000024"), } @pytest.fixture def mock_package_managers(): """ Mock package managers for testing. Returns a mock PackageManagers object. """ package_managers = MagicMock(spec=PackageManagers) # Set up package manager attributes directly package_managers.crates = uuid.UUID("00000000-0000-0000-0000-000000000030") package_managers.homebrew = uuid.UUID("00000000-0000-0000-0000-000000000031") package_managers.debian = uuid.UUID("00000000-0000-0000-0000-000000000032") package_managers.pkgx = uuid.UUID("00000000-0000-0000-0000-000000000033") return package_managers @pytest.fixture def mock_pm_config(mock_package_managers): """ Mock PMConf (Package Manager Configuration) for testing. Returns a mock PMConf object with a default package manager ID. """ pm_config = MagicMock(spec=PMConf) pm_config.pm_id = mock_package_managers.crates return pm_config @pytest.fixture def mock_config( mock_url_types, mock_dependency_types, mock_package_managers, mock_pm_config, mock_sources, ): """ Mock Config object with all necessary sub-configurations. This is the main configuration fixture that most tests will use. """ config = MagicMock(spec=Config) # Set up execution configuration config.exec_config = MagicMock() config.exec_config.test = True config.exec_config.no_cache = True config.exec_config.debug = False # Set up sub-configurations config.url_types = mock_url_types config.dependency_types = mock_dependency_types config.package_managers = mock_package_managers config.pm_config = mock_pm_config # Mock DB that returns consistent source objects mock_db = MagicMock() mock_db.select_source_by_name.side_effect = lambda name: mock_sources.get(name) mock_db.select_url_types_by_name.side_effect = lambda name: mock_url_types.get(name) # Set the db for config to be a minimal mock db created config.db = mock_db return config @pytest.fixture def mock_user_types(): """ Mock user types for testing. Returns a mock UserTypes object. """ user_types = MagicMock(spec=UserTypes) # Set up user type attributes directly user_types.admin = Mock(id=uuid.UUID("00000000-0000-0000-0000-000000000040")) user_types.maintainer = Mock(id=uuid.UUID("00000000-0000-0000-0000-000000000041")) user_types.contributor = Mock(id=uuid.UUID("00000000-0000-0000-0000-000000000042")) return user_types @pytest.fixture def sample_package_data(): """ Provides sample package data for testing transformers and parsers. Returns a dict with sample data for different package managers. 
""" return { "crates": { "name": "serde", "version": "1.0.130", "description": "A generic serialization/deserialization framework", "homepage": "https://serde.rs", "repository": "https://github.com/serde-rs/serde", "dependencies": {"serde_derive": "1.0.130"}, }, "homebrew": { "name": "wget", "version": "1.21.2", "description": "Internet file retriever", "homepage": "https://www.gnu.org/software/wget/", "dependencies": ["gettext", "libidn2", "openssl@1.1"], }, "debian": { "package": "curl", "version": "7.74.0-1.3+deb11u1", "maintainer": "Alessandro Ghedini ", "depends": ["libc6", "libcurl4", "zlib1g"], }, "pkgx": { "full_name": "gnu.org/wget", "version": "1.21.2", "homepage": "https://www.gnu.org/software/wget/", "dependencies": {"gnu.org/gettext": "^0.21", "openssl.org": "^1.1"}, }, } @pytest.fixture def mock_csv_reader(): """ Creates a mock CSV reader for testing transformers that read CSV files. Returns a function that creates mock readers with specific data. """ def create_mock_reader(data): """ Create a mock reader that returns the specified data. Args: data: List of rows to return from the CSV reader Returns: A mock function that returns an iterator over the data """ def mock_reader(file_key): return iter([data]) return mock_reader return create_mock_reader # Markers for categorizing tests def pytest_configure(config): """Register custom markers for test categorization.""" config.addinivalue_line("markers", "unit: Unit tests") config.addinivalue_line("markers", "integration: Integration tests") config.addinivalue_line("markers", "slow: Slow running tests") config.addinivalue_line("markers", "parser: Parser tests") config.addinivalue_line("markers", "transformer: Transformer tests") config.addinivalue_line("markers", "loader: Loader tests") config.addinivalue_line("markers", "ranker: Ranker tests") @pytest.fixture def mock_db(): return MagicMock(spec=DB) ================================================ FILE: tests/package_managers/crates/conftest.py ================================================ from datetime import datetime from uuid import uuid4 import pytest from core.models import Package from core.structs import Cache from package_managers.crates.main import Diff from package_managers.crates.structs import ( Crate, CrateLatestVersion, ) @pytest.fixture def package_ids(): """Fixture providing consistent package IDs for testing.""" return {"main": uuid4(), "dep": uuid4()} @pytest.fixture def packages(package_ids): """Fixture providing test packages.""" return { "main": Package( id=package_ids["main"], name="main_pkg", package_manager_id=1, import_id="1048221", created_at=datetime.now(), updated_at=datetime.now(), ), "dep": Package( id=package_ids["dep"], name="dep_pkg", package_manager_id=1, import_id="271975", created_at=datetime.now(), updated_at=datetime.now(), ), } @pytest.fixture def diff_instance(mock_config): """ Factory fixture to create Diff instances with specific cache configurations. Returns a function that creates Diff instances. """ def create_diff(package_map, dependencies=None, url_map=None, package_urls=None): cache = Cache( package_map=package_map, url_map=url_map or {}, package_urls=package_urls or {}, dependencies=dependencies or {}, ) return Diff(mock_config, cache) return create_diff @pytest.fixture def crate_with_dependencies(): """ Factory fixture to create Crate objects with specified dependencies. Returns a function that creates Crate objects. 
""" def create_crate(crate_id="1048221", dependencies=None): latest_version = CrateLatestVersion( id=9337571, checksum="some-checksum", downloads=1000, license="MIT", num="1.0.0", published_by=None, published_at="2023-01-01", ) if dependencies: latest_version.dependencies = dependencies else: latest_version.dependencies = [] crate = Crate( id=int(crate_id), name="main_pkg", readme="Test readme", homepage="", repository="", documentation="", source=None, ) crate.latest_version = latest_version return crate return create_crate ================================================ FILE: tests/package_managers/crates/test_crates_diff_deps.py ================================================ """ Test the diff_deps functionality for the crates package manager. This module tests the Diff.diff_deps method which determines what dependencies need to be added or removed when processing crate updates. """ from datetime import datetime import pytest from core.models import LegacyDependency from package_managers.crates.structs import CrateDependency, DependencyType @pytest.mark.transformer class TestDiffDeps: """Tests for the diff_deps method in the Diff class for crates.""" def test_existing_dependency_no_changes( self, packages, package_ids, diff_instance, crate_with_dependencies, mock_config ): """ Test that when a dependency already exists in the database and also appears in the crate object, it is neither added to new_deps nor removed_deps. """ # Create an existing runtime dependency existing_dep = LegacyDependency( id=1, package_id=package_ids["main"], dependency_id=package_ids["dep"], dependency_type_id=mock_config.dependency_types.runtime, created_at=datetime.now(), updated_at=datetime.now(), ) # Create diff with existing dependency diff = diff_instance( package_map={"1048221": packages["main"], "271975": packages["dep"]}, dependencies={package_ids["main"]: {existing_dep}}, ) # Create crate with the same dependency dependency = CrateDependency( crate_id=1048221, dependency_id=271975, dependency_type=DependencyType.NORMAL, # Runtime dependency semver_range="^0.26.1", ) crate = crate_with_dependencies(dependencies=[dependency]) # Execute new_deps, removed_deps = diff.diff_deps(crate) # Assert assert len(new_deps) == 0, "No new deps should be added" assert len(removed_deps) == 0, "No deps should be removed" def test_dependency_changed_type( self, packages, package_ids, diff_instance, crate_with_dependencies, mock_config ): """ Test that when a dependency exists but its type changes, it is both added to new_deps and removed_deps. 
""" # Create an existing build dependency existing_dep = LegacyDependency( id=1, package_id=package_ids["main"], dependency_id=package_ids["dep"], dependency_type_id=mock_config.dependency_types.build, # BUILD type created_at=datetime.now(), updated_at=datetime.now(), ) # Create diff with existing dependency diff = diff_instance( package_map={"1048221": packages["main"], "271975": packages["dep"]}, dependencies={package_ids["main"]: {existing_dep}}, ) # Create crate with dependency changed to runtime dependency = CrateDependency( crate_id=1048221, dependency_id=271975, dependency_type=DependencyType.NORMAL, # Changed to runtime semver_range="^0.26.1", ) crate = crate_with_dependencies(dependencies=[dependency]) # Execute new_deps, removed_deps = diff.diff_deps(crate) # Assert assert len(new_deps) == 1, "One new dep should be added (new type)" assert len(removed_deps) == 1, "One dep should be removed (old type)" # Verify new dep is runtime new_dep = new_deps[0] assert new_dep.package_id == package_ids["main"] assert new_dep.dependency_id == package_ids["dep"] assert new_dep.dependency_type_id == mock_config.dependency_types.runtime # Verify removed dep is build removed_dep = removed_deps[0] assert removed_dep.package_id == package_ids["main"] assert removed_dep.dependency_id == package_ids["dep"] assert removed_dep.dependency_type_id == mock_config.dependency_types.build def test_new_dependency( self, packages, package_ids, diff_instance, crate_with_dependencies, mock_config ): """ Test that when a dependency doesn't exist in the cache but appears in the crate object, it is added to new_deps. """ # Create diff with no existing dependencies diff = diff_instance( package_map={"1048221": packages["main"], "271975": packages["dep"]} ) # Create crate with a new dependency dependency = CrateDependency( crate_id=1048221, dependency_id=271975, dependency_type=DependencyType.NORMAL, semver_range="^0.26.1", ) crate = crate_with_dependencies(dependencies=[dependency]) # Execute new_deps, removed_deps = diff.diff_deps(crate) # Assert assert len(new_deps) == 1, "One new dep should be added" assert len(removed_deps) == 0, "No deps should be removed" # Verify new dep new_dep = new_deps[0] assert new_dep.package_id == package_ids["main"] assert new_dep.dependency_id == package_ids["dep"] assert new_dep.dependency_type_id == mock_config.dependency_types.runtime def test_removed_dependency( self, packages, package_ids, diff_instance, crate_with_dependencies, mock_config ): """ Test that when a dependency exists in the cache but doesn't appear in the crate object, it is added to removed_deps. 
""" # Create an existing dependency existing_dep = LegacyDependency( id=1, package_id=package_ids["main"], dependency_id=package_ids["dep"], dependency_type_id=mock_config.dependency_types.runtime, created_at=datetime.now(), updated_at=datetime.now(), ) # Create diff with existing dependency diff = diff_instance( package_map={"1048221": packages["main"], "271975": packages["dep"]}, dependencies={package_ids["main"]: {existing_dep}}, ) # Create crate with no dependencies crate = crate_with_dependencies(dependencies=[]) # Execute new_deps, removed_deps = diff.diff_deps(crate) # Assert assert len(new_deps) == 0, "No new deps should be added" assert len(removed_deps) == 1, "One dep should be removed" # Verify removed dep removed_dep = removed_deps[0] assert removed_dep.package_id == package_ids["main"] assert removed_dep.dependency_id == package_ids["dep"] assert removed_dep.dependency_type_id == mock_config.dependency_types.runtime def test_multiple_dependency_types_same_package( self, packages, package_ids, diff_instance, crate_with_dependencies, mock_config ): """ Test that when a package depends on the same dependency package with multiple dependency types (e.g., both runtime and build), we handle the unique constraint on (package_id, dependency_id) properly. This test exposes the bug where multiple LegacyDependency records with the same package_id and dependency_id but different dependency_type_id would violate the DB constraint. """ # Create diff with no existing dependencies diff = diff_instance( package_map={"1048221": packages["main"], "271975": packages["dep"]} ) # Create crate with multiple dependency types to the same package runtime_dep = CrateDependency( crate_id=1048221, dependency_id=271975, dependency_type=DependencyType.NORMAL, # Runtime semver_range="^0.26.1", ) build_dep = CrateDependency( crate_id=1048221, dependency_id=271975, dependency_type=DependencyType.BUILD, # Build semver_range="^0.26.1", ) crate = crate_with_dependencies(dependencies=[runtime_dep, build_dep]) # Execute new_deps, removed_deps = diff.diff_deps(crate) # Assert assert len(removed_deps) == 0, "No deps should be removed" # With the fix, only create 1 dependency with highest priority type # Priority: NORMAL > BUILD > DEV assert len(new_deps) == 1, "Should create only 1 dep with highest priority type" # Should have runtime type (NORMAL has highest priority) new_dep = new_deps[0] assert new_dep.package_id == package_ids["main"] assert new_dep.dependency_id == package_ids["dep"] assert new_dep.dependency_type_id == mock_config.dependency_types.runtime, ( "Should choose NORMAL (runtime) over BUILD as it has higher priority" ) def test_multiple_dependency_types_build_vs_dev( self, packages, package_ids, diff_instance, crate_with_dependencies, mock_config ): """ Test that when a package depends on the same dependency package with BUILD and DEV types (no NORMAL), BUILD type takes precedence. 
Priority order: NORMAL > BUILD > DEV """ # Create diff with no existing dependencies diff = diff_instance( package_map={"1048221": packages["main"], "271975": packages["dep"]} ) # Create crate with BUILD and DEV dependencies to the same package build_dep = CrateDependency( crate_id=1048221, dependency_id=271975, dependency_type=DependencyType.BUILD, semver_range="^0.26.1", ) dev_dep = CrateDependency( crate_id=1048221, dependency_id=271975, dependency_type=DependencyType.DEV, semver_range="^0.26.1", ) # Add DEV first to test ordering doesn't matter crate = crate_with_dependencies(dependencies=[dev_dep, build_dep]) # Execute new_deps, removed_deps = diff.diff_deps(crate) # Assert assert len(removed_deps) == 0, "No deps should be removed" assert len(new_deps) == 1, "Should create only 1 dep with highest priority type" # Should have build type (BUILD > DEV) new_dep = new_deps[0] assert new_dep.package_id == package_ids["main"] assert new_dep.dependency_id == package_ids["dep"] assert new_dep.dependency_type_id == mock_config.dependency_types.build, ( "Should choose BUILD over DEV as it has higher priority" ) ================================================ FILE: tests/package_managers/debian/conftest.py ================================================ from package_managers.debian.parser import DebianData, Depends def create_debian_package( package: str = "test-package", description: str = "Test package", homepage: str = "", vcs_git: str = "", vcs_browser: str = "", directory: str = "", filename: str = "", depends: list[str] | None = None, build_depends: list[str] | None = None, recommends: list[str] | None = None, suggests: list[str] | None = None, ) -> DebianData: """Helper to create DebianData instances for testing""" debian_data = DebianData() debian_data.package = package debian_data.description = description debian_data.homepage = homepage debian_data.vcs_git = vcs_git debian_data.vcs_browser = vcs_browser debian_data.directory = directory debian_data.filename = filename # Convert string dependencies to Depends objects if depends: debian_data.depends = [Depends(package=dep, semver="*") for dep in depends] if build_depends: # build_depends is now list[Depends] like other dependency fields debian_data.build_depends = [ Depends(package=dep, semver="*") for dep in build_depends ] if recommends: debian_data.recommends = [ Depends(package=dep, semver="*") for dep in recommends ] if suggests: debian_data.suggests = [Depends(package=dep, semver="*") for dep in suggests] return debian_data ================================================ FILE: tests/package_managers/debian/test_debian_diff.py ================================================ from uuid import uuid4 from core.models import URL, LegacyDependency, Package, PackageURL from core.structs import Cache, URLKey from package_managers.debian.diff import DebianDiff from package_managers.debian.main import diff as main_diff from tests.package_managers.debian.conftest import create_debian_package class TestDebianDifferentialLoading: """Test cases for debian differential loading scenarios""" def test_package_exists_url_update(self, mock_config, mock_logger, mock_db): """Tests that Diff updates URLs when the package exists and the URL changes""" # Setup existing package and URL existing_pkg_id = uuid4() existing_url_id = uuid4() existing_package_url_id = uuid4() existing_package = Package( id=existing_pkg_id, derived_id="debian/url-pkg", name="url-pkg", package_manager_id=mock_config.pm_config.pm_id, import_id="url-pkg", readme="Test package", ) 
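# The package starts out linked to an old homepage URL; the diff below should create a new URL and re-link the package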
existing_url = URL( id=existing_url_id, url="https://old-homepage.com", url_type_id=mock_config.url_types.homepage, ) existing_package_url = PackageURL( id=existing_package_url_id, package_id=existing_pkg_id, url_id=existing_url_id, ) # Create cache cache = Cache( package_map={"url-pkg": existing_package}, url_map={ URLKey( "https://old-homepage.com", mock_config.url_types.homepage ): existing_url }, package_urls={existing_pkg_id: {existing_package_url}}, dependencies={}, ) # Create package data with new URL new_pkg_data = create_debian_package( package="url-pkg", homepage="https://new-homepage.com", ) new_urls = {} # this tracks all the new URLs we've created so far # Test the diff diff = DebianDiff(mock_config, cache, mock_db, mock_logger) resolved_urls = diff.diff_url("url-pkg", new_pkg_data, new_urls) new_links, _ = diff.diff_pkg_url(existing_pkg_id, resolved_urls) # Assertions assert len(new_links) == 1 # New URL should be created new_link = new_links[0] assert new_link.package_id == existing_pkg_id # The URL should be created in new_urls dict and the link should reference it assert len(new_urls) == 1 # One new URL should be created new_url_key = next(iter(new_urls.keys())) new_url = new_urls[new_url_key] assert new_link.url_id == new_url.id # Link should reference the new URL assert new_url_key.url == "https://new-homepage.com" assert new_url_key.url_type_id == mock_config.url_types.homepage def test_package_exists_dependency_change(self, mock_config, mock_logger, mock_db): """ Tests that diff correctly records: - New dependency - Changes to existing dependencies - Removed dependencies """ # Setup existing package and dependencies existing_pkg_id = uuid4() dep1_id = uuid4() dep2_id = uuid4() dep3_id = uuid4() existing_import_id = "debian/dep-pkg" existing_package = Package( id=existing_pkg_id, derived_id=existing_import_id, name="dep-pkg", package_manager_id=mock_config.pm_config.pm_id, import_id=existing_import_id, readme="", ) # Create dependency packages dep1_pkg = Package( id=dep1_id, derived_id="debian/dep1", name="dep1", import_id="debian/dep1" ) dep2_pkg = Package( id=dep2_id, derived_id="debian/dep2", name="dep2", import_id="debian/dep2" ) dep3_pkg = Package( id=dep3_id, derived_id="debian/dep3", name="dep3", import_id="debian/dep3" ) # Create existing dependencies (dep1 as runtime, dep2 as build) existing_dep1 = LegacyDependency( package_id=existing_pkg_id, dependency_id=dep1_id, dependency_type_id=mock_config.dependency_types.runtime, ) existing_dep2 = LegacyDependency( package_id=existing_pkg_id, dependency_id=dep2_id, dependency_type_id=mock_config.dependency_types.build, ) # Create cache cache = Cache( package_map={ existing_import_id: existing_package, "debian/dep1": dep1_pkg, "debian/dep2": dep2_pkg, "debian/dep3": dep3_pkg, }, url_map={}, package_urls={}, dependencies={existing_pkg_id: {existing_dep1, existing_dep2}}, ) # Create new package data with changed dependencies # Remove dep2, keep dep1, add dep3 as runtime new_pkg_data = create_debian_package( package="dep-pkg", depends=["dep1", "dep3"], # runtime deps build_depends=[], # no build deps (removes dep2) ) # Test the diff diff = DebianDiff(mock_config, cache, mock_db, mock_logger) new_deps, removed_deps = diff.diff_deps(existing_import_id, new_pkg_data) # Assertions assert len(new_deps) == 1 # dep3 should be added assert new_deps[0].dependency_id == dep3_id assert new_deps[0].dependency_type_id == mock_config.dependency_types.runtime assert len(removed_deps) == 1 # dep2 should be removed assert 
removed_deps[0].dependency_id == dep2_id assert removed_deps[0].dependency_type_id == mock_config.dependency_types.build def test_completely_new_package(self, mock_config, mock_logger, mock_db): """Tests the addition of completely new packages & new URLs""" # Create empty cache (no existing packages) cache = Cache(package_map={}, url_map={}, package_urls={}, dependencies={}) # Create new package data new_pkg_data = create_debian_package( package="new-pkg", description="A new package", homepage="https://github.com/example/new-pkg", depends=["some-dep"], build_depends=["build-tool"], ) # Test the diff diff = DebianDiff(mock_config, cache, mock_db, mock_logger) pkg_id, pkg_obj, update_payload = diff.diff_pkg("debian/new-pkg", new_pkg_data) # Assertions assert pkg_obj is not None # New package should be created assert pkg_obj.derived_id == "debian/new-pkg" assert pkg_obj.name == "new-pkg" assert pkg_obj.import_id == "debian/new-pkg" assert pkg_obj.package_manager_id == mock_config.pm_config.pm_id assert pkg_obj.readme == "A new package" assert update_payload == {} # No updates for new package # Test URL creation new_urls = {} resolved_urls = diff.diff_url("new-pkg", new_pkg_data, new_urls) new_links, updated_links = diff.diff_pkg_url(pkg_id, resolved_urls) # Should create URL for homepage assert len(new_urls) >= 1 # At least homepage assert len(new_links) >= 1 # At least homepage link assert len(updated_links) == 0 # No existing links to update # Check that homepage URL was created homepage_url_found = False for url_key, _url in new_urls.items(): if url_key.url_type_id == mock_config.url_types.homepage: assert url_key.url == "https://github.com/example/new-pkg" homepage_url_found = True break assert homepage_url_found def test_no_changes_scenario(self, mock_config, mock_logger, mock_db): """Tests where package exists but has no changes""" # Setup existing package existing_pkg_id = uuid4() existing_package = Package( id=existing_pkg_id, derived_id="debian/unchanged-pkg", name="unchanged-pkg", package_manager_id=mock_config.pm_config.pm_id, import_id="unchanged-pkg", readme="Unchanged description", ) cache = Cache( package_map={"unchanged-pkg": existing_package}, url_map={}, package_urls={}, dependencies={}, ) # Create package data with same description pkg_data = create_debian_package( package="unchanged-pkg", description="Unchanged description" ) # Test the diff diff = DebianDiff(mock_config, cache, mock_db, mock_logger) pkg_id, pkg_obj, update_payload = diff.diff_pkg("unchanged-pkg", pkg_data) # Assertions assert pkg_id == existing_pkg_id assert pkg_obj is None # No new package assert update_payload is None # No changes def test_package_description_update(self, mock_config, mock_logger, mock_db): """Test scenario where package exists but description has changed""" # Setup existing package existing_pkg_id = uuid4() existing_package = Package( id=existing_pkg_id, derived_id="debian/desc-pkg", name="desc-pkg", package_manager_id=mock_config.pm_config.pm_id, import_id="desc-pkg", readme="Old description", ) cache = Cache( package_map={"desc-pkg": existing_package}, url_map={}, package_urls={}, dependencies={}, ) # Create package data with new description pkg_data = create_debian_package( package="desc-pkg", description="New description" ) # Test the diff diff = DebianDiff(mock_config, cache, mock_db, mock_logger) pkg_id, pkg_obj, update_payload = diff.diff_pkg("desc-pkg", pkg_data) # Assertions assert pkg_id == existing_pkg_id assert pkg_obj is None # No new package assert update_payload is not 
None # Should have changes assert update_payload["id"] == existing_pkg_id assert update_payload["readme"] == "New description" def test_missing_dependency_handling(self, mock_config, mock_logger, mock_db): """Tests that we DON'T add dependencies that reference packages missing from the cache""" existing_pkg_id = uuid4() existing_package = Package( id=existing_pkg_id, derived_id="debian/missing-dep-pkg", name="missing-dep-pkg", import_id="missing-dep-pkg", ) cache = Cache( package_map={"missing-dep-pkg": existing_package}, url_map={}, package_urls={}, dependencies={}, ) # Create package with dependency that doesn't exist in cache pkg_data = create_debian_package( package="missing-dep-pkg", depends=["non-existent-dep"] ) diff = DebianDiff(mock_config, cache, mock_db, mock_logger) new_deps, removed_deps = diff.diff_deps("missing-dep-pkg", pkg_data) # Should handle gracefully - no deps added for missing packages assert len(new_deps) == 0 assert len(removed_deps) == 0 def test_dependency_type_priority_no_change( self, mock_config, mock_logger, mock_db ): """ Scenario: - p1 has runtime dependency to p2 in cache - p1 depends on p2 as both runtime and build in parsed data Expect no change (runtime has priority). """ # Setup existing package and dependencies p1_id = uuid4() p2_id = uuid4() p1_pkg = Package(id=p1_id, derived_id="debian/p1", name="p1", import_id="p1") p2_pkg = Package(id=p2_id, derived_id="debian/p2", name="p2", import_id="p2") # Existing runtime dependency in cache existing_runtime_dep = LegacyDependency( package_id=p1_id, dependency_id=p2_id, dependency_type_id=mock_config.dependency_types.runtime, ) cache = Cache( package_map={"debian/p1": p1_pkg, "debian/p2": p2_pkg}, url_map={}, package_urls={}, dependencies={p1_id: {existing_runtime_dep}}, ) # Parsed data has p2 as both runtime and build dependency new_pkg_data = create_debian_package( package="p1", depends=["p2"], # runtime build_depends=["p2"], # build ) diff = DebianDiff(mock_config, cache, mock_db, mock_logger) new_deps, removed_deps = diff.diff_deps("debian/p1", new_pkg_data) # Should have no changes - runtime priority means no change needed assert len(new_deps) == 0 assert len(removed_deps) == 0 def test_dependency_type_change_runtime_to_build( self, mock_config, mock_logger, mock_db ): """ Scenario: - p1 has runtime dependency to p2 in cache - p1 has build dependency to p2 in parsed data.
Expect removed runtime dependency and new build dependency """ p1_id = uuid4() p2_id = uuid4() p1_pkg = Package(id=p1_id, derived_id="debian/p1", name="p1", import_id="p1") p2_pkg = Package(id=p2_id, derived_id="debian/p2", name="p2", import_id="p2") # Existing runtime dependency existing_runtime_dep = LegacyDependency( package_id=p1_id, dependency_id=p2_id, dependency_type_id=mock_config.dependency_types.runtime, ) cache = Cache( package_map={"debian/p1": p1_pkg, "debian/p2": p2_pkg}, url_map={}, package_urls={}, dependencies={p1_id: {existing_runtime_dep}}, ) # Parsed data only has build dependency new_pkg_data = create_debian_package( package="p1", depends=[], # no runtime deps build_depends=["p2"], # only build ) diff = DebianDiff(mock_config, cache, mock_db, mock_logger) new_deps, removed_deps = diff.diff_deps("debian/p1", new_pkg_data) # Should remove runtime and add build assert len(removed_deps) == 1 assert removed_deps[0].dependency_id == p2_id assert ( removed_deps[0].dependency_type_id == mock_config.dependency_types.runtime ) assert len(new_deps) == 1 assert new_deps[0].dependency_id == p2_id assert new_deps[0].dependency_type_id == mock_config.dependency_types.build def test_dependency_type_change_build_to_runtime( self, mock_config, mock_logger, mock_db ): """ Scenario: - p1 has build dependency to p2 in cache - p1 has runtime dependency to p2 in parsed data. Expect removed build dependency and new runtime dependency """ p1_id = uuid4() p2_id = uuid4() p1_pkg = Package(id=p1_id, derived_id="debian/p1", name="p1", import_id="p1") p2_pkg = Package(id=p2_id, derived_id="debian/p2", name="p2", import_id="p2") # Existing build dependency existing_build_dep = LegacyDependency( package_id=p1_id, dependency_id=p2_id, dependency_type_id=mock_config.dependency_types.build, ) cache = Cache( package_map={"debian/p1": p1_pkg, "debian/p2": p2_pkg}, url_map={}, package_urls={}, dependencies={p1_id: {existing_build_dep}}, ) # Parsed data only has runtime dependency new_pkg_data = create_debian_package( package="p1", depends=["p2"], # runtime build_depends=[], # no build deps ) diff = DebianDiff(mock_config, cache, mock_db, mock_logger) new_deps, removed_deps = diff.diff_deps("debian/p1", new_pkg_data) # Should remove build and add runtime assert len(removed_deps) == 1 assert removed_deps[0].dependency_id == p2_id assert removed_deps[0].dependency_type_id == mock_config.dependency_types.build assert len(new_deps) == 1 assert new_deps[0].dependency_id == p2_id assert new_deps[0].dependency_type_id == mock_config.dependency_types.runtime def test_dependency_type_priority_new_package( self, mock_config, mock_logger, mock_db ): """ Scenario: - p1 has no dependencies to p2 in cache - p1 has both runtime and build dependencies to p2 in parsed data Expect one new runtime dependency (priority over build). 
""" p1_id = uuid4() p2_id = uuid4() p1_pkg = Package(id=p1_id, derived_id="debian/p1", name="p1", import_id="p1") p2_pkg = Package(id=p2_id, derived_id="debian/p2", name="p2", import_id="p2") cache = Cache( package_map={"debian/p1": p1_pkg, "debian/p2": p2_pkg}, url_map={}, package_urls={}, dependencies={}, # No existing dependencies ) # Parsed data has both runtime and build dependencies to p2 new_pkg_data = create_debian_package( package="p1", depends=["p2"], # runtime build_depends=["p2"], # build ) diff = DebianDiff(mock_config, cache, mock_db, mock_logger) new_deps, removed_deps = diff.diff_deps("debian/p1", new_pkg_data) # Should only create one new dependency - runtime (higher priority) assert len(removed_deps) == 0 assert len(new_deps) == 1 assert new_deps[0].dependency_id == p2_id assert new_deps[0].dependency_type_id == mock_config.dependency_types.runtime def test_debian_specific_dependencies(self, mock_config, mock_logger, mock_db): """Test Debian-specific dependency types: recommends, suggests""" p1_id = uuid4() p2_id = uuid4() p3_id = uuid4() p1_pkg = Package(id=p1_id, derived_id="debian/p1", name="p1") p2_pkg = Package(id=p2_id, derived_id="debian/p2", name="p2") p3_pkg = Package(id=p3_id, derived_id="debian/p3", name="p3") cache = Cache( package_map={"debian/p1": p1_pkg, "debian/p2": p2_pkg, "debian/p3": p3_pkg}, url_map={}, package_urls={}, dependencies={}, ) # Parsed data with recommends and suggests (mapped to runtime) new_pkg_data = create_debian_package( package="p1", recommends=["p2"], suggests=["p3"], ) diff = DebianDiff(mock_config, cache, mock_db, mock_logger) new_deps, removed_deps = diff.diff_deps("debian/p1", new_pkg_data) # Should create runtime dependencies for both recommends and suggests assert len(removed_deps) == 0 assert len(new_deps) == 2 # Both should be runtime dependencies for dep in new_deps: assert dep.dependency_type_id == mock_config.dependency_types.runtime assert dep.dependency_id in [p2_id, p3_id] class TestDebianDiffFunction: """Test cases for the main.diff function""" def test_duplicate_package_paragraphs(self, mock_config, mock_logger, mock_db): """Tests the case when the Debian Packages file contains duplicate packages""" d1 = Package(id=uuid4(), derived_id="debian/d1", name="d1", import_id="d1") d2 = Package(id=uuid4(), derived_id="debian/d2", name="d2", import_id="d2") p1 = create_debian_package( package="linux-doc", homepage="homepage.org", depends=["d1"] ) p2 = create_debian_package( package="linux-doc", homepage="homepage.org", depends=["d2"] ) cache = Cache( package_map={"debian/d1": d1, "debian/d2": d2}, url_map={}, package_urls={}, dependencies={}, ) data = [p1, p2] result = main_diff(data, mock_config, cache, mock_db, mock_logger) assert len(result.new_packages) == 1 assert len(result.new_package_urls) == 1 assert len(result.new_deps) == 0 # bc we don't load dependencies of new pkgs ================================================ FILE: tests/package_managers/debian/test_debian_parser.py ================================================ """ Test Debian package parser functionality. This module tests the DebianParser class which parses Debian package and source entries from Packages and Sources files. 
""" import pytest from package_managers.debian.parser import DebianParser @pytest.fixture def simple_package(): return """Package: 0ad Version: 0.0.26-1 Installed-Size: 19162 Maintainer: Debian Games Team Architecture: amd64 Depends: 0ad-data (>= 0.0.26), 0ad-data-common (>= 0.0.26), libc6 (>= 2.29), libcurl4 (>= 7.16.2), libenet7 (>= 1.3.13), libgloox18, libjsoncpp25 (>= 1.9.5), libminiupnpc17 (>= 1.9.20140610), libnspr4 (>= 2:4.9.2), libnss3 (>= 2:3.22) Recommends: fonts-freefont-ttf, fonts-texgyre Suggests: 0ad-dbg Description: Real-time strategy game of ancient warfare Homepage: https://play0ad.com/ Section: games Priority: optional Filename: pool/main/0/0ad/0ad_0.0.26-1_amd64.deb Size: 6050744 MD5sum: a777ddf01c18dbdef15c589f8325d7a3 SHA256: 9da19833c1a51e890aa8a11f82ec1e383c0e79410c3d2f6845fd2ec3e23249b8 """ @pytest.fixture def simple_source(): return """Package: 0ad Binary: 0ad, 0ad-dbg, 0ad-data, 0ad-data-common Version: 0.0.26-1 Maintainer: Debian Games Team Uploaders: Vincent Cheng , Euan Kemp Build-Depends: debhelper-compat (= 13), cmake, dpkg-dev (>= 1.15.5), libboost-dev, libenet-dev (>= 1.3), libopenal-dev, libpng-dev, libsdl2-dev, libtiff5-dev, libvorbis-dev, libxcursor-dev, pkg-config, zlib1g-dev, libcurl4-gnutls-dev, libgloox-dev, libjsoncpp-dev, libminiupnpc-dev, libnspr4-dev, libnss3-dev, libsodium-dev, libwxgtk3.0-gtk3-dev | libwxgtk3.0-dev, python3, python3-dev, libxml2-dev, rust-gdb [amd64 i386 ppc64el] Architecture: any all Standards-Version: 4.5.1 Format: 3.0 (quilt) Files: 2fc0f38b8a4cf56fea7040fcf5f79ca3 2414 0ad_0.0.26-1.dsc 35ca57e781448c69ba31323313e972af 31463733 0ad_0.0.26.orig.tar.xz f78de44c8a9c32e6be3ae99f2747c330 71948 0ad_0.0.26-1.debian.tar.xz Vcs-Browser: https://salsa.debian.org/games-team/0ad Vcs-Git: https://salsa.debian.org/games-team/0ad.git Directory: pool/main/0/0ad Priority: optional Section: games Testsuite: autopkgtest Testsuite-Triggers: g++, pyrex """ @pytest.fixture def multiline_binary(): """Fixture for binary fields, specifically multi-lines ones""" return """ Package: binutils Binary: binutils-for-host, binutils-for-build, binutils-ia64-linux-gnu-dbg, binutils-m68k-linux-gnu, binutils-mips64el-linux-gnuabin32-dbg, binutils-mipsisa64r6-linux-gnuabin32, binutils-mipsisa64r6el-linux-gnuabi64-dbg """ @pytest.fixture def build_depends(): """Fixture for all kinds of build depends.""" return """ Package: example Build-Depends: gcc-11-source (>= 11.3.0-11~), gawk, lib32gcc1-amd64-cross [amd64 arm64 i386 ppc64el x32], g++-11, gm2-11 [!powerpc !ppc64 !x32] """ @pytest.mark.parser class TestDebianParser: """Test the Debian parser functionality.""" def test_build_depends(self, build_depends): """Test parsing build depends.""" parser = DebianParser(build_depends) sources = list(parser.parse()) assert len(sources) == 1 source = sources[0] assert len(source.build_depends) == 5 assert any(dep.package == "gcc-11-source" for dep in source.build_depends) assert any(dep.package == "gawk" for dep in source.build_depends) assert any( dep.package == "lib32gcc1-amd64-cross" for dep in source.build_depends ) assert any(dep.package == "g++-11" for dep in source.build_depends) assert any(dep.package == "gm2-11" for dep in source.build_depends) def test_multiline_binary(self, multiline_binary): """Test handling of multiline binaries.""" parser = DebianParser(multiline_binary) sources = list(parser.parse()) assert len(sources) == 1 source = sources[0] assert source.package == "binutils" assert source.binary == [ "binutils-for-host", "binutils-for-build", 
"binutils-ia64-linux-gnu-dbg", "binutils-m68k-linux-gnu", "binutils-mips64el-linux-gnuabin32-dbg", "binutils-mipsisa64r6-linux-gnuabin32", "binutils-mipsisa64r6el-linux-gnuabi64-dbg", ] def test_parse_package_data(self, simple_package): """Test parsing a typical package entry from Packages file.""" # Sample package data from a Packages file package_data = simple_package # Parse the package data parser = DebianParser(package_data) packages = list(parser.parse()) # Validate we have one package assert len(packages) == 1 package = packages[0] # Test basic fields assert package.package == "0ad" assert package.version == "0.0.26-1" assert package.installed_size == 19162 assert package.architecture == "amd64" # Test maintainer parsing assert package.maintainer.name == "Debian Games Team" assert package.maintainer.email == "pkg-games-devel@lists.alioth.debian.org" # Test dependency parsing assert len(package.depends) == 10 assert package.depends[0].package == "0ad-data" assert package.depends[0].semver == ">= 0.0.26" # Test recommends parsing assert len(package.recommends) == 2 assert package.recommends[0].package == "fonts-freefont-ttf" # Test suggests parsing assert len(package.suggests) == 1 assert package.suggests[0].package == "0ad-dbg" def test_parse_source_data(self, simple_source): """Test parsing a typical source entry from Sources file.""" source_data = simple_source parser = DebianParser(source_data) sources = list(parser.parse()) # Validate we have one source package assert len(sources) == 1 source = sources[0] # Test basic fields assert source.package == "0ad" assert source.version == "0.0.26-1" # Test binary field assert isinstance(source.binary, list) # Fixed: binary should be a list assert "0ad" in source.binary assert "0ad-dbg" in source.binary assert "0ad-data" in source.binary assert "0ad-data-common" in source.binary # Test maintainer parsing assert source.maintainer.name == "Debian Games Team" assert source.maintainer.email == "pkg-games-devel@lists.alioth.debian.org" # Test uploaders parsing assert len(source.uploaders) == 2 assert source.uploaders[0].name == "Vincent Cheng" assert source.uploaders[0].email == "vcheng@debian.org" assert source.uploaders[1].name == "Euan Kemp" assert source.uploaders[1].email == "euank@euank.com" # Test build depends parsing assert len(source.build_depends) == 25 assert any(dep.package == "debhelper-compat" for dep in source.build_depends) # Test other source fields assert source.format == "3.0 (quilt)" assert source.vcs_browser == "salsa.debian.org/games-team/0ad" assert source.vcs_git == "salsa.debian.org/games-team/0ad" assert source.testsuite == "autopkgtest" assert source.testsuite_triggers == "g++, pyrex" ================================================ FILE: tests/package_managers/debian/test_debian_sources.py ================================================ from package_managers.debian.main import ( build_package_to_source_mapping, enrich_package_with_source, ) from tests.package_managers.debian.conftest import create_debian_package class TestPackageSourceMapping: """Test cases for package to source mapping functionality""" def test_build_package_to_source_mapping_with_binary_list( self, tmp_path, mock_logger ): """Test building mapping when source has explicit binary list""" # Create a test sources file sources_content = """Package: test-source Binary: test-pkg1, test-pkg2, test-pkg3 Vcs-Git: https://github.com/test/test-source.git Homepage: https://example.com/test-source Package: another-source Binary: another-pkg Vcs-Browser: 
https://github.com/test/another-source """ sources_file = tmp_path / "sources" sources_file.write_text(sources_content) # Build mapping mapping = build_package_to_source_mapping(str(sources_file), mock_logger) # Verify mapping assert len(mapping) == 4 # 3 packages from first source + 1 from second assert "test-pkg1" in mapping assert "test-pkg2" in mapping assert "test-pkg3" in mapping assert "another-pkg" in mapping # Verify source data is correctly associated assert mapping["test-pkg1"].package == "test-source" # URLs are normalized by the parser - expect normalized format assert mapping["test-pkg1"].vcs_git == "github.com/test/test-source" assert mapping["test-pkg2"].package == "test-source" assert mapping["another-pkg"].package == "another-source" assert mapping["another-pkg"].vcs_browser == "github.com/test/another-source" def test_build_package_to_source_mapping_no_binary_list( self, tmp_path, mock_logger ): """Test building mapping when source has no explicit binary list""" # Create a test sources file with no Binary field sources_content = """Package: single-source Vcs-Git: https://github.com/test/single-source.git Homepage: https://example.com/single-source """ sources_file = tmp_path / "sources" sources_file.write_text(sources_content) # Build mapping mapping = build_package_to_source_mapping(str(sources_file), mock_logger) # Verify mapping - should use source package name as binary name assert len(mapping) == 1 assert "single-source" in mapping assert mapping["single-source"].package == "single-source" # URLs are normalized by the parser - expect normalized format assert mapping["single-source"].vcs_git == "github.com/test/single-source" def test_enrich_package_with_explicit_source(self, mock_logger): """Test enriching package that has explicit source reference""" # Create package data with explicit source reference package_data = create_debian_package( package="binary-pkg", description="A binary package", ) package_data.source = "source-pkg" # Create source mapping source_data = create_debian_package( package="source-pkg", vcs_git="github.com/test/source-pkg", # Already normalized format homepage="example.com/source-pkg", # Already normalized format build_depends=["build-dep1", "build-dep2"], ) source_mapping = {"binary-pkg": source_data} # Enrich package enriched = enrich_package_with_source(package_data, source_mapping, mock_logger) # Verify enrichment assert enriched.package == "binary-pkg" assert enriched.description == "A binary package" assert enriched.vcs_git == "github.com/test/source-pkg" assert enriched.homepage == "example.com/source-pkg" assert len(enriched.build_depends) == 2 build_depend_names = [item.package for item in enriched.build_depends] assert build_depend_names == ["build-dep1", "build-dep2"] def test_enrich_package_no_explicit_source(self, mock_logger): """Test enriching package with no explicit source reference""" # Create package data with no explicit source package_data = create_debian_package( package="self-source-pkg", description="A self-sourced package", ) # Create source mapping with same name as package source_data = create_debian_package( package="self-source-pkg", vcs_browser="github.com/test/self-source-pkg", # Already normalized format directory="pool/main/s/self-source-pkg", ) source_mapping = {"self-source-pkg": source_data} # Enrich package enriched = enrich_package_with_source(package_data, source_mapping, mock_logger) # Verify enrichment assert enriched.package == "self-source-pkg" assert enriched.vcs_browser == 
"github.com/test/self-source-pkg" assert enriched.directory == "pool/main/s/self-source-pkg" def test_enrich_package_missing_source_warning(self, caplog, mock_logger): """Test warning when package references missing source""" from package_managers.debian.main import enrich_package_with_source # Create package data with source that doesn't exist in mapping package_data = create_debian_package( package="orphan-pkg", description="An orphaned package", ) package_data.source = "missing-source" # Empty source mapping source_mapping = {} # Enrich package (this should log a warning) enriched = enrich_package_with_source(package_data, source_mapping, mock_logger) # The warning should be present in the function execution output # Check the logged warning message directly # Note: The warning is logged by our function, so we check the expected behavior # Package should remain unchanged assert enriched.package == "orphan-pkg" assert enriched.description == "An orphaned package" assert not enriched.vcs_git assert not enriched.vcs_browser def test_enrich_package_preserves_existing_fields(self, mock_logger): """Test that existing package fields are not overwritten""" # Create package data with existing homepage package_data = create_debian_package( package="pkg-with-homepage", homepage="pkg-homepage.com", # Normalized format ) # Create source data with different homepage source_data = create_debian_package( package="pkg-with-homepage", homepage="source-homepage.com", # Normalized format vcs_git="github.com/test/pkg", # Normalized format ) source_mapping = {"pkg-with-homepage": source_data} # Enrich package enriched = enrich_package_with_source(package_data, source_mapping, mock_logger) # Verify package homepage is preserved, but source info is added assert enriched.homepage == "pkg-homepage.com" # Package value preserved assert enriched.vcs_git == "github.com/test/pkg" # Source value added ================================================ FILE: tests/package_managers/homebrew/conftest.py ================================================ from datetime import datetime from uuid import UUID, uuid4 import pytest from core.models import Package from core.structs import Cache from package_managers.homebrew.diff import Diff from package_managers.homebrew.structs import Actual @pytest.fixture def package_ids() -> dict[str, UUID]: """Fixture providing consistent package IDs for testing.""" return {"foo": uuid4(), "bar": uuid4(), "baz": uuid4(), "qux": uuid4()} @pytest.fixture def packages(package_ids) -> dict[str, Package]: """Fixture providing test packages.""" return { "foo": Package( id=package_ids["foo"], name="foo", package_manager_id=1, import_id="foo", created_at=datetime.now(), updated_at=datetime.now(), ), "bar": Package( id=package_ids["bar"], name="bar", package_manager_id=1, import_id="bar", created_at=datetime.now(), updated_at=datetime.now(), ), "baz": Package( id=package_ids["baz"], name="baz", package_manager_id=1, import_id="baz", created_at=datetime.now(), updated_at=datetime.now(), ), "qux": Package( id=package_ids["qux"], name="qux", package_manager_id=1, import_id="qux", created_at=datetime.now(), updated_at=datetime.now(), ), } @pytest.fixture def diff_instance(mock_config): """ Factory fixture to create Diff instances with specific cache configurations. Returns a function that creates Diff instances. 
""" def create_diff( package_map, dependencies=None, url_map=None, package_urls=None ) -> Diff: cache = Cache( package_map=package_map, url_map=url_map or {}, package_urls=package_urls or {}, dependencies=dependencies or {}, ) return Diff(mock_config, cache) return create_diff @pytest.fixture def homebrew_formula(): """ Factory fixture to create Actual homebrew formula objects. Returns a function that creates Actual objects. """ def create_formula( formula_name, dependencies=None, build_dependencies=None, test_dependencies=None, recommended_dependencies=None, optional_dependencies=None, ): return Actual( formula=formula_name, description="Test formula", license="MIT", homepage="", source="", repository="", dependencies=dependencies or [], build_dependencies=build_dependencies or [], test_dependencies=test_dependencies or [], recommended_dependencies=recommended_dependencies or [], optional_dependencies=optional_dependencies or [], ) return create_formula ================================================ FILE: tests/package_managers/homebrew/test_homebrew_diff_deps.py ================================================ """ Test the diff_deps functionality for the homebrew package manager. This module tests the Diff.diff_deps method which determines what dependencies need to be added or removed when processing homebrew formula updates. """ from datetime import datetime from uuid import UUID, uuid4 import pytest from core.models import LegacyDependency, Package from package_managers.homebrew.diff import Diff from package_managers.homebrew.structs import Actual @pytest.mark.transformer class TestDiffDeps: """Tests for the diff_deps method in the Diff class.""" def test_new_package_not_in_cache(self, packages, diff_instance, homebrew_formula): """ If the package is not even in the package cache, that means it is new. Since we won't know the ID of the package during dependency loading, we're going to continue to the next package and write a warning. 
""" # Create cache without the package we'll look for diff = diff_instance( package_map={"bar": packages["bar"], "baz": packages["baz"]} ) # Create an Actual package that's not in the cache new_pkg = homebrew_formula( "new_package", dependencies=["baz"], build_dependencies=["bar"] ) # Execute new_deps, removed_deps = diff.diff_deps(new_pkg) # Assert assert len(new_deps) == 0, "No new deps for new pkg" assert len(removed_deps) == 0, "No removed deps for new pkg" def test_existing_package_adding_dependency( self, packages: dict[str, Package], package_ids: dict[str, UUID], diff_instance: Diff, homebrew_formula: Actual, mock_config, ): """Test diff_deps when adding a new dependency to an existing package.""" # Create existing dependency existing_dep = LegacyDependency( id=uuid4(), package_id=package_ids["foo"], dependency_id=package_ids["bar"], dependency_type_id=mock_config.dependency_types.runtime, created_at=datetime.now(), updated_at=datetime.now(), ) # Create diff with existing dependency diff: Diff = diff_instance( package_map=packages, dependencies={package_ids["foo"]: {existing_dep}} ) for k, s in diff.caches.dependencies.items(): print(f"Package: {k}") for v in s: print(v.dependency_id) print(v.dependency_type_id) # Create formula with existing dependency plus a new one pkg = homebrew_formula( "foo", dependencies=["bar"], # existing dependency build_dependencies=["baz"], # new dependency ) # Execute new_deps, removed_deps = diff.diff_deps(pkg) for dep in new_deps: print( f"{dep.package_id} -> {dep.dependency_id} as {dep.dependency_type_id}" ) # Assert assert len(new_deps) == 1, "One new dependency should be added" assert len(removed_deps) == 0, "No dependencies should be removed" # Verify new dep is a build dep on baz new_dep = new_deps[0] assert new_dep.package_id == package_ids["foo"] assert new_dep.dependency_id == package_ids["baz"] assert new_dep.dependency_type_id == mock_config.dependency_types.build def test_existing_package_removing_dependency( self, packages, package_ids, diff_instance, homebrew_formula, mock_config ): """Test diff_deps when removing a dependency from an existing package.""" # Create existing dependencies existing_dep = LegacyDependency( id=uuid4(), package_id=package_ids["foo"], dependency_id=package_ids["bar"], dependency_type_id=mock_config.dependency_types.runtime, created_at=datetime.now(), updated_at=datetime.now(), ) to_be_removed_dep = LegacyDependency( id=uuid4(), package_id=package_ids["foo"], dependency_id=package_ids["baz"], dependency_type_id=mock_config.dependency_types.build, created_at=datetime.now(), updated_at=datetime.now(), ) # Create diff with both dependencies diff = diff_instance( package_map=packages, dependencies={package_ids["foo"]: {existing_dep, to_be_removed_dep}}, ) # Create formula with only one of the previous dependencies pkg = homebrew_formula( "foo", dependencies=["bar"], # only keep this dependency ) # Execute new_deps, removed_deps = diff.diff_deps(pkg) # Assert assert len(new_deps) == 0, "No new deps should be added" assert len(removed_deps) == 1, "One dep should be removed" # Verify removed dep is a build dep on baz removed_dep = removed_deps[0] assert removed_dep.package_id == package_ids["foo"] assert removed_dep.dependency_id == package_ids["baz"] assert removed_dep.dependency_type_id == mock_config.dependency_types.build def test_existing_package_changing_dependency_type( self, packages, package_ids, diff_instance, homebrew_formula, mock_config ): """ If the dependency types for a specific package to package 
relationship change, then Diff sees two changes: one removal and one addition. """ # Create existing runtime dependency existing_dep = LegacyDependency( id=uuid4(), package_id=package_ids["foo"], dependency_id=package_ids["bar"], dependency_type_id=mock_config.dependency_types.runtime, created_at=datetime.now(), updated_at=datetime.now(), ) # Create diff with existing dependency diff = diff_instance( package_map={"foo": packages["foo"], "bar": packages["bar"]}, dependencies={package_ids["foo"]: {existing_dep}}, ) # Create formula with same dependency but changed type pkg = homebrew_formula( "foo", build_dependencies=["bar"], # Changed from runtime to build ) # Execute new_deps, removed_deps = diff.diff_deps(pkg) # Assert assert len(new_deps) == 1, "One new dep should be added (new type)" assert len(removed_deps) == 1, "One dep should be removed (old type)" # Verify removed dep is runtime removed_dep = removed_deps[0] assert removed_dep.package_id == package_ids["foo"] assert removed_dep.dependency_id == package_ids["bar"] assert removed_dep.dependency_type_id == mock_config.dependency_types.runtime # Verify new dep is build new_dep = new_deps[0] assert new_dep.package_id == package_ids["foo"] assert new_dep.dependency_id == package_ids["bar"] assert new_dep.dependency_type_id == mock_config.dependency_types.build def test_existing_package_no_dependency_changes( self, packages, package_ids, diff_instance, homebrew_formula, mock_config ): """ Test a case where there's no changes to be made, because the database and Homebrew's JSON response indicate the same data. """ # Create existing dependency existing_dep = LegacyDependency( id=uuid4(), package_id=package_ids["foo"], dependency_id=package_ids["bar"], dependency_type_id=mock_config.dependency_types.runtime, created_at=datetime.now(), updated_at=datetime.now(), ) # Create diff with existing dependency diff = diff_instance( package_map={"foo": packages["foo"], "bar": packages["bar"]}, dependencies={package_ids["foo"]: {existing_dep}}, ) # Create formula with same dependency and type pkg = homebrew_formula( "foo", dependencies=["bar"], # same dependency with same type ) # Execute new_deps, removed_deps = diff.diff_deps(pkg) # Assert assert len(new_deps) == 0, "No new deps should be added" assert len(removed_deps) == 0, "No deps should be removed" def test_existing_package_same_dependency_multiple_times_no_changes( self, packages, package_ids, diff_instance, homebrew_formula, mock_config ): """ The case here is that the formula specifies a runtime and build dependency, and the db already captured the runtime dependency. Since the Diff class has a hierarchy of which dependency to choose, and runtime is on top, we should see no changes. 
""" # Create existing runtime dependency existing_dep = LegacyDependency( id=uuid4(), package_id=package_ids["foo"], dependency_id=package_ids["bar"], dependency_type_id=mock_config.dependency_types.runtime, created_at=datetime.now(), updated_at=datetime.now(), ) # Create diff with existing dependency diff = diff_instance( package_map={"foo": packages["foo"], "bar": packages["bar"]}, dependencies={package_ids["foo"]: {existing_dep}}, ) # Create formula with same dependency multiple times pkg = homebrew_formula("foo", dependencies=["bar"], build_dependencies=["bar"]) # Execute new_deps, removed_deps = diff.diff_deps(pkg) # Assert # Since runtime is encountered first and that's in the DB/cache, # we should see no new dependencies assert len(new_deps) == 0, "No new deps should be added" assert len(removed_deps) == 0, "No deps should be removed" def test_existing_package_same_dependency_multiple_times_yes_changes( self, packages, package_ids, diff_instance, homebrew_formula, mock_config ): """ In this case, suppose the DB maintained a build relationship between foo and bar and actually there is a runtime and build dependency according to Homebrew. Here CHAI updates this record to a runtime dependency. """ # Create existing build dependency existing_dep = LegacyDependency( id=uuid4(), package_id=package_ids["foo"], dependency_id=package_ids["bar"], dependency_type_id=mock_config.dependency_types.build, created_at=datetime.now(), updated_at=datetime.now(), ) # Create diff with existing dependency diff = diff_instance( package_map={"foo": packages["foo"], "bar": packages["bar"]}, dependencies={package_ids["foo"]: {existing_dep}}, ) # Create formula with same dependency multiple times pkg = homebrew_formula( "foo", dependencies=["bar"], # runtime has higher priority build_dependencies=["bar"], ) # Execute new_deps, removed_deps = diff.diff_deps(pkg) # Assert assert len(new_deps) == 1, "One new dependency should be added" assert new_deps[0].dependency_type_id == mock_config.dependency_types.runtime, ( "The new dependency should be runtime" ) assert len(removed_deps) == 1, "The build dependency should be removed" assert ( removed_deps[0].dependency_type_id == mock_config.dependency_types.build ), "The removed dependency should be build" ================================================ FILE: tests/package_managers/pkgx/test_pkgx_diff.py ================================================ #!/usr/bin/env pkgx uv run from unittest.mock import patch from uuid import uuid4 from core.models import URL, LegacyDependency, Package, PackageURL from core.structs import Cache, URLKey from package_managers.pkgx.diff import PkgxDiff from package_managers.pkgx.parser import ( Dependency, DependencyBlock, Distributable, PkgxPackage, Version, ) def create_pkgx_package( distributables: list[str] | None = None, dependencies: list[str] | None = None, build_deps: list[str] | None = None, test_deps: list[str] | None = None, ) -> PkgxPackage: """Helper to create PkgxPackage instances for testing""" # Create distributable blocks distributable_blocks = [] if distributables: for url in distributables: distributable_blocks.append(Distributable(url=url)) # Create dependency objects dep_objects = [ DependencyBlock( platform="all", dependencies=[ Dependency(name=dep, semver="*") for dep in (dependencies or []) ], ) ] build_dep_objects = [ DependencyBlock( platform="all", dependencies=[ Dependency(name=dep, semver="*") for dep in (build_deps or []) ], ) ] test_dep_objects = [ DependencyBlock( platform="all", dependencies=[ 
Dependency(name=dep, semver="*") for dep in (test_deps or []) ], ) ] # Create version object version = Version() return PkgxPackage( distributable=distributable_blocks, versions=version, dependencies=dep_objects, build=DependencyBlock(platform="linux", dependencies=build_dep_objects), test=DependencyBlock(platform="linux", dependencies=test_dep_objects), ) class TestPkgxDifferentialLoading: """Test cases for pkgx differential loading scenarios""" def test_package_exists_url_update(self, mock_config, mock_logger, mock_db): """Test scenario 2: Package existed in database and needed a URL update""" # Setup existing package and URL existing_pkg_id = uuid4() existing_url_id = uuid4() existing_package_url_id = uuid4() existing_package = Package( id=existing_pkg_id, derived_id="pkgx/url-pkg", name="url-pkg", package_manager_id=mock_config.pm_config.pm_id, import_id="url-pkg", readme="Test package", ) existing_url = URL( id=existing_url_id, url="https://old-source.com/file.tar.gz", url_type_id=mock_config.url_types.source, ) existing_package_url = PackageURL( id=existing_package_url_id, package_id=existing_pkg_id, url_id=existing_url_id, ) # Create cache cache = Cache( package_map={"url-pkg": existing_package}, url_map={ URLKey( "https://old-source.com/file.tar.gz", mock_config.url_types.source ): existing_url }, package_urls={existing_pkg_id: {existing_package_url}}, dependencies={}, ) # Create package data with new URL new_pkg_data = create_pkgx_package( distributables=["https://new-source.com/file.tar.gz"], ) new_generated_urls = [ URLKey("https://new-source.com/file.tar.gz", mock_config.url_types.source) ] new_urls = {} # this tracks all the new URLs we've created so far - # let it be empty for this test # Test the diff diff = PkgxDiff(mock_config, cache, mock_db, mock_logger) # Mock the URL retrieval step with ( patch( "package_managers.pkgx.diff.generate_chai_urls", return_value=new_generated_urls, ), ): resolved_urls = diff.diff_url("url-pkg", new_pkg_data, new_urls) new_links, _ = diff.diff_pkg_url(existing_pkg_id, resolved_urls) # Assertions assert len(new_links) == 1 # New URL should be created new_link = new_links[0] assert new_link.package_id == existing_pkg_id # The URL should be created in new_urls dict and the link should reference it assert len(new_urls) == 1 # One new URL should be created new_url_key = next(iter(new_urls.keys())) new_url = new_urls[new_url_key] assert new_link.url_id == new_url.id # Link should reference the new URL assert new_url_key.url == "https://new-source.com/file.tar.gz" assert new_url_key.url_type_id == mock_config.url_types.source def test_package_exists_dependency_change(self, mock_config, mock_logger, mock_db): """Test scenario 3: Package existed in database and changed its dependencies""" # Setup existing package and dependencies existing_pkg_id = uuid4() dep1_id = uuid4() dep2_id = uuid4() dep3_id = uuid4() existing_package = Package( id=existing_pkg_id, derived_id="pkgx/dep-pkg", name="dep-pkg", package_manager_id=mock_config.pm_config.pm_id, import_id="dep-pkg", readme="", ) # Create dependency packages dep1_pkg = Package( id=dep1_id, derived_id="pkgx/dep1", name="dep1", import_id="dep1" ) dep2_pkg = Package( id=dep2_id, derived_id="pkgx/dep2", name="dep2", import_id="dep2" ) dep3_pkg = Package( id=dep3_id, derived_id="pkgx/dep3", name="dep3", import_id="dep3" ) # Create existing dependencies (dep1 as runtime, dep2 as build) existing_dep1 = LegacyDependency( package_id=existing_pkg_id, dependency_id=dep1_id, 
dependency_type_id=mock_config.dependency_types.runtime, ) existing_dep2 = LegacyDependency( package_id=existing_pkg_id, dependency_id=dep2_id, dependency_type_id=mock_config.dependency_types.build, ) # Create cache cache = Cache( package_map={ "dep-pkg": existing_package, "dep1": dep1_pkg, "dep2": dep2_pkg, "dep3": dep3_pkg, }, url_map={}, package_urls={}, dependencies={existing_pkg_id: {existing_dep1, existing_dep2}}, ) # Create new package data with changed dependencies # Remove dep2, keep dep1, add dep3 as runtime new_pkg_data = create_pkgx_package( dependencies=["dep1", "dep3"], # runtime deps build_deps=[], # no build deps (removes dep2) ) # Test the diff diff = PkgxDiff(mock_config, cache, mock_db, mock_logger) new_deps, removed_deps = diff.diff_deps("dep-pkg", new_pkg_data) # Assertions assert len(new_deps) == 1 # dep3 should be added assert new_deps[0].dependency_id == dep3_id assert new_deps[0].dependency_type_id == mock_config.dependency_types.runtime assert len(removed_deps) == 1 # dep2 should be removed assert removed_deps[0].dependency_id == dep2_id assert removed_deps[0].dependency_type_id == mock_config.dependency_types.build def test_completely_new_package(self, mock_config, mock_logger, mock_db): """Test scenario 4: Package was completely new to the database""" # Create empty cache (no existing packages) cache = Cache(package_map={}, url_map={}, package_urls={}, dependencies={}) # Create new package data new_pkg_data = create_pkgx_package( distributables=["https://github.com/example/new-pkg/archive/v1.0.tar.gz"], dependencies=["some-dep"], build_deps=["build-tool"], ) # Test the diff diff = PkgxDiff(mock_config, cache, mock_db, mock_logger) pkg_id, pkg_obj, update_payload = diff.diff_pkg("new-pkg", new_pkg_data) # Assertions assert pkg_obj is not None # New package should be created assert pkg_obj.derived_id == "pkgx/new-pkg" assert pkg_obj.name == "new-pkg" assert pkg_obj.import_id == "new-pkg" assert pkg_obj.package_manager_id == mock_config.pm_config.pm_id assert update_payload == {} # No updates for new package # Test URL creation new_urls = {} # Mock generate_chai_urls to return predictable URLs mock_urls = [ URLKey( "https://github.com/example/new-pkg", mock_config.url_types.homepage ), URLKey( "https://github.com/example/new-pkg/archive/v1.0.tar.gz", mock_config.url_types.source, ), ] with patch( "package_managers.pkgx.diff.generate_chai_urls", return_value=mock_urls ): resolved_urls = diff.diff_url("new-pkg", new_pkg_data, new_urls) new_links, updated_links = diff.diff_pkg_url(pkg_id, resolved_urls) # Should create URLs for homepage, source, and repository (GitHub) assert len(new_urls) >= 2 # At least source and homepage assert len(new_links) >= 2 # At least source and homepage links assert len(updated_links) == 0 # No existing links to update def test_no_changes_scenario(self, mock_config, mock_logger, mock_db): """Test scenario where package exists but has no changes""" # Setup existing package existing_pkg_id = uuid4() existing_package = Package( id=existing_pkg_id, derived_id="pkgx/unchanged-pkg", name="unchanged-pkg", package_manager_id=mock_config.pm_config.pm_id, import_id="unchanged-pkg", readme="Unchanged description", ) cache = Cache( package_map={"unchanged-pkg": existing_package}, url_map={}, package_urls={}, dependencies={}, ) # Create package data with same description pkg_data = create_pkgx_package() # Test the diff diff = PkgxDiff(mock_config, cache, mock_db, mock_logger) pkg_id, pkg_obj, update_payload = diff.diff_pkg("unchanged-pkg", pkg_data) # 
Assertions assert pkg_id == existing_pkg_id assert pkg_obj is None # No new package assert update_payload is None # No changes def test_missing_dependency_handling(self, mock_config, mock_logger, mock_db): """Test how missing dependencies are handled""" existing_pkg_id = uuid4() existing_package = Package( id=existing_pkg_id, derived_id="pkgx/missing-dep-pkg", name="missing-dep-pkg", import_id="missing-dep-pkg", ) cache = Cache( package_map={"missing-dep-pkg": existing_package}, url_map={}, package_urls={}, dependencies={}, ) # Create package with dependency that doesn't exist in cache pkg_data = create_pkgx_package(dependencies=["non-existent-dep"]) diff = PkgxDiff(mock_config, cache, mock_db, mock_logger) new_deps, removed_deps = diff.diff_deps("missing-dep-pkg", pkg_data) # Should handle gracefully - no deps added for missing packages assert len(new_deps) == 0 assert len(removed_deps) == 0 def test_dependency_type_priority_no_change( self, mock_config, mock_logger, mock_db ): """Test case 1: p1 has runtime dependency to p2 in cache, p1 depends on p2 as both runtime and build in parsed data. Expect no change (runtime has priority).""" # Setup existing package and dependencies p1_id = uuid4() p2_id = uuid4() p1_pkg = Package(id=p1_id, derived_id="pkgx/p1", name="p1", import_id="p1") p2_pkg = Package(id=p2_id, derived_id="pkgx/p2", name="p2", import_id="p2") # Existing runtime dependency in cache existing_runtime_dep = LegacyDependency( package_id=p1_id, dependency_id=p2_id, dependency_type_id=mock_config.dependency_types.runtime, ) cache = Cache( package_map={"p1": p1_pkg, "p2": p2_pkg}, url_map={}, package_urls={}, dependencies={p1_id: {existing_runtime_dep}}, ) # Parsed data has p2 as both runtime and build dependency new_pkg_data = create_pkgx_package( dependencies=["p2"], # runtime build_deps=["p2"], # build ) diff = PkgxDiff(mock_config, cache, mock_db, mock_logger) new_deps, removed_deps = diff.diff_deps("p1", new_pkg_data) # Should have no changes - runtime priority means no change needed assert len(new_deps) == 0 assert len(removed_deps) == 0 def test_dependency_type_change_runtime_to_build( self, mock_config, mock_logger, mock_db ): """Test case 2: p1 has runtime dependency to p2 in cache, p1 has build dependency to p2 in parsed data. 
Expect removed runtime dependency and new build dependency.""" p1_id = uuid4() p2_id = uuid4() p1_pkg = Package(id=p1_id, derived_id="pkgx/p1", name="p1", import_id="p1") p2_pkg = Package(id=p2_id, derived_id="pkgx/p2", name="p2", import_id="p2") # Existing runtime dependency existing_runtime_dep = LegacyDependency( package_id=p1_id, dependency_id=p2_id, dependency_type_id=mock_config.dependency_types.runtime, ) cache = Cache( package_map={"p1": p1_pkg, "p2": p2_pkg}, url_map={}, package_urls={}, dependencies={p1_id: {existing_runtime_dep}}, ) # Parsed data only has build dependency new_pkg_data = create_pkgx_package( dependencies=[], # no runtime deps build_deps=["p2"], # only build ) diff = PkgxDiff(mock_config, cache, mock_db, mock_logger) new_deps, removed_deps = diff.diff_deps("p1", new_pkg_data) # Should remove runtime and add build assert len(removed_deps) == 1 assert removed_deps[0].dependency_id == p2_id assert ( removed_deps[0].dependency_type_id == mock_config.dependency_types.runtime ) assert len(new_deps) == 1 assert new_deps[0].dependency_id == p2_id assert new_deps[0].dependency_type_id == mock_config.dependency_types.build def test_dependency_type_change_build_to_runtime( self, mock_config, mock_logger, mock_db ): """Test case 3: p1 has build dependency to p2 in cache, p1 has runtime dependency to p2 in parsed data. Expect removed build dependency and new runtime dependency.""" p1_id = uuid4() p2_id = uuid4() p1_pkg = Package(id=p1_id, derived_id="pkgx/p1", name="p1", import_id="p1") p2_pkg = Package(id=p2_id, derived_id="pkgx/p2", name="p2", import_id="p2") # Existing build dependency existing_build_dep = LegacyDependency( package_id=p1_id, dependency_id=p2_id, dependency_type_id=mock_config.dependency_types.build, ) cache = Cache( package_map={"p1": p1_pkg, "p2": p2_pkg}, url_map={}, package_urls={}, dependencies={p1_id: {existing_build_dep}}, ) # Parsed data only has runtime dependency new_pkg_data = create_pkgx_package( dependencies=["p2"], # runtime build_deps=[], # no build deps ) diff = PkgxDiff(mock_config, cache, mock_db, mock_logger) new_deps, removed_deps = diff.diff_deps("p1", new_pkg_data) # Should remove build and add runtime assert len(removed_deps) == 1 assert removed_deps[0].dependency_id == p2_id assert removed_deps[0].dependency_type_id == mock_config.dependency_types.build assert len(new_deps) == 1 assert new_deps[0].dependency_id == p2_id assert new_deps[0].dependency_type_id == mock_config.dependency_types.runtime def test_dependency_type_priority_new_package( self, mock_config, mock_logger, mock_db ): """Test case 4: p1 has no dependencies to p2 in cache, p1 has both runtime and build dependencies to p2 in parsed data. 
Expect one new runtime dependency (priority over build).""" p1_id = uuid4() p2_id = uuid4() p1_pkg = Package(id=p1_id, derived_id="pkgx/p1", name="p1", import_id="p1") p2_pkg = Package(id=p2_id, derived_id="pkgx/p2", name="p2", import_id="p2") cache = Cache( package_map={"p1": p1_pkg, "p2": p2_pkg}, url_map={}, package_urls={}, dependencies={}, # No existing dependencies ) # Parsed data has both runtime and build dependencies to p2 new_pkg_data = create_pkgx_package( dependencies=["p2"], # runtime build_deps=["p2"], # build ) diff = PkgxDiff(mock_config, cache, mock_db, mock_logger) new_deps, removed_deps = diff.diff_deps("p1", new_pkg_data) # Should only create one new dependency - runtime (higher priority) assert len(removed_deps) == 0 assert len(new_deps) == 1 assert new_deps[0].dependency_id == p2_id assert new_deps[0].dependency_type_id == mock_config.dependency_types.runtime def test_dependency_type_priority_with_test( self, mock_config, mock_logger, mock_db ): """Test priority handling with test dependencies: Runtime > Build > Test""" p1_id = uuid4() p2_id = uuid4() p3_id = uuid4() p4_id = uuid4() p1_pkg = Package(id=p1_id, derived_id="pkgx/p1", name="p1", import_id="p1") p2_pkg = Package(id=p2_id, derived_id="pkgx/p2", name="p2", import_id="p2") p3_pkg = Package(id=p3_id, derived_id="pkgx/p3", name="p3", import_id="p3") p4_pkg = Package(id=p4_id, derived_id="pkgx/p4", name="p4", import_id="p4") cache = Cache( package_map={"p1": p1_pkg, "p2": p2_pkg, "p3": p3_pkg, "p4": p4_pkg}, url_map={}, package_urls={}, dependencies={}, ) # Parsed data with overlapping dependencies across different types new_pkg_data = create_pkgx_package( dependencies=["p2", "p3"], # runtime: p2, p3 build_deps=["p2", "p4"], # build: p2, p4 test_deps=["p2", "p3", "p4"], # test: p2, p3, p4 ) diff = PkgxDiff(mock_config, cache, mock_db, mock_logger) new_deps, removed_deps = diff.diff_deps("p1", new_pkg_data) # Should create dependencies based on priority: # p2: runtime (highest priority among runtime/build/test) # p3: runtime (highest priority among runtime/test) # p4: build (highest priority among build/test) assert len(removed_deps) == 0 assert len(new_deps) == 3 # Sort by dependency_id for consistent testing new_deps_sorted = sorted(new_deps, key=lambda d: str(d.dependency_id)) # p2 should be runtime (highest priority) p2_dep = next(d for d in new_deps_sorted if d.dependency_id == p2_id) assert p2_dep.dependency_type_id == mock_config.dependency_types.runtime # p3 should be runtime (highest priority) p3_dep = next(d for d in new_deps_sorted if d.dependency_id == p3_id) assert p3_dep.dependency_type_id == mock_config.dependency_types.runtime # p4 should be build (highest available priority) p4_dep = next(d for d in new_deps_sorted if d.dependency_id == p4_id) assert p4_dep.dependency_type_id == mock_config.dependency_types.build ================================================ FILE: tests/package_managers/pkgx/test_special_case.py ================================================ """ Test special case URL handling in PkgxTransformer. This module tests the special_case method which handles URL transformations for specific package sources like crates.io, x.org, and pkgx.sh. 
""" import pytest from package_managers.pkgx.url import special_case @pytest.mark.transformer class TestSpecialCase: """Test special case URL transformations.""" def test_special_case_crates_io(self, mock_logger): """Test that crates.io URLs are properly transformed.""" assert ( special_case("crates.io/pkgx", mock_logger) == "https://crates.io/crates/pkgx" ) def test_special_case_x_org(self, mock_logger): """Test that x.org URLs are normalized.""" assert special_case("x.org/ice", mock_logger) == "https://x.org" assert special_case("x.org/xxf86vm", mock_logger) == "https://x.org" def test_special_case_pkgx_sh(self, mock_logger): """Test that pkgx.sh URLs are redirected to GitHub.""" assert ( special_case("pkgx.sh/pkgx", mock_logger) == "https://github.com/pkgxdev/pkgx" ) def test_special_case_no_slashes(self, mock_logger): """Test that URLs without slashes are returned as-is.""" assert special_case("abseil.io", mock_logger) == "abseil.io" def test_special_case_double_slashes(self, mock_logger): """Test that URLs with double slashes are returned as-is.""" assert ( special_case("github.com/awslabs/llrt", mock_logger) == "github.com/awslabs/llrt" ) ================================================ FILE: tests/ranker/test_compute_canon_name.py ================================================ #!/usr/bin/env uv run --with pytest import pytest from ranker.naming import ( check_if_better, compute_canon_name, extract_repo_name_from_url, score_name, ) @pytest.mark.parametrize( "url, best_guess", [ ("github.com/user/repo", "repo"), ("gitlab.com/user/repo", "repo"), ("bitbucket.org/user/repo", "repo"), ("not-a-valid-url", "not-a-valid-url"), ("", ""), ], ) def test_extract_repo_name_from_url(url, best_guess): assert extract_repo_name_from_url(url) == best_guess @pytest.mark.parametrize( "name, best_guess, expected_score", [ ("@user/repo", "repo", 3), ("test3js", "web3.js", 8), ("web3", "web3.js", 11), ("@platonenterprise/web3", "web3.js", -3), ("eleventy-plugin-embed-everything", "embed-everything", 1), ("eleventy-plugin-embed-ted", "embed-everything", 0), ], ) def test_score_name(name, best_guess, expected_score): assert score_name(name, best_guess) == expected_score @pytest.mark.parametrize( "name, best_guess, package_name, expected", [ ( "web3.js", "test3js", "https://github.com/ethereum/web3.js#readmeweb3.js", "test3js", ), ("web3.js", "web3", "test3js", "web3"), ("web3.js", "@platonenterprise/web3", "web3", "web3"), ], ) def test_check_if_better(name, best_guess, package_name, expected): assert check_if_better(name, best_guess, package_name) == expected @pytest.mark.parametrize( "url, package_name, existing_name, expected", [ # new canon, we should always have the package_name ("github.com/user/repo", "repo", "", "repo"), ( "github.com/user/repo", "@scoped/random-name-123", "@scoped/random-name-123", "@scoped/random-name-123", ), ( "gfscott.com/embed-everything", "eleventy-plugin-embed-everything", "gfscott.com/embed-everything", "eleventy-plugin-embed-everything", ), ( "gfscott.com/embed-everything", "eleventy-plugin-embed-ted", "eleventy-plugin-embed-everything", "eleventy-plugin-embed-everything", ), ], ) def test_compute_canon_name(url, package_name, existing_name, expected): assert compute_canon_name(url, package_name, existing_name) == expected ================================================ FILE: tests/ranker/test_dedupe.py ================================================ """ Test the package deduplication functionality in the ranker. 
This module tests the dedupe.main function which handles deduplication of packages based on their homepage URLs, creating and managing canonical package representations. """ from datetime import datetime from unittest.mock import MagicMock, patch from uuid import uuid4 import pytest from core.models import URL, Canon, Package from ranker.config import DedupeConfig from ranker.dedupe import DedupeDB, main @pytest.fixture def ids(): """Fixture providing consistent IDs for testing.""" return { "homepage_url_type": uuid4(), "package_manager": uuid4(), "pkg1": uuid4(), "pkg2": uuid4(), "pkg3": uuid4(), "canon1": uuid4(), "canon2": uuid4(), "canon3": uuid4(), "url1": uuid4(), "url2": uuid4(), "url3": uuid4(), } @pytest.fixture def test_packages(ids): """Fixture providing test package objects.""" return { "package1": Package( id=ids["pkg1"], name="package1", package_manager_id=ids["package_manager"], import_id="pkg1", derived_id="npm/package1", created_at=datetime.now(), updated_at=datetime.now(), ), "package2": Package( id=ids["pkg2"], name="package2", package_manager_id=ids["package_manager"], import_id="pkg2", derived_id="npm/package2", created_at=datetime.now(), updated_at=datetime.now(), ), "package3": Package( id=ids["pkg3"], name="package3", package_manager_id=ids["package_manager"], import_id="pkg3", derived_id="npm/package3", created_at=datetime.now(), updated_at=datetime.now(), ), } @pytest.fixture def test_urls(ids): """Fixture providing test URL objects.""" canonical_url = "github.com/example/repo" non_canonical_url = "https://github.com/example/repo" different_url = "https://gitlab.com/example/repo" return { "canonical": URL( id=ids["url1"], url=canonical_url, url_type_id=ids["homepage_url_type"], created_at=datetime.now(), updated_at=datetime.now(), ), "non_canonical": URL( id=ids["url2"], url=non_canonical_url, url_type_id=ids["homepage_url_type"], created_at=datetime.now(), updated_at=datetime.now(), ), "different": URL( id=ids["url3"], url=different_url, url_type_id=ids["homepage_url_type"], created_at=datetime.now(), updated_at=datetime.now(), ), } @pytest.fixture def mock_dedupe_config(ids): """Fixture providing mock DedupeConfig.""" config = MagicMock(spec=DedupeConfig) config.load = True config.homepage_url_type_id = ids["homepage_url_type"] return config @pytest.fixture def mock_db(): """Fixture providing mock DedupeDB.""" return MagicMock(spec=DedupeDB) def capture_ingest_calls(mock_db): """Helper function to capture arguments passed to db.ingest.""" ingest_calls = [] def capture_ingest( new_canons, updated_canons, new_canon_packages, updated_canon_packages ): ingest_calls.append( (new_canons, updated_canons, new_canon_packages, updated_canon_packages) ) mock_db.ingest.side_effect = capture_ingest return ingest_calls @pytest.mark.ranker class TestDedupe: """Test the deduplication of packages - focused on different cases.""" def test_new_canon_new_mapping( self, ids, test_packages, test_urls, mock_dedupe_config, mock_db ): """ Test: URL has no canon AND package has no existing mapping Expected: Create new canon + create new mapping """ # Arrange package = test_packages["package1"] homepage_url = test_urls["canonical"] # Current state: no canons exist for this URL, no package mapping exists mock_db.get_current_canons.return_value = {} # URL has no canon mock_db.get_current_canon_packages.return_value = {} # Package has no mapping mock_db.get_packages_with_homepages.return_value = [(package, homepage_url)] mock_db.get_all_package_names.return_value = {ids["pkg1"]: "package1"} 
ingest_calls = capture_ingest_calls(mock_db) # Act with patch.dict("os.environ", {"LOAD": "true", "TEST": "false"}): main(mock_dedupe_config, mock_db) # Assert assert len(ingest_calls) == 1, "Should call ingest exactly once" new_canons, updated_canons, new_canon_packages, updated_canon_packages = ( ingest_calls[0] ) # Verify canon creation assert len(new_canons) == 1, "Should create exactly one new canon" assert len(updated_canons) == 0, "Should not update any canons" assert len(new_canon_packages) == 1, "Should create exactly one new mapping" assert len(updated_canon_packages) == 0, "Should not update any mappings" created_canon = new_canons[0] assert ( created_canon.url_id == ids["url1"] ), "Canon should reference correct URL ID" assert created_canon.name == "package1", "Canon name should be the package name" # Verify mapping creation created_mapping = new_canon_packages[0] assert created_mapping.package_id == ids["pkg1"], "Should map correct package" assert created_mapping.canon_id == created_canon.id, "Should map to new canon" def test_new_canon_update_mapping( self, ids, test_packages, test_urls, mock_dedupe_config, mock_db ): """ Test: URL has no canon AND package has existing mapping to different canon Expected: Create new canon + update existing mapping """ # Arrange package = test_packages["package1"] homepage_url = test_urls["canonical"] # Create existing canon for different URL existing_canon = Canon( id=ids["canon2"], url_id=ids["url2"], # Different URL name="old-canon", created_at=datetime.now(), updated_at=datetime.now(), ) # Current state: no canon for this URL, but package is mapped to different canon mock_db.get_current_canons.return_value = {ids["url2"]: existing_canon} mock_db.get_current_canon_packages.return_value = { ids["pkg1"]: {"id": uuid4(), "canon_id": existing_canon.id} } mock_db.get_packages_with_homepages.return_value = [(package, homepage_url)] mock_db.get_all_package_names.return_value = {ids["pkg1"]: "package1"} ingest_calls = capture_ingest_calls(mock_db) # Act with patch.dict("os.environ", {"LOAD": "true", "TEST": "false"}): main(mock_dedupe_config, mock_db) # Assert assert len(ingest_calls) == 1, "Should call ingest exactly once" new_canons, updated_canons, new_canon_packages, updated_canon_packages = ( ingest_calls[0] ) # Verify canon creation assert len(new_canons) == 1, "Should create exactly one new canon" assert len(updated_canons) == 0, "Should not update any canons" assert len(new_canon_packages) == 0, "Should not create new mappings" assert len(updated_canon_packages) == 1, "Should update exactly one mapping" created_canon = new_canons[0] assert ( created_canon.url_id == ids["url1"] ), "Canon should reference correct URL ID" assert created_canon.name == "package1", "Canon name should be the package name" # Verify mapping update (should point to NEW canon, not old one) updated_mapping = updated_canon_packages[0] assert "id" in updated_mapping, "Update should include canon package ID" assert ( updated_mapping["canon_id"] == created_canon.id ), "Should update to NEW canon" assert ( updated_mapping["canon_id"] != ids["canon2"] ), "Should NOT point to old canon" assert "updated_at" in updated_mapping, "Update should include timestamp" def test_no_changes_needed( self, ids, test_packages, test_urls, mock_dedupe_config, mock_db ): """ Test: URL has canon AND package already linked to that canon Expected: Do nothing (no changes) """ # Arrange package = test_packages["package1"] homepage_url = test_urls["canonical"] existing_canon = Canon( 
        existing_canon = Canon(
            id=ids["canon1"],
            url_id=ids["url1"],
            name="existing-canon",
            created_at=datetime.now(),
            updated_at=datetime.now(),
        )

        # Current state: URL has canon, package linked to that same canon
        mock_db.get_current_canons.return_value = {ids["url1"]: existing_canon}
        mock_db.get_current_canon_packages.return_value = {
            ids["pkg1"]: {"id": uuid4(), "canon_id": ids["canon1"]}
        }
        mock_db.get_packages_with_homepages.return_value = [(package, homepage_url)]
        mock_db.get_all_package_names.return_value = {ids["pkg1"]: "existing-canon"}

        ingest_calls = capture_ingest_calls(mock_db)

        # Act
        with patch.dict("os.environ", {"LOAD": "true", "TEST": "false"}):
            main(mock_dedupe_config, mock_db)

        # Assert - should call ingest with empty lists (no changes)
        assert len(ingest_calls) == 1, "Should call ingest exactly once"
        new_canons, updated_canons, new_canon_packages, updated_canon_packages = (
            ingest_calls[0]
        )
        assert len(new_canons) == 0, "Should not create any canons"
        assert len(updated_canons) == 0, "Should not update any canons"
        assert len(new_canon_packages) == 0, "Should not create any mappings"
        assert len(updated_canon_packages) == 0, "Should not update any mappings"

    def test_update_existing_mapping(
        self, ids, test_packages, test_urls, mock_dedupe_config, mock_db
    ):
        """
        Test: URL has canon AND package linked to different canon
        Expected: Update mapping to correct canon
        """
        # Arrange
        package = test_packages["package1"]
        homepage_url = test_urls["canonical"]

        correct_canon = Canon(
            id=ids["canon1"],
            url_id=ids["url1"],  # This URL's canon
            name="correct-name",
            created_at=datetime.now(),
            updated_at=datetime.now(),
        )
        wrong_canon = Canon(
            id=ids["canon2"],
            url_id=ids["url2"],  # Different URL's canon
            name="correct-name",
            created_at=datetime.now(),
            updated_at=datetime.now(),
        )

        # Current state: URL has canon, but package linked to wrong canon
        mock_db.get_current_canons.return_value = {
            ids["url1"]: correct_canon,
            ids["url2"]: wrong_canon,
        }
        mock_db.get_current_canon_packages.return_value = {
            ids["pkg1"]: {
                "id": uuid4(),
                "canon_id": ids["canon2"],
            }  # Linked to wrong canon
        }
        mock_db.get_packages_with_homepages.return_value = [(package, homepage_url)]
        mock_db.get_all_package_names.return_value = {ids["pkg1"]: "correct-name"}

        ingest_calls = capture_ingest_calls(mock_db)

        # Act
        with patch.dict("os.environ", {"LOAD": "true", "TEST": "false"}):
            main(mock_dedupe_config, mock_db)

        # Assert
        assert len(ingest_calls) == 1, "Should call ingest exactly once"
        new_canons, updated_canons, new_canon_packages, updated_canon_packages = (
            ingest_calls[0]
        )

        # Should only update mapping, no new creations
        assert len(new_canons) == 0, "Should not create any canons"
        assert len(updated_canons) == 0, "Should not update any canons"
        assert len(new_canon_packages) == 0, "Should not create any new mappings"
        assert len(updated_canon_packages) == 1, "Should update exactly one mapping"

        # Verify mapping update points to correct canon
        updated_mapping = updated_canon_packages[0]
        assert "id" in updated_mapping, "Update should include canon package ID"
        assert (
            updated_mapping["canon_id"] == ids["canon1"]
        ), "Should update to correct canon"
        assert (
            updated_mapping["canon_id"] != ids["canon2"]
        ), "Should NOT point to wrong canon"
        assert "updated_at" in updated_mapping, "Update should include timestamp"

    def test_create_new_mapping(
        self, ids, test_packages, test_urls, mock_dedupe_config, mock_db
    ):
        """
        Test: URL has canon AND package has no mapping
        Expected: Create new mapping to existing canon
        """
        # Arrange
        package = test_packages["package1"]
        homepage_url = test_urls["canonical"]
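
        # A canon for this URL already exists (constructed below); only the
        # package -> canon mapping is missing, so main() should emit exactly
        # one new mapping and nothing else.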
        existing_canon = Canon(
            id=ids["canon1"],
            url_id=ids["url1"],
            name="existing-canon",
            created_at=datetime.now(),
            updated_at=datetime.now(),
        )

        # Current state: URL has canon, but package has no mapping
        mock_db.get_current_canons.return_value = {ids["url1"]: existing_canon}
        mock_db.get_current_canon_packages.return_value = {}  # Package not linked
        mock_db.get_packages_with_homepages.return_value = [(package, homepage_url)]
        mock_db.get_all_package_names.return_value = {ids["pkg1"]: "existing-canon"}

        ingest_calls = capture_ingest_calls(mock_db)

        # Act
        with patch.dict("os.environ", {"LOAD": "true", "TEST": "false"}):
            main(mock_dedupe_config, mock_db)

        # Assert
        assert len(ingest_calls) == 1, "Should call ingest exactly once"
        new_canons, updated_canons, new_canon_packages, updated_canon_packages = (
            ingest_calls[0]
        )

        # Should only create new mapping, no updates or new canons
        assert len(new_canons) == 0, "Should not create any canons"
        assert len(updated_canons) == 0, "Should not update any canons"
        assert len(new_canon_packages) == 1, "Should create exactly one new mapping"
        assert len(updated_canon_packages) == 0, "Should not update any mappings"

        # Verify mapping creation points to existing canon
        created_mapping = new_canon_packages[0]
        assert created_mapping.package_id == ids["pkg1"], "Should map correct package"
        assert created_mapping.canon_id == ids["canon1"], "Should map to existing canon"

    def test_multiple_packages_same_homepage_creates_single_canon(
        self, ids, test_packages, test_urls, mock_dedupe_config, mock_db
    ):
        """
        Test deduplication: Multiple packages with same homepage URL should
        create only one canon

        This tests the core deduplication logic where:
        - Package 1 points to URL X (no existing canon)
        - Package 2 also points to URL X
        - Should create only ONE canon for URL X
        - Both packages should be linked to the same canon
        """
        # Arrange
        package1 = test_packages["package1"]
        package2 = test_packages["package2"]
        shared_homepage_url = test_urls["canonical"]

        # Current state: no canons exist for this URL, no package mappings exist
        mock_db.get_current_canons.return_value = {}  # URL has no canon
        mock_db.get_current_canon_packages.return_value = {}  # No mappings
        mock_db.get_packages_with_homepages.return_value = [
            (package1, shared_homepage_url),  # Both packages point to same URL
            (package2, shared_homepage_url),
        ]
        mock_db.get_all_package_names.return_value = {
            ids["pkg1"]: "package1",
            ids["pkg2"]: "package2",
        }

        ingest_calls = capture_ingest_calls(mock_db)

        # Act
        with patch.dict("os.environ", {"LOAD": "true", "TEST": "false"}):
            main(mock_dedupe_config, mock_db)

        # Assert
        assert len(ingest_calls) == 1, "Should call ingest exactly once"
        new_canons, updated_canons, new_canon_packages, updated_canon_packages = (
            ingest_calls[0]
        )

        # Should create only ONE canon for the shared URL
        assert len(new_canons) == 1, "Should create exactly one canon for shared URL"
        assert len(updated_canons) == 0, "Should not update any canons"
        assert len(new_canon_packages) == 2, "Should create mappings for both packages"
        assert len(updated_canon_packages) == 0, "Should not update any mappings"

        # Verify single canon creation
        created_canon = new_canons[0]
        assert created_canon.url_id == ids["url1"], "Canon should reference shared URL"

        # Verify both packages map to the same canon
        canon_ids = {mapping.canon_id for mapping in new_canon_packages}
        assert len(canon_ids) == 1, "Both packages should map to same canon"
        assert (
            canon_ids.pop() == created_canon.id
        ), "Both should map to the created canon"
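
        # Deduplication is keyed on the homepage URL's id: one shared URL
        # yields exactly one canon, no matter how many packages point at it.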
        # Verify package IDs
        package_ids = {mapping.package_id for mapping in new_canon_packages}
        assert package_ids == {ids["pkg1"], ids["pkg2"]}, "Should map both packages"

        # Verify the name. Heuristics wouldn't rank "package2" higher than "package1"
        assert created_canon.name == "package1", "Canon name should be the package name"

    def test_empty_urls_no_deduplication(
        self, ids, test_packages, test_urls, mock_dedupe_config, mock_db
    ):
        """
        Test that packages with empty URLs are not deduplicated with each other

        A lot of packages in CHAI have no URLs, and we should not deduplicate
        them with each other. This test case ensures that if two packages have
        no URLs, they do not get deduplicated.
        """
        # Arrange
        package1 = test_packages["package1"]
        package2 = test_packages["package2"]

        empty_url1 = URL(
            id=ids["url1"],
            url="",
            url_type_id=ids["homepage_url_type"],
            created_at=datetime.now(),
            updated_at=datetime.now(),
        )
        empty_url2 = URL(
            id=ids["url2"],
            url="",
            url_type_id=ids["homepage_url_type"],
            created_at=datetime.now(),
            updated_at=datetime.now(),
        )

        # Current state: no canons exist for these URLs, no package mappings exist
        mock_db.get_current_canons.return_value = {}  # No canons
        mock_db.get_current_canon_packages.return_value = {}  # No mappings
        mock_db.get_packages_with_homepages.return_value = [
            (package1, empty_url1),  # Both packages have empty URLs
            (package2, empty_url2),
        ]

        ingest_calls = capture_ingest_calls(mock_db)

        # Act
        with patch.dict("os.environ", {"LOAD": "true", "TEST": "false"}):
            main(mock_dedupe_config, mock_db)

        # Assert
        assert len(ingest_calls) == 1, "Should call ingest exactly once"
        new_canons, updated_canons, new_canon_packages, updated_canon_packages = (
            ingest_calls[0]
        )

        # Should not create any canons or mappings for empty URLs
        assert len(new_canons) == 0, "Should not create any canons for empty URLs"
        assert len(updated_canons) == 0, "Should not update any canons"
        assert (
            len(new_canon_packages) == 0
        ), "Should not create any mappings for empty URLs"
        assert len(updated_canon_packages) == 0, "Should not update any mappings"

    def test_canon_name_update_when_url_changes(
        self, ids, test_packages, test_urls, mock_dedupe_config, mock_db
    ):
        """
        Test that canon name gets updated when the canonical URL changes

        This tests canon update functionality when:
        - A canon exists with name "old-url"
        - The canonical URL for that canon changes to "new-url"
        - The canon name should be updated to match the new URL
        """
        # Arrange
        package = test_packages["package1"]

        # URL with updated canonical form
        updated_url = URL(
            id=ids["url1"],
            url="github.com/example/new-repo",  # Changed URL
            url_type_id=ids["homepage_url_type"],
            created_at=datetime.now(),
            updated_at=datetime.now(),
        )

        # Existing canon with old name
        existing_canon = Canon(
            id=ids["canon1"],
            url_id=ids["url1"],
            name="github.com/example/old-repo",  # Old name
            created_at=datetime.now(),
            updated_at=datetime.now(),
        )

        # Current state: canon exists but name doesn't match current URL
        mock_db.get_current_canons.return_value = {ids["url1"]: existing_canon}
        mock_db.get_current_canon_packages.return_value = {
            ids["pkg1"]: {"id": uuid4(), "canon_id": ids["canon1"]}
        }
        mock_db.get_packages_with_homepages.return_value = [(package, updated_url)]
        mock_db.get_all_package_names.return_value = {
            ids["pkg1"]: "github.com/example/new-repo"
        }

        ingest_calls = capture_ingest_calls(mock_db)

        # Act
        with patch.dict("os.environ", {"LOAD": "true", "TEST": "false"}):
            main(mock_dedupe_config, mock_db)

        # Assert
        assert len(ingest_calls) == 1, "Should call ingest exactly once"
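        # Only the canon rename should flow through this scenario; the existing
        # package -> canon mapping is already correct and must stay untouched.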
        new_canons, updated_canons, new_canon_packages, updated_canon_packages = (
            ingest_calls[0]
        )

        # Should update the existing canon name
        assert len(new_canons) == 0, "Should not create any new canons"
        assert len(updated_canons) == 1, "Should update exactly one canon"
        assert len(new_canon_packages) == 0, "Should not create any new mappings"
        assert len(updated_canon_packages) == 0, "Should not update any mappings"

        # Verify canon update
        updated_canon = updated_canons[0]
        assert "id" in updated_canon, "Update should include canon ID"
        assert updated_canon["id"] == ids["canon1"], "Should update correct canon"
        assert (
            updated_canon["name"] == "github.com/example/new-repo"
        ), "Should update name to new URL"
        assert "updated_at" in updated_canon, "Update should include timestamp"

    def test_canon_update_with_multiple_packages(
        self, ids, test_packages, test_urls, mock_dedupe_config, mock_db
    ):
        """
        Test canon update when multiple packages share the same canon

        This ensures that when a canon's URL changes, the canon is updated but
        all existing package mappings remain intact.
        """
        # Arrange
        package1 = test_packages["package1"]
        package2 = test_packages["package2"]

        # URL with updated canonical form
        updated_url = URL(
            id=ids["url1"],
            url="github.com/example/updated-repo",
            url_type_id=ids["homepage_url_type"],
            created_at=datetime.now(),
            updated_at=datetime.now(),
        )

        # Existing canon with old name
        existing_canon = Canon(
            id=ids["canon1"],
            url_id=ids["url1"],
            name="github.com/example/old-repo",  # Different from current URL
            created_at=datetime.now(),
            updated_at=datetime.now(),
        )

        # Current state: both packages mapped to same canon
        mock_db.get_current_canons.return_value = {ids["url1"]: existing_canon}
        mock_db.get_current_canon_packages.return_value = {
            ids["pkg1"]: {"id": uuid4(), "canon_id": ids["canon1"]},
            ids["pkg2"]: {"id": uuid4(), "canon_id": ids["canon1"]},
        }
        mock_db.get_packages_with_homepages.return_value = [
            (package1, updated_url),
            (package2, updated_url),
        ]
        mock_db.get_all_package_names.return_value = {
            ids["pkg1"]: "github.com/example/updated-repo",
            ids["pkg2"]: "github.com/example/updated-repo",
        }

        ingest_calls = capture_ingest_calls(mock_db)

        # Act
        with patch.dict("os.environ", {"LOAD": "true", "TEST": "false"}):
            main(mock_dedupe_config, mock_db)

        # Assert
        assert len(ingest_calls) == 1, "Should call ingest exactly once"
        new_canons, updated_canons, new_canon_packages, updated_canon_packages = (
            ingest_calls[0]
        )

        # Should only update the canon, no new mappings or updates to mappings
        assert len(new_canons) == 0, "Should not create any new canons"
        assert len(updated_canons) == 1, "Should update exactly one canon"
        assert len(new_canon_packages) == 0, "Should not create any new mappings"
        assert len(updated_canon_packages) == 0, "Should not update any mappings"

        # Verify canon update
        updated_canon = updated_canons[0]
        assert updated_canon["id"] == ids["canon1"], "Should update correct canon"
        assert (
            updated_canon["name"] == "github.com/example/updated-repo"
        ), "Should update name to new URL"

    def test_skip_when_load_disabled(self, mock_dedupe_config, mock_db):
        """
        Test that no processing occurs when load is disabled
        Expected: db.ingest should not be called
        """
        # Arrange
        mock_dedupe_config.load = False

        # Act
        with patch.dict("os.environ", {"LOAD": "false", "TEST": "false"}):
            main(mock_dedupe_config, mock_db)

        # Assert
        mock_db.ingest.assert_not_called()


================================================
FILE: tests/ranker/test_rx_graph.py
================================================
"""
Test the CHAI graph ranking algorithm.
This module tests the rx_graph module which implements a custom graph-based
ranking algorithm for packages. The tests focus on verifying that the
distribute function conserves weight appropriately.
"""

import random
import uuid
from decimal import Decimal

import pytest

from ranker.rx_graph import CHAI, PackageNode

# Constants for the test
NUM_NODES = 100000
EDGE_PROBABILITY = 0.001
SPLIT_RATIO = Decimal("0.85")
TOLERANCE = Decimal("1e-6")
MAX_ITER = 10000000


@pytest.fixture
def large_chai_graph() -> tuple[CHAI, dict[uuid.UUID, Decimal]]:
    """Creates a large CHAI graph with random edges and personalization."""
    G = CHAI()
    nodes = []
    initial_personalization_raw = {}

    # Create nodes
    for _i in range(NUM_NODES):
        canon_id = uuid.uuid4()
        node = PackageNode(canon_id=canon_id)
        node.index = G.add_node(node)
        nodes.append(node)
        # Assign random initial weight for personalization
        initial_personalization_raw[canon_id] = Decimal(random.random())

    # Normalize personalization to sum to 1
    total_weight = sum(initial_personalization_raw.values())
    personalization = {
        uid: weight / total_weight
        for uid, weight in initial_personalization_raw.items()
    }
    assert (
        abs(sum(personalization.values()) - Decimal("1.0")) <= TOLERANCE
    ), f"Initial personalization should sum to 1 within tolerance: {sum(personalization.values())}"

    # Add random edges (potential cycles)
    node_indices = list(G.node_indices())
    for u_idx in node_indices:
        for v_idx in node_indices:
            if u_idx != v_idx and random.random() < EDGE_PROBABILITY:
                G.add_edge(u_idx, v_idx, None)  # Edge data is not used in distribute

    return G, personalization


================================================
FILE: tests/scripts/upgrade_canons/test_analyze_packages_needing_canonicalization.py
================================================
#!/usr/bin/env pkgx uv run

from unittest.mock import call, patch
from uuid import UUID

import pytest

from scripts.upgrade_canons.main import analyze_packages_needing_canonicalization


class TestAnalyzePackagesNeedingCanonicalization:
    """Test the analyze_packages_needing_canonicalization function"""

    def setup_method(self):
        """Set up test fixtures"""
        self.package_id_1 = UUID("11111111-1111-1111-1111-111111111111")
        self.package_id_2 = UUID("22222222-2222-2222-2222-222222222222")
        self.package_id_3 = UUID("33333333-3333-3333-3333-333333333333")
        self.package_id_4 = UUID("44444444-4444-4444-4444-444444444444")

    @patch("scripts.upgrade_canons.main.is_canonical_url")
    @patch("scripts.upgrade_canons.main.normalize_url")
    def test_case_1_should_create_canonical_url(
        self, mock_normalize, mock_is_canonical
    ):
        """
        Test Case 1: Package has non-canonical URLs, canonical doesn't exist
        Expected: Should return this package in the result
        """
        # Setup mocks
        mock_is_canonical.return_value = False
        mock_normalize.return_value = "github.com/org/repo"

        # Test data
        package_url_map = {
            self.package_id_1: [
                "https://github.com/org/repo",
                "https://github.com/org/repo/tree/main",
                "https://github.com/org/repo/blob/main/README.md",
            ]
        }
        existing_homepages = {
            "https://github.com/org/repo",
            "https://github.com/org/repo/tree/main",
            "https://github.com/org/repo/blob/main/README.md",
        }  # no canon

        # Execute
        result = analyze_packages_needing_canonicalization(
            package_url_map, existing_homepages
        )

        # Verify
        assert len(result) == 1
        assert self.package_id_1 in result
        assert result[self.package_id_1] == "github.com/org/repo"

        # Verify mocks were called correctly
        # is_canonical should be called once for each URL until it finds a
        # canonical one (or all if none are canonical)
        expected_calls = [
            call("https://github.com/org/repo"),
            call("https://github.com/org/repo/tree/main"),
            call("https://github.com/org/repo/blob/main/README.md"),
        ]
        mock_is_canonical.assert_has_calls(expected_calls)
        assert mock_is_canonical.call_count == 3

        # normalize should only be called once with the first URL
        mock_normalize.assert_called_once_with("https://github.com/org/repo")

    @patch("scripts.upgrade_canons.main.is_canonical_url")
    @patch("scripts.upgrade_canons.main.normalize_url")
    def test_case_2_canonical_exists_in_database(
        self, mock_normalize, mock_is_canonical
    ):
        """
        Test Case 2: Package has non-canonical URLs, but canonical already exists in DB
        Expected: Should not return this package (skip it)
        """
        # Setup mocks
        mock_is_canonical.return_value = False
        mock_normalize.return_value = "https://example.com"

        # Test data
        package_url_map = {
            self.package_id_1: ["http://example.com", "https://www.example.com"]
        }
        existing_homepages = {"https://example.com"}  # Canonical already exists

        # Execute
        result = analyze_packages_needing_canonicalization(
            package_url_map, existing_homepages
        )

        # Verify
        assert len(result) == 0
        assert self.package_id_1 not in result

    @patch("scripts.upgrade_canons.main.is_canonical_url")
    @patch("scripts.upgrade_canons.main.normalize_url")
    def test_case_3_canonical_already_planned(self, mock_normalize, mock_is_canonical):
        """
        Test Case 3: Two packages would create the same canonical URL
        Expected: Only the first package should be included, second should be skipped
        """
        # Setup mocks
        mock_is_canonical.return_value = False
        mock_normalize.return_value = (
            "https://example.com"  # Both packages normalize to same URL
        )

        # Test data - both packages would create the same canonical URL
        package_url_map = {
            self.package_id_1: ["http://example.com"],
            self.package_id_2: [
                "https://www.example.com"
            ],  # Different input, same canonical
        }
        existing_homepages = set()  # Empty - canonical doesn't exist

        # Execute
        result = analyze_packages_needing_canonicalization(
            package_url_map, existing_homepages
        )

        # Verify - only one package should be included (whichever was processed first)
        assert len(result) == 1
        assert "https://example.com" in result.values()

        # Verify that exactly one of the packages was included
        included_packages = list(result.keys())
        assert len(included_packages) == 1
        assert included_packages[0] in [self.package_id_1, self.package_id_2]

    @patch("scripts.upgrade_canons.main.is_canonical_url")
    def test_case_4_package_already_has_canonical(self, mock_is_canonical):
        """
        Test Case 4: Package already has at least one canonical URL
        Expected: Should not return this package (skip it)
        """
        # Setup mocks - return True for canonical check
        mock_is_canonical.return_value = True

        # Test data
        package_url_map = {
            self.package_id_1: [
                "https://example.com",
                "http://example.com",
            ]  # First URL is canonical
        }
        existing_homepages = set()

        # Execute
        result = analyze_packages_needing_canonicalization(
            package_url_map, existing_homepages
        )

        # Verify
        assert len(result) == 0
        assert self.package_id_1 not in result

        # Verify that we never tried to normalize (because we skipped early)
        mock_is_canonical.assert_called_once_with("https://example.com")

    @patch("scripts.upgrade_canons.main.is_canonical_url")
    @patch("scripts.upgrade_canons.main.normalize_url")
    def test_mixed_scenarios(self, mock_normalize, mock_is_canonical):
        """
        Test with multiple packages covering different scenarios
        """

        # Setup mocks with side effects for different URLs
        def mock_is_canonical_side_effect(url):
            return url == "https://canonical.com"  # Only this URL is canonical
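
        # Stub normalization for this scenario only: collapse any example/test
        # host variant to a single https URL, and default everything else to an
        # https version of its host. This stands in for the patched
        # normalize_url, not the real implementation.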
        def mock_normalize_side_effect(url):
            if "example" in url:
                return "https://example.com"
            elif "test" in url:
                return "https://test.com"
            else:
                return f"https://{url.split('://')[1]}"

        mock_is_canonical.side_effect = mock_is_canonical_side_effect
        mock_normalize.side_effect = mock_normalize_side_effect

        # Test data
        package_url_map = {
            self.package_id_1: ["http://example.com"],  # Should create canonical
            self.package_id_2: ["https://canonical.com"],  # Already canonical - skip
            self.package_id_3: ["http://test.com"],  # Should create canonical
            self.package_id_4: [
                "https://www.example.com"
            ],  # Same canonical as package_id_1 - skip
        }
        existing_homepages = set()

        # Execute
        result = analyze_packages_needing_canonicalization(
            package_url_map, existing_homepages
        )

        # Verify
        assert len(result) == 2

        # Package 1 should be included (creates https://example.com)
        assert self.package_id_1 in result
        assert result[self.package_id_1] == "https://example.com"

        # Package 2 should be skipped (already canonical)
        assert self.package_id_2 not in result

        # Package 3 should be included (creates https://test.com)
        assert self.package_id_3 in result
        assert result[self.package_id_3] == "https://test.com"

        # Package 4 should be skipped (duplicate canonical URL)
        assert self.package_id_4 not in result

    def test_empty_inputs(self):
        """Test with empty inputs"""
        result = analyze_packages_needing_canonicalization({}, set())
        assert result == {}

    @patch("scripts.upgrade_canons.main.is_canonical_url")
    @patch("scripts.upgrade_canons.main.normalize_url")
    def test_edge_case_empty_url_list(self, mock_normalize, mock_is_canonical):
        """Test with package that has empty URL list"""
        # This shouldn't happen in practice, but let's handle it gracefully
        package_url_map = {
            self.package_id_1: []  # Empty URL list
        }
        existing_homepages = set()

        # This will raise an IndexError when trying to access urls[0] in
        # generate_canonical_url - let's verify this behavior is expected
        with pytest.raises(IndexError):
            analyze_packages_needing_canonicalization(
                package_url_map, existing_homepages
            )


if __name__ == "__main__":
    pytest.main([__file__])