[
  {
    "path": ".dockerignore",
    "content": "# directories\ndata/\n.venv/\ntests/\nscripts/\nlogs/\ndb/ \n\n# other files\n.gitignore\ndocker-compose.yml\n.DS_Store\n.git\nREADME.md\nLICENSE\n.cursorrules\n.coverage\n*.xml"
  },
  {
    "path": ".github/actions/complain/action.yml",
    "content": "name: teaxyz/chai/complain\ndescription: creates an issue for any failing tests\n\ninputs:\n  test_function:\n    description: test function to complain about\n    required: true\n  token:\n    description: github token\n    required: true\n    default: ${{ github.token }}\n\nruns:\n  using: composite\n  steps:\n    - name: Find Issue\n      uses: actions-cool/issues-helper@v3\n      id: find\n      with:\n        actions: \"find-issues\"\n        token: ${{ inputs.token }}\n        issue-state: \"open\"\n        title-includes: \"❌ test failure\"\n        labels: \"test-failure\"\n\n    - name: Create Issue\n      uses: actions-cool/issues-helper@v3\n      id: create\n      if: ${{ steps.find.outputs.issues == '[]' }}\n      with:\n        actions: \"create-issue\"\n        token: ${{ inputs.token }}\n        title: \"❌ test failure\"\n        body: \"Running log of test failure for ${{ inputs.test_function }}\"\n        labels: \"test-failure\"\n        assignees: \"sanchitram1\"\n\n    - name: Log Comment\n      uses: actions-cool/issues-helper@v3\n      with:\n        actions: \"create-comment\"\n        token: ${{ inputs.token }}\n        issue-number: ${{ steps.create.outputs.issue-number || fromJSON(steps.find.outputs.issues)[0].number }}\n        body: |\n          # Test failure\n          ## ${{ inputs.test_function }}\n\n          logs: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}\n"
  },
  {
    "path": ".github/workflows/chai-api.ci.yml",
    "content": "name: api.ci\n\non:\n  push:\n    branches: [main]\n    paths:\n      - \"api/**\"\n  pull_request:\n    paths:\n      - \"api/**\"\n\nenv:\n  CARGO_TERM_COLOR: always\n\nconcurrency:\n  group: ${{ github.workflow }}-${{ github.ref }}\n  cancel-in-progress: true\n\njobs:\n  test:\n    name: test\n    runs-on: ubuntu-latest\n\n    services:\n      postgres:\n        image: postgres\n        env:\n          POSTGRES_USER: postgres\n          POSTGRES_PASSWORD: s3cr3t\n          POSTGRES_DB: chai\n        options: >-\n          --health-cmd pg_isready\n          --health-interval 10s\n          --health-timeout 5s\n          --health-retries 5\n        ports:\n          - 5435:5432\n\n    steps:\n      - uses: actions/checkout@v5\n      - name: Install dependencies\n        run: |\n          sudo apt-get update\n          sudo apt-get install -y postgresql-client\n      - name: Run tests\n        run: cargo test --verbose\n        working-directory: api\n        env:\n          DATABASE_URL: postgresql://postgres:s3cr3t@localhost:5435/chai\n\n  fmt:\n    name: Rustfmt\n    runs-on: ubuntu-latest\n    steps:\n      - uses: actions/checkout@v5\n      - uses: actions-rs/toolchain@v1\n        with:\n          profile: minimal\n          toolchain: stable\n          override: true\n          components: rustfmt\n      - run: cargo fmt --all -- --check\n        working-directory: api\n\n  clippy:\n    name: Clippy\n    runs-on: ubuntu-latest\n    steps:\n      - uses: actions/checkout@v5\n      - uses: actions-rs/toolchain@v1\n        with:\n          profile: minimal\n          toolchain: stable\n          override: true\n          components: clippy\n      - run: cargo clippy --all-targets --all-features -- -D warnings\n        working-directory: api\n\n  docker-build:\n    name: Build Docker Image\n    runs-on: ubuntu-latest\n    steps:\n      - uses: actions/checkout@v5\n      - name: Set up Docker Buildx\n        uses: docker/setup-buildx-action@v2\n      - name: Build\n        uses: docker/build-push-action@v6\n        with:\n          context: ./api\n          push: false # Set push to false to prevent pushing the image\n"
  },
  {
    "path": ".github/workflows/ci.yml",
    "content": "name: CI\n\non:\n  workflow_dispatch:\n    inputs:\n      env:\n        description: \"The environment to test against\"\n        required: false\n        type: choice\n        options:\n          - dev\n          - sepolia\n          - mainnet\n        default: \"dev\"\n  push:\n    branches:\n      - main\n    paths:\n      - \"**/*.py\"\n      - tests/**\n      - core/**\n      - package_managers/**\n      - ranker/**\n  pull_request:\n    branches:\n      - main\n    paths:\n      - \"**/*.py\"\n      - tests/**\n      - core/**\n      - package_managers/**\n      - ranker/**\n\njobs:\n  check:\n    runs-on: ubuntu-latest\n    steps:\n      - name: Checkout\n        uses: actions/checkout@v5\n\n      - name: Setup with pkgx\n        uses: pkgxdev/setup@v4\n        with:\n          +: astral.sh/uv\n            astral.sh/ruff\n            pytest.org\n\n      - name: Set up Python\n        uses: actions/setup-python@v5\n        with:\n          python-version-file: \".python-version\"\n\n      - name: Install the project\n        run: pkgx uv sync --locked --all-extras --dev --all-groups\n\n      - name: Lint with Ruff\n        uses: astral-sh/ruff-action@v3\n        with:\n          src: .\n\n      - name: Run tests with pytest\n        id: pytest\n        run: |\n          pkgx uv run pytest tests/\n\n  complain:\n    needs: check\n    if: failure()\n    runs-on: ubuntu-latest\n    permissions:\n      issues: write\n    steps:\n      - uses: actions/checkout@v5\n      - uses: ./.github/actions/complain\n        with:\n          test_function: \"pytest\"\n          token: ${{ secrets.GITHUB_TOKEN }}\n"
  },
  {
    "path": ".github/workflows/deploy.yml",
    "content": "name: \"Release Chai\"\nrun-name: Release Chai - ${{ inputs.env || 'auto' }} - ${{ inputs.ref || github.ref }}\n\non:\n  push:\n    branches:\n      - main\n  workflow_dispatch:\n    inputs:\n      env:\n        description: \"The environment to deploy to\"\n        required: true\n        type: choice\n        options:\n          - dev\n          - sepolia\n          - testnet\n          - mainnet\n      ref:\n        description: \"The git ref (SHA or tag) to deploy\"\n        required: false\n      deploy_latest:\n        description: \"Force deploy all components from latest commit\"\n        required: false\n        type: boolean\n\npermissions:\n  id-token: write\n  contents: read\n\njobs:\n  build:\n    if: ${{ github.event_name == 'workflow_dispatch' && inputs.deploy_latest == true }}\n    environment: ${{ inputs.env || 'dev' }}\n    runs-on: ubuntu-latest\n    steps:\n      - uses: actions/checkout@v5\n\n      - name: Configure AWS credentials\n        uses: aws-actions/configure-aws-credentials@v3\n        with:\n          role-to-assume: ${{ secrets.TEA_AWS_ROLE }}\n          aws-region: us-east-1\n\n      - name: Login to Amazon ECR\n        id: login-ecr\n        uses: aws-actions/amazon-ecr-login@v2\n\n      - name: Set deployment ref\n        id: set-ref\n        run: |\n          DEPLOY_REF=${{ inputs.ref || github.sha }}\n          echo \"DEPLOY_REF=${DEPLOY_REF}\" >> $GITHUB_ENV\n          echo \"deploy_ref=${DEPLOY_REF}\" >> $GITHUB_OUTPUT\n\n      - name: Set environment\n        id: set-env\n        run: |\n          DEPLOY_ENV=${{ inputs.env || 'dev' }}\n          echo \"DEPLOY_ENV=${DEPLOY_ENV}\" >> $GITHUB_ENV\n          echo \"deploy_env=${DEPLOY_ENV}\" >> $GITHUB_OUTPUT\n\n      - name: Build and push Crates indexer image\n        if: ${{ github.event_name == 'workflow_dispatch' && inputs.deploy_latest == true }}\n        uses: docker/build-push-action@v6\n        with:\n          context: .\n          file: ./package_managers/crates/Dockerfile\n          push: true\n          tags: |\n            ${{ steps.login-ecr.outputs.registry }}/chai-v2/crates:${{ env.DEPLOY_REF }}-${{ env.DEPLOY_ENV }}\n            ${{ steps.login-ecr.outputs.registry }}/chai-v2/crates:latest\n\n      - name: Build and push Homebrew indexer image\n        if: ${{ github.event_name == 'workflow_dispatch' && inputs.deploy_latest == true }}\n        uses: docker/build-push-action@v6\n        with:\n          context: .\n          file: ./package_managers/homebrew/Dockerfile\n          push: true\n          tags: |\n            ${{ steps.login-ecr.outputs.registry }}/chai-v2/homebrew:${{ env.DEPLOY_REF }}-${{ env.DEPLOY_ENV }}\n            ${{ steps.login-ecr.outputs.registry }}/chai-v2/homebrew:latest\n\n      - name: Build and push Debian indexer image\n        if: ${{ github.event_name == 'workflow_dispatch' && inputs.deploy_latest == true }}\n        uses: docker/build-push-action@v6\n        with:\n          context: .\n          file: ./package_managers/debian/Dockerfile\n          push: true\n          tags: |\n            ${{ steps.login-ecr.outputs.registry }}/chai-v2/debian:${{ env.DEPLOY_REF }}-${{ env.DEPLOY_ENV }}\n            ${{ steps.login-ecr.outputs.registry }}/chai-v2/debian:latest\n\n      - name: Build and push Pkgx indexer image\n        if: ${{ github.event_name == 'workflow_dispatch' && inputs.deploy_latest == true }}\n        uses: docker/build-push-action@v6\n        with:\n          context: .\n          file: ./package_managers/pkgx/Dockerfile\n     
     push: true\n          tags: |\n            ${{ steps.login-ecr.outputs.registry }}/chai-v2/pkgx:${{ env.DEPLOY_REF }}-${{ env.DEPLOY_ENV }}\n            ${{ steps.login-ecr.outputs.registry }}/chai-v2/pkgx:latest\n\n      - name: Build and push Alembic image\n        if: ${{ github.event_name == 'workflow_dispatch' && inputs.deploy_latest == true }}\n        uses: docker/build-push-action@v6\n        with:\n          context: .\n          file: ./alembic/Dockerfile\n          push: true\n          tags: |\n            ${{ steps.login-ecr.outputs.registry }}/chai-v2/alembic:${{ env.DEPLOY_REF }}-${{ env.DEPLOY_ENV }}\n            ${{ steps.login-ecr.outputs.registry }}/chai-v2/alembic:latest\n\n      - name: Build and push chai-api image\n        if: ${{ github.event_name == 'workflow_dispatch' && inputs.deploy_latest == true }}\n        uses: docker/build-push-action@v6\n        with:\n          context: ./api\n          file: ./api/Dockerfile\n          push: true\n          tags: |\n            ${{ steps.login-ecr.outputs.registry }}/chai-v2/chai-api:${{ env.DEPLOY_REF }}-${{ env.DEPLOY_ENV }}\n            ${{ steps.login-ecr.outputs.registry }}/chai-v2/chai-api:latest\n\n      - name: Build and push Ranker indexer image\n        if: ${{ github.event_name == 'workflow_dispatch' && inputs.deploy_latest == true }}\n        uses: docker/build-push-action@v6\n        with:\n          context: .\n          file: ./ranker/Dockerfile\n          push: true\n          tags: |\n            ${{ steps.login-ecr.outputs.registry }}/chai-v2/ranker:${{ env.DEPLOY_REF }}-${{ env.DEPLOY_ENV }}\n            ${{ steps.login-ecr.outputs.registry }}/chai-v2/ranker:latest\n\n  deploy:\n    needs: build\n    if: ${{ github.event_name == 'workflow_dispatch' }}\n    runs-on: ubuntu-latest\n    environment: ${{ inputs.env || 'dev' }}\n    steps:\n      - uses: actions/checkout@v5\n\n      - name: Configure AWS credentials\n        uses: aws-actions/configure-aws-credentials@v3\n        with:\n          role-to-assume: ${{ secrets.TEA_AWS_ROLE }}\n          aws-region: us-east-1\n\n      - name: Set deployment ref\n        id: set-ref\n        run: |\n          DEPLOY_REF=${{ inputs.ref }}\n          echo \"DEPLOY_REF=${DEPLOY_REF}\" >> $GITHUB_ENV\n          echo \"deploy_ref=${DEPLOY_REF}\" >> $GITHUB_OUTPUT\n\n      - name: Set environment\n        id: set-env\n        run: |\n          DEPLOY_ENV=${{ inputs.env }}\n          echo \"DEPLOY_ENV=${DEPLOY_ENV}\" >> $GITHUB_ENV\n          echo \"deploy_env=${DEPLOY_ENV}\" >> $GITHUB_OUTPUT\n\n      - name: Deploy chai-api\n        run: |\n          aws ecs update-service --cluster chai-${{ env.DEPLOY_ENV }} \\\n                                 --service ${{ env.DEPLOY_ENV }}-chai-api \\\n                                 --force-new-deployment\n"
  },
  {
    "path": ".gitignore",
    "content": "# Byte-compiled / optimized / DLL files\n__pycache__/\n*.py[cod]\n*$py.class\n\n# C extensions\n*.so\n\n# Distribution / packaging\n.Python\nbuild/\ndevelop-eggs/\ndist/\ndownloads/\neggs/\n.eggs/\nlib/\nlib64/\nparts/\nsdist/\nvar/\nwheels/\nshare/python-wheels/\n*.egg-info/\n.installed.cfg\n*.egg\nMANIFEST\n\n# PyInstaller\n#  Usually these files are written by a python script from a template\n#  before PyInstaller builds the exe, so as to inject date/other infos into it.\n*.manifest\n*.spec\n\n# Installer logs\npip-log.txt\npip-delete-this-directory.txt\n\n# Unit test / coverage reports\nhtmlcov/\n.tox/\n.nox/\n.coverage\n.coverage.*\n.cache\nnosetests.xml\ncoverage.xml\n*.cover\n*.py,cover\n.hypothesis/\n.pytest_cache/\ncover/\n\n# Translations\n*.mo\n*.pot\n\n# Django stuff:\n*.log\nlocal_settings.py\ndb.sqlite3\ndb.sqlite3-journal\n\n# Flask stuff:\ninstance/\n.webassets-cache\n\n# Scrapy stuff:\n.scrapy\n\n# Sphinx documentation\ndocs/_build/\n\n# PyBuilder\n.pybuilder/\ntarget/\n\n# Jupyter Notebook\n.ipynb_checkpoints\n\n# IPython\nprofile_default/\nipython_config.py\n\n# pyenv\n#   For a library or package, you might want to ignore these files since the code is\n#   intended to run in multiple environments; otherwise, check them in:\n# .python-version\n\n# pipenv\n#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.\n#   However, in case of collaboration, if having platform-specific dependencies or dependencies\n#   having no cross-platform support, pipenv may install dependencies that don't work, or not\n#   install all needed dependencies.\n#Pipfile.lock\n\n# poetry\n#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.\n#   This is especially recommended for binary packages to ensure reproducibility, and is more\n#   commonly ignored for libraries.\n#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control\n#poetry.lock\n\n# pdm\n#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.\n#pdm.lock\n#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it\n#   in version control.\n#   https://pdm.fming.dev/latest/usage/project/#working-with-version-control\n.pdm.toml\n.pdm-python\n.pdm-build/\n\n# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm\n__pypackages__/\n\n# Celery stuff\ncelerybeat-schedule\ncelerybeat.pid\n\n# SageMath parsed files\n*.sage.py\n\n# Environments\n.env\n.venv\nenv/\nvenv/\nENV/\nenv.bak/\nvenv.bak/\n\n# Spyder project settings\n.spyderproject\n.spyproject\n\n# Rope project settings\n.ropeproject\n\n# mkdocs documentation\n/site\n\n# mypy\n.mypy_cache/\n.dmypy.json\ndmypy.json\n\n# Pyre type checker\n.pyre/\n\n# pytype static type analyzer\n.pytype/\n\n# Cython debug symbols\ncython_debug/\n\n# PyCharm\n#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can\n#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore\n#  and can be added to the global gitignore or merged into this file.  For a more nuclear\n#  option (not recommended) you can uncomment the following to ignore the entire idea folder.\n#.idea/\n\n# data files\ndata\ndb/data\n\n# examples\nexamples/sbom-meta/sbom-meta\n*.svg\n\n# cursor\n.cursorrules\n.cursor/\nTASKS.md\n\n# DS Store\n.DS_Store\n\n# Profiling\n*.prof"
  },
  {
    "path": ".python-version",
    "content": "3.11\n"
  },
  {
    "path": "LICENSE",
    "content": "MIT License\n\nCopyright (c) 2024 tea protocol\n\nPermission is hereby granted, free of charge, to any person obtaining a copy\nof this software and associated documentation files (the \"Software\"), to deal\nin the Software without restriction, including without limitation the rights\nto use, copy, modify, merge, publish, distribute, sublicense, and/or sell\ncopies of the Software, and to permit persons to whom the Software is\nfurnished to do so, subject to the following conditions:\n\nThe above copyright notice and this permission notice shall be included in all\ncopies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\nIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\nFITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\nAUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\nLIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\nOUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\nSOFTWARE.\n"
  },
  {
    "path": "README.md",
    "content": "# CHAI\n\nCHAI is an attempt at an open-source data pipeline for package managers. The\ngoal is to have a pipeline that can use the data from any package manager and\nprovide a normalized data source for myriads of different use cases.\n\n## Getting Started\n\nUse [Docker](https://docker.com)\n\n1. Install Docker\n2. Clone the chai repository (https://docs.github.com/en/repositories/creating-and-managing-repositories/cloning-a-repository)\n3. Using a terminal, navigate to the cloned repository directory\n4. Run `docker compose build` to create the latest Docker images\n5. Then, run `docker compose up` to launch.\n\n> [!NOTE]\n>\n> This will run CHAI for all package managers. As an example crates by\n> itself will take over an hour and consume >5GB storage.\n>\n> Currently, we support:\n>\n> - crates\n> - Homebrew\n> - Debian\n> - pkgx\n>\n> You can run a single package manager by running\n> `PACKAGE_MANAGER=<name> docker compose up`\n>\n> We are planning on supporting `NPM`, `PyPI`, and `rubygems` next.\n\n### Arguments\n\nSpecify these eg. `FOO=bar docker compose up`:\n\n- `ENABLE_SCHEDULER`: When true, the pipeline runs on a schedule set by `FREQUENCY`.\n- `FREQUENCY`: Sets how often (in hours) the pipeline should run.\n- `TEST`: Useful for running in a test code\n  insertions.\n- `FETCH`: Determines whether to fetch new data or use whatever was saved locally.\n- `NO_CACHE`: When true, deletes temporary files after processing.\n\n> [!NOTE]\n> The flag `NO_CACHE` does not mean that files will not get downloaded to your local\n> storage (specifically, the ./data directory). It only means that we'll\n> delete these temporary files from ./data once we're done processing them.\n> If `FETCH` is false, the pipeline looks for source data in the cache, so this\n> will fail if you run `NO_CACHE` first, and `FETCH` false second.\n\nThese arguments are all configurable in the `docker-compose.yml` file.\n\n### Docker Services Overview\n\n1. `db`: [PostgreSQL] database for the reduced package data\n2. `alembic`: handles migrations\n3. `package_managers`: fetches and writes data for each package manager\n4. `api`: a simple REST API for reading from the db\n5. `ranker`: deduplicates and ranks the packages\n\n### Hard Reset\n\nStuff happens. Start over:\n\n`rm -rf ./data`: removes all the data the fetcher is putting.\n\n<!-- this is handled now that alembic/psycopg2 are in pkgx -->\n<!--\n## Alembic Alternatives\n\n- sqlx command line tool to manage migrations, alongside models for sqlx in rust\n- vapor's migrations are written in swift\n-->\n\n## Goals\n\nOur goal is to build a data schema that looks like this:\n\n![db/CHAI_ERD.png](db/CHAI_ERD.png)\n\nYou can read more about specific data models in the dbs [readme](db/README.md)\n\nOur specific application extracts the dependency graph understand what are\ncritical pieces of the open-source graph. We also built a simple example that displays\n[sbom-metadata](examples/sbom-meta) for your repository.\n\nThere are many other potential use cases for this data:\n\n- License compatibility checker\n- Developer publications\n- Package popularity\n- Dependency analysis vulnerability tool (requires translating semver)\n\n> [!TIP]\n> Help us add the above to the examples folder.\n\n## FAQs / Common Issues\n\n1. The database url is `postgresql://postgres:s3cr3t@localhost:5435/chai`, and\n   is used as `CHAI_DATABASE_URL` in the environment. `psql CHAI_DATABASE_URL`\n   will connect you to the database.\n2. 
If you're orchestrating via docker, swap `localhost` for `host.docker.internal`\n\n## Managing Dependencies\n\nWe use [`uv`](https://astral.sh/uv) to manage dependencies (and sometimes execution).\nAll dependencies are listed in [`pyproject.toml`](./pyproject.toml), under the\n`dependency-groups` header. Each group helps us classify the service we're adding a\ndependency for. For example, if we're adding a new dependency for all the indexers:\n\n```bash\nuv add --group indexer requests\n\n# use the --all-groups flag to sync your venv for all dependencies\nuv sync --all-groups\nuv pip compile --group indexers -o core/requirements.txt\n```\n\nThe last step writes the updated dependencies to a requirements.txt file, which is\ncrucial for the Docker containers executing the individual services to build correctly.\nEach indexer shares the same set of dependencies, and that requirement file is\n**generated by uv**, and maintained in [core/requirements.txt](core/requirements.txt)\n\n> [!IMPORTANT]\n> DO NOT UPDATE ANY `requirements.txt` FILES DIRECTLY\n> `uv` provides a way to generate that automatically, based on the pyproject.toml\n>\n> Have an idea on a better way to do this? Open to input...\n\n## Deployment\n\n```sh\nexport CHAI_DATABASE_URL=postgresql://<user>:<pw>@host.docker.internal:<port>/chai\nexport PGPASSWORD=<pw>\ndocker compose up alembic\n```\n\n## Tasks\n\nThese are tasks that can be run using [xcfile.dev]. If you use `pkgx`, typing\n`dev` loads the environment. Alternatively, run them manually.\n\n### reset\n\n```sh\nrm -rf db/data data .venv\n```\n\n### build\n\n```sh\ndocker compose build\n```\n\n### start-all\n\nRequires: build\n\n```sh\ndocker compose up -d\n```\n\n### stop\n\n```sh\ndocker compose down\n```\n\n### logs\n\n```sh\ndocker compose logs\n```\n\n### db-start\n\nRuns migrations and starts up the database\n\n```sh\ndocker compose build --no-cache db alembic\ndocker compose up alembic -d\n```\n\n### db-reset\n\nRequires: stop\n\n```sh\nrm -rf db/data\n```\n\n### db-generate-migration\n\nInputs: MIGRATION_NAME\nEnv: CHAI_DATABASE_URL=postgresql://postgres:s3cr3t@localhost:5435/chai\n\n```sh\ncd alembic\nalembic revision --autogenerate -m \"$MIGRATION_NAME\"\n```\n\n### db-upgrade\n\nEnv: CHAI_DATABASE_URL=postgresql://postgres:s3cr3t@localhost:5435/chai\n\n```sh\ncd alembic\nalembic upgrade head\n```\n\n### db-downgrade\n\nInputs: STEP\nEnv: CHAI_DATABASE_URL=postgresql://postgres:s3cr3t@localhost:5435/chai\n\n```sh\ncd alembic\nalembic downgrade -$STEP\n```\n\n### db\n\n```sh\npsql \"postgresql://postgres:s3cr3t@localhost:5435/chai\"\n```\n\n### restart-api\n\nRefreshes table knowledge from the db.\n\n```sh\ndocker compose restart api\n```\n\n### remove-orphans\n\n```sh\ndocker compose down --remove-orphans\n```\n\n### start-service\n\nInputs: SERVICE\nEnv: CHAI_DATABASE_URL=postgresql://postgres:s3cr3t@host.docker.internal:5435/chai\n\n```sh\ndocker compose up $SERVICE --build\n```\n\n### check\n\nInputs: FOLDER\nEnvironment: FOLDER=.\n\n```sh\npkgx +python@3.13 ty check $FOLDER\n```\n\n[PostgreSQL]: https://www.postgresql.org\n[`pkgx`]: https://pkgx.sh\n"
  },
  {
    "path": "alembic/.pkgx.yaml",
    "content": "# this .pkgx.yaml file is only for alembic\n\ndependencies:\n  postgresql.org: 16\n  alembic.sqlalchemy.org: 1\n  psycopg.org/psycopg2: 2\n"
  },
  {
    "path": "alembic/Dockerfile",
    "content": "FROM ghcr.io/astral-sh/uv:python3.11-bookworm-slim\nRUN apt update && apt -y install postgresql\nRUN uv pip install alembic==1.13.2 psycopg2-binary==2.9.10 sqlalchemy==2.0.41 --system\nCOPY . . \nWORKDIR /alembic\nRUN chmod +x /alembic/run_migrations.sh\nENTRYPOINT [\"/bin/sh\", \"/alembic/run_migrations.sh\"]\n"
  },
  {
    "path": "alembic/README.md",
    "content": "# CHAI Data Migrations\n\nThis directory contains the Alembic configuration and migration scripts for managing the\ndatabase schema of the CHAI project. Alembic is used to handle database migrations,\nallowing for version control of our database schema.\n\n### About Alembic\n\nAlembic is a database migration tool for SQLAlchemy. It allows us to:\n\n- Track changes to our database schema over time\n- Apply and revert these changes in a controlled manner\n- Generate migration scripts automatically based on model changes\n\n> [!NOTE]\n> It's important to note that while `alembic` serves our current needs, it may not be\n> our long-term solution. As the CHAI project evolves, we might explore other database\n> migration tools or strategies that better fit our growing requirements. We're open to\n> reassessing our approach to schema management as needed.\n\n## Entrypoint\n\nThe main entrypoint for running migrations is the\n[run migrations script](run_migrations.sh). This script orchestrates the initialization\nand migration process.\n\n## Steps\n\n1. [Initialize](init-script.sql)\n\nThe initialization script creates the database `chai`, and loads it up with any\nextensions that we'd need, so we've got a clean slate for our db structures.\n\n2. [Load](load-values.sql)\n\nThe load script prepopulates some of the tables, with `enum`-like values - specifically\nfor:\n\n- `url_types`: defines different types of URLs (e.g., source, homepage, documentation)\n- `depends_on_types`: defines different types of dependencies (e.g., runtime,\n  development)\n- `sources` and `package_managers`: defines different package managers (e.g., npm, pypi)\n\n3. Run Alembic Migrations\n\nAfter initialization and loading initial data, the script runs Alembic migrations to apply any pending database schema changes.\n\n## Contributing\n\nTo contribute to the database schema:\n\n1. Make a change in the [models](../core/models/__init__.py) file\n2. Generate a new migration script: `alembic revision --autogenerate \"Description\"`\n3. Review the generated migration script in the [versions](versions/) directory. The\n   auto-generation is powerful but not perfect, please review the script carefully.\n4. Test the migration by running `alembic upgrade head`.\n"
  },
  {
    "path": "alembic/alembic.ini",
    "content": "[alembic]\nscript_location = .\nfile_template = %%(year)d%%(month).2d%%(day).2d_%%(hour).2d%%(minute).2d-%%(slug)s\n\nprepend_sys_path = ..\nversion_path_separator = os\n\n# URL\nsqlalchemy.url = ${env:CHAI_DATABASE_URL}\n\n\n[post_write_hooks]\n# lint with attempts to fix using \"ruff\" - use the exec runner, execute a binary\n# TODO: this doesn't work rn\n# hooks = ruff\n# ruff.type = exec\n# ruff.executable = %(here)s/.venv/bin/ruff\n# ruff.options = --fix REVISION_SCRIPT_FILENAME\n\n# Logging configuration\n[loggers]\nkeys = root,sqlalchemy,alembic\n\n[handlers]\nkeys = console\n\n[formatters]\nkeys = generic\n\n[logger_root]\nlevel = WARN\nhandlers = console\nqualname =\n\n[logger_sqlalchemy]\nlevel = WARN\nhandlers =\nqualname = sqlalchemy.engine\n\n[logger_alembic]\nlevel = INFO\nhandlers =\nqualname = alembic\n\n[handler_console]\nclass = StreamHandler\nargs = (sys.stderr,)\nlevel = NOTSET\nformatter = generic\n\n[formatter_generic]\nformat = %(levelname)-5.5s [%(name)s] %(message)s\ndatefmt = %H:%M:%S\n"
  },
  {
    "path": "alembic/env.py",
    "content": "import os\nfrom logging.config import fileConfig\n\nfrom sqlalchemy import engine_from_config, pool\n\nfrom alembic import context\nfrom core.models import Base\n\n# this is the Alembic Config object, which provides\n# access to the values within the .ini file in use.\nconfig = context.config\n\n# interpret the config file for Python logging.\nif config.config_file_name is not None:\n    fileConfig(config.config_file_name)\n\n# metadata for all models\ntarget_metadata = Base.metadata\n\n# get database url\ndatabase_url = os.getenv(\"CHAI_DATABASE_URL\")\nif database_url:\n    config.set_main_option(\"sqlalchemy.url\", database_url)\n\n\ndef run_migrations_offline() -> None:\n    \"\"\"Run migrations in 'offline' mode.\n\n    This configures the context with just a URL\n    and not an Engine, though an Engine is acceptable\n    here as well.  By skipping the Engine creation\n    we don't even need a DBAPI to be available.\n\n    Calls to context.execute() here emit the given string to the\n    script output.\n    \"\"\"\n    url = config.get_main_option(\"sqlalchemy.url\")\n    context.configure(\n        url=url,\n        target_metadata=target_metadata,\n        literal_binds=True,\n        dialect_opts={\"paramstyle\": \"named\"},\n    )\n\n    with context.begin_transaction():\n        context.run_migrations()\n\n\ndef run_migrations_online() -> None:\n    \"\"\"Run migrations in 'online' mode.\n\n    In this scenario we need to create an Engine\n    and associate a connection with the context.\n    \"\"\"\n    connectable = engine_from_config(\n        config.get_section(config.config_ini_section, {}),\n        prefix=\"sqlalchemy.\",\n        poolclass=pool.NullPool,\n    )\n\n    with connectable.connect() as connection:\n        context.configure(connection=connection, target_metadata=target_metadata)\n\n        with context.begin_transaction():\n            context.run_migrations()\n\n\nif context.is_offline_mode():\n    run_migrations_offline()\nelse:\n    run_migrations_online()\n"
  },
  {
    "path": "alembic/init-script.sql",
    "content": "CREATE DATABASE chai;\n\n\\c chai\n\nCREATE EXTENSION IF NOT EXISTS \"pgcrypto\";\nCREATE EXTENSION IF NOT EXISTS \"uuid-ossp\";\nCREATE EXTENSION IF NOT EXISTS pg_trgm;\n"
  },
  {
    "path": "alembic/load-values.sql",
    "content": "-- url types\nINSERT INTO \"url_types\" (\"name\")\nVALUES ('source'), ('homepage'), ('documentation'), ('repository')\nON CONFLICT (name) DO NOTHING;\n\n-- dependency types \nINSERT INTO \"depends_on_types\" (\"name\")\nVALUES\n('build'),\n('development'),\n('runtime'),\n('test'),\n('optional'),\n('recommended'),\n('uses_from_macos')\nON CONFLICT (name) DO NOTHING;\n\n-- sources\nINSERT INTO \"sources\" (\"type\")\nVALUES ('crates'), ('npm'), ('pypi'), ('rubygems'), ('github'), ('homebrew'), ('debian'), ('pkgx')\nON CONFLICT (type) DO NOTHING;\n\nINSERT INTO \"package_managers\" (\"source_id\")\nSELECT id\nFROM \"sources\"\nWHERE \"type\" IN ('crates', 'npm', 'pypi', 'rubygems', 'github', 'homebrew', 'debian', 'pkgx')\nON CONFLICT (source_id) DO NOTHING;\n"
  },
  {
    "path": "alembic/run_migrations.sh",
    "content": "#!/bin/bash\n\nset -uo pipefail\n\n# This script sets up the database, runs migrations, and loads initial values\n\n# Check if the 'chai' database exists, create it if it doesn't\nif psql \"$CHAI_DATABASE_ADMIN_URL\" -tAc \"SELECT 1 FROM pg_database WHERE datname='chai'\" | grep -q 1\nthen\n    echo \"Database 'chai' already exists\"\nelse\n    echo \"Database 'chai' does not exist, creating...\"\n    psql \"$CHAI_DATABASE_ADMIN_URL\" -f init-script.sql -a\nfi\n\n# Run migrations and load data (uses 'chai' database)\necho \"Current database version: $(alembic current)\"\nalembic upgrade head || { echo \"Migration failed\"; exit 1; }\n\necho \"Loading initial values into the database...\"\npsql \"$CHAI_DATABASE_URL\" -f load-values.sql -a\n\necho \"Database setup and initialization complete\""
  },
  {
    "path": "alembic/script.py.mako",
    "content": "\"\"\"${message}\n\nRevision ID: ${up_revision}\nRevises: ${down_revision | comma,n}\nCreate Date: ${create_date}\n\n\"\"\"\nfrom typing import Sequence, Union\n\nfrom alembic import op\nimport sqlalchemy as sa\n${imports if imports else \"\"}\n\n# revision identifiers, used by Alembic.\nrevision: str = ${repr(up_revision)}\ndown_revision: Union[str, None] = ${repr(down_revision)}\nbranch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}\ndepends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}\n\n\ndef upgrade() -> None:\n    ${upgrades if upgrades else \"pass\"}\n\n\ndef downgrade() -> None:\n    ${downgrades if downgrades else \"pass\"}\n"
  },
  {
    "path": "alembic/versions/20241028_1217-base_migration.py",
    "content": "\"\"\"base migration\n\nRevision ID: 238d591d5310\nRevises:\nCreate Date: 2024-10-28 12:17:43.762965\n\n\"\"\"\n\nfrom collections.abc import Sequence\n\nimport sqlalchemy as sa\n\nfrom alembic import op\n\n# revision identifiers, used by Alembic.\nrevision: str = \"238d591d5310\"\ndown_revision: str | None = None\nbranch_labels: str | Sequence[str] | None = None\ndepends_on: str | Sequence[str] | None = None\n\n\ndef upgrade() -> None:\n    # ### commands auto generated by Alembic - please adjust! ###\n    op.create_table(\n        \"depends_on_types\",\n        sa.Column(\n            \"id\",\n            sa.UUID(),\n            server_default=sa.text(\"uuid_generate_v4()\"),\n            nullable=False,\n        ),\n        sa.Column(\"name\", sa.String(), nullable=False),\n        sa.Column(\n            \"created_at\", sa.DateTime(), server_default=sa.text(\"now()\"), nullable=False\n        ),\n        sa.Column(\n            \"updated_at\", sa.DateTime(), server_default=sa.text(\"now()\"), nullable=False\n        ),\n        sa.PrimaryKeyConstraint(\"id\", name=op.f(\"pk_depends_on_types\")),\n    )\n    op.create_index(\n        op.f(\"ix_depends_on_types_name\"), \"depends_on_types\", [\"name\"], unique=True\n    )\n    op.create_table(\n        \"licenses\",\n        sa.Column(\n            \"id\",\n            sa.UUID(),\n            server_default=sa.text(\"uuid_generate_v4()\"),\n            nullable=False,\n        ),\n        sa.Column(\"name\", sa.String(), nullable=False),\n        sa.Column(\n            \"created_at\", sa.DateTime(), server_default=sa.text(\"now()\"), nullable=False\n        ),\n        sa.Column(\n            \"updated_at\", sa.DateTime(), server_default=sa.text(\"now()\"), nullable=False\n        ),\n        sa.PrimaryKeyConstraint(\"id\", name=op.f(\"pk_licenses\")),\n    )\n    op.create_index(op.f(\"ix_licenses_name\"), \"licenses\", [\"name\"], unique=True)\n    op.create_table(\n        \"sources\",\n        sa.Column(\n            \"id\",\n            sa.UUID(),\n            server_default=sa.text(\"uuid_generate_v4()\"),\n            nullable=False,\n        ),\n        sa.Column(\"type\", sa.String(), nullable=False),\n        sa.Column(\n            \"created_at\", sa.DateTime(), server_default=sa.text(\"now()\"), nullable=False\n        ),\n        sa.Column(\n            \"updated_at\", sa.DateTime(), server_default=sa.text(\"now()\"), nullable=False\n        ),\n        sa.PrimaryKeyConstraint(\"id\", name=op.f(\"pk_sources\")),\n        sa.UniqueConstraint(\"type\", name=op.f(\"uq_sources_type\")),\n    )\n    op.create_table(\n        \"url_types\",\n        sa.Column(\n            \"id\",\n            sa.UUID(),\n            server_default=sa.text(\"uuid_generate_v4()\"),\n            nullable=False,\n        ),\n        sa.Column(\"name\", sa.String(), nullable=False),\n        sa.Column(\n            \"created_at\", sa.DateTime(), server_default=sa.text(\"now()\"), nullable=False\n        ),\n        sa.Column(\n            \"updated_at\", sa.DateTime(), server_default=sa.text(\"now()\"), nullable=False\n        ),\n        sa.PrimaryKeyConstraint(\"id\", name=op.f(\"pk_url_types\")),\n        sa.UniqueConstraint(\"name\", name=op.f(\"uq_url_types_name\")),\n    )\n    op.create_table(\n        \"package_managers\",\n        sa.Column(\n            \"id\",\n            sa.UUID(),\n            server_default=sa.text(\"uuid_generate_v4()\"),\n            nullable=False,\n        ),\n        sa.Column(\"source_id\", 
sa.UUID(), nullable=False),\n        sa.Column(\n            \"created_at\", sa.DateTime(), server_default=sa.text(\"now()\"), nullable=False\n        ),\n        sa.Column(\n            \"updated_at\", sa.DateTime(), server_default=sa.text(\"now()\"), nullable=False\n        ),\n        sa.ForeignKeyConstraint(\n            [\"source_id\"],\n            [\"sources.id\"],\n            name=op.f(\"fk_package_managers_source_id_sources\"),\n        ),\n        sa.PrimaryKeyConstraint(\"id\", name=op.f(\"pk_package_managers\")),\n        sa.UniqueConstraint(\"source_id\", name=op.f(\"uq_package_managers_source_id\")),\n    )\n    op.create_table(\n        \"urls\",\n        sa.Column(\n            \"id\",\n            sa.UUID(),\n            server_default=sa.text(\"uuid_generate_v4()\"),\n            nullable=False,\n        ),\n        sa.Column(\"url\", sa.String(), nullable=False),\n        sa.Column(\"url_type_id\", sa.UUID(), nullable=False),\n        sa.Column(\n            \"created_at\", sa.DateTime(), server_default=sa.text(\"now()\"), nullable=False\n        ),\n        sa.Column(\n            \"updated_at\", sa.DateTime(), server_default=sa.text(\"now()\"), nullable=False\n        ),\n        sa.ForeignKeyConstraint(\n            [\"url_type_id\"],\n            [\"url_types.id\"],\n            name=op.f(\"fk_urls_url_type_id_url_types\"),\n        ),\n        sa.PrimaryKeyConstraint(\"id\", name=op.f(\"pk_urls\")),\n        sa.UniqueConstraint(\"url_type_id\", \"url\", name=\"uq_url_type_url\"),\n    )\n    op.create_index(op.f(\"ix_urls_url\"), \"urls\", [\"url\"], unique=False)\n    op.create_index(op.f(\"ix_urls_url_type_id\"), \"urls\", [\"url_type_id\"], unique=False)\n    op.create_table(\n        \"users\",\n        sa.Column(\n            \"id\",\n            sa.UUID(),\n            server_default=sa.text(\"uuid_generate_v4()\"),\n            nullable=False,\n        ),\n        sa.Column(\"username\", sa.String(), nullable=False),\n        sa.Column(\"source_id\", sa.UUID(), nullable=False),\n        sa.Column(\"import_id\", sa.String(), nullable=False),\n        sa.Column(\n            \"created_at\", sa.DateTime(), server_default=sa.text(\"now()\"), nullable=False\n        ),\n        sa.Column(\n            \"updated_at\", sa.DateTime(), server_default=sa.text(\"now()\"), nullable=False\n        ),\n        sa.ForeignKeyConstraint(\n            [\"source_id\"], [\"sources.id\"], name=op.f(\"fk_users_source_id_sources\")\n        ),\n        sa.PrimaryKeyConstraint(\"id\", name=op.f(\"pk_users\")),\n        sa.UniqueConstraint(\"source_id\", \"username\", name=\"uq_source_username\"),\n    )\n    op.create_index(op.f(\"ix_users_import_id\"), \"users\", [\"import_id\"], unique=False)\n    op.create_index(op.f(\"ix_users_source_id\"), \"users\", [\"source_id\"], unique=False)\n    op.create_index(op.f(\"ix_users_username\"), \"users\", [\"username\"], unique=False)\n    op.create_table(\n        \"load_history\",\n        sa.Column(\n            \"id\",\n            sa.UUID(),\n            server_default=sa.text(\"uuid_generate_v4()\"),\n            nullable=False,\n        ),\n        sa.Column(\"package_manager_id\", sa.UUID(), nullable=False),\n        sa.Column(\n            \"created_at\", sa.DateTime(), server_default=sa.text(\"now()\"), nullable=False\n        ),\n        sa.Column(\n            \"updated_at\", sa.DateTime(), server_default=sa.text(\"now()\"), nullable=False\n        ),\n        sa.ForeignKeyConstraint(\n            [\"package_manager_id\"],\n      
      [\"package_managers.id\"],\n            name=op.f(\"fk_load_history_package_manager_id_package_managers\"),\n        ),\n        sa.PrimaryKeyConstraint(\"id\", name=op.f(\"pk_load_history\")),\n    )\n    op.create_table(\n        \"packages\",\n        sa.Column(\n            \"id\",\n            sa.UUID(),\n            server_default=sa.text(\"uuid_generate_v4()\"),\n            nullable=False,\n        ),\n        sa.Column(\"derived_id\", sa.String(), nullable=False),\n        sa.Column(\"name\", sa.String(), nullable=False),\n        sa.Column(\"package_manager_id\", sa.UUID(), nullable=False),\n        sa.Column(\"import_id\", sa.String(), nullable=False),\n        sa.Column(\"readme\", sa.String(), nullable=True),\n        sa.Column(\n            \"created_at\", sa.DateTime(), server_default=sa.text(\"now()\"), nullable=False\n        ),\n        sa.Column(\n            \"updated_at\", sa.DateTime(), server_default=sa.text(\"now()\"), nullable=False\n        ),\n        sa.ForeignKeyConstraint(\n            [\"package_manager_id\"],\n            [\"package_managers.id\"],\n            name=op.f(\"fk_packages_package_manager_id_package_managers\"),\n        ),\n        sa.PrimaryKeyConstraint(\"id\", name=op.f(\"pk_packages\")),\n        sa.UniqueConstraint(\"derived_id\", name=op.f(\"uq_packages_derived_id\")),\n        sa.UniqueConstraint(\n            \"package_manager_id\", \"import_id\", name=\"uq_package_manager_import_id\"\n        ),\n    )\n    op.create_index(\n        op.f(\"ix_packages_import_id\"), \"packages\", [\"import_id\"], unique=False\n    )\n    op.create_index(op.f(\"ix_packages_name\"), \"packages\", [\"name\"], unique=False)\n    op.create_index(\n        op.f(\"ix_packages_package_manager_id\"),\n        \"packages\",\n        [\"package_manager_id\"],\n        unique=False,\n    )\n    op.create_table(\n        \"package_urls\",\n        sa.Column(\n            \"id\",\n            sa.UUID(),\n            server_default=sa.text(\"uuid_generate_v4()\"),\n            nullable=False,\n        ),\n        sa.Column(\"package_id\", sa.UUID(), nullable=False),\n        sa.Column(\"url_id\", sa.UUID(), nullable=False),\n        sa.Column(\n            \"created_at\", sa.DateTime(), server_default=sa.text(\"now()\"), nullable=False\n        ),\n        sa.Column(\n            \"updated_at\", sa.DateTime(), server_default=sa.text(\"now()\"), nullable=False\n        ),\n        sa.ForeignKeyConstraint(\n            [\"package_id\"],\n            [\"packages.id\"],\n            name=op.f(\"fk_package_urls_package_id_packages\"),\n        ),\n        sa.ForeignKeyConstraint(\n            [\"url_id\"], [\"urls.id\"], name=op.f(\"fk_package_urls_url_id_urls\")\n        ),\n        sa.PrimaryKeyConstraint(\"id\", name=op.f(\"pk_package_urls\")),\n        sa.UniqueConstraint(\"package_id\", \"url_id\", name=\"uq_package_url\"),\n    )\n    op.create_index(\n        op.f(\"ix_package_urls_package_id\"), \"package_urls\", [\"package_id\"], unique=False\n    )\n    op.create_index(\n        op.f(\"ix_package_urls_url_id\"), \"package_urls\", [\"url_id\"], unique=False\n    )\n    op.create_table(\n        \"user_packages\",\n        sa.Column(\n            \"id\",\n            sa.UUID(),\n            server_default=sa.text(\"uuid_generate_v4()\"),\n            nullable=False,\n        ),\n        sa.Column(\"user_id\", sa.UUID(), nullable=False),\n        sa.Column(\"package_id\", sa.UUID(), nullable=False),\n        sa.Column(\n            \"created_at\", 
sa.DateTime(), server_default=sa.text(\"now()\"), nullable=False\n        ),\n        sa.Column(\n            \"updated_at\", sa.DateTime(), server_default=sa.text(\"now()\"), nullable=False\n        ),\n        sa.ForeignKeyConstraint(\n            [\"package_id\"],\n            [\"packages.id\"],\n            name=op.f(\"fk_user_packages_package_id_packages\"),\n        ),\n        sa.ForeignKeyConstraint(\n            [\"user_id\"], [\"users.id\"], name=op.f(\"fk_user_packages_user_id_users\")\n        ),\n        sa.PrimaryKeyConstraint(\"id\", name=op.f(\"pk_user_packages\")),\n        sa.UniqueConstraint(\"user_id\", \"package_id\", name=\"uq_user_package\"),\n    )\n    op.create_index(\n        op.f(\"ix_user_packages_package_id\"),\n        \"user_packages\",\n        [\"package_id\"],\n        unique=False,\n    )\n    op.create_index(\n        op.f(\"ix_user_packages_user_id\"), \"user_packages\", [\"user_id\"], unique=False\n    )\n    op.create_table(\n        \"versions\",\n        sa.Column(\n            \"id\",\n            sa.UUID(),\n            server_default=sa.text(\"uuid_generate_v4()\"),\n            nullable=False,\n        ),\n        sa.Column(\"package_id\", sa.UUID(), nullable=False),\n        sa.Column(\"version\", sa.String(), nullable=False),\n        sa.Column(\"import_id\", sa.String(), nullable=False),\n        sa.Column(\"size\", sa.Integer(), nullable=True),\n        sa.Column(\"published_at\", sa.DateTime(), nullable=True),\n        sa.Column(\"license_id\", sa.UUID(), nullable=True),\n        sa.Column(\"downloads\", sa.Integer(), nullable=True),\n        sa.Column(\"checksum\", sa.String(), nullable=True),\n        sa.Column(\n            \"created_at\", sa.DateTime(), server_default=sa.text(\"now()\"), nullable=False\n        ),\n        sa.Column(\n            \"updated_at\", sa.DateTime(), server_default=sa.text(\"now()\"), nullable=False\n        ),\n        sa.ForeignKeyConstraint(\n            [\"license_id\"],\n            [\"licenses.id\"],\n            name=op.f(\"fk_versions_license_id_licenses\"),\n        ),\n        sa.ForeignKeyConstraint(\n            [\"package_id\"],\n            [\"packages.id\"],\n            name=op.f(\"fk_versions_package_id_packages\"),\n        ),\n        sa.PrimaryKeyConstraint(\"id\", name=op.f(\"pk_versions\")),\n        sa.UniqueConstraint(\"package_id\", \"version\", name=\"uq_package_version\"),\n    )\n    op.create_index(\n        op.f(\"ix_versions_downloads\"), \"versions\", [\"downloads\"], unique=False\n    )\n    op.create_index(\n        op.f(\"ix_versions_import_id\"), \"versions\", [\"import_id\"], unique=False\n    )\n    op.create_index(\n        op.f(\"ix_versions_license_id\"), \"versions\", [\"license_id\"], unique=False\n    )\n    op.create_index(\n        op.f(\"ix_versions_package_id\"), \"versions\", [\"package_id\"], unique=False\n    )\n    op.create_index(\n        op.f(\"ix_versions_published_at\"), \"versions\", [\"published_at\"], unique=False\n    )\n    op.create_index(op.f(\"ix_versions_size\"), \"versions\", [\"size\"], unique=False)\n    op.create_index(op.f(\"ix_versions_version\"), \"versions\", [\"version\"], unique=False)\n    op.create_table(\n        \"dependencies\",\n        sa.Column(\n            \"id\",\n            sa.UUID(),\n            server_default=sa.text(\"uuid_generate_v4()\"),\n            nullable=False,\n        ),\n        sa.Column(\"version_id\", sa.UUID(), nullable=False),\n        sa.Column(\"dependency_id\", sa.UUID(), nullable=False),\n        
sa.Column(\"dependency_type_id\", sa.UUID(), nullable=True),\n        sa.Column(\"semver_range\", sa.String(), nullable=True),\n        sa.Column(\n            \"created_at\", sa.DateTime(), server_default=sa.text(\"now()\"), nullable=False\n        ),\n        sa.Column(\n            \"updated_at\", sa.DateTime(), server_default=sa.text(\"now()\"), nullable=False\n        ),\n        sa.ForeignKeyConstraint(\n            [\"dependency_id\"],\n            [\"packages.id\"],\n            name=op.f(\"fk_dependencies_dependency_id_packages\"),\n        ),\n        sa.ForeignKeyConstraint(\n            [\"dependency_type_id\"],\n            [\"depends_on_types.id\"],\n            name=op.f(\"fk_dependencies_dependency_type_id_depends_on_types\"),\n        ),\n        sa.ForeignKeyConstraint(\n            [\"version_id\"],\n            [\"versions.id\"],\n            name=op.f(\"fk_dependencies_version_id_versions\"),\n        ),\n        sa.PrimaryKeyConstraint(\"id\", name=op.f(\"pk_dependencies\")),\n        sa.UniqueConstraint(\n            \"version_id\",\n            \"dependency_id\",\n            \"dependency_type_id\",\n            name=\"uq_version_dependency_type\",\n        ),\n    )\n    op.create_index(\n        op.f(\"ix_dependencies_dependency_id\"),\n        \"dependencies\",\n        [\"dependency_id\"],\n        unique=False,\n    )\n    op.create_index(\n        op.f(\"ix_dependencies_dependency_type_id\"),\n        \"dependencies\",\n        [\"dependency_type_id\"],\n        unique=False,\n    )\n    op.create_index(\n        op.f(\"ix_dependencies_version_id\"), \"dependencies\", [\"version_id\"], unique=False\n    )\n    op.create_table(\n        \"user_versions\",\n        sa.Column(\n            \"id\",\n            sa.UUID(),\n            server_default=sa.text(\"uuid_generate_v4()\"),\n            nullable=False,\n        ),\n        sa.Column(\"user_id\", sa.UUID(), nullable=False),\n        sa.Column(\"version_id\", sa.UUID(), nullable=False),\n        sa.Column(\n            \"created_at\", sa.DateTime(), server_default=sa.text(\"now()\"), nullable=False\n        ),\n        sa.Column(\n            \"updated_at\", sa.DateTime(), server_default=sa.text(\"now()\"), nullable=False\n        ),\n        sa.ForeignKeyConstraint(\n            [\"user_id\"], [\"users.id\"], name=op.f(\"fk_user_versions_user_id_users\")\n        ),\n        sa.ForeignKeyConstraint(\n            [\"version_id\"],\n            [\"versions.id\"],\n            name=op.f(\"fk_user_versions_version_id_versions\"),\n        ),\n        sa.PrimaryKeyConstraint(\"id\", name=op.f(\"pk_user_versions\")),\n        sa.UniqueConstraint(\"user_id\", \"version_id\", name=\"uq_user_version\"),\n    )\n    op.create_index(\n        op.f(\"ix_user_versions_user_id\"), \"user_versions\", [\"user_id\"], unique=False\n    )\n    op.create_index(\n        op.f(\"ix_user_versions_version_id\"),\n        \"user_versions\",\n        [\"version_id\"],\n        unique=False,\n    )\n    # ### end Alembic commands ###\n\n\ndef downgrade() -> None:\n    # ### commands auto generated by Alembic - please adjust! 
###\n    op.drop_index(op.f(\"ix_user_versions_version_id\"), table_name=\"user_versions\")\n    op.drop_index(op.f(\"ix_user_versions_user_id\"), table_name=\"user_versions\")\n    op.drop_table(\"user_versions\")\n    op.drop_index(op.f(\"ix_dependencies_version_id\"), table_name=\"dependencies\")\n    op.drop_index(op.f(\"ix_dependencies_dependency_type_id\"), table_name=\"dependencies\")\n    op.drop_index(op.f(\"ix_dependencies_dependency_id\"), table_name=\"dependencies\")\n    op.drop_table(\"dependencies\")\n    op.drop_index(op.f(\"ix_versions_version\"), table_name=\"versions\")\n    op.drop_index(op.f(\"ix_versions_size\"), table_name=\"versions\")\n    op.drop_index(op.f(\"ix_versions_published_at\"), table_name=\"versions\")\n    op.drop_index(op.f(\"ix_versions_package_id\"), table_name=\"versions\")\n    op.drop_index(op.f(\"ix_versions_license_id\"), table_name=\"versions\")\n    op.drop_index(op.f(\"ix_versions_import_id\"), table_name=\"versions\")\n    op.drop_index(op.f(\"ix_versions_downloads\"), table_name=\"versions\")\n    op.drop_table(\"versions\")\n    op.drop_index(op.f(\"ix_user_packages_user_id\"), table_name=\"user_packages\")\n    op.drop_index(op.f(\"ix_user_packages_package_id\"), table_name=\"user_packages\")\n    op.drop_table(\"user_packages\")\n    op.drop_index(op.f(\"ix_package_urls_url_id\"), table_name=\"package_urls\")\n    op.drop_index(op.f(\"ix_package_urls_package_id\"), table_name=\"package_urls\")\n    op.drop_table(\"package_urls\")\n    op.drop_index(op.f(\"ix_packages_package_manager_id\"), table_name=\"packages\")\n    op.drop_index(op.f(\"ix_packages_name\"), table_name=\"packages\")\n    op.drop_index(op.f(\"ix_packages_import_id\"), table_name=\"packages\")\n    op.drop_table(\"packages\")\n    op.drop_table(\"load_history\")\n    op.drop_index(op.f(\"ix_users_username\"), table_name=\"users\")\n    op.drop_index(op.f(\"ix_users_source_id\"), table_name=\"users\")\n    op.drop_index(op.f(\"ix_users_import_id\"), table_name=\"users\")\n    op.drop_table(\"users\")\n    op.drop_index(op.f(\"ix_urls_url_type_id\"), table_name=\"urls\")\n    op.drop_index(op.f(\"ix_urls_url\"), table_name=\"urls\")\n    op.drop_table(\"urls\")\n    op.drop_table(\"package_managers\")\n    op.drop_table(\"url_types\")\n    op.drop_table(\"sources\")\n    op.drop_index(op.f(\"ix_licenses_name\"), table_name=\"licenses\")\n    op.drop_table(\"licenses\")\n    op.drop_index(op.f(\"ix_depends_on_types_name\"), table_name=\"depends_on_types\")\n    op.drop_table(\"depends_on_types\")\n    # ### end Alembic commands ###\n"
  },
  {
    "path": "alembic/versions/20250312_0045-add_legacy_dependency_table.py",
    "content": "\"\"\"add-legacy-dependency-table\n\nRevision ID: 89af630dc946\nRevises: 238d591d5310\nCreate Date: 2025-03-12 00:45:35.727521\n\n\"\"\"\n\nfrom collections.abc import Sequence\n\nimport sqlalchemy as sa\n\nfrom alembic import op\n\n# revision identifiers, used by Alembic.\nrevision: str = \"89af630dc946\"\ndown_revision: str | None = \"238d591d5310\"\nbranch_labels: str | Sequence[str] | None = None\ndepends_on: str | Sequence[str] | None = None\n\n\ndef upgrade() -> None:\n    op.create_table(\n        \"legacy_dependencies\",\n        sa.Column(\"id\", sa.Integer(), nullable=False),\n        sa.Column(\"package_id\", sa.UUID(), nullable=False),\n        sa.Column(\"dependency_id\", sa.UUID(), nullable=False),\n        sa.Column(\"dependency_type_id\", sa.UUID(), nullable=False),\n        sa.Column(\"semver_range\", sa.String(), nullable=True),\n        sa.Column(\n            \"created_at\", sa.DateTime(), server_default=sa.text(\"now()\"), nullable=False\n        ),\n        sa.Column(\n            \"updated_at\", sa.DateTime(), server_default=sa.text(\"now()\"), nullable=False\n        ),\n        sa.ForeignKeyConstraint(\n            [\"dependency_id\"],\n            [\"packages.id\"],\n            name=op.f(\"fk_legacy_dependencies_dependency_id_packages\"),\n        ),\n        sa.ForeignKeyConstraint(\n            [\"dependency_type_id\"],\n            [\"depends_on_types.id\"],\n            name=op.f(\"fk_legacy_dependencies_dependency_type_id_depends_on_types\"),\n        ),\n        sa.ForeignKeyConstraint(\n            [\"package_id\"],\n            [\"packages.id\"],\n            name=op.f(\"fk_legacy_dependencies_package_id_packages\"),\n        ),\n        sa.PrimaryKeyConstraint(\"id\", name=op.f(\"pk_legacy_dependencies\")),\n        sa.UniqueConstraint(\n            \"package_id\", \"dependency_id\", name=\"uq_package_dependency\"\n        ),\n    )\n    op.create_index(\n        op.f(\"ix_legacy_dependencies_dependency_id\"),\n        \"legacy_dependencies\",\n        [\"dependency_id\"],\n        unique=False,\n    )\n    op.create_index(\n        op.f(\"ix_legacy_dependencies_dependency_type_id\"),\n        \"legacy_dependencies\",\n        [\"dependency_type_id\"],\n        unique=False,\n    )\n    op.create_index(\n        op.f(\"ix_legacy_dependencies_package_id\"),\n        \"legacy_dependencies\",\n        [\"package_id\"],\n        unique=False,\n    )\n\n\ndef downgrade() -> None:\n    op.drop_index(\n        op.f(\"ix_legacy_dependencies_package_id\"), table_name=\"legacy_dependencies\"\n    )\n    op.drop_index(\n        op.f(\"ix_legacy_dependencies_dependency_type_id\"),\n        table_name=\"legacy_dependencies\",\n    )\n    op.drop_index(\n        op.f(\"ix_legacy_dependencies_dependency_id\"), table_name=\"legacy_dependencies\"\n    )\n    op.drop_table(\"legacy_dependencies\")\n"
  },
  {
    "path": "alembic/versions/20250312_2244-canons.py",
    "content": "\"\"\"canons\n\nRevision ID: e7632ae1aff7\nRevises: 89af630dc946\nCreate Date: 2025-03-12 22:44:45.272179\n\n\"\"\"\n\nfrom collections.abc import Sequence\n\nimport sqlalchemy as sa\n\nfrom alembic import op\n\n# revision identifiers, used by Alembic.\nrevision: str = \"e7632ae1aff7\"\ndown_revision: str | None = \"89af630dc946\"\nbranch_labels: str | Sequence[str] | None = None\ndepends_on: str | Sequence[str] | None = None\n\n\ndef upgrade() -> None:\n    op.create_table(\n        \"canons\",\n        sa.Column(\"id\", sa.UUID(), nullable=False),\n        sa.Column(\"url\", sa.String(), nullable=False),\n        sa.Column(\"name\", sa.String(), nullable=False),\n        sa.Column(\n            \"created_at\", sa.DateTime(), server_default=sa.text(\"now()\"), nullable=False\n        ),\n        sa.Column(\n            \"updated_at\", sa.DateTime(), server_default=sa.text(\"now()\"), nullable=False\n        ),\n        sa.PrimaryKeyConstraint(\"id\", name=op.f(\"pk_canons\")),\n    )\n    op.create_index(op.f(\"ix_canons_name\"), \"canons\", [\"name\"], unique=False)\n    op.create_index(op.f(\"ix_canons_url\"), \"canons\", [\"url\"], unique=True)\n    op.create_table(\n        \"canon_packages\",\n        sa.Column(\"id\", sa.UUID(), nullable=False),\n        sa.Column(\"canon_id\", sa.UUID(), nullable=False),\n        sa.Column(\"package_id\", sa.UUID(), nullable=False),\n        sa.Column(\n            \"created_at\", sa.DateTime(), server_default=sa.text(\"now()\"), nullable=False\n        ),\n        sa.Column(\n            \"updated_at\", sa.DateTime(), server_default=sa.text(\"now()\"), nullable=False\n        ),\n        sa.ForeignKeyConstraint(\n            [\"canon_id\"], [\"canons.id\"], name=op.f(\"fk_canon_packages_canon_id_canons\")\n        ),\n        sa.ForeignKeyConstraint(\n            [\"package_id\"],\n            [\"packages.id\"],\n            name=op.f(\"fk_canon_packages_package_id_packages\"),\n        ),\n        sa.PrimaryKeyConstraint(\"id\", name=op.f(\"pk_canon_packages\")),\n    )\n    op.create_index(\n        op.f(\"ix_canon_packages_canon_id\"), \"canon_packages\", [\"canon_id\"], unique=False\n    )\n    op.create_index(\n        op.f(\"ix_canon_packages_package_id\"),\n        \"canon_packages\",\n        [\"package_id\"],\n        unique=False,\n    )\n\n\ndef downgrade() -> None:\n    op.drop_index(op.f(\"ix_canon_packages_package_id\"), table_name=\"canon_packages\")\n    op.drop_index(op.f(\"ix_canon_packages_canon_id\"), table_name=\"canon_packages\")\n    op.drop_table(\"canon_packages\")\n    op.drop_index(op.f(\"ix_canons_url\"), table_name=\"canons\")\n    op.drop_index(op.f(\"ix_canons_name\"), table_name=\"canons\")\n    op.drop_table(\"canons\")\n"
  },
  {
    "path": "alembic/versions/20250416_0223-add_ranks.py",
    "content": "\"\"\"add-ranks\n\nRevision ID: 26e124131bf8\nRevises: e7632ae1aff7\nCreate Date: 2025-04-16 02:23:33.665773\n\n\"\"\"\n\nfrom collections.abc import Sequence\n\nimport sqlalchemy as sa\n\nfrom alembic import op\n\n# revision identifiers, used by Alembic.\nrevision: str = \"26e124131bf8\"\ndown_revision: str | None = \"e7632ae1aff7\"\nbranch_labels: str | Sequence[str] | None = None\ndepends_on: str | Sequence[str] | None = None\n\n\ndef upgrade() -> None:\n    op.create_table(\n        \"tea_rank_runs\",\n        sa.Column(\n            \"id\",\n            sa.UUID(),\n            server_default=sa.text(\"uuid_generate_v4()\"),\n            nullable=False,\n        ),\n        sa.Column(\"run\", sa.Integer(), nullable=False),\n        sa.Column(\"split_ratio\", sa.String(), nullable=False),\n        sa.Column(\n            \"created_at\", sa.DateTime(), server_default=sa.text(\"now()\"), nullable=False\n        ),\n        sa.PrimaryKeyConstraint(\"id\", name=op.f(\"pk_tea_rank_runs\")),\n    )\n    op.create_table(\n        \"tea_ranks\",\n        sa.Column(\n            \"id\",\n            sa.UUID(),\n            server_default=sa.text(\"uuid_generate_v4()\"),\n            nullable=False,\n        ),\n        sa.Column(\"tea_rank_run\", sa.Integer(), nullable=False),\n        sa.Column(\"canon_id\", sa.UUID(), nullable=False),\n        sa.Column(\"rank\", sa.String(), nullable=False),\n        sa.Column(\n            \"created_at\", sa.DateTime(), server_default=sa.text(\"now()\"), nullable=False\n        ),\n        sa.ForeignKeyConstraint(\n            [\"canon_id\"], [\"canons.id\"], name=op.f(\"fk_tea_ranks_canon_id_canons\")\n        ),\n        sa.PrimaryKeyConstraint(\"id\", name=op.f(\"pk_tea_ranks\")),\n    )\n    op.create_index(\n        op.f(\"ix_tea_ranks_canon_id\"), \"tea_ranks\", [\"canon_id\"], unique=False\n    )\n    op.create_index(\n        op.f(\"ix_tea_ranks_tea_rank_run\"), \"tea_ranks\", [\"tea_rank_run\"], unique=False\n    )\n\n\ndef downgrade() -> None:\n    op.drop_index(op.f(\"ix_tea_ranks_tea_rank_run\"), table_name=\"tea_ranks\")\n    op.drop_index(op.f(\"ix_tea_ranks_canon_id\"), table_name=\"tea_ranks\")\n    op.drop_table(\"tea_ranks\")\n    op.drop_table(\"tea_rank_runs\")\n"
  },
  {
    "path": "alembic/versions/20250422_0940-add_unique_package_to_canon_packages.py",
    "content": "\"\"\"add-unique-package-to-canon-packages\n\nRevision ID: a41236bd2340\nRevises: 26e124131bf8\nCreate Date: 2025-04-22 09:40:22.901637\n\n\"\"\"\n\nfrom collections.abc import Sequence\n\nfrom alembic import op\n\n# revision identifiers, used by Alembic.\nrevision: str = \"a41236bd2340\"\ndown_revision: str | None = \"26e124131bf8\"\nbranch_labels: str | Sequence[str] | None = None\ndepends_on: str | Sequence[str] | None = None\n\n\ndef upgrade() -> None:\n    op.drop_index(\"ix_canon_packages_package_id\", table_name=\"canon_packages\")\n    op.create_index(\n        op.f(\"ix_canon_packages_package_id\"),\n        \"canon_packages\",\n        [\"package_id\"],\n        unique=True,\n    )\n\n\ndef downgrade() -> None:\n    op.drop_index(op.f(\"ix_canon_packages_package_id\"), table_name=\"canon_packages\")\n    op.create_index(\n        \"ix_canon_packages_package_id\", \"canon_packages\", [\"package_id\"], unique=False\n    )\n"
  },
  {
    "path": "alembic/versions/20250508_1752-add_trgm_indexes.py",
    "content": "\"\"\"add_trgm_indexes\n\nRevision ID: 7392d4d74ce2\nRevises: a41236bd2340\nCreate Date: 2025-05-08 17:52:40.417822\n\n\"\"\"\n\nfrom collections.abc import Sequence\n\nfrom alembic import op\n\n# revision identifiers, used by Alembic.\nrevision: str = \"7392d4d74ce2\"\ndown_revision: str | None = \"a41236bd2340\"\nbranch_labels: str | Sequence[str] | None = None\ndepends_on: str | Sequence[str] | None = None\n\n\ndef upgrade() -> None:\n    # Drop the existing indexes\n    op.drop_index(\"ix_canons_name\", table_name=\"canons\")\n    op.drop_index(\"ix_urls_url\", table_name=\"urls\")\n\n    # Create trigram indexes\n    # NOTE: this was added manually to this script (not auto-generated)\n    op.create_index(\n        \"ix_urls_url_trgm\",\n        \"urls\",\n        [\"url\"],\n        unique=False,\n        postgresql_using=\"gin\",\n        postgresql_ops={\"url\": \"gin_trgm_ops\"},\n    )\n    op.create_index(\n        \"ix_canons_name_trgm\",\n        \"canons\",\n        [\"name\"],\n        unique=False,\n        postgresql_using=\"gin\",\n        postgresql_ops={\"name\": \"gin_trgm_ops\"},\n    )\n\n\ndef downgrade() -> None:\n    # Drop the trigram indexes\n    # NOTE: this was added manually to this script (not auto-generated)\n    op.drop_index(\"ix_urls_url_trgm\", table_name=\"urls\")\n    op.drop_index(\"ix_canons_name_trgm\", table_name=\"canons\")\n\n    # Recreate the existing indexes (auto-generated)\n    op.create_index(\"ix_urls_url\", \"urls\", [\"url\"], unique=False)\n    op.create_index(\"ix_canons_name\", \"canons\", [\"name\"], unique=False)\n"
  },
  {
    "path": "alembic/versions/20250529_2341-rename_canons_table_and_recreate.py",
    "content": "\"\"\"rename_canons_table_and_recreate\n\nRevision ID: 542d79f30fc9\nRevises: 7392d4d74ce2\nCreate Date: 2025-05-29 23:41:38.465987\n\n\"\"\"\n\nfrom collections.abc import Sequence\n\nimport sqlalchemy as sa\nfrom sqlalchemy.dialects.postgresql import UUID\n\nfrom alembic import op\n\n# revision identifiers, used by Alembic.\nrevision: str = \"542d79f30fc9\"\ndown_revision: str | None = \"7392d4d74ce2\"\nbranch_labels: str | Sequence[str] | None = None\ndepends_on: str | Sequence[str] | None = None\n\n\ndef upgrade() -> None:\n    \"\"\"\n    Rename existing canons table and create new one with proper url_id FK\n    \"\"\"\n    # Step 1: Rename existing table to preserve data as backup\n    op.rename_table(\"canons\", \"canons_old\")\n\n    # Step 2: Drop FK constraints that pointed to old table (from other tables)\n    op.drop_constraint(\n        \"fk_canon_packages_canon_id_canons\", \"canon_packages\", type_=\"foreignkey\"\n    )\n    op.drop_constraint(\"fk_tea_ranks_canon_id_canons\", \"tea_ranks\", type_=\"foreignkey\")\n\n    # Step 3: Drop indexes and constraints from old table to avoid naming conflicts\n    op.drop_constraint(\"pk_canons\", \"canons_old\", type_=\"primary\")\n    op.drop_index(\"ix_canons_url\", table_name=\"canons_old\")\n    op.drop_index(\"ix_canons_name_trgm\", table_name=\"canons_old\")\n\n    # Step 4: Create new canons table with proper schema\n    op.create_table(\n        \"canons\",\n        sa.Column(\n            \"id\",\n            UUID(as_uuid=True),\n            primary_key=True,\n            server_default=sa.func.uuid_generate_v4(),\n        ),\n        sa.Column(\n            \"url_id\", UUID(as_uuid=True), nullable=False, index=True, unique=True\n        ),\n        sa.Column(\"name\", sa.String(), nullable=False),\n        sa.Column(\n            \"created_at\", sa.DateTime(), nullable=False, server_default=sa.func.now()\n        ),\n        sa.Column(\n            \"updated_at\", sa.DateTime(), nullable=False, server_default=sa.func.now()\n        ),\n        # Constraints\n        sa.ForeignKeyConstraint([\"url_id\"], [\"urls.id\"], name=\"fk_canons_url_id_urls\"),\n        sa.UniqueConstraint(\"url_id\", name=\"uq_canons_url_id\"),\n    )\n\n    # Step 5: Create indexes\n    op.create_index(\n        \"ix_canons_name_trgm\",\n        \"canons\",\n        [\"name\"],\n        postgresql_using=\"gin\",\n        postgresql_ops={\"name\": \"gin_trgm_ops\"},\n    )\n\n    # Note: FK constraints to this table will be recreated in a separate migration\n    # after data population, since this table starts empty\n\n\ndef downgrade() -> None:\n    \"\"\"\n    Restore original canons table with all its original indexes and constraints\n    \"\"\"\n    # FK constraints were dropped in upgrade and not recreated, so no need to drop them here\n\n    # Drop new table\n    op.drop_table(\"canons\")\n\n    # Restore old table\n    op.rename_table(\"canons_old\", \"canons\")\n\n    # Recreate all original constraints and indexes on restored table\n    op.create_primary_key(\"pk_canons\", \"canons\", [\"id\"])\n    op.create_index(\"ix_canons_url\", \"canons\", [\"url\"], unique=True)\n    op.create_index(\n        \"ix_canons_name_trgm\",\n        \"canons\",\n        [\"name\"],\n        postgresql_using=\"gin\",\n        postgresql_ops={\"name\": \"gin_trgm_ops\"},\n    )\n\n    # Recreate FK constraints from other tables pointing to canons\n    op.create_foreign_key(\n        \"fk_canon_packages_canon_id_canons\",\n        
\"canon_packages\",\n        \"canons\",\n        [\"canon_id\"],\n        [\"id\"],\n    )\n    op.create_foreign_key(\n        \"fk_tea_ranks_canon_id_canons\", \"tea_ranks\", \"canons\", [\"canon_id\"], [\"id\"]\n    )\n"
  },
  {
    "path": "alembic/versions/20250529_2345-recreate_canon_foreign_keys.py",
    "content": "\"\"\"recreate_canon_foreign_keys\n\nRevision ID: 3de32bb99a71\nRevises: 542d79f30fc9\nCreate Date: 2025-05-29 23:45:12.372951\n\n\"\"\"\n\nfrom collections.abc import Sequence\n\nfrom alembic import op\n\n# revision identifiers, used by Alembic.\nrevision: str = \"3de32bb99a71\"\ndown_revision: str | None = \"542d79f30fc9\"\nbranch_labels: str | Sequence[str] | None = None\ndepends_on: str | Sequence[str] | None = None\n\n\ndef upgrade() -> None:\n    \"\"\"\n    Recreate FK constraints pointing to canons table after data population\n    Run this AFTER your canonicalization script has populated the canons table\n    \"\"\"\n    # First, clean up any orphaned records in referencing tables\n    # (Optional: uncomment if you want to auto-clean orphaned data)\n    # op.execute(\"\"\"\n    #     DELETE FROM canon_packages\n    #     WHERE canon_id NOT IN (SELECT id FROM canons)\n    # \"\"\")\n    # op.execute(\"\"\"\n    #     DELETE FROM tea_ranks\n    #     WHERE canon_id NOT IN (SELECT id FROM canons)\n    # \"\"\")\n\n    # Recreate FK constraints\n    op.create_foreign_key(\n        \"fk_canon_packages_canon_id_canons\",\n        \"canon_packages\",\n        \"canons\",\n        [\"canon_id\"],\n        [\"id\"],\n    )\n    op.create_foreign_key(\n        \"fk_tea_ranks_canon_id_canons\", \"tea_ranks\", \"canons\", [\"canon_id\"], [\"id\"]\n    )\n\n\ndef downgrade() -> None:\n    \"\"\"\n    Drop FK constraints pointing to canons table\n    \"\"\"\n    op.drop_constraint(\n        \"fk_canon_packages_canon_id_canons\", \"canon_packages\", type_=\"foreignkey\"\n    )\n    op.drop_constraint(\"fk_tea_ranks_canon_id_canons\", \"tea_ranks\", type_=\"foreignkey\")\n"
  },
  {
    "path": "api/.dockerignore",
    "content": "/target\n.git\n.gitignore\nREADME.md\n"
  },
  {
    "path": "api/.gitignore",
    "content": "/target\n**/*.rs.bk\nCargo.lock\n.env\n"
  },
  {
    "path": "api/Cargo.toml",
    "content": "[package]\nname = \"chai-api\"\nversion = \"1.3.0\"\nedition = \"2021\"\nauthors = [\"Jacob Heider <jacob@pkgx.dev>\"]\ndescription = \"A simple REST API for the CHAI database\"\nreadme = \"README.md\"\nlicense = \"MIT\"\nrepository = \"https://github.com/teaxyz/chai-oss\"\n\n[dependencies]\nuuid = { version = \"1.11.0\", features = [\"serde\", \"v4\"] }\nactix-web = \"4.3\"\ndotenv = \"0.15\"\ntokio = { version = \"1\", features = [\"full\"] }\nlog = \"0.4\"\nenv_logger = \"0.10\"\nserde = { version = \"1.0\", features = [\"derive\"] }\nserde_json = \"1.0\"\nchrono = { version = \"0.4\", features = [\"serde\"] }\ntokio-postgres = { version = \"0.7\", features = [\n  \"with-serde_json-1\",\n  \"with-chrono-0_4\",\n  \"with-uuid-1\",\n] }\ndeadpool-postgres = \"0.10.0\"\nurl = \"2.5.2\"\ndashmap = \"6.1.0\"\n"
  },
  {
    "path": "api/Dockerfile",
    "content": "FROM --platform=linux/amd64 lukemathwalker/cargo-chef:latest-rust-1.82.0 as chef\nWORKDIR /app\n\nFROM chef as planner\nCOPY . .\nRUN cargo chef prepare --recipe-path recipe.json\n\nFROM chef as builder\nCOPY --from=planner /app/recipe.json recipe.json\nRUN cargo chef cook --release --recipe-path recipe.json\nCOPY . .\nRUN cargo build --release\n\nFROM debian:bookworm-slim as runtime\nWORKDIR /app\nRUN apt-get update && apt-get install -y curl openssl ca-certificates && rm -rf /var/lib/apt/lists/*\nCOPY --from=builder /app/target/release/chai-api /usr/local/bin\nENV DATABASE_URL=postgresql://postgres:s3cr3t@db:5432/chai\nEXPOSE 8080\nCMD [\"chai-api\"]\n"
  },
  {
    "path": "api/README.md",
    "content": "# CHAI API\n\nCHAI API is a REST API service for accessing the CHAI database, which contains package\nmanager data.\n\n## Features\n\n- List all tables in the database\n- Fetch paginated data from any table\n- Heartbeat endpoint for health checks\n- Search deduplicated packages by name\n\n## Requirements\n\n- Rust 1.67 or later\n- PostgreSQL database\n\n## API Endpoints\n\n### Health Check\n\n```\nGET /heartbeat\n```\n\nReturns the health status of the API and database connection.\n\n**Response (Success)**\n\n```txt\nOK - Database connection is healthy\n```\n\n**Response (Failure - Database query failed):**\n\n```txt\nDatabase query failed\n```\n\n**Response (Failure - Database connection failed):**\n\n```txt\nFailed to get database connection\n```\n\n### List Tables\n\n```\nGET /tables\n```\n\nReturns a paginated list of all available tables in the database.\n\n**Query Parameters**\n\n- `page` (optional): Page number (default: 1)\n- `limit` (optional): Number of items per page (default: 200)\n\n**Response**\n\n```json\n{\n  \"data\": [\n    \"legacy_dependencies\",\n    \"versions\",\n    \"canons_old\",\n    \"tea_rank_runs\",\n    \"canons\",\n    \"licenses\",\n    \"canon_packages\",\n    \"users\",\n    \"load_history\",\n    \"tea_ranks\",\n    \"alembic_version\",\n    \"sources\",\n    \"package_managers\",\n    \"url_types\",\n    \"urls\",\n    \"packages\",\n    \"package_urls\",\n    \"user_packages\",\n    \"dependencies\",\n    \"depends_on_types\",\n    \"user_versions\",\n    \"canon_packages_old\",\n    \"tea_rank_old\"\n  ],\n  \"limit\": 200,\n  \"page\": 1,\n  \"total_count\": 23,\n  \"total_pages\": 1\n}\n```\n\n### Get Table Data\n\n```\nGET /{table}\n```\n\nReturns paginated data from the specified table.\n\n**Path Parameters**\n\n- `table`: Name of the table to query (see available tables in List Tables response)\n\n**Query Parameters**\n\n- `page` (optional): Page number (default: 1)\n- `limit` (optional): Number of items per page (default: 200)\n\n**Response**\n\n```json\n{\n    \"table\": \"packages\",\n    \"total_count\": 166459,\n    \"page\": 1,\n    \"limit\": 2,\n    \"total_pages\": 83230,\n    \"columns\": [\n        ...\n    ],\n    \"data\": [\n        {\n            \"created_at\": \"2024-12-27 08:04:03.991832\",\n            \"derived_id\": \"...\",\n            \"id\": \"...\",\n            \"import_id\": \"...\",\n            \"name\": \"...\",\n            \"package_manager_id\": \"...\",\n            \"readme\": \"...\",\n            \"updated_at\": \"2024-12-27 08:04:03.991832\"\n        },\n        ...\n    ]\n}\n```\n\n### Get Table Row By ID\n\n```\nGET /{table}/{id}\n```\n\nReturns a specific row from the table by its UUID.\n\n**Path Parameters**\n\n- `table`: Name of the table to query\n- `id`: UUID of the row to fetch\n\n**Response**\n\n```json\n{\n  \"created_at\": \"2024-12-27 08:04:03.991832\",\n  \"derived_id\": \"...\",\n  \"id\": \"...\",\n  \"import_id\": \"...\",\n  \"name\": \"...\",\n  \"package_manager_id\": \"...\",\n  \"readme\": \"...\",\n  \"updated_at\": \"2024-12-27 08:04:03.991832\"\n}\n```\n\n### Get Project\n\n```\nGET /project/{id}\n```\n\nReturns detailed information about a specific canon by its canonical ID.\n\n**Path Parameters**\n\n- `id`: UUID of the project (canon) to fetch\n\n**Response**\n\n```json\n{\n  \"projectId\": \"550e8400-e29b-41d4-a716-446655440000\",\n  \"homepage\": \"https://example.com\",\n  \"name\": \"example-project\",\n  \"source\": \"https://github.com/example/project\",\n  
\"teaRank\": \"150\",\n  \"teaRankCalculatedAt\": \"2024-12-27T08:04:03.991832\",\n  \"packageManagers\": [\"homebrew\", \"crates\"]\n}\n```\n\n**Response (Not Found)**\n\n```json\n{\n  \"error\": \"No row found with id '550e8400-e29b-41d4-a716-446655440000' in table canons\"\n}\n```\n\n### Get Projects Batch\n\n```\nPOST /project/batch\n```\n\nReturns detailed information about multiple projects by their canonical IDs.\n\n**Request Body**\n\n```json\n{\n  \"projectIds\": [\"uuid1\", \"uuid2\", \"...\"]\n}\n```\n\n**Parameters**\n\n- `projectIds`: Array of project UUIDs to include in the leaderboard (required, max 100)\n\n**Example**\n\n```\nPOST /project/batch\n```\n\n**Example Request**\n\n```bash\ncurl -X POST http://localhost:8080/project/batch \\\n  -H \"Content-Type: application/json\" \\\n  -d '{\n    \"projectIds\": [\n      \"550e8400-e29b-41d4-a716-446655440000\",\n      \"6ba7b810-9dad-11d1-80b4-00c04fd430c8\"\n    ]\n  }'\n```\n\n**Response**\n\n```json\n[\n  {\n    \"projectId\": \"550e8400-e29b-41d4-a716-446655440000\",\n    \"homepage\": \"https://example.com\",\n    \"name\": \"example-project\",\n    \"source\": \"https://github.com/example/project\",\n    \"teaRank\": \"150\",\n    \"teaRankCalculatedAt\": \"2024-12-27T08:04:03.991832\",\n    \"packageManagers\": [\"homebrew\", \"crates\"]\n  },\n  {\n    \"projectId\": \"6ba7b810-9dad-11d1-80b4-00c04fd430c8\",\n    \"homepage\": \"https://another-example.com\",\n    \"name\": \"another-project\",\n    \"source\": \"https://github.com/another/project\",\n    \"teaRank\": \"75\",\n    \"teaRankCalculatedAt\": \"2024-12-26T10:15:22.123456\",\n    \"packageManagers\": [\"debian\", \"pkgx\"]\n  }\n]\n```\n\n**Response (Invalid UUIDs)**\n\n```json\n{\n  \"error\": \"Invalid UUID format in project IDs\"\n}\n```\n\n### Search Projects\n\n```\nGET /project/search/{name}\n```\n\nSearches for projects by name using case-insensitive partial matching. Results are\nordered by name length and limited to 10 items.\n\n**Path Parameters**\n\n- `name`: Project name to search for (partial matches supported)\n\n**Example**\n\n```\nGET /project/search/python\n```\n\n**Response**\n\n```json\n[\n  {\n    \"projectId\": \"550e8400-e29b-41d4-a716-446655440000\",\n    \"homepage\": \"https://reactjs.org\",\n    \"name\": \"react\",\n    \"source\": \"https://github.com/facebook/react\",\n    \"packageManagers\": [\"homebrew\", \"npm\"]\n  },\n  {\n    \"projectId\": \"6ba7b810-9dad-11d1-80b4-00c04fd430c8\",\n    \"homepage\": \"https://reactrouter.com\",\n    \"name\": \"react-router\",\n    \"source\": \"https://github.com/remix-run/react-router\",\n    \"packageManagers\": [\"npm\"]\n  }\n]\n```\n\n**Response (Empty Search)**\n\n```json\n{\n  \"error\": \"Search name cannot be empty\"\n}\n```\n\n### Leaderboard\n\n```\nPOST /leaderboard\n```\n\nReturns detailed information about specified projects, ordered by tea rank in descending\norder. 
This endpoint allows filtering by project IDs and limiting the number of results.\n\n**Request Body**\n\n```json\n{\n  \"projectIds\": [\"uuid1\", \"uuid2\", \"...\"],\n  \"limit\": 10\n}\n```\n\n**Parameters**\n\n- `projectIds`: Array of project UUIDs to include in the leaderboard (required, max 100)\n- `limit`: Maximum number of results to return (required, 1-100)\n\n**Example Request**\n\n```bash\ncurl -X POST http://localhost:8080/leaderboard \\\n  -H \"Content-Type: application/json\" \\\n  -d '{\n    \"projectIds\": [\n      \"1e233f1b-2b49-4ada-9953-1763785fba2c\",\n      \"2c24aa45-4fe2-4f2b-ae58-09d4b9a4ad28\"\n    ],\n    \"limit\": 2\n  }'\n```\n\n**Response**\n\n```json\n[\n  {\n    \"projectId\": \"1e233f1b-2b49-4ada-9953-1763785fba2c\",\n    \"homepage\": \"https://example.com\",\n    \"name\": \"example-project\",\n    \"source\": \"https://github.com/example/project\",\n    \"teaRank\": \"150\",\n    \"teaRankCalculatedAt\": \"2024-12-27T08:04:03.991832\",\n    \"packageManagers\": [\"homebrew\", \"crates\"]\n  },\n  {\n    \"projectId\": \"2c24aa45-4fe2-4f2b-ae58-09d4b9a4ad28\",\n    \"homepage\": \"https://another-example.com\",\n    \"name\": \"another-project\",\n    \"source\": \"https://github.com/another/project\",\n    \"teaRank\": \"75\",\n    \"teaRankCalculatedAt\": \"2024-12-26T10:15:22.123456\",\n    \"packageManagers\": [\"debian\", \"pkgx\"]\n  }\n]\n```\n\n**Response (Validation Errors)**\n\n```json\n{\n  \"error\": \"At least one project ID is required\"\n}\n```\n\n```json\n{\n  \"error\": \"Too many project IDs (maximum 100 allowed)\"\n}\n```\n\n```json\n{\n  \"error\": \"Invalid limit 150: must be between 1 and 100\"\n}\n```\n\n## Available Tables\n\nThe database contains the following tables:\n\n| Table Name       | Description                                      |\n| ---------------- | ------------------------------------------------ |\n| alembic_version  | Store the current version of alembic             |\n| dependencies     | Package dependencies                             |\n| depends_on_types | Types of package dependencies                    |\n| licenses         | Package licenses                                 |\n| load_history     | Load history                                     |\n| package_managers | Package manager information                      |\n| package_urls     | Relationship of packages to URLs                 |\n| packages         | Package metadata                                 |\n| sources          | Package manager sources (homebrew, crates, etc.) |\n| url_types        | Types of URLs (homepage, repository, etc.)       
|\n| urls             | Actual URLs                                      |\n| user_packages    | User-package relationships                       |\n| user_versions    | User-version relationships                       |\n| users            | User (package owner) information                 |\n| versions         | Package versions                                 |\n\nBy default, the API will be available at `http://localhost:8080`.\n\n## Deployment\n\nThe CHAI API is deployed using AWS services with the following stack:\n\n- **Amazon ECR (Elastic Container Registry)** - Container image storage\n- **Amazon ECS (Elastic Container Service)** - Container orchestration\n- **ECS Service** - Manages running tasks and load balancing\n- **ECS Task Definition** - Defines container configuration\n\n### Prerequisites\n\n- AWS CLI configured with appropriate permissions\n- Docker installed locally\n- Access to the AWS account and ECR repository\n\n### Building and Pushing Docker Image\n\n1. **Get ECR login credentials:**\n\n   ```bash\n   aws ecr get-login-password --region <your-region> | docker login --username AWS --password-stdin <account-id>.dkr.ecr.<your-region>.amazonaws.com\n   ```\n\n2. **Build the Docker image:**\n\n   ```bash\n   docker build -t chai-api .\n   ```\n\n3. **Tag the image for ECR:**\n\n   ```bash\n   docker tag chai-api:latest <account-id>.dkr.ecr.<your-region>.amazonaws.com/chai-api:latest\n   ```\n\n4. **Push the image to ECR:**\n\n   ```bash\n   docker push <account-id>.dkr.ecr.<your-region>.amazonaws.com/chai-api:latest\n   ```\n\n   > **Note:** Replace `<account-id>` and `<your-region>` with your AWS account ID and region. You can find the exact commands in your ECR repository console under \"View push commands\".\n\n### Updating Existing ECS Service\n\nIf updating the ECS service, you first need to Build and Push the docker image. Then:\n\n```bash\naws ecs update-service --cluster chai-<environment> --service <environment>-chai-api --force-new-deployment\n```\n\n### Environment Variables\n\nEnsure the following environment variables are configured in your task definition:\n\n- `DATABASE_URL`: PostgreSQL connection string\n- `HOST`: Host to bind to (default: \"0.0.0.0\")\n- `PORT`: Port to listen on (default: \"8080\")\n\n### Useful AWS Documentation\n\n- [Amazon ECR User Guide](https://docs.aws.amazon.com/ecr/)\n- [Amazon ECS Developer Guide](https://docs.aws.amazon.com/ecs/)\n- [ECS Task Definitions](https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task_definitions.html)\n- [ECS Services](https://docs.aws.amazon.com/AmazonECS/latest/developerguide/ecs_services.html)\n- [AWS CLI ECS Commands](https://docs.aws.amazon.com/cli/latest/reference/ecs/)\n\n## Tasks\n\n### Format\n\n```bash\ncargo fmt --all --\n```\n\n### Build\n\n```bash\ncargo build --release\n```\n\n### Validate\n\n```bash\ncargo clippy --all-targets --all-features -- -D warnings\n```\n\n### Run\n\nEnv: DATABASE_URL=postgresql://postgres:s3cr3t@localhost:5435/chai\n\n```bash\ntarget/release/chai-api\n```\n"
  },
  {
    "path": "api/src/app_state.rs",
    "content": "use dashmap::DashMap;\nuse deadpool_postgres::Pool;\nuse serde_json::Value;\nuse std::sync::Arc;\nuse std::time::{Duration, Instant};\nuse uuid::Uuid;\n\nconst TTL: Duration = Duration::from_secs(3600); // 1 hour\n\n#[derive(Clone)]\npub struct ProjectCacheEntry {\n    pub data: Arc<Value>,\n    pub created_at: Instant,\n}\n\nimpl ProjectCacheEntry {\n    pub fn new(data: Value) -> Self {\n        Self {\n            data: Arc::new(data),\n            created_at: Instant::now(),\n        }\n    }\n\n    pub fn is_expired(&self) -> bool {\n        self.created_at.elapsed() > TTL\n    }\n}\n\npub struct AppState {\n    pub pool: Pool,\n    pub tables: Arc<Vec<String>>,\n    pub project_cache: Arc<DashMap<Uuid, ProjectCacheEntry>>,\n}\n"
  },
  {
    "path": "api/src/db.rs",
    "content": "use deadpool_postgres::{Config, Pool, Runtime};\nuse std::env;\nuse std::sync::Arc;\nuse tokio_postgres::{Client, NoTls};\nuse url::Url;\n\npub async fn create_pool() -> Pool {\n    let database_url = env::var(\"DATABASE_URL\").expect(\"DATABASE_URL must be set\");\n    let db_url = Url::parse(&database_url).expect(\"Invalid database URL\");\n\n    let mut config = Config::new();\n    config.host = db_url.host_str().map(ToOwned::to_owned);\n    config.port = db_url.port();\n    config.user = Some(db_url.username().to_owned());\n    config.password = db_url.password().map(ToOwned::to_owned);\n    config.dbname = db_url.path().strip_prefix('/').map(ToOwned::to_owned);\n\n    config\n        .create_pool(Some(Runtime::Tokio1), NoTls)\n        .expect(\"Failed to create pool\")\n}\n\npub async fn get_tables(client: &Client) -> Vec<String> {\n    let rows = client\n        .query(\n            \"SELECT table_name FROM information_schema.tables WHERE table_schema = 'public'\",\n            &[],\n        )\n        .await\n        .expect(\"Failed to fetch tables\");\n\n    rows.into_iter()\n        .map(|row| row.get::<_, String>(\"table_name\"))\n        .collect()\n}\n\npub async fn initialize_db() -> (Pool, Arc<Vec<String>>) {\n    let pool = create_pool().await;\n    let client = pool.get().await.expect(\"Failed to get client from pool\");\n    let tables = Arc::new(get_tables(&client).await);\n    (pool, tables)\n}\n"
  },
  {
    "path": "api/src/handlers.rs",
    "content": "use actix_web::{get, post, web, HttpResponse, Responder};\nuse serde::{Deserialize, Serialize};\nuse serde_json::{json, Value};\nuse std::sync::Arc;\nuse tokio_postgres::error::SqlState;\nuse uuid::Uuid;\n\nuse crate::app_state::AppState;\nuse crate::utils::{get_cached_projects, get_column_names, rows_to_json, Pagination};\n\nconst RESPONSE_LIMIT: i64 = 1000;\n\n#[derive(Deserialize)]\npub struct PaginationParams {\n    pub page: Option<i64>,\n    pub limit: Option<i64>,\n}\n\n#[derive(Serialize)]\nstruct PaginatedResponse {\n    table: String,\n    total_count: i64,\n    page: i64,\n    limit: i64,\n    total_pages: i64,\n    columns: Vec<String>,\n    data: Vec<Value>,\n}\n\n#[derive(Deserialize)]\npub struct LeaderboardRequest {\n    #[serde(rename = \"projectIds\")]\n    pub project_ids: Option<Vec<Uuid>>,\n    pub limit: i64,\n}\n\n#[derive(Deserialize)]\npub struct ProjectBatchRequest {\n    #[serde(rename = \"projectIds\")]\n    pub project_ids: Vec<Uuid>,\n}\n\npub fn check_table_exists(table: &str, tables: &[String]) -> Option<HttpResponse> {\n    if !tables.contains(&table.to_string()) {\n        Some(HttpResponse::NotFound().json(json!({\n            \"error\": format!(\"Table '{}' not found\", table),\n            \"valid_tables\": tables,\n            \"help\": \"Refer to the API documentation for valid table names.\"\n        })))\n    } else {\n        None\n    }\n}\n\n#[get(\"/tables\")]\npub async fn list_tables(\n    query: web::Query<PaginationParams>,\n    data: web::Data<AppState>,\n) -> impl Responder {\n    let total_count = data.tables.len() as i64;\n    let pagination = Pagination::new(query, total_count);\n\n    let start = pagination.offset as usize;\n    let end = (start + pagination.limit as usize).min(data.tables.len());\n\n    let paginated_tables = &data.tables[start..end];\n\n    HttpResponse::Ok().json(json!({\n        \"total_count\": total_count,\n        \"page\": pagination.page,\n        \"limit\": pagination.limit,\n        \"total_pages\": pagination.total_pages,\n        \"data\": paginated_tables,\n    }))\n}\n\n#[get(\"/heartbeat\")]\npub async fn heartbeat(data: web::Data<AppState>) -> impl Responder {\n    match data.pool.get().await {\n        Ok(client) => match client.query_one(\"SELECT 1\", &[]).await {\n            Ok(_) => HttpResponse::Ok().body(\"OK - Database connection is healthy\"),\n            Err(e) => {\n                log::error!(\"Database query failed: {e}\");\n                HttpResponse::InternalServerError().body(\"Database query failed\")\n            }\n        },\n        Err(e) => {\n            log::error!(\"Failed to get database connection: {e}\");\n            HttpResponse::InternalServerError().body(\"Failed to get database connection\")\n        }\n    }\n}\n\n#[get(\"/tables/{table}\")]\npub async fn get_table(\n    path: web::Path<String>,\n    query: web::Query<PaginationParams>,\n    data: web::Data<AppState>,\n) -> impl Responder {\n    let table = path.into_inner();\n    if let Some(response) = check_table_exists(&table, &data.tables) {\n        return response;\n    }\n\n    let count_query = format!(\"SELECT COUNT(*) FROM {table}\");\n    match data.pool.get().await {\n        Ok(client) => match client.query_one(&count_query, &[]).await {\n            Ok(count_row) => {\n                let total_count: i64 = count_row.get(0);\n                let pagination = Pagination::new(query, total_count);\n\n                let data_query = format!(\"SELECT * FROM {table} LIMIT $1 OFFSET 
$2\");\n                match client\n                    .query(&data_query, &[&pagination.limit, &pagination.offset])\n                    .await\n                {\n                    Ok(rows) => {\n                        let columns = get_column_names(&rows);\n                        let data = rows_to_json(&rows);\n                        let response = PaginatedResponse {\n                            table,\n                            total_count,\n                            page: pagination.page,\n                            limit: pagination.limit,\n                            total_pages: pagination.total_pages,\n                            columns,\n                            data,\n                        };\n                        HttpResponse::Ok().json(response)\n                    }\n                    Err(e) => {\n                        log::error!(\"Database query error: {e}\");\n                        HttpResponse::InternalServerError().json(json!({\n                            \"error\": \"An error occurred while querying the database\"\n                        }))\n                    }\n                }\n            }\n            Err(e) => {\n                log::error!(\"Database count query error: {e}\");\n                HttpResponse::InternalServerError().json(json!({\n                    \"error\": \"An error occurred while counting rows in the database\"\n                }))\n            }\n        },\n        Err(e) => {\n            log::error!(\"Failed to get database connection: {e}\");\n            HttpResponse::InternalServerError().body(\"Failed to get database connection\")\n        }\n    }\n}\n\n#[get(\"/tables/{table}/{id}\")]\npub async fn get_table_row(\n    path: web::Path<(String, Uuid)>,\n    data: web::Data<AppState>,\n) -> impl Responder {\n    let (table_name, id) = path.into_inner();\n\n    if let Some(response) = check_table_exists(&table_name, &data.tables) {\n        return response;\n    }\n\n    let query = format!(\"SELECT * FROM {table_name} WHERE id = $1\");\n\n    match data.pool.get().await {\n        Ok(client) => match client.query_one(&query, &[&id]).await {\n            Ok(row) => {\n                let json = rows_to_json(&[row]);\n                let value = json.first().unwrap();\n                HttpResponse::Ok().json(value)\n            }\n            Err(e) => {\n                if e.as_db_error()\n                    .is_some_and(|db_err| db_err.code() == &SqlState::UNDEFINED_TABLE)\n                {\n                    HttpResponse::NotFound().json(json!({\n                        \"error\": format!(\"Table '{}' not found\", table_name)\n                    }))\n                } else if e\n                    .as_db_error()\n                    .is_some_and(|e| e.code() == &SqlState::NO_DATA_FOUND)\n                {\n                    HttpResponse::NotFound().json(json!({\n                        \"error\": format!(\"No row found with id '{}' in table '{}'\", id, table_name)\n                    }))\n                } else {\n                    HttpResponse::InternalServerError().json(json!({\n                        \"error\": format!(\"Database error: {}\", e)\n                    }))\n                }\n            }\n        },\n        Err(e) => {\n            log::error!(\"Failed to get database connection: {e}\");\n            HttpResponse::InternalServerError().body(\"Failed to get database connection\")\n        }\n    }\n}\n\n#[get(\"/project/{id}\")]\npub async fn get_project(path: 
web::Path<Uuid>, data: web::Data<AppState>) -> impl Responder {\n    // Check if the table exists\n    let id = path.into_inner();\n\n    // Construct the query\n    let query = r#\"\n        WITH base AS MATERIALIZED (\n            SELECT\n                c.id,\n                u_homepage.url AS homepage,\n                c.name,\n                COALESCE(tr_latest.rank, '0') AS \"teaRank\",\n                tr_latest.created_at AS \"teaRankCalculatedAt\",\n                (\n                SELECT ARRAY_AGG(DISTINCT s.type)\n                FROM canon_packages cp2\n                JOIN packages p2           ON cp2.package_id = p2.id\n                JOIN package_managers pm2  ON p2.package_manager_id = pm2.id\n                JOIN sources s             ON pm2.source_id = s.id\n                WHERE cp2.canon_id = c.id\n                ) AS \"packageManagers\",\n                (\n                SELECT COUNT(*)::bigint\n                FROM legacy_dependencies ld\n                JOIN canon_packages cp_out ON cp_out.package_id = ld.package_id\n                WHERE cp_out.canon_id = c.id\n                ) AS \"dependenciesCount\",\n                (\n                SELECT COUNT(*)::bigint\n                FROM legacy_dependencies ld\n                JOIN canon_packages cp_in ON cp_in.package_id = ld.dependency_id\n                WHERE cp_in.canon_id = c.id\n                ) AS \"dependentsCount\"\n            FROM canons c\n            JOIN urls u_homepage ON c.url_id = u_homepage.id\n            LEFT JOIN LATERAL (\n                SELECT tr.rank, tr.created_at\n                FROM tea_ranks tr\n                WHERE tr.canon_id = c.id\n                ORDER BY tr.created_at DESC\n                LIMIT 1\n            ) tr_latest ON TRUE\n            WHERE c.id = $1\n        )\n        SELECT DISTINCT ON (b.id)\n            b.id                AS \"projectId\",\n            b.homepage,\n            b.name,\n            u_source.url        AS source,\n            b.\"teaRank\",\n            b.\"teaRankCalculatedAt\",\n            b.\"packageManagers\",\n            b.\"dependenciesCount\",\n            b.\"dependentsCount\"\n        FROM base b\n        JOIN canon_packages cp ON cp.canon_id = b.id\n        JOIN package_urls pu   ON pu.package_id = cp.package_id\n        JOIN urls u_source     ON pu.url_id = u_source.id\n        JOIN url_types ut      ON ut.id = u_source.url_type_id\n        WHERE ut.name = 'source'\n        ORDER BY b.id, b.\"teaRankCalculatedAt\" DESC, u_source.url;\"#;\n\n    match data.pool.get().await {\n        Ok(client) => match client.query_one(query, &[&id]).await {\n            Ok(row) => {\n                let json = rows_to_json(&[row]);\n                let value = json.first().unwrap();\n                HttpResponse::Ok().json(value)\n            }\n            Err(e) => {\n                if e.as_db_error()\n                    .is_some_and(|e| e.code() == &SqlState::NO_DATA_FOUND)\n                {\n                    HttpResponse::NotFound().json(json!({\n                        \"error\": format!(\"No row found with id '{:?}' in table canons\", id)\n                    }))\n                } else {\n                    HttpResponse::InternalServerError().json(json!({\n                        \"error\": format!(\"Database error: {}\", e)\n                    }))\n                }\n            }\n        },\n        Err(e) => {\n            log::error!(\"Failed to get database connection: {e}\");\n            
HttpResponse::InternalServerError().body(\"Failed to get database connection\")\n        }\n    }\n}\n\n#[post(\"/project/batch\")]\npub async fn list_projects_by_id(\n    req: web::Json<ProjectBatchRequest>,\n    data: web::Data<AppState>,\n) -> impl Responder {\n    if req.project_ids.is_empty() {\n        return HttpResponse::BadRequest().json(json!({\n            \"error\": \"No project IDs provided\"\n        }));\n    }\n\n    // Construct the query\n    let query = r#\"\n        SELECT DISTINCT ON (c.id)\n            c.id AS \"projectId\",\n            u_homepage.url AS homepage,\n            c.name,\n            u_source.url AS source,\n            COALESCE(tr.rank,'0') AS \"teaRank\",\n            tr.created_at AS \"teaRankCalculatedAt\",\n            (\n                SELECT ARRAY_AGG(DISTINCT s.type)\n                FROM canon_packages cp2\n                JOIN packages p2 ON cp2.package_id = p2.id\n                JOIN package_managers pm2 ON p2.package_manager_id = pm2.id\n                JOIN sources s ON pm2.source_id = s.id\n                WHERE cp2.canon_id = c.id\n            ) AS \"packageManagers\"\n        FROM canons c\n        JOIN urls u_homepage ON u_homepage.id = c.url_id\n        JOIN canon_packages cp ON cp.canon_id = c.id\n        JOIN package_urls pu ON pu.package_id = cp.package_id\n        JOIN urls u_source ON pu.url_id = u_source.id\n        JOIN url_types ut ON ut.id = u_source.url_type_id\n        LEFT JOIN tea_ranks tr ON tr.canon_id = c.id\n        WHERE c.id = ANY($1::uuid[]) AND ut.name = 'source'\n        ORDER BY c.id, tr.created_at DESC, u_source.url;\"#;\n\n    match data.pool.get().await {\n        Ok(client) => match client.query(query, &[&req.project_ids]).await {\n            Ok(rows) => {\n                let json = rows_to_json(&rows);\n                HttpResponse::Ok().json(json)\n            }\n            Err(e) => {\n                log::error!(\"Database query error: {e}\");\n                HttpResponse::InternalServerError().json(json!({\n                    \"error\": format!(\"Database error: {}\", e)\n                }))\n            }\n        },\n        Err(e) => {\n            log::error!(\"Failed to get database connection: {e}\");\n            HttpResponse::InternalServerError().body(\"Failed to get database connection\")\n        }\n    }\n}\n\n#[get(\"/project/search/{name}\")]\npub async fn list_projects_by_name(\n    path: web::Path<String>,\n    data: web::Data<AppState>,\n) -> impl Responder {\n    let name = path.into_inner();\n\n    if name.trim().is_empty() {\n        return HttpResponse::BadRequest().json(json!({\n            \"error\": \"Search name cannot be empty\"\n        }));\n    }\n\n    let wildcard = format!(\"%{name}%\");\n\n    // Construct the query\n    let query = r#\"\n        SELECT *\n        FROM (\n            SELECT DISTINCT ON (c.id)\n                c.id AS \"projectId\",\n                u_homepage.url AS homepage,\n                c.name,\n                u_source.url AS source,\n                (\n                    SELECT ARRAY_AGG(DISTINCT s.type)\n                    FROM canon_packages cp2\n                    JOIN packages p2 ON cp2.package_id = p2.id\n                    JOIN package_managers pm2 ON p2.package_manager_id = pm2.id\n                    JOIN sources s ON pm2.source_id = s.id\n                    WHERE cp2.canon_id = c.id\n                ) AS \"packageManagers\"\n            FROM canons c\n            JOIN urls u_homepage ON c.url_id = u_homepage.id\n            
JOIN canon_packages cp ON cp.canon_id = c.id\n            JOIN package_urls pu ON pu.package_id = cp.package_id\n            JOIN urls u_source ON pu.url_id = u_source.id\n            JOIN url_types ut_source ON ut_source.id = u_source.url_type_id\n            WHERE ut_source.name = 'source' AND (c.name ILIKE $1)\n            ORDER BY c.id\n        ) sub\n        ORDER BY LENGTH(name), name\n        LIMIT 10;\"#;\n\n    match data.pool.get().await {\n        Ok(client) => match client.query(query, &[&wildcard]).await {\n            Ok(rows) => {\n                let json = rows_to_json(&rows);\n                HttpResponse::Ok().json(json)\n            }\n            Err(e) => {\n                log::error!(\"Database query error: {e}\");\n                HttpResponse::InternalServerError().json(json!({\n                    \"error\": format!(\"Database error: {e}\")\n                }))\n            }\n        },\n        Err(e) => {\n            log::error!(\"Failed to get database connection: {e}\");\n            HttpResponse::InternalServerError().body(\"Failed to get database connection\")\n        }\n    }\n}\n\n#[post(\"/leaderboard\")]\npub async fn get_leaderboard(\n    req: web::Json<LeaderboardRequest>,\n    data: web::Data<AppState>,\n) -> impl Responder {\n    let limit = req.limit.clamp(1, RESPONSE_LIMIT);\n\n    let Some(project_ids) = req.project_ids.as_deref() else {\n        return get_top_projects(data, limit).await;\n    };\n\n    if project_ids.len() > RESPONSE_LIMIT as usize {\n        return HttpResponse::BadRequest().json(json!({\n            \"error\": format!(\"Too many project IDs (maximum {} allowed)\", RESPONSE_LIMIT)\n        }));\n    }\n\n    // Get cached projects and identify missing ones\n    let (cached_projects, missing_ids) =\n        get_cached_projects(data.project_cache.clone(), project_ids);\n\n    // If we have all projects cached, return them sorted\n    if missing_ids.is_empty() {\n        return sort_truncate_and_return(cached_projects, limit);\n    }\n\n    // Query for missing projects\n    let query = r#\"\n        SELECT *\n        FROM (\n            SELECT DISTINCT ON (c.id)\n                c.id AS \"projectId\",\n                u_homepage.url AS homepage,\n                c.name,\n                u_source.url AS source,\n                COALESCE(tr.rank,'0') AS \"teaRank\",\n                tr.created_at AS \"teaRankCalculatedAt\",\n                (\n                    SELECT ARRAY_AGG(DISTINCT s.type)\n                    FROM canon_packages cp2\n                    JOIN packages p2 ON cp2.package_id = p2.id\n                    JOIN package_managers pm2 ON p2.package_manager_id = pm2.id\n                    JOIN sources s ON pm2.source_id = s.id\n                    WHERE cp2.canon_id = c.id\n                ) AS \"packageManagers\"\n            FROM canons c\n            JOIN urls u_homepage ON c.url_id = u_homepage.id\n            JOIN canon_packages cp ON cp.canon_id = c.id\n            JOIN package_urls pu ON pu.package_id = cp.package_id\n            JOIN urls u_source ON pu.url_id = u_source.id\n            JOIN url_types ut_source ON ut_source.id = u_source.url_type_id\n            LEFT JOIN tea_ranks tr ON tr.canon_id = c.id\n            WHERE\n            c.id = ANY($1::uuid[])\n            AND ut_source.name = 'source'\n            AND CAST(tr.rank AS NUMERIC) > 0\n            ORDER BY c.id, tr.created_at DESC, u_source.url\n        ) sub\n        ORDER BY CAST(\"teaRank\" AS NUMERIC) DESC NULLS LAST\n        LIMIT 
$2\"#;\n\n    match data.pool.get().await {\n        Ok(client) => match client.query(query, &[&missing_ids, &limit]).await {\n            Ok(rows) => {\n                let fresh_projects = rows_to_json(&rows);\n\n                // Cache the fresh projects\n                for project in &fresh_projects {\n                    if let Some(project_id) = project.get(\"projectId\").and_then(|v| v.as_str()) {\n                        if let Ok(uuid) = Uuid::parse_str(project_id) {\n                            data.project_cache.insert(\n                                uuid,\n                                crate::app_state::ProjectCacheEntry::new(project.clone()),\n                            );\n                        } else {\n                            log::warn!(\"Failed to parse project ID as UUID: {}\", project_id);\n                        }\n                    } else {\n                        log::warn!(\"No projectId found in project: {:?}\", project);\n                    }\n                }\n\n                // Combine cached and fresh projects - keep Arc<Value> for cached ones\n                let mut all_projects: Vec<Arc<Value>> = cached_projects;\n\n                // Convert fresh projects to Arc<Value> to match the type\n                let fresh_arcs: Vec<Arc<Value>> =\n                    fresh_projects.into_iter().map(Arc::new).collect();\n                all_projects.extend(fresh_arcs);\n\n                sort_truncate_and_return(all_projects, limit)\n            }\n            Err(e) => {\n                log::error!(\"Database query error: {e}\");\n                HttpResponse::InternalServerError().json(json!({\n                    \"error\": format!(\"Database error: {}\", e)\n                }))\n            }\n        },\n        Err(e) => {\n            log::error!(\"Failed to get database connection: {e}\");\n            HttpResponse::InternalServerError().body(\"Failed to get database connection\")\n        }\n    }\n}\n\n// Helper function to sort, truncate, and return the final response\nfn sort_truncate_and_return(projects: Vec<Arc<Value>>, limit: i64) -> actix_web::HttpResponse {\n    let mut projects = projects;\n\n    // Sort projects by teaRank (descending) - Arc<Value> derefs to Value\n    projects.sort_by(|a, b| {\n        let rank_a = a\n            .get(\"teaRank\")\n            .and_then(|v| v.as_str())\n            .and_then(|s| s.parse::<i64>().ok())\n            .unwrap_or(0);\n        let rank_b = b\n            .get(\"teaRank\")\n            .and_then(|v| v.as_str())\n            .and_then(|s| s.parse::<i64>().ok())\n            .unwrap_or(0);\n        rank_b.cmp(&rank_a)\n    });\n\n    // Apply limit\n    projects.truncate(limit as usize);\n\n    // Convert to Vec<Value> only for the final response - Arc<Value> doesn't implement Serialize\n    let final_projects: Vec<Value> = projects\n        .into_iter()\n        .map(|arc_val| (*arc_val).clone())\n        .collect();\n    actix_web::HttpResponse::Ok().json(final_projects)\n}\n\nasync fn get_top_projects(data: web::Data<AppState>, limit: i64) -> HttpResponse {\n    // get client\n    let Ok(client) = data.pool.get().await else {\n        return HttpResponse::InternalServerError().body(\"Failed to get database connection\");\n    };\n\n    // get latest run id\n    let run_query = r#\"SELECT MAX(run) from tea_rank_runs\"#;\n    let Ok(run_row) = client.query_one(run_query, &[]).await else {\n        return HttpResponse::InternalServerError().body(\"Failed to get latest run\");\n    };\n  
  let run: i32 = run_row.get(0);\n\n    // get top projects (1-RESPONSE_LIMIT); rank is stored as text, so cast it for numeric ordering\n    let top_ranks_query = r#\"SELECT\n            canon_id as \"projectId\",\n            name,\n            rank as \"teaRank\",\n            (\n                SELECT ARRAY_AGG(DISTINCT s.type)\n                FROM canon_packages cp2\n                JOIN packages p2 ON cp2.package_id = p2.id\n                JOIN package_managers pm2 ON p2.package_manager_id = pm2.id\n                JOIN sources s ON pm2.source_id = s.id\n                WHERE cp2.canon_id = canons.id\n            ) AS \"packageManagers\"\n        FROM\n            tea_ranks\n            JOIN canons ON canon_id = canons.id\n        WHERE\n            tea_rank_run = $1\n        ORDER BY\n            CAST(rank AS NUMERIC) DESC\n        LIMIT $2\"#;\n    let Ok(top_ranks) = client\n        .query(top_ranks_query, &[&run, &limit.clamp(1, RESPONSE_LIMIT)])\n        .await\n    else {\n        return HttpResponse::InternalServerError().json(json!({\n            \"error\": \"Failed to fetch top ranks\"\n        }));\n    };\n    let json = rows_to_json(&top_ranks);\n    HttpResponse::Ok().json(json)\n}\n"
  },
  {
    "path": "api/src/logging.rs",
    "content": "use env_logger::Env;\n\npub fn setup_logger() {\n    env_logger::init_from_env(Env::default().default_filter_or(\"info\"));\n}\n\npub struct Logger;\n\nimpl Logger {\n    pub fn default() -> actix_web::middleware::Logger {\n        actix_web::middleware::Logger::new(\"%a '%r' %s %b '%{Referer}i' '%{User-Agent}i' %T\")\n    }\n}\n"
  },
  {
    "path": "api/src/main.rs",
    "content": "mod app_state;\nmod db;\nmod handlers;\nmod logging;\nmod utils;\n\nuse actix_web::{web, App, HttpServer};\nuse dashmap::DashMap;\nuse dotenv::dotenv;\nuse std::env;\nuse std::sync::Arc;\n\nuse crate::app_state::AppState;\nuse crate::handlers::{\n    get_leaderboard, get_project, get_table, get_table_row, heartbeat, list_projects_by_id,\n    list_projects_by_name, list_tables,\n};\nuse crate::logging::setup_logger;\n\n#[actix_web::main]\nasync fn main() -> std::io::Result<()> {\n    dotenv().ok();\n    setup_logger();\n\n    let host = env::var(\"HOST\").unwrap_or_else(|_| \"0.0.0.0\".to_string());\n    let port = env::var(\"PORT\").unwrap_or_else(|_| \"8080\".to_string());\n    let bind_address = format!(\"{host}:{port}\");\n\n    let (pool, tables) = db::initialize_db().await;\n    // Cache for project data to reduce database load on leaderboard routes\n    let project_cache = Arc::new(DashMap::new());\n\n    log::info!(\"Available tables: {tables:?}\");\n    log::info!(\"Starting server at http://{bind_address}\");\n\n    HttpServer::new(move || {\n        App::new()\n            .wrap(logging::Logger::default())\n            .app_data(web::Data::new(AppState {\n                pool: pool.clone(),\n                tables: Arc::clone(&tables),\n                project_cache: Arc::clone(&project_cache),\n            }))\n            // HEALTH\n            .service(heartbeat)\n            // SIMPLE CRUD OPERATIONS\n            .service(list_tables)\n            .service(get_table)\n            .service(get_table_row)\n            // BUSINESS LOGIC\n            .service(get_leaderboard)\n            .service(get_project)\n            .service(list_projects_by_id)\n            .service(list_projects_by_name)\n    })\n    .bind(&bind_address)?\n    .run()\n    .await\n}\n"
  },
  {
    "path": "api/src/utils.rs",
    "content": "use actix_web::web::Query;\nuse chrono::{DateTime, NaiveDate, NaiveDateTime, Utc};\nuse dashmap::DashMap;\nuse serde_json::{json, Value};\nuse std::sync::Arc;\nuse tokio_postgres::{types::Type, Row};\nuse uuid::Uuid;\n\nuse crate::{app_state::ProjectCacheEntry, handlers::PaginationParams};\n\npub fn get_column_names(rows: &[Row]) -> Vec<String> {\n    if let Some(row) = rows.first() {\n        row.columns()\n            .iter()\n            .map(|col| col.name().to_string())\n            .collect()\n    } else {\n        vec![]\n    }\n}\n\npub fn convert_optional_to_json<T, E>(result: Result<Option<T>, E>) -> Value\nwhere\n    T: serde::Serialize,\n{\n    match result {\n        Ok(Some(val)) => json!(val),\n        _ => Value::Null,\n    }\n}\n\npub fn rows_to_json(rows: &[Row]) -> Vec<Value> {\n    rows.iter()\n        .map(|row| {\n            let mut map = serde_json::Map::new();\n            for (i, column) in row.columns().iter().enumerate() {\n                let value: Value = match *column.type_() {\n                    Type::INT2 => convert_optional_to_json(row.try_get::<_, Option<i16>>(i)),\n                    Type::INT4 => convert_optional_to_json(row.try_get::<_, Option<i32>>(i)),\n                    Type::INT8 => convert_optional_to_json(row.try_get::<_, Option<i64>>(i)),\n                    Type::FLOAT4 => convert_optional_to_json(row.try_get::<_, Option<f32>>(i)),\n                    Type::FLOAT8 => convert_optional_to_json(row.try_get::<_, Option<f64>>(i)),\n                    Type::BOOL => convert_optional_to_json(row.try_get::<_, Option<bool>>(i)),\n                    Type::VARCHAR | Type::TEXT | Type::BPCHAR => {\n                        convert_optional_to_json(row.try_get::<_, Option<String>>(i))\n                    }\n                    Type::TIMESTAMP => {\n                        convert_optional_to_json(row.try_get::<_, Option<NaiveDateTime>>(i))\n                    }\n                    Type::TIMESTAMPTZ => {\n                        convert_optional_to_json(row.try_get::<_, Option<DateTime<Utc>>>(i))\n                    }\n                    Type::DATE => convert_optional_to_json(row.try_get::<_, Option<NaiveDate>>(i)),\n                    Type::JSON | Type::JSONB => {\n                        convert_optional_to_json(row.try_get::<_, Option<serde_json::Value>>(i))\n                    }\n                    Type::UUID => convert_optional_to_json(row.try_get::<_, Option<Uuid>>(i)),\n                    Type::TEXT_ARRAY | Type::VARCHAR_ARRAY => {\n                        convert_optional_to_json(row.try_get::<_, Option<Vec<String>>>(i))\n                    }\n                    _ => {\n                        // For unsupported types, try to convert to string\n                        convert_optional_to_json(row.try_get::<_, Option<String>>(i))\n                    }\n                };\n                map.insert(column.name().to_string(), value);\n            }\n            Value::Object(map)\n        })\n        .collect()\n}\n\npub struct Pagination {\n    pub page: i64,\n    pub limit: i64,\n    pub offset: i64,\n    pub total_pages: i64,\n}\n\nimpl Pagination {\n    pub fn new(query: Query<PaginationParams>, total_count: i64) -> Self {\n        let limit = query.limit.unwrap_or(200).clamp(1, 1000);\n        let total_pages = (total_count as f64 / limit as f64).ceil() as i64;\n\n        let page = query.page.unwrap_or(1).clamp(1, total_pages);\n\n        let offset = (page - 1) * limit;\n        Self {\n            page,\n     
       limit,\n            offset,\n            total_pages,\n        }\n    }\n}\n\n// Helper function to get cached projects and return missing ones\npub fn get_cached_projects(\n    cache: Arc<DashMap<Uuid, ProjectCacheEntry>>,\n    project_ids: &[Uuid],\n) -> (Vec<Arc<Value>>, Vec<Uuid>) {\n    let mut cached_projects = Vec::new();\n    let mut missing_ids = Vec::new();\n\n    for &project_id in project_ids {\n        if let Some(entry) = cache.get(&project_id) {\n            if !entry.is_expired() {\n                cached_projects.push(entry.data.clone());\n                continue;\n            }\n        }\n        missing_ids.push(project_id);\n    }\n\n    (cached_projects, missing_ids)\n}\n"
  },
  {
    "path": "core/README.md",
    "content": "# Core Tools for CHAI Python Loaders\n\nThis directory contains a set of core tools and utilities to facilitate loading the CHAI\ndatabase with package manager data, using python helpers. These tools provide a common\nfoundation for fetching, transforming, and loading data from various package managers\ninto the database.\n\nIn general, the flow of an indexer is:\n\n1. Fetch data from source\n2. Fetch data from CHAI\n3. Do a giant diff\n4. Create new entries, updated entries for each package model in the db\n\nThe best example is [Homebrew's](../package_managers/homebrew/main.py).\n\n## Key Components\n\n### [Config](config.py)\n\nEntrypoint for all loaders, generally has all the information needed for the pipeline\nto start. Includes:\n\n- Execution flags:\n  - `FETCH` determines whether we request the data from source\n  - `TEST` enables a test mode, to test specific portions of the pipeline\n  - `NO_CACHE` to determine whether we save the intermediate pipeline files\n- Package Manager flags\n  - `pm_id` gets the package manager id from the db, that we'd run the pipeline for\n  - `source` is the data source for that package manager. `SOURCES` defines the map.\n\nThe next 4 configuration classes retrieve the IDs for url types (homepage, documentation,\netc.), dependency types (build, runtime, etc.), user types (crates user, github user),\nand all the package manager IDs as well.\n\n### 2. [Database](db.py)\n\nThe DB class offers a set of methods for interacting with the database, including:\n\n- Running queries to build a cache for the current state of the graph for a package\n  manager\n- Batching utilities\n- Some load functions\n\n### 3. [Fetcher](fetcher.py)\n\nThe Fetcher class provides functionality for downloading and extracting data from\npackage manager sources. It supports:\n\n- Downloading tarball / GZIP / Git files\n- Extracting contents to a specified directory\n- Maintaining a \"latest\" symlink so we always know where to look\n\n### 4. [Logger](logger.py)\n\nA custom logging utility that provides consistent logging across all loaders.\n\n### 5. [Models](models/__init__.py)\n\nSQLAlchemy models representing the database schema, including:\n\n- Package, Version, User, License, DependsOn, and other relevant tables\n\n> [!NOTE]\n>\n> This is currently used to actually generate the migrations as well\n\n### 6. [Scheduler](scheduler.py)\n\nA scheduling utility that allows loaders to run at specified intervals.\n\n### 7. [Transformer](transformer.py)\n\nThe Transformer class provides a base for creating package manager-specific transformers.\nIt includes:\n\n- Methods for locating and reading input files\n- Placeholder methods for transforming data into the required format\n\n## Usage\n\nTo create a new loader for a package manager:\n\n1. Create a new directory under `package_managers/` for your package manager.\n1. Implement a fetcher that inherits from the base Fetcher, that is able to fetch\n   the raw data from the package manager's source.\n1. Implement a custom Transformer class that inherits from the base Transformer, that\n   figures out how to map the raw data provided by the package managers into the data\n   model described in the [models](models/__init__.py) module.\n1. Load the cache for data currently in CHAI for that package manager\n1. Implement a diff to compare them\n1. Pass diff objects (lists of new / updated data points) to `db.ingest`\n1. 
Orchestrate via a `main.py`.\n\nExample usage can be found in the [crates](../package_managers/crates) loader (a minimal end-to-end sketch is shown below).\n\n## TODOs\n\n- [ ] `Diff` currently has separate implementations for Homebrew and Crates, and could\n      be centralized - open to help here!\n"
  },
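To make the loader flow described in `core/README.md` concrete, here is a minimal, hypothetical `main.py` sketch. It only wires together the classes documented above; `StubTransformer.diff` is a placeholder (every indexer implements its own diff), and the empty `DiffResult` it returns is purely illustrative.

```python
# Hypothetical loader sketch: wires Config, Fetcher, Transformer and DB together.
# StubTransformer.diff is a placeholder -- each indexer implements its own diff.
from core.config import Config, PackageManager
from core.db import DB
from core.fetcher import TarballFetcher
from core.structs import DiffResult
from core.transformer import Transformer


class StubTransformer(Transformer):
    def diff(self, graph) -> DiffResult:
        """Compare fetched data against the current CHAI graph (indexer-specific)."""
        # A real indexer builds these collections from the fetched files vs. `graph`.
        return DiffResult([], {}, [], [], [], [], [])


def main() -> None:
    config = Config(PackageManager.CRATES)
    db = DB("crates_loader")

    # 1. Fetch data from source (skippable via FETCH=false)
    fetcher = TarballFetcher(
        "crates",
        config.pm_config.source,
        config.exec_config.no_cache,
        config.exec_config.test,
    )
    if config.exec_config.fetch:
        files = fetcher.fetch()
        fetcher.write(files)

    # 2. Fetch the current state from CHAI
    graph = db.current_graph(config.pm_config.pm_id)

    # 3. Diff source data against CHAI (indexer-specific)
    transformer = StubTransformer("crates")
    diff = transformer.diff(graph)

    # 4. Load new / updated entries, then record the run
    db.ingest(
        diff.new_packages,
        list(diff.new_urls.values()),
        diff.new_package_urls,
        diff.new_deps,
        diff.removed_deps,
        diff.updated_packages,
        diff.updated_package_urls,
    )
    db.insert_load_history(config.pm_config.pm_id)


if __name__ == "__main__":
    main()
```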
  {
    "path": "core/config.py",
    "content": "from enum import Enum\n\nfrom sqlalchemy import UUID\n\nfrom core.db import ConfigDB\nfrom core.logger import Logger\nfrom core.utils import env_vars\n\nlogger = Logger(\"config\")\n\n\nclass PackageManager(Enum):\n    CRATES = \"crates\"\n    HOMEBREW = \"homebrew\"\n    DEBIAN = \"debian\"\n    NPM = \"npm\"\n    PKGX = \"pkgx\"\n\n\nTEST = env_vars(\"TEST\", \"false\")\nFETCH = env_vars(\"FETCH\", \"true\")\nNO_CACHE = env_vars(\"NO_CACHE\", \"true\")\nSOURCES = {\n    PackageManager.CRATES: \"https://static.crates.io/db-dump.tar.gz\",\n    PackageManager.HOMEBREW: \"https://formulae.brew.sh/api/formula.json\",\n    # for debian, sources contains the urls, packages is tied to the linux distribution\n    PackageManager.DEBIAN: [\n        \"https://ftp.debian.org/debian/dists/stable/main/binary-amd64/Packages.gz\",\n        \"https://ftp.debian.org/debian/dists/stable/main/source/Sources.gz\",\n    ],\n    PackageManager.NPM: \"https://registry.npmjs.org/-/all\",  # fake for now\n    PackageManager.PKGX: \"https://github.com/pkgxdev/pantry.git\",\n}\n\n# The three configuration values URLTypes, DependencyTypes, and UserTypes will query the\n# DB to get the respective IDs. If the values don't exist in the database, they will\n# raise an AttributeError (None has no attribute id) at the start\n\n\nclass ExecConf:\n    test: bool\n    fetch: bool\n    no_cache: bool\n\n    def __init__(self) -> None:\n        self.test = TEST\n        self.fetch = FETCH\n        self.no_cache = NO_CACHE\n\n    def __str__(self):\n        return f\"ExecConf(test={self.test},fetch={self.fetch},no_cache={self.no_cache})\"\n\n\nclass PMConf:\n    package_manager: PackageManager\n    pm_id: str\n    source: str | list[str]\n\n    def __init__(self, pm: PackageManager, db: ConfigDB):\n        self.package_manager = pm\n        self.pm_id = db.select_package_manager_by_name(pm.value).id\n        self.source = SOURCES[pm]\n\n    def __str__(self):\n        return f\"PMConf(pm_id={self.pm_id},source={self.source})\"\n\n\nclass URLTypes:\n    homepage: UUID\n    repository: UUID\n    documentation: UUID\n    source: UUID\n\n    def __init__(self, db: ConfigDB):\n        self.load_url_types(db)\n\n    def load_url_types(self, db: ConfigDB) -> None:\n        self.homepage = db.select_url_types_by_name(\"homepage\").id\n        self.repository = db.select_url_types_by_name(\"repository\").id\n        self.documentation = db.select_url_types_by_name(\"documentation\").id\n        self.source = db.select_url_types_by_name(\"source\").id\n\n    def __str__(self) -> str:\n        return f\"URLs(homepage={self.homepage},repo={self.repository},docs={self.documentation},src={self.source})\"\n\n\nclass UserTypes:\n    crates: UUID\n    github: UUID\n\n    def __init__(self, db: ConfigDB):\n        self.crates = db.select_source_by_name(\"crates\").id\n        self.github = db.select_source_by_name(\"github\").id\n\n    def __str__(self) -> str:\n        return f\"UserTypes(crates={self.crates},github={self.github})\"\n\n\nclass DependencyTypes:\n    build: UUID\n    development: UUID\n    runtime: UUID\n    test: UUID\n    optional: UUID\n    recommended: UUID\n\n    def __init__(self, db: ConfigDB):\n        self.build = db.select_dependency_type_by_name(\"build\").id\n        self.development = db.select_dependency_type_by_name(\"development\").id\n        self.runtime = db.select_dependency_type_by_name(\"runtime\").id\n        self.test = db.select_dependency_type_by_name(\"test\").id\n        self.optional = 
db.select_dependency_type_by_name(\"optional\").id\n        self.recommended = db.select_dependency_type_by_name(\"recommended\").id\n\n    def __str__(self) -> str:\n        return f\"DependencyTypes(build={self.build},development={self.development},runtime={self.runtime},test={self.test},optional={self.optional},recommended={self.recommended})\"\n\n\nclass PackageManagers:\n    crates: UUID\n    homebrew: UUID\n    debian: UUID\n    npm: UUID\n    pkgx: UUID\n\n    def __init__(self, db: ConfigDB):\n        self.crates = db.select_package_manager_by_name(\"crates\").id\n        self.homebrew = db.select_package_manager_by_name(\"homebrew\").id\n        self.debian = db.select_package_manager_by_name(\"debian\").id\n        self.npm = db.select_package_manager_by_name(\"npm\").id\n        self.pkgx = db.select_package_manager_by_name(\"pkgx\").id\n\n\nclass Config:\n    exec_config: ExecConf\n    pm_config: PMConf\n    url_types: URLTypes\n    user_types: UserTypes\n    dependency_types: DependencyTypes\n    package_managers: PackageManagers\n\n    def __init__(self, pm: PackageManager) -> None:\n        db = ConfigDB()\n        self.exec_config = ExecConf()\n        self.pm_config = PMConf(pm, db)\n        self.url_types = URLTypes(db)\n        self.user_types = UserTypes(db)\n        self.dependency_types = DependencyTypes(db)\n        self.package_managers = PackageManagers(db)\n\n    def __str__(self):\n        return f\"Config(exec_config={self.exec_config}, pm_config={self.pm_config}, url_types={self.url_types}, user_types={self.user_types}, dependency_types={self.dependency_types}, package_managers={self.package_managers})\"\n"
  },
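A short usage sketch for `Config`, assuming `CHAI_DATABASE_URL` points at a migrated database so every type lookup resolves (otherwise the lookups raise `AttributeError`, as noted in the file):

```python
# Minimal sketch: construct a Config for Homebrew and read the resolved IDs.
from core.config import Config, PackageManager

config = Config(PackageManager.HOMEBREW)

print(config.pm_config.pm_id)           # package manager UUID resolved from the DB
print(config.url_types.homepage)        # UUID of the "homepage" URL type
print(config.dependency_types.runtime)  # UUID of the "runtime" dependency type
print(config)                           # __str__ summarizes all resolved config
```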
  {
    "path": "core/db.py",
    "content": "import os\nfrom collections import defaultdict\nfrom datetime import datetime\nfrom typing import Any\nfrom uuid import UUID\n\nfrom sqlalchemy import Insert, Result, Update, create_engine, select, update\nfrom sqlalchemy.dialects import postgresql\nfrom sqlalchemy.orm import Session, sessionmaker\n\nfrom core.logger import Logger\nfrom core.models import (\n    URL,\n    BaseModel,\n    DependsOnType,\n    LegacyDependency,\n    LoadHistory,\n    Package,\n    PackageManager,\n    PackageURL,\n    Source,\n    URLType,\n)\nfrom core.structs import CurrentGraph, CurrentURLs, URLKey\n\nCHAI_DATABASE_URL = os.getenv(\"CHAI_DATABASE_URL\")\nDEFAULT_BATCH_SIZE = 10000\n\n\nclass DB:\n    def __init__(self, logger_name: str):\n        self.logger = Logger(logger_name)\n        self.engine = create_engine(CHAI_DATABASE_URL)\n        self.session = sessionmaker(self.engine)\n        self.logger.debug(\"connected\")\n        self.now: datetime = datetime.now()\n\n    def insert_load_history(self, package_manager_id: str):\n        with self.session() as session:\n            session.add(LoadHistory(package_manager_id=package_manager_id))\n            session.commit()\n\n    def print_statement(self, stmt):\n        dialect = postgresql.dialect()\n        compiled_stmt = stmt.compile(\n            dialect=dialect, compile_kwargs={\"literal_binds\": True}\n        )\n        self.logger.log(str(compiled_stmt))\n\n    def close(self):\n        self.logger.debug(\"closing\")\n        self.engine.dispose()\n\n    def search_names(\n        self, package_names: list[str], package_managers: list[UUID]\n    ) -> list[str]:\n        \"\"\"Return Homepage URLs for packages with these names\"\"\"\n\n        with self.session() as session:\n            results = (\n                session.query(Package, URL)\n                .join(PackageURL, PackageURL.package_id == Package.id)\n                .join(URL, PackageURL.url_id == URL.id)\n                .join(URLType, URL.url_type_id == URLType.id)\n                .filter(URLType.name == \"homepage\")\n                .filter(Package.name.in_(package_names))\n                .filter(Package.package_manager_id.in_(package_managers))\n                .all()\n            )\n\n            # build a mapping\n            name_to_url = {result.Package.name: result.URL.url for result in results}\n\n            # return in the order preserved by the input (bc its relevant)\n            # and account for the fact that some names might not have a URL\n            return [\n                name_to_url.get(name) for name in package_names if name in name_to_url\n            ]\n\n    def current_graph(self, package_manager_id: UUID) -> CurrentGraph:\n        \"\"\"Get the packages and dependencies for a specific package manager\"\"\"\n        package_map: dict[str, Package] = defaultdict(Package)\n        dependencies: dict[UUID, set[LegacyDependency]] = defaultdict(set)\n\n        stmt = (\n            select(Package, LegacyDependency)\n            .select_from(Package)\n            .join(\n                LegacyDependency,\n                onclause=Package.id == LegacyDependency.package_id,\n                isouter=True,\n            )\n            .where(Package.package_manager_id == package_manager_id)\n        )\n\n        with self.session() as session:\n            result: Result[tuple[Package, LegacyDependency]] = session.execute(stmt)\n\n            for pkg, dep in result:\n                # add to the package map, by import_id, which is usually 
name\n                package_map[pkg.import_id] = pkg\n\n                # and add to the dependencies map as well\n                if dep:  # check because it's an outer join, so might be None\n                    dependencies[pkg.id].add(dep)\n\n        self.logger.debug(f\"Cached {len(package_map)} packages\")\n\n        return CurrentGraph(package_map, dependencies)\n\n    def _build_current_urls(\n        self, result: Result[tuple[Package, PackageURL, URL]]\n    ) -> CurrentURLs:\n        \"\"\"Build the CurrentURLs result based on a query of Package, PackageURL, URL\"\"\"\n        url_map: dict[URLKey, URL] = {}\n        package_urls: dict[UUID, set[PackageURL]] = {}\n\n        for pkg, pkg_url, url in result:\n            url_key = URLKey(url.url, url.url_type_id)\n            url_map[url_key] = url\n\n            # since it's a left join, we need to check if pkg is None\n            if pkg is not None:\n                if pkg.id not in package_urls:\n                    package_urls[pkg.id] = set()\n                package_urls[pkg.id].add(pkg_url)\n\n        self.logger.debug(f\"Cached {len(url_map)} URLs\")\n        self.logger.debug(f\"Cached {len(package_urls)} package URLs\")\n\n        return CurrentURLs(url_map=url_map, package_urls=package_urls)\n\n    def current_urls(self, urls: set[str]) -> CurrentURLs:\n        \"\"\"Get the Package URL Relationships for a given set of URLs\"\"\"\n        stmt = (\n            select(Package, PackageURL, URL)\n            .select_from(URL)\n            .join(PackageURL, PackageURL.url_id == URL.id, isouter=True)\n            .join(Package, Package.id == PackageURL.package_id, isouter=True)\n            .where(URL.url.in_(urls))\n        )\n\n        with self.session() as session:\n            result: Result[tuple[Package, PackageURL, URL]] = session.execute(stmt)\n            return self._build_current_urls(result)\n\n    def all_current_urls(self) -> CurrentURLs:\n        \"\"\"Get all the URLs and the Packages they are tied to from CHAI\"\"\"\n        stmt = (\n            select(Package, PackageURL, URL)\n            .select_from(URL)\n            .join(PackageURL, PackageURL.url_id == URL.id, isouter=True)\n            .join(Package, Package.id == PackageURL.package_id, isouter=True)\n        )\n        with self.session() as session:\n            result: Result[tuple[Package, PackageURL, URL]] = session.execute(stmt)\n            return self._build_current_urls(result)\n\n    def load(\n        self, session: Session, data: list[BaseModel], stmt: Insert | Update\n    ) -> None:\n        \"\"\"Smart batching utility\"\"\"\n        if data:\n            values: list[dict[str, str | UUID | datetime]] = [\n                obj.to_dict_v2() for obj in data\n            ]\n            self.batch(session, stmt, values, DEFAULT_BATCH_SIZE)\n\n    def batch(\n        self,\n        session: Session,\n        stmt: Insert | Update,\n        values: list[dict[str, str | UUID | datetime]],\n        batch_size: int = DEFAULT_BATCH_SIZE,\n    ):\n        \"\"\"\n        Utility to batch insert or update, but doesn't commit!\n\n        Inputs:\n        - session: the sessionmaker object, so create it before you use it\n        - stmt: the insert or update statement to construct, without the values\n        - values: the values to insert or update - generally using to_dict_v2()\n        - batch_size: the batch size, defaults to 10000\n        - obj_name: the name of the object being inserted for logging\n        \"\"\"\n        for i in range(0, 
len(values), batch_size):\n            batch = values[i : i + batch_size]\n            self.logger.log(\n                f\"Processing batch {i // batch_size + 1}/{(len(values) - 1) // batch_size + 1} ({len(batch)})\"\n            )\n            value_stmt = stmt.values(batch)\n            session.execute(value_stmt)\n\n    def ingest(\n        self,\n        new_packages: list[Package],\n        new_urls: list[URL],\n        new_package_urls: list[PackageURL],\n        new_deps: list[LegacyDependency],\n        removed_deps: list[LegacyDependency],\n        updated_packages: list[dict[str, UUID | str | datetime]],\n        updated_package_urls: list[dict[str, UUID | datetime]],\n    ) -> None:\n        \"\"\"\n        Ingests a list of new, updated, and deleted objects from the database.\n\n        It flushes after each insert, to ensure that the database is in a valid\n        state prior to the next batch of ingestions.\n\n        TODO: if pkey is set in the values provided, then sqlalchemy will use\n        psycopg2.executemany(...), which is quicker, but still the slowest of all\n        execution options provided by psycopg2. The best one is execute_values, which\n        is **only** available for inserts, and can be used as follows:\n\n        looks like sqlalchemy^2 has a native support for insertmanyvalues, but\n        **I think** we need to pass the data in as a list[dict] instead of objects.\n        See: https://docs.sqlalchemy.org/en/20/core/connections.html#engine-insertmanyvalues\n\n\n        Inputs:\n        - new_packages: a list of new Package objects\n        - new_urls: a list of new URL objects\n        - new_package_urls: a list of new PackageURL objects\n        - updated_packages: a list of updated Package objects\n        - updated_package_urls: a list of updated PackageURL objects\n        - new_deps: a list of new LegacyDependency objects\n        - removed_deps: a list of removed LegacyDependency objects\n        \"\"\"\n        self.logger.log(\"-\" * 100)\n        self.logger.log(\"Going to load\")\n        self.logger.log(f\"New packages: {len(new_packages)}\")\n        self.logger.log(f\"New URLs: {len(new_urls)}\")\n        self.logger.log(f\"New package URLs: {len(new_package_urls)}\")\n        self.logger.log(f\"Updated packages: {len(updated_packages)}\")\n        self.logger.log(f\"Updated package URLs: {len(updated_package_urls)}\")\n        self.logger.log(f\"New dependencies: {len(new_deps)}\")\n        self.logger.log(f\"Removed dependencies: {len(removed_deps)}\")\n        self.logger.log(\"-\" * 100)\n\n        with self.session() as session:\n            try:\n                # 1. Add all new objects with granular flushes\n                self.execute(session, new_packages, \"add\", \"new packages\")\n                self.execute(session, new_urls, \"add\", \"new urls\")\n                self.execute(session, new_package_urls, \"add\", \"new package urls\")\n                self.execute(session, removed_deps, \"delete\", \"removed dependencies\")\n                self.execute(session, new_deps, \"add\", \"new dependencies\")\n\n                # 2. Perform updates (these will now operate on a flushed state)\n                if updated_packages:\n                    session.execute(update(Package), updated_packages)\n\n                if updated_package_urls:\n                    session.execute(update(PackageURL), updated_package_urls)\n\n                # 3. 
Commit all changes\n                session.commit()\n                self.logger.log(\"✅ Successfully ingested\")\n            except Exception as e:\n                self.logger.error(f\"Error during batched ingest: {e}\")\n                session.rollback()\n                raise e\n\n    def execute(self, session: Session, data: list[Any], method: str, log: str) -> None:\n        if method not in [\"add\", \"delete\"]:\n            raise ValueError(f\"db.execute({method}) is unknown\")\n\n        if data:\n            match method:\n                case \"add\":\n                    session.add_all(data)\n                case \"delete\":\n                    self.remove_all(session, data)\n                case _:\n                    pass\n\n            session.flush()\n        self.logger.log(f\"✅ {len(data):,} {log}\")\n\n    def remove_all(self, session: Session, data: list[Any]) -> None:\n        for item in data:\n            session.delete(item)\n\n\nclass ConfigDB(DB):\n    def __init__(self):\n        super().__init__(\"ConfigDB\")\n\n    def select_package_manager_by_name(self, package_manager: str) -> PackageManager:\n        with self.session() as session:\n            result = (\n                session.query(PackageManager)\n                .join(Source, PackageManager.source_id == Source.id)\n                .filter(Source.type == package_manager)\n                .first()\n            )\n\n            if result:\n                return result\n\n            raise ValueError(f\"Package manager {package_manager} not found\")\n\n    def select_url_types_by_name(self, name: str) -> URLType:\n        with self.session() as session:\n            return session.query(URLType).filter(URLType.name == name).first()\n\n    def select_source_by_name(self, name: str) -> Source:\n        with self.session() as session:\n            return session.query(Source).filter(Source.type == name).first()\n\n    def select_dependency_type_by_name(self, name: str) -> DependsOnType:\n        with self.session() as session:\n            return (\n                session.query(DependsOnType).filter(DependsOnType.name == name).first()\n            )\n\n\nif __name__ == \"__main__\":\n    db = ConfigDB()\n    print(db.search_names([\"elfutils.org\", \"elfutils\"]))\n"
  },
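A minimal sketch of the batching utilities above: stage `Package` inserts in one session and commit once at the end. The field values and the on-conflict policy here are illustrative assumptions, not taken from an actual indexer.

```python
# Sketch: batch-insert packages with DB.load(); the caller owns the commit.
from uuid import uuid4

from sqlalchemy.dialects.postgresql import insert

from core.db import DB
from core.models import Package

db = DB("example_loader")

new_packages = [
    Package(
        derived_id="homebrew/example",
        name="example",
        package_manager_id=uuid4(),  # in practice, a UUID resolved via Config
        import_id="example",
    )
]

with db.session() as session:
    # load() converts each object via to_dict_v2() and executes in batches of
    # 10,000 rows; it never commits, so the caller decides the transaction boundary.
    stmt = insert(Package).on_conflict_do_nothing()
    db.load(session, new_packages, stmt)
    session.commit()
```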
  {
    "path": "core/fetcher.py",
    "content": "import gzip\nimport json\nimport os\nimport tarfile\nfrom dataclasses import dataclass\nfrom datetime import datetime\nfrom io import BytesIO\nfrom shutil import rmtree\nfrom typing import Any\n\nimport git\nfrom requests import get\n\nfrom core.logger import Logger\n\n\n@dataclass\nclass Data:\n    file_path: str\n    file_name: str\n    content: Any  # json or bytes\n\n\nclass Fetcher:\n    def __init__(self, name: str, source: str, no_cache: bool, test: bool):\n        self.name = name\n        self.source = source\n        self.output = f\"data/{name}\"\n        self.logger = Logger(f\"{name}_fetcher\")\n        self.no_cache = no_cache\n        self.test = test\n\n    def write(self, files: list[Data]):\n        \"\"\"generic write function for some collection of files\"\"\"\n\n        # prep the file location\n        now = datetime.now().strftime(\"%Y-%m-%d\")\n        root_path = f\"{self.output}/{now}\"\n\n        # write\n        # it can be anything - json, tarball, etc.\n        for item in files:\n            self.logger.debug(f\"writing {item.file_path}/{item.file_name}\")\n            file_path = item.file_path\n            file_name = item.file_name\n            file_content = item.content\n            full_path = os.path.join(root_path, file_path)\n\n            # make sure the path exists\n            os.makedirs(full_path, exist_ok=True)\n\n            with open(os.path.join(full_path, file_name), \"wb\") as f:\n                if isinstance(file_content, list | dict):\n                    # Convert JSON-serializable objects to string\n                    file_content = json.dumps(file_content)\n\n                # Ensure content is bytes before writing\n                if isinstance(file_content, str):\n                    file_content = file_content.encode(\"utf-8\")\n\n                f.write(file_content)\n\n        # update the latest symlink\n        self.update_symlink(now)\n\n    def update_symlink(self, latest_path: str):\n        latest_symlink = f\"{self.output}/latest\"\n        if os.path.islink(latest_symlink):\n            self.logger.debug(f\"removing existing symlink {latest_symlink}\")\n            os.remove(latest_symlink)\n\n        self.logger.debug(f\"creating symlink {latest_symlink} -> {latest_path}\")\n        os.symlink(latest_path, latest_symlink)\n\n    def fetch(self) -> bytes:\n        if not self.source:\n            raise ValueError(\"source is not set\")\n\n        response = get(self.source)\n        try:\n            response.raise_for_status()\n        except Exception as e:\n            self.logger.error(f\"error fetching {self.source}: {e}\")\n            raise e\n        return response.content\n\n    def cleanup(self):\n        if self.no_cache:\n            # TODO: it's deleting everything here\n            rmtree(self.output, ignore_errors=True)\n            os.makedirs(self.output, exist_ok=True)\n\n\nclass TarballFetcher(Fetcher):\n    def __init__(self, name: str, source: str, no_cache: bool, test: bool):\n        super().__init__(name, source, no_cache, test)\n\n    def fetch(self) -> list[Data]:\n        content = super().fetch()\n\n        bytes_io_object = BytesIO(content)\n        bytes_io_object.seek(0)\n\n        files = []\n        with tarfile.open(fileobj=bytes_io_object, mode=\"r:gz\") as tar:\n            for member in tar.getmembers():\n                if member.isfile():\n                    bytes_io_file = BytesIO(tar.extractfile(member).read())\n                    destination_key = 
member.name\n                    file_name = destination_key.split(\"/\")[-1]\n                    file_path = \"/\".join(destination_key.split(\"/\")[:-1])\n                    self.logger.debug(f\"file_path/file_name: {file_path}/{file_name}\")\n                    files.append(Data(file_path, file_name, bytes_io_file.read()))\n\n        return files\n\n\n# GZip compresses only one file, so file_path and file_name are not used\nclass GZipFetcher(Fetcher):\n    def __init__(\n        self,\n        name: str,\n        source: str,\n        no_cache: bool,\n        test: bool,\n        file_path: str,\n        file_name: str,\n    ):\n        super().__init__(name, source, no_cache, test)\n        self.file_path = file_path\n        self.file_name = file_name\n\n    def fetch(self) -> list[Data]:\n        content = super().fetch()\n        files = []\n\n        decompressed = gzip.decompress(content).decode(\"utf-8\")\n        files.append(Data(self.file_path, self.file_name, decompressed.encode(\"utf-8\")))\n\n        return files\n\n\nclass GitFetcher(Fetcher):\n    def __init__(self, name: str, source: str, no_cache: bool, test: bool):\n        super().__init__(name, source, no_cache, test)\n\n    def fetch(self) -> str:\n        # assume that source is a git repo whose main branch needs to be cloned\n        # we'll first prep the output directory, then clone, then update the symlinks\n        # NOTE: this is what the main Fetcher does, but slightly modified for this case\n\n        now = datetime.now().strftime(\"%Y-%m-%d\")\n        root_dir = f\"{self.output}/{now}\"\n        os.makedirs(root_dir, exist_ok=True)\n\n        # now, clone the repo here\n        self.logger.debug(f\"Cloning {self.source} into {root_dir}...\")\n        _ = git.Repo.clone_from(self.source, root_dir, depth=1, branch=\"main\")\n        self.logger.debug(\"Repository cloned successfully.\")\n\n        # update the symlinks\n        self.update_symlink(now)\n\n        return root_dir\n"
  },
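A short usage sketch for `TarballFetcher`, using the crates.io dump URL from `core/config.py`:

```python
# Download the crates.io DB dump, unpack it in memory, and persist it under
# data/crates/<date>/ with the "latest" symlink repointed.
from core.fetcher import TarballFetcher

fetcher = TarballFetcher(
    name="crates",
    source="https://static.crates.io/db-dump.tar.gz",
    no_cache=False,  # keep the extracted files on disk
    test=False,
)

files = fetcher.fetch()  # list[Data]: one entry per file inside the tarball
fetcher.write(files)     # writes to data/crates/YYYY-MM-DD and updates "latest"
```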
  {
    "path": "core/logger.py",
    "content": "import sys\nimport time\nimport traceback\n\nfrom core.utils import env_vars\n\nDEBUG = env_vars(\"DEBUG\", \"false\")\n\n\ndef as_minutes(seconds: float) -> float:\n    return seconds / 60\n\n\nclass Logger:\n    SILENT = 0\n    NORMAL = 1\n    VERBOSE = 2\n\n    def __init__(\n        self, name: str, mode: int = NORMAL, start: float | None = None\n    ) -> None:\n        self.name = name\n        self.start = start or time.time()\n        self.mode = Logger.VERBOSE if DEBUG else mode\n\n    def print(self, msg: str):\n        print(f\"{self.time_diff():.2f}: [{self.name}]: {msg}\", flush=True)\n\n    def error(self, message):\n        self.print(f\"[ERROR]: {message}\")\n\n    def log(self, message):\n        if self.mode >= Logger.NORMAL:\n            self.print(f\"{message}\")\n\n    def debug(self, message):\n        if self.mode >= Logger.VERBOSE:\n            self.print(f\"[DEBUG]: {message}\")\n\n    def warn(self, message):\n        if self.mode >= Logger.NORMAL:\n            self.print(f\"[WARN]: {message}\")\n\n    def is_verbose(self):\n        return self.mode >= Logger.VERBOSE\n\n    def time_diff(self):\n        return time.time() - self.start\n\n    def exception(self):\n        exc_type, exc_value, exc_traceback = sys.exc_info()\n        self.print(f\"{exc_type.__name__}: {exc_value}\")\n        self.print(\"***** TRACEBACK *****\")\n        print(f\"{''.join(traceback.format_tb(exc_traceback))}\")\n\n    def info(self, message):\n        self.log(message)\n\n    def warning(self, message):\n        self.warn(message)\n"
  },
  {
    "path": "core/models/__init__.py",
    "content": "# __init__.py\nfrom __future__ import annotations\n\nfrom datetime import datetime\n\nfrom sqlalchemy import (\n    Column,\n    DateTime,\n    ForeignKey,\n    Index,\n    Integer,\n    MetaData,\n    String,\n    UniqueConstraint,\n    func,\n)\nfrom sqlalchemy.dialects.postgresql import UUID\nfrom sqlalchemy.orm import Mapped, declarative_base, relationship\n\nnaming_convention = {\n    \"ix\": \"ix_%(column_0_label)s\",\n    \"uq\": \"uq_%(table_name)s_%(column_0_name)s\",\n    \"ck\": \"ck_%(table_name)s_%(constraint_name)s\",\n    \"fk\": \"fk_%(table_name)s_%(column_0_name)s_%(referred_table_name)s\",\n    \"pk\": \"pk_%(table_name)s\",\n}\nmetadata = MetaData(naming_convention=naming_convention)\n\n\nclass BaseModel:\n    # we have UUIDs, strings, datetimes, ints, and floats\n    def to_dict_v2(self) -> dict[str, str | UUID | datetime | int | float]:\n        \"\"\"Return a dictionary of all non-None attributes.\"\"\"\n        return {\n            attr: getattr(self, attr)\n            for attr in self.__table__.columns.keys()  # noqa: SIM118\n            if getattr(self, attr) is not None\n        }\n\n\nBase = declarative_base(metadata=metadata, cls=BaseModel)\n\n\nclass Package(Base):\n    __tablename__ = \"packages\"\n    __table_args__ = (\n        UniqueConstraint(\n            \"package_manager_id\", \"import_id\", name=\"uq_package_manager_import_id\"\n        ),\n    )\n\n    id = Column(\n        UUID(as_uuid=True),\n        primary_key=True,\n        default=func.uuid_generate_v4(),\n        server_default=func.uuid_generate_v4(),\n    )\n    derived_id = Column(String, nullable=False, unique=True)  # package_manager/name\n    name = Column(String, nullable=False, index=True)\n    package_manager_id = Column(\n        UUID(as_uuid=True),\n        ForeignKey(\"package_managers.id\"),\n        nullable=False,\n        index=True,\n    )\n    import_id = Column(String, nullable=False, index=True)\n    readme = Column(String, nullable=True)\n    created_at = Column(\n        DateTime, nullable=False, default=func.now(), server_default=func.now()\n    )\n    updated_at = Column(\n        DateTime, nullable=False, default=func.now(), server_default=func.now()\n    )\n\n    def to_dict(self):\n        return {\n            \"derived_id\": self.derived_id,\n            \"name\": self.name,\n            \"package_manager_id\": self.package_manager_id,\n            \"import_id\": self.import_id,\n            \"readme\": self.readme,\n        }\n\n\nclass PackageManager(Base):\n    __tablename__ = \"package_managers\"\n    id = Column(\n        UUID(as_uuid=True),\n        primary_key=True,\n        default=func.uuid_generate_v4(),\n        server_default=func.uuid_generate_v4(),\n    )\n    source_id = Column(\n        UUID(as_uuid=True), ForeignKey(\"sources.id\"), nullable=False, unique=True\n    )\n    created_at = Column(\n        DateTime, nullable=False, default=func.now(), server_default=func.now()\n    )\n    updated_at = Column(\n        DateTime, nullable=False, default=func.now(), server_default=func.now()\n    )\n\n\nclass Version(Base):\n    __tablename__ = \"versions\"\n    __table_args__ = (\n        UniqueConstraint(\"package_id\", \"version\", name=\"uq_package_version\"),\n    )\n    id = Column(\n        UUID(as_uuid=True),\n        primary_key=True,\n        default=func.uuid_generate_v4(),\n        server_default=func.uuid_generate_v4(),\n    )\n    package_id = Column(\n        UUID(as_uuid=True), ForeignKey(\"packages.id\"), nullable=False, 
index=True\n    )\n    version = Column(String, nullable=False, index=True)\n    import_id = Column(String, nullable=False, index=True)\n    # size, published_at, license_id, downloads, checksum\n    # are nullable bc not all sources provide them\n    size = Column(Integer, nullable=True, index=True)\n    published_at = Column(DateTime, nullable=True, index=True)\n    license_id = Column(\n        UUID(as_uuid=True), ForeignKey(\"licenses.id\"), nullable=True, index=True\n    )\n    downloads = Column(Integer, nullable=True, index=True)\n    checksum = Column(String, nullable=True)\n    created_at = Column(\n        DateTime, nullable=False, default=func.now(), server_default=func.now()\n    )\n    updated_at = Column(\n        DateTime, nullable=False, default=func.now(), server_default=func.now()\n    )\n\n    package: Mapped[Package] = relationship()\n    license: Mapped[License] = relationship()\n\n    def to_dict(self):\n        return {\n            \"package_id\": self.package_id,\n            \"version\": self.version,\n            \"import_id\": self.import_id,\n            \"size\": self.size,\n            \"published_at\": self.published_at,\n            \"license_id\": self.license_id,\n            \"downloads\": self.downloads,\n            \"checksum\": self.checksum,\n        }\n\n\nclass License(Base):\n    __tablename__ = \"licenses\"\n    id = Column(\n        UUID(as_uuid=True),\n        primary_key=True,\n        default=func.uuid_generate_v4(),\n        server_default=func.uuid_generate_v4(),\n    )\n    name = Column(String, nullable=False, unique=True, index=True)\n    created_at = Column(\n        DateTime, nullable=False, default=func.now(), server_default=func.now()\n    )\n    updated_at = Column(\n        DateTime, nullable=False, default=func.now(), server_default=func.now()\n    )\n\n\nclass DependsOn(Base):\n    __tablename__ = \"dependencies\"\n    __table_args__ = (\n        UniqueConstraint(\n            \"version_id\",\n            \"dependency_id\",\n            \"dependency_type_id\",\n            name=\"uq_version_dependency_type\",\n        ),\n    )\n    id = Column(\n        UUID(as_uuid=True),\n        primary_key=True,\n        default=func.uuid_generate_v4(),\n        server_default=func.uuid_generate_v4(),\n    )\n    version_id = Column(\n        UUID(as_uuid=True), ForeignKey(\"versions.id\"), nullable=False, index=True\n    )\n    dependency_id = Column(\n        UUID(as_uuid=True), ForeignKey(\"packages.id\"), nullable=False, index=True\n    )\n    # ideally, these are non-nullable but diff package managers are picky about this\n    dependency_type_id = Column(\n        UUID(as_uuid=True), ForeignKey(\"depends_on_types.id\"), nullable=True, index=True\n    )\n    semver_range = Column(String, nullable=True)\n    created_at = Column(\n        DateTime, nullable=False, default=func.now(), server_default=func.now()\n    )\n    updated_at = Column(\n        DateTime, nullable=False, default=func.now(), server_default=func.now()\n    )\n\n    version: Mapped[Version] = relationship()\n    dependency: Mapped[Package] = relationship()\n    dependency_type: Mapped[DependsOnType] = relationship()\n\n    def to_dict(self):\n        return {\n            \"version_id\": self.version_id,\n            \"dependency_id\": self.dependency_id,\n            # \"dependency_type_id\": self.dependency_type_id,\n            \"semver_range\": self.semver_range,\n        }\n\n\nclass DependsOnType(Base):\n    __tablename__ = \"depends_on_types\"\n    id = Column(\n 
       UUID(as_uuid=True),\n        primary_key=True,\n        default=func.uuid_generate_v4(),\n        server_default=func.uuid_generate_v4(),\n    )\n    name = Column(String, nullable=False, unique=True, index=True)\n    created_at = Column(\n        DateTime, nullable=False, default=func.now(), server_default=func.now()\n    )\n    updated_at = Column(\n        DateTime, nullable=False, default=func.now(), server_default=func.now()\n    )\n\n\nclass LoadHistory(Base):\n    __tablename__ = \"load_history\"\n    id = Column(\n        UUID(as_uuid=True),\n        primary_key=True,\n        default=func.uuid_generate_v4(),\n        server_default=func.uuid_generate_v4(),\n    )\n    package_manager_id = Column(\n        UUID(as_uuid=True), ForeignKey(\"package_managers.id\"), nullable=False\n    )\n    created_at = Column(\n        DateTime, nullable=False, default=func.now(), server_default=func.now()\n    )\n    updated_at = Column(\n        DateTime, nullable=False, default=func.now(), server_default=func.now()\n    )\n\n\n# authoritative source of truth for all our sources\nclass Source(Base):\n    __tablename__ = \"sources\"\n    id = Column(\n        UUID(as_uuid=True),\n        primary_key=True,\n        default=func.uuid_generate_v4(),\n        server_default=func.uuid_generate_v4(),\n    )\n    type = Column(String, nullable=False, unique=True)\n    created_at = Column(\n        DateTime, nullable=False, default=func.now(), server_default=func.now()\n    )\n    updated_at = Column(\n        DateTime, nullable=False, default=func.now(), server_default=func.now()\n    )\n\n\n# this is a collection of all the different type of URLs\nclass URL(Base):\n    __tablename__ = \"urls\"\n    __table_args__ = (UniqueConstraint(\"url_type_id\", \"url\", name=\"uq_url_type_url\"),)\n    id = Column(\n        UUID(as_uuid=True),\n        primary_key=True,\n        default=func.uuid_generate_v4(),\n        server_default=func.uuid_generate_v4(),\n    )\n    url_trgm_idx = Index(\n        \"ix_urls_url_trgm\",\n        \"url\",\n        postgresql_using=\"gin\",\n        postgresql_ops={\"url\": \"gin_trgm_ops\"},\n    )\n    url = Column(String, nullable=False)\n    url_type_id = Column(\n        UUID(as_uuid=True), ForeignKey(\"url_types.id\"), nullable=False, index=True\n    )\n    created_at = Column(\n        DateTime, nullable=False, default=func.now(), server_default=func.now()\n    )\n    updated_at = Column(\n        DateTime, nullable=False, default=func.now(), server_default=func.now()\n    )\n\n    def to_dict(self):\n        return {\"url\": self.url, \"url_type_id\": self.url_type_id}\n\n\n# homepage, repository, documentation, etc.\nclass URLType(Base):\n    __tablename__ = \"url_types\"\n    id = Column(\n        UUID(as_uuid=True),\n        primary_key=True,\n        default=func.uuid_generate_v4(),\n        server_default=func.uuid_generate_v4(),\n    )\n    name = Column(String, nullable=False, unique=True)\n    created_at = Column(\n        DateTime, nullable=False, default=func.now(), server_default=func.now()\n    )\n    updated_at = Column(\n        DateTime, nullable=False, default=func.now(), server_default=func.now()\n    )\n\n\nclass User(Base):\n    __tablename__ = \"users\"\n    __table_args__ = (\n        UniqueConstraint(\"source_id\", \"import_id\", name=\"uq_source_import_id\"),\n    )\n    id = Column(\n        UUID(as_uuid=True),\n        primary_key=True,\n        default=func.uuid_generate_v4(),\n        server_default=func.uuid_generate_v4(),\n    )\n    
username = Column(String, nullable=False, index=True)\n    source_id = Column(\n        UUID(as_uuid=True), ForeignKey(\"sources.id\"), nullable=False, index=True\n    )\n    import_id = Column(String, nullable=False, unique=False, index=True)\n    created_at = Column(\n        DateTime, nullable=False, default=func.now(), server_default=func.now()\n    )\n    updated_at = Column(\n        DateTime, nullable=False, default=func.now(), server_default=func.now()\n    )\n\n    def to_dict(self):\n        return {\n            \"username\": self.username,\n            \"source_id\": self.source_id,\n            \"import_id\": self.import_id,\n        }\n\n\nclass UserVersion(Base):\n    __tablename__ = \"user_versions\"\n    __table_args__ = (\n        UniqueConstraint(\"user_id\", \"version_id\", name=\"uq_user_version\"),\n    )\n    id = Column(\n        UUID(as_uuid=True),\n        primary_key=True,\n        default=func.uuid_generate_v4(),\n        server_default=func.uuid_generate_v4(),\n    )\n    user_id = Column(\n        UUID(as_uuid=True), ForeignKey(\"users.id\"), nullable=False, index=True\n    )\n    version_id = Column(\n        UUID(as_uuid=True), ForeignKey(\"versions.id\"), nullable=False, index=True\n    )\n    created_at = Column(\n        DateTime, nullable=False, default=func.now(), server_default=func.now()\n    )\n    updated_at = Column(\n        DateTime, nullable=False, default=func.now(), server_default=func.now()\n    )\n\n    def to_dict(self):\n        return {\n            \"user_id\": self.user_id,\n            \"version_id\": self.version_id,\n        }\n\n\nclass UserPackage(Base):\n    __tablename__ = \"user_packages\"\n    __table_args__ = (\n        UniqueConstraint(\"user_id\", \"package_id\", name=\"uq_user_package\"),\n    )\n    id = Column(\n        UUID(as_uuid=True),\n        primary_key=True,\n        default=func.uuid_generate_v4(),\n        server_default=func.uuid_generate_v4(),\n    )\n    user_id = Column(\n        UUID(as_uuid=True), ForeignKey(\"users.id\"), nullable=False, index=True\n    )\n    package_id = Column(\n        UUID(as_uuid=True), ForeignKey(\"packages.id\"), nullable=False, index=True\n    )\n    created_at = Column(\n        DateTime, nullable=False, default=func.now(), server_default=func.now()\n    )\n    updated_at = Column(\n        DateTime, nullable=False, default=func.now(), server_default=func.now()\n    )\n\n    def to_dict(self):\n        return {\n            \"user_id\": self.user_id,\n            \"package_id\": self.package_id,\n        }\n\n\nclass PackageURL(Base):\n    __tablename__ = \"package_urls\"\n    __table_args__ = (UniqueConstraint(\"package_id\", \"url_id\", name=\"uq_package_url\"),)\n    id = Column(\n        UUID(as_uuid=True),\n        primary_key=True,\n        default=func.uuid_generate_v4(),\n        server_default=func.uuid_generate_v4(),\n    )\n    package_id = Column(\n        UUID(as_uuid=True), ForeignKey(\"packages.id\"), nullable=False, index=True\n    )\n    url_id = Column(\n        UUID(as_uuid=True), ForeignKey(\"urls.id\"), nullable=False, index=True\n    )\n    created_at = Column(\n        DateTime, nullable=False, default=func.now(), server_default=func.now()\n    )\n    updated_at = Column(\n        DateTime, nullable=False, default=func.now(), server_default=func.now()\n    )\n\n    # TODO: deprecated\n    def to_dict(self):\n        return {\n            \"package_id\": self.package_id,\n            \"url_id\": self.url_id,\n        }\n\n\nclass LegacyDependency(Base):\n   
 __tablename__ = \"legacy_dependencies\"\n    __table_args__ = (\n        UniqueConstraint(\"package_id\", \"dependency_id\", name=\"uq_package_dependency\"),\n    )\n    id = Column(Integer, primary_key=True)\n    package_id = Column(\n        UUID(as_uuid=True), ForeignKey(\"packages.id\"), nullable=False, index=True\n    )\n    dependency_id = Column(\n        UUID(as_uuid=True), ForeignKey(\"packages.id\"), nullable=False, index=True\n    )\n    dependency_type_id = Column(\n        UUID(as_uuid=True),\n        ForeignKey(\"depends_on_types.id\"),\n        nullable=False,\n        index=True,\n    )\n    semver_range = Column(String, nullable=True)\n    created_at = Column(\n        DateTime, nullable=False, default=func.now(), server_default=func.now()\n    )\n    updated_at = Column(\n        DateTime, nullable=False, default=func.now(), server_default=func.now()\n    )\n\n\nclass Canon(Base):\n    __tablename__ = \"canons\"\n    id = Column(\n        UUID(as_uuid=True),\n        primary_key=True,\n        default=func.uuid_generate_v4(),\n        server_default=func.uuid_generate_v4(),\n    )\n    url_id = Column(\n        UUID(as_uuid=True),\n        ForeignKey(\"urls.id\"),\n        nullable=False,\n        index=True,\n        unique=True,\n    )\n    name_trgm_idx = Index(\n        \"ix_canons_name_trgm\",\n        \"name\",\n        postgresql_using=\"gin\",\n        postgresql_ops={\"name\": \"gin_trgm_ops\"},\n    )\n    name = Column(String, nullable=False)\n    created_at = Column(\n        DateTime, nullable=False, default=func.now(), server_default=func.now()\n    )\n    updated_at = Column(\n        DateTime, nullable=False, default=func.now(), server_default=func.now()\n    )\n\n    url: Mapped[URL] = relationship()\n\n\nclass CanonPackage(Base):\n    __tablename__ = \"canon_packages\"\n    id = Column(UUID(as_uuid=True), primary_key=True)\n    canon_id = Column(\n        UUID(as_uuid=True), ForeignKey(\"canons.id\"), nullable=False, index=True\n    )\n    package_id = Column(\n        UUID(as_uuid=True),\n        ForeignKey(\"packages.id\"),\n        nullable=False,\n        index=True,\n        unique=True,\n    )\n    created_at = Column(\n        DateTime, nullable=False, default=func.now(), server_default=func.now()\n    )\n    updated_at = Column(\n        DateTime, nullable=False, default=func.now(), server_default=func.now()\n    )\n\n\nclass TeaRankRun(Base):\n    __tablename__ = \"tea_rank_runs\"\n    id = Column(\n        UUID(as_uuid=True),\n        primary_key=True,\n        default=func.uuid_generate_v4(),\n        server_default=func.uuid_generate_v4(),\n    )\n    run = Column(Integer, nullable=False)\n    split_ratio = Column(String, nullable=False)\n    created_at = Column(\n        DateTime, nullable=False, default=func.now(), server_default=func.now()\n    )\n\n\nclass TeaRank(Base):\n    __tablename__ = \"tea_ranks\"\n    id = Column(\n        UUID(as_uuid=True),\n        primary_key=True,\n        default=func.uuid_generate_v4(),\n        server_default=func.uuid_generate_v4(),\n    )\n    tea_rank_run = Column(Integer, nullable=False, index=True)\n    canon_id = Column(\n        UUID(as_uuid=True), ForeignKey(\"canons.id\"), nullable=False, index=True\n    )\n    rank = Column(String, nullable=False)\n    created_at = Column(\n        DateTime, nullable=False, default=func.now(), server_default=func.now()\n    )\n"
  },
  {
    "path": "core/requirements.txt",
    "content": "# This file was autogenerated by uv via the following command:\n#    uv pip compile --group indexers -o core/requirements.txt\nalembic==1.13.2\n    # via chai (pyproject.toml:indexers)\ncertifi==2025.4.26\n    # via\n    #   chai (pyproject.toml:indexers)\n    #   requests\ncharset-normalizer==3.4.2\n    # via\n    #   chai (pyproject.toml:indexers)\n    #   requests\nfilelock==3.18.0\n    # via tldextract\ngitdb==4.0.12\n    # via gitpython\ngitpython==3.1.44\n    # via chai (pyproject.toml:indexers)\nidna==3.10\n    # via\n    #   chai (pyproject.toml:indexers)\n    #   requests\n    #   tldextract\nmako==1.3.10\n    # via alembic\nmarkupsafe==3.0.2\n    # via mako\npermalint==0.1.15\n    # via chai (pyproject.toml:indexers)\npsycopg2-binary==2.9.10\n    # via chai (pyproject.toml:indexers)\npyyaml==6.0.2\n    # via chai (pyproject.toml:indexers)\nrequests==2.32.4\n    # via\n    #   chai (pyproject.toml:indexers)\n    #   requests-file\n    #   tldextract\nrequests-file==2.1.0\n    # via tldextract\nruff==0.11.13\n    # via permalint\nschedule==1.2.2\n    # via chai (pyproject.toml:indexers)\nsmmap==5.0.2\n    # via gitdb\nsqlalchemy==2.0.41\n    # via\n    #   chai (pyproject.toml:indexers)\n    #   alembic\ntldextract==5.3.0\n    # via permalint\ntyping-extensions==4.14.0\n    # via\n    #   alembic\n    #   sqlalchemy\nurllib3==2.4.0\n    # via\n    #   chai (pyproject.toml:indexers)\n    #   requests\n"
  },
  {
    "path": "core/scheduler.py",
    "content": "import time\nfrom collections.abc import Callable\nfrom os import getenv\nfrom threading import Thread\n\nimport schedule\n\nfrom core.logger import Logger\n\nFREQUENCY = int(getenv(\"FREQUENCY\", 24))\n\n\nclass Scheduler:\n    def __init__(self, name: str, frequency: int = FREQUENCY):\n        self.name = name\n        self.frequency = frequency\n        self.logger = Logger(f\"{name}_scheduler\")\n        self.job = None\n        self.is_running = False\n\n    def start(self, task: Callable, *args):\n        self.job = schedule.every(self.frequency).hours.do(task, *args)\n        self.is_running = True\n        self.logger.log(f\"scheduled {self.name} to run every {self.frequency} hours\")\n\n        def run_schedule():\n            while self.is_running:\n                schedule.run_pending()\n                time.sleep(1)\n\n        Thread(target=run_schedule, daemon=True).start()\n\n    def stop(self):\n        if self.job:\n            schedule.cancel_job(self.job)\n        self.is_running = False\n        self.logger.log(f\"stopped {self.name} scheduler\")\n\n    def run_now(self, task: Callable, *args):\n        self.logger.log(f\"running {self.name} now\")\n        task(*args)\n"
  },
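A minimal sketch of how an indexer might use `Scheduler`; `run_pipeline` is a hypothetical stand-in for a loader's fetch/diff/ingest cycle:

```python
# Run a task immediately, then every FREQUENCY hours in a daemon thread.
import time

from core.scheduler import Scheduler


def run_pipeline() -> None:
    print("pipeline run")  # placeholder for an indexer's fetch/diff/ingest cycle


scheduler = Scheduler("example")
scheduler.run_now(run_pipeline)  # run once, right away
scheduler.start(run_pipeline)    # then re-run every `frequency` hours

try:
    while True:  # keep the main thread alive; the worker thread is a daemon
        time.sleep(60)
except KeyboardInterrupt:
    scheduler.stop()
```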
  {
    "path": "core/structs.py",
    "content": "from dataclasses import dataclass\nfrom datetime import datetime\nfrom uuid import UUID\n\nfrom core.models import URL, LegacyDependency, Package, PackageURL\n\n\n@dataclass\nclass CurrentGraph:\n    package_map: dict[str, Package]\n    dependencies: dict[UUID, set[LegacyDependency]]\n\n\n@dataclass(frozen=True)\nclass URLKey:\n    url: str\n    url_type_id: UUID\n\n\n@dataclass\nclass CurrentURLs:\n    url_map: dict[URLKey, URL]  # URL and URL Type ID to URL object\n    package_urls: dict[UUID, set[PackageURL]]  # Package ID to PackageURL rows\n\n\n@dataclass\nclass Cache:\n    package_map: dict[str, Package]\n    url_map: dict[URLKey, URL]\n    package_urls: dict[UUID, set[PackageURL]]\n    dependencies: dict[UUID, set[LegacyDependency]]\n\n\n@dataclass\nclass DiffResult:\n    new_packages: list[Package]\n    new_urls: dict[URLKey, URL]\n    new_package_urls: list[PackageURL]\n    updated_packages: list[dict[str, UUID | str | datetime]]\n    updated_package_urls: list[dict[str, UUID | datetime]]\n    new_deps: list[LegacyDependency]\n    removed_deps: list[LegacyDependency]\n"
  },
  {
    "path": "core/test.json",
    "content": "[\n    {\n        'id': UUID('b3133e5e-6d6b-458b-bd83-bf31032875a4'), \n        'package_id': UUID('7d6c7a3f-2c75-425f-8674-12efd7ce1ca4'), \n        'url_id': UUID('736acfdc-c3c2-4b53-ae6e-102fdd4f375a'), \n        'created_at': datetime.datetime(2025, 5, 19, 17, 5, 10, 255947), \n        'updated_at': datetime.datetime(2025, 5, 19, 17, 5, 10, 255947)\n    }, {'id': UUID('a274bb96-a443-46a7-86ed-71c6ee87a89b'), 'package_id': UUID('506f5abc-f385-4cbf-9fb1-cd34053397f4'), 'url_id': UUID('d0346cef-80b0-456c-8de3-eb1b95481bac'), 'created_at': datetime.datetime(2025, 5, 19, 17, 5, 10, 255947), 'updated_at': datetime.datetime(2025, 5, 19, 17, 5, 10, 255947)}, {'id': UUID('17fe8d3a-78d8-42f5-a9f6-7b7abaa37d53'), 'package_id': UUID('a08b41eb-723d-4800-929d-cb6c6d3aeca4'), 'url_id': UUID('334ec74b-dda3-4bb0-99c5-f39abc132f5a'), 'created_at': datetime.datetime(2025, 5, 19, 17, 5, 10, 255947), 'updated_at': datetime.datetime(2025, 5, 19, 17, 5, 10, 255947)}, {'id': UUID('5dd47edf-bc5d-43b5-9acd-d099ae9a22f0'), 'package_id': UUID('624c333c-e303-4d9b-a66e-c499bb3b4806'), 'url_id': UUID('6d866142-e2a9-4da0-96de-b5bfadc7cee9'), 'created_at': datetime.datetime(2025, 5, 19, 17, 5, 10, 255947), 'updated_at': datetime.datetime(2025, 5, 19, 17, 5, 10, 255947)}, {'id': UUID('c924c668-c6cb-4b6b-bac2-b588377a695d'), 'package_id': UUID('2d182e7a-1960-4376-8272-5ce401c369fd'), 'url_id': UUID('359015d5-8807-4cdc-b1c8-a4771b0069fe'), 'created_at': datetime.datetime(2025, 5, 19, 17, 5, 10, 255947), 'updated_at': datetime.datetime(2025, 5, 19, 17, 5, 10, 255947)}]"
  },
  {
    "path": "core/transformer.py",
    "content": "import csv\nimport os\n\nfrom permalint import normalize_url, possible_names\nfrom sqlalchemy import UUID\n\nfrom core.db import DB\nfrom core.logger import Logger\n\n# this is a temporary fix, but sometimes the raw files have weird characters\n# and lots of data within certain fields\n# this fix allows us to read the files with no hassles\ncsv.field_size_limit(10000000)\n\n\n# the transformer class knows what files to open, and provide a generic wrapper\n# for the data within the files\n# each package manager will have its own transformer, that knows what data needs to be\n# extracted for our data model\nclass Transformer:\n    def __init__(self, name: str):\n        self.name = name\n        self.input = f\"data/{name}/latest\"\n        self.logger = Logger(f\"{name}_transformer\")\n        self.files: dict[str, str] = {\n            \"projects\": \"\",\n            \"versions\": \"\",\n            \"dependencies\": \"\",\n            \"users\": \"\",\n            \"urls\": \"\",\n        }\n        self.url_types: dict[str, UUID] = {}\n\n    def finder(self, file_name: str) -> str:\n        input_dir = os.path.realpath(self.input)\n\n        for root, _, files in os.walk(input_dir):\n            if file_name in files:\n                return os.path.join(root, file_name)\n        else:\n            self.logger.error(f\"{file_name} not found in {input_dir}\")\n            raise FileNotFoundError(f\"Missing {file_name} file\")\n\n    def open(self, file_name: str) -> str:\n        file_path = self.finder(file_name)\n        with open(file_path) as file:\n            return file.read()\n\n    def canonicalize(self, url: str) -> str:\n        return normalize_url(url)\n\n    def guess(self, db_client: DB, url: str, package_managers: list[UUID]) -> list[str]:\n        names = possible_names(url)\n        urls = db_client.search_names(names, package_managers)\n        return urls\n"
  },
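A minimal sketch of a package-manager-specific transformer built on this base class. The file name `projects.csv` and the `parse_packages` method are illustrative assumptions; real transformers know which files their fetcher wrote.

```python
# Sketch: a transformer that reads one CSV from data/example/latest.
import csv

from core.transformer import Transformer


class ExampleTransformer(Transformer):
    def __init__(self):
        super().__init__("example")

    def parse_packages(self) -> list[dict[str, str]]:
        # finder() walks data/example/latest for the named file; open() reads it
        raw = self.open("projects.csv")
        return list(csv.DictReader(raw.splitlines()))
```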
  {
    "path": "core/utils.py",
    "content": "from os import getenv\nfrom os.path import exists, join\nfrom typing import Any\n\n\ndef safe_int(val: str) -> int | None:\n    if val == \"\":\n        return None\n    return int(val)\n\n\n# TODO: needs explanation or simplification\ndef build_query_params(\n    items: list[dict[str, str]], cache: dict, attr: str\n) -> list[str]:\n    params = set()\n    for item in items:\n        if item[attr] not in cache:\n            params.add(item[attr])\n    return list(params)\n\n\n# env vars could be true or 1, or anything else -- here's a centralized location to\n# handle that\ndef env_vars(env_var: str, default: str) -> bool:\n    var = getenv(env_var, default).lower()\n    return var == \"true\" or var == \"1\"\n\n\n# convert keys to snake case\ndef convert_keys_to_snake_case(data: dict[str, Any]) -> dict[str, Any]:\n    \"\"\"Recursively converts dictionary keys from hyphen-case to snake_case.\"\"\"\n    if isinstance(data, dict):\n        new_dict = {}\n        for key, value in data.items():\n            new_key = key.replace(\"-\", \"_\")\n            new_dict[new_key] = convert_keys_to_snake_case(value)  # handle nested\n        return new_dict\n    elif isinstance(data, list):\n        return [convert_keys_to_snake_case(item) for item in data]\n    else:\n        return data\n\n\ndef is_github_url(url: str) -> bool:\n    \"\"\"Assumes the url has been canonicalized by permalint\"\"\"\n    return url.startswith(\"github.com/\")\n\n\ndef file_exists(*args) -> str:\n    \"\"\"Confirms if a file exists\"\"\"\n    file_path = join(*args)\n    if not exists(file_path):\n        raise FileNotFoundError(f\"{file_path} not found\")\n    return file_path\n"
  },
  {
    "path": "db/README.md",
    "content": "# CHAI Data Model\n\nThe CHAI data model is designed to represent the package manager data in a unified and\nconsistent form. The model's goal is _standardization_ - of the various complexities,\nand idiosyncrasies of each individual package manager. We want to provide a standard way\nfor analysis, querying, and whatever your use case might be.\n\n## Definitions\n\nWe use certain nomenclature throughout the codebase:\n\n- `derived_id`: A unique identifier combining the package manager and package name. Like\n  `crates/serde`, or `homebrew/a2ps`, or `npm/lodash`.\n- `import_id`: The original identifier from the source system. Like the `crate_id`\n  integers provided by crates, or the package name provided by Homebrew\n\n# Core Entities\n\n## Packages\n\nThe Package model is a fundamental unit in our system. Each package is uniquely\nidentified and associated with a specific package manager.\n\nKey fields:\n\n- `derived_id`\n- `name`\n- `package_manager_id`: Reference to the associated package manager.\n- `import_id`: The original identifier from the source system.\n- `readme`: Optional field for package documentation.\n\n### Versions\n\nEach version is a different release of a package, and **must** be associated with a\npackage.\n\nKey fields:\n\n- `package_id`: Reference to the associated package.\n- `version`: The version string.\n- `import_id`: The original identifier from the source system.\n- `size`, `published_at`, `license_id`, `downloads`, `checksum`: Optional metadata\n  fields.\n\n### Users\n\nThe User model represents individuals or entities associated with packages. This is not\nnecessarily always available, but if it is, it's interesting data.\n\nKey fields:\n\n- `username`: The user's name or identifier.\n- `source_id`: Reference to the data source (e.g., GitHub, npm user, crates user, etc).\n- `import_id`: The original identifier from the source system.\n\n### URLs\n\nThe URL model is populated with all the URLs that are provided by the package manager\nsource data - this includes documentation, repository, source, issues, and other url\ntypes as well. Each URL is associated with a URL type. The relationships between a URL\nand a Package are captured in the PackageURL model.\n\nKey fields:\n\n- `url`: The URL.\n- `url_type_id`: Reference to the type of URL. (e.g., homepage, repository, etc)\n\n## Type Models\n\nThese models define categorizations and types used across the system. All these values\nare loaded from the alembic service, specifically in the\n[load-values.sql](../alembic/load-values.sql) script.\n\n### URLType\n\nRepresents different types of URLs associated with packages.\n\nPredefined types (from load-values.sql):\n\n- `source`\n- `homepage`\n- `documentation`\n- `repository`\n\n### DependsOnType\n\nCategorizes different types of dependencies between packages.\nPredefined types (from load-values.sql):\n\n- `build`\n- `development`\n- `runtime`\n- `test`\n- `optional`\n- `recommended`\n- `uses_from_macos` (Homebrew only)\n\n### Source\n\nRepresents the authoritative sources of package data.\n\n- `crates`\n- `homebrew`\n\nThe below are not yet supported:\n\n- `npm`\n- `pypi`\n- `rubygems`\n- `github`\n\n## Relationship Models\n\nThese models establish connections between core entities.\n\n### DependsOn\n\nIn our data model, a specific release depends on a specific package. 
We include a field\n`semver_range`, which would represent the range of dependency releases compatible with\nthat specific release.\n\n> [!NOTE]\n> Not all package managers provide semantic versions. Homebrew does not, for example.\n> This is why `semver_range` is optional.\n>\n> On the other hand, the dependency type is non-optional, and the combination of\n> `version_id`, `dependency_id`, and `dependency_type_id` must be unique.\n\nKey fields:\n\n- `version_id`: The version that has the dependency.\n- `dependency_id`: The package that is depended upon.\n- `dependency_type_id`: The type of dependency.\n- `semver_range`: The version range for the dependency (optional).\n\n### UserVersion and UserPackage\n\nThese models associate users with specific versions and packages, respectively.\n\n### PackageURL\n\nAssociates packages with their various URLs.\n\n## Caveats\n\n### `Source` and `PackageManager` Relationship\n\nWe've chosen to separate `Source` and `PackageManager` into distinct entities:\n\n- `Source`: Represents data sources that can provide information about packages, users,\n  or both.\n- `PackageManager`: Specifically represents sources that are package managers.\n\nFor example, 'crates' functions both as a package manager and as a source of user data.\nBy keeping these concepts separate, we can accurately represent such systems, and have\none point where we can modify any information about 'crates'.\n\n## Additional Models\n\n### License\n\nRepresents software licenses associated with package versions. Great place to start\ncontributions!\n\n### LoadHistory\n\nTracks the history of data loads for each package manager, useful for auditing and\nincremental updates.\n"
  },
  {
    "path": "db/queries.md",
    "content": "# Chai Data Exploration\n\n```sql\n-- Packages with the longest lifetime\nSELECT p.name,\nSUM(v.downloads) AS \"downloads\",\ncount(v.package_id) AS versions,\nmin(v.published_at) AS \"first published\",\nmax(v.published_at) AS \"last published\",\nmax(v.published_at) - min(v.published_at) AS lifetime\nFROM packages AS p\nJOIN versions v ON v.package_id = p.id\nGROUP BY p.name\nORDER BY lifetime DESC limit 100;\n\n-- Packages sorted by dependents\nSELECT p.name, count(d.id) AS dependents\nFROM packages AS p\nJOIN dependencies AS d ON d.dependency_id = p.id\nGROUP BY p.name\nORDER BY count(d.id) DESC LIMIT 100;\n\n-- Packages sorted by dependents with lifetime\nSELECT p.name,\ncount(d.id) AS dependents,\nmin(v.published_at) AS \"first published\",\nmax(v.published_at) AS \"last published\",\nmax(v.published_at) - min(v.published_at) AS lifetime\nFROM packages AS p\nJOIN dependencies AS d ON d.dependency_id = p.id\nJOIN versions v ON v.package_id = p.id\nGROUP BY p.name\nORDER BY count(d.id) DESC LIMIT 100;\n\n-- Packages sorted by dependents with downloads\nSELECT p.name,\ncount(d.id) AS dependents,\nsum(v.downloads) AS downloads\nFROM packages AS p\nJOIN dependencies AS d ON d.dependency_id = p.id\nJOIN versions v ON v.package_id = p.id\nGROUP BY p.name\nORDER BY count(d.id) DESC LIMIT 100;\n\n-- Packages with most dependents sorted by download/dependent ratio\nSELECT name, dependents, downloads, (downloads / dependents) AS ratio FROM\n(SELECT p.name,\ncount(d.id) AS dependents,\nsum(v.downloads) AS downloads\nFROM packages AS p\nJOIN dependencies AS d ON d.dependency_id = p.id\nJOIN versions v ON v.package_id = p.id\nGROUP BY p.name\nORDER BY count(d.id) DESC LIMIT 1000)\nORDER BY ratio DESC;\n```\n"
  },
  {
    "path": "docker-compose.yml",
    "content": "services:\n  db:\n    image: postgres\n    shm_size: 256m\n    environment:\n      - POSTGRES_USER=postgres\n      - POSTGRES_PASSWORD=s3cr3t\n    ports:\n      - \"5435:5432\"\n    volumes:\n      - ./data/db/data:/var/lib/postgresql\n    healthcheck:\n      test: [\"CMD-SHELL\", \"pg_isready -U postgres\"]\n      interval: 5s\n      timeout: 5s\n      retries: 5\n\n  alembic:\n    build:\n      context: .\n      dockerfile: ./alembic/Dockerfile\n    environment:\n      - CHAI_DATABASE_URL=${CHAI_DATABASE_URL:-postgresql://postgres:s3cr3t@db:5432/chai}\n      - CHAI_DATABASE_ADMIN_URL=${CHAI_DATABASE_ADMIN_URL:-postgresql://postgres:s3cr3t@db:5432/postgres}\n      - PGPASSWORD=${PGPASSWORD:-s3cr3t}\n    depends_on:\n      db:\n        condition: service_healthy\n    working_dir: /alembic\n    entrypoint: [\"./run_migrations.sh\"]\n\n  crates:\n    build:\n      context: .\n      dockerfile: ./package_managers/crates/Dockerfile\n    environment:\n      - CHAI_DATABASE_URL=${CHAI_DATABASE_URL:-postgresql://postgres:s3cr3t@db:5432/chai}\n      - NO_CACHE=${NO_CACHE:-true}\n      - PYTHONPATH=/\n      - DEBUG=${DEBUG:-false}\n      - TEST=${TEST:-false}\n      - FETCH=${FETCH:-true}\n      - FREQUENCY=${FREQUENCY:-24}\n      - ENABLE_SCHEDULER=${ENABLE_SCHEDULER:-true}\n    volumes:\n      - ./data/crates:/data/crates\n    depends_on:\n      db:\n        condition: service_healthy\n      alembic:\n        condition: service_completed_successfully\n\n  homebrew:\n    build:\n      context: .\n      dockerfile: ./package_managers/homebrew/Dockerfile\n    environment:\n      - CHAI_DATABASE_URL=${CHAI_DATABASE_URL:-postgresql://postgres:s3cr3t@db:5432/chai}\n      - NO_CACHE=${NO_CACHE:-false}\n      - DEBUG=${DEBUG:-false}\n      - TEST=${TEST:-false}\n      - FETCH=${FETCH:-true}\n      - FREQUENCY=${FREQUENCY:-1}\n      - ENABLE_SCHEDULER=${ENABLE_SCHEDULER:-true}\n      - PYTHONPATH=/\n    volumes:\n      - ./data/homebrew:/data/homebrew\n    depends_on:\n      db:\n        condition: service_healthy\n      alembic:\n        condition: service_completed_successfully\n\n  api:\n    build:\n      context: ./api\n      dockerfile: Dockerfile\n    environment:\n      - DATABASE_URL=postgresql://postgres:s3cr3t@db:5432/chai\n      - HOST=0.0.0.0\n      - PORT=8080\n    ports:\n      - \"8080:8080\"\n    depends_on:\n      db:\n        condition: service_healthy\n      alembic:\n        condition: service_completed_successfully\n    restart: unless-stopped\n    healthcheck:\n      test: [\"CMD\", \"curl\", \"-f\", \"http://localhost:8080/heartbeat\"]\n      interval: 30s\n      timeout: 10s\n      retries: 3\n      start_period: 5s\n\n  debian:\n    build:\n      context: .\n      dockerfile: ./package_managers/debian/Dockerfile\n    environment:\n      - CHAI_DATABASE_URL=${CHAI_DATABASE_URL:-postgresql://postgres:s3cr3t@db:5432/chai}\n      - NO_CACHE=${NO_CACHE:-false}\n      - PYTHONPATH=/\n      - DEBUG=${DEBUG:-false}\n      - TEST=${TEST:-false}\n      - FETCH=${FETCH:-true}\n      - FREQUENCY=${FREQUENCY:-24}\n      - ENABLE_SCHEDULER=${ENABLE_SCHEDULER:-true}\n    volumes:\n      - ./data/debian:/data/debian\n    depends_on:\n      db:\n        condition: service_healthy\n      alembic:\n        condition: service_completed_successfully\n\n  pkgx:\n    build:\n      context: .\n      dockerfile: ./package_managers/pkgx/Dockerfile\n    environment:\n      - CHAI_DATABASE_URL=${CHAI_DATABASE_URL:-postgresql://postgres:s3cr3t@db:5432/chai}\n      - 
NO_CACHE=${NO_CACHE:-false}\n      - PYTHONPATH=/\n      - DEBUG=${DEBUG:-false}\n      - TEST=${TEST:-false}\n      - FETCH=${FETCH:-true}\n      - FREQUENCY=${FREQUENCY:-24}\n      - ENABLE_SCHEDULER=${ENABLE_SCHEDULER:-true}\n    volumes:\n      - ./data/pkgx:/data/pkgx\n    depends_on:\n      db:\n        condition: service_healthy\n      alembic:\n        condition: service_completed_successfully\n\n  ranker:\n    build:\n      context: .\n      dockerfile: ./ranker/Dockerfile\n    environment:\n      - CHAI_DATABASE_URL=${CHAI_DATABASE_URL:-postgresql://postgres:s3cr3t@db:5432/chai}\n      - PYTHONPATH=/\n      - LOAD=${LOAD:-false}\n      - DEBUG=${DEBUG:-false}\n    depends_on:\n      db:\n        condition: service_healthy\n      alembic:\n        condition: service_completed_successfully\n    deploy:\n      resources:\n        limits:\n          memory: 8G\n"
  },
  {
    "path": "examples/sbom-meta/README.md",
    "content": "# SBOM-Meta\n\nAn example Chai application that displays package metadata for\n[SBOMs](https://github.com/anchore/syft) (software bill of materials).\n\n## Installation\n\n1. Start the [Chai DB](https://github.com/teaxyz/chai-oss) with `docker compose up`.\n2. Run `go install` or `go build` to generate a binary.\n\n## Usage\n\nRun `sbom-meta` in the root directory of any repository to get a list of\ndependencies with metadata.\n\n```bash\ngit clone git@github.com:starship/starship.git\ncd starship\nsbom-meta\n```\n\nYou can sort any of the fields, ascending or descending:\n\n```bash\nsbom-meta --sort downloads,desc\nsbom-meta --sort published,asc\n```\n\nUse the `--json` flag to output JSON:\n\n```bash\nsbom-meta --json | jq .[1].name\n```\n"
  },
  {
    "path": "examples/sbom-meta/go.mod",
    "content": "module sbom-meta\n\ngo 1.23.2\n\nrequire (\n\tgithub.com/anchore/syft v1.14.0\n\tgithub.com/caarlos0/env v3.5.0+incompatible\n\tgithub.com/dustin/go-humanize v1.0.1\n\tgithub.com/fatih/color v1.17.1-0.20241003070628-1c8d8706604e\n\tgithub.com/jedib0t/go-pretty/v6 v6.6.0\n\tgithub.com/jmoiron/sqlx v1.4.0\n\tgithub.com/lib/pq v1.10.9\n)\n\nrequire (\n\tdario.cat/mergo v1.0.1 // indirect\n\tgithub.com/AdaLogics/go-fuzz-headers v0.0.0-20230811130428-ced1acdcaa24 // indirect\n\tgithub.com/AdamKorcz/go-118-fuzz-build v0.0.0-20230306123547-8075edf89bb0 // indirect\n\tgithub.com/BurntSushi/toml v1.4.0 // indirect\n\tgithub.com/CycloneDX/cyclonedx-go v0.9.1 // indirect\n\tgithub.com/DataDog/zstd v1.5.5 // indirect\n\tgithub.com/Masterminds/goutils v1.1.1 // indirect\n\tgithub.com/Masterminds/semver v1.5.0 // indirect\n\tgithub.com/Masterminds/semver/v3 v3.3.0 // indirect\n\tgithub.com/Masterminds/sprig/v3 v3.3.0 // indirect\n\tgithub.com/Microsoft/go-winio v0.6.2 // indirect\n\tgithub.com/Microsoft/hcsshim v0.11.7 // indirect\n\tgithub.com/ProtonMail/go-crypto v1.1.6 // indirect\n\tgithub.com/acobaugh/osrelease v0.1.0 // indirect\n\tgithub.com/adrg/xdg v0.5.0 // indirect\n\tgithub.com/anchore/clio v0.0.0-20240522144804-d81e109008aa // indirect\n\tgithub.com/anchore/fangs v0.0.0-20240903175602-e716ef12c23d // indirect\n\tgithub.com/anchore/go-collections v0.0.0-20240216171411-9321230ce537 // indirect\n\tgithub.com/anchore/go-logger v0.0.0-20230725134548-c21dafa1ec5a // indirect\n\tgithub.com/anchore/go-macholibre v0.0.0-20220308212642-53e6d0aaf6fb // indirect\n\tgithub.com/anchore/go-struct-converter v0.0.0-20221118182256-c68fdcfa2092 // indirect\n\tgithub.com/anchore/go-version v1.2.2-0.20200701162849-18adb9c92b9b // indirect\n\tgithub.com/anchore/packageurl-go v0.1.1-0.20240507183024-848e011fc24f // indirect\n\tgithub.com/anchore/stereoscope v0.0.4-0.20241005180410-efa76446cc1c // indirect\n\tgithub.com/andybalholm/brotli v1.0.4 // indirect\n\tgithub.com/aquasecurity/go-pep440-version v0.0.0-20210121094942-22b2f8951d46 // indirect\n\tgithub.com/aquasecurity/go-version v0.0.0-20210121072130-637058cfe492 // indirect\n\tgithub.com/aymanbagabas/go-osc52/v2 v2.0.1 // indirect\n\tgithub.com/becheran/wildmatch-go v1.0.0 // indirect\n\tgithub.com/bmatcuk/doublestar/v4 v4.6.1 // indirect\n\tgithub.com/charmbracelet/lipgloss v0.13.0 // indirect\n\tgithub.com/charmbracelet/x/ansi v0.2.3 // indirect\n\tgithub.com/cloudflare/circl v1.6.1 // indirect\n\tgithub.com/containerd/cgroups v1.1.0 // indirect\n\tgithub.com/containerd/containerd v1.7.28 // indirect\n\tgithub.com/containerd/containerd/api v1.8.0 // indirect\n\tgithub.com/containerd/continuity v0.4.4 // indirect\n\tgithub.com/containerd/errdefs v0.3.0 // indirect\n\tgithub.com/containerd/fifo v1.1.0 // indirect\n\tgithub.com/containerd/log v0.1.0 // indirect\n\tgithub.com/containerd/platforms v0.2.1 // indirect\n\tgithub.com/containerd/stargz-snapshotter/estargz v0.14.3 // indirect\n\tgithub.com/containerd/ttrpc v1.2.7 // indirect\n\tgithub.com/containerd/typeurl/v2 v2.1.1 // indirect\n\tgithub.com/cyphar/filepath-securejoin v0.4.1 // indirect\n\tgithub.com/deitch/magic v0.0.0-20230404182410-1ff89d7342da // indirect\n\tgithub.com/distribution/reference v0.6.0 // indirect\n\tgithub.com/docker/cli v27.1.1+incompatible // indirect\n\tgithub.com/docker/distribution v2.8.3+incompatible // indirect\n\tgithub.com/docker/docker v27.3.1+incompatible // indirect\n\tgithub.com/docker/docker-credential-helpers v0.7.0 // 
indirect\n\tgithub.com/docker/go-connections v0.4.0 // indirect\n\tgithub.com/docker/go-events v0.0.0-20190806004212-e31b211e4f1c // indirect\n\tgithub.com/docker/go-units v0.5.0 // indirect\n\tgithub.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5 // indirect\n\tgithub.com/edsrzf/mmap-go v1.1.0 // indirect\n\tgithub.com/elliotchance/phpserialize v1.4.0 // indirect\n\tgithub.com/emirpasic/gods v1.18.1 // indirect\n\tgithub.com/facebookincubator/nvdtools v0.1.5 // indirect\n\tgithub.com/felixge/fgprof v0.9.3 // indirect\n\tgithub.com/felixge/httpsnoop v1.0.4 // indirect\n\tgithub.com/fsnotify/fsnotify v1.7.0 // indirect\n\tgithub.com/gabriel-vasile/mimetype v1.4.6 // indirect\n\tgithub.com/github/go-spdx/v2 v2.3.2 // indirect\n\tgithub.com/go-git/gcfg v1.5.1-0.20230307220236-3a3c6141e376 // indirect\n\tgithub.com/go-git/go-billy/v5 v5.6.2 // indirect\n\tgithub.com/go-git/go-git/v5 v5.16.2 // indirect\n\tgithub.com/go-logr/logr v1.4.2 // indirect\n\tgithub.com/go-logr/stdr v1.2.2 // indirect\n\tgithub.com/go-restruct/restruct v1.2.0-alpha // indirect\n\tgithub.com/gogo/protobuf v1.3.2 // indirect\n\tgithub.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8 // indirect\n\tgithub.com/golang/protobuf v1.5.4 // indirect\n\tgithub.com/golang/snappy v0.0.4 // indirect\n\tgithub.com/google/go-cmp v0.7.0 // indirect\n\tgithub.com/google/go-containerregistry v0.20.2 // indirect\n\tgithub.com/google/licensecheck v0.3.1 // indirect\n\tgithub.com/google/pprof v0.0.0-20240409012703-83162a5b38cd // indirect\n\tgithub.com/google/uuid v1.6.0 // indirect\n\tgithub.com/gookit/color v1.5.4 // indirect\n\tgithub.com/hashicorp/errwrap v1.1.0 // indirect\n\tgithub.com/hashicorp/go-multierror v1.1.1 // indirect\n\tgithub.com/hashicorp/hcl v1.0.0 // indirect\n\tgithub.com/huandu/xstrings v1.5.0 // indirect\n\tgithub.com/iancoleman/strcase v0.3.0 // indirect\n\tgithub.com/inconshreveable/mousetrap v1.1.0 // indirect\n\tgithub.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99 // indirect\n\tgithub.com/jinzhu/copier v0.4.0 // indirect\n\tgithub.com/kastenhq/goversion v0.0.0-20230811215019-93b2f8823953 // indirect\n\tgithub.com/kevinburke/ssh_config v1.2.0 // indirect\n\tgithub.com/klauspost/compress v1.17.8 // indirect\n\tgithub.com/klauspost/pgzip v1.2.5 // indirect\n\tgithub.com/knqyf263/go-rpmdb v0.1.1 // indirect\n\tgithub.com/lucasb-eyer/go-colorful v1.2.0 // indirect\n\tgithub.com/magiconair/properties v1.8.7 // indirect\n\tgithub.com/mattn/go-colorable v0.1.13 // indirect\n\tgithub.com/mattn/go-isatty v0.0.20 // indirect\n\tgithub.com/mattn/go-runewidth v0.0.16 // indirect\n\tgithub.com/mgutz/ansi v0.0.0-20200706080929-d51e80ef957d // indirect\n\tgithub.com/mholt/archiver/v3 v3.5.1 // indirect\n\tgithub.com/microsoft/go-rustaudit v0.0.0-20220730194248-4b17361d90a5 // indirect\n\tgithub.com/mitchellh/copystructure v1.2.0 // indirect\n\tgithub.com/mitchellh/go-homedir v1.1.0 // indirect\n\tgithub.com/mitchellh/hashstructure/v2 v2.0.2 // indirect\n\tgithub.com/mitchellh/mapstructure v1.5.0 // indirect\n\tgithub.com/mitchellh/reflectwalk v1.0.2 // indirect\n\tgithub.com/moby/docker-image-spec v1.3.1 // indirect\n\tgithub.com/moby/locker v1.0.1 // indirect\n\tgithub.com/moby/sys/mountinfo v0.7.2 // indirect\n\tgithub.com/moby/sys/sequential v0.5.0 // indirect\n\tgithub.com/moby/sys/signal v0.7.0 // indirect\n\tgithub.com/moby/sys/user v0.3.0 // indirect\n\tgithub.com/moby/sys/userns v0.1.0 // indirect\n\tgithub.com/muesli/termenv v0.15.2 // indirect\n\tgithub.com/nwaples/rardecode v1.1.0 // 
indirect\n\tgithub.com/olekukonko/tablewriter v0.0.5 // indirect\n\tgithub.com/opencontainers/go-digest v1.0.0 // indirect\n\tgithub.com/opencontainers/image-spec v1.1.0 // indirect\n\tgithub.com/opencontainers/runtime-spec v1.1.0 // indirect\n\tgithub.com/opencontainers/selinux v1.11.0 // indirect\n\tgithub.com/pborman/indent v1.2.1 // indirect\n\tgithub.com/pelletier/go-toml v1.9.5 // indirect\n\tgithub.com/pelletier/go-toml/v2 v2.2.2 // indirect\n\tgithub.com/pierrec/lz4/v4 v4.1.19 // indirect\n\tgithub.com/pjbgf/sha1cd v0.3.2 // indirect\n\tgithub.com/pkg/errors v0.9.1 // indirect\n\tgithub.com/pkg/profile v1.7.0 // indirect\n\tgithub.com/rivo/uniseg v0.4.7 // indirect\n\tgithub.com/saferwall/pe v1.5.4 // indirect\n\tgithub.com/sagikazarmark/locafero v0.4.0 // indirect\n\tgithub.com/sagikazarmark/slog-shim v0.1.0 // indirect\n\tgithub.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d // indirect\n\tgithub.com/sassoftware/go-rpmutils v0.4.0 // indirect\n\tgithub.com/scylladb/go-set v1.0.3-0.20200225121959-cc7b2070d91e // indirect\n\tgithub.com/secDre4mer/pkcs7 v0.0.0-20240322103146-665324a4461d // indirect\n\tgithub.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3 // indirect\n\tgithub.com/shopspring/decimal v1.4.0 // indirect\n\tgithub.com/sirupsen/logrus v1.9.3 // indirect\n\tgithub.com/skeema/knownhosts v1.3.1 // indirect\n\tgithub.com/sourcegraph/conc v0.3.0 // indirect\n\tgithub.com/spdx/tools-golang v0.5.5 // indirect\n\tgithub.com/spf13/afero v1.11.0 // indirect\n\tgithub.com/spf13/cast v1.7.0 // indirect\n\tgithub.com/spf13/cobra v1.8.1 // indirect\n\tgithub.com/spf13/pflag v1.0.5 // indirect\n\tgithub.com/spf13/viper v1.19.0 // indirect\n\tgithub.com/subosito/gotenv v1.6.0 // indirect\n\tgithub.com/sylabs/sif/v2 v2.17.1 // indirect\n\tgithub.com/sylabs/squashfs v1.0.0 // indirect\n\tgithub.com/therootcompany/xz v1.0.1 // indirect\n\tgithub.com/ulikunitz/xz v0.5.12 // indirect\n\tgithub.com/vbatts/go-mtree v0.5.4 // indirect\n\tgithub.com/vbatts/tar-split v0.11.3 // indirect\n\tgithub.com/vifraa/gopom v1.0.0 // indirect\n\tgithub.com/wagoodman/go-partybus v0.0.0-20230516145632-8ccac152c651 // indirect\n\tgithub.com/wagoodman/go-progress v0.0.0-20230925121702-07e42b3cdba0 // indirect\n\tgithub.com/xanzy/ssh-agent v0.3.3 // indirect\n\tgithub.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 // indirect\n\tgithub.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e // indirect\n\tgo.opencensus.io v0.24.0 // indirect\n\tgo.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.49.0 // indirect\n\tgo.opentelemetry.io/otel v1.24.0 // indirect\n\tgo.opentelemetry.io/otel/metric v1.24.0 // indirect\n\tgo.opentelemetry.io/otel/trace v1.24.0 // indirect\n\tgo.uber.org/atomic v1.9.0 // indirect\n\tgo.uber.org/multierr v1.9.0 // indirect\n\tgolang.org/x/crypto v0.40.0 // indirect\n\tgolang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 // indirect\n\tgolang.org/x/mod v0.26.0 // indirect\n\tgolang.org/x/net v0.42.0 // indirect\n\tgolang.org/x/sync v0.16.0 // indirect\n\tgolang.org/x/sys v0.34.0 // indirect\n\tgolang.org/x/term v0.33.0 // indirect\n\tgolang.org/x/text v0.27.0 // indirect\n\tgolang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 // indirect\n\tgoogle.golang.org/genproto v0.0.0-20240213162025-012b6fc9bca9 // indirect\n\tgoogle.golang.org/genproto/googleapis/rpc v0.0.0-20240401170217-c3f982113cda // indirect\n\tgoogle.golang.org/grpc v1.62.1 // indirect\n\tgoogle.golang.org/protobuf v1.35.2 // indirect\n\tgopkg.in/ini.v1 v1.67.0 // indirect\n\tgopkg.in/warnings.v0 
v0.1.2 // indirect\n\tgopkg.in/yaml.v3 v3.0.1 // indirect\n)\n"
  },
  {
    "path": "examples/sbom-meta/go.sum",
    "content": "cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=\ncloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=\ncloud.google.com/go v0.38.0/go.mod h1:990N+gfupTy94rShfmMCWGDn0LpTmnzTp2qbd1dvSRU=\ncloud.google.com/go v0.44.1/go.mod h1:iSa0KzasP4Uvy3f1mN/7PiObzGgflwredwwASm/v6AU=\ncloud.google.com/go v0.44.2/go.mod h1:60680Gw3Yr4ikxnPRS/oxxkBccT6SA1yMk63TGekxKY=\ncloud.google.com/go v0.45.1/go.mod h1:RpBamKRgapWJb87xiFSdk4g1CME7QZg3uwTez+TSTjc=\ncloud.google.com/go v0.46.3/go.mod h1:a6bKKbmY7er1mI7TEI4lsAkts/mkhTSZK8w33B4RAg0=\ncloud.google.com/go v0.50.0/go.mod h1:r9sluTvynVuxRIOHXQEHMFffphuXHOMZMycpNR5e6To=\ncloud.google.com/go v0.52.0/go.mod h1:pXajvRH/6o3+F9jDHZWQ5PbGhn+o8w9qiu/CffaVdO4=\ncloud.google.com/go v0.53.0/go.mod h1:fp/UouUEsRkN6ryDKNW/Upv/JBKnv6WDthjR6+vze6M=\ncloud.google.com/go v0.54.0/go.mod h1:1rq2OEkV3YMf6n/9ZvGWI3GWw0VoqH/1x2nd8Is/bPc=\ncloud.google.com/go v0.56.0/go.mod h1:jr7tqZxxKOVYizybht9+26Z/gUq7tiRzu+ACVAMbKVk=\ncloud.google.com/go v0.57.0/go.mod h1:oXiQ6Rzq3RAkkY7N6t3TcE6jE+CIBBbA36lwQ1JyzZs=\ncloud.google.com/go v0.62.0/go.mod h1:jmCYTdRCQuc1PHIIJ/maLInMho30T/Y0M4hTdTShOYc=\ncloud.google.com/go v0.65.0/go.mod h1:O5N8zS7uWy9vkA9vayVHs65eM1ubvY4h553ofrNHObY=\ncloud.google.com/go v0.72.0/go.mod h1:M+5Vjvlc2wnp6tjzE102Dw08nGShTscUx2nZMufOKPI=\ncloud.google.com/go v0.74.0/go.mod h1:VV1xSbzvo+9QJOxLDaJfTjx5e+MePCpCWwvftOeQmWk=\ncloud.google.com/go v0.78.0/go.mod h1:QjdrLG0uq+YwhjoVOLsS1t7TW8fs36kLs4XO5R5ECHg=\ncloud.google.com/go v0.79.0/go.mod h1:3bzgcEeQlzbuEAYu4mrWhKqWjmpprinYgKJLgKHnbb8=\ncloud.google.com/go v0.81.0/go.mod h1:mk/AM35KwGk/Nm2YSeZbxXdrNK3KZOYHmLkOqC2V6E0=\ncloud.google.com/go v0.83.0/go.mod h1:Z7MJUsANfY0pYPdw0lbnivPx4/vhy/e2FEkSkF7vAVY=\ncloud.google.com/go v0.84.0/go.mod h1:RazrYuxIK6Kb7YrzzhPoLmCVzl7Sup4NrbKPg8KHSUM=\ncloud.google.com/go v0.87.0/go.mod h1:TpDYlFy7vuLzZMMZ+B6iRiELaY7z/gJPaqbMx6mlWcY=\ncloud.google.com/go v0.90.0/go.mod h1:kRX0mNRHe0e2rC6oNakvwQqzyDmg57xJ+SZU1eT2aDQ=\ncloud.google.com/go v0.93.3/go.mod h1:8utlLll2EF5XMAV15woO4lSbWQlk8rer9aLOfLh7+YI=\ncloud.google.com/go v0.94.1/go.mod h1:qAlAugsXlC+JWO+Bke5vCtc9ONxjQT3drlTTnAplMW4=\ncloud.google.com/go v0.97.0/go.mod h1:GF7l59pYBVlXQIBLx3a761cZ41F9bBH3JUlihCt2Udc=\ncloud.google.com/go v0.98.0/go.mod h1:ua6Ush4NALrHk5QXDWnjvZHN93OuF0HfuEPq9I1X0cM=\ncloud.google.com/go v0.99.0/go.mod h1:w0Xx2nLzqWJPuozYQX+hFfCSI8WioryfRDzkoI/Y2ZA=\ncloud.google.com/go/bigquery v1.0.1/go.mod h1:i/xbL2UlR5RvWAURpBYZTtm/cXjCha9lbfbpx4poX+o=\ncloud.google.com/go/bigquery v1.3.0/go.mod h1:PjpwJnslEMmckchkHFfq+HTD2DmtT67aNFKH1/VBDHE=\ncloud.google.com/go/bigquery v1.4.0/go.mod h1:S8dzgnTigyfTmLBfrtrhyYhwRxG72rYxvftPBK2Dvzc=\ncloud.google.com/go/bigquery v1.5.0/go.mod h1:snEHRnqQbz117VIFhE8bmtwIDY80NLUZUMb4Nv6dBIg=\ncloud.google.com/go/bigquery v1.7.0/go.mod h1://okPTzCYNXSlb24MZs83e2Do+h+VXtc4gLoIoXIAPc=\ncloud.google.com/go/bigquery v1.8.0/go.mod h1:J5hqkt3O0uAFnINi6JXValWIb1v0goeZM77hZzJN/fQ=\ncloud.google.com/go/datastore v1.0.0/go.mod h1:LXYbyblFSglQ5pkeyhO+Qmw7ukd3C+pD7TKLgZqpHYE=\ncloud.google.com/go/datastore v1.1.0/go.mod h1:umbIZjpQpHh4hmRpGhH4tLFup+FVzqBi1b3c64qFpCk=\ncloud.google.com/go/firestore v1.6.1/go.mod h1:asNXNOzBdyVQmEU+ggO8UPodTkEVFW5Qx+rwHnAz+EY=\ncloud.google.com/go/pubsub v1.0.1/go.mod h1:R0Gpsv3s54REJCy4fxDixWD93lHJMoZTyQ2kNxGRt3I=\ncloud.google.com/go/pubsub v1.1.0/go.mod h1:EwwdRX2sKPjnvnqCa270oGRyludottCI76h+R3AArQw=\ncloud.google.com/go/pubsub v1.2.0/go.mod 
h1:jhfEVHT8odbXTkndysNHCcx0awwzvfOlguIAii9o8iA=\ncloud.google.com/go/pubsub v1.3.1/go.mod h1:i+ucay31+CNRpDW4Lu78I4xXG+O1r/MAHgjpRVR+TSU=\ncloud.google.com/go/storage v1.0.0/go.mod h1:IhtSnM/ZTZV8YYJWCY8RULGVqBDmpoyjwiyrjsg+URw=\ncloud.google.com/go/storage v1.5.0/go.mod h1:tpKbwo567HUNpVclU5sGELwQWBDZ8gh0ZeosJ0Rtdos=\ncloud.google.com/go/storage v1.6.0/go.mod h1:N7U0C8pVQ/+NIKOBQyamJIeKQKkZ+mxpohlUTyfDhBk=\ncloud.google.com/go/storage v1.8.0/go.mod h1:Wv1Oy7z6Yz3DshWRJFhqM/UCfaWIRTdp0RXyy7KQOVs=\ncloud.google.com/go/storage v1.10.0/go.mod h1:FLPqc6j+Ki4BU591ie1oL6qBQGu2Bl/tZ9ullr3+Kg0=\ndario.cat/mergo v1.0.1 h1:Ra4+bf83h2ztPIQYNP99R6m+Y7KfnARDfID+a+vLl4s=\ndario.cat/mergo v1.0.1/go.mod h1:uNxQE+84aUszobStD9th8a29P2fMDhsBdgRYvZOxGmk=\ndmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU=\nfilippo.io/edwards25519 v1.1.0 h1:FNf4tywRC1HmFuKW5xopWpigGjJKiJSV0Cqo0cJWDaA=\nfilippo.io/edwards25519 v1.1.0/go.mod h1:BxyFTGdWcka3PhytdK4V28tE5sGfRvvvRV7EaN4VDT4=\ngithub.com/AdaLogics/go-fuzz-headers v0.0.0-20230811130428-ced1acdcaa24 h1:bvDV9vkmnHYOMsOr4WLk+Vo07yKIzd94sVoIqshQ4bU=\ngithub.com/AdaLogics/go-fuzz-headers v0.0.0-20230811130428-ced1acdcaa24/go.mod h1:8o94RPi1/7XTJvwPpRSzSUedZrtlirdB3r9Z20bi2f8=\ngithub.com/AdamKorcz/go-118-fuzz-build v0.0.0-20230306123547-8075edf89bb0 h1:59MxjQVfjXsBpLy+dbd2/ELV5ofnUkUZBvWSC85sheA=\ngithub.com/AdamKorcz/go-118-fuzz-build v0.0.0-20230306123547-8075edf89bb0/go.mod h1:OahwfttHWG6eJ0clwcfBAHoDI6X/LV/15hx/wlMZSrU=\ngithub.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 h1:L/gRVlceqvL25UVaW/CKtUDjefjrs0SPonmDGUVOYP0=\ngithub.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E=\ngithub.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=\ngithub.com/BurntSushi/toml v0.4.1/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ=\ngithub.com/BurntSushi/toml v1.2.1/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ=\ngithub.com/BurntSushi/toml v1.4.0 h1:kuoIxZQy2WRRk1pttg9asf+WVv6tWQuBNVmK8+nqPr0=\ngithub.com/BurntSushi/toml v1.4.0/go.mod h1:ukJfTF/6rtPPRCnwkur4qwRxa8vTRFBF0uk2lLoLwho=\ngithub.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo=\ngithub.com/CycloneDX/cyclonedx-go v0.9.1 h1:yffaWOZsv77oTJa/SdVZYdgAgFioCeycBUKkqS2qzQM=\ngithub.com/CycloneDX/cyclonedx-go v0.9.1/go.mod h1:NE/EWvzELOFlG6+ljX/QeMlVt9VKcTwu8u0ccsACEsw=\ngithub.com/DataDog/datadog-go v3.2.0+incompatible/go.mod h1:LButxg5PwREeZtORoXG3tL4fMGNddJ+vMq1mwgfaqoQ=\ngithub.com/DataDog/zstd v1.5.5 h1:oWf5W7GtOLgp6bciQYDmhHHjdhYkALu6S/5Ni9ZgSvQ=\ngithub.com/DataDog/zstd v1.5.5/go.mod h1:g4AWEaM3yOg3HYfnJ3YIawPnVdXJh9QME85blwSAmyw=\ngithub.com/Masterminds/goutils v1.1.1 h1:5nUrii3FMTL5diU80unEVvNevw1nH4+ZV4DSLVJLSYI=\ngithub.com/Masterminds/goutils v1.1.1/go.mod h1:8cTjp+g8YejhMuvIA5y2vz3BpJxksy863GQaJW2MFNU=\ngithub.com/Masterminds/semver v1.5.0 h1:H65muMkzWKEuNDnfl9d70GUjFniHKHRbFPGBuZ3QEww=\ngithub.com/Masterminds/semver v1.5.0/go.mod h1:MB6lktGJrhw8PrUyiEoblNEGEQ+RzHPF078ddwwvV3Y=\ngithub.com/Masterminds/semver/v3 v3.3.0 h1:B8LGeaivUe71a5qox1ICM/JLl0NqZSW5CHyL+hmvYS0=\ngithub.com/Masterminds/semver/v3 v3.3.0/go.mod h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM=\ngithub.com/Masterminds/sprig/v3 v3.3.0 h1:mQh0Yrg1XPo6vjYXgtf5OtijNAKJRNcTdOOGZe3tPhs=\ngithub.com/Masterminds/sprig/v3 v3.3.0/go.mod h1:Zy1iXRYNqNLUolqCpL4uhk6SHUMAOSCzdgBfDb35Lz0=\ngithub.com/Microsoft/go-winio 
v0.5.2/go.mod h1:WpS1mjBmmwHBEWmogvA2mj8546UReBk4v8QkMxJ6pZY=\ngithub.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY=\ngithub.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU=\ngithub.com/Microsoft/hcsshim v0.11.7 h1:vl/nj3Bar/CvJSYo7gIQPyRWc9f3c6IeSNavBTSZNZQ=\ngithub.com/Microsoft/hcsshim v0.11.7/go.mod h1:MV8xMfmECjl5HdO7U/3/hFVnkmSBjAjmA09d4bExKcU=\ngithub.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU=\ngithub.com/OneOfOne/xxhash v1.2.8 h1:31czK/TI9sNkxIKfaUfGlU47BAxQ0ztGgd9vPyqimf8=\ngithub.com/OneOfOne/xxhash v1.2.8/go.mod h1:eZbhyaAYD41SGSSsnmcpxVoRiQ/MPUTjUdIIOT9Um7Q=\ngithub.com/ProtonMail/go-crypto v1.1.6 h1:ZcV+Ropw6Qn0AX9brlQLAUXfqLBc7Bl+f/DmNxpLfdw=\ngithub.com/ProtonMail/go-crypto v1.1.6/go.mod h1:rA3QumHc/FZ8pAHreoekgiAbzpNsfQAosU5td4SnOrE=\ngithub.com/acobaugh/osrelease v0.1.0 h1:Yb59HQDGGNhCj4suHaFQQfBps5wyoKLSSX/J/+UifRE=\ngithub.com/acobaugh/osrelease v0.1.0/go.mod h1:4bFEs0MtgHNHBrmHCt67gNisnabCRAlzdVasCEGHTWY=\ngithub.com/adrg/xdg v0.5.0 h1:dDaZvhMXatArP1NPHhnfaQUqWBLBsmx1h1HXQdMoFCY=\ngithub.com/adrg/xdg v0.5.0/go.mod h1:dDdY4M4DF9Rjy4kHPeNL+ilVF+p2lK8IdM9/rTSGcI4=\ngithub.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=\ngithub.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=\ngithub.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=\ngithub.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=\ngithub.com/anchore/clio v0.0.0-20240522144804-d81e109008aa h1:pwlAn4O9SBUnlgfa69YcqIynbUyobLVFYu8HxSoCffA=\ngithub.com/anchore/clio v0.0.0-20240522144804-d81e109008aa/go.mod h1:nD3H5uIvjxlfmakOBgtyFQbk5Zjp3l538kxfpHPslzI=\ngithub.com/anchore/fangs v0.0.0-20240903175602-e716ef12c23d h1:ZD4wdCBgJJzJybjTUIEiiupLF7B9H3WLuBTjspBO2Mc=\ngithub.com/anchore/fangs v0.0.0-20240903175602-e716ef12c23d/go.mod h1:Xh4ObY3fmoMzOEVXwDtS1uK44JC7+nRD0n29/1KYFYg=\ngithub.com/anchore/go-collections v0.0.0-20240216171411-9321230ce537 h1:GjNGuwK5jWjJMyVppBjYS54eOiiSNv4Ba869k4wh72Q=\ngithub.com/anchore/go-collections v0.0.0-20240216171411-9321230ce537/go.mod h1:1aiktV46ATCkuVg0O573ZrH56BUawTECPETbZyBcqT8=\ngithub.com/anchore/go-logger v0.0.0-20230725134548-c21dafa1ec5a h1:nJ2G8zWKASyVClGVgG7sfM5mwoZlZ2zYpIzN2OhjWkw=\ngithub.com/anchore/go-logger v0.0.0-20230725134548-c21dafa1ec5a/go.mod h1:ubLFmlsv8/DFUQrZwY5syT5/8Er3ugSr4rDFwHsE3hg=\ngithub.com/anchore/go-macholibre v0.0.0-20220308212642-53e6d0aaf6fb h1:iDMnx6LIjtjZ46C0akqveX83WFzhpTD3eqOthawb5vU=\ngithub.com/anchore/go-macholibre v0.0.0-20220308212642-53e6d0aaf6fb/go.mod h1:DmTY2Mfcv38hsHbG78xMiTDdxFtkHpgYNVDPsF2TgHk=\ngithub.com/anchore/go-struct-converter v0.0.0-20221118182256-c68fdcfa2092 h1:aM1rlcoLz8y5B2r4tTLMiVTrMtpfY0O8EScKJxaSaEc=\ngithub.com/anchore/go-struct-converter v0.0.0-20221118182256-c68fdcfa2092/go.mod h1:rYqSE9HbjzpHTI74vwPvae4ZVYZd1lue2ta6xHPdblA=\ngithub.com/anchore/go-testutils v0.0.0-20200925183923-d5f45b0d3c04 h1:VzprUTpc0vW0nnNKJfJieyH/TZ9UYAnTZs5/gHTdAe8=\ngithub.com/anchore/go-testutils v0.0.0-20200925183923-d5f45b0d3c04/go.mod h1:6dK64g27Qi1qGQZ67gFmBFvEHScy0/C8qhQhNe5B5pQ=\ngithub.com/anchore/go-version v1.2.2-0.20200701162849-18adb9c92b9b h1:e1bmaoJfZVsCYMrIZBpFxwV26CbsuoEh5muXD5I1Ods=\ngithub.com/anchore/go-version v1.2.2-0.20200701162849-18adb9c92b9b/go.mod 
h1:Bkc+JYWjMCF8OyZ340IMSIi2Ebf3uwByOk6ho4wne1E=\ngithub.com/anchore/packageurl-go v0.1.1-0.20240507183024-848e011fc24f h1:B/E9ixKNCasntpoch61NDaQyGPDXLEJlL+B9B/PbdbA=\ngithub.com/anchore/packageurl-go v0.1.1-0.20240507183024-848e011fc24f/go.mod h1:Blo6OgJNiYF41ufcgHKkbCKF2MDOMlrqhXv/ij6ocR4=\ngithub.com/anchore/stereoscope v0.0.4-0.20241005180410-efa76446cc1c h1:JXezMk8fF5ns4AgRGW49SGfoRgDjJHsDmcpNw272jkU=\ngithub.com/anchore/stereoscope v0.0.4-0.20241005180410-efa76446cc1c/go.mod h1:GMupz2FoBhy5RTTmawU06c2pZxgVTceahLWiwJef2uI=\ngithub.com/anchore/syft v1.14.0 h1:BeMmc3a9d/63O+nPM8QfV1Olh3r+pYf95JOqbfN4gQg=\ngithub.com/anchore/syft v1.14.0/go.mod h1:8bN2W/Tr4Mmm42h2XB9LPiPOps+NzCFIaQOKLBGb2b8=\ngithub.com/andreyvit/diff v0.0.0-20170406064948-c7f18ee00883/go.mod h1:rCTlJbsFo29Kk6CurOXKm700vrz8f0KW0JNfpkRJY/8=\ngithub.com/andybalholm/brotli v1.0.1/go.mod h1:loMXtMfwqflxFJPmdbJO0a3KNoPuLBgiu3qAvBg8x/Y=\ngithub.com/andybalholm/brotli v1.0.4 h1:V7DdXeJtZscaqfNuAdSRuRFzuiKlHSC/Zh3zl9qY3JY=\ngithub.com/andybalholm/brotli v1.0.4/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig=\ngithub.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be h1:9AeTilPcZAjCFIImctFaOjnTIavg87rW78vTPkQqLI8=\ngithub.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be/go.mod h1:ySMOLuWl6zY27l47sB3qLNK6tF2fkHG55UZxx8oIVo4=\ngithub.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY=\ngithub.com/aquasecurity/go-pep440-version v0.0.0-20210121094942-22b2f8951d46 h1:vmXNl+HDfqqXgr0uY1UgK1GAhps8nbAAtqHNBcgyf+4=\ngithub.com/aquasecurity/go-pep440-version v0.0.0-20210121094942-22b2f8951d46/go.mod h1:olhPNdiiAAMiSujemd1O/sc6GcyePr23f/6uGKtthNg=\ngithub.com/aquasecurity/go-version v0.0.0-20210121072130-637058cfe492 h1:rcEG5HI490FF0a7zuvxOxen52ddygCfNVjP0XOCMl+M=\ngithub.com/aquasecurity/go-version v0.0.0-20210121072130-637058cfe492/go.mod h1:9Beu8XsUNNfzml7WBf3QmyPToP1wm1Gj/Vc5UJKqTzU=\ngithub.com/armon/circbuf v0.0.0-20150827004946-bbbad097214e/go.mod h1:3U/XgcO3hCbHZ8TKRvWD2dDTCfh9M9ya+I9JpbB7O8o=\ngithub.com/armon/go-metrics v0.0.0-20180917152333-f0300d1749da/go.mod h1:Q73ZrmVTwzkszR9V5SSuryQ31EELlFMUz1kKyl939pY=\ngithub.com/armon/go-metrics v0.3.10/go.mod h1:4O98XIr/9W0sxpJ8UaYkvjk10Iff7SnFrb4QAOwNTFc=\ngithub.com/armon/go-radix v0.0.0-20180808171621-7fddfc383310/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8=\ngithub.com/armon/go-radix v1.0.0/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8=\ngithub.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio=\ngithub.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs=\ngithub.com/atotto/clipboard v0.1.4 h1:EH0zSVneZPSuFR11BlR9YppQTVDbh5+16AmcJi4g1z4=\ngithub.com/atotto/clipboard v0.1.4/go.mod h1:ZY9tmq7sm5xIbd9bOK4onWV4S6X0u6GY7Vn0Yu86PYI=\ngithub.com/aymanbagabas/go-osc52/v2 v2.0.1 h1:HwpRHbFMcZLEVr42D4p7XBqjyuxQH5SMiErDT4WkJ2k=\ngithub.com/aymanbagabas/go-osc52/v2 v2.0.1/go.mod h1:uYgXzlJ7ZpABp8OJ+exZzJJhRNQ2ASbcXHWsFqH8hp8=\ngithub.com/becheran/wildmatch-go v1.0.0 h1:mE3dGGkTmpKtT4Z+88t8RStG40yN9T+kFEGj2PZFSzA=\ngithub.com/becheran/wildmatch-go v1.0.0/go.mod h1:gbMvj0NtVdJ15Mg/mH9uxk2R1QCistMyU7d9KFzroX4=\ngithub.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q=\ngithub.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8=\ngithub.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=\ngithub.com/bgentry/speakeasy 
v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs=\ngithub.com/bmatcuk/doublestar/v4 v4.6.1 h1:FH9SifrbvJhnlQpztAx++wlkk70QBf0iBWDwNy7PA4I=\ngithub.com/bmatcuk/doublestar/v4 v4.6.1/go.mod h1:xBQ8jztBU6kakFMg+8WGxn0c6z1fTSPVIjEY1Wr7jzc=\ngithub.com/bradleyjkemp/cupaloy/v2 v2.8.0 h1:any4BmKE+jGIaMpnU8YgH/I2LPiLBufr6oMMlVBbn9M=\ngithub.com/bradleyjkemp/cupaloy/v2 v2.8.0/go.mod h1:bm7JXdkRd4BHJk9HpwqAI8BoAY1lps46Enkdqw6aRX0=\ngithub.com/caarlos0/env v3.5.0+incompatible h1:Yy0UN8o9Wtr/jGHZDpCBLpNrzcFLLM2yixi/rBrKyJs=\ngithub.com/caarlos0/env v3.5.0+incompatible/go.mod h1:tdCsowwCzMLdkqRYDlHpZCp2UooDD3MspDBjZ2AD02Y=\ngithub.com/cenkalti/backoff/v4 v4.2.1 h1:y4OZtCnogmCPw98Zjyt5a6+QwPLGkiQsYW5oUqylYbM=\ngithub.com/cenkalti/backoff/v4 v4.2.1/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE=\ngithub.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=\ngithub.com/census-instrumentation/opencensus-proto v0.3.0/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=\ngithub.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc=\ngithub.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=\ngithub.com/cespare/xxhash/v2 v2.1.2/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=\ngithub.com/charmbracelet/bubbles v0.20.0 h1:jSZu6qD8cRQ6k9OMfR1WlM+ruM8fkPWkHvQWD9LIutE=\ngithub.com/charmbracelet/bubbles v0.20.0/go.mod h1:39slydyswPy+uVOHZ5x/GjwVAFkCsV8IIVy+4MhzwwU=\ngithub.com/charmbracelet/bubbletea v1.1.1 h1:KJ2/DnmpfqFtDNVTvYZ6zpPFL9iRCRr0qqKOCvppbPY=\ngithub.com/charmbracelet/bubbletea v1.1.1/go.mod h1:9Ogk0HrdbHolIKHdjfFpyXJmiCzGwy+FesYkZr7hYU4=\ngithub.com/charmbracelet/lipgloss v0.13.0 h1:4X3PPeoWEDCMvzDvGmTajSyYPcZM4+y8sCA/SsA3cjw=\ngithub.com/charmbracelet/lipgloss v0.13.0/go.mod h1:nw4zy0SBX/F/eAO1cWdcvy6qnkDUxr8Lw7dvFrAIbbY=\ngithub.com/charmbracelet/x/ansi v0.2.3 h1:VfFN0NUpcjBRd4DnKfRaIRo53KRgey/nhOoEqosGDEY=\ngithub.com/charmbracelet/x/ansi v0.2.3/go.mod h1:dk73KoMTT5AX5BsX0KrqhsTqAnhZZoCBjs7dGWp4Ktw=\ngithub.com/charmbracelet/x/term v0.2.0 h1:cNB9Ot9q8I711MyZ7myUR5HFWL/lc3OpU8jZ4hwm0x0=\ngithub.com/charmbracelet/x/term v0.2.0/go.mod h1:GVxgxAbjUrmpvIINHIQnJJKpMlHiZ4cktEQCN6GWyF0=\ngithub.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=\ngithub.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI=\ngithub.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU=\ngithub.com/circonus-labs/circonus-gometrics v2.3.1+incompatible/go.mod h1:nmEj6Dob7S7YxXgwXpfOuvO54S+tGdZdw9fuRZt25Ag=\ngithub.com/circonus-labs/circonusllhist v0.1.3/go.mod h1:kMXHVDlOchFAehlya5ePtbp5jckzBHf4XRpQvBOLI+I=\ngithub.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=\ngithub.com/cloudflare/circl v1.6.1 h1:zqIqSPIndyBh1bjLVVDHMPpVKqp8Su/V+6MeDzzQBQ0=\ngithub.com/cloudflare/circl v1.6.1/go.mod h1:uddAzsPgqdMAYatqJ0lsjX1oECcQLIlRpzZh3pJrofs=\ngithub.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc=\ngithub.com/cncf/udpa/go v0.0.0-20200629203442-efcf912fb354/go.mod h1:WmhPx2Nbnhtbo57+VJT5O0JRkEi1Wbu0z5j0R8u5Hbk=\ngithub.com/cncf/udpa/go v0.0.0-20201120205902-5459f2c99403/go.mod h1:WmhPx2Nbnhtbo57+VJT5O0JRkEi1Wbu0z5j0R8u5Hbk=\ngithub.com/cncf/udpa/go v0.0.0-20210930031921-04548b0d99d4/go.mod h1:6pvJx4me5XPnfI9Z40ddWsdw2W/uZgQLFXToKeRcDiI=\ngithub.com/cncf/xds/go 
v0.0.0-20210312221358-fbca930ec8ed/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs=\ngithub.com/cncf/xds/go v0.0.0-20210805033703-aa0b78936158/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs=\ngithub.com/cncf/xds/go v0.0.0-20210922020428-25de7278fc84/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs=\ngithub.com/cncf/xds/go v0.0.0-20211001041855-01bcc9b48dfe/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs=\ngithub.com/cncf/xds/go v0.0.0-20211011173535-cb28da3451f1/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs=\ngithub.com/cncf/xds/go v0.0.0-20211130200136-a8f946100490/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs=\ngithub.com/containerd/cgroups v1.1.0 h1:v8rEWFl6EoqHB+swVNjVoCJE8o3jX7e8nqBGPLaDFBM=\ngithub.com/containerd/cgroups v1.1.0/go.mod h1:6ppBcbh/NOOUU+dMKrykgaBnK9lCIBxHqJDGwsa1mIw=\ngithub.com/containerd/containerd v1.7.28 h1:Nsgm1AtcmEh4AHAJ4gGlNSaKgXiNccU270Dnf81FQ3c=\ngithub.com/containerd/containerd v1.7.28/go.mod h1:azUkWcOvHrWvaiUjSQH0fjzuHIwSPg1WL5PshGP4Szs=\ngithub.com/containerd/containerd/api v1.8.0 h1:hVTNJKR8fMc/2Tiw60ZRijntNMd1U+JVMyTRdsD2bS0=\ngithub.com/containerd/containerd/api v1.8.0/go.mod h1:dFv4lt6S20wTu/hMcP4350RL87qPWLVa/OHOwmmdnYc=\ngithub.com/containerd/continuity v0.4.4 h1:/fNVfTJ7wIl/YPMHjf+5H32uFhl63JucB34PlCpMKII=\ngithub.com/containerd/continuity v0.4.4/go.mod h1:/lNJvtJKUQStBzpVQ1+rasXO1LAWtUQssk28EZvJ3nE=\ngithub.com/containerd/errdefs v0.3.0 h1:FSZgGOeK4yuT/+DnF07/Olde/q4KBoMsaamhXxIMDp4=\ngithub.com/containerd/errdefs v0.3.0/go.mod h1:+YBYIdtsnF4Iw6nWZhJcqGSg/dwvV7tyJ/kCkyJ2k+M=\ngithub.com/containerd/fifo v1.1.0 h1:4I2mbh5stb1u6ycIABlBw9zgtlK8viPI9QkQNRQEEmY=\ngithub.com/containerd/fifo v1.1.0/go.mod h1:bmC4NWMbXlt2EZ0Hc7Fx7QzTFxgPID13eH0Qu+MAb2o=\ngithub.com/containerd/log v0.1.0 h1:TCJt7ioM2cr/tfR8GPbGf9/VRAX8D2B4PjzCpfX540I=\ngithub.com/containerd/log v0.1.0/go.mod h1:VRRf09a7mHDIRezVKTRCrOq78v577GXq3bSa3EhrzVo=\ngithub.com/containerd/platforms v0.2.1 h1:zvwtM3rz2YHPQsF2CHYM8+KtB5dvhISiXh5ZpSBQv6A=\ngithub.com/containerd/platforms v0.2.1/go.mod h1:XHCb+2/hzowdiut9rkudds9bE5yJ7npe7dG/wG+uFPw=\ngithub.com/containerd/stargz-snapshotter/estargz v0.14.3 h1:OqlDCK3ZVUO6C3B/5FSkDwbkEETK84kQgEeFwDC+62k=\ngithub.com/containerd/stargz-snapshotter/estargz v0.14.3/go.mod h1:KY//uOCIkSuNAHhJogcZtrNHdKrA99/FCCRjE3HD36o=\ngithub.com/containerd/ttrpc v1.2.7 h1:qIrroQvuOL9HQ1X6KHe2ohc7p+HP/0VE6XPU7elJRqQ=\ngithub.com/containerd/ttrpc v1.2.7/go.mod h1:YCXHsb32f+Sq5/72xHubdiJRQY9inL4a4ZQrAbN1q9o=\ngithub.com/containerd/typeurl/v2 v2.1.1 h1:3Q4Pt7i8nYwy2KmQWIw2+1hTvwTE/6w9FqcttATPO/4=\ngithub.com/containerd/typeurl/v2 v2.1.1/go.mod h1:IDp2JFvbwZ31H8dQbEIY7sDl2L3o3HZj1hsSQlywkQ0=\ngithub.com/coreos/go-semver v0.3.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk=\ngithub.com/coreos/go-systemd/v22 v22.3.2/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=\ngithub.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU=\ngithub.com/cpuguy83/go-md2man/v2 v2.0.1/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=\ngithub.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=\ngithub.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=\ngithub.com/cyphar/filepath-securejoin v0.4.1 h1:JyxxyPEaktOD+GAnqIqTf9A8tHyAG22rowi7HkoSU1s=\ngithub.com/cyphar/filepath-securejoin v0.4.1/go.mod h1:Sdj7gXlvMcPZsbhwhQ33GguGLDGQL7h7bg04C/+u9jI=\ngithub.com/davecgh/go-spew v1.1.0/go.mod 
h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=\ngithub.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=\ngithub.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM=\ngithub.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=\ngithub.com/deitch/magic v0.0.0-20230404182410-1ff89d7342da h1:ZOjWpVsFZ06eIhnh4mkaceTiVoktdU67+M7KDHJ268M=\ngithub.com/deitch/magic v0.0.0-20230404182410-1ff89d7342da/go.mod h1:B3tI9iGHi4imdLi4Asdha1Sc6feLMTfPLXh9IUYmysk=\ngithub.com/dgrijalva/jwt-go/v4 v4.0.0-preview1/go.mod h1:+hnT3ywWDTAFrW5aE+u2Sa/wT555ZqwoCS+pk3p6ry4=\ngithub.com/distribution/reference v0.6.0 h1:0IXCQ5g4/QMHHkarYzh5l+u8T3t73zM5QvfrDyIgxBk=\ngithub.com/distribution/reference v0.6.0/go.mod h1:BbU0aIcezP1/5jX/8MP0YiH4SdvB5Y4f/wlDRiLyi3E=\ngithub.com/docker/cli v27.1.1+incompatible h1:goaZxOqs4QKxznZjjBWKONQci/MywhtRv2oNn0GkeZE=\ngithub.com/docker/cli v27.1.1+incompatible/go.mod h1:JLrzqnKDaYBop7H2jaqPtU4hHvMKP+vjCwu2uszcLI8=\ngithub.com/docker/distribution v2.8.3+incompatible h1:AtKxIZ36LoNK51+Z6RpzLpddBirtxJnzDrHLEKxTAYk=\ngithub.com/docker/distribution v2.8.3+incompatible/go.mod h1:J2gT2udsDAN96Uj4KfcMRqY0/ypR+oyYUYmja8H+y+w=\ngithub.com/docker/docker v27.3.1+incompatible h1:KttF0XoteNTicmUtBO0L2tP+J7FGRFTjaEF4k6WdhfI=\ngithub.com/docker/docker v27.3.1+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk=\ngithub.com/docker/docker-credential-helpers v0.7.0 h1:xtCHsjxogADNZcdv1pKUHXryefjlVRqWqIhk/uXJp0A=\ngithub.com/docker/docker-credential-helpers v0.7.0/go.mod h1:rETQfLdHNT3foU5kuNkFR1R1V12OJRRO5lzt2D1b5X0=\ngithub.com/docker/go-connections v0.4.0 h1:El9xVISelRB7BuFusrZozjnkIM5YnzCViNKohAFqRJQ=\ngithub.com/docker/go-connections v0.4.0/go.mod h1:Gbd7IOopHjR8Iph03tsViu4nIes5XhDvyHbTtUxmeec=\ngithub.com/docker/go-events v0.0.0-20190806004212-e31b211e4f1c h1:+pKlWGMw7gf6bQ+oDZB4KHQFypsfjYlq/C4rfL7D3g8=\ngithub.com/docker/go-events v0.0.0-20190806004212-e31b211e4f1c/go.mod h1:Uw6UezgYA44ePAFQYUehOuCzmy5zmg/+nl2ZfMWGkpA=\ngithub.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4=\ngithub.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk=\ngithub.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5 h1:iFaUwBSo5Svw6L7HYpRu/0lE3e0BaElwnNO1qkNQxBY=\ngithub.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5/go.mod h1:qssHWj60/X5sZFNxpG4HBPDHVqxNm4DfnCKgrbZOT+s=\ngithub.com/dsnet/golib v0.0.0-20171103203638-1ea166775780/go.mod h1:Lj+Z9rebOhdfkVLjJ8T6VcRQv3SXugXy999NBtR9aFY=\ngithub.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=\ngithub.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=\ngithub.com/edsrzf/mmap-go v1.1.0 h1:6EUwBLQ/Mcr1EYLE4Tn1VdW1A4ckqCQWZBw8Hr0kjpQ=\ngithub.com/edsrzf/mmap-go v1.1.0/go.mod h1:19H/e8pUPLicwkyNgOykDXkJ9F0MHE+Z52B8EIth78Q=\ngithub.com/elazarl/goproxy v1.7.2 h1:Y2o6urb7Eule09PjlhQRGNsqRfPmYI3KKQLFpCAV3+o=\ngithub.com/elazarl/goproxy v1.7.2/go.mod h1:82vkLNir0ALaW14Rc399OTTjyNREgmdL2cVoIbS6XaE=\ngithub.com/elliotchance/phpserialize v1.4.0 h1:cAp/9+KSnEbUC8oYCE32n2n84BeW8HOY3HMDI8hG2OY=\ngithub.com/elliotchance/phpserialize v1.4.0/go.mod h1:gt7XX9+ETUcLXbtTKEuyrqW3lcLUAeS/AnGZ2e49TZs=\ngithub.com/emirpasic/gods v1.18.1 h1:FXtiHYKDGKCW2KzwZKx0iC0PQmdlorYgdFG9jPXJ1Bc=\ngithub.com/emirpasic/gods v1.18.1/go.mod h1:8tpGGwCnJ5H4r6BWwaV6OrWmMoPhUl5jm/FMNAnJvWQ=\ngithub.com/envoyproxy/go-control-plane 
v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=\ngithub.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=\ngithub.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98=\ngithub.com/envoyproxy/go-control-plane v0.9.7/go.mod h1:cwu0lG7PUMfa9snN8LXBig5ynNVH9qI8YYLbd1fK2po=\ngithub.com/envoyproxy/go-control-plane v0.9.9-0.20201210154907-fd9021fe5dad/go.mod h1:cXg6YxExXjJnVBQHBLXeUAgxn2UodCpnH306RInaBQk=\ngithub.com/envoyproxy/go-control-plane v0.9.9-0.20210217033140-668b12f5399d/go.mod h1:cXg6YxExXjJnVBQHBLXeUAgxn2UodCpnH306RInaBQk=\ngithub.com/envoyproxy/go-control-plane v0.9.9-0.20210512163311-63b5d3c536b0/go.mod h1:hliV/p42l8fGbc6Y9bQ70uLwIvmJyVE5k4iMKlh8wCQ=\ngithub.com/envoyproxy/go-control-plane v0.9.10-0.20210907150352-cf90f659a021/go.mod h1:AFq3mo9L8Lqqiid3OhADV3RfLJnjiw63cSpi+fDTRC0=\ngithub.com/envoyproxy/go-control-plane v0.10.1/go.mod h1:AY7fTTXNdv/aJ2O5jwpxAPOWUZ7hQAEvzN5Pf27BkQQ=\ngithub.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c=\ngithub.com/envoyproxy/protoc-gen-validate v0.6.2/go.mod h1:2t7qjJNvHPx8IjnBOzl9E9/baC+qXE/TeeyBRzgJDws=\ngithub.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f h1:Y/CXytFA4m6baUTXGLOoWe4PQhGxaX0KpnayAqC48p4=\ngithub.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f/go.mod h1:vw97MGsxSvLiUE2X8qFplwetxpGLQrlU1Q9AUEIzCaM=\ngithub.com/facebookincubator/flog v0.0.0-20190930132826-d2511d0ce33c/go.mod h1:QGzNH9ujQ2ZUr/CjDGZGWeDAVStrWNjHeEcjJL96Nuk=\ngithub.com/facebookincubator/nvdtools v0.1.5 h1:jbmDT1nd6+k+rlvKhnkgMokrCAzHoASWE5LtHbX2qFQ=\ngithub.com/facebookincubator/nvdtools v0.1.5/go.mod h1:Kh55SAWnjckS96TBSrXI99KrEKH4iB0OJby3N8GRJO4=\ngithub.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4=\ngithub.com/fatih/color v1.9.0/go.mod h1:eQcE1qtQxscV5RaZvpXrrb8Drkc3/DdQ+uUYCNjL+zU=\ngithub.com/fatih/color v1.13.0/go.mod h1:kLAiJbzzSOZDVNGyDpeOxJ47H46qBXwg5ILebYFFOfk=\ngithub.com/fatih/color v1.17.1-0.20241003070628-1c8d8706604e h1:43jO1Ogdyp9HrUaSFfg1v8fsKxciHMlmK7lAUCHa0SE=\ngithub.com/fatih/color v1.17.1-0.20241003070628-1c8d8706604e/go.mod h1:4FelSpRwEGDpQ12mAdzqdOukCy4u8WUtOY6lkT/6HfU=\ngithub.com/fatih/set v0.2.1 h1:nn2CaJyknWE/6txyUDGwysr3G5QC6xWB/PtVjPBbeaA=\ngithub.com/fatih/set v0.2.1/go.mod h1:+RKtMCH+favT2+3YecHGxcc0b4KyVWA1QWWJUs4E0CI=\ngithub.com/felixge/fgprof v0.9.3 h1:VvyZxILNuCiUCSXtPtYmmtGvb65nqXh2QFWc0Wpf2/g=\ngithub.com/felixge/fgprof v0.9.3/go.mod h1:RdbpDgzqYVh/T9fPELJyV7EYJuHB55UTEULNun8eiPw=\ngithub.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg=\ngithub.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=\ngithub.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8=\ngithub.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0=\ngithub.com/fsnotify/fsnotify v1.5.1/go.mod h1:T3375wBYaZdLLcVNkcVbzGHY7f1l/uK5T5Ai1i3InKU=\ngithub.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA=\ngithub.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM=\ngithub.com/gabriel-vasile/mimetype v1.4.6 h1:3+PzJTKLkvgjeTbts6msPJt4DixhT4YtFNf1gtGe3zc=\ngithub.com/gabriel-vasile/mimetype v1.4.6/go.mod h1:JX1qVKqZd40hUPpAfiNTe0Sne7hdfKSbOqqmkq8GCXc=\ngithub.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04=\ngithub.com/github/go-spdx/v2 v2.3.2 
h1:IfdyNHTqzs4zAJjXdVQfRnxt1XMfycXoHBE2Vsm1bjs=\ngithub.com/github/go-spdx/v2 v2.3.2/go.mod h1:2ZxKsOhvBp+OYBDlsGnUMcchLeo2mrpEBn2L1C+U3IQ=\ngithub.com/glebarez/go-sqlite v1.20.3 h1:89BkqGOXR9oRmG58ZrzgoY/Fhy5x0M+/WV48U5zVrZ4=\ngithub.com/glebarez/go-sqlite v1.20.3/go.mod h1:u3N6D/wftiAzIOJtZl6BmedqxmmkDfH3q+ihjqxC9u0=\ngithub.com/gliderlabs/ssh v0.3.8 h1:a4YXD1V7xMF9g5nTkdfnja3Sxy1PVDCj1Zg4Wb8vY6c=\ngithub.com/gliderlabs/ssh v0.3.8/go.mod h1:xYoytBv1sV0aL3CavoDuJIQNURXkkfPA/wxQ1pL1fAU=\ngithub.com/go-git/gcfg v1.5.1-0.20230307220236-3a3c6141e376 h1:+zs/tPmkDkHx3U66DAb0lQFJrpS6731Oaa12ikc+DiI=\ngithub.com/go-git/gcfg v1.5.1-0.20230307220236-3a3c6141e376/go.mod h1:an3vInlBmSxCcxctByoQdvwPiA7DTK7jaaFDBTtu0ic=\ngithub.com/go-git/go-billy/v5 v5.6.2 h1:6Q86EsPXMa7c3YZ3aLAQsMA0VlWmy43r6FHqa/UNbRM=\ngithub.com/go-git/go-billy/v5 v5.6.2/go.mod h1:rcFC2rAsp/erv7CMz9GczHcuD0D32fWzH+MJAU+jaUU=\ngithub.com/go-git/go-git-fixtures/v4 v4.3.2-0.20231010084843-55a94097c399 h1:eMje31YglSBqCdIqdhKBW8lokaMrL3uTkpGYlE2OOT4=\ngithub.com/go-git/go-git-fixtures/v4 v4.3.2-0.20231010084843-55a94097c399/go.mod h1:1OCfN199q1Jm3HZlxleg+Dw/mwps2Wbk9frAWm+4FII=\ngithub.com/go-git/go-git/v5 v5.16.2 h1:fT6ZIOjE5iEnkzKyxTHK1W4HGAsPhqEqiSAssSO77hM=\ngithub.com/go-git/go-git/v5 v5.16.2/go.mod h1:4Ge4alE/5gPs30F2H1esi2gPd69R0C39lolkucHBOp8=\ngithub.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU=\ngithub.com/go-gl/glfw/v3.3/glfw v0.0.0-20191125211704-12ad95a8df72/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8=\ngithub.com/go-gl/glfw/v3.3/glfw v0.0.0-20200222043503-6f7a984d4dc4/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8=\ngithub.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as=\ngithub.com/go-kit/kit v0.9.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as=\ngithub.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE=\ngithub.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk=\ngithub.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=\ngithub.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY=\ngithub.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=\ngithub.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=\ngithub.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=\ngithub.com/go-restruct/restruct v1.2.0-alpha h1:2Lp474S/9660+SJjpVxoKuWX09JsXHSrdV7Nv3/gkvc=\ngithub.com/go-restruct/restruct v1.2.0-alpha/go.mod h1:KqrpKpn4M8OLznErihXTGLlsXFGeLxHUrLRRI/1YjGk=\ngithub.com/go-sql-driver/mysql v1.6.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg=\ngithub.com/go-sql-driver/mysql v1.8.1 h1:LedoTUt/eveggdHS9qUFC1EFSa8bU2+1pZjSRpvNJ1Y=\ngithub.com/go-sql-driver/mysql v1.8.1/go.mod h1:wEBSXgmK//2ZFJyE+qWnIsVGmvmEKlqwuVSjsCm7DZg=\ngithub.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY=\ngithub.com/go-test/deep v1.1.1 h1:0r/53hagsehfO4bzD2Pgr/+RgHqhmf+k1Bpse2cTu1U=\ngithub.com/go-test/deep v1.1.1/go.mod h1:5C2ZWiW0ErCdrYzpqxLbTX7MG14M9iiw8DgHncVwcsE=\ngithub.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=\ngithub.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ=\ngithub.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q=\ngithub.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q=\ngithub.com/golang/glog 
v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=\ngithub.com/golang/groupcache v0.0.0-20190702054246-869f871628b6/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=\ngithub.com/golang/groupcache v0.0.0-20191227052852-215e87163ea7/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=\ngithub.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=\ngithub.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=\ngithub.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8 h1:f+oWsMOmNPc8JmEHVZIycC7hBoQxHH9pNKQORJNozsQ=\ngithub.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8/go.mod h1:wcDNUvekVysuuOpQKo3191zZyTpiI6se1N1ULghS0sw=\ngithub.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=\ngithub.com/golang/mock v1.2.0/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=\ngithub.com/golang/mock v1.3.1/go.mod h1:sBzyDLLjw3U8JLTeZvSv8jJB+tU5PVekmnlKIyFUx0Y=\ngithub.com/golang/mock v1.4.0/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw=\ngithub.com/golang/mock v1.4.1/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw=\ngithub.com/golang/mock v1.4.3/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw=\ngithub.com/golang/mock v1.4.4/go.mod h1:l3mdAwkq5BuhzHwde/uurv3sEJeZMXNpwsxVWU71h+4=\ngithub.com/golang/mock v1.5.0/go.mod h1:CWnOUgYIOo4TcNZ0wHX3YZCqsaM1I1Jvs6v3mP3KVu8=\ngithub.com/golang/mock v1.6.0/go.mod h1:p6yTPP+5HYm5mzsMV8JkE6ZKdX+/wYM6Hr+LicevLPs=\ngithub.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=\ngithub.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=\ngithub.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=\ngithub.com/golang/protobuf v1.3.3/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw=\ngithub.com/golang/protobuf v1.3.4/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw=\ngithub.com/golang/protobuf v1.3.5/go.mod h1:6O5/vntMXwX2lRkT1hjjk0nAC1IDOTvTlVgjlRvqsdk=\ngithub.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8=\ngithub.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA=\ngithub.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs=\ngithub.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w=\ngithub.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0=\ngithub.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8=\ngithub.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI=\ngithub.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI=\ngithub.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk=\ngithub.com/golang/protobuf v1.5.1/go.mod h1:DopwsBzvsk0Fs44TXzsVbJyPhcCPeIwnvohx4u74HPM=\ngithub.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY=\ngithub.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=\ngithub.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=\ngithub.com/golang/snappy v0.0.2/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=\ngithub.com/golang/snappy v0.0.3/go.mod 
h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=\ngithub.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM=\ngithub.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=\ngithub.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=\ngithub.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=\ngithub.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=\ngithub.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=\ngithub.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=\ngithub.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=\ngithub.com/google/go-cmp v0.4.1/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=\ngithub.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=\ngithub.com/google/go-cmp v0.5.1/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=\ngithub.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=\ngithub.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=\ngithub.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=\ngithub.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=\ngithub.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=\ngithub.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=\ngithub.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=\ngithub.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=\ngithub.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=\ngithub.com/google/go-containerregistry v0.20.2 h1:B1wPJ1SN/S7pB+ZAimcciVD+r+yV/l/DSArMxlbwseo=\ngithub.com/google/go-containerregistry v0.20.2/go.mod h1:z38EKdKh4h7IP2gSfUUqEvalZBqs6AoLeWfUy34nQC8=\ngithub.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=\ngithub.com/google/licensecheck v0.3.1 h1:QoxgoDkaeC4nFrtGN1jV7IPmDCHFNIVh54e5hSt6sPs=\ngithub.com/google/licensecheck v0.3.1/go.mod h1:ORkR35t/JjW+emNKtfJDII0zlciG9JgbT7SmsohlHmY=\ngithub.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs=\ngithub.com/google/martian/v3 v3.0.0/go.mod h1:y5Zk1BBys9G+gd6Jrk0W3cC1+ELVxBWuIGO+w/tUAp0=\ngithub.com/google/martian/v3 v3.1.0/go.mod h1:y5Zk1BBys9G+gd6Jrk0W3cC1+ELVxBWuIGO+w/tUAp0=\ngithub.com/google/martian/v3 v3.2.1/go.mod h1:oBOf6HBosgwRXnUGWUB05QECsc6uvmMiJ3+6W4l/CUk=\ngithub.com/google/pprof v0.0.0-20181206194817-3ea8567a2e57/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc=\ngithub.com/google/pprof v0.0.0-20190515194954-54271f7e092f/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc=\ngithub.com/google/pprof v0.0.0-20191218002539-d4f498aebedc/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM=\ngithub.com/google/pprof v0.0.0-20200212024743-f11f1df84d12/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM=\ngithub.com/google/pprof v0.0.0-20200229191704-1ebb73c60ed3/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM=\ngithub.com/google/pprof v0.0.0-20200430221834-fc25d7d30c6d/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM=\ngithub.com/google/pprof v0.0.0-20200708004538-1a94d8640e99/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM=\ngithub.com/google/pprof v0.0.0-20201023163331-3e6fc7fc9c4c/go.mod 
h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE=\ngithub.com/google/pprof v0.0.0-20201203190320-1bf35d6f28c2/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE=\ngithub.com/google/pprof v0.0.0-20210122040257-d980be63207e/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE=\ngithub.com/google/pprof v0.0.0-20210226084205-cbba55b83ad5/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE=\ngithub.com/google/pprof v0.0.0-20210601050228-01bbb1931b22/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE=\ngithub.com/google/pprof v0.0.0-20210609004039-a478d1d731e9/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE=\ngithub.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE=\ngithub.com/google/pprof v0.0.0-20211214055906-6f57359322fd/go.mod h1:KgnwoLYCZ8IQu3XUZ8Nc/bM9CCZFOyjUNOSygVozoDg=\ngithub.com/google/pprof v0.0.0-20240409012703-83162a5b38cd h1:gbpYu9NMq8jhDVbvlGkMFWCjLFlqqEZjEmObmhUy6Vo=\ngithub.com/google/pprof v0.0.0-20240409012703-83162a5b38cd/go.mod h1:kf6iHlnVGwgKolg33glAes7Yg/8iWP8ukqeldJSO7jw=\ngithub.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI=\ngithub.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=\ngithub.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=\ngithub.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=\ngithub.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg=\ngithub.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk=\ngithub.com/googleapis/gax-go/v2 v2.1.0/go.mod h1:Q3nei7sK6ybPYH7twZdmQpAd1MKb7pfu6SK+H1/DsU0=\ngithub.com/googleapis/gax-go/v2 v2.1.1/go.mod h1:hddJymUZASv3XPyGkUpKj8pPO47Rmb0eJc8R6ouapiM=\ngithub.com/gookit/color v1.2.5/go.mod h1:AhIE+pS6D4Ql0SQWbBeXPHw7gY0/sjHoA4s/n1KB7xg=\ngithub.com/gookit/color v1.5.4 h1:FZmqs7XOyGgCAxmWyPslpiok1k05wmY3SJTytgvYFs0=\ngithub.com/gookit/color v1.5.4/go.mod h1:pZJOeOS8DM43rXbp4AZo1n9zCU2qjpcRko0b6/QJi9w=\ngithub.com/grpc-ecosystem/grpc-gateway v1.16.0 h1:gmcG1KaJ57LophUzW0Hy8NmPhnMZb4M0+kPpLofRdBo=\ngithub.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw=\ngithub.com/grpc-ecosystem/grpc-gateway/v2 v2.16.0 h1:YBftPWNWd4WwGqtY2yeZL2ef8rHAxPBD8KFhJpmcqms=\ngithub.com/grpc-ecosystem/grpc-gateway/v2 v2.16.0/go.mod h1:YN5jB8ie0yfIUg6VvR9Kz84aCaG7AsGZnLjhHbUqwPg=\ngithub.com/hashicorp/consul/api v1.11.0/go.mod h1:XjsvQN+RJGWI2TWy1/kqaE16HrR2J/FWgkYjdZQsX9M=\ngithub.com/hashicorp/consul/sdk v0.8.0/go.mod h1:GBvyrGALthsZObzUGsfgHZQDXjg4lOjagTIwIR1vPms=\ngithub.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=\ngithub.com/hashicorp/errwrap v1.1.0 h1:OxrOeh75EUXMY8TBjag2fzXGZ40LB6IKw45YeGUDY2I=\ngithub.com/hashicorp/errwrap v1.1.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=\ngithub.com/hashicorp/go-cleanhttp v0.5.0/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80=\ngithub.com/hashicorp/go-cleanhttp v0.5.1/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80=\ngithub.com/hashicorp/go-cleanhttp v0.5.2/go.mod h1:kO/YDlP8L1346E6Sodw+PrpBSV4/SoxCXGY6BqNFT48=\ngithub.com/hashicorp/go-hclog v0.12.0/go.mod h1:whpDNt7SSdeAju8AWKIWsul05p54N/39EeqMAyrmvFQ=\ngithub.com/hashicorp/go-hclog v1.0.0/go.mod h1:whpDNt7SSdeAju8AWKIWsul05p54N/39EeqMAyrmvFQ=\ngithub.com/hashicorp/go-immutable-radix v1.0.0/go.mod h1:0y9vanUI8NX6FsYoO3zeMjhV/C5i9g4Q3DwcSNZ4P60=\ngithub.com/hashicorp/go-immutable-radix v1.3.1/go.mod 
h1:0y9vanUI8NX6FsYoO3zeMjhV/C5i9g4Q3DwcSNZ4P60=\ngithub.com/hashicorp/go-msgpack v0.5.3/go.mod h1:ahLV/dePpqEmjfWmKiqvPkv/twdG7iPBM1vqhUKIvfM=\ngithub.com/hashicorp/go-multierror v1.0.0/go.mod h1:dHtQlpGsu+cZNNAkkCN/P3hoUDHhCYQXV3UM06sGGrk=\ngithub.com/hashicorp/go-multierror v1.1.0/go.mod h1:spPvp8C1qA32ftKqdAHm4hHTbPw+vmowP0z+KUhOZdA=\ngithub.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo=\ngithub.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM=\ngithub.com/hashicorp/go-retryablehttp v0.5.3/go.mod h1:9B5zBasrRhHXnJnui7y6sL7es7NDiJgTc6Er0maI1Xs=\ngithub.com/hashicorp/go-rootcerts v1.0.2/go.mod h1:pqUvnprVnM5bf7AOirdbb01K4ccR319Vf4pU3K5EGc8=\ngithub.com/hashicorp/go-sockaddr v1.0.0/go.mod h1:7Xibr9yA9JjQq1JpNB2Vw7kxv8xerXegt+ozgdvDeDU=\ngithub.com/hashicorp/go-syslog v1.0.0/go.mod h1:qPfqrKkXGihmCqbJM2mZgkZGvKG1dFdvsLplgctolz4=\ngithub.com/hashicorp/go-uuid v1.0.0/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro=\ngithub.com/hashicorp/go-uuid v1.0.1/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro=\ngithub.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=\ngithub.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=\ngithub.com/hashicorp/golang-lru v0.5.4 h1:YDjusn29QI/Das2iO9M0BHnIbxPeyuCHsjMW+lJfyTc=\ngithub.com/hashicorp/golang-lru v0.5.4/go.mod h1:iADmTwqILo4mZ8BN3D2Q6+9jd8WM5uGBxy+E8yxSoD4=\ngithub.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k=\ngithub.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM=\ngithub.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4=\ngithub.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ=\ngithub.com/hashicorp/logutils v1.0.0/go.mod h1:QIAnNjmIWmVIIkWDTG1z5v++HQmx9WQRO+LraFDTW64=\ngithub.com/hashicorp/mdns v1.0.1/go.mod h1:4gW7WsVCke5TE7EPeYliwHlRUyBtfCwuFwuMg2DmyNY=\ngithub.com/hashicorp/mdns v1.0.4/go.mod h1:mtBihi+LeNXGtG8L9dX59gAEa12BDtBQSp4v/YAJqrc=\ngithub.com/hashicorp/memberlist v0.2.2/go.mod h1:MS2lj3INKhZjWNqd3N0m3J+Jxf3DAOnAH9VT3Sh9MUE=\ngithub.com/hashicorp/memberlist v0.3.0/go.mod h1:MS2lj3INKhZjWNqd3N0m3J+Jxf3DAOnAH9VT3Sh9MUE=\ngithub.com/hashicorp/serf v0.9.5/go.mod h1:UWDWwZeL5cuWDJdl0C6wrvrUwEqtQ4ZKBKKENpqIUyk=\ngithub.com/hashicorp/serf v0.9.6/go.mod h1:TXZNMjZQijwlDvp+r0b63xZ45H7JmCmgg4gpTwn9UV4=\ngithub.com/huandu/xstrings v1.5.0 h1:2ag3IFq9ZDANvthTwTiqSSZLjDc+BedvHPAp5tJy2TI=\ngithub.com/huandu/xstrings v1.5.0/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE=\ngithub.com/iancoleman/strcase v0.2.0/go.mod h1:iwCmte+B7n89clKwxIoIXy/HfoL7AsD47ZCWhYzw7ho=\ngithub.com/iancoleman/strcase v0.3.0 h1:nTXanmYxhfFAMjZL34Ov6gkzEsSJZ5DbhxWjvSASxEI=\ngithub.com/iancoleman/strcase v0.3.0/go.mod h1:iwCmte+B7n89clKwxIoIXy/HfoL7AsD47ZCWhYzw7ho=\ngithub.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=\ngithub.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=\ngithub.com/ianlancetaylor/demangle v0.0.0-20210905161508-09a460cdf81d/go.mod h1:aYm2/VgdVmcIU8iMfdMvDMsRAQjcfZSKFby6HOFvi/w=\ngithub.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8=\ngithub.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=\ngithub.com/inconshreveable/mousetrap v1.1.0/go.mod 
h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=\ngithub.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99 h1:BQSFePA1RWJOlocH6Fxy8MmwDt+yVQYULKfN0RoTN8A=\ngithub.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99/go.mod h1:1lJo3i6rXxKeerYnT8Nvf0QmHCRC1n8sfWVwXF2Frvo=\ngithub.com/jedib0t/go-pretty/v6 v6.6.0 h1:wmZVuAcEkZRT+Aq1xXpE8IGat4vE5WXOMmBpbQqERXw=\ngithub.com/jedib0t/go-pretty/v6 v6.6.0/go.mod h1:zbn98qrYlh95FIhwwsbIip0LYpwSG8SUOScs+v9/t0E=\ngithub.com/jinzhu/copier v0.4.0 h1:w3ciUoD19shMCRargcpm0cm91ytaBhDvuRpz1ODO/U8=\ngithub.com/jinzhu/copier v0.4.0/go.mod h1:DfbEm0FYsaqBcKcFuvmOZb218JkPGtvSHsKg8S8hyyg=\ngithub.com/jmoiron/sqlx v1.4.0 h1:1PLqN7S1UYp5t4SrVVnt4nUVNemrDAtxlulVe+Qgm3o=\ngithub.com/jmoiron/sqlx v1.4.0/go.mod h1:ZrZ7UsYB/weZdl2Bxg6jCRO9c3YHl8r3ahlKmRT4JLY=\ngithub.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU=\ngithub.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=\ngithub.com/json-iterator/go v1.1.11/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=\ngithub.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=\ngithub.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU=\ngithub.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk=\ngithub.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w=\ngithub.com/kastenhq/goversion v0.0.0-20230811215019-93b2f8823953 h1:WdAeg/imY2JFPc/9CST4bZ80nNJbiBFCAdSZCSgrS5Y=\ngithub.com/kastenhq/goversion v0.0.0-20230811215019-93b2f8823953/go.mod h1:6o+UrvuZWc4UTyBhQf0LGjW9Ld7qJxLz/OqvSOWWlEc=\ngithub.com/kevinburke/ssh_config v1.2.0 h1:x584FjTGwHzMwvHx18PXxbBVzfnxogHaAReU4gf13a4=\ngithub.com/kevinburke/ssh_config v1.2.0/go.mod h1:CT57kijsi8u/K/BOFA39wgDQJ9CxiF4nAY/ojJ6r6mM=\ngithub.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=\ngithub.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=\ngithub.com/klauspost/compress v1.4.1/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=\ngithub.com/klauspost/compress v1.11.4/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs=\ngithub.com/klauspost/compress v1.17.8 h1:YcnTYrq7MikUT7k0Yb5eceMmALQPYBW/Xltxn0NAMnU=\ngithub.com/klauspost/compress v1.17.8/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw=\ngithub.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=\ngithub.com/klauspost/pgzip v1.2.5 h1:qnWYvvKqedOF2ulHpMG72XQol4ILEJ8k2wwRl/Km8oE=\ngithub.com/klauspost/pgzip v1.2.5/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs=\ngithub.com/knqyf263/go-rpmdb v0.1.1 h1:oh68mTCvp1XzxdU7EfafcWzzfstUZAEa3MW0IJye584=\ngithub.com/knqyf263/go-rpmdb v0.1.1/go.mod h1:9LQcoMCMQ9vrF7HcDtXfvqGO4+ddxFQ8+YF/0CVGDww=\ngithub.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=\ngithub.com/kr/fs v0.1.0/go.mod h1:FFnZGqtBN9Gxj7eW1uZ42v5BccTP0vu6NEaFoC2HwRg=\ngithub.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc=\ngithub.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=\ngithub.com/kr/pretty v0.2.0/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=\ngithub.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=\ngithub.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=\ngithub.com/kr/pty 
v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=\ngithub.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=\ngithub.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=\ngithub.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=\ngithub.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw=\ngithub.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o=\ngithub.com/logrusorgru/aurora v2.0.3+incompatible h1:tOpm7WcpBTn4fjmVfgpQq0EfczGlG91VSDkswnjF5A8=\ngithub.com/logrusorgru/aurora v2.0.3+incompatible/go.mod h1:7rIyQOR62GCctdiQpZ/zOJlFyk6y+94wXzv6RNZgaR4=\ngithub.com/lucasb-eyer/go-colorful v1.2.0 h1:1nnpGOrhyZZuNyfu1QjKiUICQ74+3FNCN69Aj6K7nkY=\ngithub.com/lucasb-eyer/go-colorful v1.2.0/go.mod h1:R4dSotOR9KMtayYi1e77YzuveK+i7ruzyGqttikkLy0=\ngithub.com/lyft/protoc-gen-star v0.5.3/go.mod h1:V0xaHgaf5oCCqmcxYcWiDfTiKsZsRc87/1qhoTACD8w=\ngithub.com/magiconair/properties v1.8.5/go.mod h1:y3VJvCyxH9uVvJTWEGAELF3aiYNyPKd5NZ3oSwXrF60=\ngithub.com/magiconair/properties v1.8.7 h1:IeQXZAiQcpL9mgcAe1Nu6cX9LLw6ExEHKjN0VQdvPDY=\ngithub.com/magiconair/properties v1.8.7/go.mod h1:Dhd985XPs7jluiymwWYZ0G4Z61jb3vdS329zhj2hYo0=\ngithub.com/mattn/go-colorable v0.0.9/go.mod h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaOChaDxuIBZU=\ngithub.com/mattn/go-colorable v0.1.4/go.mod h1:U0ppj6V5qS13XJ6of8GYAs25YV2eR4EVcfRqFIhoBtE=\ngithub.com/mattn/go-colorable v0.1.6/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc=\ngithub.com/mattn/go-colorable v0.1.9/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc=\ngithub.com/mattn/go-colorable v0.1.12/go.mod h1:u5H1YNBxpqRaxsYJYSkiCWKzEfiAb1Gb520KVy5xxl4=\ngithub.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA=\ngithub.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg=\ngithub.com/mattn/go-isatty v0.0.3/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4=\ngithub.com/mattn/go-isatty v0.0.8/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s=\ngithub.com/mattn/go-isatty v0.0.10/go.mod h1:qgIWMr58cqv1PHHyhnkY9lrL7etaEgOFcMEpPG5Rm84=\ngithub.com/mattn/go-isatty v0.0.11/go.mod h1:PhnuNfih5lzO57/f3n+odYbM4JtupLOxQOAqxQCu2WE=\ngithub.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU=\ngithub.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94=\ngithub.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=\ngithub.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=\ngithub.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=\ngithub.com/mattn/go-localereader v0.0.2-0.20220822084749-2491eb6c1c75 h1:P8UmIzZMYDR+NGImiFvErt6VWfIRPuGM+vyjiEdkmIw=\ngithub.com/mattn/go-localereader v0.0.2-0.20220822084749-2491eb6c1c75/go.mod h1:8fBrzywKY7BI3czFoHkuzRoWE9C+EiG4R1k4Cjx5p88=\ngithub.com/mattn/go-runewidth v0.0.9/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI=\ngithub.com/mattn/go-runewidth v0.0.16 h1:E5ScNMtiwvlvB5paMFdw9p4kSQzbXFikJ5SQO6TULQc=\ngithub.com/mattn/go-runewidth v0.0.16/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=\ngithub.com/mattn/go-sqlite3 v1.14.22 h1:2gZY6PC6kBnID23Tichd1K+Z0oS6nE/XwU+Vz/5o4kU=\ngithub.com/mattn/go-sqlite3 v1.14.22/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y=\ngithub.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0=\ngithub.com/mgutz/ansi v0.0.0-20200706080929-d51e80ef957d 
h1:5PJl274Y63IEHC+7izoQE9x6ikvDFZS2mDVS3drnohI=\ngithub.com/mgutz/ansi v0.0.0-20200706080929-d51e80ef957d/go.mod h1:01TrycV0kFyexm33Z7vhZRXopbI8J3TDReVlkTgMUxE=\ngithub.com/mholt/archiver/v3 v3.5.1 h1:rDjOBX9JSF5BvoJGvjqK479aL70qh9DIpZCl+k7Clwo=\ngithub.com/mholt/archiver/v3 v3.5.1/go.mod h1:e3dqJ7H78uzsRSEACH1joayhuSyhnonssnDhppzS1L4=\ngithub.com/microsoft/go-rustaudit v0.0.0-20220730194248-4b17361d90a5 h1:tQRHcLQwnwrPq2j2Qra/NnyjyESBGwdeBeVdAE9kXYg=\ngithub.com/microsoft/go-rustaudit v0.0.0-20220730194248-4b17361d90a5/go.mod h1:vYT9HE7WCvL64iVeZylKmCsWKfE+JZ8105iuh2Trk8g=\ngithub.com/miekg/dns v1.0.14/go.mod h1:W1PPwlIAgtquWBMBEV9nkV9Cazfe8ScdGz/Lj7v3Nrg=\ngithub.com/miekg/dns v1.1.26/go.mod h1:bPDLeHnStXmXAq1m/Ch/hvfNHr14JKNPMBo3VZKjuso=\ngithub.com/miekg/dns v1.1.41/go.mod h1:p6aan82bvRIyn+zDIv9xYNUpwa73JcSh9BKwknJysuI=\ngithub.com/mitchellh/cli v1.1.0/go.mod h1:xcISNoH86gajksDmfB23e/pu+B+GeFRMYmoHXxx3xhI=\ngithub.com/mitchellh/copystructure v1.2.0 h1:vpKXTN4ewci03Vljg/q9QvCGUDttBOGBIa15WveJJGw=\ngithub.com/mitchellh/copystructure v1.2.0/go.mod h1:qLl+cE2AmVv+CoeAwDPye/v+N2HKCj9FbZEVFJRxO9s=\ngithub.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y=\ngithub.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0=\ngithub.com/mitchellh/go-testing-interface v1.0.0/go.mod h1:kRemZodwjscx+RGhAo8eIhFbs2+BFgRtFPeD/KE+zxI=\ngithub.com/mitchellh/hashstructure/v2 v2.0.2 h1:vGKWl0YJqUNxE8d+h8f6NJLcCJrgbhC4NcD46KavDd4=\ngithub.com/mitchellh/hashstructure/v2 v2.0.2/go.mod h1:MG3aRVU/N29oo/V/IhBX8GR/zz4kQkprJgF2EVszyDE=\ngithub.com/mitchellh/mapstructure v0.0.0-20160808181253-ca63d7c062ee/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y=\ngithub.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y=\ngithub.com/mitchellh/mapstructure v1.4.3/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo=\ngithub.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY=\ngithub.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo=\ngithub.com/mitchellh/reflectwalk v1.0.2 h1:G2LzWKi524PWgd3mLHV8Y5k7s6XUvT0Gef6zxSIeXaQ=\ngithub.com/mitchellh/reflectwalk v1.0.2/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw=\ngithub.com/moby/docker-image-spec v1.3.1 h1:jMKff3w6PgbfSa69GfNg+zN/XLhfXJGnEx3Nl2EsFP0=\ngithub.com/moby/docker-image-spec v1.3.1/go.mod h1:eKmb5VW8vQEh/BAr2yvVNvuiJuY6UIocYsFu/DxxRpo=\ngithub.com/moby/locker v1.0.1 h1:fOXqR41zeveg4fFODix+1Ch4mj/gT0NE1XJbp/epuBg=\ngithub.com/moby/locker v1.0.1/go.mod h1:S7SDdo5zpBK84bzzVlKr2V0hz+7x9hWbYC/kq7oQppc=\ngithub.com/moby/sys/mountinfo v0.7.2 h1:1shs6aH5s4o5H2zQLn796ADW1wMrIwHsyJ2v9KouLrg=\ngithub.com/moby/sys/mountinfo v0.7.2/go.mod h1:1YOa8w8Ih7uW0wALDUgT1dTTSBrZ+HiBLGws92L2RU4=\ngithub.com/moby/sys/sequential v0.5.0 h1:OPvI35Lzn9K04PBbCLW0g4LcFAJgHsvXsRyewg5lXtc=\ngithub.com/moby/sys/sequential v0.5.0/go.mod h1:tH2cOOs5V9MlPiXcQzRC+eEyab644PWKGRYaaV5ZZlo=\ngithub.com/moby/sys/signal v0.7.0 h1:25RW3d5TnQEoKvRbEKUGay6DCQ46IxAVTT9CUMgmsSI=\ngithub.com/moby/sys/signal v0.7.0/go.mod h1:GQ6ObYZfqacOwTtlXvcmh9A26dVRul/hbOZn88Kg8Tg=\ngithub.com/moby/sys/user v0.3.0 h1:9ni5DlcW5an3SvRSx4MouotOygvzaXbaSrc/wGDFWPo=\ngithub.com/moby/sys/user v0.3.0/go.mod h1:bG+tYYYJgaMtRKgEmuueC0hJEAZWwtIbZTB+85uoHjs=\ngithub.com/moby/sys/userns v0.1.0 h1:tVLXkFOxVu9A64/yh59slHVv9ahO9UIev4JZusOLG/g=\ngithub.com/moby/sys/userns v0.1.0/go.mod h1:IHUYgu/kao6N8YZlp9Cf444ySSvCmDlmzUcYfDHOl28=\ngithub.com/moby/term 
v0.0.0-20221205130635-1aeaba878587 h1:HfkjXDfhgVaN5rmueG8cL8KKeFNecRCXFhaJ2qZ5SKA=\ngithub.com/moby/term v0.0.0-20221205130635-1aeaba878587/go.mod h1:8FzsFHVUBGZdbDsJw/ot+X+d5HLUbvklYLJ9uGfcI3Y=\ngithub.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=\ngithub.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=\ngithub.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=\ngithub.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=\ngithub.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=\ngithub.com/morikuni/aec v1.0.0 h1:nP9CBfwrvYnBRgY6qfDQkygYDmYwOilePFkwzv4dU8A=\ngithub.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7PXmsc=\ngithub.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6 h1:ZK8zHtRHOkbHy6Mmr5D264iyp3TiX5OmNcI5cIARiQI=\ngithub.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6/go.mod h1:CJlz5H+gyd6CUWT45Oy4q24RdLyn7Md9Vj2/ldJBSIo=\ngithub.com/muesli/cancelreader v0.2.2 h1:3I4Kt4BQjOR54NavqnDogx/MIoWBFa0StPA8ELUXHmA=\ngithub.com/muesli/cancelreader v0.2.2/go.mod h1:3XuTXfFS2VjM+HTLZY9Ak0l6eUKfijIfMUZ4EgX0QYo=\ngithub.com/muesli/termenv v0.15.2 h1:GohcuySI0QmI3wN8Ok9PtKGkgkFIk7y6Vpb5PvrY+Wo=\ngithub.com/muesli/termenv v0.15.2/go.mod h1:Epx+iuz8sNs7mNKhxzH4fWXGNpZwUaJKRS1noLXviQ8=\ngithub.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=\ngithub.com/ncruces/go-strftime v0.1.9 h1:bY0MQC28UADQmHmaF5dgpLmImcShSi2kHU9XLdhx/f4=\ngithub.com/ncruces/go-strftime v0.1.9/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls=\ngithub.com/nwaples/rardecode v1.1.0 h1:vSxaY8vQhOcVr4mm5e8XllHWTiM4JF507A0Katqw7MQ=\ngithub.com/nwaples/rardecode v1.1.0/go.mod h1:5DzqNKiOdpKKBH87u8VlvAnPZMXcGRhxWkRpHbbfGS0=\ngithub.com/olekukonko/tablewriter v0.0.5 h1:P2Ga83D34wi1o9J6Wh1mRuqd4mF/x/lgBS7N7AbDhec=\ngithub.com/olekukonko/tablewriter v0.0.5/go.mod h1:hPp6KlRPjbx+hW8ykQs1w3UBbZlj6HuIJcUGPhkA7kY=\ngithub.com/onsi/gomega v1.34.1 h1:EUMJIKUjM8sKjYbtxQI9A4z2o+rruxnzNvpknOXie6k=\ngithub.com/onsi/gomega v1.34.1/go.mod h1:kU1QgUvBDLXBJq618Xvm2LUX6rSAfRaFRTcdOeDLwwY=\ngithub.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U=\ngithub.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM=\ngithub.com/opencontainers/image-spec v1.1.0 h1:8SG7/vwALn54lVB/0yZ/MMwhFrPYtpEHQb2IpWsCzug=\ngithub.com/opencontainers/image-spec v1.1.0/go.mod h1:W4s4sFTMaBeK1BQLXbG4AdM2szdn85PY75RI83NrTrM=\ngithub.com/opencontainers/runtime-spec v1.1.0 h1:HHUyrt9mwHUjtasSbXSMvs4cyFxh+Bll4AjJ9odEGpg=\ngithub.com/opencontainers/runtime-spec v1.1.0/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=\ngithub.com/opencontainers/selinux v1.11.0 h1:+5Zbo97w3Lbmb3PeqQtpmTkMwsW5nRI3YaLpt7tQ7oU=\ngithub.com/opencontainers/selinux v1.11.0/go.mod h1:E5dMC3VPuVvVHDYmi78qvhJp8+M586T4DlDRYpFkyec=\ngithub.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc=\ngithub.com/pascaldekloe/goe v0.1.0/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc=\ngithub.com/pborman/indent v1.2.1 h1:lFiviAbISHv3Rf0jcuh489bi06hj98JsVMtIDZQb9yM=\ngithub.com/pborman/indent v1.2.1/go.mod h1:FitS+t35kIYtB5xWTZAPhnmrxcciEEOdbyrrpz5K6Vw=\ngithub.com/pelletier/go-toml v1.9.4/go.mod 
h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c=\ngithub.com/pelletier/go-toml v1.9.5 h1:4yBQzkHv+7BHq2PQUZF3Mx0IYxG7LsP222s7Agd3ve8=\ngithub.com/pelletier/go-toml v1.9.5/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c=\ngithub.com/pelletier/go-toml/v2 v2.2.2 h1:aYUidT7k73Pcl9nb2gScu7NSrKCSHIDE89b3+6Wq+LM=\ngithub.com/pelletier/go-toml/v2 v2.2.2/go.mod h1:1t835xjRzz80PqgE6HHgN2JOsmgYu/h4qDAS4n929Rs=\ngithub.com/pierrec/lz4/v4 v4.1.2/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4=\ngithub.com/pierrec/lz4/v4 v4.1.19 h1:tYLzDnjDXh9qIxSTKHwXwOYmm9d887Y7Y1ZkyXYHAN4=\ngithub.com/pierrec/lz4/v4 v4.1.19/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4=\ngithub.com/pjbgf/sha1cd v0.3.2 h1:a9wb0bp1oC2TGwStyn0Umc/IGKQnEgF0vVaZ8QF8eo4=\ngithub.com/pjbgf/sha1cd v0.3.2/go.mod h1:zQWigSxVmsHEZow5qaLtPYxpcKMMQpa09ixqBxuCS6A=\ngithub.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=\ngithub.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=\ngithub.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=\ngithub.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=\ngithub.com/pkg/profile v1.7.0 h1:hnbDkaNWPCLMO9wGLdBFTIZvzDrDfBM2072E1S9gJkA=\ngithub.com/pkg/profile v1.7.0/go.mod h1:8Uer0jas47ZQMJ7VD+OHknK4YDY07LPUC6dEvqDjvNo=\ngithub.com/pkg/sftp v1.10.1/go.mod h1:lYOWFsE0bwd1+KfKJaKeuokY15vzFx25BLbzYYoAxZI=\ngithub.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=\ngithub.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U=\ngithub.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=\ngithub.com/posener/complete v1.1.1/go.mod h1:em0nMJCgc9GFtwrmVmEMR/ZL6WyhyjMBndrE9hABlRI=\ngithub.com/posener/complete v1.2.3/go.mod h1:WZIdtGGp+qx0sLrYKtIRAruyNpv6hFCicSgv7Sy7s/s=\ngithub.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw=\ngithub.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo=\ngithub.com/prometheus/client_golang v1.4.0/go.mod h1:e9GMxYsXl05ICDXkRhurwBS4Q3OK1iX/F2sw+iXX5zU=\ngithub.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo=\ngithub.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=\ngithub.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=\ngithub.com/prometheus/client_model v0.2.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=\ngithub.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4=\ngithub.com/prometheus/common v0.9.1/go.mod h1:yhUN8i9wzaXS3w1O07YhxHEBxD+W35wd8bs7vj7HSQ4=\ngithub.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk=\ngithub.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA=\ngithub.com/prometheus/procfs v0.0.8/go.mod h1:7Qr8sr6344vo1JqZ6HhLceV9o3AJ1Ff+GxbHq6oeK9A=\ngithub.com/prometheus/procfs v0.10.1 h1:kYK1Va/YMlutzCGazswoHKo//tZVlFpKYh+PymziUAg=\ngithub.com/prometheus/procfs v0.10.1/go.mod h1:nwNm2aOCAYw8uTR/9bWRREkZFxAUcWzPHWJq+XBB/FM=\ngithub.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE=\ngithub.com/remyoudompheng/bigfft 
v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo=\ngithub.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=\ngithub.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ=\ngithub.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=\ngithub.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ=\ngithub.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=\ngithub.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ=\ngithub.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc=\ngithub.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=\ngithub.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=\ngithub.com/ryanuber/columnize v0.0.0-20160712163229-9b3edd62028f/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts=\ngithub.com/saferwall/pe v1.5.4 h1:tLmMggEMUfeqrpJ25zS/okUQmyFdD5xWKL2+z9njCqg=\ngithub.com/saferwall/pe v1.5.4/go.mod h1:mJx+PuptmNpoPFBNhWs/uDMFL/kTHVZIkg0d4OUJFbQ=\ngithub.com/sagikazarmark/crypt v0.3.0/go.mod h1:uD/D+6UF4SrIR1uGEv7bBNkNqLGqUr43MRiaGWX1Nig=\ngithub.com/sagikazarmark/locafero v0.4.0 h1:HApY1R9zGo4DBgr7dqsTH/JJxLTTsOt7u6keLGt6kNQ=\ngithub.com/sagikazarmark/locafero v0.4.0/go.mod h1:Pe1W6UlPYUk/+wc/6KFhbORCfqzgYEpgQ3O5fPuL3H4=\ngithub.com/sagikazarmark/slog-shim v0.1.0 h1:diDBnUNK9N/354PgrxMywXnAwEr1QZcOr6gto+ugjYE=\ngithub.com/sagikazarmark/slog-shim v0.1.0/go.mod h1:SrcSrq8aKtyuqEI1uvTDTK1arOWRIczQRv+GVI1AkeQ=\ngithub.com/sahilm/fuzzy v0.1.1 h1:ceu5RHF8DGgoi+/dR5PsECjCDH1BE3Fnmpo7aVXOdRA=\ngithub.com/sahilm/fuzzy v0.1.1/go.mod h1:VFvziUEIMCrT6A6tw2RFIXPXXmzXbOsSHF0DOI8ZK9Y=\ngithub.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d h1:hrujxIzL1woJ7AwssoOcM/tq5JjjG2yYOc8odClEiXA=\ngithub.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d/go.mod h1:uugorj2VCxiV1x+LzaIdVa9b4S4qGAcH6cbhh4qVxOU=\ngithub.com/sanity-io/litter v1.5.5 h1:iE+sBxPBzoK6uaEP5Lt3fHNgpKcHXc/A2HGETy0uJQo=\ngithub.com/sanity-io/litter v1.5.5/go.mod h1:9gzJgR2i4ZpjZHsKvUXIRQVk7P+yM3e+jAF7bU2UI5U=\ngithub.com/sassoftware/go-rpmutils v0.4.0 h1:ojND82NYBxgwrV+mX1CWsd5QJvvEZTKddtCdFLPWhpg=\ngithub.com/sassoftware/go-rpmutils v0.4.0/go.mod h1:3goNWi7PGAT3/dlql2lv3+MSN5jNYPjT5mVcQcIsYzI=\ngithub.com/scylladb/go-set v1.0.3-0.20200225121959-cc7b2070d91e h1:7q6NSFZDeGfvvtIRwBrU/aegEYJYmvev0cHAwo17zZQ=\ngithub.com/scylladb/go-set v1.0.3-0.20200225121959-cc7b2070d91e/go.mod h1:DkpGd78rljTxKAnTDPFqXSGxvETQnJyuSOQwsHycqfs=\ngithub.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc=\ngithub.com/sebdah/goldie/v2 v2.5.3 h1:9ES/mNN+HNUbNWpVAlrzuZ7jE+Nrczbj8uFRjM7624Y=\ngithub.com/sebdah/goldie/v2 v2.5.3/go.mod h1:oZ9fp0+se1eapSRjfYbsV/0Hqhbuu3bJVvKI/NNtssI=\ngithub.com/secDre4mer/pkcs7 v0.0.0-20240322103146-665324a4461d h1:RQqyEogx5J6wPdoxqL132b100j8KjcVHO1c0KLRoIhc=\ngithub.com/secDre4mer/pkcs7 v0.0.0-20240322103146-665324a4461d/go.mod h1:PegD7EVqlN88z7TpCqH92hHP+GBpfomGCCnw1PFtNOA=\ngithub.com/sergi/go-diff v1.1.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM=\ngithub.com/sergi/go-diff v1.2.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM=\ngithub.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3 h1:n661drycOFuPLCN3Uc8sB6B/s6Z4t2xvBgU1htSHuq8=\ngithub.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3/go.mod 
h1:A0bzQcvG0E7Rwjx0REVgAGH58e96+X0MeOfepqsbeW4=\ngithub.com/shopspring/decimal v1.4.0 h1:bxl37RwXBklmTi0C79JfXCEBD1cqqHt0bbgBAGFp81k=\ngithub.com/shopspring/decimal v1.4.0/go.mod h1:gawqmDU56v4yIKSwfBSFip1HdCCXN8/+DMd9qYNcwME=\ngithub.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc=\ngithub.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo=\ngithub.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE=\ngithub.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=\ngithub.com/sirupsen/logrus v1.9.0/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=\ngithub.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ=\ngithub.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=\ngithub.com/skeema/knownhosts v1.3.1 h1:X2osQ+RAjK76shCbvhHHHVl3ZlgDm8apHEHFqRjnBY8=\ngithub.com/skeema/knownhosts v1.3.1/go.mod h1:r7KTdC8l4uxWRyK2TpQZ/1o5HaSzh06ePQNxPwTcfiY=\ngithub.com/sourcegraph/conc v0.3.0 h1:OQTbbt6P72L20UqAkXXuLOj79LfEanQ+YQFNpLA9ySo=\ngithub.com/sourcegraph/conc v0.3.0/go.mod h1:Sdozi7LEKbFPqYX2/J+iBAM6HpqSLTASQIKqDmF7Mt0=\ngithub.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA=\ngithub.com/spdx/gordf v0.0.0-20201111095634-7098f93598fb/go.mod h1:uKWaldnbMnjsSAXRurWqqrdyZen1R7kxl8TkmWk2OyM=\ngithub.com/spdx/tools-golang v0.5.5 h1:61c0KLfAcNqAjlg6UNMdkwpMernhw3zVRwDZ2x9XOmk=\ngithub.com/spdx/tools-golang v0.5.5/go.mod h1:MVIsXx8ZZzaRWNQpUDhC4Dud34edUYJYecciXgrw5vE=\ngithub.com/spf13/afero v1.3.3/go.mod h1:5KUK8ByomD5Ti5Artl0RtHeI5pTF7MIDuXL3yY520V4=\ngithub.com/spf13/afero v1.6.0/go.mod h1:Ai8FlHk4v/PARR026UzYexafAt9roJ7LcLMAmO6Z93I=\ngithub.com/spf13/afero v1.11.0 h1:WJQKhtpdm3v2IzqG8VMqrr6Rf3UYpEF239Jy9wNepM8=\ngithub.com/spf13/afero v1.11.0/go.mod h1:GH9Y3pIexgf1MTIWtNGyogA5MwRIDXGUr+hbWNoBjkY=\ngithub.com/spf13/cast v1.4.1/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE=\ngithub.com/spf13/cast v1.7.0 h1:ntdiHjuueXFgm5nzDRdOS4yfT43P5Fnud6DH50rz/7w=\ngithub.com/spf13/cast v1.7.0/go.mod h1:ancEpBxwJDODSW/UG4rDrAqiKolqNNh2DX3mk86cAdo=\ngithub.com/spf13/cobra v1.3.0/go.mod h1:BrRVncBjOJa/eUcVVm9CE+oC6as8k+VYr4NY7WCi9V4=\ngithub.com/spf13/cobra v1.8.1 h1:e5/vxKd/rZsfSJMUX1agtjeTDf+qv1/JdBF8gg5k9ZM=\ngithub.com/spf13/cobra v1.8.1/go.mod h1:wHxEcudfqmLYa8iTfL+OuZPbBZkmvliBWKIezN3kD9Y=\ngithub.com/spf13/jwalterweatherman v1.1.0/go.mod h1:aNWZUN0dPAAO/Ljvb5BEdw96iTZ0EXowPYD95IqWIGo=\ngithub.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=\ngithub.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=\ngithub.com/spf13/viper v1.10.0/go.mod h1:SoyBPwAtKDzypXNDFKN5kzH7ppppbGZtls1UpIy5AsM=\ngithub.com/spf13/viper v1.19.0 h1:RWq5SEjt8o25SROyN3z2OrDB9l7RPd3lwTWU8EcEdcI=\ngithub.com/spf13/viper v1.19.0/go.mod h1:GQUN9bilAbhU/jgc1bKs99f/suXKeUMct8Adx5+Ntkg=\ngithub.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=\ngithub.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=\ngithub.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=\ngithub.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=\ngithub.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY=\ngithub.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA=\ngithub.com/stretchr/testify v1.2.2/go.mod 
h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=\ngithub.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=\ngithub.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=\ngithub.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=\ngithub.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=\ngithub.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=\ngithub.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=\ngithub.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=\ngithub.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=\ngithub.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=\ngithub.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=\ngithub.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=\ngithub.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=\ngithub.com/subosito/gotenv v1.2.0/go.mod h1:N0PQaV/YGNqwC0u51sEeR/aUtSLEXKX9iv69rRypqCw=\ngithub.com/subosito/gotenv v1.6.0 h1:9NlTDc1FTs4qu0DDq7AEtTPNw6SVm7uBMsUCUjABIf8=\ngithub.com/subosito/gotenv v1.6.0/go.mod h1:Dk4QP5c2W3ibzajGcXpNraDfq2IrhjMIvMSWPKKo0FU=\ngithub.com/sylabs/sif/v2 v2.17.1 h1:p6Sl0LWyShXBj2SBsS1dMOMIMrZHe8pwBnBrYt6uo4M=\ngithub.com/sylabs/sif/v2 v2.17.1/go.mod h1:XUGB6AQUXGkms3qPOPdevctT3lBLRLWZNWHVnt5HMKE=\ngithub.com/sylabs/squashfs v1.0.0 h1:xAyMS21ogglkuR5HaY55PCfqY3H32ma9GkasTYo28Zg=\ngithub.com/sylabs/squashfs v1.0.0/go.mod h1:rhWzvgefq1X+R+LZdts10hfMsTg3g74OfGunW8tvg/4=\ngithub.com/terminalstatic/go-xsd-validate v0.1.5 h1:RqpJnf6HGE2CB/lZB1A8BYguk8uRtcvYAPLCF15qguo=\ngithub.com/terminalstatic/go-xsd-validate v0.1.5/go.mod h1:18lsvYFofBflqCrvo1umpABZ99+GneNTw2kEEc8UPJw=\ngithub.com/therootcompany/xz v1.0.1 h1:CmOtsn1CbtmyYiusbfmhmkpAAETj0wBIH6kCYaX+xzw=\ngithub.com/therootcompany/xz v1.0.1/go.mod h1:3K3UH1yCKgBneZYhuQUvJ9HPD19UEXEI0BWbMn8qNMY=\ngithub.com/tv42/httpunix v0.0.0-20150427012821-b75d8614f926/go.mod h1:9ESjWnEqriFuLhtthL60Sar/7RFoluCcXsuvEwTV5KM=\ngithub.com/ulikunitz/xz v0.5.8/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14=\ngithub.com/ulikunitz/xz v0.5.9/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14=\ngithub.com/ulikunitz/xz v0.5.12 h1:37Nm15o69RwBkXM0J6A5OlE67RZTfzUxTj8fB3dfcsc=\ngithub.com/ulikunitz/xz v0.5.12/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14=\ngithub.com/urfave/cli v1.22.12/go.mod h1:sSBEIC79qR6OvcmsD4U3KABeOTxDqQtdDnaFuUN30b8=\ngithub.com/urfave/cli/v2 v2.3.0/go.mod h1:LJmUH05zAU44vOAcrfzZQKsZbVcdbOG8rtL3/XcUArI=\ngithub.com/vbatts/go-mtree v0.5.4 h1:OMAb8jaCyiFA7zXj0Zc/oARcxBDBoeu2LizjB8BVJl0=\ngithub.com/vbatts/go-mtree v0.5.4/go.mod h1:5GqJbVhm9BBiCc4K5uc/c42FPgXulHaQs4sFUEfIWMo=\ngithub.com/vbatts/tar-split v0.11.3 h1:hLFqsOLQ1SsppQNTMpkpPXClLDfC2A3Zgy9OUU+RVck=\ngithub.com/vbatts/tar-split v0.11.3/go.mod h1:9QlHN18E+fEH7RdG+QAJJcuya3rqT7eXSTY7wGrAokY=\ngithub.com/vifraa/gopom v1.0.0 h1:L9XlKbyvid8PAIK8nr0lihMApJQg/12OBvMA28BcWh0=\ngithub.com/vifraa/gopom v1.0.0/go.mod h1:oPa1dcrGrtlO37WPDBm5SqHAT+wTgF8An1Q71Z6Vv4o=\ngithub.com/wagoodman/go-partybus v0.0.0-20230516145632-8ccac152c651 h1:jIVmlAFIqV3d+DOxazTR9v+zgj8+VYuQBzPgBZvWBHA=\ngithub.com/wagoodman/go-partybus v0.0.0-20230516145632-8ccac152c651/go.mod h1:b26F2tHLqaoRQf8DywqzVaV1MQ9yvjb0OMcNl7Nxu20=\ngithub.com/wagoodman/go-progress 
v0.0.0-20230925121702-07e42b3cdba0 h1:0KGbf+0SMg+UFy4e1A/CPVvXn21f1qtWdeJwxZFoQG8=\ngithub.com/wagoodman/go-progress v0.0.0-20230925121702-07e42b3cdba0/go.mod h1:jLXFoL31zFaHKAAyZUh+sxiTDFe1L1ZHrcK2T1itVKA=\ngithub.com/xanzy/ssh-agent v0.3.3 h1:+/15pJfg/RsTxqYcX6fHqOXZwwMP+2VyYWJeWM2qQFM=\ngithub.com/xanzy/ssh-agent v0.3.3/go.mod h1:6dzNDKs0J9rVPHPhaGCukekBHKqfl+L3KghI1Bc68Uw=\ngithub.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb h1:zGWFAtiMcyryUHoUjUJX0/lt1H2+i2Ka2n+D3DImSNo=\ngithub.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU=\ngithub.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 h1:EzJWgHovont7NscjpAxXsDA8S8BMYve8Y5+7cuRE7R0=\ngithub.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415/go.mod h1:GwrjFmJcFw6At/Gs6z4yjiIwzuJ1/+UwLxMQDVQXShQ=\ngithub.com/xeipuuv/gojsonschema v1.2.0 h1:LhYJRs+L4fBtjZUfuSZIKGeVu0QRy8e5Xi7D17UxZ74=\ngithub.com/xeipuuv/gojsonschema v1.2.0/go.mod h1:anYRn/JVcOK2ZgGU+IjEV4nwlhoK5sQluxsYJ78Id3Y=\ngithub.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 h1:nIPpBwaJSVYIxUFsDv3M8ofmx9yWTog9BfvIu0q41lo=\ngithub.com/xi2/xz v0.0.0-20171230120015-48954b6210f8/go.mod h1:HUYIGzjTL3rfEspMxjDjgmT5uz5wzYJKVo23qUhYTos=\ngithub.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e h1:JVG44RsyaB9T2KIHavMF/ppJZNG9ZpyihvCd0w101no=\ngithub.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e/go.mod h1:RbqR21r5mrJuqunuUZ/Dhy/avygyECGrLceyNeo4LiM=\ngithub.com/yuin/goldmark v1.1.25/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=\ngithub.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=\ngithub.com/yuin/goldmark v1.1.32/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=\ngithub.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=\ngithub.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k=\ngithub.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=\ngo.etcd.io/etcd/api/v3 v3.5.1/go.mod h1:cbVKeC6lCfl7j/8jBhAK6aIYO9XOjdptoxU/nLQcPvs=\ngo.etcd.io/etcd/client/pkg/v3 v3.5.1/go.mod h1:IJHfcCEKxYu1Os13ZdwCwIUTUVGYTSAM3YSwc9/Ac1g=\ngo.etcd.io/etcd/client/v2 v2.305.1/go.mod h1:pMEacxZW7o8pg4CrFE7pquyCJJzZvkvdD2RibOCCCGs=\ngo.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU=\ngo.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8=\ngo.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw=\ngo.opencensus.io v0.22.3/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw=\ngo.opencensus.io v0.22.4/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw=\ngo.opencensus.io v0.22.5/go.mod h1:5pWMHQbX5EPX2/62yrJeAkowc+lfs/XD7Uxpq3pI6kk=\ngo.opencensus.io v0.23.0/go.mod h1:XItmlyltB5F7CS4xOC1DcqMoFqwtC6OG2xF7mCv7P7E=\ngo.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0=\ngo.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo=\ngo.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.49.0 h1:jq9TW8u3so/bN+JPT166wjOI6/vQPF6Xe7nMNIltagk=\ngo.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.49.0/go.mod h1:p8pYQP+m5XfbZm9fxtSKAbM6oIllS7s2AfxrChvc7iw=\ngo.opentelemetry.io/otel v1.24.0 h1:0LAOdjNmQeSTzGBzduGe/rU4tZhMwL5rWgtp9Ku5Jfo=\ngo.opentelemetry.io/otel v1.24.0/go.mod h1:W7b9Ozg4nkF5tWI5zsXkaKKDjdVjpD4oAt9Qi/MArHo=\ngo.opentelemetry.io/otel/exporters/otlp/otlptrace v1.19.0 
h1:Mne5On7VWdx7omSrSSZvM4Kw7cS7NQkOOmLcgscI51U=\ngo.opentelemetry.io/otel/exporters/otlp/otlptrace v1.19.0/go.mod h1:IPtUMKL4O3tH5y+iXVyAXqpAwMuzC1IrxVS81rummfE=\ngo.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.19.0 h1:IeMeyr1aBvBiPVYihXIaeIZba6b8E1bYp7lbdxK8CQg=\ngo.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.19.0/go.mod h1:oVdCUtjq9MK9BlS7TtucsQwUcXcymNiEDjgDD2jMtZU=\ngo.opentelemetry.io/otel/metric v1.24.0 h1:6EhoGWWK28x1fbpA4tYTOWBkPefTDQnb8WSGXlc88kI=\ngo.opentelemetry.io/otel/metric v1.24.0/go.mod h1:VYhLe1rFfxuTXLgj4CBiyz+9WYBA8pNGJgDcSFRKBco=\ngo.opentelemetry.io/otel/sdk v1.21.0 h1:FTt8qirL1EysG6sTQRZ5TokkU8d0ugCj8htOgThZXQ8=\ngo.opentelemetry.io/otel/sdk v1.21.0/go.mod h1:Nna6Yv7PWTdgJHVRD9hIYywQBRx7pbox6nwBnZIxl/E=\ngo.opentelemetry.io/otel/trace v1.24.0 h1:CsKnnL4dUAr/0llH9FKuc698G04IrpWV0MQA/Y1YELI=\ngo.opentelemetry.io/otel/trace v1.24.0/go.mod h1:HPc3Xr/cOApsBI154IU0OI0HJexz+aw5uPdbs3UCjNU=\ngo.opentelemetry.io/proto/otlp v0.7.0/go.mod h1:PqfVotwruBrMGOCsRd/89rSnXhoiJIqeYNgFYFoEGnI=\ngo.opentelemetry.io/proto/otlp v1.0.0 h1:T0TX0tmXU8a3CbNXzEKGeU5mIVOdf0oykP+u2lIVU/I=\ngo.opentelemetry.io/proto/otlp v1.0.0/go.mod h1:Sy6pihPLfYHkr3NkUbEhGHFhINUSI/v80hjKIs5JXpM=\ngo.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc=\ngo.uber.org/atomic v1.9.0 h1:ECmE8Bn/WFTYwEW/bpKD3M8VtR/zQVbavAoalC1PYyE=\ngo.uber.org/atomic v1.9.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc=\ngo.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=\ngo.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE=\ngo.uber.org/multierr v1.6.0/go.mod h1:cdWPpRnG4AhwMwsgIHip0KRBQjJy5kYEpYjJxpXp9iU=\ngo.uber.org/multierr v1.9.0 h1:7fIwc/ZtS0q++VgcfqFDxSBZVv/Xo49/SYnDFupUwlI=\ngo.uber.org/multierr v1.9.0/go.mod h1:X2jQV1h+kxSjClGpnseKVIxpmcjrj7MNnI0bnlfKTVQ=\ngo.uber.org/zap v1.17.0/go.mod h1:MXVU+bhUf/A7Xi2HNOnopQOrmycQ5Ih87HtOu4q5SSo=\ngolang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=\ngolang.org/x/crypto v0.0.0-20181029021203-45a5f77698d3/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=\ngolang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=\ngolang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=\ngolang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=\ngolang.org/x/crypto v0.0.0-20190820162420-60c769a6c586/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=\ngolang.org/x/crypto v0.0.0-20190923035154-9ee001bba392/go.mod h1:/lpIB1dKB+9EgE3H3cr1v9wB50oz8l4C4h62xy7jSTY=\ngolang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=\ngolang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=\ngolang.org/x/crypto v0.0.0-20210817164053-32db794688a5/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=\ngolang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=\ngolang.org/x/crypto v0.0.0-20220622213112-05595931fe9d/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4=\ngolang.org/x/crypto v0.40.0 h1:r4x+VvoG5Fm+eJcxMaY8CQM7Lb0l1lsmjGBQ6s8BfKM=\ngolang.org/x/crypto v0.40.0/go.mod h1:Qr1vMER5WyS2dfPHAlsOj01wgLbsyWtFn/aY+5+ZdxY=\ngolang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod 
h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=\ngolang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=\ngolang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8=\ngolang.org/x/exp v0.0.0-20190829153037-c13cbed26979/go.mod h1:86+5VVa7VpoJ4kLfm080zCjGlMRFzhUhsZKEZO7MGek=\ngolang.org/x/exp v0.0.0-20191030013958-a1ab85dbe136/go.mod h1:JXzH8nQsPlswgeRAPE3MuO9GYsAcnJvJ4vnMwN/5qkY=\ngolang.org/x/exp v0.0.0-20191129062945-2f5052295587/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4=\ngolang.org/x/exp v0.0.0-20191227195350-da58074b4299/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4=\ngolang.org/x/exp v0.0.0-20200119233911-0405dc783f0a/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4=\ngolang.org/x/exp v0.0.0-20200207192155-f17229e696bd/go.mod h1:J/WKrq2StrnmMY6+EHIKF9dgMWnmCNThgcyBT1FY9mM=\ngolang.org/x/exp v0.0.0-20200224162631-6cc2880d07d6/go.mod h1:3jZMyOhIsHpP37uCMkUooju7aAi5cS1Q23tOzKc+0MU=\ngolang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 h1:2dVuKD2vS7b0QIHQbpyTISPd0LeHDbnYEryqj5Q1ug8=\ngolang.org/x/exp v0.0.0-20240719175910-8a7402abbf56/go.mod h1:M4RDyNAINzryxdtnbRXRL/OHtkFuWGRjvuhBJpk2IlY=\ngolang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js=\ngolang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=\ngolang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=\ngolang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU=\ngolang.org/x/lint v0.0.0-20190301231843-5614ed5bae6f/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=\ngolang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=\ngolang.org/x/lint v0.0.0-20190409202823-959b441ac422/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=\ngolang.org/x/lint v0.0.0-20190909230951-414d861bb4ac/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=\ngolang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=\ngolang.org/x/lint v0.0.0-20191125180803-fdd1cda4f05f/go.mod h1:5qLYkcX4OjUUV8bRuDixDT3tpyyb+LUpUlRWLxfhWrs=\ngolang.org/x/lint v0.0.0-20200130185559-910be7a94367/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY=\ngolang.org/x/lint v0.0.0-20200302205851-738671d3881b/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY=\ngolang.org/x/lint v0.0.0-20201208152925-83fdc39ff7b5/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY=\ngolang.org/x/lint v0.0.0-20210508222113-6edffad5e616/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY=\ngolang.org/x/mobile v0.0.0-20190312151609-d3739f865fa6/go.mod h1:z+o9i4GpDbdi3rU15maQ/Ox0txvL9dWGYEHz965HBQE=\ngolang.org/x/mobile v0.0.0-20190719004257-d2bd2a29d028/go.mod h1:E/iHnbuqvinMTCcRqshq8CkpyQDoeVncDDYHnLhea+o=\ngolang.org/x/mod v0.0.0-20190513183733-4bf6d317e70e/go.mod h1:mXi4GBBbnImb6dmsKGUJ2LatrhH/nqhxcFungHvyanc=\ngolang.org/x/mod v0.1.0/go.mod h1:0QHyrYULN0/3qlju5TqG8bIK38QM8yzMo5ekMj3DlcY=\ngolang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg=\ngolang.org/x/mod v0.1.1-0.20191107180719-034126e5016b/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg=\ngolang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=\ngolang.org/x/mod v0.3.0/go.mod 
h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=\ngolang.org/x/mod v0.4.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=\ngolang.org/x/mod v0.4.1/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=\ngolang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=\ngolang.org/x/mod v0.5.0/go.mod h1:5OXOZSfqPIIbmVBIIKWRFfZjPR0E5r58TLhUjH0a2Ro=\ngolang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=\ngolang.org/x/mod v0.26.0 h1:EGMPT//Ezu+ylkCijjPc+f4Aih7sZvaAr+O3EHBxvZg=\ngolang.org/x/mod v0.26.0/go.mod h1:/j6NAhSk8iQ723BGAUyoAcn7SlD7s15Dp9Nd/SfeaFQ=\ngolang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=\ngolang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=\ngolang.org/x/net v0.0.0-20181023162649-9b4f9f5ad519/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=\ngolang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=\ngolang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=\ngolang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=\ngolang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=\ngolang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=\ngolang.org/x/net v0.0.0-20190501004415-9ce7a6920f09/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=\ngolang.org/x/net v0.0.0-20190503192946-f4e77d36d62c/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=\ngolang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks=\ngolang.org/x/net v0.0.0-20190613194153-d28f0bde5980/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=\ngolang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=\ngolang.org/x/net v0.0.0-20190628185345-da137c7871d7/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=\ngolang.org/x/net v0.0.0-20190724013045-ca1201d0de80/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=\ngolang.org/x/net v0.0.0-20190923162816-aa69164e4478/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=\ngolang.org/x/net v0.0.0-20191209160850-c0dbc17a3553/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=\ngolang.org/x/net v0.0.0-20200114155413-6afb5195e5aa/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=\ngolang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=\ngolang.org/x/net v0.0.0-20200222125558-5a598a2470a0/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=\ngolang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=\ngolang.org/x/net v0.0.0-20200301022130-244492dfa37a/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=\ngolang.org/x/net v0.0.0-20200324143707-d3edc9973b7e/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=\ngolang.org/x/net v0.0.0-20200501053045-e0ff5e5a1de5/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=\ngolang.org/x/net v0.0.0-20200506145744-7e3656a0809f/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=\ngolang.org/x/net v0.0.0-20200513185701-a91f0712d120/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=\ngolang.org/x/net v0.0.0-20200520182314-0ba52f642ac2/go.mod 
h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=\ngolang.org/x/net v0.0.0-20200625001655-4c5254603344/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=\ngolang.org/x/net v0.0.0-20200707034311-ab3426394381/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=\ngolang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=\ngolang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=\ngolang.org/x/net v0.0.0-20201031054903-ff519b6c9102/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=\ngolang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=\ngolang.org/x/net v0.0.0-20201209123823-ac852fbbde11/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=\ngolang.org/x/net v0.0.0-20210119194325-5f4716e94777/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=\ngolang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=\ngolang.org/x/net v0.0.0-20210316092652-d523dce5a7f4/go.mod h1:RBQZq4jEuRlivfhVLdyRGr576XBO4/greRjx4P4O3yc=\ngolang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM=\ngolang.org/x/net v0.0.0-20210410081132-afb366fc7cd1/go.mod h1:9tjilg8BloeKEkVJvy7fQ90B1CfIiPueXVOjqfkSzI8=\ngolang.org/x/net v0.0.0-20210503060351-7fd8e65b6420/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=\ngolang.org/x/net v0.0.0-20210813160813-60bc85c4be6d/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=\ngolang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=\ngolang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=\ngolang.org/x/net v0.42.0 h1:jzkYrhi3YQWD6MLBJcsklgQsoAcw89EcZbJw8Z614hs=\ngolang.org/x/net v0.42.0/go.mod h1:FF1RA5d3u7nAYA4z2TkclSCKh68eSXtiFwcWQpPXdt8=\ngolang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=\ngolang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=\ngolang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=\ngolang.org/x/oauth2 v0.0.0-20191202225959-858c2ad4c8b6/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=\ngolang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=\ngolang.org/x/oauth2 v0.0.0-20200902213428-5d25da1a8d43/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A=\ngolang.org/x/oauth2 v0.0.0-20201109201403-9fd604954f58/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A=\ngolang.org/x/oauth2 v0.0.0-20201208152858-08078c50e5b5/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A=\ngolang.org/x/oauth2 v0.0.0-20210218202405-ba52d332ba99/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A=\ngolang.org/x/oauth2 v0.0.0-20210220000619-9bb904979d93/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A=\ngolang.org/x/oauth2 v0.0.0-20210313182246-cd4f82c27b84/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A=\ngolang.org/x/oauth2 v0.0.0-20210514164344-f6687ab2804c/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A=\ngolang.org/x/oauth2 v0.0.0-20210628180205-a41e5a781914/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A=\ngolang.org/x/oauth2 v0.0.0-20210805134026-6f1e6394065a/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A=\ngolang.org/x/oauth2 v0.0.0-20210819190943-2bc19b11175f/go.mod 
h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A=\ngolang.org/x/oauth2 v0.0.0-20211005180243-6b3c2da341f1/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A=\ngolang.org/x/oauth2 v0.0.0-20211104180415-d3ed0bb246c8/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A=\ngolang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=\ngolang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=\ngolang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=\ngolang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=\ngolang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=\ngolang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=\ngolang.org/x/sync v0.0.0-20200317015054-43a5402ce75a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=\ngolang.org/x/sync v0.0.0-20200625203802-6e8e738ad208/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=\ngolang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=\ngolang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=\ngolang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=\ngolang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=\ngolang.org/x/sync v0.16.0 h1:ycBJEhp9p4vXvUZNszeOq0kGTPghopOL8q0fq3vstxw=\ngolang.org/x/sync v0.16.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=\ngolang.org/x/sys v0.0.0-20180823144017-11551d06cbcc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=\ngolang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=\ngolang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=\ngolang.org/x/sys v0.0.0-20181026203630-95b1ffbd15a5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=\ngolang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=\ngolang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=\ngolang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=\ngolang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20190502145724-3ef323f4f1fd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20190507160741-ecd444e8653b/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20190606165138-5da285871e9c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20190624142023-c5567b49c5d0/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20190726091711-fc99dfbffb4e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20190922100055-0a153f010e69/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20190924154521-2837fb4f24fe/go.mod 
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20191001151750-bb3f8db39f24/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20191008105621-543471e840be/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20191228213918-04cbcbbfeed8/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20200113162924-86b910548bc1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20200122134326-e047566fdf82/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20200124204421-9fbb57f87de9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20200202164722-d101bd2416d5/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20200212091648-12a6c2dcc1e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20200302150141-5c8b2ff67527/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20200331124033-c3d80250170d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20200501052902-10377860bb8e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20200511232937-7e40ca221e25/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20200515095857-1151b9dac4a9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20200523222454-059865788121/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20200803210538-64077c9b5642/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20200905004654-be1d3432aa8f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20201201145000-ef89a241ccb3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20210104204734-6f8348627aad/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20210119212857-b64e53b001e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20210220050731-9a76102bfb43/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20210303074136-134d130e1a04/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20210305230114-8fe3ee5dd75b/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20210315160823-c6e025ad8005/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20210320140829-1e4c9ba3b0c4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys 
v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20210403161142-5e06dd20ab57/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=\ngolang.org/x/sys v0.0.0-20210514084401-e8d321eab015/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=\ngolang.org/x/sys v0.0.0-20210603125802-9665404d3644/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=\ngolang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=\ngolang.org/x/sys v0.0.0-20210616094352-59db8d763f22/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=\ngolang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=\ngolang.org/x/sys v0.0.0-20210806184541-e5e7981a1069/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=\ngolang.org/x/sys v0.0.0-20210816183151-1e6c022a8912/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=\ngolang.org/x/sys v0.0.0-20210823070655-63515b42dcdf/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=\ngolang.org/x/sys v0.0.0-20210908233432-aa78b53d3365/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=\ngolang.org/x/sys v0.0.0-20210927094055-39ccf1dd6fa6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=\ngolang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=\ngolang.org/x/sys v0.0.0-20211025201205-69cdffdb9359/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=\ngolang.org/x/sys v0.0.0-20211124211545-fe61309f8881/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=\ngolang.org/x/sys v0.0.0-20211205182925-97ca703d548d/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=\ngolang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=\ngolang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=\ngolang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=\ngolang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=\ngolang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=\ngolang.org/x/sys v0.0.0-20220906165534-d0df966e6959/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=\ngolang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=\ngolang.org/x/sys v0.34.0 h1:H5Y5sJ2L2JRdyv7ROF1he/lPdvFsd0mJHFw2ThKHxLA=\ngolang.org/x/sys v0.34.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=\ngolang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=\ngolang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=\ngolang.org/x/term v0.33.0 h1:NuFncQrRcaRvVmgRkvM3j/F00gWIAlcmlB8ACEKmGIg=\ngolang.org/x/term v0.33.0/go.mod h1:s18+ql9tYWp1IfpV9DmCtQDDSRBUjKaw9M1eAv5UeF0=\ngolang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=\ngolang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=\ngolang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=\ngolang.org/x/text v0.3.2/go.mod 
h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=\ngolang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=\ngolang.org/x/text v0.3.4/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=\ngolang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=\ngolang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=\ngolang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=\ngolang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=\ngolang.org/x/text v0.27.0 h1:4fGWRpyh641NLlecmyl4LOe6yDdfaYNrGb2zdfo4JV4=\ngolang.org/x/text v0.27.0/go.mod h1:1D28KMCvyooCX9hBiosv5Tz/+YLxj0j7XhWjpSUF7CU=\ngolang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=\ngolang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=\ngolang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=\ngolang.org/x/time v0.12.0 h1:ScB/8o8olJvc+CQPWrK3fPZNfh7qgwCrY0zJmoEQLSE=\ngolang.org/x/time v0.12.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg=\ngolang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=\ngolang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=\ngolang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY=\ngolang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=\ngolang.org/x/tools v0.0.0-20190312151545-0bb0c0a6e846/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=\ngolang.org/x/tools v0.0.0-20190312170243-e65039ee4138/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=\ngolang.org/x/tools v0.0.0-20190425150028-36563e24a262/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=\ngolang.org/x/tools v0.0.0-20190506145303-2d16b83fe98c/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=\ngolang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=\ngolang.org/x/tools v0.0.0-20190606124116-d0a3d012864b/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc=\ngolang.org/x/tools v0.0.0-20190621195816-6e04913cbbac/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc=\ngolang.org/x/tools v0.0.0-20190628153133-6cdbf07be9d0/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc=\ngolang.org/x/tools v0.0.0-20190816200558-6889da9d5479/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=\ngolang.org/x/tools v0.0.0-20190907020128-2ca718005c18/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=\ngolang.org/x/tools v0.0.0-20190911174233-4f2ddba30aff/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=\ngolang.org/x/tools v0.0.0-20191012152004-8de300cfc20a/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=\ngolang.org/x/tools v0.0.0-20191113191852-77e3bb0ad9e7/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=\ngolang.org/x/tools v0.0.0-20191115202509-3a792d9c32b2/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=\ngolang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=\ngolang.org/x/tools v0.0.0-20191125144606-a911d9008d1f/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=\ngolang.org/x/tools v0.0.0-20191130070609-6e064ea0cf2d/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=\ngolang.org/x/tools v0.0.0-20191216173652-a0e659d51361/go.mod 
h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=\ngolang.org/x/tools v0.0.0-20191227053925-7b8e75db28f4/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=\ngolang.org/x/tools v0.0.0-20200117161641-43d50277825c/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=\ngolang.org/x/tools v0.0.0-20200122220014-bf1340f18c4a/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=\ngolang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=\ngolang.org/x/tools v0.0.0-20200204074204-1cc6d1ef6c74/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=\ngolang.org/x/tools v0.0.0-20200207183749-b753a1ba74fa/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=\ngolang.org/x/tools v0.0.0-20200212150539-ea181f53ac56/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=\ngolang.org/x/tools v0.0.0-20200224181240-023911ca70b2/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=\ngolang.org/x/tools v0.0.0-20200227222343-706bc42d1f0d/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=\ngolang.org/x/tools v0.0.0-20200304193943-95d2e580d8eb/go.mod h1:o4KQGtdN14AW+yjsvvwRTJJuXz8XRtIHtEnmAXLyFUw=\ngolang.org/x/tools v0.0.0-20200312045724-11d5b4c81c7d/go.mod h1:o4KQGtdN14AW+yjsvvwRTJJuXz8XRtIHtEnmAXLyFUw=\ngolang.org/x/tools v0.0.0-20200331025713-a30bf2db82d4/go.mod h1:Sl4aGygMT6LrqrWclx+PTx3U+LnKx/seiNR+3G19Ar8=\ngolang.org/x/tools v0.0.0-20200501065659-ab2804fb9c9d/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=\ngolang.org/x/tools v0.0.0-20200512131952-2bc93b1c0c88/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=\ngolang.org/x/tools v0.0.0-20200515010526-7d3b6ebf133d/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=\ngolang.org/x/tools v0.0.0-20200618134242-20370b0cb4b2/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=\ngolang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=\ngolang.org/x/tools v0.0.0-20200729194436-6467de6f59a7/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA=\ngolang.org/x/tools v0.0.0-20200804011535-6c149bb5ef0d/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA=\ngolang.org/x/tools v0.0.0-20200825202427-b303f430e36d/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA=\ngolang.org/x/tools v0.0.0-20200904185747-39188db58858/go.mod h1:Cj7w3i3Rnn0Xh82ur9kSqwfTHTeVxaDqrfMjpcNT6bE=\ngolang.org/x/tools v0.0.0-20201110124207-079ba7bd75cd/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=\ngolang.org/x/tools v0.0.0-20201201161351-ac6f37ff4c2a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=\ngolang.org/x/tools v0.0.0-20201208233053-a543418bbed2/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=\ngolang.org/x/tools v0.0.0-20210105154028-b0ab187a4818/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=\ngolang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=\ngolang.org/x/tools v0.1.0/go.mod h1:xkSsbof2nBLbhDlRMhhhyNLN/zl3eTqcnHD5viDpcZ0=\ngolang.org/x/tools v0.1.1/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=\ngolang.org/x/tools v0.1.2/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=\ngolang.org/x/tools v0.1.3/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=\ngolang.org/x/tools v0.1.4/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=\ngolang.org/x/tools v0.1.5/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=\ngolang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=\ngolang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod 
h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=\ngolang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=\ngolang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=\ngolang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=\ngolang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 h1:H2TDz8ibqkAF6YGhCdN3jS9O0/s90v0rJh3X/OLHEUk=\ngolang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2/go.mod h1:K8+ghG5WaK9qNqU5K3HdILfMLy1f3aNYFI/wnl100a8=\ngoogle.golang.org/api v0.4.0/go.mod h1:8k5glujaEP+g9n7WNsDg8QP6cUVNI86fCNMcbazEtwE=\ngoogle.golang.org/api v0.7.0/go.mod h1:WtwebWUNSVBH/HAw79HIFXZNqEvBhG+Ra+ax0hx3E3M=\ngoogle.golang.org/api v0.8.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg=\ngoogle.golang.org/api v0.9.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg=\ngoogle.golang.org/api v0.13.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI=\ngoogle.golang.org/api v0.14.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI=\ngoogle.golang.org/api v0.15.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI=\ngoogle.golang.org/api v0.17.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE=\ngoogle.golang.org/api v0.18.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE=\ngoogle.golang.org/api v0.19.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE=\ngoogle.golang.org/api v0.20.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE=\ngoogle.golang.org/api v0.22.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE=\ngoogle.golang.org/api v0.24.0/go.mod h1:lIXQywCXRcnZPGlsd8NbLnOjtAoL6em04bJ9+z0MncE=\ngoogle.golang.org/api v0.28.0/go.mod h1:lIXQywCXRcnZPGlsd8NbLnOjtAoL6em04bJ9+z0MncE=\ngoogle.golang.org/api v0.29.0/go.mod h1:Lcubydp8VUV7KeIHD9z2Bys/sm/vGKnG1UHuDBSrHWM=\ngoogle.golang.org/api v0.30.0/go.mod h1:QGmEvQ87FHZNiUVJkT14jQNYJ4ZJjdRF23ZXz5138Fc=\ngoogle.golang.org/api v0.35.0/go.mod h1:/XrVsuzM0rZmrsbjJutiuftIzeuTQcEeaYcSk/mQ1dg=\ngoogle.golang.org/api v0.36.0/go.mod h1:+z5ficQTmoYpPn8LCUNVpK5I7hwkpjbcgqA7I34qYtE=\ngoogle.golang.org/api v0.40.0/go.mod h1:fYKFpnQN0DsDSKRVRcQSDQNtqWPfM9i+zNPxepjRCQ8=\ngoogle.golang.org/api v0.41.0/go.mod h1:RkxM5lITDfTzmyKFPt+wGrCJbVfniCr2ool8kTBzRTU=\ngoogle.golang.org/api v0.43.0/go.mod h1:nQsDGjRXMo4lvh5hP0TKqF244gqhGcr/YSIykhUk/94=\ngoogle.golang.org/api v0.47.0/go.mod h1:Wbvgpq1HddcWVtzsVLyfLp8lDg6AA241LmgIL59tHXo=\ngoogle.golang.org/api v0.48.0/go.mod h1:71Pr1vy+TAZRPkPs/xlCf5SsU8WjuAWv1Pfjbtukyy4=\ngoogle.golang.org/api v0.50.0/go.mod h1:4bNT5pAuq5ji4SRZm+5QIkjny9JAyVD/3gaSihNefaw=\ngoogle.golang.org/api v0.51.0/go.mod h1:t4HdrdoNgyN5cbEfm7Lum0lcLDLiise1F8qDKX00sOU=\ngoogle.golang.org/api v0.54.0/go.mod h1:7C4bFFOvVDGXjfDTAsgGwDgAxRDeQ4X8NvUedIt6z3k=\ngoogle.golang.org/api v0.55.0/go.mod h1:38yMfeP1kfjsl8isn0tliTjIb1rJXcQi4UXlbqivdVE=\ngoogle.golang.org/api v0.56.0/go.mod h1:38yMfeP1kfjsl8isn0tliTjIb1rJXcQi4UXlbqivdVE=\ngoogle.golang.org/api v0.57.0/go.mod h1:dVPlbZyBo2/OjBpmvNdpn2GRm6rPy75jyU7bmhdrMgI=\ngoogle.golang.org/api v0.59.0/go.mod h1:sT2boj7M9YJxZzgeZqXogmhfmRWDtPzT31xkieUbuZU=\ngoogle.golang.org/api v0.61.0/go.mod h1:xQRti5UdCmoCEqFxcz93fTl338AVqDgyaDRuOZ3hg9I=\ngoogle.golang.org/api v0.62.0/go.mod h1:dKmwPCydfsad4qCH08MSdgWjfHOyfpd4VtDGgRFdavw=\ngoogle.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM=\ngoogle.golang.org/appengine v1.4.0/go.mod 
h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=\ngoogle.golang.org/appengine v1.5.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=\ngoogle.golang.org/appengine v1.6.1/go.mod h1:i06prIuMbXzDqacNJfV5OdTW448YApPu5ww/cMBSeb0=\ngoogle.golang.org/appengine v1.6.5/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc=\ngoogle.golang.org/appengine v1.6.6/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc=\ngoogle.golang.org/appengine v1.6.7/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc=\ngoogle.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=\ngoogle.golang.org/genproto v0.0.0-20190307195333-5fe7a883aa19/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE=\ngoogle.golang.org/genproto v0.0.0-20190418145605-e7d98fc518a7/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE=\ngoogle.golang.org/genproto v0.0.0-20190425155659-357c62f0e4bb/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE=\ngoogle.golang.org/genproto v0.0.0-20190502173448-54afdca5d873/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE=\ngoogle.golang.org/genproto v0.0.0-20190801165951-fa694d86fc64/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc=\ngoogle.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc=\ngoogle.golang.org/genproto v0.0.0-20190911173649-1774047e7e51/go.mod h1:IbNlFCBrqXvoKpeg0TB2l7cyZUmoaFKYIwrEpbDKLA8=\ngoogle.golang.org/genproto v0.0.0-20191108220845-16a3f7862a1a/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc=\ngoogle.golang.org/genproto v0.0.0-20191115194625-c23dd37a84c9/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc=\ngoogle.golang.org/genproto v0.0.0-20191216164720-4f79533eabd1/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc=\ngoogle.golang.org/genproto v0.0.0-20191230161307-f3c370f40bfb/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc=\ngoogle.golang.org/genproto v0.0.0-20200115191322-ca5a22157cba/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc=\ngoogle.golang.org/genproto v0.0.0-20200122232147-0452cf42e150/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc=\ngoogle.golang.org/genproto v0.0.0-20200204135345-fa8e72b47b90/go.mod h1:GmwEX6Z4W5gMy59cAlVYjN9JhxgbQH6Gn+gFDQe2lzA=\ngoogle.golang.org/genproto v0.0.0-20200212174721-66ed5ce911ce/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=\ngoogle.golang.org/genproto v0.0.0-20200224152610-e50cd9704f63/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=\ngoogle.golang.org/genproto v0.0.0-20200228133532-8c2c7df3a383/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=\ngoogle.golang.org/genproto v0.0.0-20200305110556-506484158171/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=\ngoogle.golang.org/genproto v0.0.0-20200312145019-da6875a35672/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=\ngoogle.golang.org/genproto v0.0.0-20200331122359-1ee6d9798940/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=\ngoogle.golang.org/genproto v0.0.0-20200430143042-b979b6f78d84/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=\ngoogle.golang.org/genproto v0.0.0-20200511104702-f5ebc3bea380/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=\ngoogle.golang.org/genproto v0.0.0-20200513103714-09dca8ec2884/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=\ngoogle.golang.org/genproto v0.0.0-20200515170657-fc4c6c6a6587/go.mod h1:YsZOwe1myG/8QRHRsmBRE1LrgQY60beZKjly0O1fX9U=\ngoogle.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod 
h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo=\ngoogle.golang.org/genproto v0.0.0-20200618031413-b414f8b61790/go.mod h1:jDfRM7FcilCzHH/e9qn6dsT145K34l5v+OpcnNgKAAA=\ngoogle.golang.org/genproto v0.0.0-20200729003335-053ba62fc06f/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no=\ngoogle.golang.org/genproto v0.0.0-20200804131852-c06518451d9c/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no=\ngoogle.golang.org/genproto v0.0.0-20200825200019-8632dd797987/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no=\ngoogle.golang.org/genproto v0.0.0-20200904004341-0bd0a958aa1d/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no=\ngoogle.golang.org/genproto v0.0.0-20201109203340-2640f1f9cdfb/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no=\ngoogle.golang.org/genproto v0.0.0-20201201144952-b05cb90ed32e/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no=\ngoogle.golang.org/genproto v0.0.0-20201210142538-e3217bee35cc/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no=\ngoogle.golang.org/genproto v0.0.0-20201214200347-8c77b98c765d/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no=\ngoogle.golang.org/genproto v0.0.0-20210222152913-aa3ee6e6a81c/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no=\ngoogle.golang.org/genproto v0.0.0-20210303154014-9728d6b83eeb/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no=\ngoogle.golang.org/genproto v0.0.0-20210310155132-4ce2db91004e/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no=\ngoogle.golang.org/genproto v0.0.0-20210319143718-93e7006c17a6/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no=\ngoogle.golang.org/genproto v0.0.0-20210402141018-6c239bbf2bb1/go.mod h1:9lPAdzaEmUacj36I+k7YKbEc5CXzPIeORRgDAUOu28A=\ngoogle.golang.org/genproto v0.0.0-20210513213006-bf773b8c8384/go.mod h1:P3QM42oQyzQSnHPnZ/vqoCdDmzH28fzWByN9asMeM8A=\ngoogle.golang.org/genproto v0.0.0-20210602131652-f16073e35f0c/go.mod h1:UODoCrxHCcBojKKwX1terBiRUaqAsFqJiF615XL43r0=\ngoogle.golang.org/genproto v0.0.0-20210604141403-392c879c8b08/go.mod h1:UODoCrxHCcBojKKwX1terBiRUaqAsFqJiF615XL43r0=\ngoogle.golang.org/genproto v0.0.0-20210608205507-b6d2f5bf0d7d/go.mod h1:UODoCrxHCcBojKKwX1terBiRUaqAsFqJiF615XL43r0=\ngoogle.golang.org/genproto v0.0.0-20210624195500-8bfb893ecb84/go.mod h1:SzzZ/N+nwJDaO1kznhnlzqS8ocJICar6hYhVyhi++24=\ngoogle.golang.org/genproto v0.0.0-20210713002101-d411969a0d9a/go.mod h1:AxrInvYm1dci+enl5hChSFPOmmUF1+uAa/UsgNRWd7k=\ngoogle.golang.org/genproto v0.0.0-20210716133855-ce7ef5c701ea/go.mod h1:AxrInvYm1dci+enl5hChSFPOmmUF1+uAa/UsgNRWd7k=\ngoogle.golang.org/genproto v0.0.0-20210728212813-7823e685a01f/go.mod h1:ob2IJxKrgPT52GcgX759i1sleT07tiKowYBGbczaW48=\ngoogle.golang.org/genproto v0.0.0-20210805201207-89edb61ffb67/go.mod h1:ob2IJxKrgPT52GcgX759i1sleT07tiKowYBGbczaW48=\ngoogle.golang.org/genproto v0.0.0-20210813162853-db860fec028c/go.mod h1:cFeNkxwySK631ADgubI+/XFU/xp8FD5KIVV4rj8UC5w=\ngoogle.golang.org/genproto v0.0.0-20210821163610-241b8fcbd6c8/go.mod h1:eFjDcFEctNawg4eG61bRv87N7iHBWyVhJu7u1kqDUXY=\ngoogle.golang.org/genproto v0.0.0-20210828152312-66f60bf46e71/go.mod h1:eFjDcFEctNawg4eG61bRv87N7iHBWyVhJu7u1kqDUXY=\ngoogle.golang.org/genproto v0.0.0-20210831024726-fe130286e0e2/go.mod h1:eFjDcFEctNawg4eG61bRv87N7iHBWyVhJu7u1kqDUXY=\ngoogle.golang.org/genproto v0.0.0-20210903162649-d08c68adba83/go.mod h1:eFjDcFEctNawg4eG61bRv87N7iHBWyVhJu7u1kqDUXY=\ngoogle.golang.org/genproto v0.0.0-20210909211513-a8c4777a87af/go.mod h1:eFjDcFEctNawg4eG61bRv87N7iHBWyVhJu7u1kqDUXY=\ngoogle.golang.org/genproto v0.0.0-20210924002016-3dee208752a0/go.mod 
h1:5CzLGKJ67TSI2B9POpiiyGha0AjJvZIUgRMt1dSmuhc=\ngoogle.golang.org/genproto v0.0.0-20211008145708-270636b82663/go.mod h1:5CzLGKJ67TSI2B9POpiiyGha0AjJvZIUgRMt1dSmuhc=\ngoogle.golang.org/genproto v0.0.0-20211028162531-8db9c33dc351/go.mod h1:5CzLGKJ67TSI2B9POpiiyGha0AjJvZIUgRMt1dSmuhc=\ngoogle.golang.org/genproto v0.0.0-20211118181313-81c1377c94b1/go.mod h1:5CzLGKJ67TSI2B9POpiiyGha0AjJvZIUgRMt1dSmuhc=\ngoogle.golang.org/genproto v0.0.0-20211129164237-f09f9a12af12/go.mod h1:5CzLGKJ67TSI2B9POpiiyGha0AjJvZIUgRMt1dSmuhc=\ngoogle.golang.org/genproto v0.0.0-20211203200212-54befc351ae9/go.mod h1:5CzLGKJ67TSI2B9POpiiyGha0AjJvZIUgRMt1dSmuhc=\ngoogle.golang.org/genproto v0.0.0-20211206160659-862468c7d6e0/go.mod h1:5CzLGKJ67TSI2B9POpiiyGha0AjJvZIUgRMt1dSmuhc=\ngoogle.golang.org/genproto v0.0.0-20211208223120-3a66f561d7aa/go.mod h1:5CzLGKJ67TSI2B9POpiiyGha0AjJvZIUgRMt1dSmuhc=\ngoogle.golang.org/genproto v0.0.0-20240213162025-012b6fc9bca9 h1:9+tzLLstTlPTRyJTh+ah5wIMsBW5c4tQwGTN3thOW9Y=\ngoogle.golang.org/genproto v0.0.0-20240213162025-012b6fc9bca9/go.mod h1:mqHbVIp48Muh7Ywss/AD6I5kNVKZMmAa/QEW58Gxp2s=\ngoogle.golang.org/genproto/googleapis/api v0.0.0-20240311132316-a219d84964c2 h1:rIo7ocm2roD9DcFIX67Ym8icoGCKSARAiPljFhh5suQ=\ngoogle.golang.org/genproto/googleapis/api v0.0.0-20240311132316-a219d84964c2/go.mod h1:O1cOfN1Cy6QEYr7VxtjOyP5AdAuR0aJ/MYZaaof623Y=\ngoogle.golang.org/genproto/googleapis/rpc v0.0.0-20240401170217-c3f982113cda h1:LI5DOvAxUPMv/50agcLLoo+AdWc1irS9Rzz4vPuD1V4=\ngoogle.golang.org/genproto/googleapis/rpc v0.0.0-20240401170217-c3f982113cda/go.mod h1:WtryC6hu0hhx87FDGxWCDptyssuo68sk10vYjF+T9fY=\ngoogle.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c=\ngoogle.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38=\ngoogle.golang.org/grpc v1.21.1/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM=\ngoogle.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg=\ngoogle.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY=\ngoogle.golang.org/grpc v1.26.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk=\ngoogle.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk=\ngoogle.golang.org/grpc v1.27.1/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk=\ngoogle.golang.org/grpc v1.28.0/go.mod h1:rpkK4SK4GF4Ach/+MFLZUBavHOvF2JJB5uozKKal+60=\ngoogle.golang.org/grpc v1.29.1/go.mod h1:itym6AZVZYACWQqET3MqgPpjcuV5QH3BxFS3IjizoKk=\ngoogle.golang.org/grpc v1.30.0/go.mod h1:N36X2cJ7JwdamYAgDz+s+rVMFjt3numwzf/HckM8pak=\ngoogle.golang.org/grpc v1.31.0/go.mod h1:N36X2cJ7JwdamYAgDz+s+rVMFjt3numwzf/HckM8pak=\ngoogle.golang.org/grpc v1.31.1/go.mod h1:N36X2cJ7JwdamYAgDz+s+rVMFjt3numwzf/HckM8pak=\ngoogle.golang.org/grpc v1.33.1/go.mod h1:fr5YgcSWrqhRRxogOsw7RzIpsmvOZ6IcH4kBYTpR3n0=\ngoogle.golang.org/grpc v1.33.2/go.mod h1:JMHMWHQWaTccqQQlmk3MJZS+GWXOdAesneDmEnv2fbc=\ngoogle.golang.org/grpc v1.34.0/go.mod h1:WotjhfgOW/POjDeRt8vscBtXq+2VjORFy659qA51WJ8=\ngoogle.golang.org/grpc v1.35.0/go.mod h1:qjiiYl8FncCW8feJPdyg3v6XW24KsRHe+dy9BAGRRjU=\ngoogle.golang.org/grpc v1.36.0/go.mod h1:qjiiYl8FncCW8feJPdyg3v6XW24KsRHe+dy9BAGRRjU=\ngoogle.golang.org/grpc v1.36.1/go.mod h1:qjiiYl8FncCW8feJPdyg3v6XW24KsRHe+dy9BAGRRjU=\ngoogle.golang.org/grpc v1.37.0/go.mod h1:NREThFqKR1f3iQ6oBuvc5LadQuXVGo9rkm5ZGrQdJfM=\ngoogle.golang.org/grpc v1.37.1/go.mod h1:NREThFqKR1f3iQ6oBuvc5LadQuXVGo9rkm5ZGrQdJfM=\ngoogle.golang.org/grpc v1.38.0/go.mod 
h1:NREThFqKR1f3iQ6oBuvc5LadQuXVGo9rkm5ZGrQdJfM=\ngoogle.golang.org/grpc v1.39.0/go.mod h1:PImNr+rS9TWYb2O4/emRugxiyHZ5JyHW5F+RPnDzfrE=\ngoogle.golang.org/grpc v1.39.1/go.mod h1:PImNr+rS9TWYb2O4/emRugxiyHZ5JyHW5F+RPnDzfrE=\ngoogle.golang.org/grpc v1.40.0/go.mod h1:ogyxbiOoUXAkP+4+xa6PZSE9DZgIHtSpzjDTB9KAK34=\ngoogle.golang.org/grpc v1.40.1/go.mod h1:ogyxbiOoUXAkP+4+xa6PZSE9DZgIHtSpzjDTB9KAK34=\ngoogle.golang.org/grpc v1.42.0/go.mod h1:k+4IHHFw41K8+bbowsex27ge2rCb65oeWqe4jJ590SU=\ngoogle.golang.org/grpc v1.62.1 h1:B4n+nfKzOICUXMgyrNd19h/I9oH0L1pizfk1d4zSgTk=\ngoogle.golang.org/grpc v1.62.1/go.mod h1:IWTG0VlJLCh1SkC58F7np9ka9mx/WNkjl4PGJaiq+QE=\ngoogle.golang.org/grpc/cmd/protoc-gen-go-grpc v1.1.0/go.mod h1:6Kw0yEErY5E/yWrBtf03jp27GLLJujG4z/JK95pnjjw=\ngoogle.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8=\ngoogle.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0=\ngoogle.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM=\ngoogle.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE=\ngoogle.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo=\ngoogle.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=\ngoogle.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=\ngoogle.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=\ngoogle.golang.org/protobuf v1.24.0/go.mod h1:r/3tXBNzIEhYS9I1OUVjXDlt8tc493IdKGjtUeSXeh4=\ngoogle.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c=\ngoogle.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=\ngoogle.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=\ngoogle.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=\ngoogle.golang.org/protobuf v1.35.2 h1:8Ar7bF+apOIoThw1EdZl0p1oWvMqTHmpA2fRTyZO8io=\ngoogle.golang.org/protobuf v1.35.2/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE=\ngopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw=\ngopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=\ngopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=\ngopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=\ngopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=\ngopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=\ngopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI=\ngopkg.in/ini.v1 v1.66.2/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k=\ngopkg.in/ini.v1 v1.67.0 h1:Dgnx+6+nfE+IfzjUEISNeydPJh9AXNNsWbGP9KzCsOA=\ngopkg.in/ini.v1 v1.67.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k=\ngopkg.in/warnings.v0 v0.1.2 h1:wFXVbFY8DY5/xOe1ECiWdKCzZlxgshcYVNkBHstARME=\ngopkg.in/warnings.v0 v0.1.2/go.mod h1:jksf8JmL6Qr/oQM2OXTHunEvvTAsrWBLb6OOjuVWRNI=\ngopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=\ngopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=\ngopkg.in/yaml.v2 
v2.2.3/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=\ngopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=\ngopkg.in/yaml.v2 v2.2.5/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=\ngopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=\ngopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=\ngopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=\ngopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=\ngopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=\ngopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=\ngotest.tools/v3 v3.0.3 h1:4AuOwCGf4lLR9u3YOe2awrHygurzhO/HeQ6laiA6Sx0=\ngotest.tools/v3 v3.0.3/go.mod h1:Z7Lb0S5l+klDB31fvDQX8ss/FlKDxtlFlw3Oa8Ymbl8=\nhonnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=\nhonnef.co/go/tools v0.0.0-20190106161140-3f1c8253044a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=\nhonnef.co/go/tools v0.0.0-20190418001031-e561f6794a2a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=\nhonnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=\nhonnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg=\nhonnef.co/go/tools v0.0.1-2020.1.3/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k=\nhonnef.co/go/tools v0.0.1-2020.1.4/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k=\nmodernc.org/gc/v3 v3.0.0-20240107210532-573471604cb6 h1:5D53IMaUuA5InSeMu9eJtlQXS2NxAhyWQvkKEgXZhHI=\nmodernc.org/gc/v3 v3.0.0-20240107210532-573471604cb6/go.mod h1:Qz0X07sNOR1jWYCrJMEnbW/X55x206Q7Vt4mz6/wHp4=\nmodernc.org/libc v1.55.3 h1:AzcW1mhlPNrRtjS5sS+eW2ISCgSOLLNyFzRh/V3Qj/U=\nmodernc.org/libc v1.55.3/go.mod h1:qFXepLhz+JjFThQ4kzwzOjA/y/artDeg+pcYnY+Q83w=\nmodernc.org/mathutil v1.6.0 h1:fRe9+AmYlaej+64JsEEhoWuAYBkOtQiMEU7n/XgfYi4=\nmodernc.org/mathutil v1.6.0/go.mod h1:Ui5Q9q1TR2gFm0AQRqQUaBWFLAhQpCwNcuhBOSedWPo=\nmodernc.org/memory v1.8.0 h1:IqGTL6eFMaDZZhEWwcREgeMXYwmW83LYW8cROZYkg+E=\nmodernc.org/memory v1.8.0/go.mod h1:XPZ936zp5OMKGWPqbD3JShgd/ZoQ7899TUuQqxY+peU=\nmodernc.org/sqlite v1.33.1 h1:trb6Z3YYoeM9eDL1O8do81kP+0ejv+YzgyFo+Gwy0nM=\nmodernc.org/sqlite v1.33.1/go.mod h1:pXV2xHxhzXZsgT/RtTFAPY6JJDEvOTcTdwADQCCWD4k=\nmodernc.org/strutil v1.2.0 h1:agBi9dp1I+eOnxXeiZawM8F4LawKv4NzGWSaLfyeNZA=\nmodernc.org/strutil v1.2.0/go.mod h1:/mdcBmfOibveCTBxUl5B5l6W+TTH1FXPLHZE6bTosX0=\nmodernc.org/token v1.1.0 h1:Xl7Ap9dKaEs5kLoOQeQmPWevfnk/DM5qcLcYlA8ys6Y=\nmodernc.org/token v1.1.0/go.mod h1:UGzOrNV1mAFSEB63lOFHIpNRUVMvYTc6yu1SMY/XTDM=\nrsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8=\nrsc.io/quote/v3 v3.1.0/go.mod h1:yEA65RcK8LyAZtP9Kv3t0HmxON59tX3rD+tICJqUlj0=\nrsc.io/sampler v1.3.0/go.mod h1:T1hPZKmBbMNahiBKFy5HrXp6adAjACjK9JXDnKaTXpA=\nsigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY=\n"
  },
  {
    "path": "examples/sbom-meta/main.go",
    "content": "package main\n\nimport (\n\t\"context\"\n\t\"encoding/json\"\n\t\"flag\"\n\t\"fmt\"\n\t\"os\"\n\t\"sort\"\n\t\"strings\"\n\t\"time\"\n\n\t\"github.com/dustin/go-humanize\"\n\t\"github.com/fatih/color\"\n\t\"github.com/jedib0t/go-pretty/v6/table\"\n\t\"github.com/jmoiron/sqlx\"\n\t_ \"github.com/lib/pq\"\n\n\t\"github.com/anchore/syft/syft\"\n\t\"github.com/caarlos0/env\"\n)\n\ntype config struct {\n\tHost     string `env:\"CHAI_DB_HOST\" envDefault:\"localhost\"`\n\tUser     string `env:\"CHAI_DB_USER\" envDefault:\"postgres\"`\n\tPassword string `env:\"CHAI_DB_PASSWORD\" envDefault:\"s3cr3t\"`\n\tPort     int    `env:\"CHAI_DB_PORT\" envDefault:\"5435\"`\n}\n\ntype packageMeta struct {\n\tName           string    `db:\"name\" json:\"name\"`\n\tDownloads      int64     `db:\"downloads\" json:\"downloads\"`\n\tDependents     int64     `db:\"dependents\" json:\"dependents,omitempty\"`\n\tURL            string    `db:\"url\" json:\"url\"`\n\tFirstPublished time.Time `db:\"first_published\" json:\"firstPublished\"`\n\tLastPublished  time.Time `db:\"last_published\" json:\"lastPublished\"`\n}\n\nconst packageMetaFullSQL = `\nSELECT p.name,\ncount(d.id) AS dependents,\nsum(v.downloads) AS downloads,\nmin(u.url) AS url,\nmin(v.published_at) AS \"first_published\",\nmax(v.published_at) AS \"last_published\"\nFROM packages AS p\nJOIN dependencies AS d ON d.dependency_id = p.id\nJOIN versions v ON v.package_id = p.id\nJOIN package_urls pu ON pu.package_id = p.id\nJOIN urls u ON u.id = pu.url_id\nJOIN url_types ut ON u.url_type_id = ut.id\nWHERE ut.name = 'repository'\nAND p.name = $1\nGROUP BY p.name`\n\nconst packageMetaSQL = `\nSELECT p.name,\nsum(v.downloads) AS downloads,\nmin(u.url) AS url,\nmin(v.published_at) AS \"first_published\",\nmax(v.published_at) AS \"last_published\"\nFROM packages AS p\nJOIN versions v ON v.package_id = p.id\nJOIN package_urls pu ON pu.package_id = p.id\nJOIN urls u ON u.id = pu.url_id\nJOIN url_types ut ON u.url_type_id = ut.id\nWHERE ut.name = 'repository'\nAND p.name = $1\nGROUP BY p.name`\n\nfunc main() {\n\tvar sourcePath string\n\tvar cfg config\n\tvar jsonFlag = flag.Bool(\"json\", false, \"Output JSON\")\n\tvar sortFlag = flag.String(\"sort\", \"published,asc\", \"Sort by field,asc|desc\")\n\tflag.Usage = usage\n\tflag.Parse()\n\targs := flag.Args()\n\terr := env.Parse(&cfg)\n\tif err != nil {\n\t\tpanic(err)\n\t}\n\t// use the current directory if no source path is specified\n\tswitch len(args) {\n\tcase 0:\n\t\tsourcePath = \".\"\n\tcase 1:\n\t\tsourcePath = args[0]\n\tdefault:\n\t\tusage()\n\t\tos.Exit(1)\n\t}\n\tsortArg := strings.ToLower(*sortFlag)\n\n\t// connect to the chai db, defaulting to the docker-compose setup\n\tconnStr := fmt.Sprintf(\"postgresql://%s:%s@%s:%d/chai?sslmode=disable\", cfg.User, cfg.Password, cfg.Host, cfg.Port)\n\t// fmt.Printf(\"connecting to: %s\\n\", connStr)\n\tdb, err := sqlx.Open(\"postgres\", connStr)\n\tif err != nil {\n\t\tpanic(err)\n\t}\n\n\t// use syft to get the sbom\n\tsrc, err := syft.GetSource(context.Background(), sourcePath, nil)\n\tif err != nil {\n\t\tpanic(err)\n\t}\n\tsbom, err := syft.CreateSBOM(context.Background(), src, nil)\n\tif err != nil {\n\t\tpanic(err)\n\t}\n\tpms := []packageMeta{}\n\tfor p := range sbom.Artifacts.Packages.Enumerate() {\n\t\trs := []packageMeta{}\n\t\terr = db.Select(&rs, packageMetaSQL, p.Name)\n\t\tif err != nil {\n\t\t\tpanic(err)\n\t\t}\n\t\tfor _, pm := range rs {\n\t\t\tpms = append(pms, pm)\n\t\t}\n\t}\n\tpms = dedupePackages(pms)\n\n\tsort.Slice(pms, 
func(i, j int) bool {\n\t\tswitch sortArg {\n\t\tcase \"package\", \"package,asc\":\n\t\t\treturn pms[i].Name < pms[j].Name\n\t\tcase \"package,desc\":\n\t\t\treturn pms[i].Name > pms[j].Name\n\t\tcase \"repository\", \"repository,asc\":\n\t\t\treturn pms[i].URL < pms[j].URL\n\t\tcase \"repository,desc\":\n\t\t\treturn pms[i].URL > pms[j].URL\n\t\tcase \"published\", \"published,asc\":\n\t\t\treturn pms[i].LastPublished.After(pms[j].LastPublished)\n\t\tcase \"published,desc\":\n\t\t\treturn pms[i].LastPublished.Before(pms[j].LastPublished)\n\t\tcase \"downloads\", \"downloads,asc\":\n\t\t\treturn pms[i].Downloads < pms[j].Downloads\n\t\tcase \"downloads,desc\":\n\t\t\treturn pms[i].Downloads > pms[j].Downloads\n\t\tdefault:\n\t\t\treturn pms[i].Name < pms[j].Name\n\t\t}\n\t})\n\n\tif *jsonFlag {\n\t\tjs, err := json.Marshal(pms)\n\t\tif err != nil {\n\t\t\tpanic(err)\n\t\t}\n\t\tfmt.Printf(\"%s\", js)\n\t} else {\n\t\tprintPackagesMeta(pms)\n\t}\n}\n\nfunc printPackagesMeta(pms []packageMeta) {\n\tt := table.NewWriter()\n\tt.SetOutputMirror(os.Stdout)\n\tt.AppendHeader(table.Row{\"Package\", \"Repository\", \"Published\", \"Downloads\"})\n\tt.SetColumnConfigs([]table.ColumnConfig{\n\t\t{Name: \"Package\"},\n\t\t{Name: \"Repository\"},\n\t\t{Name: \"Published\", Transformer: formatTime},\n\t\t{Name: \"Downloads\", Transformer: formatNumber},\n\t})\n\tfor _, pm := range pms {\n\t\tp := color.New(color.FgHiGreen).Sprint(pm.Name)\n\t\tu := pm.URL\n\t\tt.Style().Options.DrawBorder = false\n\t\tt.AppendRow(table.Row{p, u, pm.LastPublished, pm.Downloads})\n\t}\n\tt.Render()\n}\n\nfunc formatTime(val interface{}) string {\n\tif t, ok := val.(time.Time); ok {\n\t\treturn humanize.Time(t)\n\t}\n\treturn \"Bad time format\"\n}\n\nfunc formatNumber(val interface{}) string {\n\tif n, ok := val.(int64); ok {\n\t\treturn humanize.Comma(n)\n\t}\n\treturn \"NaN\"\n}\n\nfunc dedupePackages(pms []packageMeta) []packageMeta {\n\tpns := make(map[string]bool)\n\tdd := []packageMeta{}\n\tfor _, pm := range pms {\n\t\tif _, v := pns[pm.Name]; !v {\n\t\t\tpns[pm.Name] = true\n\t\t\tdd = append(dd, pm)\n\t\t}\n\t}\n\treturn dd\n}\n\nfunc usage() {\n\tfmt.Println(\"sbom-meta [SOURCE]\")\n\tflag.PrintDefaults()\n}\n"
  },
  {
    "path": "examples/visualizer/README.md",
    "content": "# Visualizer\n\nAn example Chai application that displays a graphical representation of a specific\npackage.\n\n## Requirements\n\n1. [python]: version 3.11\n2. [pip]: Ensure you have pip installed\n3. [virtualenv]: It's recommended to use a virtual environment to manage dependencies\n\n## Getting Started\n\n1. Set up a virtual environment\n\n```sh\npython -m venv venv\nsource venv/bin/activate\n```\n\n2. Install required packages\n\n```sh\npip install -r requirements.txt\n```\n\n3. Ensure `CHAI_DATABASE_URL` is available as an environment variable. The default\n   value from our docker config is below:\n\n```sh\nexport CHAI_DATABASE_URL=postgresql://postgres:s3cr3t@localhost:5432/chai\n```\n\n## Usage\n\n1. Start the [Chai DB](https://github.com/teaxyz/chai-oss) with `docker compose up`.\n1. Run the visualizer:\n   ```sh\n   python main.py <package>\n   ```\n\n### Arguments\n\n- `--depth`: Maximum depth to go to. Default is `9999`, meaning all possible depths\n- `--profile`: Enable performance profiling. Default is `False`.\n\n## Share your visuals\n\nIf you create interesting visuals, share them on our [Discord]. Feel free to mess\naround and create alternate ways to generate them.\n\n[python]: https://www.python.org\n[pip]: https://pip.pypa.io/en/stable/installation/\n[virtualenv]: https://virtualenv.pypa.io/en/latest/\n[Discord]: https://discord.com/invite/tea-906608167901876256\n"
  },
  {
    "path": "examples/visualizer/main.py",
    "content": "import argparse\nimport cProfile\nimport pstats\nfrom os import getenv\nfrom pstats import SortKey\n\nimport psycopg2\nimport rustworkx as rx\nfrom rustworkx.visualization import graphviz_draw\nfrom tabulate import tabulate\n\nCHAI_DATABASE_URL = getenv(\"CHAI_DATABASE_URL\")\n\n\nclass Package:\n    id: str\n    name: str\n    pagerank: float\n    depth: int | None\n\n    def __init__(self, id: str):\n        self.id = id\n        self.name = \"\"\n        self.pagerank = 0\n        self.depth = None\n\n    def __str__(self):\n        return self.name\n\n\nclass Graph(rx.PyDiGraph):\n    def __init__(self, *args, **kwargs):\n        super().__init__(*args, **kwargs)\n        self.node_index_map: dict[Package, int] = {}\n        self._package_cache: dict[str, Package] = {}\n\n    # The data model has IDs, but rustworkx uses indexes\n    # Good news - it can index by object. So, we're just keeping track of that\n    def _get_or_create_package(self, pkg_id: str) -> Package:\n        \"\"\"A cache to avoid creating the same package multiple times\"\"\"\n        if pkg_id not in self._package_cache:\n            pkg = Package(pkg_id)\n            self._package_cache[pkg_id] = pkg\n        return self._package_cache[pkg_id]\n\n    def safely_add_node(self, pkg_id: str) -> int:\n        \"\"\"Adds a node to the graph if it doesn't already exist\"\"\"\n        pkg = self._get_or_create_package(pkg_id)\n        if pkg not in self.node_index_map:\n            index = super().add_node(pkg)\n            self.node_index_map[pkg] = index\n            return index\n        return self.node_index_map[pkg]\n\n    def safely_add_nodes(self, nodes: list[str]) -> list[int]:\n        return [self.safely_add_node(node) for node in nodes]\n\n    def pagerank(self) -> None:\n        pageranks = rx.pagerank(self)\n        for index in self.node_indexes():\n            self[index].pagerank = pageranks[index]\n\n    def nameless_nodes(self) -> list[str]:\n        return [self[i].id for i in self.node_indexes() if self[i].name == \"\"]\n\n    def max_depth(self) -> int:\n        return max([self[i].depth for i in self.node_indexes()])\n\n\nclass DB:\n    \"\"\"Prepares the sql statements and connects to the database\"\"\"\n\n    def __init__(self):\n        self.connect()\n        self.cursor.execute(\n            \"PREPARE select_id AS SELECT id FROM packages WHERE name = $1\"\n        )\n        self.cursor.execute(\n            \"PREPARE select_name AS SELECT id, name FROM packages WHERE id = ANY($1)\"\n        )\n        self.cursor.execute(\n            \"PREPARE select_deps AS \\\n            SELECT DISTINCT p.id, p.name, d.dependency_id FROM packages p \\\n            JOIN versions v ON p.id = v.package_id \\\n            JOIN dependencies d ON v.id = d.version_id \\\n            WHERE p.id = ANY($1)\"\n        )\n\n    def connect(self) -> None:\n        if not CHAI_DATABASE_URL:\n            raise RuntimeError(\"Environment variable CHAI_DATABASE_URL is not set.\")\n\n        try:\n            self.conn = psycopg2.connect(CHAI_DATABASE_URL)\n            self.cursor = self.conn.cursor()\n        except psycopg2.OperationalError as e:\n            raise RuntimeError(f\"Failed to connect to the database: {e}\") from e\n\n    def select_id(self, package: str) -> int:\n        self.cursor.execute(\"EXECUTE select_id (%s)\", (package,))\n        return self.cursor.fetchone()[0]\n\n    def select_deps(self, ids: list[str]) -> dict[str, dict[str, str | set[str]]]:\n        # NOTE: this might be 
intense for larger package managers\n        # NOTE: I have to cast the list to a uuid[] for psycopg2 to correctly handle it\n        self.cursor.execute(\"EXECUTE select_deps (%s::uuid[])\", (ids,))\n        flat = self.cursor.fetchall()\n        # now, return this as a map capturing the package name and its dependencies\n        result = {}\n        for pkg_id, pkg_name, dep_id in flat:\n            # add the package if it doesn't already exist in result\n            if pkg_id not in result:\n                result[pkg_id] = {\"name\": pkg_name, \"dependencies\": set()}\n            # add the dependency to the dependencies set\n            result[pkg_id][\"dependencies\"].add(dep_id)\n\n        return result\n\n    def select_name(self, ids: list[str]) -> list[tuple[str, str]]:\n        self.cursor.execute(\"EXECUTE select_name (%s::uuid[])\", (ids,))\n        return self.cursor.fetchall()\n\n\ndef larger_query(db: DB, root_package: str, max_depth: int) -> Graph:\n    graph = Graph()\n    visited = set()\n    leafs = set()\n\n    # above sets will use the id of the package\n    root_id = db.select_id(root_package)\n    leafs.add(root_id)\n    depth = 0\n\n    while leafs - visited:\n        query = list(leafs - visited)\n        dependencies = db.select_deps(query)\n\n        # Increment the depth, and get out if too much\n        depth += 1\n        if depth > max_depth:\n            # Set the depth for the remaining leafs\n            for pkg_id in query:\n                i = graph.safely_add_node(pkg_id)\n                graph[i].depth = depth\n            break\n\n        for pkg_id in query:\n            i = graph.safely_add_node(pkg_id)\n\n            # Have we encountered this node before? If not, set the depth\n            if graph[i].depth is None:\n                graph[i].depth = depth\n\n            if pkg_id in dependencies:\n                graph[i].name = dependencies[pkg_id][\"name\"]\n                js = graph.safely_add_nodes(dependencies[pkg_id][\"dependencies\"])\n                edges = [(i, j, None) for j in js]\n                graph.add_edges_from(edges)\n                leafs.update(dependencies[pkg_id][\"dependencies\"])\n\n        visited.update(query)\n\n    # Add the names for the packages that don't have dependencies\n    nameless_nodes = graph.nameless_nodes()\n    names = db.select_name(nameless_nodes)\n    for pkg_id, pkg_name in names:\n        i = graph.safely_add_node(pkg_id)\n        graph[i].name = pkg_name\n\n    return graph\n\n\ndef display(graph: Graph):\n    sorted_nodes = sorted(graph.node_indexes(), key=lambda x: graph[x].depth)\n    headers = [\"Package\", \"First Depth\", \"Dependencies\", \"Dependents\", \"Pagerank\"]\n    data = []\n\n    for node in sorted_nodes:\n        data.append(\n            [\n                graph[node],\n                graph[node].depth,\n                graph.out_degree(node),\n                graph.in_degree(node),\n                graph[node].pagerank,\n            ]\n        )\n\n    print(tabulate(data, headers=headers, floatfmt=\".8f\", intfmt=\",\"))\n\n\ndef draw(graph: Graph, package: str, img_type: str = \"svg\"):\n    ALLOWABLE_FILE_TYPES = [\"svg\", \"png\"]\n    if img_type not in ALLOWABLE_FILE_TYPES:\n        raise ValueError(f\"file type must be one of {ALLOWABLE_FILE_TYPES}\")\n\n    max_depth = graph.max_depth()\n    total_nodes = graph.num_nodes()\n    total_edges = graph.num_edges()\n\n    def depth_to_grayscale(depth: int) -> str:\n        \"\"\"Convert depth to a grayscale color.\"\"\"\n   
     if depth == 1:\n            return \"red\"\n        return f\"gray{depth + 10 + (depth - 1) // 9}\"\n\n    # Unused because I don't visualize edges\n    def color_edge(edge):\n        out_dict = {\n            \"color\": \"lightgrey\",\n            \"fillcolor\": \"lightgrey\",\n            \"penwidth\": \"0.05\",\n            \"arrowsize\": \"0.05\",\n            \"arrowhead\": \"tee\",\n        }\n        return out_dict\n\n    def color_node(node: Package):\n        scale = 20\n\n        def label_nodes(node: Package):\n            if node.pagerank > 0.01:\n                return f\"{node.name}\"\n            return \"\"\n\n        def size_center_node(node: Package):\n            if node.depth == 1:\n                return \"1\"\n            return str(node.pagerank * scale)\n\n        out_dict = {\n            \"label\": label_nodes(node),\n            \"fontsize\": \"5\",\n            \"fontcolor\": \"gray\",\n            \"fontname\": \"Menlo\",\n            \"color\": depth_to_grayscale(node.depth),\n            \"shape\": \"circle\",\n            \"style\": \"filled\",\n            \"fixedsize\": \"True\",\n            \"width\": size_center_node(node),\n            \"height\": size_center_node(node),\n        }\n        return out_dict\n\n    label = f\"<{package} (big red dot) <br/>depth: {max_depth} <br/>nodes: {total_nodes!s} <br/>edges: {total_edges!s}>\"\n    graph_attr = {\n        \"beautify\": \"True\",\n        \"splines\": \"none\",\n        \"overlap\": \"0\",\n        \"label\": label,\n        \"labelloc\": \"t\",\n        \"labeljust\": \"l\",\n        \"fontname\": \"Menlo\",\n    }\n\n    graphviz_draw(\n        graph,\n        node_attr_fn=color_node,\n        edge_attr_fn=color_edge,\n        graph_attr=graph_attr,\n        method=\"twopi\",  # NOTE: sfdp works as well\n        filename=f\"{package}.{img_type}\",\n        image_type=img_type,\n    )\n\n\ndef latest(db: DB, package: str, depth: int, img_type: str):\n    G = larger_query(db, package, depth)\n    G.pagerank()\n    display(G)\n    draw(G, package, img_type)\n    print(\"✅ Saved image\")\n\n\nif __name__ == \"__main__\":\n    db = DB()\n\n    parser = argparse.ArgumentParser()\n    parser.add_argument(\"package\", help=\"The package to visualize\", type=str)\n    parser.add_argument(\n        \"--depth\", help=\"Maximum depth to go to\", type=int, default=9999\n    )\n    parser.add_argument(\n        \"--profile\", help=\"Performance!\", action=\"store_true\", default=False\n    )\n    parser.add_argument(\n        \"--image-type\",\n        help=\"The file type to save the image as\",\n        type=str,\n        default=\"svg\",\n    )\n    args = parser.parse_args()\n    package = args.package\n    depth = args.depth\n    profile = args.profile\n    img_type = args.image_type\n\n    if profile:\n        profiler = cProfile.Profile()\n        profiler.enable()\n\n    latest(db, package, depth, img_type)\n\n    if profile:\n        profiler.disable()\n        stats = pstats.Stats(profiler).sort_stats(SortKey.TIME)\n        stats.print_stats()\n"
  },
  {
    "path": "examples/visualizer/monitor.py",
    "content": "import argparse\nimport time\nfrom collections import defaultdict\nfrom collections.abc import Callable\nfrom functools import wraps\n\nfrom main import DB, latest\n\nMETRICS: list[str] = [\n    \"total_execution_time\",\n    \"query_count\",\n    \"total_query_time\",\n    \"non_query_time\",\n]\n\n\nclass Result:\n    def __init__(self, **kwargs):\n        for metric in self.METRICS:\n            setattr(self, metric, kwargs[metric])\n\n    def __str__(self):\n        return \"\\n\".join(\n            f\"{metric}: {getattr(self, metric):.3f}s\"\n            if metric != \"query_count\"  # I don't like this\n            else f\"{metric}: {getattr(self, metric)}\"\n            for metric in self.METRICS\n        )\n\n\nclass MonitoredDB(DB):\n    \"\"\"Base monitoring wrapper for DB classes\"\"\"\n\n    def __init__(self):\n        self.query_count = 0\n        self.total_query_time = 0\n        super().__init__()\n\n    def _monitor_query(self, func: Callable) -> Callable:\n        @wraps(func)\n        def wrapper(*args, **kwargs):\n            self.query_count += 1\n            start_time = time.perf_counter()\n            result = func(*args, **kwargs)\n            self.total_query_time += time.perf_counter() - start_time\n            return result\n\n        return wrapper\n\n    def connect(self):\n        super().connect()\n        # and wrap all the methods with monitoring\n        for name in dir(self):\n            if name.startswith(\"select_\"):\n                setattr(self, name, self._monitor_query(getattr(self, name)))\n\n\ndef run_monitored(func: Callable, package: str) -> Result:\n    \"\"\"Run the main program with monitoring\"\"\"\n    db = MonitoredDB()\n    start_time = time.perf_counter()\n    func(db, package)\n    total_time = time.perf_counter() - start_time\n\n    return Result(\n        total_execution_time=total_time,\n        query_count=db.query_count,\n        total_query_time=db.total_query_time,\n        non_query_time=total_time - db.total_query_time,\n    )\n\n\ndef compare_implementations(package: str, runs: int = 3) -> dict[str, list[Result]]:\n    \"\"\"Compare old and new implementations\"\"\"\n    implementations = [latest]\n    results: dict[str, list[Result]] = defaultdict(list)\n\n    for i in range(runs):\n        print(f\"\\nRun {i + 1}/{runs}\")\n        for func in implementations:\n            func_name = func.__name__\n            print(f\"Running {func_name}...\")\n            result = run_monitored(func, package)\n            results[func_name].append(result)\n\n    return results\n\n\ndef compare_results(results: dict[str, list[Result]], runs: int) -> None:\n    implementations = list(results.keys())\n\n    print(\"\\nResults Comparison:\")\n    print(\"-\" * (25 + 20 * len(implementations)))\n\n    # Header row with implementation names\n    print(f\"{'Metric':<25}\", end=\"\")\n    for impl in implementations:\n        print(f\"{impl:>20}\", end=\"\")\n    print()\n    print(\"-\" * (25 + 20 * len(implementations)))\n\n    # Data rows\n    for metric in Result.METRICS:\n        print(f\"{metric:<25}\", end=\"\")\n        for impl in implementations:\n            avg = sum(getattr(r, metric) for r in results[impl]) / runs\n            if metric == \"query_count\":\n                print(f\"{avg:>20.0f}\", end=\"\")\n            else:\n                print(f\"{avg:>20.3f}s\", end=\"\")\n        print()\n\n    # Calculate improvements relative to first implementation\n    print(\"-\" * (25 + 20 * len(implementations)))\n  
  base_time = sum(r.total_execution_time for r in results[implementations[0]]) / runs\n    for impl in implementations[1:]:\n        new_time = sum(r.total_execution_time for r in results[impl]) / runs\n        improvement = ((base_time - new_time) / base_time) * 100\n        print(f\"Improvement ({impl} vs {implementations[0]}): {improvement:>+.1f}%\")\n\n\nif __name__ == \"__main__\":\n    parser = argparse.ArgumentParser()\n    parser.add_argument(\"--package\", help=\"The package to visualize\")\n    parser.add_argument(\n        \"--profile\",\n        help=\"Whether to profile the code\",\n        action=\"store_true\",\n        default=False,\n    )\n    parser.add_argument(\"--runs\", type=int, default=3, help=\"Number of runs to average\")\n    args = parser.parse_args()\n\n    results = compare_implementations(args.package, args.runs)\n    compare_results(results, args.runs)\n"
  },
  {
    "path": "package_managers/crates/Dockerfile",
    "content": "FROM ghcr.io/astral-sh/uv:python3.11-bookworm-slim\n\n# Copy everything from the root directory (build context)\nCOPY . .\n\n# Install core requirements using uv\nWORKDIR /core\nRUN uv pip install --system -r requirements.txt\n\nWORKDIR /\n\n# Run the main application\nCMD [\"python\", \"/package_managers/crates/main.py\"]\n"
  },
  {
    "path": "package_managers/crates/README.md",
    "content": "# crates\n\nThe crates service uses the database dump provided by crates.io and coerces their data\nmodel into CHAI's. It's containerized using Docker for easy deployment and consistency.\nIt's also written in `python` as a first draft, and uses a lot of the\n[core tools](../../core/).\n\n## Getting Started\n\nTo just run the crates service, you can use the following commands:\n\n```bash\ndocker compose build crates\ndocker compose run crates\n```\n\n## Execution Steps\n\nThe crates loader goes through the following steps when executed:\n\n1. **Initialization**: The loader starts by initializing the configuration and database\n   connection using `Config` and `CratesDB`.\n2. **Fetching**: If the `FETCH` flag is set to true, the loader downloads the latest\n   cargo data from the source using `TarballFetcher`. If needed, it saves to disk.\n3. **Transformation**: The downloaded data is parsed and transformed using\n   `CratesTransformer.parse()` into a format compatible with the CHAI database schema.\n4. **Deletion**: The loader identifies crates that exist in the database\n   but are no longer in the registry (crates.io allows deletion _sometimes_).\n5. **Cache Building**: The loader builds a cache by setting the current graph and URLs\n   from the database, then creates a `Cache` object for efficient diffing.\n6. **Diff Process**: The loader performs a diff operation to categorize data into:\n   - New packages vs updated packages\n   - New URLs vs existing URLs\n   - New package URLs vs updated package URLs\n   - New dependencies vs removed dependencies\n7. **Data Ingestion**: All categorized data is loaded into the database via a single\n   `db.ingest()` call.\n\nThe main execution logic is in the `main` function in [main.py](main.py):\n\n```python\ndef main(config: Config, db: CratesDB):\n    logger = Logger(\"crates_main\")\n    logger.log(\"Starting crates_main\")\n\n    # fetch, write, transform\n    if config.exec_config.fetch:\n        fetcher = TarballFetcher(...)\n        files = fetcher.fetch()\n    if not config.exec_config.no_cache:\n        fetcher.write(files)\n\n    transformer = CratesTransformer(config)\n    transformer.parse()\n\n    # identify and handle deletions\n    deletions = identify_deletions(transformer, db)\n    if deletions:\n        db.delete_packages_by_import_id(deletions)\n\n    # build cache and diff\n    db.set_current_graph()\n    db.set_current_urls(crates_urls)\n    cache = Cache(...)\n\n    # perform diff and ingest\n    diff = Diff(config, cache)\n    # ... 
diff process ...\n    db.ingest(new_packages, final_new_urls, new_package_urls,\n              new_deps, removed_deps, updated_packages, updated_package_urls)\n```\n\n### Configuration Flags\n\nThe crates loader supports several configuration flags:\n\n- `DEBUG`: Enables debug logging when set to true.\n- `TEST`: Runs the loader in test mode when set to true, skipping certain data insertions.\n- `FETCH`: Determines whether to fetch new data from the source when set to true.\n- `FREQUENCY`: Sets how often (in hours) the pipeline should run.\n- `NO_CACHE`: When set to true, deletes temporary files after processing.\n\nThese flags can be set in the `docker-compose.yml` file:\n\n```yaml\ncrates:\n  build:\n    context: .\n    dockerfile: ./package_managers/crates/Dockerfile\n  environment:\n    - CHAI_DATABASE_URL=postgresql://postgres:s3cr3t@db:5432/chai\n    - PYTHONPATH=/\n    - DEBUG=${DEBUG:-false}\n    - TEST=${TEST:-false}\n    - FETCH=${FETCH:-true}\n    - FREQUENCY=${FREQUENCY:-24}\n    - NO_CACHE=${NO_CACHE:-false}\n```\n\n## TODOs\n\n- [ ] `versions.csv` contains all the `published_by` ids, which reference users that would\n      also need to be loaded\n- [ ] `versions.csv` also contains licenses\n"
  },
  {
    "path": "package_managers/crates/db.py",
    "content": "from uuid import UUID\n\nfrom sqlalchemy import select\n\nfrom core.config import Config\nfrom core.db import DB\nfrom core.models import (\n    CanonPackage,\n    DependsOn,\n    LegacyDependency,\n    Package,\n    PackageURL,\n    UserPackage,\n    UserVersion,\n    Version,\n)\nfrom core.structs import CurrentGraph, CurrentURLs\n\n\nclass CratesDB(DB):\n    def __init__(self, config: Config):\n        super().__init__(\"crates_db\")\n        self.config = config\n        # self.set_current_graph()\n\n    def set_current_graph(self) -> None:\n        self.graph: CurrentGraph = self.current_graph(self.config.pm_config.pm_id)\n\n    def set_current_urls(self, urls: set[str]) -> None:\n        self.urls: CurrentURLs = self.current_urls(urls)\n\n    def delete_packages_by_import_id(self, import_ids: set[int]) -> None:\n        \"\"\"\n        Delete packages identified by import_ids and all their dependent records.\n        This is a DB class method to handle the cascade deletion properly.\n        \"\"\"\n\n        # Convert import_ids to package_ids using the cache\n        package_ids: list[UUID] = []\n        for import_id in import_ids:\n            pkg_id = self.import_id_map.get(str(import_id))\n            if pkg_id:\n                package_ids.append(pkg_id)\n\n        if not package_ids:\n            self.logger.debug(\"No packages found to delete\")\n            return\n\n        self.logger.debug(f\"Deleting {len(package_ids)} crates completely\")\n\n        # Delete records in reverse dependency order\n        with self.session() as session:\n            try:\n                # 1. Delete PackageURLs\n                package_urls_deleted = (\n                    session.query(PackageURL)\n                    .filter(PackageURL.package_id.in_(package_ids))\n                    .delete(synchronize_session=False)\n                )\n\n                # 2. Delete CanonPackages\n                canon_packages_deleted = (\n                    session.query(CanonPackage)\n                    .filter(CanonPackage.package_id.in_(package_ids))\n                    .delete(synchronize_session=False)\n                )\n\n                # 3. Delete UserPackages\n                user_packages_deleted = (\n                    session.query(UserPackage)\n                    .filter(UserPackage.package_id.in_(package_ids))\n                    .delete(synchronize_session=False)\n                )\n\n                # 4. Delete LegacyDependencies (both package_id and dependency_id)\n                legacy_deps_package_deleted = (\n                    session.query(LegacyDependency)\n                    .filter(LegacyDependency.package_id.in_(package_ids))\n                    .delete(synchronize_session=False)\n                )\n\n                legacy_deps_dependency_deleted = (\n                    session.query(LegacyDependency)\n                    .filter(LegacyDependency.dependency_id.in_(package_ids))\n                    .delete(synchronize_session=False)\n                )\n\n                # TODO: this table is deprecated, but still contains records\n                # we can remove this line, once all indexers use LegacyDependencies\n                # 5. Delete DependsOn where dependency_id is in package_ids\n                depends_on_deleted = (\n                    session.query(DependsOn)\n                    .filter(DependsOn.dependency_id.in_(package_ids))\n                    .delete(synchronize_session=False)\n                )\n\n                # 6. 
Delete Versions and their dependencies\n                # TODO: remove this line once all indexers stop using Versions and\n                # we can truncate this table\n                # First get all version ids for these packages\n                version_ids = [\n                    vid\n                    for (vid,) in session.query(Version.id).filter(\n                        Version.package_id.in_(package_ids)\n                    )\n                ]\n\n                # Delete dependencies attached to these versions\n                version_deps_deleted = 0\n                user_versions_deleted = 0\n                if version_ids:\n                    version_deps_deleted = (\n                        session.query(DependsOn)\n                        .filter(DependsOn.version_id.in_(version_ids))\n                        .delete(synchronize_session=False)\n                    )\n\n                    user_versions_deleted = (\n                        session.query(UserVersion)\n                        .filter(UserVersion.version_id.in_(version_ids))\n                        .delete(synchronize_session=False)\n                    )\n\n                # Now delete the versions\n                versions_deleted = (\n                    session.query(Version)\n                    .filter(Version.package_id.in_(package_ids))\n                    .delete(synchronize_session=False)\n                )\n\n                # 7. Finally delete the packages\n                packages_deleted = (\n                    session.query(Package)\n                    .filter(Package.id.in_(package_ids))\n                    .delete(synchronize_session=False)\n                )\n\n                self.logger.debug(\"-\" * 100)\n                self.logger.debug(\"Going to commit delete for\")\n                self.logger.debug(f\"{packages_deleted} packages\")\n                self.logger.debug(f\"{versions_deleted} versions\")\n                self.logger.debug(f\"{version_deps_deleted} version dependencies\")\n                self.logger.debug(f\"{user_versions_deleted} user versions\")\n                self.logger.debug(f\"{depends_on_deleted} direct dependencies\")\n                self.logger.debug(\n                    f\"{legacy_deps_package_deleted + legacy_deps_dependency_deleted} legacy deps\"  # E501\n                )\n                self.logger.debug(f\"{user_packages_deleted} user packages\")\n                self.logger.debug(f\"{canon_packages_deleted} canon packages\")\n                self.logger.debug(f\"{package_urls_deleted} package URLs\")\n                self.logger.debug(\"-\" * 100)\n\n                # Commit the transaction\n                session.commit()\n\n            except Exception as e:\n                session.rollback()\n                self.logger.error(f\"Error deleting packages: {e}\")\n                raise\n\n    def get_cargo_id_to_chai_id(self) -> dict[str, UUID]:\n        \"\"\"\n        Returns a map of cargo import_ids to chai_ids\n        \"\"\"\n        with self.session() as session:\n            stmt = select(Package.import_id, Package.id).where(\n                Package.package_manager_id == self.config.pm_config.pm_id\n            )\n            self.import_id_map: dict[str, UUID] = {\n                row[0]: row[1] for row in session.execute(stmt).all()\n            }\n            return self.import_id_map\n"
  },
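The deletion path above hinges on translating crates.io import_ids (integers in the dump, stored as strings in CHAI) into package UUIDs through the cached map built by `get_cargo_id_to_chai_id()`. A minimal, self-contained sketch of that lookup step; the map contents and ids below are invented for illustration, not real data:

```python
# Sketch of the import_id -> package UUID resolution that precedes the cascade
# delete in CratesDB.delete_packages_by_import_id. The map stands in for the
# cache built by get_cargo_id_to_chai_id(); the ids here are made up.
from uuid import UUID, uuid4

import_id_map: dict[str, UUID] = {"12345": uuid4(), "67890": uuid4()}


def resolve_package_ids(import_ids: set[int], id_map: dict[str, UUID]) -> list[UUID]:
    """Translate crates.io import_ids into CHAI package UUIDs, skipping unknown ids."""
    package_ids: list[UUID] = []
    for import_id in import_ids:
        pkg_id = id_map.get(str(import_id))  # import_ids are stored as strings
        if pkg_id:
            package_ids.append(pkg_id)
    return package_ids


# 99999 is unknown, so only one UUID comes back
print(resolve_package_ids({12345, 99999}, import_id_map))
```

Unknown ids are silently skipped, mirroring the guard in the real method that returns early when nothing resolves.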
  {
    "path": "package_managers/crates/diff.py",
    "content": "from datetime import datetime\nfrom uuid import UUID, uuid4\n\nfrom core.config import Config\nfrom core.logger import Logger\nfrom core.models import URL, LegacyDependency, Package, PackageURL\nfrom core.structs import Cache, URLKey\nfrom package_managers.crates.structs import Crate, DependencyType\n\n\nclass Diff:\n    def __init__(self, config: Config, caches: Cache):\n        self.config = config\n        self.now = datetime.now()\n        self.caches = caches\n        self.logger = Logger(\"crates_diff\")\n\n    def diff_pkg(self, pkg: Crate) -> tuple[UUID, Package | None, dict | None]:\n        \"\"\"\n        Checks if the given pkg is in the package_cache.\n\n        Returns:\n            pkg_id: UUID, the id of the package in the db\n            pkg_obj: Package | None, the package object if it's new\n            update_payload: dict | None, the update payload if it's an update\n        \"\"\"\n        pkg_id: UUID\n        crate_id: str = str(pkg.id)  # import_ids are strings in the db\n        if crate_id not in self.caches.package_map:\n            # new package\n            p = Package(\n                id=uuid4(),\n                derived_id=f\"crates/{pkg.name}\",\n                name=pkg.name,\n                package_manager_id=self.config.pm_config.pm_id,\n                import_id=crate_id,\n                readme=pkg.readme,\n                created_at=self.now,\n                updated_at=self.now,\n            )\n            pkg_id = p.id\n            return pkg_id, p, {}\n        else:\n            # it's in the cache, so check for changes\n            p = self.caches.package_map[crate_id]\n            pkg_id = p.id\n            # check for changes\n            # right now, that's just the readme\n            if p.readme != pkg.readme:\n                return (\n                    pkg_id,\n                    None,\n                    {\"id\": p.id, \"readme\": pkg.readme, \"updated_at\": self.now},\n                )\n            else:\n                # existing package, no change\n                return pkg_id, None, None\n\n    def diff_url(self, pkg: Crate, new_urls: dict[URLKey, URL]) -> dict[UUID, UUID]:\n        \"\"\"\n        Identifies the correct URL for this crate, based on fetched data and all URL\n        strings collected so far\n\n        Returns:\n            resolved_urls: dict[UUID, UUID], the resolved URL for this crate\n        \"\"\"\n        resolved_urls: dict[UUID, UUID] = {}\n\n        urls: list[URLKey] = [\n            URLKey(pkg.homepage, self.config.url_types.homepage),\n            URLKey(pkg.repository, self.config.url_types.repository),\n            URLKey(pkg.documentation, self.config.url_types.documentation),\n        ] + ([URLKey(pkg.source, self.config.url_types.source)] if pkg.source else [])\n\n        for url_key in urls:\n            url = url_key.url\n            url_type = url_key.url_type_id\n\n            # guard: no URL\n            if not url:\n                continue\n\n            resolved_url_id: UUID\n\n            if url_key in new_urls:\n                # if we've already tried to create this URL, use that one\n                resolved_url_id = new_urls[url_key].id\n            elif url_key in self.caches.url_map:\n                # if it's already in the database, let's use that one\n                resolved_url_id = self.caches.url_map[url_key].id\n            else:\n                # most will be here because it's the first run of clean data\n                new_url = URL(\n                 
   id=uuid4(),\n                    url=url,\n                    url_type_id=url_type,\n                    created_at=self.now,\n                    updated_at=self.now,\n                )\n                resolved_url_id = new_url.id\n\n                # NOTE: THIS IS SUPER IMPORTANT\n                # we're adding to new_urls here, not just in main\n                new_urls[url_key] = new_url\n\n            resolved_urls[url_type] = resolved_url_id\n\n        return resolved_urls\n\n    def diff_pkg_url(\n        self, pkg_id: UUID, resolved_urls: dict[UUID, UUID]\n    ) -> tuple[list[PackageURL], list[dict]]:\n        \"\"\"Takes in a package_id and resolved URLs from diff_url, and generates\n        new PackageURL objects as well as a list of changes to existing ones\n\n        Inputs:\n          - pkg_id: the id of the package\n          - resolved_urls: a map of url types to final URL ID for this pkg\n\n        Outputs:\n          - new_package_urls: a list of new PackageURL objects\n          - updated_package_urls: a list of changes to existing PackageURL objects\n\n        TODO:\n          - We're updating every single package_url entity, which takes time. We should\n            check if the latest URL has changed, and if so, only update that one.\n        \"\"\"\n        new_links: list[PackageURL] = []\n        updates: list[dict] = []\n\n        # what are the existing links?\n        existing: set[UUID] = {\n            pu.url_id for pu in self.caches.package_urls.get(pkg_id, set())\n        }\n\n        # for the correct URL type / URL for this package:\n        for _url_type, url_id in resolved_urls.items():\n            if url_id not in existing:\n                # new link!\n                new_links.append(\n                    PackageURL(\n                        id=uuid4(),\n                        package_id=pkg_id,\n                        url_id=url_id,\n                        created_at=self.now,\n                        updated_at=self.now,\n                    )\n                )\n            else:\n                # TODO: this should only happen for `latest` URLs\n                # there is an existing link between this URL and this package\n                # let's find it\n                existing_pu = next(\n                    pu for pu in self.caches.package_urls[pkg_id] if pu.url_id == url_id\n                )\n                existing_pu.updated_at = self.now\n                updates.append({\"id\": existing_pu.id, \"updated_at\": self.now})\n\n        return new_links, updates\n\n    def diff_deps(\n        self, pkg: Crate\n    ) -> tuple[list[LegacyDependency], list[LegacyDependency]]:\n        \"\"\"\n        Identifies new and removed dependencies for a given crate\n\n        The process is:\n           1. Build a view of what the package's dependencies are according to\n              the crates.io database.\n           2. Get this crate's Package ID from CHAI\n           3. Get this crate's existing dependencies from CHAI\n           4. 
Compare the two sets, and identify new and removed dependencies\n\n        Note: The database has a unique constraint on (package_id, dependency_id),\n        so if a package depends on the same dependency with multiple types (e.g.,\n        both runtime and build), we choose the highest priority type:\n        NORMAL (runtime) > BUILD > DEV\n\n        Returns:\n            new_deps: list[LegacyDependency], the new dependencies\n            removed_deps: list[LegacyDependency], the removed dependencies\n        \"\"\"\n        new_deps: list[LegacyDependency] = []\n        removed_deps: list[LegacyDependency] = []\n\n        # First, collect all dependencies and deduplicate by (package_id, dependency_id)\n        # choosing the highest priority dependency type for each unique dependency\n        dependency_map: dict[UUID, DependencyType] = {}\n\n        # Priority order: NORMAL (runtime) > BUILD > DEV\n        priority_order = {\n            DependencyType.NORMAL: 1,\n            DependencyType.BUILD: 2,\n            DependencyType.DEV: 3,\n        }\n\n        # Build the map of dependencies, keeping only the highest priority type\n        if pkg.latest_version:\n            for dependency in pkg.latest_version.dependencies:\n                dep_crate_id: str = str(dependency.dependency_id)\n                dep_type: DependencyType = dependency.dependency_type\n\n                # guard: no dep_id\n                if not dep_crate_id:\n                    raise ValueError(f\"No dep_id for {dependency}\")\n\n                # guard: no dep_type\n                if dep_type is None:\n                    raise ValueError(f\"No dep_type for {dependency}\")\n\n                # get the ID from the cache\n                dependency_pkg = self.caches.package_map.get(dep_crate_id)\n\n                # if we don't have the dependency, skip it for now\n                if not dependency_pkg:\n                    self.logger.debug(\n                        f\"{dep_crate_id}, dependency of {pkg.name} is new\"\n                    )\n                    continue\n\n                dependency_id = dependency_pkg.id\n\n                # If this dependency already exists in our map, choose higher priority\n                if dependency_id in dependency_map:\n                    existing_priority = priority_order.get(\n                        dependency_map[dependency_id], 999\n                    )\n                    new_priority = priority_order.get(dep_type, 999)\n\n                    if (\n                        new_priority < existing_priority\n                    ):  # Lower number = higher priority\n                        old_type = dependency_map[dependency_id]\n                        dependency_map[dependency_id] = dep_type\n                        self.logger.debug(\n                            f\"Updated dependency type for {dep_crate_id} from \"\n                            f\"{old_type} to {dep_type} (higher priority)\"\n                        )\n                else:\n                    dependency_map[dependency_id] = dep_type\n\n        # Now build the actual set of dependencies with resolved types\n        actual: set[tuple[UUID, UUID]] = set()\n        for dependency_id, dep_type in dependency_map.items():\n            # figure out the dependency type UUID\n            dependency_type = self._resolve_dep_type(dep_type)\n            # add it to the set of actual dependencies\n            actual.add((dependency_id, dependency_type))\n\n        # establish the package that we are 
working with\n        crate_id: str = str(pkg.id)\n        package = self.caches.package_map.get(crate_id)\n        if not package:\n            # TODO: handle this case, though it fixes itself on the next run\n            self.logger.debug(f\"New package {pkg.name}, will grab its deps next time\")\n            return [], []\n\n        pkg_id: UUID = package.id\n\n        # what are its existing dependencies?\n        # specifically, existing dependencies IN THE SAME STRUCTURE as `actual`,\n        # so we can do an easy comparison\n        existing: set[tuple[UUID, UUID]] = {\n            (dep.dependency_id, dep.dependency_type_id)\n            for dep in self.caches.dependencies.get(pkg_id, set())\n        }\n\n        # we have two sets!\n        # actual minus existing = new_deps\n        # existing minus actual = removed_deps\n        new = actual - existing\n        removed = existing - actual\n\n        new_deps: list[LegacyDependency] = [\n            LegacyDependency(\n                # don't include the ID because it's a sequence for this table\n                package_id=pkg_id,\n                dependency_id=dep[0],\n                dependency_type_id=dep[1],\n                created_at=self.now,\n                updated_at=self.now,\n            )\n            for dep in new\n        ]\n\n        # get the existing legacy dependency, and add it to removed_deps\n        removed_deps: list[LegacyDependency] = []\n        cache_deps: set[LegacyDependency] = self.caches.dependencies.get(pkg_id, set())\n        for removed_dep_id, removed_dep_type in removed:\n            try:\n                existing_dep = next(\n                    dep\n                    for dep in cache_deps\n                    if dep.dependency_id == removed_dep_id\n                    and dep.dependency_type_id == removed_dep_type\n                )\n\n                removed_deps.append(existing_dep)\n            except StopIteration as exc:\n                cache_deps_str = \"\\n\".join(\n                    [\n                        f\"{dep.dependency_id} / {dep.dependency_type_id}\"\n                        for dep in cache_deps\n                    ]\n                )\n                raise ValueError(\n                    f\"Removing {removed_dep_id} / {removed_dep_type} for {pkg_id} but not in Cache: \\n{cache_deps_str}\"\n                ) from exc\n\n        return new_deps, removed_deps\n\n    def _resolve_dep_type(self, dep_type: DependencyType) -> UUID:\n        \"\"\"\n        Resolves the dependency type UUID from the config\n        \"\"\"\n        if dep_type == DependencyType.NORMAL:\n            return self.config.dependency_types.runtime\n        elif dep_type == DependencyType.BUILD:\n            return self.config.dependency_types.build\n        elif dep_type == DependencyType.DEV:\n            return self.config.dependency_types.development\n        else:\n            raise ValueError(f\"Unknown dependency type: {dep_type}\")\n"
  },
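`diff_deps` deduplicates on (package_id, dependency_id) by keeping the highest-priority dependency kind whenever a crate lists the same dependency more than once. A self-contained sketch of that rule, with a reduced enum standing in for `package_managers/crates/structs.DependencyType` and string keys standing in for package UUIDs:

```python
# Illustrative sketch of the priority-based deduplication in Diff.diff_deps:
# when the same dependency appears with several kinds, the highest-priority
# kind wins (NORMAL > BUILD > DEV). Enum values mirror crates/structs.py.
from enum import IntEnum


class DependencyType(IntEnum):
    NORMAL = 0
    BUILD = 1
    DEV = 2


PRIORITY = {DependencyType.NORMAL: 1, DependencyType.BUILD: 2, DependencyType.DEV: 3}


def dedupe(pairs: list[tuple[str, DependencyType]]) -> dict[str, DependencyType]:
    """Keep one dependency type per dependency id, preferring lower priority numbers."""
    resolved: dict[str, DependencyType] = {}
    for dep_id, dep_type in pairs:
        current = resolved.get(dep_id)
        if current is None or PRIORITY[dep_type] < PRIORITY[current]:
            resolved[dep_id] = dep_type
    return resolved


# "serde" appears as both a build and a runtime dependency; runtime (NORMAL) wins.
print(dedupe([("serde", DependencyType.BUILD), ("serde", DependencyType.NORMAL)]))
```

This is why the unique constraint on (package_id, dependency_id) never trips even when crates declare the same dependency under multiple kinds.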
  {
    "path": "package_managers/crates/main.py",
    "content": "from uuid import UUID\n\nfrom core.config import Config, PackageManager\nfrom core.fetcher import TarballFetcher\nfrom core.logger import Logger\nfrom core.models import (\n    URL,\n    LegacyDependency,\n    Package,\n    PackageURL,\n)\nfrom core.structs import Cache, URLKey\nfrom package_managers.crates.db import CratesDB\nfrom package_managers.crates.diff import Diff\nfrom package_managers.crates.transformer import CratesTransformer\n\n\ndef identify_deletions(transformer: CratesTransformer, db: CratesDB) -> set[int]:\n    \"\"\"\n    Identifies crates that are in the db but not in the transformer\n\n    Cargo enables deletion of crates from the registry, if:\n      - the crate has been published for less than 72 hours\n      - the crate only has a single owner\n      - the crate has been downloaded less than 500 times for each month it has been\n      - the crate is not depended upon by any other crate on crates.io\n\n    The risk is that the namespace for an invalid import_id is now available, and\n    might be taken by a new crate, which would violate our uniqueness constraint on\n    derived_id\n\n    Returns:\n      - a set of import_ids that are in the db but not in the transformer\n\n    References:\n      - https://crates.io/policies\n      - https://rurust.github.io/cargo-docs-ru/policies.html\n    \"\"\"\n    logger = Logger(\"crates_identify_deletions\")\n\n    # db needs to know the cargo id to chai id\n    cargo_id_to_chai_id: dict[str, UUID] = db.get_cargo_id_to_chai_id()\n\n    transformer_import_ids: set[int] = {int(c.id) for c in transformer.crates.values()}\n    db_import_ids: set[int] = {int(p) for p in cargo_id_to_chai_id}\n\n    # calculate deletions\n    deletions: set[int] = db_import_ids - transformer_import_ids\n    if deletions:\n        logger.warn(\n            f\"There are {len(deletions)} crates in the db but not in the registry\"\n        )\n\n    return deletions\n\n\ndef main(config: Config, db: CratesDB):\n    logger = Logger(\"crates_main\")\n    logger.log(\"Starting crates_main\")\n\n    # fetch the files from cargo\n    if config.exec_config.fetch:\n        fetcher: TarballFetcher = TarballFetcher(\n            \"crates\",\n            str(config.pm_config.source),\n            config.exec_config.no_cache,\n            config.exec_config.test,\n        )\n        files = fetcher.fetch()\n        logger.log(f\"Fetched {len(files)} files\")\n\n    # write the files to disk\n    if not config.exec_config.fetch and not config.exec_config.no_cache:\n        fetcher.write(files)\n        logger.log(\"Wrote files to disk\")\n\n    # transform the files into a list of crates\n    transformer = CratesTransformer(config)\n    transformer.parse()\n    logger.log(f\"Parsed {len(transformer.crates)} crates\")\n\n    # identify crates we need to delete from CHAI because they are no longer on cargo\n    deletions = identify_deletions(transformer, db)\n    logger.log(f\"Identified {len(deletions)} crates to delete\")\n    if deletions:\n        db.delete_packages_by_import_id(deletions)\n        logger.log(f\"Deleted {len(deletions)} crates\")\n\n    # to build the cache, we need the graph object from the db and the URLs\n    db.set_current_graph()\n    crates_urls: set[str] = set()\n    for crate in transformer.crates.values():\n        crates_urls.add(crate.homepage)\n        crates_urls.add(crate.repository)\n        crates_urls.add(crate.documentation)\n    db.set_current_urls(crates_urls)\n\n    cache = Cache(\n        db.graph.package_map,\n 
       db.urls.url_map,\n        db.urls.package_urls,\n        db.graph.dependencies,\n    )\n    logger.log(\"Built cache\")\n\n    # now, we can do the diff\n    new_packages: list[Package] = []\n    updated_packages: list[dict] = []\n    new_urls: dict[URLKey, URL] = {}\n    new_package_urls: list[PackageURL] = []\n    updated_package_urls: list[dict] = []\n    new_deps: list[LegacyDependency] = []\n    removed_deps: list[LegacyDependency] = []\n\n    diff = Diff(config, cache)\n    for pkg in transformer.crates.values():\n        pkg_id, pkg_obj, update_payload = diff.diff_pkg(pkg)\n        if pkg_obj:\n            new_packages.append(pkg_obj)\n        if update_payload:\n            updated_packages.append(update_payload)\n\n        # URLs\n        resolved_urls = diff.diff_url(pkg, new_urls)\n\n        # package URLs\n        new_links, updated_links = diff.diff_pkg_url(pkg_id, resolved_urls)\n        if new_links:\n            new_package_urls.extend(new_links)\n        if updated_links:\n            updated_package_urls.extend(updated_links)\n\n        # finally, dependencies\n        new_dependencies, removed_dependencies = diff.diff_deps(pkg)\n        if new_dependencies:\n            new_deps.extend(new_dependencies)\n        if removed_dependencies:\n            removed_deps.extend(removed_dependencies)\n\n    logger.log(f\"Diffed {len(transformer.crates)} crates!\")\n\n    # make new_urls a list of new URLs\n    final_new_urls = list(new_urls.values())\n\n    db.ingest(\n        new_packages,\n        final_new_urls,\n        new_package_urls,\n        new_deps,\n        removed_deps,\n        updated_packages,\n        updated_package_urls,\n    )\n\n    logger.log(\"✅ Done\")\n\n\nif __name__ == \"__main__\":\n    config = Config(PackageManager.CRATES)\n    db = CratesDB(config)\n    main(config, db)\n"
  },
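`identify_deletions` reduces to plain set arithmetic over import_ids: whatever exists in CHAI but is absent from the freshly parsed registry dump is a deletion candidate. A tiny illustration with made-up ids:

```python
# Set difference behind identify_deletions(): ids present in the database but
# missing from the parsed crates.io dump are deletion candidates. Ids are made up.
db_import_ids: set[int] = {1, 2, 3, 4}
registry_import_ids: set[int] = {1, 2, 4}

deletions = db_import_ids - registry_import_ids
print(deletions)  # {3}: crate 3 disappeared from the registry since the last run
```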
  {
    "path": "package_managers/crates/structs.py",
    "content": "from dataclasses import dataclass, field\nfrom datetime import datetime\nfrom enum import IntEnum\nfrom typing import TypedDict\nfrom uuid import UUID\n\n\nclass DependencyType(IntEnum):\n    \"\"\"\n    The kind of dependency from the crates.io database\n\n    - NORMAL: normal dependency (default)\n    - BUILD: build dependency (used for build scripts)\n    - DEV: dev dependency (used for testing or benchmarking)\n\n    Resources:\n    - https://doc.rust-lang.org/cargo/reference/specifying-dependencies.html\n    \"\"\"\n\n    NORMAL = 0\n    BUILD = 1  # used for build scripts\n    DEV = 2  # used for testing or benchmarking\n    OPTIONAL = 3\n\n    def __str__(self):\n        return self.name.lower()\n\n\n@dataclass\nclass CrateDependency:\n    crate_id: int\n    dependency_id: int\n    dependency_type: DependencyType  # kind\n    semver_range: str  # req\n\n\n@dataclass\nclass CrateUser:\n    # from users.csv or teams.csv\n    id: int\n    name: str | None = None\n    github_username: str | None = None\n\n\n@dataclass\nclass CrateLatestVersion:\n    # latest version ID is from default_versions.csv\n    # data is from versions.csv\n    id: int\n    checksum: str\n    downloads: int\n    license: str\n    num: str\n    published_at: datetime\n    published_by: CrateUser | None = None\n    # dependencies.csv\n    dependencies: list[CrateDependency] = field(default_factory=list)\n\n\n@dataclass\nclass Crate:\n    # from crates.csv\n    id: int\n    name: str\n    readme: str\n    homepage: str\n    repository: str\n    documentation: str\n    source: str | None = None\n    # from versions.csv\n    latest_version: CrateLatestVersion | None = None\n\n\nclass CanonUpdatePayload(TypedDict):\n    \"\"\"Type-safe structure for canon update operations.\"\"\"\n\n    id: UUID\n    name: str\n    updated_at: datetime\n\n\nclass CanonPackageUpdatePayload(TypedDict):\n    \"\"\"Type-safe structure for canon package update operations.\"\"\"\n\n    id: UUID\n    canon_id: UUID\n    updated_at: datetime\n"
  },
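The two TypedDicts at the bottom of structs.py exist so canon update payloads are checked by the type checker instead of being passed around as loose dicts. A small sketch of how such a payload is built; the values are placeholders:

```python
# Hedged sketch of using the CanonUpdatePayload shape from structs.py: a type
# checker (mypy/pyright) flags missing or misspelled keys, unlike a plain dict.
from datetime import datetime
from typing import TypedDict
from uuid import UUID, uuid4


class CanonUpdatePayload(TypedDict):  # mirrors the definition above
    id: UUID
    name: str
    updated_at: datetime


payload: CanonUpdatePayload = {
    "id": uuid4(),
    "name": "serde",
    "updated_at": datetime.now(),
}
print(sorted(payload))  # ['id', 'name', 'updated_at']
```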
  {
    "path": "package_managers/crates/transformer.py",
    "content": "import csv\nfrom collections.abc import Generator\n\nfrom core.config import Config\nfrom core.transformer import Transformer\nfrom core.utils import is_github_url\nfrom package_managers.crates.structs import (\n    Crate,\n    CrateDependency,\n    CrateLatestVersion,\n    CrateUser,\n    DependencyType,\n)\n\n\nclass CratesTransformer(Transformer):\n    def __init__(self, config: Config):\n        super().__init__(\"crates\")\n        self.config = config\n        self.crates: dict[int, Crate] = {}\n\n        # files we need to parse\n        self.files: dict[str, str] = {\n            \"crates\": \"crates.csv\",\n            \"latest_versions\": \"default_versions.csv\",\n            \"versions\": \"versions.csv\",\n            \"dependencies\": \"dependencies.csv\",\n            \"users\": \"users.csv\",\n            \"teams\": \"teams.csv\",\n        }\n\n    def _open_csv(self, file_name: str) -> Generator[dict[str, str], None, None]:\n        try:\n            file_path = self.finder(self.files[file_name])\n            with open(file_path, newline=\"\", encoding=\"utf-8\") as f:\n                reader = csv.DictReader(f)\n                yield from reader\n        except KeyError as exc:\n            raise KeyError(\n                f\"Missing {file_name} from self.files: {self.files}\"\n            ) from exc\n        except FileNotFoundError as exc:\n            self.logger.error(f\"Missing {file_path} from data directory\")\n            raise FileNotFoundError(f\"Missing {file_path} file\") from exc\n        except Exception as e:\n            self.logger.error(f\"Error reading {file_path}: {e}\")\n            raise e\n\n    def parse(self) -> None:\n        # first go through crates.csv to\n        # here, we can get the import_id, name, homepage, documentation, repository\n        # and also source, from repo if it is like GitHub\n        for row in self._open_csv(\"crates\"):\n            crate_id = int(row[\"id\"])\n            name = row[\"name\"]\n            readme = row[\"readme\"]\n\n            # URLs:\n            homepage = self.canonicalize(row[\"homepage\"])\n            documentation = self.canonicalize(row[\"documentation\"])\n            repository = self.canonicalize(row[\"repository\"])\n\n            source: str | None = None\n            if is_github_url(repository):\n                source = repository\n\n            crate = Crate(\n                crate_id, name, readme, homepage, repository, documentation, source\n            )\n            self.crates[crate_id] = crate\n\n        self.logger.log(f\"Parsed {len(self.crates)} crates\")\n\n        # populate the map of crate_id to latest_version_id & all latest_version_ids\n        latest_versions: set[int]\n        latest_versions_map: dict[int, int]\n        latest_versions, latest_versions_map = self._load_latest_versions()\n        self.logger.log(f\"Loaded {len(latest_versions)} latest versions\")\n\n        # also build the map of user_id to CrateUser object\n        users: dict[int, CrateUser] = self._load_users()\n        self.logger.log(f\"Loaded {len(users)} users\")\n\n        # now, iterate through the versions.csv, and populate LatestVersion objects,\n        # only if the version_id is in the latest_versions set\n        for row in self._open_csv(\"versions\"):\n            version_id = int(row[\"id\"])\n            crate_id = int(row[\"crate_id\"])\n\n            # ignore if this version is not the latest\n            if version_id not in latest_versions:\n                
continue\n\n            checksum = row[\"checksum\"]\n            downloads = int(row[\"downloads\"])\n            license = row[\"license\"]\n            num = row[\"num\"]\n            published_at = row[\"created_at\"]\n\n            # make a CrateUser object from the published_by\n            published_by = row[\"published_by\"]\n            published_by_user: CrateUser | None = (\n                users[int(published_by)] if published_by else None\n            )\n\n            latest_version = CrateLatestVersion(\n                version_id,\n                checksum,\n                downloads,\n                license,\n                num,\n                published_at,\n                published_by_user,\n            )\n\n            # map this LatestVersion to the crate in self.crates\n            self.crates[crate_id].latest_version = latest_version\n\n        self.logger.log(\"Parsed the latest versions for each crate\")\n\n        # finally, parse through the dependencies.csv\n        # again, we only care about the dependencies for the latest version\n        for row in self._open_csv(\"dependencies\"):\n            start_id = int(row[\"version_id\"])\n\n            # ignore if this version is not the latest\n            if start_id not in latest_versions:\n                continue\n\n            # map both ids to crates\n            end_crate_id = int(row[\"crate_id\"])\n            start_crate_id = int(latest_versions_map[start_id])\n\n            # guard\n            if start_crate_id not in self.crates:\n                raise ValueError(f\"Crate {start_crate_id} not found in self.crates\")\n\n            kind = int(row[\"kind\"])\n\n            # guard\n            if kind not in [0, 1, 2]:\n                raise ValueError(f\"Unknown dependency kind: {kind}\")\n\n            dependency_type = DependencyType(kind)\n            semver = row[\"req\"]\n\n            dependency = CrateDependency(\n                start_crate_id, end_crate_id, dependency_type, semver\n            )\n\n            # add this dependency to the crate\n            self.crates[start_crate_id].latest_version.dependencies.append(dependency)\n\n        self.logger.log(\"Parsed the dependencies for each crate\")\n\n    def _load_latest_versions(self) -> tuple[set[int], dict[int, int]]:\n        latest_versions: set[int] = set()\n        latest_versions_map: dict[int, int] = {}\n        for row in self._open_csv(\"latest_versions\"):\n            crate_id = int(row[\"crate_id\"])\n            version_id = int(row[\"version_id\"])\n            latest_versions.add(version_id)\n            latest_versions_map[version_id] = crate_id\n\n        return latest_versions, latest_versions_map\n\n    def _load_users(self) -> dict[int, CrateUser]:\n        users: dict[int, CrateUser] = {}\n        for row in self._open_csv(\"users\"):\n            user_id = int(row[\"id\"])\n            name = row[\"name\"]\n            github_username = row[\"gh_login\"]\n            user = CrateUser(user_id, name, github_username)\n            users[user_id] = user\n\n        return users\n"
  },
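The transformer only keeps the latest version of each crate, using default_versions.csv to build both the set of latest version ids and the version-to-crate map that later resolves dependency rows. A self-contained sketch of that pass, run against an in-memory CSV so it needs no data files (real input comes from the crates.io database dump):

```python
# Minimal sketch of the default_versions.csv pass in _load_latest_versions().
import csv
import io

sample = "crate_id,version_id\n1,10\n2,20\n"  # stand-in for default_versions.csv

latest_versions: set[int] = set()
latest_versions_map: dict[int, int] = {}

for row in csv.DictReader(io.StringIO(sample)):
    crate_id = int(row["crate_id"])
    version_id = int(row["version_id"])
    latest_versions.add(version_id)
    latest_versions_map[version_id] = crate_id

print(latest_versions)      # {10, 20}
print(latest_versions_map)  # {10: 1, 20: 2}
```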
  {
    "path": "package_managers/debian/Dockerfile",
    "content": "FROM ghcr.io/astral-sh/uv:python3.11-bookworm-slim\n\n# Copy everything\nCOPY . .\n\n# Install core requirements using uv\nWORKDIR /core\nRUN uv pip install --system -r requirements.txt\n\nWORKDIR /\n# Run the main application\nCMD [\"python\", \"/package_managers/debian/main.py\"]\n"
  },
  {
    "path": "package_managers/debian/README.md",
    "content": "# Debian\n\n## Data Structure\n\n- Source represents the original upstream as Debian receives\n- Package is a binary that users can install\n- Sources can specify multiple binaries\n- All packages need not specify a source (transitory or virtual packages)\n\n## Scripts\n\n- `investigate_sources.py` can be run on the downloaded data dump from Debian, and\n  prints information about the data integrity\n\n## Approach\n\nThere is a many to one mapping between Packages and Sources. During the load step, we\npopulate the map between Packages and Sources (as in @investigate_sources), because\ninformation about a Debian package can be fetched from both data sources. While the\nparser currently captures all the information for each Package and Source (keep as-is),\nwe only end up loading the following information for a package from each source:\n\nSource:\n\n- Vcs-Browser => URL, PackageURL\n- Vcs-Git => URL, PackageURL\n- Build-Depends => LegacyDependency\n- Maintainer => User, UserPackage\n- Uploaders => User, UserPackage\n\nPackage:\n\n- Depends => LegacyDependency\n- Pre-Depends => LegacyDependency\n- Description => Package\n- Homepage => URL, PackageURL\n"
  },
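As a rough illustration of the mapping above, here is a made-up Packages stanza and the subset of fields the indexer cares about; the real parsing (continuation lines, Sources enrichment, URL normalization) lives in parser.py and debian_sources.py:

```python
# Hedged sketch: pick out the Package-level fields listed in the README from a
# single (invented) stanza in Debian control format.
stanza = """Package: ripgrep
Version: 14.1.0-1
Depends: libc6 (>= 2.34)
Description: recursively searches directories for a regex pattern
Homepage: https://github.com/BurntSushi/ripgrep
"""

kept: dict[str, str] = {}
for line in stanza.splitlines():
    key, _, value = line.partition(":")
    if key in {"Package", "Depends", "Pre-Depends", "Description", "Homepage"}:
        kept[key] = value.strip()

print(kept)
```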
  {
    "path": "package_managers/debian/db.py",
    "content": "#!/usr/bin/env pkgx uv run\n\nfrom core.config import Config\nfrom core.db import DB, CurrentURLs\nfrom core.structs import CurrentGraph, DiffResult\n\n\nclass DebianDB(DB):\n    def __init__(self, logger_name: str, config: Config):\n        super().__init__(logger_name)\n        self.config = config\n\n    def set_current_graph(self) -> None:\n        \"\"\"Get the debian packages and dependencies\"\"\"\n        self.graph: CurrentGraph = self.current_graph(self.config.pm_config.pm_id)\n\n    def set_current_urls(self, urls: set[str]) -> None:\n        \"\"\"Getting all the URLs and Package URLs from the database\"\"\"\n        self.urls: CurrentURLs = self.current_urls(urls)\n\n    def ingest_wrapper(self, diff_result: DiffResult) -> None:\n        \"\"\"Wrapper for the main ingest function to handle DiffResult\"\"\"\n        final_new_urls = list(diff_result.new_urls.values())\n        self.ingest(\n            diff_result.new_packages,\n            final_new_urls,\n            diff_result.new_package_urls,\n            diff_result.new_deps,\n            diff_result.removed_deps,\n            diff_result.updated_packages,\n            diff_result.updated_package_urls,\n        )\n"
  },
  {
    "path": "package_managers/debian/debian_sources.py",
    "content": "from core.logger import Logger\nfrom package_managers.debian.parser import DebianParser\nfrom package_managers.debian.structs import DebianData\n\n\ndef build_package_to_source_mapping(\n    sources_file_path: str, logger: Logger\n) -> dict[str, DebianData]:\n    \"\"\"\n    Build a mapping from binary package names to their source information.\n\n    Args:\n        sources_file_path: Path to the sources file\n        test: Whether to limit parsing for testing\n\n    Returns:\n        Dictionary mapping binary package names to source DebianData objects\n    \"\"\"\n    # Parse sources file\n    with open(sources_file_path) as f:\n        sources_content = f.read()\n    sources_parser = DebianParser(sources_content)\n\n    # Build mapping: binary_package_name -> source_debian_data\n    package_to_source: dict[str, DebianData] = {}\n\n    for source_data in sources_parser.parse():\n        # Each source may produce multiple binary packages\n        if source_data.binary:\n            # Source has explicit binary list\n            for binary_name in source_data.binary:\n                binary_name = binary_name.strip()\n                if binary_name:\n                    package_to_source[binary_name] = source_data\n        else:\n            # No explicit binary list, assume source name == binary name\n            if source_data.package:\n                package_to_source[source_data.package] = source_data\n\n    logger.log(\n        f\"Built mapping for {len(package_to_source)} binary packages from sources\"\n    )\n    return package_to_source\n\n\ndef enrich_package_with_source(\n    package_data: DebianData, source_mapping: dict[str, DebianData], logger: Logger\n) -> DebianData:\n    \"\"\"\n    Enrich a package with its corresponding source information.\n\n    Args:\n        package_data: The package data from packages file\n        source_mapping: Mapping from package names to source data\n\n    Returns:\n        Enriched DebianData with both package and source information\n    \"\"\"\n    # Start with the package data\n    enriched = package_data\n\n    # Determine source name\n    binary_name = package_data.package\n\n    # Look up source information\n    if binary_name in source_mapping:\n        source_data = source_mapping[binary_name]\n\n        # Enrich package with source information\n        # Only add source fields that aren't already populated\n        if not enriched.vcs_browser and source_data.vcs_browser:\n            enriched.vcs_browser = source_data.vcs_browser\n        if not enriched.vcs_git and source_data.vcs_git:\n            enriched.vcs_git = source_data.vcs_git\n        if not enriched.directory and source_data.directory:\n            enriched.directory = source_data.directory\n        if not enriched.build_depends and source_data.build_depends:\n            enriched.build_depends = source_data.build_depends\n        if not enriched.homepage and source_data.homepage:\n            enriched.homepage = source_data.homepage\n\n    else:\n        # Log warning for missing source\n        source_name = package_data.source or package_data.package\n        logger.warn(\n            f\"Binary '{binary_name}' of source '{source_name}' was not found in sources file\"\n        )\n\n    return enriched\n"
  },
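`enrich_package_with_source` only fills fields the binary package has not set itself; source-level values never overwrite package-level ones. A quick self-contained illustration with a stripped-down stand-in for DebianData (field names match, values are invented):

```python
# Sketch of the fill-only-gaps rule in enrich_package_with_source().
from dataclasses import dataclass


@dataclass
class Pkg:  # illustrative stand-in for DebianData
    package: str
    homepage: str = ""
    vcs_git: str = ""


binary = Pkg("ripgrep", homepage="https://example.org/ripgrep")
source = Pkg(
    "rust-ripgrep",
    homepage="https://crates.io/crates/ripgrep",
    vcs_git="https://github.com/BurntSushi/ripgrep",
)

if not binary.homepage and source.homepage:
    binary.homepage = source.homepage  # skipped: binary already has a homepage
if not binary.vcs_git and source.vcs_git:
    binary.vcs_git = source.vcs_git    # filled from the source stanza

print(binary)
```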
  {
    "path": "package_managers/debian/diff.py",
    "content": "#!/usr/bin/env pkgx uv run\n\nfrom datetime import datetime\nfrom uuid import UUID, uuid4\n\nfrom core.config import Config\nfrom core.logger import Logger\nfrom core.models import URL, LegacyDependency, Package, PackageURL\nfrom core.structs import Cache, URLKey\nfrom core.utils import is_github_url\nfrom package_managers.debian.db import DebianDB\nfrom package_managers.debian.parser import DebianData\nfrom package_managers.debian.structs import Depends\n\n\nclass DebianDiff:\n    def __init__(self, config: Config, caches: Cache, db: DebianDB, logger: Logger):\n        self.config = config\n        self.now = datetime.now()\n        self.caches = caches\n        self.db = db\n        self.logger = logger\n\n    def diff_pkg(\n        self, import_id: str, debian_data: DebianData\n    ) -> tuple[UUID, Package | None, dict | None]:\n        \"\"\"\n        Checks if the given package is in the package_cache.\n\n        Returns:\n          - pkg_id: the id of the package\n          - package: If new, returns a new package object. If existing, returns None\n          - changes: a dictionary of changes (description updates)\n        \"\"\"\n        self.logger.debug(f\"Diffing package: {import_id}\")\n\n        if import_id not in self.caches.package_map:\n            # new package\n            name = import_id.split(\"/\")[1]\n            p = Package(\n                id=uuid4(),\n                derived_id=import_id,\n                name=name,\n                package_manager_id=self.config.pm_config.pm_id,\n                import_id=import_id,\n                readme=debian_data.description,\n                created_at=self.now,\n                updated_at=self.now,\n            )\n            pkg_id: UUID = p.id\n            return pkg_id, p, {}\n        else:\n            # the package exists, check if description has changed\n            existing_pkg = self.caches.package_map[import_id]\n            pkg_id = existing_pkg.id\n\n            # Check if description (readme) has changed\n            if existing_pkg.readme != debian_data.description:\n                update_payload = {\n                    \"id\": pkg_id,\n                    \"readme\": debian_data.description,\n                    \"updated_at\": self.now,\n                }\n                return pkg_id, None, update_payload\n            else:\n                return pkg_id, None, None\n\n    def diff_url(\n        self, import_id: str, debian_data: DebianData, new_urls: dict[URLKey, URL]\n    ) -> dict[UUID, UUID]:\n        \"\"\"Given a package's URLs, returns the resolved URL for this specific package\"\"\"\n        resolved_urls: dict[UUID, UUID] = {}\n\n        # Generate the URLs for this package\n        urls = self._generate_chai_urls(debian_data)\n\n        # Process each URL\n        for url_key in urls:\n            # guard: _generate_chai_urls could be None for a url type\n            if url_key is None:\n                continue\n\n            resolved_url_id: UUID\n\n            if url_key in new_urls:\n                resolved_url_id = new_urls[url_key].id\n            elif url_key in self.caches.url_map:\n                resolved_url_id = self.caches.url_map[url_key].id\n            else:\n                self.logger.debug(\n                    f\"URL {url_key.url} as {url_key.url_type_id} is entirely new\"\n                )\n                new_url = URL(\n                    id=uuid4(),\n                    url=url_key.url,\n                    url_type_id=url_key.url_type_id,\n          
          created_at=self.now,\n                    updated_at=self.now,\n                )\n                resolved_url_id = new_url.id\n                new_urls[url_key] = new_url\n\n            resolved_urls[url_key.url_type_id] = resolved_url_id\n\n        return resolved_urls\n\n    def diff_pkg_url(\n        self, pkg_id: UUID, resolved_urls: dict[UUID, UUID]\n    ) -> tuple[list[PackageURL], list[dict]]:\n        \"\"\"Takes in a package_id and resolved URLs from diff_url, and generates\n        new PackageURL objects as well as a list of changes to existing ones\"\"\"\n\n        new_links: list[PackageURL] = []\n        updates: list[dict] = []\n\n        # what are the existing links?\n        existing: set[UUID] = {\n            pu.url_id for pu in self.caches.package_urls.get(pkg_id, set())\n        }\n\n        # for each URL type/URL for this package:\n        for _url_type, url_id in resolved_urls.items():\n            if url_id not in existing:\n                # new link!\n                new_links.append(\n                    PackageURL(\n                        id=uuid4(),\n                        package_id=pkg_id,\n                        url_id=url_id,\n                        created_at=self.now,\n                        updated_at=self.now,\n                    )\n                )\n            else:\n                # existing link - update timestamp\n                existing_pu = next(\n                    pu for pu in self.caches.package_urls[pkg_id] if pu.url_id == url_id\n                )\n                existing_pu.updated_at = self.now\n                updates.append({\"id\": existing_pu.id, \"updated_at\": self.now})\n\n        return new_links, updates\n\n    def diff_deps(\n        self, import_id: str, debian_data: DebianData\n    ) -> tuple[list[LegacyDependency], list[LegacyDependency]]:\n        \"\"\"\n        Takes in a debian package and figures out what dependencies have changed.\n\n        The process is:\n           1. Build a view of what the package's dependencies are according to\n              the parsed debian data, using priority-based deduplication\n           2. Get this package's ID from CHAI\n           3. Get this package's existing dependencies from CHAI\n           4. 
Compare the two sets, and identify new and removed dependencies\n\n        Note: The database has a unique constraint on (package_id, dependency_id),\n        so if a package depends on the same dependency with multiple types (e.g.,\n        both runtime and build), we choose the highest priority type:\n        Runtime > Build > Test\n\n        Returns:\n          - new_deps: a list of new dependencies\n          - removed_deps: a list of removed dependencies\n        \"\"\"\n        # First, collect all dependencies and deduplicate by dependency name\n        # choosing the highest priority dependency type for each unique dependency\n        dependency_map: dict[str, UUID] = {}\n\n        # Priority order: Runtime > Build > Test\n        priority_order = {\n            self.config.dependency_types.runtime: 1,\n            self.config.dependency_types.build: 2,\n            self.config.dependency_types.test: 3,\n        }\n\n        def process_deps(dependencies: list[Depends], dep_type: UUID) -> None:\n            \"\"\"Helper to process dependencies of a given type with priority\"\"\"\n            for dep in dependencies:\n                dep_name = f\"debian/{dep.package}\"  # bc the map is by import_id\n\n                # Get the dependency package from cache\n                dependency = self.caches.package_map.get(dep_name)\n\n                # try debian/dependency\n                if not dependency:\n                    self.logger.debug(f\"{dep_name} not loaded, will catch next time\")\n                    continue\n\n                # If this dependency already exists in our map, choose higher priority\n                if dep_name in dependency_map:\n                    existing_priority = priority_order.get(\n                        dependency_map[dep_name], 999\n                    )\n                    new_priority = priority_order.get(dep_type, 999)\n\n                    if new_priority < existing_priority:  # Lower is better!\n                        old_type_id = dependency_map[dep_name]\n                        dependency_map[dep_name] = dep_type\n                        self.logger.debug(\n                            f\"Updated dependency type for {dep_name} from \"\n                            f\"{old_type_id} to {dep_type} (higher priority)\"\n                        )\n                else:\n                    dependency_map[dep_name] = dep_type\n\n        # Process different types of dependencies with priority handling\n        # Debian has: depends (runtime), build_depends (build), recommends, suggests, etc.\n        process_deps(debian_data.depends, self.config.dependency_types.runtime)\n        process_deps(debian_data.build_depends, self.config.dependency_types.build)\n        # Map recommends and suggests to runtime for simplicity\n        process_deps(debian_data.recommends, self.config.dependency_types.runtime)\n        process_deps(debian_data.suggests, self.config.dependency_types.runtime)\n\n        # Now build the actual set of dependencies with resolved types\n        actual: set[tuple[UUID, UUID]] = set()\n        for dep_name, dep_type in dependency_map.items():\n            dependency = self.caches.package_map.get(dep_name)\n            if dependency:  # Double-check it still exists\n                actual.add((dependency.id, dep_type))\n\n        # get the package ID for what we are working with\n        package = self.caches.package_map.get(import_id)\n        if not package:\n            self.logger.debug(f\"New package {import_id}, will grab its 
deps next time\")\n            return [], []\n\n        pkg_id: UUID = package.id\n\n        # what are its existing dependencies?\n        # specifically, existing dependencies IN THE SAME STRUCTURE as `actual`,\n        # so we can do an easy comparison\n        existing: set[tuple[UUID, UUID]] = {\n            (dep.dependency_id, dep.dependency_type_id)\n            for dep in self.caches.dependencies.get(pkg_id, set())\n        }\n\n        # we have two sets!\n        # actual minus existing = new_deps\n        # existing minus actual = removed_deps\n        new = actual - existing\n        removed = existing - actual\n\n        new_deps: list[LegacyDependency] = [\n            LegacyDependency(\n                package_id=pkg_id,\n                dependency_id=dep[0],\n                dependency_type_id=dep[1],\n                created_at=self.now,\n                updated_at=self.now,\n            )\n            for dep in new\n        ]\n\n        # get the existing legacy dependency, and add it to removed_deps\n        removed_deps: list[LegacyDependency] = []\n        cache_deps: set[LegacyDependency] = self.caches.dependencies.get(pkg_id, set())\n        for removed_dep_id, removed_dep_type in removed:\n            try:\n                existing_dep = next(\n                    dep\n                    for dep in cache_deps\n                    if dep.dependency_id == removed_dep_id\n                    and dep.dependency_type_id == removed_dep_type\n                )\n                removed_deps.append(existing_dep)\n            except StopIteration as exc:\n                cache_deps_str = \"\\n\".join(\n                    [\n                        f\"{dep.dependency_id} / {dep.dependency_type_id}\"\n                        for dep in cache_deps\n                    ]\n                )\n                raise ValueError(\n                    f\"Removing {removed_dep_id} / {removed_dep_type} for {pkg_id} but not in Cache: \\n{cache_deps_str}\"\n                ) from exc\n\n        return new_deps, removed_deps\n\n    def _generate_chai_urls(self, debian_data: DebianData) -> list[URLKey]:\n        \"\"\"Generate URLs for a debian package\"\"\"\n        urls = []\n\n        # Homepage URL\n        if debian_data.homepage:\n            urls.append(URLKey(debian_data.homepage, self.config.url_types.homepage))\n\n        # Source URL\n        source_url = (\n            debian_data.vcs_git if debian_data.vcs_git else debian_data.vcs_browser\n        )\n        if source_url:\n            urls.append(URLKey(source_url, self.config.url_types.source))\n\n        # Repository URL\n        if is_github_url(source_url):\n            urls.append(URLKey(source_url, self.config.url_types.repository))\n\n        return urls\n"
  },
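`_generate_chai_urls` prefers Vcs-Git over Vcs-Browser for the source URL and only adds a repository entry when that URL looks like GitHub. A sketch of that decision, with plain strings standing in for the UUIDs held in `config.url_types` and a naive prefix check standing in for `core.utils.is_github_url`:

```python
# Hedged sketch of the URL-key generation rules in DebianDiff._generate_chai_urls.
def generate_url_keys(
    homepage: str | None, vcs_git: str | None, vcs_browser: str | None
) -> list[tuple[str, str]]:
    urls: list[tuple[str, str]] = []
    if homepage:
        urls.append((homepage, "homepage"))
    source_url = vcs_git if vcs_git else vcs_browser  # Vcs-Git wins when both exist
    if source_url:
        urls.append((source_url, "source"))
        if source_url.startswith("https://github.com/"):  # stand-in for is_github_url()
            urls.append((source_url, "repository"))
    return urls


print(
    generate_url_keys(
        "https://www.gnu.org/software/hello/",
        "https://github.com/example/hello",
        None,
    )
)
```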
  {
    "path": "package_managers/debian/main.py",
    "content": "#!/usr/bin/env pkgx uv run\n\nimport os\nimport time\nfrom datetime import datetime\nfrom uuid import UUID\n\nfrom core.config import Config, PackageManager\nfrom core.fetcher import GZipFetcher\nfrom core.logger import Logger\nfrom core.models import URL, LegacyDependency, Package, PackageURL\nfrom core.scheduler import Scheduler\nfrom core.structs import Cache, DiffResult, URLKey\nfrom core.utils import file_exists\nfrom package_managers.debian.db import DebianDB\nfrom package_managers.debian.debian_sources import (\n    build_package_to_source_mapping,\n    enrich_package_with_source,\n)\nfrom package_managers.debian.diff import DebianDiff\nfrom package_managers.debian.parser import DebianData, DebianParser\n\nSCHEDULER_ENABLED = os.getenv(\"ENABLE_SCHEDULER\", \"true\").lower() == \"true\"\n\n\ndef fetch(config: Config, logger: Logger) -> tuple[GZipFetcher, GZipFetcher]:\n    \"\"\"Fetches the Debian packages & sources manifest files\"\"\"\n    package_source = config.pm_config.source[0]\n    sources_source = config.pm_config.source[1]\n    no_cache = config.exec_config.no_cache\n    test = config.exec_config.test\n\n    package_fetcher = GZipFetcher(\n        name=\"debian\",\n        source=package_source,\n        no_cache=no_cache,\n        test=test,\n        file_path=\"\",  # will autosave in data/debian/latest\n        file_name=\"packages\",\n    )\n\n    sources_fetcher = GZipFetcher(\n        name=\"debian\",\n        source=sources_source,\n        no_cache=no_cache,\n        test=test,\n        file_path=\"\",  # will autosave in data/debian/latest\n        file_name=\"sources\",\n    )\n\n    # Fetch\n    should_fetch = config.exec_config.fetch\n    if should_fetch:\n        package_files = package_fetcher.fetch()\n        package_fetcher.write(package_files)\n        logger.log(f\"Fetched {len(package_files)} package files\")\n\n        sources_files = sources_fetcher.fetch()\n        sources_fetcher.write(sources_files)\n        logger.log(f\"Fetched {len(sources_files)} sources files\")\n\n    return package_fetcher, sources_fetcher\n\n\ndef diff(\n    data: list[DebianData], config: Config, cache: Cache, db: DebianDB, logger: Logger\n) -> DiffResult:\n    # Keeps track of all the new packages we're adding\n    seen: dict[str, UUID] = {}\n    seen_new_pkg_urls: set[tuple[UUID, UUID]] = set()\n\n    # Objects that we will return\n    new_packages: list[Package] = []\n    new_urls: dict[URLKey, URL] = {}\n    new_package_urls: list[PackageURL] = []\n    updated_packages: list[dict[str, UUID | str | datetime]] = []\n    updated_package_urls: list[dict[str, UUID | datetime]] = []\n    new_deps: list[LegacyDependency] = []\n    removed_deps: list[LegacyDependency] = []\n\n    # Create diff processor\n    diff = DebianDiff(config, cache, db, logger)\n\n    # Process each enriched package\n    for i, debian_data in enumerate(data):\n        import_id = f\"debian/{debian_data.package}\"\n        if not import_id:\n            logger.warn(f\"Skipping package with empty name at index {i}\")\n            continue\n\n        # Diff the package\n        pkg_id, pkg_obj, update_payload = diff.diff_pkg(import_id, debian_data)\n\n        # Guard: if pkg_obj is not None, that means it's a new package\n        # If it's new, **and** we have seen it before, set the ID to what is seen\n        # So, duplicates absorb all URLs & Dependencies under one umbrella\n        resolved_pkg_id = seen.get(pkg_obj.import_id, pkg_id) if pkg_obj else pkg_id\n\n        if pkg_obj and 
pkg_obj.import_id not in seen:\n            logger.debug(f\"New package: {pkg_obj.name}\")\n            new_packages.append(pkg_obj)\n            seen[pkg_obj.import_id] = resolved_pkg_id\n        if update_payload:\n            logger.debug(f\"Updated package: {update_payload['id']}\")\n            updated_packages.append(update_payload)\n\n        # Diff URLs (resolved_urls is map of url types to final URL ID)\n        resolved_urls = diff.diff_url(import_id, debian_data, new_urls)\n\n        # Diff package URLs\n        new_links, updated_links = diff.diff_pkg_url(resolved_pkg_id, resolved_urls)\n        if new_links:\n            logger.debug(f\"New package URLs: {len(new_links)}\")\n\n            # guard: only add truly new links\n            for link in new_links:\n                if (link.package_id, link.url_id) not in seen_new_pkg_urls:\n                    new_package_urls.append(link)\n                    seen_new_pkg_urls.add((link.package_id, link.url_id))\n\n        if updated_links:\n            updated_package_urls.extend(updated_links)\n\n        # Diff dependencies\n        new_dependencies, removed_dependencies = diff.diff_deps(import_id, debian_data)\n        if new_dependencies:\n            logger.debug(f\"New dependencies: {len(new_dependencies)}\")\n            new_deps.extend(new_dependencies)\n        if removed_dependencies:\n            logger.debug(f\"Removed dependencies: {len(removed_dependencies)}\")\n            removed_deps.extend(removed_dependencies)\n\n        # In test mode, limit processing to the first 3 packages to reduce runtime and resource usage.\n        if config.exec_config.test and i > 2:\n            break\n\n    return DiffResult(\n        new_packages,\n        new_urls,\n        new_package_urls,\n        updated_packages,\n        updated_package_urls,\n        new_deps,\n        removed_deps,\n    )\n\n\ndef run_pipeline(config: Config, db: DebianDB, logger: Logger):\n    \"\"\"The Debian Indexer\"\"\"\n\n    package_fetcher, sources_fetcher = fetch(config, logger)\n    input_dir = f\"{sources_fetcher.output}/latest\"\n\n    # Build package-to-source mapping first\n    sources_file_path = file_exists(input_dir, \"sources\")\n    source_mapping = build_package_to_source_mapping(sources_file_path, logger)\n\n    # Parse packages file\n    packages_file_path = file_exists(input_dir, \"packages\")\n    with open(packages_file_path) as f:\n        packages_content = f.read()\n    packages_parser = DebianParser(packages_content)\n\n    # Process each package and enrich with source information\n    enriched_packages: list[DebianData] = []\n    for package_data in packages_parser.parse():\n        enriched_package = enrich_package_with_source(\n            package_data, source_mapping, logger\n        )\n        enriched_packages.append(enriched_package)\n    logger.log(f\"Processed {len(enriched_packages)} enriched packages\")\n\n    # Grab all the URLs from enriched packages\n    all_urls: set[str] = set()\n    for package in enriched_packages:\n        all_urls.add(package.homepage)\n        all_urls.add(package.vcs_browser)\n        all_urls.add(package.vcs_git)\n    logger.log(f\"Found {len(all_urls)} URLs to load\")\n\n    # Set up cache\n    db.set_current_graph()\n    db.set_current_urls(all_urls)\n    cache = Cache(\n        db.graph.package_map,\n        db.urls.url_map,\n        db.urls.package_urls,\n        db.graph.dependencies,\n    )\n    logger.log(\"Setup cache\")\n\n    # Perform the diff\n    result = diff(enriched_packages, 
config, cache, db, logger)\n\n    # Ingest all diffs\n    db.ingest_wrapper(result)\n\n    if config.exec_config.no_cache:\n        package_fetcher.cleanup()\n        sources_fetcher.cleanup()\n\n\ndef main(config: Config, db: DebianDB, logger: Logger):\n    logger.log(\"Initializing Debian package manager\")\n    logger.debug(f\"Config: {config}\")\n\n    if SCHEDULER_ENABLED:\n        logger.log(\"Scheduler enabled. Starting schedule.\")\n        scheduler = Scheduler(\"debian_scheduler\")\n        scheduler.start(run_pipeline, config, db, logger)\n\n        # run immediately as well when scheduling\n        scheduler.run_now(run_pipeline, config, db, logger)\n\n        # keep the main thread alive for scheduler\n        try:\n            while True:\n                time.sleep(3600)\n        except KeyboardInterrupt:\n            scheduler.stop()\n            logger.log(\"Scheduler stopped.\")\n    else:\n        logger.log(\"Scheduler disabled. Running pipeline once.\")\n        run_pipeline(config, db, logger)\n        logger.log(\"Pipeline finished.\")\n\n\nif __name__ == \"__main__\":\n    config = Config(PackageManager.DEBIAN)\n    db = DebianDB(\"debian_db\", config)\n    logger = Logger(\"debian\")\n    main(config, db, logger)\n"
  },
  {
    "path": "package_managers/debian/parser.py",
    "content": "import re\nfrom collections.abc import Iterator\n\nfrom permalint import normalize_url\n\nfrom package_managers.debian.structs import DebianData, Depends, Maintainer\n\n\n# NOTE: The DebianParser is the one which normalizes all the URLs!\nclass DebianParser:\n    def __init__(self, content: str):\n        # content is the Packages or Sources file\n        self.content = content\n\n    def parse(self) -> Iterator[DebianData]:\n        \"\"\"Yield packages and sources from the Packages and Sources files.\"\"\"\n        paragraphs = self.content.split(\"\\n\\n\")\n\n        # iterate over the lines\n        for paragraph in paragraphs:\n            # if the paragraph is empty, then move on\n            if not paragraph.strip():\n                continue\n\n            # each paragraph represents one object\n            obj = DebianData()\n\n            # State for handling multiline fields\n            current_field = None\n            current_value = \"\"\n\n            # populate the object\n            lines = paragraph.split(\"\\n\")\n            for _i, line in enumerate(lines):\n                # if the line is empty, then move on\n                if not line.strip():\n                    continue\n\n                # if the line starts with a tab or space, then it's a continuation of\n                # the previous field\n                if line[0] == \" \" or line[0] == \"\\t\":\n                    # Append continuation line to current field value\n                    if current_field is not None:\n                        current_value += \" \" + line.strip()\n                    continue\n\n                # Process any accumulated field before starting new one\n                if current_field is not None:\n                    self.mapper(obj, current_field, current_value)\n\n                # Start new field\n                if \":\" not in line:\n                    continue\n\n                key, value = line.split(\":\", 1)\n                current_field = key.strip()\n                current_value = value.strip()\n\n            # Process the final accumulated field\n            if current_field is not None:\n                self.mapper(obj, current_field, current_value)\n\n            if obj.package:\n                yield obj\n            else:\n                raise ValueError(f\"Invalid package: {paragraph}\")\n\n    def handle_line(self, obj: DebianData, line: str) -> None:\n        key, value = line.split(\":\", 1)\n        self.mapper(obj, key, value)\n\n    def mapper(self, obj: DebianData, key: str, value: str) -> None:\n        \"\"\"Map fields from Debian package/source files to DebianData object.\"\"\"\n        match key:\n            case \"Package\":\n                obj.package = value.strip()\n            case \"Source\":\n                obj.source = value.strip()\n            case \"Version\":\n                obj.version = value.strip()\n            case \"Installed-Size\":\n                obj.installed_size = int(value.strip())\n            case \"Architecture\":\n                obj.architecture = value.strip()\n            case \"Description\":\n                obj.description = value.strip()\n            case \"Homepage\":\n                obj.homepage = normalize_url(value.strip())\n            case \"Description-md5\":\n                obj.description_md5 = value.strip()\n            case \"Tag\":\n                obj.tag = value.strip()\n            case \"Section\":\n                obj.section = value.strip()\n            case 
\"Priority\":\n                obj.priority = value.strip()\n            case \"Filename\":\n                obj.filename = value.strip()\n            case \"Size\":\n                obj.size = int(value.strip())\n            case \"MD5sum\":\n                obj.md5sum = value.strip()\n            case \"SHA256\":\n                obj.sha256 = value.strip()\n            case \"Standards-Version\":\n                obj.standards_version = value.strip()\n            case \"Format\":\n                obj.format = value.strip()\n            case \"Vcs-Browser\":\n                obj.vcs_browser = normalize_url(value.strip())\n            case \"Vcs-Git\":\n                obj.vcs_git = normalize_url(value.strip())\n            case \"Directory\":\n                obj.directory = value.strip()\n            case \"Testsuite\":\n                obj.testsuite = value.strip()\n            case \"Testsuite-Triggers\":\n                obj.testsuite_triggers = value.strip()\n            case \"Binary\":\n                obj.binary = [bin.strip() for bin in value.split(\",\") if bin.strip()]\n            case \"Package-List\":\n                obj.package_list = [pkg.strip() for pkg in value.split(\",\")]\n\n            # Dependency Fields\n            case \"Depends\":\n                dependencies = value.split(\", \")\n                for dependency in dependencies:\n                    obj.depends.append(handle_depends(dependency.strip()))\n            case \"Pre-Depends\":\n                dependencies = value.split(\", \")\n                for dependency in dependencies:\n                    obj.pre_depends.append(handle_depends(dependency.strip()))\n            case \"Replaces\":\n                dependencies = value.split(\", \")\n                for dependency in dependencies:\n                    obj.replaces.append(handle_depends(dependency.strip()))\n            case \"Provides\":\n                dependencies = value.split(\", \")\n                for dependency in dependencies:\n                    obj.provides.append(handle_depends(dependency.strip()))\n            case \"Recommends\":\n                dependencies = value.split(\", \")\n                for dependency in dependencies:\n                    obj.recommends.append(handle_depends(dependency.strip()))\n            case \"Suggests\":\n                dependencies = value.split(\", \")\n                for dependency in dependencies:\n                    obj.suggests.append(handle_depends(dependency.strip()))\n            case \"Breaks\":\n                dependencies = value.split(\", \")\n                for dependency in dependencies:\n                    obj.breaks.append(handle_depends(dependency.strip()))\n            case \"Conflicts\":\n                dependencies = value.split(\", \")\n                for dependency in dependencies:\n                    obj.conflicts.append(handle_depends(dependency.strip()))\n            case \"Build-Depends\":\n                for build_depends in value.split(\", \"):\n                    obj.build_depends.append(handle_depends(build_depends.strip()))\n\n            # Maintainer fields\n            case \"Uploaders\":\n                # Split by comma but respect quoted sections\n                uploaders = []\n                in_quotes = False\n                current = \"\"\n\n                for char in value:\n                    if char == '\"':\n                        in_quotes = not in_quotes\n                        current += char\n                    elif char == \",\" 
and not in_quotes:\n                        if current.strip():\n                            uploaders.append(current.strip())\n                        current = \"\"\n                    else:\n                        current += char\n\n                if current.strip():\n                    uploaders.append(current.strip())\n\n                for uploader in uploaders:\n                    obj.uploaders.append(handle_maintainer(uploader.strip()))\n            case \"Maintainer\":\n                obj.maintainer = handle_maintainer(value.strip())\n\n            # TODO: File Fields\n            case _:\n                pass\n\n\n# Helpers for handling specific fields in the mapper\ndef handle_depends(dependency: str) -> Depends:\n    # Handle various dependency formats:\n    # 0ad-data (>= 0.0.26)\n    # lib32gcc1-amd64-cross [amd64 arm64 i386 ppc64el x32]\n    # gm2-11 [!powerpc !ppc64 !x32]\n    # debhelper-compat (= 13)\n    # gcc-11-source (>= 11.3.0-11~)\n\n    # First, strip platform specifications in square brackets\n    # Remove platform specs like [amd64 arm64 i386 ppc64el x32] or [!powerpc !ppc64 !x32]\n    platform_match = re.search(r\"\\s*\\[[^\\]]+\\]\", dependency)\n    if platform_match:\n        dependency = dependency.replace(platform_match.group(0), \"\").strip()\n\n    # Now handle version constraints in parentheses\n    match = re.match(r\"^(.*?)(\\s*\\((.*)\\))?$\", dependency)\n    if match:\n        dep = match.group(1).strip()\n        if match.group(2):\n            semver = match.group(3)\n            return Depends(package=dep, semver=semver)\n        else:\n            return Depends(package=dep, semver=\"*\")\n    raise ValueError(f\"Invalid dependency: {dependency}\")\n\n\ndef handle_maintainer(value: str) -> Maintainer:\n    # Remove trailing comma if present\n    value = value.rstrip(\",\")\n\n    # For names with quotes like \"Adam C. Powell, IV\" <hazelsct@debian.org>\n    if '\"' in value:\n        match = re.match(r'^\"([^\"]*)\" <([^>]*)>$', value)\n        if match:\n            return Maintainer(name=match.group(1), email=match.group(2))\n\n    # Standard format: Name <email@example.com>\n    match = re.match(r\"^(.*) <([^>]*)>$\", value)\n    if match:\n        return Maintainer(name=match.group(1), email=match.group(2))\n\n    raise ValueError(f\"Invalid maintainer: {value}\")\n"
  },
  {
    "path": "package_managers/debian/scripts/investigate_sources.py",
    "content": "#!/usr/bin/env pkgx uv run\n\n\"\"\"\nScript to investigate the relationship between Debian sources and packages files.\nThis helps understand the data structure before modifying the parser.\n\"\"\"\n\nimport os\nimport sys\n\nfrom core.logger import Logger\n\nlogger = Logger(\"debian_investigator\")\n\n\ndef parse_sources_file(file_path: str) -> dict[str, set[str]]:\n    \"\"\"\n    Parse the sources file and return a mapping of source_name -> set of binary packages.\n\n    Args:\n        file_path: Path to the sources file\n\n    Returns:\n        Dictionary mapping source package names to sets of binary package names they produce\n    \"\"\"\n    source_binary_map = {}\n\n    with open(file_path, encoding=\"utf-8\") as f:\n        current_package = None\n        current_binaries = set()\n        in_binary_field = False\n\n        for line in f:\n            original_line = line\n            line = line.strip()\n\n            if line.startswith(\"Package: \"):\n                # Save previous package if exists\n                if current_package:\n                    if current_package in source_binary_map:\n                        # Merge with existing binaries for this source name\n                        source_binary_map[current_package].update(current_binaries)\n                    else:\n                        source_binary_map[current_package] = current_binaries\n\n                # Start new package\n                current_package = line[9:].strip()\n                current_binaries = set()\n                in_binary_field = False\n\n            elif line.startswith(\"Binary: \"):\n                # Parse binary packages (comma-separated, may continue on next lines)\n                binaries_str = line[8:].strip()\n                binaries = [b.strip() for b in binaries_str.split(\",\") if b.strip()]\n                current_binaries.update(binaries)\n                in_binary_field = True\n\n            elif current_package and original_line.startswith(\" \"):\n                # Continuation line (starts with space)\n                if in_binary_field:\n                    # Continue parsing Binary field\n                    binaries_str = line.strip()\n                    binaries = [b.strip() for b in binaries_str.split(\",\") if b.strip()]\n                    current_binaries.update(binaries)\n                # If not in binary field, it's some other field continuation - ignore\n\n            elif line == \"\" and current_package:\n                # End of current package entry\n                if current_package in source_binary_map:\n                    # Merge with existing binaries for this source name\n                    source_binary_map[current_package].update(current_binaries)\n                else:\n                    source_binary_map[current_package] = current_binaries\n                current_package = None\n                current_binaries = set()\n                in_binary_field = False\n\n            else:\n                # Any other field (not Package, not Binary, not continuation)\n                # This includes new fields that don't start with space\n                in_binary_field = False\n\n        # Handle last package if file doesn't end with blank line\n        if current_package:\n            if current_package in source_binary_map:\n                # Merge with existing binaries for this source name\n                source_binary_map[current_package].update(current_binaries)\n            else:\n                
source_binary_map[current_package] = current_binaries\n\n    return source_binary_map\n\n\ndef parse_packages_file(file_path: str) -> dict[str, str | None]:\n    \"\"\"\n    Parse the packages file and return a mapping of package_name -> source_name.\n\n    Args:\n        file_path: Path to the packages file\n\n    Returns:\n        Dictionary mapping package names to their source package names (None if not specified)\n    \"\"\"\n    package_source_map = {}\n\n    with open(file_path, encoding=\"utf-8\") as f:\n        current_package = None\n        current_source = None\n\n        for line in f:\n            line = line.strip()\n\n            if line.startswith(\"Package: \"):\n                # Save previous package if exists\n                if current_package:\n                    package_source_map[current_package] = current_source\n\n                # Start new package\n                current_package = line[9:].strip()\n                current_source = None\n\n            elif line.startswith(\"Source: \"):\n                # Extract source name (may include version info in parentheses)\n                source_str = line[8:].strip()\n                # Remove version info if present: \"source (version)\" -> \"source\"\n                if \"(\" in source_str:\n                    current_source = source_str.split(\"(\")[0].strip()\n                else:\n                    current_source = source_str\n\n            elif line == \"\" and current_package:\n                # End of current package entry\n                package_source_map[current_package] = current_source\n                current_package = None\n                current_source = None\n\n        # Handle last package if file doesn't end with blank line\n        if current_package:\n            package_source_map[current_package] = current_source\n\n    return package_source_map\n\n\ndef investigate_mapping(sources_file: str, packages_file: str) -> None:\n    \"\"\"\n    Investigate the mapping between sources and packages files.\n\n    Args:\n        sources_file: Path to the sources file\n        packages_file: Path to the packages file\n    \"\"\"\n    logger.log(\"Parsing sources file...\")\n    source_binary_map = parse_sources_file(sources_file)\n    logger.log(f\"Found {len(source_binary_map)} source packages\")\n\n    logger.log(\"Parsing packages file...\")\n    package_source_map = parse_packages_file(packages_file)\n    logger.log(f\"Found {len(package_source_map)} binary packages\")\n\n    # Validate mappings\n    orphaned_packages = []\n\n    logger.log(\"\\nValidating package -> source mappings...\")\n\n    for package_name, source_name in package_source_map.items():\n        if source_name:\n            # Package has explicit source reference\n            if source_name not in source_binary_map:\n                logger.log(\n                    f\"WARNING: Package '{package_name}' references unknown source '{source_name}'\"\n                )\n                orphaned_packages.append((package_name, source_name, \"unknown_source\"))\n            elif package_name not in source_binary_map[source_name]:\n                logger.log(\n                    f\"WARNING: Package '{package_name}' not listed in source '{source_name}' binaries\"\n                )\n                orphaned_packages.append((package_name, source_name, \"not_in_binaries\"))\n        else:\n            # Package has no explicit source, assume source name == package name\n            if package_name not in source_binary_map:\n               
 logger.log(\n                    f\"WARNING: Package '{package_name}' has no source reference and no matching source package\"\n                )\n                orphaned_packages.append(\n                    (package_name, package_name, \"no_matching_source\")\n                )\n            elif package_name not in source_binary_map[package_name]:\n                logger.log(\n                    f\"WARNING: Package '{package_name}' not listed in its own source binaries\"\n                )\n                orphaned_packages.append(\n                    (package_name, package_name, \"not_self_listed\")\n                )\n\n    # Summary\n    logger.log(\"\\n=== SUMMARY ===\")\n    logger.log(f\"Total sources: {len(source_binary_map)}\")\n    logger.log(f\"Total packages: {len(package_source_map)}\")\n    logger.log(f\"Orphaned packages: {len(orphaned_packages)}\")\n\n    if orphaned_packages:\n        logger.log(\"\\nOrphaned packages by category:\")\n        categories = {}\n        for pkg, src, reason in orphaned_packages:\n            if reason not in categories:\n                categories[reason] = []\n            categories[reason].append((pkg, src))\n\n        for reason, items in categories.items():\n            logger.log(f\"  {reason}: {len(items)} packages\")\n            for pkg, src in items[:5]:  # Show first 5 examples\n                logger.log(f\"    {pkg} -> {src}\")\n            if len(items) > 5:\n                logger.log(f\"    ... and {len(items) - 5} more\")\n\n\ndef main():\n    data_dir = \"data/debian/latest\"\n\n    # Check if data files exist\n    sources_file = os.path.join(data_dir, \"sources\")\n    packages_file = os.path.join(data_dir, \"packages\")\n\n    if not os.path.exists(sources_file):\n        logger.log(f\"ERROR: Sources file not found at {sources_file}\")\n        logger.log(\"Use --fetch to download the latest data\")\n        return 1\n\n    if not os.path.exists(packages_file):\n        logger.log(f\"ERROR: Packages file not found at {packages_file}\")\n        logger.log(\"Use --fetch to download the latest data\")\n        return 1\n\n    logger.log(f\"Using sources file: {sources_file}\")\n    logger.log(f\"Using packages file: {packages_file}\")\n\n    investigate_mapping(sources_file, packages_file)\n\n    return 0\n\n\nif __name__ == \"__main__\":\n    sys.exit(main())\n"
  },
  {
    "path": "package_managers/debian/scripts/test_investigate_sources.py",
    "content": "from unittest.mock import mock_open, patch\n\nimport pytest\n\nfrom package_managers.debian.scripts.investigate_sources import parse_sources_file\n\n\n@pytest.fixture\ndef binutils():\n    return \"\"\"\nPackage: binutils\nBinary: binutils-for-host, binutils-for-build,\n binutils-ia64-linux-gnu-dbg, binutils-m68k-linux-gnu,\n binutils-mips64el-linux-gnuabin32-dbg, binutils-mipsisa64r6-linux-gnuabin32,\n binutils-mipsisa64r6el-linux-gnuabi64-dbg\n\n\"\"\"\n\n\n@pytest.fixture\ndef linux():\n    return \"\"\"\nPackage: linux\nBinary: linux-support-6.1.0-32, linux-doc-6.1, linux-doc, linux-source-6.1, linux-source, linux-headers-6.1.0-32-common, linux-headers-6.1.0-32-common-rt, kernel-image-6.1.0-32-alpha-generic-di, nic-modules-6.1.0-32-alpha-generic-di, nic-wireless-modules-6.1.0-32-alpha-generic-di, nic-shared-modules-6.1.0-32-alpha-generic-di, serial-modules-6.1.0-32-alpha-generic-di, usb-serial-modules-6.1.0-32-alpha-generic-di, ppp-modules-6.1.0-32-alpha-generic-di, pata-modules-6.1.0-32-alpha-generic-di, cdrom-core-modules-6.1.0-32-alpha-generic-di, scsi-core-modules-6.1.0-32-alpha-generic-di, scsi-modules-6.1.0-32-alpha-generic-di, scsi-nic-modules-6.1.0-32-alpha-generic-di, loop-modules-6.1.0-32-alpha-generic-di, btrfs-modules-6.1.0-32-alpha-generic-di, ext4-modules-6.1.0-32-alpha-generic-di, isofs-modules-6.1.0-32-alpha-generic-di, jfs-modules-6.1.0-32-alpha-generic-di, xfs-modules-6.1.0-32-alpha-generic-di, fat-modules-6.1.0-32-alpha-generic-di,\n squashfs-modules-6.1.0-32-alpha-generic-di, fuse-modules-6.1.0-32-alpha-generic-di, f2fs-modules-6.1.0-32-alpha-generic-di, md-modules-6.1.0-32-alpha-generic-di, multipath-modules-6.1.0-32-alpha-generic-di, usb-modules-6.1.0-32-alpha-generic-di, usb-storage-modules-6.1.0-32-alpha-generic-di, fb-modules-6.1.0-32-alpha-generic-di, input-modules-6.1.0-32-alpha-generic-di, event-modules-6.1.0-32-alpha-generic-di, mouse-modules-6.1.0-32-alpha-generic-di, nic-pcmcia-modules-6.1.0-32-alpha-generic-di, pcmcia-modules-6.1.0-32-alpha-generic-di, nic-usb-modules-6.1.0-32-alpha-generic-di, sata-modules-6.1.0-32-alpha-generic-di, i2c-modules-6.1.0-32-alpha-generic-di, crc-modules-6.1.0-32-alpha-generic-di, crypto-modules-6.1.0-32-alpha-generic-di, crypto-dm-modules-6.1.0-32-alpha-generic-di, ata-modules-6.1.0-32-alpha-generic-di, nbd-modules-6.1.0-32-alpha-generic-di, srm-modules-6.1.0-32-alpha-generic-di, linux-libc-dev, linux-config-6.1, bpftool, linux-cpupower, libcpupower1,\n libcpupower-dev, linux-perf, usbip, hyperv-daemons, rtla, linux-kbuild-6.1, linux-bootwrapper-6.1.0-32, linux-headers-6.1.0-32-alpha-generic, linux-image-6.1.0-32-alpha-generic, linux-image-alpha-generic, linux-headers-alpha-generic, linux-image-6.1.0-32-alpha-generic-dbg, linux-image-alpha-generic-dbg, linux-headers-6.1.0-32-alpha-smp, linux-image-6.1.0-32-alpha-smp, linux-image-alpha-smp, linux-headers-alpha-smp, linux-image-6.1.0-32-alpha-smp-dbg, linux-image-alpha-smp-dbg, kernel-image-6.1.0-32-amd64-di, nic-modules-6.1.0-32-amd64-di, nic-wireless-modules-6.1.0-32-amd64-di, nic-shared-modules-6.1.0-32-amd64-di, serial-modules-6.1.0-32-amd64-di, usb-serial-modules-6.1.0-32-amd64-di, ppp-modules-6.1.0-32-amd64-di, pata-modules-6.1.0-32-amd64-di, cdrom-core-modules-6.1.0-32-amd64-di, firewire-core-modules-6.1.0-32-amd64-di, scsi-core-modules-6.1.0-32-amd64-di, scsi-modules-6.1.0-32-amd64-di, scsi-nic-modules-6.1.0-32-amd64-di, loop-modules-6.1.0-32-amd64-di,\n btrfs-modules-6.1.0-32-amd64-di, ext4-modules-6.1.0-32-amd64-di, isofs-modules-6.1.0-32-amd64-di, 
jfs-modules-6.1.0-32-amd64-di, xfs-modules-6.1.0-32-amd64-di, fat-modules-6.1.0-32-amd64-di, squashfs-modules-6.1.0-32-amd64-di, udf-modules-6.1.0-32-amd64-di, fuse-modules-6.1.0-32-amd64-di, f2fs-modules-6.1.0-32-amd64-di, md-modules-6.1.0-32-amd64-di, multipath-modules-6.1.0-32-amd64-di, usb-modules-6.1.0-32-amd64-di, usb-storage-modules-6.1.0-32-amd64-di, pcmcia-storage-modules-6.1.0-32-amd64-di, fb-modules-6.1.0-32-amd64-di, input-modules-6.1.0-32-amd64-di, event-modules-6.1.0-32-amd64-di, mouse-modules-6.1.0-32-amd64-di, nic-pcmcia-modules-6.1.0-32-amd64-di, pcmcia-modules-6.1.0-32-amd64-di, nic-usb-modules-6.1.0-32-amd64-di, sata-modules-6.1.0-32-amd64-di, acpi-modules-6.1.0-32-amd64-di, i2c-modules-6.1.0-32-amd64-di, crc-modules-6.1.0-32-amd64-di, crypto-modules-6.1.0-32-amd64-di, crypto-dm-modules-6.1.0-32-amd64-di, efi-modules-6.1.0-32-amd64-di,\n ata-modules-6.1.0-32-amd64-di, mmc-core-modules-6.1.0-32-amd64-di, mmc-modules-6.1.0-32-amd64-di, nbd-modules-6.1.0-32-amd64-di, speakup-modules-6.1.0-32-amd64-di, uinput-modules-6.1.0-32-amd64-di, sound-modules-6.1.0-32-amd64-di, mtd-core-modules-6.1.0-32-amd64-di, rfkill-modules-6.1.0-32-amd64-di, linux-image-amd64-signed-template, linux-headers-6.1.0-32-amd64, linux-image-6.1.0-32-amd64-unsigned, linux-image-6.1.0-32-amd64-dbg, linux-image-amd64-dbg, linux-headers-6.1.0-32-cloud-amd64, linux-image-6.1.0-32-cloud-amd64-unsigned, linux-image-6.1.0-32-cloud-amd64-dbg, linux-image-cloud-amd64-dbg, linux-headers-6.1.0-32-rt-amd64, linux-image-6.1.0-32-rt-amd64-unsigned, linux-image-6.1.0-32-rt-amd64-dbg, linux-image-rt-amd64-dbg, kernel-image-6.1.0-32-arm64-di, nic-modules-6.1.0-32-arm64-di, nic-wireless-modules-6.1.0-32-arm64-di, nic-shared-modules-6.1.0-32-arm64-di, usb-serial-modules-6.1.0-32-arm64-di, ppp-modules-6.1.0-32-arm64-di,\n cdrom-core-modules-6.1.0-32-arm64-di, scsi-core-modules-6.1.0-32-arm64-di, scsi-modules-6.1.0-32-arm64-di, scsi-nic-modules-6.1.0-32-arm64-di, loop-modules-6.1.0-32-arm64-di, btrfs-modules-6.1.0-32-arm64-di, ext4-modules-6.1.0-32-arm64-di, isofs-modules-6.1.0-32-arm64-di, jfs-modules-6.1.0-32-arm64-di, xfs-modules-6.1.0-32-arm64-di, fat-modules-6.1.0-32-arm64-di, squashfs-modules-6.1.0-32-arm64-di, udf-modules-6.1.0-32-arm64-di, fuse-modules-6.1.0-32-arm64-di, f2fs-modules-6.1.0-32-arm64-di, md-modules-6.1.0-32-arm64-di, multipath-modules-6.1.0-32-arm64-di, usb-modules-6.1.0-32-arm64-di, usb-storage-modules-6.1.0-32-arm64-di, fb-modules-6.1.0-32-arm64-di, input-modules-6.1.0-32-arm64-di, event-modules-6.1.0-32-arm64-di, nic-usb-modules-6.1.0-32-arm64-di, sata-modules-6.1.0-32-arm64-di, i2c-modules-6.1.0-32-arm64-di, crc-modules-6.1.0-32-arm64-di, crypto-modules-6.1.0-32-arm64-di, crypto-dm-modules-6.1.0-32-arm64-di, efi-modules-6.1.0-32-arm64-di,\n ata-modules-6.1.0-32-arm64-di, mmc-modules-6.1.0-32-arm64-di, nbd-modules-6.1.0-32-arm64-di, speakup-modules-6.1.0-32-arm64-di, uinput-modules-6.1.0-32-arm64-di, sound-modules-6.1.0-32-arm64-di, leds-modules-6.1.0-32-arm64-di, mtd-core-modules-6.1.0-32-arm64-di, linux-image-arm64-signed-template, linux-headers-6.1.0-32-arm64, linux-image-6.1.0-32-arm64-unsigned, linux-image-6.1.0-32-arm64-dbg, linux-image-arm64-dbg, linux-headers-6.1.0-32-cloud-arm64, linux-image-6.1.0-32-cloud-arm64-unsigned, linux-image-6.1.0-32-cloud-arm64-dbg, linux-image-cloud-arm64-dbg, linux-headers-6.1.0-32-rt-arm64, linux-image-6.1.0-32-rt-arm64-unsigned, linux-image-6.1.0-32-rt-arm64-dbg, linux-image-rt-arm64-dbg, kernel-image-6.1.0-32-marvell-di, nic-modules-6.1.0-32-marvell-di, 
nic-shared-modules-6.1.0-32-marvell-di, usb-serial-modules-6.1.0-32-marvell-di, ppp-modules-6.1.0-32-marvell-di, cdrom-core-modules-6.1.0-32-marvell-di, scsi-core-modules-6.1.0-32-marvell-di,\n loop-modules-6.1.0-32-marvell-di, ipv6-modules-6.1.0-32-marvell-di, btrfs-modules-6.1.0-32-marvell-di, ext4-modules-6.1.0-32-marvell-di, isofs-modules-6.1.0-32-marvell-di, jffs2-modules-6.1.0-32-marvell-di, jfs-modules-6.1.0-32-marvell-di, fat-modules-6.1.0-32-marvell-di, minix-modules-6.1.0-32-marvell-di, squashfs-modules-6.1.0-32-marvell-di, udf-modules-6.1.0-32-marvell-di, fuse-modules-6.1.0-32-marvell-di, f2fs-modules-6.1.0-32-marvell-di, md-modules-6.1.0-32-marvell-di, multipath-modules-6.1.0-32-marvell-di, usb-modules-6.1.0-32-marvell-di, usb-storage-modules-6.1.0-32-marvell-di, fb-modules-6.1.0-32-marvell-di, input-modules-6.1.0-32-marvell-di, event-modules-6.1.0-32-marvell-di, mouse-modules-6.1.0-32-marvell-di, nic-usb-modules-6.1.0-32-marvell-di, sata-modules-6.1.0-32-marvell-di, crc-modules-6.1.0-32-marvell-di, crypto-modules-6.1.0-32-marvell-di, crypto-dm-modules-6.1.0-32-marvell-di, mmc-core-modules-6.1.0-32-marvell-di, mmc-modules-6.1.0-32-marvell-di,\n nbd-modules-6.1.0-32-marvell-di, uinput-modules-6.1.0-32-marvell-di, leds-modules-6.1.0-32-marvell-di, mtd-modules-6.1.0-32-marvell-di, mtd-core-modules-6.1.0-32-marvell-di, linux-headers-6.1.0-32-marvell, linux-image-6.1.0-32-marvell, linux-image-marvell, linux-headers-marvell, linux-image-6.1.0-32-marvell-dbg, linux-image-marvell-dbg, linux-headers-6.1.0-32-rpi, linux-image-6.1.0-32-rpi, linux-image-rpi, linux-headers-rpi, linux-image-6.1.0-32-rpi-dbg, linux-image-rpi-dbg, kernel-image-6.1.0-32-armmp-di, nic-modules-6.1.0-32-armmp-di, nic-wireless-modules-6.1.0-32-armmp-di, nic-shared-modules-6.1.0-32-armmp-di, usb-serial-modules-6.1.0-32-armmp-di, ppp-modules-6.1.0-32-armmp-di, pata-modules-6.1.0-32-armmp-di, cdrom-core-modules-6.1.0-32-armmp-di, scsi-core-modules-6.1.0-32-armmp-di, scsi-modules-6.1.0-32-armmp-di, scsi-nic-modules-6.1.0-32-armmp-di, loop-modules-6.1.0-32-armmp-di, btrfs-modules-6.1.0-32-armmp-di, ext4-modules-6.1.0-32-armmp-di,\n isofs-modules-6.1.0-32-armmp-di, jfs-modules-6.1.0-32-armmp-di, fat-modules-6.1.0-32-armmp-di, squashfs-modules-6.1.0-32-armmp-di, udf-modules-6.1.0-32-armmp-di, fuse-modules-6.1.0-32-armmp-di, f2fs-modules-6.1.0-32-armmp-di, md-modules-6.1.0-32-armmp-di, multipath-modules-6.1.0-32-armmp-di, usb-modules-6.1.0-32-armmp-di, usb-storage-modules-6.1.0-32-armmp-di, fb-modules-6.1.0-32-armmp-di, input-modules-6.1.0-32-armmp-di, event-modules-6.1.0-32-armmp-di, nic-usb-modules-6.1.0-32-armmp-di, sata-modules-6.1.0-32-armmp-di, i2c-modules-6.1.0-32-armmp-di, crc-modules-6.1.0-32-armmp-di, crypto-modules-6.1.0-32-armmp-di, crypto-dm-modules-6.1.0-32-armmp-di, efi-modules-6.1.0-32-armmp-di, ata-modules-6.1.0-32-armmp-di, mmc-modules-6.1.0-32-armmp-di, nbd-modules-6.1.0-32-armmp-di, speakup-modules-6.1.0-32-armmp-di, uinput-modules-6.1.0-32-armmp-di, sound-modules-6.1.0-32-armmp-di, leds-modules-6.1.0-32-armmp-di, mtd-modules-6.1.0-32-armmp-di, linux-headers-6.1.0-32-armmp,\n linux-image-6.1.0-32-armmp, linux-image-armmp, linux-headers-armmp, linux-image-6.1.0-32-armmp-dbg, linux-image-armmp-dbg, linux-headers-6.1.0-32-armmp-lpae, linux-image-6.1.0-32-armmp-lpae, linux-image-armmp-lpae, linux-headers-armmp-lpae, linux-image-6.1.0-32-armmp-lpae-dbg, linux-image-armmp-lpae-dbg, linux-headers-6.1.0-32-rt-armmp, linux-image-6.1.0-32-rt-armmp, linux-image-rt-armmp, linux-headers-rt-armmp, 
linux-image-6.1.0-32-rt-armmp-dbg, linux-image-rt-armmp-dbg, kernel-image-6.1.0-32-parisc-di, nic-modules-6.1.0-32-parisc-di, nic-shared-modules-6.1.0-32-parisc-di, serial-modules-6.1.0-32-parisc-di, usb-serial-modules-6.1.0-32-parisc-di, ppp-modules-6.1.0-32-parisc-di, pata-modules-6.1.0-32-parisc-di, cdrom-core-modules-6.1.0-32-parisc-di, scsi-core-modules-6.1.0-32-parisc-di, scsi-modules-6.1.0-32-parisc-di, loop-modules-6.1.0-32-parisc-di, btrfs-modules-6.1.0-32-parisc-di, ext4-modules-6.1.0-32-parisc-di, isofs-modules-6.1.0-32-parisc-di,\n jfs-modules-6.1.0-32-parisc-di, xfs-modules-6.1.0-32-parisc-di, fat-modules-6.1.0-32-parisc-di, squashfs-modules-6.1.0-32-parisc-di, fuse-modules-6.1.0-32-parisc-di, f2fs-modules-6.1.0-32-parisc-di, md-modules-6.1.0-32-parisc-di, multipath-modules-6.1.0-32-parisc-di, usb-modules-6.1.0-32-parisc-di, usb-storage-modules-6.1.0-32-parisc-di, input-modules-6.1.0-32-parisc-di, event-modules-6.1.0-32-parisc-di, mouse-modules-6.1.0-32-parisc-di, nic-usb-modules-6.1.0-32-parisc-di, sata-modules-6.1.0-32-parisc-di, i2c-modules-6.1.0-32-parisc-di, crc-modules-6.1.0-32-parisc-di, crypto-modules-6.1.0-32-parisc-di, crypto-dm-modules-6.1.0-32-parisc-di, ata-modules-6.1.0-32-parisc-di, nbd-modules-6.1.0-32-parisc-di, kernel-image-6.1.0-32-parisc64-di, nic-modules-6.1.0-32-parisc64-di, nic-shared-modules-6.1.0-32-parisc64-di, serial-modules-6.1.0-32-parisc64-di, usb-serial-modules-6.1.0-32-parisc64-di, ppp-modules-6.1.0-32-parisc64-di, pata-modules-6.1.0-32-parisc64-di,\n cdrom-core-modules-6.1.0-32-parisc64-di, scsi-core-modules-6.1.0-32-parisc64-di, scsi-modules-6.1.0-32-parisc64-di, loop-modules-6.1.0-32-parisc64-di, btrfs-modules-6.1.0-32-parisc64-di, ext4-modules-6.1.0-32-parisc64-di, isofs-modules-6.1.0-32-parisc64-di, jfs-modules-6.1.0-32-parisc64-di, xfs-modules-6.1.0-32-parisc64-di, fat-modules-6.1.0-32-parisc64-di, squashfs-modules-6.1.0-32-parisc64-di, fuse-modules-6.1.0-32-parisc64-di, f2fs-modules-6.1.0-32-parisc64-di, md-modules-6.1.0-32-parisc64-di, multipath-modules-6.1.0-32-parisc64-di, usb-modules-6.1.0-32-parisc64-di, usb-storage-modules-6.1.0-32-parisc64-di, fb-modules-6.1.0-32-parisc64-di, input-modules-6.1.0-32-parisc64-di, event-modules-6.1.0-32-parisc64-di, mouse-modules-6.1.0-32-parisc64-di, nic-usb-modules-6.1.0-32-parisc64-di, sata-modules-6.1.0-32-parisc64-di, crc-modules-6.1.0-32-parisc64-di, crypto-modules-6.1.0-32-parisc64-di, crypto-dm-modules-6.1.0-32-parisc64-di, ata-modules-6.1.0-32-parisc64-di,\n nbd-modules-6.1.0-32-parisc64-di, linux-headers-6.1.0-32-parisc, linux-image-6.1.0-32-parisc, linux-image-parisc, linux-headers-parisc, linux-image-6.1.0-32-parisc-dbg, linux-image-parisc-dbg, linux-headers-6.1.0-32-parisc64, linux-image-6.1.0-32-parisc64, linux-image-parisc64, linux-headers-parisc64, linux-image-6.1.0-32-parisc64-dbg, linux-image-parisc64-dbg, kernel-image-6.1.0-32-686-di, nic-modules-6.1.0-32-686-di, nic-wireless-modules-6.1.0-32-686-di, nic-shared-modules-6.1.0-32-686-di, serial-modules-6.1.0-32-686-di, usb-serial-modules-6.1.0-32-686-di, ppp-modules-6.1.0-32-686-di, pata-modules-6.1.0-32-686-di, cdrom-core-modules-6.1.0-32-686-di, firewire-core-modules-6.1.0-32-686-di, scsi-core-modules-6.1.0-32-686-di, scsi-modules-6.1.0-32-686-di, scsi-nic-modules-6.1.0-32-686-di, loop-modules-6.1.0-32-686-di, btrfs-modules-6.1.0-32-686-di, ext4-modules-6.1.0-32-686-di, isofs-modules-6.1.0-32-686-di, jfs-modules-6.1.0-32-686-di, xfs-modules-6.1.0-32-686-di,\n fat-modules-6.1.0-32-686-di, squashfs-modules-6.1.0-32-686-di, 
udf-modules-6.1.0-32-686-di, fuse-modules-6.1.0-32-686-di, f2fs-modules-6.1.0-32-686-di, md-modules-6.1.0-32-686-di, multipath-modules-6.1.0-32-686-di, usb-modules-6.1.0-32-686-di, usb-storage-modules-6.1.0-32-686-di, pcmcia-storage-modules-6.1.0-32-686-di, fb-modules-6.1.0-32-686-di, input-modules-6.1.0-32-686-di, event-modules-6.1.0-32-686-di, mouse-modules-6.1.0-32-686-di, nic-pcmcia-modules-6.1.0-32-686-di, pcmcia-modules-6.1.0-32-686-di, nic-usb-modules-6.1.0-32-686-di, sata-modules-6.1.0-32-686-di, acpi-modules-6.1.0-32-686-di, i2c-modules-6.1.0-32-686-di, crc-modules-6.1.0-32-686-di, crypto-modules-6.1.0-32-686-di, crypto-dm-modules-6.1.0-32-686-di, efi-modules-6.1.0-32-686-di, ata-modules-6.1.0-32-686-di, mmc-core-modules-6.1.0-32-686-di, mmc-modules-6.1.0-32-686-di, nbd-modules-6.1.0-32-686-di, speakup-modules-6.1.0-32-686-di, uinput-modules-6.1.0-32-686-di, sound-modules-6.1.0-32-686-di,\n mtd-core-modules-6.1.0-32-686-di, rfkill-modules-6.1.0-32-686-di, kernel-image-6.1.0-32-686-pae-di, nic-modules-6.1.0-32-686-pae-di, nic-wireless-modules-6.1.0-32-686-pae-di, nic-shared-modules-6.1.0-32-686-pae-di, serial-modules-6.1.0-32-686-pae-di, usb-serial-modules-6.1.0-32-686-pae-di, ppp-modules-6.1.0-32-686-pae-di, pata-modules-6.1.0-32-686-pae-di, cdrom-core-modules-6.1.0-32-686-pae-di, firewire-core-modules-6.1.0-32-686-pae-di, scsi-core-modules-6.1.0-32-686-pae-di, scsi-modules-6.1.0-32-686-pae-di, scsi-nic-modules-6.1.0-32-686-pae-di, loop-modules-6.1.0-32-686-pae-di, btrfs-modules-6.1.0-32-686-pae-di, ext4-modules-6.1.0-32-686-pae-di, isofs-modules-6.1.0-32-686-pae-di, jfs-modules-6.1.0-32-686-pae-di, xfs-modules-6.1.0-32-686-pae-di, fat-modules-6.1.0-32-686-pae-di, squashfs-modules-6.1.0-32-686-pae-di, udf-modules-6.1.0-32-686-pae-di, fuse-modules-6.1.0-32-686-pae-di, f2fs-modules-6.1.0-32-686-pae-di, md-modules-6.1.0-32-686-pae-di,\n multipath-modules-6.1.0-32-686-pae-di, usb-modules-6.1.0-32-686-pae-di, usb-storage-modules-6.1.0-32-686-pae-di, pcmcia-storage-modules-6.1.0-32-686-pae-di, fb-modules-6.1.0-32-686-pae-di, input-modules-6.1.0-32-686-pae-di, event-modules-6.1.0-32-686-pae-di, mouse-modules-6.1.0-32-686-pae-di, nic-pcmcia-modules-6.1.0-32-686-pae-di, pcmcia-modules-6.1.0-32-686-pae-di, nic-usb-modules-6.1.0-32-686-pae-di, sata-modules-6.1.0-32-686-pae-di, acpi-modules-6.1.0-32-686-pae-di, i2c-modules-6.1.0-32-686-pae-di, crc-modules-6.1.0-32-686-pae-di, crypto-modules-6.1.0-32-686-pae-di, crypto-dm-modules-6.1.0-32-686-pae-di, efi-modules-6.1.0-32-686-pae-di, ata-modules-6.1.0-32-686-pae-di, mmc-core-modules-6.1.0-32-686-pae-di, mmc-modules-6.1.0-32-686-pae-di, nbd-modules-6.1.0-32-686-pae-di, speakup-modules-6.1.0-32-686-pae-di, uinput-modules-6.1.0-32-686-pae-di, sound-modules-6.1.0-32-686-pae-di, mtd-core-modules-6.1.0-32-686-pae-di, rfkill-modules-6.1.0-32-686-pae-di,\n linux-image-i386-signed-template, linux-headers-6.1.0-32-686, linux-image-6.1.0-32-686-unsigned, linux-image-6.1.0-32-686-dbg, linux-image-686-dbg, linux-headers-6.1.0-32-686-pae, linux-image-6.1.0-32-686-pae-unsigned, linux-image-6.1.0-32-686-pae-dbg, linux-image-686-pae-dbg, linux-headers-6.1.0-32-rt-686-pae, linux-image-6.1.0-32-rt-686-pae-unsigned, linux-image-6.1.0-32-rt-686-pae-dbg, linux-image-rt-686-pae-dbg, kernel-image-6.1.0-32-itanium-di, nic-modules-6.1.0-32-itanium-di, nic-shared-modules-6.1.0-32-itanium-di, serial-modules-6.1.0-32-itanium-di, usb-serial-modules-6.1.0-32-itanium-di, ppp-modules-6.1.0-32-itanium-di, pata-modules-6.1.0-32-itanium-di, cdrom-core-modules-6.1.0-32-itanium-di, 
firewire-core-modules-6.1.0-32-itanium-di, scsi-core-modules-6.1.0-32-itanium-di, scsi-modules-6.1.0-32-itanium-di, scsi-nic-modules-6.1.0-32-itanium-di, loop-modules-6.1.0-32-itanium-di, btrfs-modules-6.1.0-32-itanium-di, ext4-modules-6.1.0-32-itanium-di,\n isofs-modules-6.1.0-32-itanium-di, jfs-modules-6.1.0-32-itanium-di, xfs-modules-6.1.0-32-itanium-di, fat-modules-6.1.0-32-itanium-di, squashfs-modules-6.1.0-32-itanium-di, udf-modules-6.1.0-32-itanium-di, fuse-modules-6.1.0-32-itanium-di, f2fs-modules-6.1.0-32-itanium-di, md-modules-6.1.0-32-itanium-di, multipath-modules-6.1.0-32-itanium-di, usb-modules-6.1.0-32-itanium-di, usb-storage-modules-6.1.0-32-itanium-di, fb-modules-6.1.0-32-itanium-di, input-modules-6.1.0-32-itanium-di, event-modules-6.1.0-32-itanium-di, mouse-modules-6.1.0-32-itanium-di, pcmcia-modules-6.1.0-32-itanium-di, nic-usb-modules-6.1.0-32-itanium-di, sata-modules-6.1.0-32-itanium-di, i2c-modules-6.1.0-32-itanium-di, crc-modules-6.1.0-32-itanium-di, crypto-modules-6.1.0-32-itanium-di, crypto-dm-modules-6.1.0-32-itanium-di, ata-modules-6.1.0-32-itanium-di, nbd-modules-6.1.0-32-itanium-di, uinput-modules-6.1.0-32-itanium-di, mtd-core-modules-6.1.0-32-itanium-di, linux-headers-6.1.0-32-itanium,\n linux-image-6.1.0-32-itanium, linux-image-itanium, linux-headers-itanium, linux-image-6.1.0-32-itanium-dbg, linux-image-itanium-dbg, linux-headers-6.1.0-32-mckinley, linux-image-6.1.0-32-mckinley, linux-image-mckinley, linux-headers-mckinley, linux-image-6.1.0-32-mckinley-dbg, linux-image-mckinley-dbg, kernel-image-6.1.0-32-m68k-di, nic-modules-6.1.0-32-m68k-di, nic-shared-modules-6.1.0-32-m68k-di, ppp-modules-6.1.0-32-m68k-di, pata-modules-6.1.0-32-m68k-di, cdrom-core-modules-6.1.0-32-m68k-di, scsi-core-modules-6.1.0-32-m68k-di, scsi-modules-6.1.0-32-m68k-di, loop-modules-6.1.0-32-m68k-di, btrfs-modules-6.1.0-32-m68k-di, ext4-modules-6.1.0-32-m68k-di, isofs-modules-6.1.0-32-m68k-di, fat-modules-6.1.0-32-m68k-di, hfs-modules-6.1.0-32-m68k-di, affs-modules-6.1.0-32-m68k-di, squashfs-modules-6.1.0-32-m68k-di, udf-modules-6.1.0-32-m68k-di, fuse-modules-6.1.0-32-m68k-di, md-modules-6.1.0-32-m68k-di, crc-modules-6.1.0-32-m68k-di, crypto-modules-6.1.0-32-m68k-di,\n ata-modules-6.1.0-32-m68k-di, nbd-modules-6.1.0-32-m68k-di, linux-headers-6.1.0-32-m68k, linux-image-6.1.0-32-m68k, linux-image-m68k, linux-headers-m68k, linux-image-6.1.0-32-m68k-dbg, linux-image-m68k-dbg, kernel-image-6.1.0-32-4kc-malta-di, nic-modules-6.1.0-32-4kc-malta-di, nic-wireless-modules-6.1.0-32-4kc-malta-di, nic-shared-modules-6.1.0-32-4kc-malta-di, usb-serial-modules-6.1.0-32-4kc-malta-di, ppp-modules-6.1.0-32-4kc-malta-di, pata-modules-6.1.0-32-4kc-malta-di, cdrom-core-modules-6.1.0-32-4kc-malta-di, firewire-core-modules-6.1.0-32-4kc-malta-di, scsi-core-modules-6.1.0-32-4kc-malta-di, scsi-modules-6.1.0-32-4kc-malta-di, scsi-nic-modules-6.1.0-32-4kc-malta-di, loop-modules-6.1.0-32-4kc-malta-di, btrfs-modules-6.1.0-32-4kc-malta-di, ext4-modules-6.1.0-32-4kc-malta-di, isofs-modules-6.1.0-32-4kc-malta-di, jfs-modules-6.1.0-32-4kc-malta-di, xfs-modules-6.1.0-32-4kc-malta-di, fat-modules-6.1.0-32-4kc-malta-di, affs-modules-6.1.0-32-4kc-malta-di,\n minix-modules-6.1.0-32-4kc-malta-di, nfs-modules-6.1.0-32-4kc-malta-di, squashfs-modules-6.1.0-32-4kc-malta-di, udf-modules-6.1.0-32-4kc-malta-di, fuse-modules-6.1.0-32-4kc-malta-di, f2fs-modules-6.1.0-32-4kc-malta-di, md-modules-6.1.0-32-4kc-malta-di, multipath-modules-6.1.0-32-4kc-malta-di, usb-modules-6.1.0-32-4kc-malta-di, usb-storage-modules-6.1.0-32-4kc-malta-di, 
fb-modules-6.1.0-32-4kc-malta-di, input-modules-6.1.0-32-4kc-malta-di, event-modules-6.1.0-32-4kc-malta-di, mouse-modules-6.1.0-32-4kc-malta-di, nic-usb-modules-6.1.0-32-4kc-malta-di, sata-modules-6.1.0-32-4kc-malta-di, crc-modules-6.1.0-32-4kc-malta-di, crypto-modules-6.1.0-32-4kc-malta-di, crypto-dm-modules-6.1.0-32-4kc-malta-di, ata-modules-6.1.0-32-4kc-malta-di, mmc-core-modules-6.1.0-32-4kc-malta-di, mmc-modules-6.1.0-32-4kc-malta-di, nbd-modules-6.1.0-32-4kc-malta-di, speakup-modules-6.1.0-32-4kc-malta-di, sound-modules-6.1.0-32-4kc-malta-di, kernel-image-6.1.0-32-mips32r2eb-di,\n nic-modules-6.1.0-32-mips32r2eb-di, nic-wireless-modules-6.1.0-32-mips32r2eb-di, nic-shared-modules-6.1.0-32-mips32r2eb-di, usb-serial-modules-6.1.0-32-mips32r2eb-di, ppp-modules-6.1.0-32-mips32r2eb-di, pata-modules-6.1.0-32-mips32r2eb-di, cdrom-core-modules-6.1.0-32-mips32r2eb-di, firewire-core-modules-6.1.0-32-mips32r2eb-di, scsi-core-modules-6.1.0-32-mips32r2eb-di, scsi-modules-6.1.0-32-mips32r2eb-di, scsi-nic-modules-6.1.0-32-mips32r2eb-di, loop-modules-6.1.0-32-mips32r2eb-di, btrfs-modules-6.1.0-32-mips32r2eb-di, ext4-modules-6.1.0-32-mips32r2eb-di, isofs-modules-6.1.0-32-mips32r2eb-di, jfs-modules-6.1.0-32-mips32r2eb-di, xfs-modules-6.1.0-32-mips32r2eb-di, fat-modules-6.1.0-32-mips32r2eb-di, affs-modules-6.1.0-32-mips32r2eb-di, minix-modules-6.1.0-32-mips32r2eb-di, nfs-modules-6.1.0-32-mips32r2eb-di, squashfs-modules-6.1.0-32-mips32r2eb-di, udf-modules-6.1.0-32-mips32r2eb-di, fuse-modules-6.1.0-32-mips32r2eb-di, f2fs-modules-6.1.0-32-mips32r2eb-di,\n md-modules-6.1.0-32-mips32r2eb-di, multipath-modules-6.1.0-32-mips32r2eb-di, usb-modules-6.1.0-32-mips32r2eb-di, usb-storage-modules-6.1.0-32-mips32r2eb-di, fb-modules-6.1.0-32-mips32r2eb-di, input-modules-6.1.0-32-mips32r2eb-di, event-modules-6.1.0-32-mips32r2eb-di, mouse-modules-6.1.0-32-mips32r2eb-di, nic-usb-modules-6.1.0-32-mips32r2eb-di, sata-modules-6.1.0-32-mips32r2eb-di, crc-modules-6.1.0-32-mips32r2eb-di, crypto-modules-6.1.0-32-mips32r2eb-di, crypto-dm-modules-6.1.0-32-mips32r2eb-di, ata-modules-6.1.0-32-mips32r2eb-di, mmc-core-modules-6.1.0-32-mips32r2eb-di, mmc-modules-6.1.0-32-mips32r2eb-di, nbd-modules-6.1.0-32-mips32r2eb-di, speakup-modules-6.1.0-32-mips32r2eb-di, sound-modules-6.1.0-32-mips32r2eb-di, kernel-image-6.1.0-32-octeon-di, nic-modules-6.1.0-32-octeon-di, nic-wireless-modules-6.1.0-32-octeon-di, nic-shared-modules-6.1.0-32-octeon-di, usb-serial-modules-6.1.0-32-octeon-di, ppp-modules-6.1.0-32-octeon-di, pata-modules-6.1.0-32-octeon-di,\n cdrom-core-modules-6.1.0-32-octeon-di, firewire-core-modules-6.1.0-32-octeon-di, scsi-core-modules-6.1.0-32-octeon-di, scsi-modules-6.1.0-32-octeon-di, scsi-nic-modules-6.1.0-32-octeon-di, loop-modules-6.1.0-32-octeon-di, btrfs-modules-6.1.0-32-octeon-di, ext4-modules-6.1.0-32-octeon-di, isofs-modules-6.1.0-32-octeon-di, jfs-modules-6.1.0-32-octeon-di, xfs-modules-6.1.0-32-octeon-di, fat-modules-6.1.0-32-octeon-di, affs-modules-6.1.0-32-octeon-di, minix-modules-6.1.0-32-octeon-di, nfs-modules-6.1.0-32-octeon-di, squashfs-modules-6.1.0-32-octeon-di, udf-modules-6.1.0-32-octeon-di, fuse-modules-6.1.0-32-octeon-di, f2fs-modules-6.1.0-32-octeon-di, md-modules-6.1.0-32-octeon-di, multipath-modules-6.1.0-32-octeon-di, usb-modules-6.1.0-32-octeon-di, usb-storage-modules-6.1.0-32-octeon-di, fb-modules-6.1.0-32-octeon-di, input-modules-6.1.0-32-octeon-di, event-modules-6.1.0-32-octeon-di, mouse-modules-6.1.0-32-octeon-di, nic-usb-modules-6.1.0-32-octeon-di,\n sata-modules-6.1.0-32-octeon-di, 
crc-modules-6.1.0-32-octeon-di, crypto-modules-6.1.0-32-octeon-di, crypto-dm-modules-6.1.0-32-octeon-di, ata-modules-6.1.0-32-octeon-di, mmc-core-modules-6.1.0-32-octeon-di, mmc-modules-6.1.0-32-octeon-di, nbd-modules-6.1.0-32-octeon-di, speakup-modules-6.1.0-32-octeon-di, sound-modules-6.1.0-32-octeon-di, linux-headers-6.1.0-32-4kc-malta, linux-image-6.1.0-32-4kc-malta, linux-image-4kc-malta, linux-headers-4kc-malta, linux-image-6.1.0-32-4kc-malta-dbg, linux-image-4kc-malta-dbg, linux-headers-6.1.0-32-mips32r2eb, linux-image-6.1.0-32-mips32r2eb, linux-image-mips32r2eb, linux-headers-mips32r2eb, linux-image-6.1.0-32-mips32r2eb-dbg, linux-image-mips32r2eb-dbg, linux-headers-6.1.0-32-octeon, linux-image-6.1.0-32-octeon, linux-image-octeon, linux-headers-octeon, linux-image-6.1.0-32-octeon-dbg, linux-image-octeon-dbg, kernel-image-6.1.0-32-5kc-malta-di, nic-modules-6.1.0-32-5kc-malta-di, nic-wireless-modules-6.1.0-32-5kc-malta-di,\n nic-shared-modules-6.1.0-32-5kc-malta-di, usb-serial-modules-6.1.0-32-5kc-malta-di, ppp-modules-6.1.0-32-5kc-malta-di, pata-modules-6.1.0-32-5kc-malta-di, cdrom-core-modules-6.1.0-32-5kc-malta-di, firewire-core-modules-6.1.0-32-5kc-malta-di, scsi-core-modules-6.1.0-32-5kc-malta-di, scsi-modules-6.1.0-32-5kc-malta-di, scsi-nic-modules-6.1.0-32-5kc-malta-di, loop-modules-6.1.0-32-5kc-malta-di, btrfs-modules-6.1.0-32-5kc-malta-di, ext4-modules-6.1.0-32-5kc-malta-di, isofs-modules-6.1.0-32-5kc-malta-di, jfs-modules-6.1.0-32-5kc-malta-di, xfs-modules-6.1.0-32-5kc-malta-di, fat-modules-6.1.0-32-5kc-malta-di, affs-modules-6.1.0-32-5kc-malta-di, minix-modules-6.1.0-32-5kc-malta-di, nfs-modules-6.1.0-32-5kc-malta-di, squashfs-modules-6.1.0-32-5kc-malta-di, udf-modules-6.1.0-32-5kc-malta-di, fuse-modules-6.1.0-32-5kc-malta-di, f2fs-modules-6.1.0-32-5kc-malta-di, md-modules-6.1.0-32-5kc-malta-di, multipath-modules-6.1.0-32-5kc-malta-di, usb-modules-6.1.0-32-5kc-malta-di,\n usb-storage-modules-6.1.0-32-5kc-malta-di, fb-modules-6.1.0-32-5kc-malta-di, input-modules-6.1.0-32-5kc-malta-di, event-modules-6.1.0-32-5kc-malta-di, mouse-modules-6.1.0-32-5kc-malta-di, nic-usb-modules-6.1.0-32-5kc-malta-di, sata-modules-6.1.0-32-5kc-malta-di, crc-modules-6.1.0-32-5kc-malta-di, crypto-modules-6.1.0-32-5kc-malta-di, crypto-dm-modules-6.1.0-32-5kc-malta-di, ata-modules-6.1.0-32-5kc-malta-di, mmc-core-modules-6.1.0-32-5kc-malta-di, mmc-modules-6.1.0-32-5kc-malta-di, nbd-modules-6.1.0-32-5kc-malta-di, speakup-modules-6.1.0-32-5kc-malta-di, sound-modules-6.1.0-32-5kc-malta-di, kernel-image-6.1.0-32-mips64r2eb-di, nic-modules-6.1.0-32-mips64r2eb-di, nic-wireless-modules-6.1.0-32-mips64r2eb-di, nic-shared-modules-6.1.0-32-mips64r2eb-di, usb-serial-modules-6.1.0-32-mips64r2eb-di, ppp-modules-6.1.0-32-mips64r2eb-di, pata-modules-6.1.0-32-mips64r2eb-di, cdrom-core-modules-6.1.0-32-mips64r2eb-di, firewire-core-modules-6.1.0-32-mips64r2eb-di,\n scsi-core-modules-6.1.0-32-mips64r2eb-di, scsi-modules-6.1.0-32-mips64r2eb-di, scsi-nic-modules-6.1.0-32-mips64r2eb-di, loop-modules-6.1.0-32-mips64r2eb-di, btrfs-modules-6.1.0-32-mips64r2eb-di, ext4-modules-6.1.0-32-mips64r2eb-di, isofs-modules-6.1.0-32-mips64r2eb-di, jfs-modules-6.1.0-32-mips64r2eb-di, xfs-modules-6.1.0-32-mips64r2eb-di, fat-modules-6.1.0-32-mips64r2eb-di, affs-modules-6.1.0-32-mips64r2eb-di, minix-modules-6.1.0-32-mips64r2eb-di, nfs-modules-6.1.0-32-mips64r2eb-di, squashfs-modules-6.1.0-32-mips64r2eb-di, udf-modules-6.1.0-32-mips64r2eb-di, fuse-modules-6.1.0-32-mips64r2eb-di, f2fs-modules-6.1.0-32-mips64r2eb-di, 
md-modules-6.1.0-32-mips64r2eb-di, multipath-modules-6.1.0-32-mips64r2eb-di, usb-modules-6.1.0-32-mips64r2eb-di, usb-storage-modules-6.1.0-32-mips64r2eb-di, fb-modules-6.1.0-32-mips64r2eb-di, input-modules-6.1.0-32-mips64r2eb-di, event-modules-6.1.0-32-mips64r2eb-di, mouse-modules-6.1.0-32-mips64r2eb-di,\n nic-usb-modules-6.1.0-32-mips64r2eb-di, sata-modules-6.1.0-32-mips64r2eb-di, crc-modules-6.1.0-32-mips64r2eb-di, crypto-modules-6.1.0-32-mips64r2eb-di, crypto-dm-modules-6.1.0-32-mips64r2eb-di, ata-modules-6.1.0-32-mips64r2eb-di, mmc-core-modules-6.1.0-32-mips64r2eb-di, mmc-modules-6.1.0-32-mips64r2eb-di, nbd-modules-6.1.0-32-mips64r2eb-di, speakup-modules-6.1.0-32-mips64r2eb-di, sound-modules-6.1.0-32-mips64r2eb-di, linux-headers-6.1.0-32-5kc-malta, linux-image-6.1.0-32-5kc-malta, linux-image-5kc-malta, linux-headers-5kc-malta, linux-image-6.1.0-32-5kc-malta-dbg, linux-image-5kc-malta-dbg, linux-headers-6.1.0-32-mips64r2eb, linux-image-6.1.0-32-mips64r2eb, linux-image-mips64r2eb, linux-headers-mips64r2eb, linux-image-6.1.0-32-mips64r2eb-dbg, linux-image-mips64r2eb-dbg, kernel-image-6.1.0-32-loongson-3-di, nic-modules-6.1.0-32-loongson-3-di, nic-wireless-modules-6.1.0-32-loongson-3-di, nic-shared-modules-6.1.0-32-loongson-3-di,\n usb-serial-modules-6.1.0-32-loongson-3-di, ppp-modules-6.1.0-32-loongson-3-di, pata-modules-6.1.0-32-loongson-3-di, cdrom-core-modules-6.1.0-32-loongson-3-di, firewire-core-modules-6.1.0-32-loongson-3-di, scsi-core-modules-6.1.0-32-loongson-3-di, scsi-modules-6.1.0-32-loongson-3-di, scsi-nic-modules-6.1.0-32-loongson-3-di, loop-modules-6.1.0-32-loongson-3-di, btrfs-modules-6.1.0-32-loongson-3-di, ext4-modules-6.1.0-32-loongson-3-di, isofs-modules-6.1.0-32-loongson-3-di, jfs-modules-6.1.0-32-loongson-3-di, xfs-modules-6.1.0-32-loongson-3-di, fat-modules-6.1.0-32-loongson-3-di, affs-modules-6.1.0-32-loongson-3-di, minix-modules-6.1.0-32-loongson-3-di, nfs-modules-6.1.0-32-loongson-3-di, squashfs-modules-6.1.0-32-loongson-3-di, udf-modules-6.1.0-32-loongson-3-di, fuse-modules-6.1.0-32-loongson-3-di, f2fs-modules-6.1.0-32-loongson-3-di, md-modules-6.1.0-32-loongson-3-di, multipath-modules-6.1.0-32-loongson-3-di, usb-modules-6.1.0-32-loongson-3-di,\n usb-storage-modules-6.1.0-32-loongson-3-di, fb-modules-6.1.0-32-loongson-3-di, input-modules-6.1.0-32-loongson-3-di, event-modules-6.1.0-32-loongson-3-di, mouse-modules-6.1.0-32-loongson-3-di, nic-usb-modules-6.1.0-32-loongson-3-di, sata-modules-6.1.0-32-loongson-3-di, crc-modules-6.1.0-32-loongson-3-di, crypto-modules-6.1.0-32-loongson-3-di, crypto-dm-modules-6.1.0-32-loongson-3-di, ata-modules-6.1.0-32-loongson-3-di, mmc-core-modules-6.1.0-32-loongson-3-di, mmc-modules-6.1.0-32-loongson-3-di, nbd-modules-6.1.0-32-loongson-3-di, speakup-modules-6.1.0-32-loongson-3-di, sound-modules-6.1.0-32-loongson-3-di, kernel-image-6.1.0-32-mips64r2el-di, nic-modules-6.1.0-32-mips64r2el-di, nic-wireless-modules-6.1.0-32-mips64r2el-di, nic-shared-modules-6.1.0-32-mips64r2el-di, usb-serial-modules-6.1.0-32-mips64r2el-di, ppp-modules-6.1.0-32-mips64r2el-di, pata-modules-6.1.0-32-mips64r2el-di, cdrom-core-modules-6.1.0-32-mips64r2el-di, firewire-core-modules-6.1.0-32-mips64r2el-di,\n scsi-core-modules-6.1.0-32-mips64r2el-di, scsi-modules-6.1.0-32-mips64r2el-di, scsi-nic-modules-6.1.0-32-mips64r2el-di, loop-modules-6.1.0-32-mips64r2el-di, btrfs-modules-6.1.0-32-mips64r2el-di, ext4-modules-6.1.0-32-mips64r2el-di, isofs-modules-6.1.0-32-mips64r2el-di, jfs-modules-6.1.0-32-mips64r2el-di, xfs-modules-6.1.0-32-mips64r2el-di, 
fat-modules-6.1.0-32-mips64r2el-di, affs-modules-6.1.0-32-mips64r2el-di, minix-modules-6.1.0-32-mips64r2el-di, nfs-modules-6.1.0-32-mips64r2el-di, squashfs-modules-6.1.0-32-mips64r2el-di, udf-modules-6.1.0-32-mips64r2el-di, fuse-modules-6.1.0-32-mips64r2el-di, f2fs-modules-6.1.0-32-mips64r2el-di, md-modules-6.1.0-32-mips64r2el-di, multipath-modules-6.1.0-32-mips64r2el-di, usb-modules-6.1.0-32-mips64r2el-di, usb-storage-modules-6.1.0-32-mips64r2el-di, fb-modules-6.1.0-32-mips64r2el-di, input-modules-6.1.0-32-mips64r2el-di, event-modules-6.1.0-32-mips64r2el-di, mouse-modules-6.1.0-32-mips64r2el-di,\n nic-usb-modules-6.1.0-32-mips64r2el-di, sata-modules-6.1.0-32-mips64r2el-di, crc-modules-6.1.0-32-mips64r2el-di, crypto-modules-6.1.0-32-mips64r2el-di, crypto-dm-modules-6.1.0-32-mips64r2el-di, ata-modules-6.1.0-32-mips64r2el-di, mmc-core-modules-6.1.0-32-mips64r2el-di, mmc-modules-6.1.0-32-mips64r2el-di, nbd-modules-6.1.0-32-mips64r2el-di, speakup-modules-6.1.0-32-mips64r2el-di, sound-modules-6.1.0-32-mips64r2el-di, linux-headers-6.1.0-32-mips64r2el, linux-image-6.1.0-32-mips64r2el, linux-image-mips64r2el, linux-headers-mips64r2el, linux-image-6.1.0-32-mips64r2el-dbg, linux-image-mips64r2el-dbg, linux-headers-6.1.0-32-loongson-3, linux-image-6.1.0-32-loongson-3, linux-image-loongson-3, linux-headers-loongson-3, linux-image-6.1.0-32-loongson-3-dbg, linux-image-loongson-3-dbg, kernel-image-6.1.0-32-mips64r6eb-di, nic-modules-6.1.0-32-mips64r6eb-di, nic-wireless-modules-6.1.0-32-mips64r6eb-di, nic-shared-modules-6.1.0-32-mips64r6eb-di,\n usb-serial-modules-6.1.0-32-mips64r6eb-di, ppp-modules-6.1.0-32-mips64r6eb-di, pata-modules-6.1.0-32-mips64r6eb-di, cdrom-core-modules-6.1.0-32-mips64r6eb-di, firewire-core-modules-6.1.0-32-mips64r6eb-di, scsi-core-modules-6.1.0-32-mips64r6eb-di, scsi-modules-6.1.0-32-mips64r6eb-di, scsi-nic-modules-6.1.0-32-mips64r6eb-di, loop-modules-6.1.0-32-mips64r6eb-di, btrfs-modules-6.1.0-32-mips64r6eb-di, ext4-modules-6.1.0-32-mips64r6eb-di, isofs-modules-6.1.0-32-mips64r6eb-di, jfs-modules-6.1.0-32-mips64r6eb-di, xfs-modules-6.1.0-32-mips64r6eb-di, fat-modules-6.1.0-32-mips64r6eb-di, affs-modules-6.1.0-32-mips64r6eb-di, minix-modules-6.1.0-32-mips64r6eb-di, nfs-modules-6.1.0-32-mips64r6eb-di, squashfs-modules-6.1.0-32-mips64r6eb-di, udf-modules-6.1.0-32-mips64r6eb-di, fuse-modules-6.1.0-32-mips64r6eb-di, f2fs-modules-6.1.0-32-mips64r6eb-di, md-modules-6.1.0-32-mips64r6eb-di, multipath-modules-6.1.0-32-mips64r6eb-di, usb-modules-6.1.0-32-mips64r6eb-di,\n usb-storage-modules-6.1.0-32-mips64r6eb-di, fb-modules-6.1.0-32-mips64r6eb-di, input-modules-6.1.0-32-mips64r6eb-di, event-modules-6.1.0-32-mips64r6eb-di, mouse-modules-6.1.0-32-mips64r6eb-di, nic-usb-modules-6.1.0-32-mips64r6eb-di, sata-modules-6.1.0-32-mips64r6eb-di, crc-modules-6.1.0-32-mips64r6eb-di, crypto-modules-6.1.0-32-mips64r6eb-di, crypto-dm-modules-6.1.0-32-mips64r6eb-di, ata-modules-6.1.0-32-mips64r6eb-di, mmc-core-modules-6.1.0-32-mips64r6eb-di, mmc-modules-6.1.0-32-mips64r6eb-di, nbd-modules-6.1.0-32-mips64r6eb-di, speakup-modules-6.1.0-32-mips64r6eb-di, sound-modules-6.1.0-32-mips64r6eb-di, linux-headers-6.1.0-32-mips64r6eb, linux-image-6.1.0-32-mips64r6eb, linux-image-mips64r6eb, linux-headers-mips64r6eb, linux-image-6.1.0-32-mips64r6eb-dbg, linux-image-mips64r6eb-dbg, kernel-image-6.1.0-32-mips64r6el-di, nic-modules-6.1.0-32-mips64r6el-di, nic-wireless-modules-6.1.0-32-mips64r6el-di, nic-shared-modules-6.1.0-32-mips64r6el-di,\n usb-serial-modules-6.1.0-32-mips64r6el-di, ppp-modules-6.1.0-32-mips64r6el-di, 
pata-modules-6.1.0-32-mips64r6el-di, cdrom-core-modules-6.1.0-32-mips64r6el-di, firewire-core-modules-6.1.0-32-mips64r6el-di, scsi-core-modules-6.1.0-32-mips64r6el-di, scsi-modules-6.1.0-32-mips64r6el-di, scsi-nic-modules-6.1.0-32-mips64r6el-di, loop-modules-6.1.0-32-mips64r6el-di, btrfs-modules-6.1.0-32-mips64r6el-di, ext4-modules-6.1.0-32-mips64r6el-di, isofs-modules-6.1.0-32-mips64r6el-di, jfs-modules-6.1.0-32-mips64r6el-di, xfs-modules-6.1.0-32-mips64r6el-di, fat-modules-6.1.0-32-mips64r6el-di, affs-modules-6.1.0-32-mips64r6el-di, minix-modules-6.1.0-32-mips64r6el-di, nfs-modules-6.1.0-32-mips64r6el-di, squashfs-modules-6.1.0-32-mips64r6el-di, udf-modules-6.1.0-32-mips64r6el-di, fuse-modules-6.1.0-32-mips64r6el-di, f2fs-modules-6.1.0-32-mips64r6el-di, md-modules-6.1.0-32-mips64r6el-di, multipath-modules-6.1.0-32-mips64r6el-di, usb-modules-6.1.0-32-mips64r6el-di,\n usb-storage-modules-6.1.0-32-mips64r6el-di, fb-modules-6.1.0-32-mips64r6el-di, input-modules-6.1.0-32-mips64r6el-di, event-modules-6.1.0-32-mips64r6el-di, mouse-modules-6.1.0-32-mips64r6el-di, nic-usb-modules-6.1.0-32-mips64r6el-di, sata-modules-6.1.0-32-mips64r6el-di, crc-modules-6.1.0-32-mips64r6el-di, crypto-modules-6.1.0-32-mips64r6el-di, crypto-dm-modules-6.1.0-32-mips64r6el-di, ata-modules-6.1.0-32-mips64r6el-di, mmc-core-modules-6.1.0-32-mips64r6el-di, mmc-modules-6.1.0-32-mips64r6el-di, nbd-modules-6.1.0-32-mips64r6el-di, speakup-modules-6.1.0-32-mips64r6el-di, sound-modules-6.1.0-32-mips64r6el-di, linux-headers-6.1.0-32-mips64r6el, linux-image-6.1.0-32-mips64r6el, linux-image-mips64r6el, linux-headers-mips64r6el, linux-image-6.1.0-32-mips64r6el-dbg, linux-image-mips64r6el-dbg, kernel-image-6.1.0-32-mips32r2el-di, nic-modules-6.1.0-32-mips32r2el-di, nic-wireless-modules-6.1.0-32-mips32r2el-di, nic-shared-modules-6.1.0-32-mips32r2el-di,\n usb-serial-modules-6.1.0-32-mips32r2el-di, ppp-modules-6.1.0-32-mips32r2el-di, pata-modules-6.1.0-32-mips32r2el-di, cdrom-core-modules-6.1.0-32-mips32r2el-di, firewire-core-modules-6.1.0-32-mips32r2el-di, scsi-core-modules-6.1.0-32-mips32r2el-di, scsi-modules-6.1.0-32-mips32r2el-di, scsi-nic-modules-6.1.0-32-mips32r2el-di, loop-modules-6.1.0-32-mips32r2el-di, btrfs-modules-6.1.0-32-mips32r2el-di, ext4-modules-6.1.0-32-mips32r2el-di, isofs-modules-6.1.0-32-mips32r2el-di, jfs-modules-6.1.0-32-mips32r2el-di, xfs-modules-6.1.0-32-mips32r2el-di, fat-modules-6.1.0-32-mips32r2el-di, affs-modules-6.1.0-32-mips32r2el-di, minix-modules-6.1.0-32-mips32r2el-di, nfs-modules-6.1.0-32-mips32r2el-di, squashfs-modules-6.1.0-32-mips32r2el-di, udf-modules-6.1.0-32-mips32r2el-di, fuse-modules-6.1.0-32-mips32r2el-di, f2fs-modules-6.1.0-32-mips32r2el-di, md-modules-6.1.0-32-mips32r2el-di, multipath-modules-6.1.0-32-mips32r2el-di, usb-modules-6.1.0-32-mips32r2el-di,\n usb-storage-modules-6.1.0-32-mips32r2el-di, fb-modules-6.1.0-32-mips32r2el-di, input-modules-6.1.0-32-mips32r2el-di, event-modules-6.1.0-32-mips32r2el-di, mouse-modules-6.1.0-32-mips32r2el-di, nic-usb-modules-6.1.0-32-mips32r2el-di, sata-modules-6.1.0-32-mips32r2el-di, crc-modules-6.1.0-32-mips32r2el-di, crypto-modules-6.1.0-32-mips32r2el-di, crypto-dm-modules-6.1.0-32-mips32r2el-di, ata-modules-6.1.0-32-mips32r2el-di, mmc-core-modules-6.1.0-32-mips32r2el-di, mmc-modules-6.1.0-32-mips32r2el-di, nbd-modules-6.1.0-32-mips32r2el-di, speakup-modules-6.1.0-32-mips32r2el-di, sound-modules-6.1.0-32-mips32r2el-di, linux-headers-6.1.0-32-mips32r2el, linux-image-6.1.0-32-mips32r2el, linux-image-mips32r2el, linux-headers-mips32r2el, 
linux-image-6.1.0-32-mips32r2el-dbg, linux-image-mips32r2el-dbg, kernel-image-6.1.0-32-mips32r6eb-di, nic-modules-6.1.0-32-mips32r6eb-di, nic-wireless-modules-6.1.0-32-mips32r6eb-di, nic-shared-modules-6.1.0-32-mips32r6eb-di,\n usb-serial-modules-6.1.0-32-mips32r6eb-di, ppp-modules-6.1.0-32-mips32r6eb-di, pata-modules-6.1.0-32-mips32r6eb-di, cdrom-core-modules-6.1.0-32-mips32r6eb-di, firewire-core-modules-6.1.0-32-mips32r6eb-di, scsi-core-modules-6.1.0-32-mips32r6eb-di, scsi-modules-6.1.0-32-mips32r6eb-di, scsi-nic-modules-6.1.0-32-mips32r6eb-di, loop-modules-6.1.0-32-mips32r6eb-di, btrfs-modules-6.1.0-32-mips32r6eb-di, ext4-modules-6.1.0-32-mips32r6eb-di, isofs-modules-6.1.0-32-mips32r6eb-di, jfs-modules-6.1.0-32-mips32r6eb-di, xfs-modules-6.1.0-32-mips32r6eb-di, fat-modules-6.1.0-32-mips32r6eb-di, affs-modules-6.1.0-32-mips32r6eb-di, minix-modules-6.1.0-32-mips32r6eb-di, nfs-modules-6.1.0-32-mips32r6eb-di, squashfs-modules-6.1.0-32-mips32r6eb-di, udf-modules-6.1.0-32-mips32r6eb-di, fuse-modules-6.1.0-32-mips32r6eb-di, f2fs-modules-6.1.0-32-mips32r6eb-di, md-modules-6.1.0-32-mips32r6eb-di, multipath-modules-6.1.0-32-mips32r6eb-di, usb-modules-6.1.0-32-mips32r6eb-di,\n usb-storage-modules-6.1.0-32-mips32r6eb-di, fb-modules-6.1.0-32-mips32r6eb-di, input-modules-6.1.0-32-mips32r6eb-di, event-modules-6.1.0-32-mips32r6eb-di, mouse-modules-6.1.0-32-mips32r6eb-di, nic-usb-modules-6.1.0-32-mips32r6eb-di, sata-modules-6.1.0-32-mips32r6eb-di, crc-modules-6.1.0-32-mips32r6eb-di, crypto-modules-6.1.0-32-mips32r6eb-di, crypto-dm-modules-6.1.0-32-mips32r6eb-di, ata-modules-6.1.0-32-mips32r6eb-di, mmc-core-modules-6.1.0-32-mips32r6eb-di, mmc-modules-6.1.0-32-mips32r6eb-di, nbd-modules-6.1.0-32-mips32r6eb-di, speakup-modules-6.1.0-32-mips32r6eb-di, sound-modules-6.1.0-32-mips32r6eb-di, linux-headers-6.1.0-32-mips32r6eb, linux-image-6.1.0-32-mips32r6eb, linux-image-mips32r6eb, linux-headers-mips32r6eb, linux-image-6.1.0-32-mips32r6eb-dbg, linux-image-mips32r6eb-dbg, kernel-image-6.1.0-32-mips32r6el-di, nic-modules-6.1.0-32-mips32r6el-di, nic-wireless-modules-6.1.0-32-mips32r6el-di, nic-shared-modules-6.1.0-32-mips32r6el-di,\n usb-serial-modules-6.1.0-32-mips32r6el-di, ppp-modules-6.1.0-32-mips32r6el-di, pata-modules-6.1.0-32-mips32r6el-di, cdrom-core-modules-6.1.0-32-mips32r6el-di, firewire-core-modules-6.1.0-32-mips32r6el-di, scsi-core-modules-6.1.0-32-mips32r6el-di, scsi-modules-6.1.0-32-mips32r6el-di, scsi-nic-modules-6.1.0-32-mips32r6el-di, loop-modules-6.1.0-32-mips32r6el-di, btrfs-modules-6.1.0-32-mips32r6el-di, ext4-modules-6.1.0-32-mips32r6el-di, isofs-modules-6.1.0-32-mips32r6el-di, jfs-modules-6.1.0-32-mips32r6el-di, xfs-modules-6.1.0-32-mips32r6el-di, fat-modules-6.1.0-32-mips32r6el-di, affs-modules-6.1.0-32-mips32r6el-di, minix-modules-6.1.0-32-mips32r6el-di, nfs-modules-6.1.0-32-mips32r6el-di, squashfs-modules-6.1.0-32-mips32r6el-di, udf-modules-6.1.0-32-mips32r6el-di, fuse-modules-6.1.0-32-mips32r6el-di, f2fs-modules-6.1.0-32-mips32r6el-di, md-modules-6.1.0-32-mips32r6el-di, multipath-modules-6.1.0-32-mips32r6el-di, usb-modules-6.1.0-32-mips32r6el-di,\n usb-storage-modules-6.1.0-32-mips32r6el-di, fb-modules-6.1.0-32-mips32r6el-di, input-modules-6.1.0-32-mips32r6el-di, event-modules-6.1.0-32-mips32r6el-di, mouse-modules-6.1.0-32-mips32r6el-di, nic-usb-modules-6.1.0-32-mips32r6el-di, sata-modules-6.1.0-32-mips32r6el-di, crc-modules-6.1.0-32-mips32r6el-di, crypto-modules-6.1.0-32-mips32r6el-di, crypto-dm-modules-6.1.0-32-mips32r6el-di, ata-modules-6.1.0-32-mips32r6el-di, 
mmc-core-modules-6.1.0-32-mips32r6el-di, mmc-modules-6.1.0-32-mips32r6el-di, nbd-modules-6.1.0-32-mips32r6el-di, speakup-modules-6.1.0-32-mips32r6el-di, sound-modules-6.1.0-32-mips32r6el-di, linux-headers-6.1.0-32-mips32r6el, linux-image-6.1.0-32-mips32r6el, linux-image-mips32r6el, linux-headers-mips32r6el, linux-image-6.1.0-32-mips32r6el-dbg, linux-image-mips32r6el-dbg, kernel-image-6.1.0-32-powerpc-di, nic-modules-6.1.0-32-powerpc-di, nic-wireless-modules-6.1.0-32-powerpc-di, nic-shared-modules-6.1.0-32-powerpc-di, serial-modules-6.1.0-32-powerpc-di,\n usb-serial-modules-6.1.0-32-powerpc-di, ppp-modules-6.1.0-32-powerpc-di, pata-modules-6.1.0-32-powerpc-di, cdrom-core-modules-6.1.0-32-powerpc-di, firewire-core-modules-6.1.0-32-powerpc-di, scsi-core-modules-6.1.0-32-powerpc-di, scsi-modules-6.1.0-32-powerpc-di, scsi-nic-modules-6.1.0-32-powerpc-di, loop-modules-6.1.0-32-powerpc-di, btrfs-modules-6.1.0-32-powerpc-di, ext4-modules-6.1.0-32-powerpc-di, isofs-modules-6.1.0-32-powerpc-di, jfs-modules-6.1.0-32-powerpc-di, xfs-modules-6.1.0-32-powerpc-di, fat-modules-6.1.0-32-powerpc-di, hfs-modules-6.1.0-32-powerpc-di, affs-modules-6.1.0-32-powerpc-di, squashfs-modules-6.1.0-32-powerpc-di, udf-modules-6.1.0-32-powerpc-di, fuse-modules-6.1.0-32-powerpc-di, f2fs-modules-6.1.0-32-powerpc-di, md-modules-6.1.0-32-powerpc-di, multipath-modules-6.1.0-32-powerpc-di, usb-modules-6.1.0-32-powerpc-di, usb-storage-modules-6.1.0-32-powerpc-di, pcmcia-storage-modules-6.1.0-32-powerpc-di, fb-modules-6.1.0-32-powerpc-di,\n input-modules-6.1.0-32-powerpc-di, event-modules-6.1.0-32-powerpc-di, mouse-modules-6.1.0-32-powerpc-di, nic-pcmcia-modules-6.1.0-32-powerpc-di, pcmcia-modules-6.1.0-32-powerpc-di, nic-usb-modules-6.1.0-32-powerpc-di, sata-modules-6.1.0-32-powerpc-di, crc-modules-6.1.0-32-powerpc-di, crypto-modules-6.1.0-32-powerpc-di, crypto-dm-modules-6.1.0-32-powerpc-di, ata-modules-6.1.0-32-powerpc-di, mmc-core-modules-6.1.0-32-powerpc-di, nbd-modules-6.1.0-32-powerpc-di, uinput-modules-6.1.0-32-powerpc-di, kernel-image-6.1.0-32-powerpc64-di, nic-modules-6.1.0-32-powerpc64-di, nic-wireless-modules-6.1.0-32-powerpc64-di, nic-shared-modules-6.1.0-32-powerpc64-di, serial-modules-6.1.0-32-powerpc64-di, usb-serial-modules-6.1.0-32-powerpc64-di, ppp-modules-6.1.0-32-powerpc64-di, pata-modules-6.1.0-32-powerpc64-di, cdrom-core-modules-6.1.0-32-powerpc64-di, firewire-core-modules-6.1.0-32-powerpc64-di, scsi-core-modules-6.1.0-32-powerpc64-di, scsi-modules-6.1.0-32-powerpc64-di,\n scsi-nic-modules-6.1.0-32-powerpc64-di, loop-modules-6.1.0-32-powerpc64-di, btrfs-modules-6.1.0-32-powerpc64-di, ext4-modules-6.1.0-32-powerpc64-di, isofs-modules-6.1.0-32-powerpc64-di, jfs-modules-6.1.0-32-powerpc64-di, xfs-modules-6.1.0-32-powerpc64-di, fat-modules-6.1.0-32-powerpc64-di, hfs-modules-6.1.0-32-powerpc64-di, affs-modules-6.1.0-32-powerpc64-di, squashfs-modules-6.1.0-32-powerpc64-di, udf-modules-6.1.0-32-powerpc64-di, fuse-modules-6.1.0-32-powerpc64-di, f2fs-modules-6.1.0-32-powerpc64-di, md-modules-6.1.0-32-powerpc64-di, multipath-modules-6.1.0-32-powerpc64-di, usb-modules-6.1.0-32-powerpc64-di, usb-storage-modules-6.1.0-32-powerpc64-di, pcmcia-storage-modules-6.1.0-32-powerpc64-di, fb-modules-6.1.0-32-powerpc64-di, input-modules-6.1.0-32-powerpc64-di, event-modules-6.1.0-32-powerpc64-di, mouse-modules-6.1.0-32-powerpc64-di, nic-pcmcia-modules-6.1.0-32-powerpc64-di, pcmcia-modules-6.1.0-32-powerpc64-di, nic-usb-modules-6.1.0-32-powerpc64-di,\n sata-modules-6.1.0-32-powerpc64-di, i2c-modules-6.1.0-32-powerpc64-di, 
crc-modules-6.1.0-32-powerpc64-di, crypto-modules-6.1.0-32-powerpc64-di, crypto-dm-modules-6.1.0-32-powerpc64-di, ata-modules-6.1.0-32-powerpc64-di, mmc-core-modules-6.1.0-32-powerpc64-di, nbd-modules-6.1.0-32-powerpc64-di, uinput-modules-6.1.0-32-powerpc64-di, mtd-core-modules-6.1.0-32-powerpc64-di, hypervisor-modules-6.1.0-32-powerpc64-di, fancontrol-modules-6.1.0-32-powerpc64-di, linux-headers-6.1.0-32-powerpc, linux-image-6.1.0-32-powerpc, linux-image-powerpc, linux-headers-powerpc, linux-image-6.1.0-32-powerpc-dbg, linux-image-powerpc-dbg, linux-headers-6.1.0-32-powerpc-smp, linux-image-6.1.0-32-powerpc-smp, linux-image-powerpc-smp, linux-headers-powerpc-smp, linux-image-6.1.0-32-powerpc-smp-dbg, linux-image-powerpc-smp-dbg, linux-headers-6.1.0-32-powerpc64, linux-image-6.1.0-32-powerpc64, linux-image-powerpc64, linux-headers-powerpc64, linux-image-6.1.0-32-powerpc64-dbg,\n linux-image-powerpc64-dbg, kernel-image-6.1.0-32-powerpc64le-di, nic-modules-6.1.0-32-powerpc64le-di, nic-wireless-modules-6.1.0-32-powerpc64le-di, nic-shared-modules-6.1.0-32-powerpc64le-di, serial-modules-6.1.0-32-powerpc64le-di, usb-serial-modules-6.1.0-32-powerpc64le-di, ppp-modules-6.1.0-32-powerpc64le-di, cdrom-core-modules-6.1.0-32-powerpc64le-di, firewire-core-modules-6.1.0-32-powerpc64le-di, scsi-core-modules-6.1.0-32-powerpc64le-di, scsi-modules-6.1.0-32-powerpc64le-di, scsi-nic-modules-6.1.0-32-powerpc64le-di, loop-modules-6.1.0-32-powerpc64le-di, btrfs-modules-6.1.0-32-powerpc64le-di, ext4-modules-6.1.0-32-powerpc64le-di, isofs-modules-6.1.0-32-powerpc64le-di, jfs-modules-6.1.0-32-powerpc64le-di, xfs-modules-6.1.0-32-powerpc64le-di, fat-modules-6.1.0-32-powerpc64le-di, squashfs-modules-6.1.0-32-powerpc64le-di, udf-modules-6.1.0-32-powerpc64le-di, fuse-modules-6.1.0-32-powerpc64le-di, f2fs-modules-6.1.0-32-powerpc64le-di,\n md-modules-6.1.0-32-powerpc64le-di, multipath-modules-6.1.0-32-powerpc64le-di, usb-modules-6.1.0-32-powerpc64le-di, usb-storage-modules-6.1.0-32-powerpc64le-di, fb-modules-6.1.0-32-powerpc64le-di, input-modules-6.1.0-32-powerpc64le-di, event-modules-6.1.0-32-powerpc64le-di, mouse-modules-6.1.0-32-powerpc64le-di, nic-usb-modules-6.1.0-32-powerpc64le-di, sata-modules-6.1.0-32-powerpc64le-di, i2c-modules-6.1.0-32-powerpc64le-di, crc-modules-6.1.0-32-powerpc64le-di, crypto-modules-6.1.0-32-powerpc64le-di, crypto-dm-modules-6.1.0-32-powerpc64le-di, ata-modules-6.1.0-32-powerpc64le-di, nbd-modules-6.1.0-32-powerpc64le-di, uinput-modules-6.1.0-32-powerpc64le-di, mtd-core-modules-6.1.0-32-powerpc64le-di, hypervisor-modules-6.1.0-32-powerpc64le-di, fancontrol-modules-6.1.0-32-powerpc64le-di, linux-headers-6.1.0-32-powerpc64le, linux-image-6.1.0-32-powerpc64le, linux-image-powerpc64le, linux-headers-powerpc64le, linux-image-6.1.0-32-powerpc64le-dbg,\n linux-image-powerpc64le-dbg, kernel-image-6.1.0-32-riscv64-di, nic-modules-6.1.0-32-riscv64-di, nic-wireless-modules-6.1.0-32-riscv64-di, nic-shared-modules-6.1.0-32-riscv64-di, usb-serial-modules-6.1.0-32-riscv64-di, ppp-modules-6.1.0-32-riscv64-di, pata-modules-6.1.0-32-riscv64-di, cdrom-core-modules-6.1.0-32-riscv64-di, scsi-core-modules-6.1.0-32-riscv64-di, scsi-modules-6.1.0-32-riscv64-di, scsi-nic-modules-6.1.0-32-riscv64-di, loop-modules-6.1.0-32-riscv64-di, btrfs-modules-6.1.0-32-riscv64-di, ext4-modules-6.1.0-32-riscv64-di, isofs-modules-6.1.0-32-riscv64-di, jfs-modules-6.1.0-32-riscv64-di, fat-modules-6.1.0-32-riscv64-di, squashfs-modules-6.1.0-32-riscv64-di, udf-modules-6.1.0-32-riscv64-di, fuse-modules-6.1.0-32-riscv64-di, 
f2fs-modules-6.1.0-32-riscv64-di, md-modules-6.1.0-32-riscv64-di, multipath-modules-6.1.0-32-riscv64-di, usb-modules-6.1.0-32-riscv64-di, usb-storage-modules-6.1.0-32-riscv64-di, fb-modules-6.1.0-32-riscv64-di,\n input-modules-6.1.0-32-riscv64-di, event-modules-6.1.0-32-riscv64-di, nic-usb-modules-6.1.0-32-riscv64-di, sata-modules-6.1.0-32-riscv64-di, i2c-modules-6.1.0-32-riscv64-di, crc-modules-6.1.0-32-riscv64-di, crypto-modules-6.1.0-32-riscv64-di, crypto-dm-modules-6.1.0-32-riscv64-di, ata-modules-6.1.0-32-riscv64-di, mmc-core-modules-6.1.0-32-riscv64-di, mmc-modules-6.1.0-32-riscv64-di, nbd-modules-6.1.0-32-riscv64-di, mtd-modules-6.1.0-32-riscv64-di, mtd-core-modules-6.1.0-32-riscv64-di, linux-headers-6.1.0-32-riscv64, linux-image-6.1.0-32-riscv64, linux-image-riscv64, linux-headers-riscv64, linux-image-6.1.0-32-riscv64-dbg, linux-image-riscv64-dbg, kernel-image-6.1.0-32-s390x-di, nic-modules-6.1.0-32-s390x-di, cdrom-core-modules-6.1.0-32-s390x-di, scsi-core-modules-6.1.0-32-s390x-di, scsi-modules-6.1.0-32-s390x-di, loop-modules-6.1.0-32-s390x-di, btrfs-modules-6.1.0-32-s390x-di, ext4-modules-6.1.0-32-s390x-di, isofs-modules-6.1.0-32-s390x-di,\n xfs-modules-6.1.0-32-s390x-di, fat-modules-6.1.0-32-s390x-di, udf-modules-6.1.0-32-s390x-di, fuse-modules-6.1.0-32-s390x-di, f2fs-modules-6.1.0-32-s390x-di, md-modules-6.1.0-32-s390x-di, multipath-modules-6.1.0-32-s390x-di, crc-modules-6.1.0-32-s390x-di, crypto-modules-6.1.0-32-s390x-di, crypto-dm-modules-6.1.0-32-s390x-di, nbd-modules-6.1.0-32-s390x-di, mtd-core-modules-6.1.0-32-s390x-di, dasd-modules-6.1.0-32-s390x-di, dasd-extra-modules-6.1.0-32-s390x-di, linux-headers-6.1.0-32-s390x, linux-image-6.1.0-32-s390x, linux-image-s390x, linux-headers-s390x, linux-image-6.1.0-32-s390x-dbg, linux-image-s390x-dbg, kernel-image-6.1.0-32-sh7751r-di, nic-modules-6.1.0-32-sh7751r-di, nic-shared-modules-6.1.0-32-sh7751r-di, usb-serial-modules-6.1.0-32-sh7751r-di, ppp-modules-6.1.0-32-sh7751r-di, pata-modules-6.1.0-32-sh7751r-di, cdrom-core-modules-6.1.0-32-sh7751r-di, firewire-core-modules-6.1.0-32-sh7751r-di, loop-modules-6.1.0-32-sh7751r-di, btrfs-modules-6.1.0-32-sh7751r-di,\n ext4-modules-6.1.0-32-sh7751r-di, isofs-modules-6.1.0-32-sh7751r-di, jfs-modules-6.1.0-32-sh7751r-di, xfs-modules-6.1.0-32-sh7751r-di, fat-modules-6.1.0-32-sh7751r-di, minix-modules-6.1.0-32-sh7751r-di, squashfs-modules-6.1.0-32-sh7751r-di, udf-modules-6.1.0-32-sh7751r-di, fuse-modules-6.1.0-32-sh7751r-di, f2fs-modules-6.1.0-32-sh7751r-di, md-modules-6.1.0-32-sh7751r-di, multipath-modules-6.1.0-32-sh7751r-di, usb-storage-modules-6.1.0-32-sh7751r-di, nic-usb-modules-6.1.0-32-sh7751r-di, sata-modules-6.1.0-32-sh7751r-di, i2c-modules-6.1.0-32-sh7751r-di, crc-modules-6.1.0-32-sh7751r-di, crypto-modules-6.1.0-32-sh7751r-di, crypto-dm-modules-6.1.0-32-sh7751r-di, nbd-modules-6.1.0-32-sh7751r-di, speakup-modules-6.1.0-32-sh7751r-di, sound-modules-6.1.0-32-sh7751r-di, kernel-image-6.1.0-32-sh7785lcr-di, nic-modules-6.1.0-32-sh7785lcr-di, nic-shared-modules-6.1.0-32-sh7785lcr-di, usb-serial-modules-6.1.0-32-sh7785lcr-di, ppp-modules-6.1.0-32-sh7785lcr-di,\n pata-modules-6.1.0-32-sh7785lcr-di, cdrom-core-modules-6.1.0-32-sh7785lcr-di, firewire-core-modules-6.1.0-32-sh7785lcr-di, loop-modules-6.1.0-32-sh7785lcr-di, btrfs-modules-6.1.0-32-sh7785lcr-di, ext4-modules-6.1.0-32-sh7785lcr-di, isofs-modules-6.1.0-32-sh7785lcr-di, jfs-modules-6.1.0-32-sh7785lcr-di, xfs-modules-6.1.0-32-sh7785lcr-di, fat-modules-6.1.0-32-sh7785lcr-di, minix-modules-6.1.0-32-sh7785lcr-di, 
squashfs-modules-6.1.0-32-sh7785lcr-di, udf-modules-6.1.0-32-sh7785lcr-di, fuse-modules-6.1.0-32-sh7785lcr-di, f2fs-modules-6.1.0-32-sh7785lcr-di, md-modules-6.1.0-32-sh7785lcr-di, multipath-modules-6.1.0-32-sh7785lcr-di, nic-usb-modules-6.1.0-32-sh7785lcr-di, sata-modules-6.1.0-32-sh7785lcr-di, crc-modules-6.1.0-32-sh7785lcr-di, crypto-modules-6.1.0-32-sh7785lcr-di, crypto-dm-modules-6.1.0-32-sh7785lcr-di, nbd-modules-6.1.0-32-sh7785lcr-di, speakup-modules-6.1.0-32-sh7785lcr-di, sound-modules-6.1.0-32-sh7785lcr-di, linux-headers-6.1.0-32-sh7751r,\n linux-image-6.1.0-32-sh7751r, linux-image-sh7751r, linux-headers-sh7751r, linux-image-6.1.0-32-sh7751r-dbg, linux-image-sh7751r-dbg, linux-headers-6.1.0-32-sh7785lcr, linux-image-6.1.0-32-sh7785lcr, linux-image-sh7785lcr, linux-headers-sh7785lcr, linux-image-6.1.0-32-sh7785lcr-dbg, linux-image-sh7785lcr-dbg, kernel-image-6.1.0-32-sparc64-di, nic-modules-6.1.0-32-sparc64-di, nic-shared-modules-6.1.0-32-sparc64-di, usb-serial-modules-6.1.0-32-sparc64-di, ppp-modules-6.1.0-32-sparc64-di, pata-modules-6.1.0-32-sparc64-di, cdrom-core-modules-6.1.0-32-sparc64-di, scsi-core-modules-6.1.0-32-sparc64-di, scsi-modules-6.1.0-32-sparc64-di, btrfs-modules-6.1.0-32-sparc64-di, ext4-modules-6.1.0-32-sparc64-di, isofs-modules-6.1.0-32-sparc64-di, jfs-modules-6.1.0-32-sparc64-di, ufs-modules-6.1.0-32-sparc64-di, xfs-modules-6.1.0-32-sparc64-di, fat-modules-6.1.0-32-sparc64-di, squashfs-modules-6.1.0-32-sparc64-di, udf-modules-6.1.0-32-sparc64-di, fuse-modules-6.1.0-32-sparc64-di,\n f2fs-modules-6.1.0-32-sparc64-di, md-modules-6.1.0-32-sparc64-di, multipath-modules-6.1.0-32-sparc64-di, usb-modules-6.1.0-32-sparc64-di, usb-storage-modules-6.1.0-32-sparc64-di, fb-modules-6.1.0-32-sparc64-di, input-modules-6.1.0-32-sparc64-di, nic-usb-modules-6.1.0-32-sparc64-di, sata-modules-6.1.0-32-sparc64-di, i2c-modules-6.1.0-32-sparc64-di, crc-modules-6.1.0-32-sparc64-di, crypto-modules-6.1.0-32-sparc64-di, crypto-dm-modules-6.1.0-32-sparc64-di, ata-modules-6.1.0-32-sparc64-di, nbd-modules-6.1.0-32-sparc64-di, linux-headers-6.1.0-32-sparc64, linux-image-6.1.0-32-sparc64, linux-image-sparc64, linux-headers-sparc64, linux-image-6.1.0-32-sparc64-dbg, linux-image-sparc64-dbg, linux-headers-6.1.0-32-sparc64-smp, linux-image-6.1.0-32-sparc64-smp, linux-image-sparc64-smp, linux-headers-sparc64-smp, linux-image-6.1.0-32-sparc64-smp-dbg, linux-image-sparc64-smp-dbg, linux-compiler-gcc-12-arm, linux-compiler-gcc-12-s390, linux-compiler-gcc-12-x86,\n linux-image-parisc64-smp,\n linux-image-parisc-smp\n\"\"\"\n\n\ndef test_binutils(binutils):\n    m = mock_open(read_data=binutils)\n\n    with patch(\"builtins.open\", m):\n        result = parse_sources_file(\"dummy\")\n\n    assert result == {\n        \"binutils\": {\n            \"binutils-for-host\",\n            \"binutils-for-build\",\n            \"binutils-ia64-linux-gnu-dbg\",\n            \"binutils-m68k-linux-gnu\",\n            \"binutils-mips64el-linux-gnuabin32-dbg\",\n            \"binutils-mipsisa64r6-linux-gnuabin32\",\n            \"binutils-mipsisa64r6el-linux-gnuabi64-dbg\",\n        }\n    }\n\n\ndef test_linux(linux):\n    m = mock_open(read_data=linux)\n\n    with patch(\"builtins.open\", m):\n        result = parse_sources_file(\"dummy\")\n\n    assert \"linux-headers-6.1.0-32-amd64\" in result[\"linux\"]\n    assert \"linux-headers-6.1.0-32-cloud-amd64\" in result[\"linux\"]\n"
  },
  {
    "path": "package_managers/debian/structs.py",
    "content": "from dataclasses import dataclass, field\n\n\n# structures\n@dataclass\nclass Maintainer:\n    name: str = field(default_factory=str)\n    email: str = field(default_factory=str)\n\n\n@dataclass\nclass File:\n    hash: str = field(default_factory=str)\n    size: int = field(default_factory=int)\n    filename: str = field(default_factory=str)\n\n\n@dataclass\nclass Depends:\n    package: str = field(default_factory=str)\n    semver: str = field(default_factory=str)\n\n\n@dataclass\nclass Tag:\n    name: str = field(default_factory=str)\n    value: str = field(default_factory=str)\n\n\n# this represents whatever we might get from Debian...either packages or sources\n# it's immaterial what it is, we just need to know how to parse it\n@dataclass\nclass DebianData:\n    # Package fields\n    package: str = field(default_factory=str)\n    source: str = field(default_factory=str)\n    version: str = field(default_factory=str)\n    installed_size: int = field(default_factory=int)\n    maintainer: Maintainer = field(default_factory=Maintainer)\n    architecture: str = field(default_factory=str)\n    description: str = field(default_factory=str)\n    homepage: str = field(default_factory=str)\n    description_md5: str = field(default_factory=str)\n    tag: str = field(default_factory=str)\n    section: str = field(default_factory=str)\n    priority: str = field(default_factory=str)\n    filename: str = field(default_factory=str)\n    size: int = field(default_factory=int)\n    md5sum: str = field(default_factory=str)\n    sha256: str = field(default_factory=str)\n\n    # Dependency fields\n    replaces: list[Depends] = field(default_factory=list)\n    provides: list[Depends] = field(default_factory=list)\n    depends: list[Depends] = field(default_factory=list)\n    pre_depends: list[Depends] = field(default_factory=list)\n    recommends: list[Depends] = field(default_factory=list)\n    suggests: list[Depends] = field(default_factory=list)\n    breaks: list[Depends] = field(default_factory=list)\n    conflicts: list[Depends] = field(default_factory=list)\n    build_depends: list[Depends] = field(default_factory=list)  # source only\n\n    # Source fields\n    binary: list[str] = field(default_factory=list)\n    uploaders: list[Maintainer] = field(default_factory=list)\n    standards_version: str = field(default_factory=str)\n    format: str = field(default_factory=str)\n    files: list[File] = field(default_factory=list)\n    vcs_browser: str = field(default_factory=str)\n    vcs_git: str = field(default_factory=str)\n    checksums_sha256: list[File] = field(default_factory=list)\n    package_list: list[str] = field(default_factory=list)\n    directory: str = field(default_factory=str)\n    testsuite: str = field(default_factory=str)\n    testsuite_triggers: str = field(default_factory=str)\n"
  },
  {
    "path": "package_managers/homebrew/Dockerfile",
    "content": "FROM ghcr.io/astral-sh/uv:python3.11-bookworm-slim\n\n# Copy everything from the root directory (build context)\nCOPY . .\n\n# Install core requirements using uv\nWORKDIR /core\nRUN uv pip install --system -r requirements.txt\n\nWORKDIR /\n\n# Run the main application\nCMD [\"python\", \"/package_managers/homebrew/main.py\"]\n"
  },
  {
    "path": "package_managers/homebrew/README.md",
    "content": "# Homebrew\n\nThe Homebrew service uses Homebrew's JSON API Documentation to build the Homebrew\ndata model, using a diff approach to build it out.\n\n## Getting Started\n\nTo just run the Homebrew service, you can use the following commands:\n\n```bash\ndocker compose build homebrew\ndocker compose run homebrew\n```\n\n## Pipeline Overview\n\nThe Homebrew pipeline consists of one main script: `main.py`. It fetches two things\nfrom CHAI:\n\n1. Homebrew's Graph, based on packages and legacy dependencies\n2. All the URLs in CHAI for Homebrew's clean URLs\n\nBased on that, it does a diff across each object for each package, and makes changes to\nCHAI accordingly.\n\n## Notes\n\n- Homebrew's dependencies are not just restricted to the `{build,test,...}_dependencies`\n  fields listed in the JSON APIs...it also uses some system level packages denoted in\n  `uses_from_macos`, and `variations` (for linux). The pipeline currently does NOT\n  consider those dependencies\n- This job ignores the versions table entirely, and instead populates the legacy\n  dependencies table, which maintains a package to package relationship\n- Versioned formulae (like `python`, `postgresql`) are ones where the Homebrew package\n  specifies a version. The pipeline considers these packages individual packages,\n  and so creates new records in the `packages` table.\n- The data source for Homebrew does not retrieve the analytics information that is\n  available via the individual JSON API endpoints for each package.\n"
  },
  {
    "path": "package_managers/homebrew/db.py",
    "content": "from core.config import Config\nfrom core.db import DB, CurrentURLs\nfrom core.structs import CurrentGraph\n\n\nclass HomebrewDB(DB):\n    def __init__(self, logger_name: str, config: Config):\n        super().__init__(logger_name)\n        self.config = config\n        self.set_current_graph()\n\n    def set_current_graph(self) -> None:\n        \"\"\"Get the Homebrew packages and dependencies\"\"\"\n        self.graph: CurrentGraph = self.current_graph(self.config.pm_config.pm_id)\n        self.logger.log(f\"Loaded {len(self.graph.package_map)} Homebrew packages\")\n\n    def set_current_urls(self, urls: set[str]) -> None:\n        \"\"\"Wrapper for setting current urls\"\"\"\n        self.urls: CurrentURLs = self.current_urls(urls)\n        self.logger.log(f\"Found {len(self.urls.url_map)} Homebrew URLs\")\n"
  },
  {
    "path": "package_managers/homebrew/diff.py",
    "content": "from datetime import datetime\nfrom uuid import UUID, uuid4\n\nfrom core.config import Config\nfrom core.logger import Logger\nfrom core.models import URL, LegacyDependency, Package, PackageURL\nfrom core.structs import Cache, URLKey\nfrom package_managers.homebrew.structs import Actual\n\n\nclass Diff:\n    def __init__(self, config: Config, caches: Cache):\n        self.config = config\n        self.now = datetime.now()\n        self.caches = caches\n        self.logger = Logger(\"homebrew_diff\")\n\n    def diff_pkg(self, pkg: Actual) -> tuple[UUID, Package | None, dict | None]:\n        \"\"\"\n        Checks if the given pkg is in the package_cache.\n\n        Returns:\n          - pkg_id: the id of the package\n          - package: If new, returns a new package object. If existing, returns None\n          - changes: a dictionary of changes\n        \"\"\"\n        self.logger.debug(f\"Diffing package: {pkg.formula}\")\n        pkg_id: UUID\n        if pkg.formula not in self.caches.package_map:\n            # new package\n            p = Package(\n                id=uuid4(),\n                derived_id=f\"homebrew/{pkg.formula}\",\n                name=pkg.formula,\n                package_manager_id=self.config.pm_config.pm_id,\n                import_id=pkg.formula,\n                readme=pkg.description,\n                created_at=self.now,\n                updated_at=self.now,\n            )\n            pkg_id: UUID = p.id\n            # no update payload, so that's empty\n            return pkg_id, p, {}\n        else:\n            p = self.caches.package_map[pkg.formula]\n            pkg_id = p.id\n            # check for changes\n            # right now, that's just the readme\n            if p.readme != pkg.description:\n                self.logger.debug(f\"Description changed for {pkg.formula}\")\n                return (\n                    pkg_id,\n                    None,\n                    {\"id\": p.id, \"readme\": pkg.description, \"updated_at\": self.now},\n                )\n            else:\n                # existing package, no change\n                return pkg_id, None, None\n\n    def diff_url(\n        self, pkg: Actual, new_urls: dict[tuple[str, UUID], URL]\n    ) -> dict[UUID, UUID]:\n        \"\"\"Given a package's URLs, returns the resolved URL or this specific formula\"\"\"\n        resolved_urls: dict[UUID, UUID] = {}\n\n        # we need to check if (a) URLs are in our cache, or (b) if we've already handled\n        # them before. 
if so, we should use that\n        urls = (\n            (pkg.homepage, self.config.url_types.homepage),\n            (pkg.source, self.config.url_types.source),\n            (pkg.repository, self.config.url_types.repository),\n        )\n\n        for url, url_type in urls:\n            # guard: no URL\n            if not url:\n                continue\n\n            url_key = URLKey(url, url_type)\n            resolved_url_id: UUID\n            if url_key in new_urls:\n                resolved_url_id = new_urls[url_key].id\n            elif url_key in self.caches.url_map:\n                resolved_url_id = self.caches.url_map[url_key].id\n            else:\n                self.logger.debug(f\"URL {url} for {url_type} is entirely new\")\n                new_url = URL(\n                    id=uuid4(),\n                    url=url,\n                    url_type_id=url_type,\n                    created_at=self.now,\n                    updated_at=self.now,\n                )\n                resolved_url_id = new_url.id\n\n                # NOTE: THIS IS SUPER IMPORTANT\n                # we're not just borrowing this value, we're mutating it as well\n                new_urls[url_key] = new_url\n\n            resolved_urls[url_type] = resolved_url_id\n\n        return resolved_urls\n\n    def diff_pkg_url(\n        self, pkg_id: UUID, resolved_urls: dict[UUID, UUID]\n    ) -> tuple[list[PackageURL], list[dict]]:\n        \"\"\"Takes in a package_id and resolved URLs from diff_url, and generates\n        new PackageURL objects as well as a list of changes to existing ones\n\n        Inputs:\n          - pkg_id: the id of the package\n          - resolved_urls: a map of url types to final URL ID for this pkg\n\n        Outputs:\n          - new_package_urls: a list of new PackageURL objects\n          - updated_package_urls: a list of changes to existing PackageURL objects\n\n        TODO:\n          - We're updating every single package_url entity, which takes time. 
We should\n            check if the latest URL has changed, and if so, only update that one.\n        \"\"\"\n        new_links: list[PackageURL] = []\n        updates: list[dict] = []\n\n        # what are the existing links?\n        existing: set[UUID] = {\n            pu.url_id for pu in self.caches.package_urls.get(pkg_id, set())\n        }\n\n        # for the correct URL type / URL for this package:\n        for _url_type, url_id in resolved_urls.items():\n            if url_id not in existing:\n                # new link!\n                new_links.append(\n                    PackageURL(\n                        id=uuid4(),\n                        package_id=pkg_id,\n                        url_id=url_id,\n                        created_at=self.now,\n                        updated_at=self.now,\n                    )\n                )\n            else:\n                # TODO: this should only happen for `latest` URLs\n                # here is an existing link between this URL and this package\n                # let's find it\n                existing_pu = next(\n                    pu for pu in self.caches.package_urls[pkg_id] if pu.url_id == url_id\n                )\n                existing_pu.updated_at = self.now\n                updates.append({\"id\": existing_pu.id, \"updated_at\": self.now})\n\n        return new_links, updates\n\n    def diff_deps(\n        self, pkg: Actual\n    ) -> tuple[list[LegacyDependency], list[LegacyDependency]]:\n        \"\"\"\n        Takes in a Homebrew formula and figures out what dependencies have changed. Also\n        uses the LegacyDependency table, because that is package to package.\n\n        Warnings:\n          - Updates show up as removed + new\n          - This is Homebrew specific, since LegacyDependency mandates uniqueness\n            from package_id -> dependency_id, but Homebrew allows duplicate\n            dependencies across multiple dependency types. 
So we've got a process helper\n            that handles this.\n\n        Returns:\n          - new_deps: a list of new dependencies\n          - removed_deps: a list of removed dependencies\n        \"\"\"\n        new_deps: list[LegacyDependency] = []\n        removed_deps: list[LegacyDependency] = []\n\n        # serialize the actual dependencies into a set of tuples\n        actual: set[tuple[UUID, UUID]] = set()\n        processed: set[str] = set()\n\n        def process(dep_names: list[str] | None, dep_type: UUID) -> None:\n            \"\"\"Helper to process dependencies of a given type\"\"\"\n            # guard: no dependencies\n            if not dep_names:\n                return\n\n            for name in dep_names:\n                # guard: no dependency name / empty name\n                if not name:\n                    continue\n\n                # means one dependency is build and test, for example\n                # see https://formulae.brew.sh/api/formula/abook.json for example\n                # gettext is both a build and runtime dependency\n                if name in processed:\n                    continue\n\n                dependency = self.caches.package_map.get(name)\n\n                # guard: no dependency\n                if not dependency:\n                    # TODO: handle this case, though it fixes itself on the next run\n                    self.logger.warn(f\"{name}, dep of {pkg.formula} is new\")\n                    continue\n\n                actual.add((dependency.id, dep_type))\n                processed.add(name)\n\n        # alright, let's do it\n        if hasattr(pkg, \"dependencies\"):\n            process(pkg.dependencies, self.config.dependency_types.runtime)\n        if hasattr(pkg, \"build_dependencies\"):\n            process(pkg.build_dependencies, self.config.dependency_types.build)\n        if hasattr(pkg, \"test_dependencies\"):\n            process(pkg.test_dependencies, self.config.dependency_types.test)\n        if hasattr(pkg, \"recommended_dependencies\"):\n            process(\n                pkg.recommended_dependencies, self.config.dependency_types.recommended\n            )\n        if hasattr(pkg, \"optional_dependencies\"):\n            process(pkg.optional_dependencies, self.config.dependency_types.optional)\n\n        # get the package ID for what we are working with\n        package = self.caches.package_map.get(pkg.formula)\n        if not package:\n            # TODO: handle this case, though it fixes itself on the next run\n            self.logger.warn(f\"New package {pkg.formula}, will grab its deps next time\")\n            return [], []\n\n        pkg_id: UUID = package.id\n\n        # now, we need to figure out what's new / removed\n        # we need:\n        # 1. something in that same structure as `actual`, to track what's in CHAI\n        existing: set[tuple[UUID, UUID]] = set()\n        # 2. set of LegacyDependency objects\n        legacy_links: set[LegacyDependency] = self.caches.dependencies.get(\n            pkg_id, set()\n        )\n        # 3. 
easy look up to get to legacy_links to go from 1 to 2\n        existing_legacy_map: dict[tuple[UUID, UUID], LegacyDependency] = {}\n\n        for legacy in legacy_links:\n            key = (legacy.dependency_id, legacy.dependency_type_id)\n            existing_legacy_map[key] = legacy\n            existing.add(key)\n\n        # calculate our diffs\n        added_tuples: set[tuple[UUID, UUID]] = actual - existing\n        removed_tuples: set[tuple[UUID, UUID]] = existing - actual\n\n        # convert these to LegacyDependency objects\n        for dep_id, type_id in added_tuples:\n            new_dep = LegacyDependency(\n                package_id=pkg_id,\n                dependency_id=dep_id,\n                dependency_type_id=type_id,\n                created_at=self.now,\n                updated_at=self.now,\n            )\n            new_deps.append(new_dep)\n\n        for dep_id, type_id in removed_tuples:\n            removed_dep = existing_legacy_map.get((dep_id, type_id))\n            if removed_dep:\n                removed_deps.append(removed_dep)\n\n        return new_deps, removed_deps\n"
  },
  {
    "path": "package_managers/homebrew/formulae.py",
    "content": "import re\nfrom typing import Any\n\nfrom permalint import normalize_url\nfrom requests import get\n\nfrom core.config import Config\nfrom core.fetcher import Data, Fetcher\nfrom core.logger import Logger\nfrom package_managers.homebrew.structs import Actual\n\nlogger = Logger(\"homebrew_formulae\")\n\n\nclass HomebrewFetcher(Fetcher):\n    def __init__(self, config: Config):\n        super().__init__(\n            name=\"homebrew\",\n            source=config.pm_config.source,\n            no_cache=config.exec_config.no_cache,\n            test=config.exec_config.test,\n        )\n\n    def fetch(self) -> list[Actual]:\n        \"\"\"Get the current state of Homebrew\"\"\"\n        response = get(self.source)\n        try:\n            response.raise_for_status()\n        except Exception as e:\n            logger.error(f\"Error fetching Homebrew formulae: {e}\")\n            raise e\n\n        # make json\n        data: list[dict[str, Any]] = response.json()\n\n        # prep results\n        results: list[Actual] = []\n\n        for formula in data:\n            # check if deprecated\n            # TODO: should we delete\n            deprecated = formula.get(\"deprecated\", False)\n            if deprecated:\n                continue\n\n            # create temp vars for stuff we transform...basically URL\n            homepage = normalize_url(formula[\"homepage\"])\n\n            # try urls.head.url, because that generally points to GitHub / git\n            # use urls.stable.url as a backstop\n            source = normalize_url(\n                formula[\"urls\"].get(\"head\", formula[\"urls\"][\"stable\"]).get(\"url\", \"\")\n            )\n\n            # collect github / gitlab repos\n            if re.search(r\"^github.com\", source) or re.search(r\"^gitlab.com\", source):\n                repository = source\n            else:\n                repository = None\n\n            # create the actual\n            actual = Actual(\n                formula=formula[\"name\"],\n                description=formula[\"desc\"],\n                license=formula[\"license\"],\n                homepage=homepage,\n                source=source,\n                repository=repository,\n                build_dependencies=formula[\"build_dependencies\"],\n                dependencies=formula[\"dependencies\"],\n                test_dependencies=formula[\"test_dependencies\"],\n                recommended_dependencies=formula[\"recommended_dependencies\"],\n                optional_dependencies=formula[\"optional_dependencies\"],\n                # TODO: anything else?\n            )\n\n            results.append(actual)\n\n        if self.no_cache:\n            logger.log(\"No cache, so not saving to file\")\n        else:\n            write = Data(\".\", \"homebrew_formulae.json\", data)\n            self.write([write])\n\n        return results\n"
  },
  {
    "path": "package_managers/homebrew/main.py",
    "content": "#! /usr/bin/env pkgx +python@3.11 uv run\n\nfrom datetime import datetime\nfrom uuid import UUID\n\nfrom core.config import Config, PackageManager\nfrom core.logger import Logger\nfrom core.models import URL, LegacyDependency, Package, PackageURL\nfrom core.structs import Cache, URLKey\nfrom package_managers.homebrew.db import HomebrewDB\nfrom package_managers.homebrew.diff import Diff\nfrom package_managers.homebrew.formulae import HomebrewFetcher\n\n\ndef main(config: Config, db: HomebrewDB) -> None:\n    \"\"\"A diff-based attempt at loading into CHAI\"\"\"\n\n    logger = Logger(\"homebrew_main\")\n    fetcher = HomebrewFetcher(config)\n    brew = fetcher.fetch()\n\n    # get the URLs & set that\n    brew_urls = {b.source for b in brew} | {b.homepage for b in brew}\n    db.set_current_urls(brew_urls)\n    logger.log(\"Set current URLs\")\n\n    # get the caches here\n    cache = Cache(\n        db.graph.package_map,\n        db.urls.url_map,\n        db.urls.package_urls,\n        db.graph.dependencies,\n    )\n\n    # total set of updates we'll make are:\n    new_packages: list[Package] = []\n    new_urls: dict[URLKey, URL] = {}  # we'll convert this later\n    new_package_urls: list[PackageURL] = []\n    updated_packages: list[dict[str, UUID | str | datetime]] = []\n    updated_package_urls: list[dict[str, UUID | datetime]] = []\n    new_deps: list[LegacyDependency] = []\n    removed_deps: list[LegacyDependency] = []\n\n    diff = Diff(config, cache)\n    for i, pkg in enumerate(brew):\n        pkg_id, pkg_obj, update_payload = diff.diff_pkg(pkg)\n        if pkg_obj:\n            logger.debug(f\"New package: {pkg_obj.name}\")\n            new_packages.append(pkg_obj)\n        if update_payload:\n            logger.debug(f\"Updated package: {update_payload['id']}\")\n            updated_packages.append(update_payload)\n\n        # NOTE: resolved urls is a map of url types to final URL ID for this pkg\n        # also, &new_urls gets passed in AND mutated\n        resolved_urls = diff.diff_url(pkg, new_urls)\n\n        # now, new package urls\n        new_links, updated_links = diff.diff_pkg_url(pkg_id, resolved_urls)\n        if new_links:\n            logger.debug(f\"New package URLs: {len(new_links)}\")\n            new_package_urls.extend(new_links)\n        if updated_links:\n            logger.debug(f\"Updated package URLs: {len(updated_links)}\")\n            updated_package_urls.extend(updated_links)\n\n        # finally, dependencies\n        new_dependencies, removed_dependencies = diff.diff_deps(pkg)\n        if new_dependencies:\n            logger.debug(f\"New dependencies: {len(new_dependencies)}\")\n            new_deps.extend(new_dependencies)\n        if removed_dependencies:\n            logger.debug(f\"Removed dependencies: {len(removed_dependencies)}\")\n            removed_deps.extend(removed_dependencies)\n\n        if config.exec_config.test and i > 100:\n            break\n\n    # final cleanup is to replace the new_urls map with a list\n    final_new_urls = list(new_urls.values())\n\n    # send to loader\n    db.ingest(\n        new_packages,\n        final_new_urls,\n        new_package_urls,\n        new_deps,\n        removed_deps,\n        updated_packages,\n        updated_package_urls,\n    )\n\n\nif __name__ == \"__main__\":\n    config = Config(PackageManager.HOMEBREW)\n    db = HomebrewDB(\"homebrew_db_main\", config)\n    main(config, db)\n"
  },
  {
    "path": "package_managers/homebrew/structs.py",
    "content": "from dataclasses import dataclass\n\n\n@dataclass\nclass Actual:\n    formula: str\n    description: str\n    license: str\n    homepage: str\n    source: str\n    repository: str | None\n    build_dependencies: list[str] | None\n    dependencies: list[str] | None\n    test_dependencies: list[str] | None\n    recommended_dependencies: list[str] | None\n    optional_dependencies: list[str] | None\n"
  },
  {
    "path": "package_managers/pkgx/Dockerfile",
    "content": "FROM ghcr.io/astral-sh/uv:python3.11-bookworm-slim\n\n# Copy everything\nCOPY . .\n\n# Install core requirements using uv\nWORKDIR /core\nRUN uv pip install --system -r requirements.txt\n\nWORKDIR /\n# Run the main application\nCMD [\"python\", \"/package_managers/pkgx/main.py\"] \n"
  },
  {
    "path": "package_managers/pkgx/db.py",
    "content": "#!/usr/bin/env pkgx uv run\n\nfrom core.config import Config\nfrom core.db import DB, CurrentURLs\nfrom core.structs import CurrentGraph\n\n\nclass PkgxDB(DB):\n    def __init__(self, logger_name: str, config: Config):\n        super().__init__(logger_name)\n        self.config = config\n\n    def set_current_graph(self) -> None:\n        \"\"\"Get the pkgx packages and dependencies\"\"\"\n        self.graph: CurrentGraph = self.current_graph(self.config.pm_config.pm_id)\n        self.logger.log(f\"Loaded {len(self.graph.package_map)} pkgx packages\")\n\n    def set_current_urls(self) -> None:\n        \"\"\"Getting all the URLs and Package URLs from the database\"\"\"\n        self.urls: CurrentURLs = self.all_current_urls()\n        self.logger.log(f\"Loaded {len(self.urls.url_map)} URLs\")\n"
  },
  {
    "path": "package_managers/pkgx/diff.py",
    "content": "#!/usr/bin/env pkgx uv run\n\nfrom datetime import datetime\nfrom uuid import UUID, uuid4\n\nfrom core.config import Config\nfrom core.logger import Logger\nfrom core.models import URL, LegacyDependency, Package, PackageURL\nfrom core.structs import Cache, URLKey\nfrom package_managers.pkgx.db import DB\nfrom package_managers.pkgx.parser import DependencyBlock, PkgxPackage\nfrom package_managers.pkgx.url import generate_chai_urls\n\n\nclass PkgxDiff:\n    def __init__(self, config: Config, caches: Cache, db: DB, logger: Logger):\n        self.config = config\n        self.now = datetime.now()\n        self.caches = caches\n        self.db = db\n        self.logger = logger\n\n    def diff_pkg(\n        self, import_id: str, pkg: PkgxPackage\n    ) -> tuple[UUID, Package | None, dict | None]:\n        \"\"\"\n        Checks if the given pkg is in the package_cache.\n\n        Returns:\n          - pkg_id: the id of the package\n          - package: If new, returns a new package object. If existing, returns None\n          - changes: a dictionary of changes\n        \"\"\"\n        self.logger.debug(f\"Diffing package: {import_id}\")\n\n        if import_id not in self.caches.package_map:\n            # new package\n            p = Package(\n                id=uuid4(),\n                derived_id=f\"pkgx/{import_id}\",\n                name=import_id,\n                package_manager_id=self.config.pm_config.pm_id,\n                import_id=import_id,\n                readme=\"\",  # NOTE: pkgx doesn't have a description field\n                created_at=self.now,\n                updated_at=self.now,\n            )\n            pkg_id: UUID = p.id\n            return pkg_id, p, {}\n        else:\n            # the package exists, but since pkgx doesn't maintain a readme or\n            # description field, we can just return\n            pkg_id = self.caches.package_map[import_id].id\n            return pkg_id, None, None\n\n    def diff_url(\n        self, import_id: str, pkg: PkgxPackage, new_urls: dict[URLKey, URL]\n    ) -> dict[UUID, UUID]:\n        \"\"\"Given a package's URLs, returns the resolved URL for this specific package\"\"\"\n        resolved_urls: dict[UUID, UUID] = {}\n\n        # Generate the URLs for this package\n        urls = generate_chai_urls(\n            self.config, self.db, import_id, pkg.distributable[0].url, self.logger\n        )\n\n        # Process each URL\n        for url_key in urls:\n            # guard: generate_chai_urls could be None for a url type\n            if url_key is None:\n                continue\n\n            resolved_url_id: UUID\n\n            if url_key in new_urls:\n                resolved_url_id = new_urls[url_key].id\n            elif url_key in self.caches.url_map:\n                resolved_url_id = self.caches.url_map[url_key].id\n            else:\n                self.logger.debug(\n                    f\"URL {url_key.url} as {url_key.url_type_id} is entirely new\"\n                )\n                new_url = URL(\n                    id=uuid4(),\n                    url=url_key.url,\n                    url_type_id=url_key.url_type_id,\n                    created_at=self.now,\n                    updated_at=self.now,\n                )\n                resolved_url_id = new_url.id\n                new_urls[url_key] = new_url\n\n            resolved_urls[url_key.url_type_id] = resolved_url_id\n\n        return resolved_urls\n\n    def diff_pkg_url(\n        self, pkg_id: UUID, resolved_urls: dict[UUID, UUID]\n 
   ) -> tuple[list[PackageURL], list[dict]]:\n        \"\"\"Takes in a package_id and resolved URLs from diff_url, and generates\n        new PackageURL objects as well as a list of changes to existing ones\"\"\"\n\n        new_links: list[PackageURL] = []\n        updates: list[dict] = []\n\n        # what are the existing links?\n        existing: set[UUID] = {\n            pu.url_id for pu in self.caches.package_urls.get(pkg_id, set())\n        }\n\n        # for each URL type/URL for this package:\n        for _url_type, url_id in resolved_urls.items():\n            if url_id not in existing:\n                # new link!\n                new_links.append(\n                    PackageURL(\n                        id=uuid4(),\n                        package_id=pkg_id,\n                        url_id=url_id,\n                        created_at=self.now,\n                        updated_at=self.now,\n                    )\n                )\n            else:\n                # existing link - update timestamp\n                existing_pu = next(\n                    pu for pu in self.caches.package_urls[pkg_id] if pu.url_id == url_id\n                )\n                existing_pu.updated_at = self.now\n                updates.append({\"id\": existing_pu.id, \"updated_at\": self.now})\n\n        return new_links, updates\n\n    def diff_deps(\n        self, import_id: str, pkg: PkgxPackage\n    ) -> tuple[list[LegacyDependency], list[LegacyDependency]]:\n        \"\"\"\n        Takes in a pkgx package and figures out what dependencies have changed.\n\n        The process is:\n           1. Build a view of what the package's dependencies are according to\n              the parsed pkgx data, using priority-based deduplication\n           2. Get this package's ID from CHAI\n           3. Get this package's existing dependencies from CHAI\n           4. 
Compare the two sets, and identify new and removed dependencies\n\n        Note: The database has a unique constraint on (package_id, dependency_id),\n        so if a package depends on the same dependency with multiple types (e.g.,\n        both runtime and build), we choose the highest priority type:\n        Runtime > Build > Test\n\n        Returns:\n          - new_deps: a list of new dependencies\n          - removed_deps: a list of removed dependencies\n        \"\"\"\n        new_deps: list[LegacyDependency] = []\n        removed_deps: list[LegacyDependency] = []\n\n        # First, collect all dependencies and deduplicate by dependency name\n        # choosing the highest priority dependency type for each unique dependency\n        dependency_map: dict[str, UUID] = {}\n\n        # Priority order: Runtime > Build > Test\n        priority_order = {\n            self.config.dependency_types.runtime: 1,\n            self.config.dependency_types.build: 2,\n            self.config.dependency_types.test: 3,\n        }\n\n        def process_deps(dependencies: list[DependencyBlock], dep_type: UUID) -> None:\n            \"\"\"Helper to process dependencies of a given type with priority\"\"\"\n            for dep in dependencies:\n                for dep_obj in dep.dependencies:\n                    if not dep_obj.name:\n                        continue\n\n                    # Get the dependency package from cache\n                    dependency = self.caches.package_map.get(dep_obj.name)\n                    if not dependency:\n                        self.logger.warn(\n                            f\"{dep_obj.name}, dep of {import_id} is not in cache\"\n                        )\n                        continue\n\n                    # If this dependency already exists in our map, choose higher priority\n                    if dep_obj.name in dependency_map:\n                        existing_priority = priority_order.get(\n                            dependency_map[dep_obj.name], 999\n                        )\n                        new_priority = priority_order.get(dep_type, 999)\n\n                        if (\n                            new_priority < existing_priority\n                        ):  # Lower number = higher priority\n                            old_type_id = dependency_map[dep_obj.name]\n                            dependency_map[dep_obj.name] = dep_type\n                            self.logger.debug(\n                                f\"Updated dependency type for {dep_obj.name} from \"\n                                f\"{old_type_id} to {dep_type} (higher priority)\"\n                            )\n                    else:\n                        dependency_map[dep_obj.name] = dep_type\n\n        # Process different types of dependencies with priority handling\n        process_deps(pkg.dependencies, self.config.dependency_types.runtime)\n        process_deps(pkg.build.dependencies, self.config.dependency_types.build)\n        process_deps(pkg.test.dependencies, self.config.dependency_types.test)\n\n        # Now build the actual set of dependencies with resolved types\n        actual: set[tuple[UUID, UUID]] = set()\n        for dep_name, dep_type in dependency_map.items():\n            dependency = self.caches.package_map.get(dep_name)\n            if dependency:  # Double-check it still exists\n                actual.add((dependency.id, dep_type))\n\n        # get the package ID for what we are working with\n        package = 
self.caches.package_map.get(import_id)\n        if not package:\n            self.logger.warn(f\"New package {import_id}, will grab its deps next time\")\n            return [], []\n\n        pkg_id: UUID = package.id\n\n        # what are its existing dependencies?\n        # specifically, existing dependencies IN THE SAME STRUCTURE as `actual`,\n        # so we can do an easy comparison\n        existing: set[tuple[UUID, UUID]] = {\n            (dep.dependency_id, dep.dependency_type_id)\n            for dep in self.caches.dependencies.get(pkg_id, set())\n        }\n\n        # we have two sets!\n        # actual minus existing = new_deps\n        # existing minus actual = removed_deps\n        new = actual - existing\n        removed = existing - actual\n\n        new_deps: list[LegacyDependency] = [\n            LegacyDependency(\n                package_id=pkg_id,\n                dependency_id=dep[0],\n                dependency_type_id=dep[1],\n                created_at=self.now,\n                updated_at=self.now,\n            )\n            for dep in new\n        ]\n\n        # get the existing legacy dependency, and add it to removed_deps\n        removed_deps: list[LegacyDependency] = []\n        cache_deps: set[LegacyDependency] = self.caches.dependencies.get(pkg_id, set())\n        for removed_dep_id, removed_dep_type in removed:\n            try:\n                existing_dep = next(\n                    dep\n                    for dep in cache_deps\n                    if dep.dependency_id == removed_dep_id\n                    and dep.dependency_type_id == removed_dep_type\n                )\n                removed_deps.append(existing_dep)\n            except StopIteration as exc:\n                cache_deps_str = \"\\n\".join(\n                    [\n                        f\"{dep.dependency_id} / {dep.dependency_type_id}\"\n                        for dep in cache_deps\n                    ]\n                )\n                raise ValueError(\n                    f\"Removing {removed_dep_id} / {removed_dep_type} for {pkg_id} but not in Cache: \\n{cache_deps_str}\"\n                ) from exc\n\n        return new_deps, removed_deps\n"
  },
  {
    "path": "package_managers/pkgx/loader.py",
    "content": "from sqlalchemy import select\nfrom sqlalchemy.dialects.postgresql import insert as pg_insert\n\nfrom core.config import Config\nfrom core.db import DB\nfrom core.models import (\n    LegacyDependency,\n    Package,\n)\nfrom package_managers.pkgx.parser import DependencyBlock\nfrom package_managers.pkgx.transformer import Cache\n\nBATCH_SIZE = 10000\n\n\n# NOTE: this is a separate instance of the db that is used in main\nclass PkgxLoader(DB):\n    def __init__(self, config: Config, data: dict[str, Cache]):\n        super().__init__(\"pkgx_db\")\n        self.config = config\n        self.data = data\n        self.debug = config.exec_config.test\n        self.logger.debug(f\"Initialized PkgxLoader with {len(data)} packages\")\n\n    def load_packages(self) -> None:\n        \"\"\"\n        Efficiently load all unique packages from the cache map into the database\n        using bulk insertion and returning inserted IDs.\n        \"\"\"\n        unique_packages = {}\n        for key, cache in self.data.items():\n            package = cache.package\n            if not isinstance(package, Package):\n                self.logger.error(\n                    f\"Invalid package object for key {key}: {type(package)}\"\n                )\n                continue\n            if package.derived_id not in unique_packages:\n                unique_packages[package.derived_id] = package\n\n        self.logger.log(f\"Found {len(unique_packages)} unique packages to insert\")\n\n        package_dicts = []\n        for pkg in unique_packages.values():\n            try:\n                package_dicts.append(pkg.to_dict())\n            except Exception as e:\n                self.logger.error(f\"Error in to_dict for package {pkg.name}: {e!s}\")\n\n        if not package_dicts:\n            self.logger.log(\"No packages to insert\")\n            return\n\n        with self.session() as session:\n            try:\n                stmt = pg_insert(Package).values(package_dicts).on_conflict_do_nothing()\n\n                # TODO: can just generate the UUID myself and provide it, so no need to\n                # return\n                stmt = stmt.returning(Package.id, Package.derived_id)\n                self.logger.log(\"About to execute insert statement for packages\")\n                result = session.execute(stmt)\n                inserted_packages = {row.derived_id: row.id for row in result}\n                session.commit()\n                self.logger.log(\n                    f\"Successfully inserted {len(inserted_packages)} packages\"\n                )\n\n                missing_derived_ids = [\n                    derived_id\n                    for derived_id in unique_packages\n                    if derived_id not in inserted_packages\n                ]\n                self.logger.log(\n                    f\"Fetching {len(missing_derived_ids)} IDs for conflicting packages\"\n                )\n\n                if missing_derived_ids:\n                    # Fetch missing IDs in batches\n                    for i in range(0, len(missing_derived_ids), BATCH_SIZE):\n                        batch_ids = missing_derived_ids[i : i + BATCH_SIZE]\n                        stmt = select(Package.id, Package.derived_id).where(\n                            Package.derived_id.in_(batch_ids)\n                        )\n                        result = session.execute(stmt)\n                        for row in result:\n                            inserted_packages[row.derived_id] = row.id\n\n          
      updated_count = 0\n                for cache in self.data.values():\n                    if cache.package.derived_id in inserted_packages:\n                        cache.package.id = inserted_packages[cache.package.derived_id]\n                        updated_count += 1\n                self.logger.log(f\"Updated cache with IDs for {updated_count} packages\")\n\n            except Exception as e:\n                self.logger.error(f\"Error inserting packages: {e!s}\")\n                self.logger.error(f\"Error type: {type(e)}\")\n                raise\n\n    def load_dependencies(self) -> None:\n        \"\"\"\n        Load all dependencies into the LegacyDependency table.\n        This requires package IDs to be loaded first.\n        # FIXME: legacy dependencies are package to package relationships.\n        # A migration is needed to move all dependencies to the LegacyDependency structure.\n        \"\"\"\n        self.logger.log(\"Starting to load legacy dependencies\")\n\n        legacy_dependency_dicts = []\n        missing = set()\n\n        for key, cache in self.data.items():\n            # Ensure the main package has an ID\n            if not hasattr(cache.package, \"id\") or cache.package.id is None:\n                self.logger.warn(\n                    f\"Package {key} has no ID when loading dependencies, skipping\"\n                )\n                continue\n            package_id = cache.package.id\n\n            # Helper to process a list of dependency names for a given type\n            def process_deps(\n                dep_blocks: list[DependencyBlock],\n                dep_type_id: str,\n                key=key,\n                package_id=package_id,\n            ):\n                for dep_block in dep_blocks:\n                    # TODO: do we need to use this?\n                    for dep in dep_block.dependencies:\n                        dep_name = dep.name\n                        dep_semver = dep.semver\n\n                        # Find the dependency package in our cache\n                        dep_cache = self.data.get(dep_name)\n                        if not dep_cache:\n                            missing.add(dep_name)\n                            continue\n\n                        # Checks: has to have an ID\n                        if (\n                            not hasattr(dep_cache.package, \"id\")\n                            or dep_cache.package.id is None\n                        ):\n                            self.logger.warn(\n                                f\"Dependency package '{dep_name}' has no ID, skipping linkage for '{key}'\"\n                            )\n                            continue\n                        dependency_id = dep_cache.package.id\n\n                        # Append data for bulk insert\n                        legacy_dependency_dicts.append(\n                            {\n                                \"package_id\": package_id,\n                                \"dependency_id\": dependency_id,\n                                \"dependency_type_id\": dep_type_id,\n                                \"semver_range\": dep_semver,\n                            }\n                        )\n\n            # Process each dependency type\n            process_deps(cache.dependencies.build, self.config.dependency_types.build)\n            process_deps(cache.dependencies.test, self.config.dependency_types.test)\n            process_deps(\n                cache.dependencies.dependencies, 
self.config.dependency_types.runtime\n            )\n\n        self.logger.log(\n            f\"Found {len(legacy_dependency_dicts)} legacy dependencies to insert\"\n        )\n\n        if missing:\n            self.logger.warn(f\"{len(missing)} pkgs are deps, but have no pkgx.yaml\")\n            self.logger.warn(f\"Missing pkgs: {missing}\")\n\n        if not legacy_dependency_dicts:\n            self.logger.log(\"No legacy dependencies to insert\")\n            return\n\n        # Bulk insert legacy dependencies\n        with self.session() as session:\n            try:\n                for i in range(0, len(legacy_dependency_dicts), BATCH_SIZE):\n                    batch = legacy_dependency_dicts[i : i + BATCH_SIZE]\n                    self.logger.log(\n                        f\"Processing LegacyDependency batch {i // BATCH_SIZE + 1}/{(len(legacy_dependency_dicts) - 1) // BATCH_SIZE + 1} ({len(batch)} links)\"\n                    )\n                    stmt = (\n                        pg_insert(LegacyDependency)\n                        .values(batch)\n                        .on_conflict_do_nothing()\n                    )\n                    session.execute(stmt)\n                session.commit()\n                self.logger.log(\"Successfully inserted all pkgx dependencies\")\n\n            except Exception as e:\n                self.logger.error(f\"Error inserting legacy dependencies: {e!s}\")\n                self.logger.error(f\"Error type: {type(e)}\")\n                raise\n"
  },
  {
    "path": "package_managers/pkgx/main.py",
    "content": "#!/usr/bin/env pkgx +python@3.11 uv run\n\nimport os\nimport time\nfrom datetime import datetime\nfrom uuid import UUID\n\nfrom core.config import Config, PackageManager\nfrom core.fetcher import GitFetcher\nfrom core.logger import Logger\nfrom core.models import URL, LegacyDependency, Package, PackageURL\nfrom core.scheduler import Scheduler\nfrom core.structs import Cache, URLKey\nfrom package_managers.pkgx.db import PkgxDB\nfrom package_managers.pkgx.diff import PkgxDiff\nfrom package_managers.pkgx.parser import PkgxParser\n\nlogger = Logger(\"pkgx\")\n\nSCHEDULER_ENABLED = os.getenv(\"ENABLE_SCHEDULER\", \"true\").lower() == \"true\"\nBATCH_SIZE = 500\nPROJECTS_DIR = \"projects\"\nPACKAGE_FILE = \"package.yml\"\n\n\ndef fetch(config: Config) -> GitFetcher:\n    should_fetch = config.exec_config.fetch\n    fetcher = GitFetcher(\n        \"pkgx\",\n        config.pm_config.source,\n        config.exec_config.no_cache,\n        config.exec_config.test,\n    )\n\n    if should_fetch:\n        logger.debug(\"Starting Pkgx package fetch\")\n        fetcher.fetch()\n    else:  # symlink would still be updated\n        logger.log(\"Fetching disabled, skipping fetch\")\n\n    # if no_cache is on, we'll delete stuff from here\n    return fetcher\n\n\ndef run_pipeline(config: Config, db: PkgxDB):\n    \"\"\"A diff-based approach to loading pkgx data into CHAI\"\"\"\n\n    fetcher = fetch(config)\n    output_dir = f\"{fetcher.output}/latest\"\n\n    # Parse all packages\n    pkgx_parser = PkgxParser(output_dir)\n    packages = list(pkgx_parser.parse_packages())\n\n    logger.log(f\"Parsed {len(packages)} packages\")\n\n    # Set up cache\n    db.set_current_graph()\n    db.set_current_urls()\n    logger.log(\"Set current URLs\")\n\n    # Build cache for differential loading\n    cache = Cache(\n        db.graph.package_map,\n        db.urls.url_map,\n        db.urls.package_urls,\n        db.graph.dependencies,\n    )\n\n    # Initialize differential loading collections\n    new_packages: list[Package] = []\n    new_urls: dict[URLKey, URL] = {}\n    new_package_urls: list[PackageURL] = []\n    updated_packages: list[dict[str, UUID | str | datetime]] = []\n    updated_package_urls: list[dict[str, UUID | datetime]] = []\n    new_deps: list[LegacyDependency] = []\n    removed_deps: list[LegacyDependency] = []\n\n    # Create diff processor\n    diff = PkgxDiff(config, cache, db, logger)\n\n    # Process each package\n    for i, (pkg_data, import_id) in enumerate(packages):\n        # Diff the package\n        pkg_id, pkg_obj, update_payload = diff.diff_pkg(import_id, pkg_data)\n\n        if pkg_obj:\n            logger.debug(f\"New package: {pkg_obj.name}\")\n            new_packages.append(pkg_obj)\n        if update_payload:\n            logger.debug(f\"Updated package: {update_payload['id']}\")\n            updated_packages.append(update_payload)\n\n        # Diff URLs (resolved_urls is map of url types to final URL ID)\n        resolved_urls = diff.diff_url(import_id, pkg_data, new_urls)\n\n        # Diff package URLs\n        new_links, updated_links = diff.diff_pkg_url(pkg_id, resolved_urls)\n        if new_links:\n            logger.debug(f\"New package URLs: {len(new_links)}\")\n            new_package_urls.extend(new_links)\n        if updated_links:\n            updated_package_urls.extend(updated_links)\n\n        # Diff dependencies\n        new_dependencies, removed_dependencies = diff.diff_deps(import_id, pkg_data)\n        if new_dependencies:\n            
logger.debug(f\"New dependencies: {len(new_dependencies)}\")\n            new_deps.extend(new_dependencies)\n        if removed_dependencies:\n            logger.debug(f\"Removed dependencies: {len(removed_dependencies)}\")\n            removed_deps.extend(removed_dependencies)\n\n        if config.exec_config.test and i > 10:\n            break\n\n    # Convert new_urls dict to list for ingestion\n    final_new_urls = list(new_urls.values())\n\n    # Ingest all diffs\n    db.ingest(\n        new_packages,\n        final_new_urls,\n        new_package_urls,\n        new_deps,\n        removed_deps,\n        updated_packages,\n        updated_package_urls,\n    )\n\n    if config.exec_config.no_cache:\n        fetcher.cleanup()\n\n\ndef main():\n    logger.log(\"Initializing Pkgx package manager\")\n    config = Config(PackageManager.PKGX)\n    db = PkgxDB(\"pkgx_main_db_logger\", config)\n    logger.debug(f\"Using config: {config}\")\n\n    if SCHEDULER_ENABLED:\n        logger.log(\"Scheduler enabled. Starting schedule.\")\n        scheduler = Scheduler(\"pkgx\")\n        # run_pipeline needs both config and db\n        scheduler.start(run_pipeline, config, db)\n\n        # run immediately as well when scheduling\n        scheduler.run_now(run_pipeline, config, db)\n\n        # keep the main thread alive for scheduler\n        try:\n            while True:\n                time.sleep(3600)\n        except KeyboardInterrupt:\n            scheduler.stop()\n            logger.log(\"Scheduler stopped.\")\n    else:\n        logger.log(\"Scheduler disabled. Running pipeline once.\")\n        run_pipeline(config, db)\n        logger.log(\"Pipeline finished.\")\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "package_managers/pkgx/parser.py",
    "content": "from collections.abc import Iterator\nfrom dataclasses import dataclass, field\nfrom pathlib import Path\nfrom typing import Any\n\nimport yaml\n\nfrom core.logger import Logger\nfrom core.utils import convert_keys_to_snake_case\n\nlogger = Logger(\"pkgx\")\nPROJECTS_DIR = \"projects\"\nPACKAGE_FILE = \"package.yml\"\n\n# IMPORTANT:\n# the package.yml maintains a warnings list, which sometimes contain \"vendored\"\n# this correlates to Homebrew's casks, and CHAI ignores them\n\n\n# structures\n# this enables everything, but we don't need all of it right now\n@dataclass\nclass Distributable:\n    url: str\n    strip_components: int | None = field(default=None)\n    ref: str | None = field(default=None)\n    sig: str | None = field(default=None)\n    sha: str | None = field(default=None)\n\n\n@dataclass\nclass Version:\n    github: str | None = field(default=None)  # (user)?(/tags/releases)\n    gitlab: str | None = field(default=None)  # (user|project)?(/tags/releases)\n    url: str | None = field(default=None)  # for non github projects\n    match: str | None = field(default=None)  # regex to match the version\n    strip: str | None = field(default=None)  # regex to strip the version\n    ignore: str | None = field(default=None)  # regex to ignore the version\n    versions: list[str] | None = field(default=None)  # list of versions\n    npm: str | None = field(default=None)  # npm package name\n    transform: str | None = field(default=None)  # regex to transform the version\n    stripe: str | None = field(default=None)  # not sure what this is\n\n\n@dataclass\nclass Dependency:\n    name: str\n    semver: str\n\n\n@dataclass\nclass EnvironmentVariable:\n    name: str\n    value: str | list[str]\n\n\n@dataclass\nclass DependencyBlock:\n    platform: str  # 'all', 'linux', 'darwin', etc.\n    dependencies: list[Dependency]\n\n\n@dataclass\nclass Build:\n    script: str\n    dependencies: list[DependencyBlock] = field(default_factory=list)\n    env: list[EnvironmentVariable] = field(default_factory=list)\n    working_directory: str | None = field(default=None)\n\n\n@dataclass\nclass Test:\n    script: str\n    dependencies: list[DependencyBlock] = field(default_factory=list)\n    env: list[EnvironmentVariable] = field(default_factory=list)\n    fixture: str | None = field(default=None)\n\n\n@dataclass\nclass PkgxPackage:\n    distributable: list[Distributable]\n    versions: Version\n    build: Build | None = field(default=None)\n    test: Test | None = field(default=None)\n    # provides: list[str] = field(default_factory=list)  # all cli commands provided\n    # platforms: list[str] = field(\n    #     default_factory=list\n    # )  # darwin, linux/x64, linux/arm64, etc.\n    # Store a list of dependency blocks, each specifying a platform and its deps\n    dependencies: list[DependencyBlock] = field(default_factory=list)\n\n\n# Pkgx Parser can look at the pantry and yield a dictionary of information in the YAML\nclass PkgxParser:\n    def __init__(self, repo_path: str):\n        self.repo_path = repo_path\n\n    def find_package_yamls(self) -> Iterator[tuple[Path, str]]:\n        \"\"\"Finds all package.yml files within the projects directory.\"\"\"\n        projects_path = Path(self.repo_path) / PROJECTS_DIR\n        if not projects_path.is_dir():\n            logger.error(f\"Projects directory not found at: {projects_path}\")\n            return\n\n        logger.debug(f\"Searching for {PACKAGE_FILE} in {projects_path}...\")\n        count = 0\n        for yaml_path in 
projects_path.rglob(PACKAGE_FILE):\n            if yaml_path.is_file():\n                # Calculate relative path for project identifier\n                relative_path = yaml_path.parent.relative_to(projects_path)\n                project_identifier = str(relative_path)\n                yield yaml_path, project_identifier\n                count += 1\n        logger.debug(f\"Found {count} {PACKAGE_FILE} files.\")\n\n    def is_vendored(self, data: dict[str, Any]) -> bool:\n        \"\"\"Checks if the package is vendored.\"\"\"\n        if \"warnings\" in data:\n            warnings = data.get(\"warnings\", [])\n            if \"vendored\" in warnings:\n                return True\n        return False\n\n    def parse_package_yaml(self, file_path: Path) -> PkgxPackage | None:\n        \"\"\"Parses a single package.yml file.\"\"\"\n        try:\n            with open(file_path) as f:\n                data = yaml.safe_load(f)\n                if not isinstance(data, dict):\n                    logger.warn(\n                        f\"Expected dict, got {type(data).__name__} in {file_path}\"\n                    )\n                    return None\n\n                # check if the package is vendored\n                if self.is_vendored(data):\n                    return None\n\n                pkgx_package = self.map_package_yaml_to_pkgx_package(\n                    data, str(file_path)\n                )\n                return pkgx_package\n        except yaml.YAMLError as e:\n            logger.error(f\"Error parsing YAML file {file_path}: {e}\")\n            return None\n        except Exception as e:\n            logger.error(f\"Error reading file {file_path}: {e}\")\n            # unexpected read errors should fail loudly; re-raise after logging\n            raise\n\n    def parse_packages(self) -> Iterator[tuple[PkgxPackage, str]]:\n        \"\"\"Parses all package.yml files found in the repository.\"\"\"\n        for yaml_path, project_identifier in self.find_package_yamls():\n            parsed_data = self.parse_package_yaml(yaml_path)\n            if parsed_data:\n                yield parsed_data, project_identifier\n\n    def _parse_dependency_list(\n        self, deps_data: Any, context: str\n    ) -> list[DependencyBlock]:\n        \"\"\"Parses a dependency dictionary into a list of DependencyBlock objects.\"\"\"\n        if not isinstance(deps_data, dict):\n            # For now, assume empty dict means no deps, but non-dict is error.\n            if deps_data is None or deps_data == {}:\n                return []\n            dep_type = type(deps_data).__name__\n            raise TypeError(\n                f\"Expected dependencies to be a dict in {context}, got {dep_type}\"\n            )\n\n        dependency_blocks = []\n        direct_deps = []\n\n        for key, value in deps_data.items():\n            # Platform-specific block\n            if isinstance(value, dict):\n                platform = key\n                platform_deps = []\n                for dep_name, semver in value.items():\n                    if isinstance(semver, str):\n                        platform_deps.append(Dependency(name=dep_name, semver=semver))\n                    elif isinstance(semver, int | float):\n                        platform_deps.append(\n                            Dependency(name=dep_name, semver=str(semver))\n                        )\n                    else:\n                        raise TypeError(\n                            f\"Unexpected semver type for {dep_name} under platform {platform} in {context}: 
{type(semver).__name__}\"\n                        )\n                if platform_deps:\n                    dependency_blocks.append(\n                        DependencyBlock(platform=platform, dependencies=platform_deps)\n                    )\n                # else: empty platform block is ignored\n\n            # Direct dependency declaration\n            elif isinstance(value, str):\n                dep_name = key\n                semver = value\n                direct_deps.append(Dependency(name=dep_name, semver=semver))\n\n            # Direct declaration, but sometimes the semvers are exact\n            elif isinstance(value, int | float):\n                dep_name = key\n                semver = str(value)\n                direct_deps.append(Dependency(name=dep_name, semver=semver))\n\n            # Invalid structure\n            else:\n                raise TypeError(\n                    f\"Unexpected dependency value type for key '{key}' in {context}: {type(value).__name__}. Expected dict or str or float.\"\n                )\n\n        # Add all direct dependencies under the 'all' platform\n        if direct_deps:\n            dependency_blocks.append(\n                DependencyBlock(platform=\"all\", dependencies=direct_deps)\n            )\n\n        return dependency_blocks\n\n    def _parse_build_section(self, build_data: Any, file_path_str: str) -> Build:\n        \"\"\"Parses the build section if its a dict, list, or str\"\"\"\n        if isinstance(build_data, dict):\n            # Pass original dependencies dict, don't convert keys here\n            build_deps_list = self._parse_dependency_list(\n                build_data.get(\"dependencies\"), f\"build section of {file_path_str}\"\n            )\n            # Convert env var keys just before instantiation\n            build_env = [\n                EnvironmentVariable(**convert_keys_to_snake_case(env))\n                for env in build_data.get(\"env\", [])\n                if isinstance(env, dict)\n            ]\n            # Convert build_data keys just before creating Build object\n            build_kwargs = convert_keys_to_snake_case(build_data)\n            return Build(\n                script=build_kwargs.get(\"script\", \"\"),\n                dependencies=build_deps_list,  # Use the originally parsed list\n                env=build_env,\n                working_directory=build_kwargs.get(\"working_directory\"),\n            )\n        elif isinstance(build_data, list):\n            # Generally, it's a list of build commands, so we only have script info\n            # TODO: Potentially improve handling of list-based build data\n            script = (\n                build_data[0] if build_data and isinstance(build_data[0], str) else \"\"\n            )\n            return Build(\n                script=script,\n                dependencies=[],\n                env=[],\n                working_directory=None,\n            )\n        elif isinstance(build_data, str):\n            return Build(\n                script=build_data,\n                dependencies=[],\n                env=[],\n                working_directory=None,\n            )\n        else:\n            build_type = type(build_data).__name__\n            raise TypeError(f\"Build in {file_path_str} is {build_type}\")\n\n    def _parse_test_section(self, test_data: Any, file_path_str: str) -> Test:\n        \"\"\"Parses the test section if its a dict, list, or str\"\"\"\n        if isinstance(test_data, dict):\n            # Pass original 
dependencies dict\n            test_deps_list = self._parse_dependency_list(\n                test_data.get(\"dependencies\"), f\"test section of {file_path_str}\"\n            )\n            # Convert env var keys just before instantiation\n            test_env = [\n                EnvironmentVariable(**convert_keys_to_snake_case(env))\n                for env in test_data.get(\"env\", [])\n                if isinstance(env, dict)\n            ]\n            # Convert test_data keys just before creating Test object\n            test_kwargs = convert_keys_to_snake_case(test_data)\n            return Test(\n                script=test_kwargs.get(\"script\", \"\"),\n                dependencies=test_deps_list,  # Use the originally parsed list\n                env=test_env,\n                fixture=test_kwargs.get(\"fixture\"),\n            )\n        elif isinstance(test_data, list):\n            # TODO: Clarify how to handle list-based test data. Assuming empty for now.\n            return Test(script=\"\", dependencies=[], env=[], fixture=None)\n        elif isinstance(test_data, str):\n            # Assuming string directly means the script\n            return Test(script=test_data, dependencies=[], env=[], fixture=None)\n        elif isinstance(test_data, bool):\n            # bad tests are sometimes just true/false\n            return Test(script=str(test_data), dependencies=[], env=[], fixture=None)\n        else:\n            test_type = type(test_data).__name__\n            raise TypeError(f\"Test for {file_path_str} is {test_type}\")\n\n    def _parse_versions_section(\n        self, versions_data: Any, file_path_str: str\n    ) -> Version:\n        \"\"\"Parses the versions section if its a list, dict, or None\"\"\"\n        if isinstance(versions_data, list):\n            # list of version strings (nums)\n            return Version(versions=versions_data)\n        elif isinstance(versions_data, dict):\n            # github or gitlab...something useful\n            # Convert keys just before creating Version object\n            return Version(**convert_keys_to_snake_case(versions_data))\n        elif versions_data is None:\n            # Handle case where versions might be missing, return default empty\n            logger.warn(f\"Missing 'versions' section in {file_path_str} using default.\")\n            return Version()\n        else:\n            version_type = type(versions_data).__name__\n            raise TypeError(f\"Versions in {file_path_str} is {version_type}\")\n\n    def _parse_distributable_section(\n        self, distributable_data: Any, file_path_str: str\n    ) -> Distributable | list[Distributable]:\n        \"\"\"Parses the distributable section from the package data.\"\"\"\n        if isinstance(distributable_data, list):\n            # Convert keys for each dict in the list before creating Distributable\n            return [\n                Distributable(**convert_keys_to_snake_case(d))\n                for d in distributable_data\n                if isinstance(d, dict)\n            ]\n        elif isinstance(distributable_data, dict):\n            # Convert keys just before creating Distributable object\n            return [Distributable(**convert_keys_to_snake_case(distributable_data))]\n        elif distributable_data is None:\n            return [Distributable(url=\"~\")]\n        else:\n            distributable_type = type(distributable_data).__name__\n            raise TypeError(f\"Distributable in {file_path_str} is {distributable_type}\")\n\n    def 
map_package_yaml_to_pkgx_package(\n        self, data: dict[str, Any], file_path_str: str\n    ) -> PkgxPackage:\n        \"\"\"Maps a package.yml to a PkgxPackage.\"\"\"\n        # Keep the original data, do not normalize globally here\n        # normalized_data = convert_keys_to_snake_case(data)\n\n        # Parse sections using helper functions, passing original data segments\n        build_data = data.get(\"build\")\n        build_obj = self._parse_build_section(build_data, file_path_str)\n\n        test_data = data.get(\"test\")\n        test_obj = self._parse_test_section(test_data, file_path_str)\n\n        versions_data = data.get(\"versions\")\n        versions_obj = self._parse_versions_section(versions_data, file_path_str)\n\n        distributable_data = data.get(\"distributable\")\n        distributable_obj = self._parse_distributable_section(\n            distributable_data, file_path_str\n        )\n\n        # Parse top-level dependencies using original keys\n        dependencies_data = data.get(\"dependencies\")\n        top_level_deps_list = self._parse_dependency_list(\n            dependencies_data, f\"top-level of {file_path_str}\"\n        )\n\n        # TODO: Implement parsing for 'provides' list\n        # would be useful because we have the set of \"names\" / \"commands\" for it!\n        # provides_data = data.get(\"provides\")\n        # provides_obj = self._parse_provides_section(provides_data, file_path_str)\n\n        # TODO: Implement parsing for 'platforms' list\n        # platforms_data = data.get(\"platforms\")\n        # platforms_obj = self._parse_platforms_section(platforms_data, file_path_str)\n\n        # Note: PkgxPackage itself doesn't directly take snake_case kwargs from top level\n        # Its arguments are constructed from the parsed objects.\n        return PkgxPackage(\n            distributable=distributable_obj,\n            versions=versions_obj,\n            dependencies=top_level_deps_list,\n            build=build_obj,\n            test=test_obj,\n            # provides=provides,\n            # platforms=platforms,\n        )\n"
  },
  {
    "path": "package_managers/pkgx/url.py",
    "content": "import re\nfrom uuid import UUID\n\nfrom permalint import normalize_url, possible_names\nfrom requests import Response, get\n\nfrom core.config import Config\nfrom core.logger import Logger\nfrom core.structs import URLKey\nfrom core.utils import is_github_url\nfrom package_managers.pkgx.db import DB\n\nHOMEPAGE_URL = \"https://pkgx.dev/pkgs/{name}.json\"\n\n\ndef canonicalize(url: str) -> str:\n    return normalize_url(url)\n\n\ndef guess(db_client: DB, package_managers: list[UUID], url: str) -> list[str]:\n    names = possible_names(url)\n    urls = db_client.search_names(names, package_managers)\n    return urls\n\n\ndef ask_pkgx(import_id: str) -> str | None:\n    \"\"\"\n    ask max's scraping work for the homepage of a package\n    Homepage comes from the pkgxdev/www repo\n    The API https://pkgx.dev/pkgs/{name}.json returns a blob which may contain\n    the homepage field\n    \"\"\"\n    response: Response = get(HOMEPAGE_URL.format(name=import_id))\n    if response.status_code == 200:\n        data: dict[str, str] = response.json()\n        if \"homepage\" in data:\n            return data[\"homepage\"]\n\n\ndef special_case(import_id: str, logger: Logger) -> str | None:\n    homepage: str | None = None\n\n    # if no slashes, then pkgx used the homepage as the name\n    # if two slashes, then probably github / gitlab\n    if not re.search(r\"/\", import_id) or re.search(r\"/.+/\", import_id):\n        homepage = import_id\n\n    # if it's a crates.io package, then we can use the crates URL\n    elif re.search(r\"^crates.io\", import_id):\n        if \"/\" in import_id:\n            name = import_id.split(\"/\")[1]\n            homepage = f\"https://crates.io/crates/{name}\"\n        else:\n            logger.warn(f\"Invalid format for crates.io import_id: {import_id}\")\n\n    # if it's part of the x.org family\n    elif re.search(r\"^x.org\", import_id):\n        homepage = \"https://x.org\"\n\n    # if it's part of the pkgx family\n    elif re.search(\"^pkgx.sh\", import_id):\n        tool = import_id.split(\"/\")[1]\n        homepage = f\"https://github.com/pkgxdev/{tool}\"\n\n    # python.org/typing_extensions\n    elif import_id == \"python.org/typing_extensions\":\n        homepage = \"https://github.com/python/typing_extensions\"\n\n    # thrysoee.dk/editline\n    elif import_id == \"thrysoee.dk/editline\":\n        homepage = \"https://thrysoee.dk/editline\"\n\n    # gen-ir is a Homebrew Tap, which lists this as its homepage\n    elif import_id == \"veracode.com/gen-ir\":\n        homepage = \"https://github.com/veracode/gen-ir\"\n\n    else:\n        logger.warn(f\"no homepage in pkgx for {import_id}\")\n\n    return homepage\n\n\ndef generate_chai_urls(\n    config: Config, db: DB, import_id: str, distributable_url: str, logger: Logger\n) -> list[URLKey]:\n    \"\"\"For a pkgx import_id, generate a list of URLs it could have\"\"\"\n    urls: list[URLKey] = []\n\n    # homepage\n    similar = [config.package_managers.debian, config.package_managers.homebrew]\n    maybe: list[str] = guess(db, similar, import_id)\n\n    if maybe:\n        homepage = maybe[0]\n    else:\n        homepage = ask_pkgx(import_id)\n\n        if not homepage:\n            homepage = special_case(import_id, logger)\n\n    if homepage:\n        canonical_homepage = canonicalize(homepage)\n        urls.append(URLKey(canonical_homepage, config.url_types.homepage))\n\n    # source\n    # NOTE: for non-GitHub source URLs, pkgx tells you where the version string for the\n    # 
downloadable tarball is...right now, we don't do anything about that\n    canonical_distributable = canonicalize(distributable_url)\n    urls.append(URLKey(canonical_distributable, config.url_types.source))\n\n    if is_github_url(canonical_distributable):\n        urls.append(URLKey(canonical_distributable, config.url_types.repository))\n\n    return urls\n"
  },
  {
    "path": "pkgx.yaml",
    "content": "# this is the pkgx config across all the services covered by docker-compose\ndependencies:\n  python.org: ~3.11\n  xcfile.dev: 0\n  cli.github.com: 2\n  astral.sh/uv: 0\n  postgresql.org: 16\n  docker.com/compose: 2\n  alembic.sqlalchemy.org: 1\n  psycopg.org/psycopg2: 2\n"
  },
  {
    "path": "pyproject.toml",
    "content": "[project]\nname = \"chai\"\nversion = \"1.0.0\"\ndescription = \"An open-source data pipeline for all package managers\"\nauthors = [\n    { name = \"Sanchit Ram Arvind\", email = \"sanchitram@gmail.com\" },\n    { name = \"Jacob Heider\", email = \"jhheider@pkgx.dev\" },\n]\nkeywords = [\"data\", \"pipeline\"]\nreadme = \"README.md\"\nrequires-python = \">= 3.11\"\ndependencies = []\n\n[project.urls]\nHomepage = \"https://github.com/teaxyz/chai\"\nRepository = \"https://github.com/teaxyz/chai\"\nSource = \"https://github.com/teaxyz/chai\"\n\n[tool.uv]\nmanaged = true\n\n[tool.pytest.ini_options]\npythonpath = [\".\"]\nminversion = \"8.0\"\npython_files = [\"test_*.py\"]\npython_classes = [\"Test*\"]\npython_functions = [\"test_\"]\naddopts = [\"-ra\", \"--strict-markers\", \"--disable-warnings\", \"--tb=short\"]\nmarkers = [\"unit\"]\n\n[tool.ruff]\nline-length = 88\nexclude = [\"__pycache__\", \".venv\", \".git\", \".pytest_cache\"]\n\n[tool.ruff.lint]\nselect = [\n    \"E\",   # pycodestyle (error)\n    \"F\",   # pyflakes\n    \"B\",   # bugbear\n    \"B9\",\n    \"C4\",  # flake8-comprehensions\n    \"SIM\", # flake8-simplify\n    \"I\",   # isort\n    \"UP\",  # pyupgrade\n    \"PIE\", # flake8-pie\n    \"PGH\", # pygrep-hooks\n    \"PYI\", # flake8-pyi\n    \"RUF\",\n]\nignore = [\n    # leave it to the formatter to split long lines and\n    # the judgement of all of us.\n    \"E501\",\n]\nfixable = [\"ALL\"]\n\n[tool.ruff.format]\nquote-style = \"double\"\nindent-style = \"space\"\n\n[dependency-groups]\ndev = [\n    \"pytest>=8.4.0\",\n    \"pytest-cov>=6.2.1\",\n    \"ruff>=0.11.13\",\n    \"testing-postgresql>=1.3.0\",\n]\nindexers = [\n    \"alembic==1.13.2\",\n    \"certifi>=2025.4.26\",\n    \"charset-normalizer>=3.4.2\",\n    \"gitpython>=3.1.44\",\n    \"idna>=3.10\",\n    \"permalint>=0.1.15\",\n    \"psycopg2-binary==2.9.10\",\n    \"pyyaml>=6.0.2\",\n    \"requests>=2.32.4\",\n    \"schedule>=1.2.2\",\n    \"sqlalchemy>=2.0.41\",\n    \"urllib3>=2.4.0\",\n]\nranker = [\"numpy>=2.3.0\", \"rustworkx>=0.16.0\"]\n"
  },
  {
    "path": "ranker/.dockerignore",
    "content": "prompts/"
  },
  {
    "path": "ranker/.gitignore",
    "content": "prompts/"
  },
  {
    "path": "ranker/Dockerfile",
    "content": "FROM python:3.11\n\n# Copy everything\nCOPY . .\n\n# Install core requirements\nWORKDIR /core\nRUN pip install --no-cache-dir -r requirements.txt\n\n# Install ranker requirements\nWORKDIR /ranker\nRUN pip install --no-cache-dir -r requirements.txt\n\n# Command to run the application\nCMD [\"python\", \"/ranker/main.py\"]\n"
  },
  {
    "path": "ranker/README.md",
    "content": "# ranker\n\nGenerates a deduplicated graph across all CHAI package managers by URL, and publishes a\n`tea_rank`.\n\n## Requirements\n\n1. [pkgx](https://pkgx.sh)\n2. [uv](https://astral.sh/uv)\n\n## Deduplication (`dedupe.py`)\n\n`dedupe.py` handles the deduplication of packages based on their homepage URLs. It\nensures that packages sharing the same canonical homepage URL are grouped together.\n\n**Process:**\n\n1.  **Fetch Existing State:** Retrieves all current canonical homepage URLs and their\n    associated packages from the `canons`, `canon_packages`, and `package_urls` tables\n2.  **Determine Latest URLs:** Identifies the most recent homepage URL for each package\n3.  **Diff:** Identifies new canons, new canon_packages, and canon_packages to update\n4.  **Ingest:** Creates new canons and new links if necessary, and updates existing ones\n\nThis process is idempotent, meaning running it multiple times converges to the same\ncorrect state based on the latest available package URL data.\n\n### Getting started\n\n1. You need `CHAI_DATABASE_URL` set, and the CHAI db running\n2. With pkgx:\n\n   ```bash\n   chmod +x ranker/dedupe.py\n   PYTHONPATH=. LOAD=0 ranker/dedupe.py\n   ```\n\n   You can toggle `LOAD` to do a dry run, where it will tell you what it's about to do\n   without loading any information\n\n## Ranking\n\n- [ ] Add a description here\n\n## Usage\n\n### With pkgx\n\n```bash\nchmod +x main.py\n./main.py\n```\n\n### Without pkgx\n\n```bash\nuv run main.py\n```\n\n## Docker\n\nThis service can be run inside a Docker container. The container assumes that the `core`\nlibrary is available and that the `CHAI_DATABASE_URL` environment variable is set to\npoint to the database.\n\n**Building the Image:**\n\nFrom the root of the `chai-oss` repository:\n\n```bash\ndocker build -t chai-ranker -f ranker/Dockerfile .\n```\n\n**Running the Container:**\n\nMake sure to provide the database connection string via the `CHAI_DATABASE_URL`\nenvironment variable:\n\n```bash\ndocker run --rm -e CHAI_DATABASE_URL=postgresql://postgres:s3cr3t@localhost:5435/chai chai-ranker\n```\n\nThe container will execute `dedupe.py` followed by `main.py` and exit with code 0 on\nsuccess or a non-zero code on failure.\n"
  },
  {
    "path": "ranker/config.py",
    "content": "from dataclasses import dataclass\nfrom decimal import Decimal, getcontext\nfrom uuid import UUID\n\nfrom sqlalchemy import func\n\nfrom core.db import DB\nfrom core.logger import Logger\nfrom core.models import Canon, CanonPackage, Package, PackageManager, Source, URLType\nfrom core.utils import env_vars\n\nlogger = Logger(\"graph.config\")\nSYSTEM_PACKAGE_MANAGERS = [\"homebrew\", \"debian\", \"pkgx\"]\n\n# setup decimal\ngetcontext().prec = 9\ngetcontext().rounding = \"ROUND_HALF_UP\"\n\n\nclass ConfigDB(DB):\n    def __init__(self):\n        super().__init__(\"graph.config::db\")\n\n    def get_homepage_url_type_id(self) -> UUID:\n        with self.session() as session:\n            result = (\n                session.query(URLType.id).filter(URLType.name == \"homepage\").scalar()\n            )\n            if result is None:\n                raise ValueError(\"homepage url type not found\")\n            return result\n\n    def get_npm_pm_id(self) -> UUID:\n        return self.get_pm_id_by_name(\"npm\")[0][0]\n\n    def get_canons_with_source_types(\n        self, source_types: list[str]\n    ) -> list[tuple[UUID, list[str]]]:\n        with self.session() as session:\n            return (\n                session.query(\n                    Canon.id, func.array_agg(Source.type).label(\"source_types\")\n                )\n                .join(CanonPackage, Canon.id == CanonPackage.canon_id)\n                .join(Package, CanonPackage.package_id == Package.id)\n                .join(PackageManager, Package.package_manager_id == PackageManager.id)\n                .join(Source, PackageManager.source_id == Source.id)\n                .filter(Source.type.in_(source_types))\n                .group_by(Canon.id)\n                .all()\n            )\n\n    def get_pm_id_by_name(self, name: str | list[str]) -> UUID:\n        if isinstance(name, str):\n            name = [name]\n\n        with self.session() as session:\n            result = (\n                session.query(PackageManager.id)\n                .join(Source, PackageManager.source_id == Source.id)\n                .filter(Source.type.in_(name))\n                .all()\n            )\n            if result is None:\n                raise ValueError(f\"package manager {name} not found\")\n            return result\n\n\nclass TeaRankConfig:\n    def __init__(self, db: ConfigDB) -> None:\n        self.db = db\n        self.favorites: dict[str, Decimal] = {}\n        self.weights: dict[UUID, Decimal] = {}\n        self.personalization: dict[UUID, Decimal] = {}\n        self.map_favorites(SYSTEM_PACKAGE_MANAGERS)\n\n    alpha: Decimal = Decimal(\"0.85\")\n    split_ratio: Decimal = Decimal(\"0.5\")\n    tol: Decimal = Decimal(\"1e-6\")\n    max_iter: int = 1000000\n\n    def map_favorites(self, package_managers: list[str]) -> None:\n        for pm in package_managers:\n            match pm:\n                case \"homebrew\":\n                    pm_id = self.db.get_pm_id_by_name(\"homebrew\")[0][0]\n                    self.favorites[pm_id] = Decimal(\"0.3\")\n                case \"debian\":\n                    pm_id = self.db.get_pm_id_by_name(\"debian\")[0][0]\n                    self.favorites[pm_id] = Decimal(\"0.6\")\n                case \"pkgx\":\n                    pm_id = self.db.get_pm_id_by_name(\"pkgx\")[0][0]\n                    self.favorites[pm_id] = Decimal(\"0.1\")\n                case _:\n                    raise ValueError(f\"Unknown system package manager: {pm}\")\n\n    def personalize(\n 
       self, canons_with_source_types: list[tuple[UUID, list[str]]]\n    ) -> None:\n        \"\"\"Adjust canon weights proportionally to the sum of `favorites` in their\n        associated package managers, normalized to total 1.\"\"\"\n\n        def coefficient(source_types: list[str]) -> Decimal:\n            return sum(self.favorites[source_type] for source_type in source_types)\n\n        # calculate raw weights for each canon based on favorites\n        raw_weights = {}\n        total = Decimal(0)\n        for canon_id, package_manager_ids in canons_with_source_types:\n            # make source_types a set to deduplicate\n            source_types = set(package_manager_ids)\n\n            # sum the weights for all package managers this canon appears in\n            weight = coefficient(source_types)\n            raw_weights[canon_id] = weight\n            total += weight\n\n        constant = Decimal(1) / total\n\n        for canon_id, weight in raw_weights.items():\n            self.personalization[canon_id] = weight * constant\n\n        logger.debug(f\"Personalization sum: {sum(self.personalization.values())}\")\n\n    def __str__(self) -> str:\n        return f\"TeaRankConfig(alpha={self.alpha}, favorites={self.favorites}, weights={len(self.weights)}, personalization={len(self.personalization)})\"  # E501\n\n\nclass PMConfig:\n    def __init__(self, db: ConfigDB) -> None:\n        self.db = db\n        self.npm_pm_id = self.db.get_npm_pm_id()\n        self.system_pm_ids = [\n            id[0] for id in self.db.get_pm_id_by_name(SYSTEM_PACKAGE_MANAGERS)\n        ]\n\n    # TODO: we'll add PyPI, rubygems from when we load with legacy data\n\n    def __str__(self) -> str:\n        return (\n            f\"PMConfig(npm_pm_id={self.npm_pm_id}, system_pm_ids={self.system_pm_ids})\"\n        )\n\n\nclass URLTypes:\n    def __init__(self, db: ConfigDB) -> None:\n        self.db = db\n        self.homepage_url_type_id = self.db.get_homepage_url_type_id()\n\n    def __str__(self) -> str:\n        return f\"URLTypes(homepage_url_type_id={self.homepage_url_type_id})\"\n\n\nclass DedupeConfig:\n    def __init__(self, db: ConfigDB) -> None:\n        self.homepage_url_type_id = db.get_homepage_url_type_id()\n        self.load = env_vars(\"LOAD\", \"true\")\n\n    def __str__(self) -> str:\n        return f\"DedupeConfig(homepage_url_type_id={self.homepage_url_type_id}, load={self.load})\"  # E501\n\n\n@dataclass\nclass Config:\n    def __init__(self, db: ConfigDB) -> None:\n        self.db = db\n        self.tearank_config = TeaRankConfig(db=db)\n        self.pm_config = PMConfig(db=db)\n        self.url_types = URLTypes(db=db)\n\n    def __str__(self) -> str:\n        return f\"Config(tearank_config={self.tearank_config}, pm_config={self.pm_config}, url_types={self.url_types})\"  # E501\n\n\ndef load_config() -> Config:\n    logger.debug(\"Loading config\")\n    return Config(db=ConfigDB())\n\n\ndef load_dedupe_config() -> DedupeConfig:\n    return DedupeConfig(db=ConfigDB())\n"
  },
  {
    "path": "ranker/db.py",
    "content": "from uuid import UUID\n\nfrom sqlalchemy.dialects.postgresql import insert as pg_insert\n\nfrom core.db import DB\nfrom core.models import (\n    URL,\n    Canon,\n    CanonPackage,\n    DependsOn,\n    LegacyDependency,\n    Package,\n    PackageURL,\n    TeaRank,\n    TeaRankRun,\n    URLType,\n    Version,\n)\n\nBATCH_SIZE = 20000\n\n\nclass GraphDB(DB):\n    def __init__(self, legacy_pm_id: UUID, system_pm_ids: list[UUID]):\n        super().__init__(\"graph.db\")\n        self.legacy_pm_id = legacy_pm_id\n        self.system_pm_ids = system_pm_ids\n\n    def is_canon_populated(self) -> bool:\n        with self.session() as session:\n            return session.query(Canon).count() > 0\n\n    def is_canon_package_populated(self) -> bool:\n        with self.session() as session:\n            return session.query(CanonPackage).count() > 0\n\n    def get_all_canons(self) -> dict[str, UUID]:\n        \"\"\"Fetch all existing canons as a map from URL to Canon ID.\"\"\"\n        with self.session() as session:\n            results = session.query(Canon.url, Canon.id).all()\n            return dict(results)\n\n    def get_packages_with_urls(self) -> list[tuple[UUID, str, str, str]]:\n        \"\"\"\n        Retrieve packages with their associated URLs and URL types.\n\n        Returns:\n            List of tuples containing id, name, and url\n        \"\"\"\n        with self.session() as session:\n            return (\n                session.query(Package.id, Package.name, URL.url, URL.created_at)\n                .join(PackageURL, Package.id == PackageURL.package_id)\n                .join(URL, PackageURL.url_id == URL.id)\n                .join(URLType, URL.url_type_id == URLType.id)\n                .where(URLType.name == \"homepage\")  # we're deduplicating on homepage\n                .order_by(URL.created_at.desc())\n                .all()\n            )\n\n    def load_canonical_packages(self, data: list[Canon]) -> None:\n        \"\"\"\n        Load canonical packages into the database in batches, handling conflicts.\n\n        Args:\n            data: List of Canon objects.\n        \"\"\"\n        with self.session() as session:\n            for i in range(0, len(data), BATCH_SIZE):\n                batch = data[i : i + BATCH_SIZE]\n                if not batch:\n                    continue\n\n                # Convert batch objects to dictionaries for insert statement\n                insert_data = [\n                    {\"id\": item.id, \"url\": item.url, \"name\": item.name}\n                    for item in batch\n                ]\n\n                stmt = pg_insert(Canon).values(insert_data)\n                stmt = stmt.on_conflict_do_nothing(index_elements=[\"url\"])\n\n                if stmt is not None:\n                    session.execute(stmt)\n\n                # log\n                batch_number = (i // BATCH_SIZE) + 1\n                total_batches = (len(data) + BATCH_SIZE - 1) // BATCH_SIZE\n                self.logger.log(\n                    f\"Processed Canon batch {batch_number} of {total_batches}\"\n                )\n\n            session.commit()\n\n    def load_canonical_package_mappings(self, data: list[CanonPackage]) -> None:\n        \"\"\"\n        Load canonical package mappings into the database in batches, updating on\n        conflict.\n\n        Args:\n            data: List of CanonPackage objects.\n        \"\"\"\n        with self.session() as session:\n            for i in range(0, len(data), BATCH_SIZE):\n                batch 
= data[i : i + BATCH_SIZE]\n                if not batch:\n                    continue\n\n                # Convert batch objects to dictionaries\n                insert_data = [\n                    {\n                        \"id\": item.id,\n                        \"canon_id\": item.canon_id,\n                        \"package_id\": item.package_id,\n                    }\n                    for item in batch\n                ]\n\n                stmt = pg_insert(CanonPackage).values(insert_data)\n                update_dict = {\"canon_id\": stmt.excluded.canon_id}\n\n                # this is the unique constraint on canon_packages -> if its violated,\n                # that means that the package has changed its URL, and the dedupe\n                # logic has corrected the correct canon for this package\n                stmt = stmt.on_conflict_do_update(\n                    index_elements=[\"package_id\"], set_=update_dict\n                )\n\n                if stmt is not None:\n                    session.execute(stmt)\n\n                # log\n                batch_number = (i // BATCH_SIZE) + 1\n                total_batches = (len(data) + BATCH_SIZE - 1) // BATCH_SIZE\n                self.logger.log(\n                    f\"Processed CanonPackage batch {batch_number} of {total_batches}\"\n                )\n\n            session.commit()\n\n    def get_packages(self) -> list[tuple[UUID, UUID]]:\n        \"\"\"Gets all packages for the run\"\"\"\n        self.logger.debug(f\"Getting packages for {self.system_pm_ids} package managers\")\n        with self.session() as session:\n            return (\n                session.query(Package.id, Package.package_manager_id)\n                .where(Package.package_manager_id.in_(self.system_pm_ids))\n                .all()\n            )\n\n    def get_dependencies(self, package_id: UUID) -> list[tuple[UUID]]:\n        \"\"\"Gets all the dependencies based on the CHAI data model\"\"\"\n        with self.session() as session:\n            return (\n                session.query(DependsOn.dependency_id)\n                .join(Version, DependsOn.version_id == Version.id)\n                .join(Package, Version.package_id == Package.id)\n                .filter(Package.id == package_id)\n                .all()\n            )\n\n    def get_package_to_canon_mapping(self) -> dict[UUID, UUID]:\n        with self.session() as session:\n            return {\n                canon_package.package_id: canon.id\n                for canon, canon_package in session.query(Canon, CanonPackage)\n                .join(CanonPackage, Canon.id == CanonPackage.canon_id)\n                .join(Package, CanonPackage.package_id == Package.id)\n                .where(Package.package_manager_id != self.legacy_pm_id)\n            }\n\n    def get_legacy_dependencies(self, package_id: UUID) -> list[tuple[UUID]]:\n        \"\"\"Gets all the legacy dependencies based on the legacy CHAI data model\"\"\"\n        with self.session() as session:\n            return (\n                session.query(LegacyDependency.dependency_id)\n                .filter(LegacyDependency.package_id == package_id)\n                .filter(LegacyDependency.dependency_id != package_id)\n                .all()\n            )\n\n    def load_tea_ranks(self, data: list[TeaRank]) -> None:\n        \"\"\"Loads tea ranks into the database\"\"\"\n        with self.session() as session:\n            session.add_all(data)\n            session.commit()\n\n    def load_tea_rank_runs(self, data: 
list[TeaRankRun]) -> None:\n        \"\"\"Loads tea rank runs into the database\"\"\"\n        with self.session() as session:\n            session.add_all(data)\n            session.commit()\n\n    def get_current_tea_rank_run(self) -> TeaRankRun | None:\n        \"\"\"Gets the current tea rank run\"\"\"\n        with self.session() as session:\n            return (\n                session.query(TeaRankRun).order_by(TeaRankRun.created_at.desc()).first()\n            )\n"
  },
  {
    "path": "ranker/dedupe.py",
    "content": "#!/usr/bin/env uv run --with sqlalchemy==2.0.34 --with permalint==0.1.12\nfrom datetime import datetime\nfrom uuid import UUID, uuid4\n\nfrom permalint import is_canonical_url\nfrom sqlalchemy import update\nfrom sqlalchemy.orm import Session\n\nfrom core.db import DB\nfrom core.logger import Logger\nfrom core.models import URL, BaseModel, Canon, CanonPackage, Package, PackageURL\nfrom package_managers.crates.structs import (\n    CanonPackageUpdatePayload,\n    CanonUpdatePayload,\n)\nfrom ranker.config import DedupeConfig, load_dedupe_config\nfrom ranker.naming import compute_canon_name, get_effective_canon_name\n\n\nclass DedupeDB(DB):\n    def __init__(self, config: DedupeConfig):\n        super().__init__(\"ranker.db\")\n        self.config: DedupeConfig = config\n\n    def get_current_canons(self) -> dict[UUID, Canon]:\n        \"\"\"Get current canons as a mapping from URL ID to Canon object.\"\"\"\n        with self.session() as session:\n            canons = session.query(Canon).all()\n            return {canon.url_id: canon for canon in canons}\n\n    def get_current_canon_packages(self) -> dict[UUID, dict[str, UUID]]:\n        \"\"\"Get current canon-package mappings as dict[package_id -> canon_id].\"\"\"\n        with self.session() as session:\n            canon_packages = session.query(CanonPackage).all()\n            return {\n                cp.package_id: {\"id\": cp.id, \"canon_id\": cp.canon_id}\n                for cp in canon_packages\n            }\n\n    def get_packages_with_homepages(self) -> list[tuple[Package, URL]]:\n        with self.session() as session:\n            return (\n                session.query(Package, URL)\n                .join(PackageURL, Package.id == PackageURL.package_id)\n                .join(URL, PackageURL.url_id == URL.id)\n                .where(URL.url_type_id == self.config.homepage_url_type_id)\n                .order_by(Package.id, URL.created_at.desc())  # Latest URL / package\n                .all()\n            )\n\n    def get_all_package_names(self) -> dict[UUID, str]:\n        with self.session() as session:\n            return {pkg.id: pkg.name for pkg in session.query(Package).all()}\n\n    # TODO: first to be optimized\n    def ingest(\n        self,\n        new_canons: list[Canon],\n        canon_updates: list[CanonUpdatePayload],\n        new_canon_packages: list[CanonPackage],\n        updated_canon_packages: list[CanonPackageUpdatePayload],\n    ) -> None:\n        with self.session() as session:\n            if new_canons:\n                self.add_with_flush(session, new_canons)\n\n            if canon_updates:\n                session.execute(update(Canon), canon_updates)\n\n            if new_canon_packages:\n                self.add_with_flush(session, new_canon_packages)\n\n            if updated_canon_packages:\n                session.execute(update(CanonPackage), updated_canon_packages)\n\n            session.commit()\n\n    def add_with_flush(self, session: Session, rows: list[BaseModel]) -> None:\n        session.add_all(rows)\n        session.flush()\n\n\ndef get_latest_homepage_per_package(\n    packages_with_homepages: list[tuple[Package, URL]], logger: Logger\n) -> tuple[dict[UUID, URL], list[URL]]:\n    \"\"\"Get the latest homepage URL for each package.\"\"\"\n    latest_homepages: dict[UUID, URL] = {}\n    non_canonical_urls: list[URL] = []\n\n    for pkg, url in packages_with_homepages:\n        # Since we ordered by Package.id, URL.created_at.desc(),\n        # the first URL we see 
for each package is the latest\n        if pkg.id not in latest_homepages:\n            # skip empty or whitespace-only urls\n            if not url.url or url.url.strip() == \"\":\n                continue\n\n            # guard against non-canonicalized URLs\n            try:\n                if not is_canonical_url(url.url):\n                    non_canonical_urls.append(url)\n                else:\n                    latest_homepages[pkg.id] = url\n            except Exception as e:\n                logger.warn(f\"Error checking if {url.url} is canonical: {e}\")\n                non_canonical_urls.append(url)\n\n    if non_canonical_urls:\n        logger.warn(f\"Found {len(non_canonical_urls)} non-canonicalized URLs in URLs\")\n\n    return latest_homepages, non_canonical_urls\n\n\ndef build_canon_update_payload(\n    canon: Canon, new_name: str, now: datetime\n) -> CanonUpdatePayload:\n    \"\"\"Build update payload for a canon with a new name.\"\"\"\n    return CanonUpdatePayload(id=canon.id, name=new_name, updated_at=now)\n\n\ndef build_canon_package_update_payload(\n    current_canon_packages: dict[UUID, dict[str, UUID]],\n    pkg_id: UUID,\n    new_canon_id: UUID,\n    now: datetime,\n) -> CanonPackageUpdatePayload:\n    \"\"\"Build an update payload for a canon package.\"\"\"\n    canon_package_data = current_canon_packages.get(pkg_id)\n\n    if canon_package_data is None:\n        raise ValueError(f\"No canon package mappings for {pkg_id}\")\n\n    current_canon_package_id = canon_package_data.get(\"id\")\n    if current_canon_package_id is None:\n        raise ValueError(f\"{pkg_id} has no canon package ID but canon: {new_canon_id}\")\n\n    return CanonPackageUpdatePayload(\n        id=current_canon_package_id, canon_id=new_canon_id, updated_at=now\n    )\n\n\ndef process_deduplication_changes(\n    latest_homepages: dict[UUID, URL],\n    current_canons: dict[UUID, Canon],\n    current_canon_packages: dict[UUID, dict[str, UUID]],\n    name_map: dict[UUID, str],\n    logger: Logger,\n) -> tuple[\n    list[Canon],\n    list[CanonUpdatePayload],\n    list[CanonPackage],\n    list[CanonPackageUpdatePayload],\n]:\n    \"\"\"\n    Process deduplication changes based on current state.\n\n    Returns:\n        tuple of (canons_to_create, canons_to_update, mappings_to_create,\n        mappings_to_update)\n    \"\"\"\n    now = datetime.now()\n    canons_to_create: dict[UUID, Canon] = {}  # indexed by url_id for deduplication\n    canons_to_update: dict[UUID, CanonUpdatePayload] = {}  # indexed by canon_id\n    mappings_to_create: list[CanonPackage] = []\n    mappings_to_update: list[CanonPackageUpdatePayload] = []\n\n    for pkg_id, url in latest_homepages.items():\n        # Check if the URL has an existing canon\n        existing_canon: Canon | None = current_canons.get(url.id)\n\n        # If no existing canon, check if we're creating one for this URL\n        if existing_canon is None:\n            existing_canon = canons_to_create.get(url.id)\n\n        existing_canon_id: UUID | None = existing_canon.id if existing_canon else None\n\n        # Check if the package is already linked to a canon\n        linked_canon_id: UUID | None = current_canon_packages.get(pkg_id, {}).get(\n            \"canon_id\"\n        )\n\n        if existing_canon_id is None:\n            # No canon exists for this URL - create a new one\n            pkg_name = name_map.get(pkg_id)\n            # Compute the name for the new canon\n            name = compute_canon_name(url.url, pkg_name)\n            
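# Construct the new canon; it's queued in canons_to_create (keyed by url_id)\n            # and persisted later by db.ingest\n            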
new_canon = Canon(\n                id=uuid4(),\n                url_id=url.id,\n                name=name,\n                created_at=now,\n                updated_at=now,\n            )\n            canons_to_create[url.id] = new_canon\n\n            # Handle package-to-canon mapping\n            if linked_canon_id is None:\n                # Create new canon package mapping\n                new_canon_package = CanonPackage(\n                    id=uuid4(),\n                    canon_id=new_canon.id,\n                    package_id=pkg_id,\n                    created_at=now,\n                    updated_at=now,\n                )\n                mappings_to_create.append(new_canon_package)\n            else:\n                # Update existing mapping to point to new canon\n                update_payload = build_canon_package_update_payload(\n                    current_canon_packages, pkg_id, new_canon.id, now\n                )\n                mappings_to_update.append(update_payload)\n        else:\n            # Canon exists - check if name needs updating\n\n            # Get the most current name (considering pending updates)\n            current_name = get_effective_canon_name(existing_canon, canons_to_update)\n            pkg_name = name_map.get(pkg_id)\n            desired_name = compute_canon_name(url.url, pkg_name, current_name)\n\n            # Update canon name if it's different\n            if desired_name != current_name:\n                update_payload = build_canon_update_payload(\n                    existing_canon, desired_name, now\n                )\n                canons_to_update[existing_canon.id] = update_payload\n\n            # Handle package-to-canon mapping\n            if linked_canon_id is None:\n                # Create new canon package mapping\n                new_canon_package = CanonPackage(\n                    id=uuid4(),\n                    canon_id=existing_canon_id,\n                    package_id=pkg_id,\n                    created_at=now,\n                    updated_at=now,\n                )\n                mappings_to_create.append(new_canon_package)\n            elif linked_canon_id != existing_canon_id:\n                # Update existing mapping to correct canon\n                update_payload = build_canon_package_update_payload(\n                    current_canon_packages, pkg_id, existing_canon_id, now\n                )\n                mappings_to_update.append(update_payload)\n            # else: mapping is already correct, no action needed\n\n    return (\n        list(canons_to_create.values()),\n        list(canons_to_update.values()),\n        mappings_to_create,\n        mappings_to_update,\n    )\n\n\ndef main(config: DedupeConfig, db: DedupeDB):\n    logger = Logger(\"ranker.dedupe\")\n    now = datetime.now()\n    logger.log(f\"Starting deduplication process at {now}\")\n\n    # 1. Get current state\n    current_canons: dict[UUID, Canon] = db.get_current_canons()\n    logger.debug(f\"Found {len(current_canons)} current canons\")\n\n    current_canon_packages: dict[UUID, dict[str, UUID]] = (\n        db.get_current_canon_packages()\n    )\n    logger.debug(f\"Found {len(current_canon_packages)} current canon packages\")\n\n    packages_with_homepages: list[tuple[Package, URL]] = (\n        db.get_packages_with_homepages()\n    )\n    logger.debug(f\"Found {len(packages_with_homepages)} packages with homepages\")\n\n    name_map: dict[UUID, str] = db.get_all_package_names()\n\n    # 2. 
Get latest homepage per package\n    latest_homepages, non_canonical_urls = get_latest_homepage_per_package(\n        packages_with_homepages, logger\n    )\n    logger.debug(f\"Found {len(latest_homepages)} packages with latest homepages\")\n\n    # 3. Process changes differentially\n    (canons_to_create, canons_to_update, mappings_to_create, mappings_to_update) = (\n        process_deduplication_changes(\n            latest_homepages, current_canons, current_canon_packages, name_map, logger\n        )\n    )\n\n    # 4. Apply changes\n    logger.log(\"-\" * 100)\n    logger.log(\"Changes to apply:\")\n    logger.log(f\"  Canons to create: {len(canons_to_create)}\")\n    logger.log(f\"  Canons to update: {len(canons_to_update)}\")\n    logger.log(f\"  Mappings to create: {len(mappings_to_create)}\")\n    logger.log(f\"  Mappings to update: {len(mappings_to_update)}\")\n    logger.log(\"-\" * 100)\n\n    if not config.load:\n        logger.log(\"Skipping changes because LOAD is not set\")\n        return\n\n    db.ingest(\n        canons_to_create, canons_to_update, mappings_to_create, mappings_to_update\n    )\n\n    logger.log(\"✅ Deduplication process completed\")\n\n    if non_canonical_urls:\n        logger.warn(f\"Found {len(non_canonical_urls)} non-canonical URLs\")\n\n\nif __name__ == \"__main__\":\n    config: DedupeConfig = load_dedupe_config()\n    db: DedupeDB = DedupeDB(config)\n\n    try:\n        main(config, db)\n    finally:\n        db.close()\n"
  },
  {
    "path": "ranker/main.py",
    "content": "#! /usr/bin/env pkgx +python@3.11 uv run\n\n# /// script\n# dependencies = [\n#   \"permalint==0.1.12\",\n#   \"sqlalchemy==2.0.34\",\n#   \"numpy==2.2.3\",\n#   \"rustworkx==0.16.0\",\n#   \"psycopg2-binary==2.9.10\",\n# ]\n# ///\n\nfrom dataclasses import dataclass\nfrom uuid import UUID\n\nfrom core.logger import Logger\nfrom core.models import TeaRank, TeaRankRun\nfrom ranker.config import Config, DedupeConfig, load_config, load_dedupe_config\nfrom ranker.db import GraphDB\nfrom ranker.dedupe import DedupeDB\nfrom ranker.dedupe import main as dedupe\nfrom ranker.rx_graph import CHAI, PackageNode\n\nlogger = Logger(\"ranker.main\")\n\n\n@dataclass\nclass PackageInfo:\n    id: UUID\n    package_manager_id: UUID\n\n\ndef load_graph(\n    config: Config,\n    db: GraphDB,\n    package_to_canon_mapping: dict[UUID, UUID],\n    packages: list[PackageInfo],\n    stop: int | None = None,\n) -> CHAI:\n    chai = CHAI()\n    missing: set[tuple[UUID, UUID]] = set()\n    npm_pm_id = config.pm_config.npm_pm_id\n\n    for i, package in enumerate(packages):\n        # add this package's canon to the graph\n        try:\n            canon_id = package_to_canon_mapping[package.id]\n        except KeyError:\n            missing.add((str(package.id), str(package.package_manager_id)))\n            continue\n\n        # grab the object from the graph if it exists\n        if canon_id in chai.canon_to_index:\n            node = chai[chai.canon_to_index[canon_id]]\n        else:  # otherwise, create a new one\n            node = PackageNode(canon_id=canon_id)\n            node.index = chai.add_node(node)\n\n        # add the package manager id to the node\n        node.package_manager_ids.append(package.package_manager_id)\n\n        # now grab its dependencies\n        # there are two cases: legacy CHAI or new CHAI\n        # the db helps us these two distinctions with two different helpers\n        # TODO: eventually, CHAI will be at package to package, so everything will\n        # \"get_legacy_dependencies\"\n        if package.package_manager_id == npm_pm_id:\n            dependencies = db.get_legacy_dependencies(package.id)\n        else:\n            dependencies = db.get_dependencies(package.id)\n\n        # for each dependency, add the corresponding canon to the graph\n        # and set the edge\n        for dependency in dependencies:\n            dep = dependency[0]\n            try:\n                dep_canon_id = package_to_canon_mapping[dep]\n            except KeyError:\n                missing.add((str(dep), str(package.package_manager_id)))\n                continue\n\n            dep_node = PackageNode(canon_id=dep_canon_id)\n            dep_node.index = chai.add_node(dep_node)\n            chai.add_edge(node.index, dep_node.index, {})\n\n        if stop is not None and i >= stop:\n            break\n\n        if i % 1000 == 0:\n            logger.debug(f\"Processing package {i+1}/{len(packages)} (ID: {package.id})\")\n\n    logger.log(f\"Missing {len(missing)} packages\")\n    # TODO: should we save the missing packages?\n\n    return chai\n\n\ndef main(config: Config, db: GraphDB) -> None:\n    # get the map of package_id -> canon_id\n    package_to_canon: dict[UUID, UUID] = db.get_package_to_canon_mapping()\n    logger.log(f\"{len(package_to_canon)} package to canon mappings\")\n\n    # get the list of packages\n    packages = [\n        PackageInfo(id=id, package_manager_id=pm_id) for id, pm_id in db.get_packages()\n    ]\n    logger.log(f\"{len(packages)} packages\")\n\n 
   # load the graph\n    chai = load_graph(config, db, package_to_canon, packages)\n    logger.log(f\"CHAI has {len(chai)} nodes and {len(chai.edge_to_index)} edges\")\n\n    # now, I need to generate the personalization vector\n    canons_with_source_types: list[tuple[UUID, list[UUID]]] = []\n    for idx in chai.node_indexes():\n        node = chai[idx]\n        canons_with_source_types.append((node.canon_id, node.package_manager_ids))\n    config.tearank_config.personalize(canons_with_source_types)\n\n    # generate tea_ranks\n    ranks = chai.distribute(\n        config.tearank_config.personalization,\n        config.tearank_config.split_ratio,\n        config.tearank_config.tol,\n        config.tearank_config.max_iter,\n    )\n    str_ranks = {str(chai[id].canon_id): f\"{rank}\" for id, rank in ranks.items()}\n\n    # Determine the next run ID\n    latest_run = db.get_current_tea_rank_run()\n    current_run = latest_run.run + 1 if latest_run else 1\n    logger.log(f\"Starting TeaRank run number: {current_run}\")\n\n    # Prepare TeaRank objects with the *next* run ID\n    tea_ranks = [\n        TeaRank(canon_id=UUID(canon_id), tea_rank_run=current_run, rank=rank)\n        for canon_id, rank in str_ranks.items()\n    ]\n    # Load all ranks first\n    db.load_tea_ranks(tea_ranks)\n\n    # Only after successfully loading ranks, load the corresponding run entry\n    tea_rank_run = TeaRankRun(\n        run=current_run, split_ratio=config.tearank_config.split_ratio\n    )\n    db.load_tea_rank_runs([tea_rank_run])\n    logger.log(\"Done!\")\n\n\nif __name__ == \"__main__\":\n    # first deduplicate\n    dedupe_config: DedupeConfig = load_dedupe_config()\n    dedupe_db: DedupeDB = DedupeDB(dedupe_config)\n    try:\n        dedupe(dedupe_config, dedupe_db)\n    except Exception as e:\n        logger.error(f\"Some error occurred when deduplicating: {e}\")\n        raise\n\n    # then rank\n    ranker_config = load_config()\n    ranker_db = GraphDB(\n        ranker_config.pm_config.npm_pm_id, ranker_config.pm_config.system_pm_ids\n    )\n    try:\n        main(ranker_config, ranker_db)\n    except Exception as e:\n        logger.error(f\"Some error occurred when ranking: {e}\")\n        raise\n"
  },
  {
    "path": "ranker/naming.py",
    "content": "#!/usr/bin/env uv run --with permalint==0.1.12\nfrom uuid import UUID\n\nfrom permalint import possible_names\n\nfrom core.models import Canon\nfrom package_managers.crates.structs import CanonUpdatePayload\n\n\ndef compute_canon_name(url: str, package_name: str, existing_name: str = \"\") -> str:\n    \"\"\"\n    Determines the name of the canon, based on the package name, URL, and canon name\n\n    Notes:\n      - the logic for determining whether it's an update or not, is left to the caller\n      - this function does not do anything for monorepos\n      - as a fallback, the original package name is always returned\n    \"\"\"\n    if not url or not package_name:\n        raise ValueError(f\"Missing one of url={url} | package_name={package_name}\")\n\n    best_guess = extract_repo_name_from_url(url)\n\n    if existing_name:\n        # guard\n        if url == existing_name:\n            return package_name\n\n        return check_if_better(best_guess, package_name, existing_name)\n\n    return package_name\n\n\ndef check_if_better(best_guess: str, package_name: str, existing_name: str) -> str:\n    \"\"\"Check if we have a better name than the existing name.\"\"\"\n    if best_guess == package_name:\n        # boom, this is the ideal case. the repo and the package share a name!\n        return package_name\n\n    package_name_score = score_name(package_name, best_guess)\n    existing_name_score = score_name(existing_name, best_guess)\n\n    if package_name_score > existing_name_score:\n        return package_name\n\n    return existing_name\n\n\ndef extract_repo_name_from_url(url: str) -> str:\n    \"\"\"\n    Extract a reasonable name from a URL, typically the repository name.\n\n    We're trusting permalint's rules for guessing a package's name based on\n    the homepage URL here. Note that the fallback is always to retrieve the full URL\n    name, which will be the only element in the result\n    \"\"\"\n    if not url:\n        return url\n\n    names: list[str] = possible_names(url)\n    if len(names) > 1:\n        return names[1].lower()\n    else:\n        return names[0].lower()\n\n\ndef score_name(name: str, best_guess: str) -> int:\n    \"\"\"\n    Score a package name based on some rules\n\n    1. Prefer shorter, simpler names\n    2. Prefer names without prefixes/suffixes that suggest forks or variations\n    \"\"\"\n    if not name and not best_guess:\n        raise ValueError(f\"Missing one of name={name} | guess={best_guess}\")\n\n    score = 0\n    clean = name.lower()\n\n    if best_guess in clean:\n        score += 1\n\n    # Prefer shorter names\n    score += max(0, 15 - len(clean))\n\n    # Penalize scoped packages\n    if clean.startswith(\"@\"):\n        score -= 3\n\n    return score\n\n\ndef get_effective_canon_name(\n    canon: Canon, pending_updates: dict[UUID, CanonUpdatePayload]\n) -> str:\n    \"\"\"\n    Get the effective name for a canon, considering both current and pending updates.\n\n    This ensures we always work with the most up-to-date name when processing\n    multiple canons that might reference each other.\n    \"\"\"\n    if canon.id in pending_updates:\n        return pending_updates[canon.id][\"name\"]\n    return canon.name\n"
  },
  {
    "path": "ranker/requirements.txt",
    "content": "# This file was autogenerated by uv via the following command:\n#    uv pip compile --group ranker -o ranker/requirements.txt\nnumpy==2.3.0\n    # via\n    #   chai (pyproject.toml:ranker)\n    #   rustworkx\nrustworkx==0.16.0\n    # via chai (pyproject.toml:ranker)\n"
  },
  {
    "path": "ranker/rx_graph.py",
    "content": "#!/usr/bin/env pkgx +python@3.11 uv run\n\nfrom collections import defaultdict, deque\nfrom dataclasses import dataclass, field\nfrom decimal import Decimal\nfrom typing import Any\nfrom uuid import UUID\n\nimport rustworkx as rx\n\nfrom core.logger import Logger\n\nlogger = Logger(\"ranker.chai_graph\")\n\n\n@dataclass\nclass PackageNode:\n    \"\"\"Note that this is different from PackageInfo in main.py!\n    This is based on canons!\"\"\"\n\n    canon_id: UUID\n    package_manager_ids: list[UUID] = field(default_factory=list)\n    weight: Decimal = field(default_factory=Decimal)\n    index: int = field(default_factory=lambda: -1)\n\n\nclass CHAI(rx.PyDiGraph):\n    def __init__(self):\n        super().__init__()\n        self.canon_to_index: dict[UUID, int] = {}\n        self.edge_to_index: dict[tuple[int, int], int] = {}\n\n    def add_node(self, node: PackageNode) -> int:\n        \"\"\"Safely add a node to the graph. If exists, return the index\"\"\"\n        if node.canon_id not in self.canon_to_index:\n            index = super().add_node(node)\n            self.canon_to_index[node.canon_id] = index\n        return self.canon_to_index[node.canon_id]\n\n    def add_edge(self, u: int, v: int, edge_data: Any) -> None:\n        \"\"\"Safely add an edge to the graph. If exists, return the index\"\"\"\n        if (u, v) not in self.edge_to_index:\n            index = super().add_edge(u, v, edge_data)\n            self.edge_to_index[(u, v)] = index\n        return self.edge_to_index[(u, v)]\n\n    def generate_personalization(\n        self, personalization: dict[UUID, Decimal]\n    ) -> dict[int, float]:\n        result = {}\n        for id, weight in personalization.items():\n            if id not in self.canon_to_index:\n                continue\n            result[self.canon_to_index[id]] = float(weight)\n        return result\n\n    def pagerank(\n        self, alpha: Decimal, personalization: dict[UUID, Decimal]\n    ) -> rx.CentralityMapping:\n        return rx.pagerank(\n            self,\n            alpha=float(alpha),\n            personalization=self.generate_personalization(personalization),\n        )\n\n    def distribute(\n        self,\n        personalization: dict[UUID, Decimal],\n        split_ratio: Decimal,\n        tol: Decimal,\n        max_iter: int = 100,\n    ) -> dict[int, Decimal]:\n        \"\"\"Distribute values across the graph based on dependencies.\"\"\"\n        if not personalization:\n            raise ValueError(\"Personalization is empty\")\n\n        # Convert personalization to index-based dict\n        result = defaultdict(Decimal)\n        q: deque[tuple[int, Decimal]] = deque()\n\n        for id, weight in personalization.items():\n            if id not in self.canon_to_index:\n                logger.log(f\"{id} is type {type(id)}\")\n                raise ValueError(f\"Canon ID {id} not found in CHAI\")\n            q.append((self.canon_to_index[id], weight))\n\n        iterations: int = 0\n\n        while q:\n            iterations += 1\n            node_id, weight = q.popleft()\n\n            # Ensure iteration count check happens regardless of other logic\n            if iterations > max_iter:\n                logger.warn(f\"Max iterations reached: {max_iter}\")\n                break\n\n            dependencies = self.successors(node_id)\n            num_dependencies = len(dependencies)\n\n            # If the weight arriving is already below tolerance, or if it's a terminal\n            # node, add the entire weight to the 
result and stop distributing from\n            # this node in this path.\n            if num_dependencies == 0 or weight < tol:\n                result[node_id] += weight\n                continue\n\n            # Handle non-terminal nodes with significant weight (weight >= tol)\n            # Calculate the portion of weight the current node keeps.\n            keep = weight * split_ratio\n\n            # Always add the 'keep' amount to the node's result.\n            # The tolerance check below is only for preventing further distribution\n            # of insignificant amounts, not for deciding if the current node's\n            # share is worth keeping.\n            result[node_id] += keep\n\n            # Calculate the total amount to be split among dependencies.\n            split = weight - keep  # Equivalent to weight * (1 - split_ratio)\n\n            # Calculate split per dependency.\n            split_per_dep = split / num_dependencies\n\n            # Use tolerance to gate further distribution: Only queue dependencies\n            # if the amount they would receive individually is significant enough.\n            if split_per_dep >= tol:\n                for dep in dependencies:\n                    q.append((dep.index, split_per_dep))\n            # If split_per_dep < tol, the remaining 'split' amount is effectively\n            # dropped from this distribution path, as it's deemed too small\n            # to continue propagating. This helps prune the calculation.\n\n        logger.log(f\"Iterations: {iterations}. Ranks sum to {sum(result.values()):.9f}\")\n\n        return dict(result)\n"
  },
  {
    "path": "ranker/utils/analyze_ranks.py",
    "content": "#!/usr/bin/env pkgx +python@3.11 uv run --with pandas --with sqlalchemy\n\n\"\"\"Script to analyze rank data and generate formatted CSV output.\n\nUsage:\n    python analyze_ranks.py [--file PATH_TO_RANK_FILE]\n\"\"\"\n\nfrom __future__ import annotations\n\nimport argparse\nimport json\nimport os\nfrom pathlib import Path\n\nimport pandas as pd\nfrom sqlalchemy import create_engine, distinct, func, select\nfrom sqlalchemy.orm import Session\n\nfrom core.models import (\n    Canon,\n    CanonPackage,\n    Package,\n    PackageManager,\n    Source,\n)\n\n\ndef get_latest_rank_file() -> Path:\n    \"\"\"Get the path to the latest rank file.\"\"\"\n    data_dir = Path(\"data/ranker/ranks\")\n    latest_symlink = data_dir / \"latest.json\"\n    return latest_symlink.resolve()\n\n\ndef get_rank_file(filename: str | None = None) -> Path:\n    \"\"\"Get the path to the rank file.\n\n    Args:\n        filename: Optional path to a specific rank file.\n\n    Returns:\n        Path to the rank file.\n\n    Raises:\n        FileNotFoundError: If the specified file doesn't exist.\n    \"\"\"\n    if filename:\n        file_path = Path(filename)\n        if not file_path.exists():\n            raise FileNotFoundError(f\"Rank file not found: {filename}\")\n        return file_path\n\n    return get_latest_rank_file()\n\n\ndef load_rank_data(file_path: Path) -> dict[str, float]:\n    \"\"\"Load rank data from JSON file.\"\"\"\n    with open(file_path) as f:\n        return json.load(f)\n\n\ndef get_output_filename(input_path: Path) -> Path:\n    \"\"\"Generate output filename based on input filename.\"\"\"\n    # Extract the rank number from filenames like \"ranks_37_0.7\"\n    parts = input_path.stem.split(\"_\")\n    rank_num = \"_\".join(parts[1:]) if len(parts) >= 2 else input_path.stem\n\n    output_dir = Path(\"data/ranker/analysis\")\n    output_dir.mkdir(parents=True, exist_ok=True)\n    return output_dir / f\"formatted_ranks_{rank_num}.csv\"\n\n\ndef get_package_data(ranks: dict[str, float], db_session: Session) -> pd.DataFrame:\n    \"\"\"Query database for package information and combine with ranks.\"\"\"\n    # Query for package data including URLs and aggregated package info\n    query = (\n        select(\n            Canon.id.label(\"canon_id\"),\n            Canon.url.label(\"homepage_url\"),\n            Canon.name.label(\"package_name\"),\n            func.array_agg(distinct(Source.type)).label(\"package_managers\"),\n            func.array_agg(distinct(Package.name)).label(\"package_names\"),\n        )\n        .join(CanonPackage, Canon.id == CanonPackage.canon_id)\n        .join(Package, CanonPackage.package_id == Package.id)\n        .join(PackageManager, Package.package_manager_id == PackageManager.id)\n        .join(Source, PackageManager.source_id == Source.id)\n        .group_by(Canon.id, Canon.url, Canon.name)\n    )\n\n    results = pd.DataFrame(db_session.execute(query))\n\n    # Convert UUID objects to strings in results DataFrame\n    results[\"canon_id\"] = results[\"canon_id\"].astype(str)\n\n    # Convert ranks to DataFrame and merge\n    ranks_df = pd.DataFrame.from_dict(ranks, orient=\"index\", columns=[\"tea_rank\"])\n    ranks_df.index.name = \"canon_id\"\n    ranks_df.reset_index(inplace=True)\n\n    # Merge and sort\n    final_df = pd.merge(ranks_df, results, on=\"canon_id\")\n    if final_df.empty:\n        raise ValueError(\n            \"No data to process - no matching canon_ids between ranks and database results\"\n        )\n\n    
final_df.sort_values([\"tea_rank\"], ascending=[False], inplace=True)\n    return final_df[\n        [\n            \"canon_id\",\n            \"package_name\",\n            \"tea_rank\",\n            \"homepage_url\",\n            \"package_managers\",\n            \"package_names\",\n        ]\n    ]\n\n\ndef parse_args() -> argparse.Namespace:\n    \"\"\"Parse command-line arguments.\"\"\"\n    parser = argparse.ArgumentParser(\n        description=\"Analyze rank data and generate formatted CSV output\"\n    )\n    parser.add_argument(\n        \"--file\",\n        type=str,\n        default=None,\n        help=\"Path to a specific rank file. If not provided, the latest rank file will be used.\",\n    )\n    return parser.parse_args()\n\n\ndef main() -> None:\n    \"\"\"Main function to process rank data and generate CSV.\"\"\"\n    # Parse command-line arguments\n    args = parse_args()\n\n    # Setup database connection\n    engine = create_engine(os.environ[\"CHAI_DATABASE_URL\"])\n\n    # Get input and output paths\n    rank_file = get_rank_file(args.file)\n    output_file = get_output_filename(rank_file)\n    print(f\"Output will be saved to: {output_file}\")\n\n    # Process data\n    ranks = load_rank_data(rank_file)\n    with Session(engine) as session:\n        result_df = get_package_data(ranks, session)\n\n    # Save output\n    result_df.to_csv(output_file, index=False)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "ranker/utils/parse_log.py",
    "content": "#!/usr/bin/env pkgx +python@3.11 uv run\n\n\"\"\"\nParse graph run log to calculate processing metrics.\n\nThis script analyzes a log file to compute:\n1. Average time to process 1,000 packages\n2. Average packages processed per second\n\nUsage:\n  From file:    ./parse_log.py log_file\n  From tmux:    tmux capture-pane -p | ./parse_log.py\n\"\"\"\n\nimport re\nimport sys\nfrom statistics import mean\n\n\ndef parse_log_line(line: str) -> tuple[float, int]:\n    \"\"\"\n    Extract timestamp and package count from a log line.\n\n    Args:\n        line: A line from the log file\n\n    Returns:\n        Tuple of (timestamp, package_count)\n    \"\"\"\n    pattern = r\"^(\\d+\\.\\d+): \\[graph\\.main\\]: (\\d+):\"\n    match = re.match(pattern, line)\n    if match:\n        timestamp = float(match.group(1))\n        package_count = int(match.group(2))\n        return timestamp, package_count\n    return None\n\n\ndef calculate_metrics(log_lines: list[str]) -> tuple[float, float]:\n    \"\"\"\n    Calculate processing metrics from log lines.\n\n    Args:\n        log_lines: List of log file lines\n\n    Returns:\n        Tuple of (avg_time_per_1000, packages_per_second)\n    \"\"\"\n    data_points = []\n    previous_timestamp = None\n    previous_count = None\n\n    for line in log_lines:\n        result = parse_log_line(line)\n        if not result:\n            continue\n\n        timestamp, count = result\n\n        if previous_timestamp is not None and previous_count is not None:\n            time_diff = timestamp - previous_timestamp\n            count_diff = count - previous_count\n\n            # Only process if we're looking at approximately 1000 package difference\n            if 900 <= count_diff <= 1100:\n                data_points.append((time_diff, count_diff))\n\n        previous_timestamp = timestamp\n        previous_count = count\n\n    if not data_points:\n        return 0.0, 0.0\n\n    # Calculate average time for processing 1000 packages\n    time_diffs = [time for time, _ in data_points]\n    avg_time_per_1000 = mean(time_diffs)\n\n    # Calculate average packages per second\n    packages_per_second = 1000 / avg_time_per_1000\n\n    return avg_time_per_1000, packages_per_second\n\n\ndef main():\n    \"\"\"Process the log data and display metrics.\"\"\"\n    log_lines = []\n\n    # Read from file if specified, otherwise from stdin\n    if len(sys.argv) == 2:\n        log_file = sys.argv[1]\n        try:\n            with open(log_file) as f:\n                log_lines = f.readlines()\n        except OSError as e:\n            print(f\"Error reading log file: {e}\")\n            sys.exit(1)\n    else:\n        # Read from stdin (for piping from tmux)\n        log_lines = sys.stdin.readlines()\n        if not log_lines:\n            print(f\"Usage: {sys.argv[0]} [log_file]\")\n            print(f\"   or: tmux capture-pane -p | {sys.argv[0]}\")\n            sys.exit(1)\n\n    avg_time, pkg_per_second = calculate_metrics(log_lines)\n\n    print(f\"Average time to process 1,000 packages: {avg_time:.2f} seconds\")\n    print(f\"Average packages processed per second: {pkg_per_second:.2f}\")\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "scripts/chai-legacy-loader/README.md",
    "content": "# CHAI Legacy Data Loader\n\nTools for loading legacy CHAI data into the current CHAI database framework.\n\n> [!NOTE]\n> This can only be executed if you have access to the Legacy CHAI database. If not,\n> you can ignore everything inside this folder.\n\n## Requirements\n\n- pkgx.sh\n\n## Overview\n\nThis is a set of utility python scripts to efficiently transfer data from the legacy CHAI\ndatabase into the current CHAI schema.\n\n## Loader Scripts\n\n- `add_package_fields.py`: enriches package data dumps from Legacy CHAI with fields\n  required by CHAI\n- `copy_dependencies_no_thread.py`: fetches dependency data from `public.sources` for a\n  given package manager and uses psycopg2's `copy_expert` function to load it in\n  batches into CHAI\n- `add_urls.py`: add urls and package_urls relationships from Legacy CHAI\n\n## Usage\n\n1. Set up environment variables (or use defaults):\n\n```bash\nexport LEGACY_CHAI_DATABASE_URL=credentials_from_itn\nexport CHAI_DATABASE_URL=postgresql://postgres:postgres@localhost:5435/chai\n```\n\n2. Loading packages\n\n   1. `psql $LEGACY_CHAI_DATABASE_URL -t -A -F',' -f sql/packages.sql -o /path/to/output.csv`\n   1. Run `add_package_fields.py /file/from/step/1.csv /path/to/output package_manager_id`\n      to enrich it with additional fields\n   1. `psql $CHAI_DATABASE_URL -c \"CREATE TABLE temp_import (LIKE packages);\"`\n   1. `psql $CHAI_DATABASE_URL -c \"\\COPY temp_import (id, derived_id, name, package_manager_id, import_id, created_at, updated_at) FROM '/path/to/csv/from/step/2' WITH (FORMAT csv, HEADER true, DELIMITER ',');\"`\n   1. `psql $CHAI_DATABASE_URL -c \"INSERT INTO packages SELECT * FROM temp_import ON CONFLICT DO NOTHING;\"`\n   1. `psql $CHAI_DATABASE_URL -c \"DROP TABLE temp_import;\"`\n\n3. Loading dependencies\n\nWith pkgx, just invoking the script from the root directory of chai\n\n```bash\ncd ../..\nPYTHONPATH=. copy_dependencies_no_thread.py\n```\n\n4. Loading URLs\n\n   1. Run [urls.sql](sql/urls.sql), which generates a csv\n   1. Run `batch_insert_urls.py /path/to/step/1 -d` to insert the raw URLs, and get a\n      dump of the loaded IDs and the URL\n   1. Run `batch_insert_package_urls.py /path/to/step/1 --urls /path/to/step/2` to\n      insert the package_url relationships. If no cache is provided, it'll try to read\n      all loaded URLs and their IDs from the db (long)\n\n```bash\npkgx psql -h localhost -U gardener -p 5430 temp_chai < dev_chai_fixed.sql\n```\n"
  },
  {
    "path": "scripts/chai-legacy-loader/add_package_fields.py",
    "content": "#!/usr/bin/env pkgx +python@3.11 uv run\n\n\"\"\"\nFor a csv generated from legacy chai, this script adds the id, created_at, and\nupdated_at fields to the csv.\n\nThe input CSV must have a header row: \"derived_id,name,import_id\".\nThe package_manager argument must be a valid UUID.\n\nUsage:\n    chmod +x add-package-fields.py\n    ./add-package-fields.py input.csv output.csv <package_manager_uuid>\n\"\"\"\n\nimport csv\nimport sys\nimport uuid\nfrom datetime import UTC, datetime\n\n\ndef validate_uuid(uuid_string: str) -> None:\n    \"\"\"Raises ValueError if the string is not a valid UUID.\"\"\"\n    try:\n        uuid.UUID(uuid_string)\n    except ValueError as exc:\n        raise ValueError(f\"Invalid UUID format: {uuid_string}\") from exc\n\n\ndef process_csv(input_file: str, output_file: str, package_manager_id: str) -> None:\n    \"\"\"\n    Processes the input CSV, validates headers, adds new fields, and writes to the\n    output CSV.\n\n    Args:\n        input_file: Path to the input CSV file.\n        output_file: Path to the output CSV file.\n        package_manager_id: The UUID of the package manager.\n\n    Raises:\n        ValueError: If the input CSV header is missing or incorrect.\n    \"\"\"\n    now = datetime.now(UTC).isoformat()\n    expected_header: list[str] = [\"derived_id\", \"name\", \"import_id\"]\n    output_header: list[str] = [\n        \"id\",\n        \"derived_id\",\n        \"name\",\n        \"package_manager_id\",\n        \"import_id\",\n        \"created_at\",\n        \"updated_at\",\n    ]\n\n    with (\n        open(input_file, newline=\"\") as infile,\n        open(output_file, \"w\", newline=\"\") as outfile,\n    ):\n        reader: csv._reader = csv.reader(infile)\n        writer: csv._writer = csv.writer(outfile)\n\n        # 1. Validate header row\n        header: list[str] | None = next(reader, None)\n        if header is None:\n            raise ValueError(f\"Input file '{input_file}' is missing a header row.\")\n        if header != expected_header:\n            raise ValueError(\n                f\"Input file '{input_file}' header mismatch. \"\n                f\"Expected: {expected_header}, Got: {header}\"\n            )\n\n        # Write output header\n        writer.writerow(output_header)\n\n        # Process data rows\n        row_count = 0\n        for row in reader:\n            if len(row) != len(expected_header):\n                msg = f\"Warning: Skipping row {reader.line_num} due to incorrect \\\n                    column count ({len(row)} instead of {len(expected_header)}): {row}\"\n                print(msg, file=sys.stderr)\n                continue\n\n            row_uuid: str = str(uuid.uuid4())\n            derived_id, name, import_id = row\n            output_row: list[str] = [\n                row_uuid,\n                derived_id,\n                name,\n                package_manager_id,\n                import_id,\n                now,\n                now,\n            ]\n            writer.writerow(output_row)\n            row_count += 1\n\n    print(f\"Processed {row_count} rows from {input_file} -> {output_file}\")\n\n\nif __name__ == \"__main__\":\n    if len(sys.argv) != 4:\n        print(\n            f\"Usage: {sys.argv[0]} input.csv output.csv <package_manager_uuid>\",\n            file=sys.stderr,\n        )\n        sys.exit(1)\n\n    input_csv_path: str = sys.argv[1]\n    output_csv_path: str = sys.argv[2]\n    pm_uuid: str = sys.argv[3]\n\n    try:\n        # 6. 
Validate package_manager argument is a UUID\n        validate_uuid(pm_uuid)\n        process_csv(input_csv_path, output_csv_path, pm_uuid)\n    except FileNotFoundError as e:\n        print(f\"Error: Input file not found - {e}\", file=sys.stderr)\n        sys.exit(1)\n    except ValueError as e:\n        print(f\"Error: {e}\", file=sys.stderr)\n        sys.exit(1)\n    except Exception as e:\n        print(f\"An unexpected error occurred: {e}\", file=sys.stderr)\n        sys.exit(1)\n"
  },
  {
    "path": "scripts/chai-legacy-loader/batch_insert_package_urls.py",
    "content": "#!/usr/bin/env pkgx +python@3.11 uv run --with psycopg2==2.9.9\n\nimport argparse\nimport csv\nimport os\nimport uuid\nfrom datetime import datetime\n\nimport psycopg2\nimport psycopg2.extras\n\nfrom core.config import Config, PackageManager\nfrom core.logger import Logger\n\nCHAI_DATABASE_URL = os.environ.get(\"CHAI_DATABASE_URL\")\nDEFAULT_BATCH_SIZE = 20000\n\n\nclass ChaiPackageUrlsDB:\n    \"\"\"Handles DB interactions for batch package_urls insertion.\"\"\"\n\n    def __init__(self, logger: Logger):\n        self.logger = logger\n        if not CHAI_DATABASE_URL:\n            self.logger.error(\"CHAI_DATABASE_URL environment variable not set.\")\n            raise ValueError(\"CHAI_DATABASE_URL not set\")\n        self.conn = None\n        self.cursor = None\n        try:\n            self.conn = psycopg2.connect(CHAI_DATABASE_URL)\n            self.cursor = self.conn.cursor()\n            self.logger.log(\"CHAI database connection established for PackageUrlsDB\")\n        except psycopg2.Error as e:\n            self.logger.error(f\"PackageUrlsDB connection error: {e}\")\n            raise\n\n    def load_package_id_cache(self) -> dict[str, uuid.UUID]:\n        \"\"\"Load all packages (import_id -> id) into a cache.\"\"\"\n        self.logger.log(\"Loading package_id cache from database...\")\n        query = \"SELECT import_id, id FROM packages\"\n        try:\n            self.cursor.execute(query)\n            cache = {str(row[0]): row[1] for row in self.cursor.fetchall() if row[0]}\n            self.logger.log(f\"Loaded {len(cache)} packages into package_id cache.\")\n            return cache\n        except psycopg2.Error as e:\n            self.logger.error(f\"Error loading package_id cache: {e}\")\n            raise\n\n    def load_url_id_cache_from_db(\n        self,\n    ) -> dict[tuple[str, uuid.UUID], uuid.UUID]:\n        \"\"\"Load all URLs ( (url, url_type_id) -> id ) into a cache from DB.\"\"\"\n        self.logger.log(\"Loading url_id cache from database (fallback)...\")\n        query = \"SELECT id, url, url_type_id FROM urls\"\n        cache: dict[tuple[str, uuid.UUID], uuid.UUID] = {}\n        try:\n            self.cursor.execute(query)\n            for row in self.cursor.fetchall():\n                url_id, url_str, url_type_id = row[0], row[1], row[2]\n                if url_str and url_type_id:\n                    cache[(url_str, url_type_id)] = url_id\n            self.logger.log(f\"Loaded {len(cache)} URLs into url_id cache from DB.\")\n            return cache\n        except psycopg2.Error as e:\n            self.logger.error(f\"Error loading url_id cache from DB: {e}\")\n            raise\n\n    def batch_insert_package_urls(\n        self,\n        data_tuples: list[tuple[uuid.UUID, uuid.UUID, uuid.UUID, datetime, datetime]],\n    ) -> None:\n        \"\"\"Batch insert into package_urls table.\"\"\"\n        if not data_tuples:\n            return\n\n        query = \"\"\"\n            INSERT INTO package_urls (id, package_id, url_id, created_at, updated_at)\n            VALUES %s\n            ON CONFLICT (package_id, url_id) \n            DO UPDATE SET updated_at = EXCLUDED.updated_at\n        \"\"\"\n        try:\n            psycopg2.extras.execute_values(\n                self.cursor, query, data_tuples, page_size=len(data_tuples)\n            )\n            self.conn.commit()\n            self.logger.log(\n                f\"Successfully inserted/updated {len(data_tuples)} package_urls\"\n            )\n        except psycopg2.Error 
as e:\n            self.logger.error(f\"Error during batch insert into package_urls: {e}\")\n            self.logger.log(\n                f\"Failed data sample: {data_tuples[0] if data_tuples else 'N/A'}\"\n            )\n            self.conn.rollback()\n            raise\n        except Exception as e:\n            self.logger.error(f\"Unexpected error during package_urls batch insert: {e}\")\n            self.conn.rollback()\n            raise\n\n    def close(self):\n        if self.cursor:\n            self.cursor.close()\n        if self.conn:\n            self.conn.close()\n        self.logger.log(\"PackageUrlsDB connection closed.\")\n\n\ndef load_url_id_cache_from_file(\n    cache_file_path: str, logger: Logger\n) -> dict[tuple[str, uuid.UUID], uuid.UUID]:\n    \"\"\"Load URL ID cache from the CSV file generated by batch_insert_urls.py.\"\"\"\n    logger.log(f\"Loading url_id cache from file: {cache_file_path}...\")\n    cache: dict[tuple[str, uuid.UUID], uuid.UUID] = {}\n    try:\n        with open(cache_file_path, newline=\"\", encoding=\"utf-8\") as csvfile:\n            reader = csv.reader(csvfile)\n            header = next(reader, None)  # Skip header\n            if not header or header != [\"id\", \"url\", \"url_type_id\"]:\n                logger.error(\n                    f\"Invalid or missing header in URL cache file: {cache_file_path}. Expected ['id', 'url', 'url_type_id']\"\n                )\n                raise ValueError(\"Invalid URL cache file format\")\n\n            for i, row in enumerate(reader):\n                if len(row) == 3:\n                    try:\n                        url_id_str, url_str, url_type_id_str = row[0], row[1], row[2]\n                        if url_str and url_type_id_str:  # Ensure no empty strings\n                            cache[(url_str, uuid.UUID(url_type_id_str))] = uuid.UUID(\n                                url_id_str\n                            )\n                    except ValueError as ve:\n                        logger.warn(\n                            f\"Invalid UUID in URL cache file at row {i+2}: {row} - {ve}\"\n                        )\n                        continue\n                else:\n                    logger.warn(\n                        f\"Skipping malformed row in URL cache file at row {i+2}: {row}\"\n                    )\n        logger.log(f\"Loaded {len(cache)} URLs into url_id cache from file.\")\n        return cache\n    except FileNotFoundError:\n        logger.error(f\"URL cache file not found: {cache_file_path}\")\n        raise\n    except Exception as e:\n        logger.error(f\"Error loading URL cache file {cache_file_path}: {e}\")\n        raise\n\n\ndef process_package_url_associations(\n    input_csv_path: str,\n    batch_size: int,\n    script_execution_time: datetime,\n    url_cache_csv_path: str | None,\n    stop_at: int | None,\n    main_logger: Logger,\n) -> None:\n    \"\"\"Main processing logic for associating packages with URLs.\"\"\"\n    main_logger.log(f\"Starting package-URL association for: {input_csv_path}\")\n    main_logger.log(\n        f\"Batch size: {batch_size}, URL cache: {url_cache_csv_path}, Stop at: {stop_at}\"\n    )\n\n    try:\n        config = Config(PackageManager.NPM)\n        url_type_homepage_id = config.url_types.homepage\n        url_type_source_id = config.url_types.source\n    except Exception as e:\n        main_logger.error(f\"Error initializing config: {e}\")\n        return\n\n    db_handler = None\n    package_id_cache: dict[str, 
uuid.UUID] = {}\n    url_id_cache: dict[tuple[str, uuid.UUID], uuid.UUID] = {}\n\n    try:\n        db_handler = ChaiPackageUrlsDB(main_logger)\n        package_id_cache = db_handler.load_package_id_cache()\n\n        if url_cache_csv_path:\n            url_id_cache = load_url_id_cache_from_file(url_cache_csv_path, main_logger)\n        else:\n            main_logger.log(\n                \"No URL cache file provided, loading all URLs from database...\"\n            )\n            url_id_cache = db_handler.load_url_id_cache_from_db()\n\n    except Exception as e:\n        main_logger.error(f\"Failed during setup (DB or cache loading): {e}\")\n        if db_handler:\n            db_handler.close()\n        return\n\n    package_urls_to_insert: list[\n        tuple[uuid.UUID, uuid.UUID, uuid.UUID, datetime, datetime]\n    ] = []\n    processed_csv_rows = 0\n    total_associations_prepared = 0\n    processed_pairs: set[tuple[uuid.UUID, uuid.UUID]] = (\n        set()\n    )  # To avoid duplicates in a single batch\n\n    try:\n        with open(input_csv_path, newline=\"\", encoding=\"utf-8\") as infile:\n            reader = csv.reader(infile)\n            header = next(reader, None)\n            if not header:\n                main_logger.warn(\n                    f\"Input CSV file {input_csv_path} is empty or has no header.\"\n                )\n                return\n            main_logger.log(f\"Input CSV Header: {header}\")\n\n            for row_num, row in enumerate(reader):\n                processed_csv_rows += 1\n                current_csv_line = row_num + 2  # 1 for header, 1 for 0-indexing\n\n                if not (len(row) >= 3):\n                    main_logger.warn(\n                        f\"Skipping row {current_csv_line} (length < 3): {row}\"\n                    )\n                    continue\n\n                import_id, source_url_str, homepage_url_str = row[0], row[1], row[2]\n\n                if not import_id:\n                    main_logger.warn(\n                        f\"Skipping row {current_csv_line} due to missing import_id: {row}\"\n                    )\n                    continue\n\n                package_id = package_id_cache.get(import_id)\n                if not package_id:\n                    # We didn't load all the packages from ITN, so this is expected\n                    continue\n\n                urls_to_link = []\n                if source_url_str and source_url_str.lower() != \"null\":\n                    source_key = (source_url_str.strip(), url_type_source_id)\n                    source_url_id = url_id_cache.get(source_key)\n                    if source_url_id:\n                        urls_to_link.append(source_url_id)\n                    else:\n                        main_logger.warn(\n                            f\"Source URL for import_id '{import_id}' not found in URL cache: '{source_url_str}' (row {current_csv_line})\"\n                        )\n\n                if homepage_url_str and homepage_url_str.lower() != \"null\":\n                    homepage_key = (homepage_url_str.strip(), url_type_homepage_id)\n                    homepage_url_id = url_id_cache.get(homepage_key)\n                    if homepage_url_id:\n                        urls_to_link.append(homepage_url_id)\n                    else:\n                        main_logger.warn(\n                            f\"Homepage URL for import_id '{import_id}' not found in URL cache: '{homepage_url_str}' (row {current_csv_line})\"\n                        )\n\n    
            for url_id_to_link in urls_to_link:\n                    if (package_id, url_id_to_link) not in processed_pairs:\n                        package_urls_to_insert.append(\n                            (\n                                uuid.uuid4(),\n                                package_id,\n                                url_id_to_link,\n                                script_execution_time,\n                                script_execution_time,\n                            )\n                        )\n                        processed_pairs.add((package_id, url_id_to_link))\n                        total_associations_prepared += 1\n\n                if len(package_urls_to_insert) >= batch_size:\n                    db_handler.batch_insert_package_urls(package_urls_to_insert)\n                    package_urls_to_insert = []\n                    processed_pairs.clear()  # Clear after batch insert\n                    main_logger.log(\n                        f\"Processed batch. CSV rows: {processed_csv_rows}, Associations: {total_associations_prepared}\"\n                    )\n\n                if stop_at and processed_csv_rows >= stop_at:\n                    main_logger.log(f\"Reached stop limit of {stop_at} CSV rows.\")\n                    break\n\n        if package_urls_to_insert:  # Process remaining\n            db_handler.batch_insert_package_urls(package_urls_to_insert)\n            main_logger.log(\n                f\"Processed final batch. CSV rows: {processed_csv_rows}, Associations: {total_associations_prepared}\"\n            )\n\n        main_logger.log(\n            f\"Package-URL association processing complete. Total CSV rows: {processed_csv_rows}. Associations prepared: {total_associations_prepared}.\"\n        )\n\n    except FileNotFoundError:\n        main_logger.error(f\"Input CSV file not found: {input_csv_path}\")\n    except csv.Error as e:\n        main_logger.error(\n            f\"CSV reading error in {input_csv_path} near line {reader.line_num if 'reader' in locals() else 'unknown'}: {e}\"\n        )\n    except psycopg2.Error as e:\n        main_logger.error(f\"A database error occurred: {e}\")\n        main_logger.exception()\n    except Exception as e:\n        main_logger.error(f\"An unexpected error occurred: {e}\")\n        main_logger.exception()\n    finally:\n        if db_handler:\n            db_handler.close()\n\n\nif __name__ == \"__main__\":\n    parser = argparse.ArgumentParser(\n        description=\"Batch insert package-URL relationships from a CSV file.\"\n    )\n    parser.add_argument(\n        \"file_path\",\n        help=\"Path to the input CSV file (import_id, source_url, homepage_url).\",\n    )\n    parser.add_argument(\n        \"--cache\",\n        metavar=\"URL_CACHE_CSV_PATH\",\n        help=\"Optional path to the CSV file containing URL IDs (output of batch_insert_urls.py).\",\n    )\n    parser.add_argument(\n        \"--batch-size\",\n        \"-b\",\n        type=int,\n        default=DEFAULT_BATCH_SIZE,\n        help=f\"Number of records to insert per batch (default: {DEFAULT_BATCH_SIZE}).\",\n    )\n    parser.add_argument(\n        \"--stop\",\n        \"-s\",\n        type=int,\n        help=\"Optional: stop processing after this many CSV rows.\",\n    )\n    args = parser.parse_args()\n\n    script_start_time = datetime.now()\n    logger = Logger(\"main_pkg_url_assoc_loader\")\n    logger.log(f\"Script started at {script_start_time.isoformat()}\")\n\n    process_package_url_associations(\n        
input_csv_path=args.file_path,\n        batch_size=args.batch_size,\n        script_execution_time=script_start_time,\n        url_cache_csv_path=args.cache,\n        stop_at=args.stop,\n        main_logger=logger,\n    )\n\n    logger.log(\n        f\"Script finished. Total execution time: {datetime.now() - script_start_time}\"\n    )\n"
  },
  {
    "path": "scripts/chai-legacy-loader/batch_insert_urls.py",
    "content": "#!/usr/bin/env pkgx +python@3.11 uv run --with psycopg2==2.9.9\n\nimport argparse\nimport csv\nimport os\nimport uuid\nfrom datetime import datetime\n\nimport psycopg2\nimport psycopg2.extras\n\nfrom core.config import Config, PackageManager\nfrom core.logger import Logger\n\nCHAI_DATABASE_URL = os.environ.get(\"CHAI_DATABASE_URL\")\nDEFAULT_BATCH_SIZE = 20000\nOUTPUT_CSV_FILENAME = \"inserted_urls.csv\"\n\n\nclass ChaiDB:\n    \"\"\"Handles interactions with the CHAI database for batch URL insertion.\"\"\"\n\n    def __init__(self):\n        \"\"\"Initialize connection to the CHAI database.\"\"\"\n        self.logger = Logger(\"batch_url_db\")\n        if not CHAI_DATABASE_URL:\n            self.logger.error(\"CHAI_DATABASE_URL environment variable not set.\")\n            raise ValueError(\"CHAI_DATABASE_URL not set\")\n        self.conn = None\n        self.cursor = None\n        try:\n            self.conn = psycopg2.connect(CHAI_DATABASE_URL)\n            self.cursor = self.conn.cursor()\n            self.logger.log(\"CHAI database connection established\")\n        except psycopg2.Error as e:\n            self.logger.error(f\"Database connection error: {e}\")\n            raise\n\n    def batch_insert_urls(\n        self,\n        url_data_tuples: list[tuple[str, uuid.UUID, datetime, datetime]],\n        dump_output: bool,\n    ) -> list[tuple[uuid.UUID, str, uuid.UUID]] | None:\n        \"\"\"\n        Batch insert URLs into the database.\n\n        Args:\n            url_data_tuples: A list of tuples, each containing\n                             (url, url_type_id, created_at_ts, updated_at_ts).\n            dump_output: If True, return the inserted/updated rows.\n\n        Returns:\n            A list of (id, url, url_type_id) tuples if dump_output is True, else None.\n        \"\"\"\n        if not url_data_tuples:\n            return [] if dump_output else None\n\n        query_base = \"\"\"\n            INSERT INTO urls (url, url_type_id, created_at, updated_at)\n            VALUES %s\n            ON CONFLICT (url_type_id, url) DO UPDATE SET updated_at = EXCLUDED.updated_at\n        \"\"\"\n        if dump_output:\n            query = query_base + \" RETURNING id, url, url_type_id\"\n        else:\n            query = query_base\n\n        try:\n            psycopg2.extras.execute_values(\n                self.cursor, query, url_data_tuples, page_size=len(url_data_tuples)\n            )\n            self.conn.commit()\n            self.logger.log(\n                f\"Successfully inserted/updated {len(url_data_tuples)} URL records.\"\n            )\n            if dump_output:\n                return self.cursor.fetchall()\n            return None\n        except psycopg2.Error as e:\n            self.logger.error(f\"Error during batch insert: {e}\")\n            self.logger.log(url_data_tuples)\n            self.conn.rollback()\n            raise e\n        except Exception as e:\n            self.logger.error(f\"An unexpected error occurred during batch insert: {e}\")\n            self.conn.rollback()\n            raise e\n\n    def close(self):\n        \"\"\"Close the database connection.\"\"\"\n        if self.cursor:\n            self.cursor.close()\n        if self.conn:\n            self.conn.close()\n        self.logger.log(\"CHAI database connection closed\")\n\n\ndef process_urls_for_batch_insert(\n    file_path: str,\n    batch_size: int,\n    script_execution_time: datetime,\n    dump_output: bool,\n    stop_at: int | None = None,\n) -> None:\n    
\"\"\"\n    Reads URLs from a CSV file, prepares them, and batch inserts them into the database.\n\n    Args:\n        file_path: Path to the input CSV file.\n        batch_size: Number of records to insert per batch.\n        script_execution_time: Timestamp for created_at/updated_at.\n        dump_output: Whether to dump inserted data to a CSV file.\n        stop_at: Optional number of CSV rows to process.\n    \"\"\"\n    logger = Logger(\"url_batch_processor\")\n    logger.log(f\"Starting URL batch processing for file: {file_path}\")\n    logger.log(\n        f\"Batch size: {batch_size}, Dump output: {dump_output}, Stop at: {stop_at}\"\n    )\n    cache: set[tuple[str, uuid.UUID]] = set()\n\n    try:\n        config = Config(PackageManager.NPM)\n        url_type_homepage_id = config.url_types.homepage\n        url_type_source_id = config.url_types.source\n    except AttributeError as e:\n        logger.error(\n            f\"Could not load URL types from config. Ensure DB contains these types: {e}\"\n        )\n        return\n    except Exception as e:\n        logger.error(f\"Error initializing config: {e}\")\n        return\n\n    chai_db = None\n    try:\n        chai_db = ChaiDB()\n    except Exception as e:\n        logger.error(f\"Failed to initialize ChaiDB: {e}\")\n        return  # Exit if DB connection fails\n\n    url_data_to_insert: list[tuple[str, uuid.UUID, datetime, datetime]] = []\n    all_inserted_data_for_dump: list[tuple[uuid.UUID, str, uuid.UUID]] = []\n    processed_csv_rows = 0\n    total_urls_prepared = 0\n\n    try:\n        with open(file_path, newline=\"\", encoding=\"utf-8\") as csvfile:\n            reader = csv.reader(csvfile)\n            header = next(reader, None)  # Skip header\n            if not header:\n                logger.warn(\"CSV file is empty or has no header.\")\n                return\n\n            logger.log(f\"CSV Header: {header}\")  # Log the header for context\n\n            for row in reader:\n                processed_csv_rows += 1\n                if not (len(row) >= 3):\n                    logger.warn(f\">3 cols at L{processed_csv_rows + 1}: {row}\")\n                    continue\n\n                # Assuming import_id is row[0], source is row[1], homepage is row[2]\n                # set the source data\n                source_url = row[1].strip() if row[1] else None\n                source_data = (source_url, url_type_source_id)\n\n                # set the homepage data\n                homepage_url = row[2].strip() if row[2] else None\n                homepage_data = (homepage_url, url_type_homepage_id)\n\n                # add to url_data_to_insert if valid and not in cache\n                # also, update the cache\n                urls_to_process = []\n                if (\n                    source_url\n                    and source_url.lower() != \"null\"\n                    and source_data not in cache\n                ):\n                    urls_to_process.append(source_data)\n                    cache.add(source_data)\n                if (\n                    homepage_url\n                    and homepage_url.lower() != \"null\"\n                    and homepage_data not in cache\n                ):\n                    urls_to_process.append(homepage_data)\n                    cache.add(homepage_data)\n\n                for url_str, url_type_id in urls_to_process:\n                    url_data_to_insert.append(\n                        (\n                            url_str,\n                            
url_type_id,\n                            script_execution_time,\n                            script_execution_time,\n                        )\n                    )\n                    total_urls_prepared += 1\n\n                # insert the data in batches\n                if len(url_data_to_insert) >= batch_size:\n                    results = chai_db.batch_insert_urls(url_data_to_insert, dump_output)\n                    if dump_output and results:\n                        all_inserted_data_for_dump.extend(results)\n                    url_data_to_insert = []\n                    logger.log(\n                        f\"Processed batch. Total CSV rows read: {processed_csv_rows}, Total URLs prepared: {total_urls_prepared}\"\n                    )\n\n                if stop_at and processed_csv_rows >= stop_at:\n                    logger.log(f\"Reached stop limit of {stop_at} CSV rows.\")\n                    break\n\n        # Process any remaining URLs in the buffer\n        if url_data_to_insert:\n            results = chai_db.batch_insert_urls(url_data_to_insert, dump_output)\n            if dump_output and results:\n                all_inserted_data_for_dump.extend(results)\n            logger.log(\n                f\"Processed final batch. Total CSV rows read: {processed_csv_rows}, Total URLs prepared: {total_urls_prepared}\"\n            )\n\n        if dump_output:\n            with open(\n                OUTPUT_CSV_FILENAME, \"w\", newline=\"\", encoding=\"utf-8\"\n            ) as outfile:\n                writer = csv.writer(outfile)\n                writer.writerow([\"id\", \"url\", \"url_type_id\"])  # Header for output CSV\n                writer.writerows(all_inserted_data_for_dump)\n            logger.log(\n                f\"Dumped {len(all_inserted_data_for_dump)} records to {OUTPUT_CSV_FILENAME}\"\n            )\n\n        logger.log(\n            f\"URL batch processing complete. Total CSV rows processed: {processed_csv_rows}. 
Total URLs prepared/processed: {total_urls_prepared}.\"\n        )\n\n    except FileNotFoundError:\n        logger.error(f\"Input CSV file not found: {file_path}\")\n    except csv.Error as e:\n        logger.error(\n            f\"CSV reading error in {file_path} near line {reader.line_num}: {e}\"\n        )\n    except psycopg2.Error as e:\n        logger.error(f\"A database error occurred: {e}\")\n        logger.exception()\n    except Exception as e:\n        logger.error(f\"An unexpected error occurred: {e}\")\n        logger.exception()\n    finally:\n        if chai_db:\n            chai_db.close()\n\n\nif __name__ == \"__main__\":\n    parser = argparse.ArgumentParser(\n        description=\"Batch insert URLs from a CSV file into the CHAI database.\"\n    )\n    parser.add_argument(\"file_path\", help=\"Path to the input CSV file (e.g., data.csv)\")\n    parser.add_argument(\n        \"--batch-size\",\n        \"-b\",\n        type=int,\n        default=DEFAULT_BATCH_SIZE,\n        help=f\"Number of records to insert per batch (default: {DEFAULT_BATCH_SIZE})\",\n    )\n    parser.add_argument(\n        \"--stop\",\n        \"-s\",\n        type=int,\n        help=\"Optional: stop processing after this many CSV rows.\",\n    )\n    parser.add_argument(\n        \"--dump-output\",\n        \"-d\",\n        action=\"store_true\",\n        help=f\"If set, dump all inserted/updated (id, url, url_type_id) to {OUTPUT_CSV_FILENAME}\",\n    )\n\n    args = parser.parse_args()\n\n    script_start_time = datetime.now()\n    main_logger = Logger(\"main_batch_url_loader\")\n    main_logger.log(f\"Script started at {script_start_time.isoformat()}\")\n\n    process_urls_for_batch_insert(\n        file_path=args.file_path,\n        batch_size=args.batch_size,\n        script_execution_time=script_start_time,  # Use a consistent time for the whole run\n        dump_output=args.dump_output,\n        stop_at=args.stop,\n    )\n\n    main_logger.log(\n        f\"Script finished. Total execution time: {datetime.now() - script_start_time}\"\n    )\n"
  },
  {
    "path": "scripts/chai-legacy-loader/copy_dependencies_no_thread.py",
    "content": "#!/usr/bin/env pkgx +python@3.11 uv run\nimport argparse\nimport io\nimport os\nimport uuid\n\nimport psycopg2\nimport psycopg2.errors\n\nfrom core.config import Config, PackageManager\nfrom core.logger import Logger\n\nLEGACY_CHAI_DATABASE_URL = os.environ.get(\"LEGACY_CHAI_DATABASE_URL\")\nCHAI_DATABASE_URL = os.environ.get(\"CHAI_DATABASE_URL\")\nBATCH_SIZE = 20000\nLEGACY_CHAI_PACKAGE_MANAGER_MAP: dict[PackageManager, str] = {\n    PackageManager.NPM: \"npm\",\n    PackageManager.CRATES: \"crates\",\n    PackageManager.HOMEBREW: \"brew\",\n    PackageManager.DEBIAN: \"apt\",\n    PackageManager.PKGX: \"pkgx\",\n}\n\n\nclass LegacyDB:\n    \"\"\"Handles all interactions with the legacy CHAI database.\"\"\"\n\n    def __init__(self, input_package_manager: PackageManager):\n        \"\"\"Initialize connection to the legacy database.\"\"\"\n        self.conn = psycopg2.connect(LEGACY_CHAI_DATABASE_URL)\n        # Set autocommit to False for server-side cursors\n        self.conn.set_session(autocommit=False)\n        self.logger = Logger(\"legacy_db\")\n        self.logger.debug(\"Legacy database connection established\")\n        self.package_manager_name = LEGACY_CHAI_PACKAGE_MANAGER_MAP[\n            input_package_manager\n        ]\n\n    def __del__(self):\n        \"\"\"Close connection when object is destroyed.\"\"\"\n        if hasattr(self, \"conn\") and self.conn:\n            self.conn.close()\n\n    def get_sql_content(self, filename: str) -> str:\n        \"\"\"Load SQL content from a file.\"\"\"\n        sql_file_path = os.path.join(\n            os.path.dirname(os.path.abspath(__file__)), \"sql\", filename\n        )\n        with open(sql_file_path) as f:\n            return f.read()\n\n    def create_server_cursor(self, sql_file: str, cursor_name: str) -> None:\n        \"\"\"Create a server-side cursor for efficient data fetching.\n\n        Inputs:\n            sql_file: The name of the SQL file to load\n            cursor_name: The name of the cursor to create\n            package_manager_name: The name of the package manager whose legacy data we\n                are fetching\n        \"\"\"\n        query = self.get_sql_content(sql_file)\n\n        # substitute $1 with self.package_manager_name\n        query = query.replace(\"$1\", f\"'{self.package_manager_name}'\")\n        self.logger.debug(f\"Query: {query}\")\n\n        # create a named server side cursor for retrieving data\n        declare_stmt = f\"DECLARE {cursor_name} CURSOR FOR {query}\"\n\n        # create a cursor to execute the declare statement\n        with self.conn.cursor() as cursor:\n            cursor.execute(declare_stmt)\n            self.logger.debug(\n                f\"Created server-side cursor '{cursor_name}' for {sql_file}\"\n            )\n\n    def fetch_batch(self, cursor_name: str, batch_size: int) -> list[tuple]:\n        \"\"\"Fetch a batch of records using the server-side cursor.\"\"\"\n        cursor = self.conn.cursor()\n        cursor.execute(f\"FETCH {batch_size} FROM {cursor_name}\")\n        batch = cursor.fetchall()\n        self.logger.log(f\"Fetched {len(batch)} records from cursor '{cursor_name}'\")\n        cursor.close()\n        return batch\n\n    def close_cursor(self, cursor_name: str) -> None:\n        \"\"\"Close a server-side cursor.\"\"\"\n        cursor = self.conn.cursor()\n        cursor.execute(f\"CLOSE {cursor_name}\")\n        self.logger.log(f\"Closed server-side cursor '{cursor_name}'\")\n        cursor.close()\n\n\nclass ChaiDB:\n    
\"\"\"Handles all interactions with the CHAI database.\"\"\"\n\n    def __init__(self, config: Config):\n        \"\"\"Initialize connection to the CHAI database.\"\"\"\n        self.logger = Logger(\"chai_db\")\n        self.config = config\n\n        # connect to the database\n        self.conn = psycopg2.connect(CHAI_DATABASE_URL)\n        # Use autocommit=False for server-side cursors if needed within a transaction\n        # self.conn.set_session(autocommit=False)\n        self.logger.debug(\"CHAI database connection established\")\n\n        # create the cursor for general operations\n        self.cursor = self.conn.cursor()\n        self.logger.debug(\"CHAI database cursor created\")\n\n        # configure some variables\n        self.legacy_dependency_columns = [\n            \"package_id\",\n            \"dependency_id\",\n            # the below two are not available from the sources table in the legacy db\n            # assuming everything is a runtime dependency and use the semver range *\n            \"dependency_type_id\",\n            \"semver_range\",\n        ]\n        # initialize package map\n        self.package_map = self._get_package_map()\n        self.logger.debug(\n            f\"{len(self.package_map)} {self.config.pm_config.package_manager} packages in CHAI\"\n        )\n\n        # Load existing legacy dependencies to avoid duplicates\n        self.processed_pairs = set()\n        self._load_existing_dependencies()\n\n    def _get_package_map(self) -> dict[str, uuid.UUID]:\n        \"\"\"Get a map of package import_ids to their UUIDs for the configured package\n        manager\"\"\"\n        query = \"\"\"SELECT import_id, id \n            FROM packages \n            WHERE package_manager_id = %(pm_id)s\n            AND import_id IS NOT NULL\"\"\"\n        self.cursor.execute(query, {\"pm_id\": self.config.pm_config.pm_id})\n        rows = self.cursor.fetchall()\n\n        # check that we actually loaded packages for the specified manager\n        if len(rows) == 0:\n            raise ValueError(\n                f\"{self.config.pm_config.package_manager} packages not found in DB\"\n            )\n\n        return {row[0]: row[1] for row in rows}\n\n    def _load_existing_dependencies(self, batch_size: int = BATCH_SIZE) -> None:\n        \"\"\"\n        Loads existing (package_id, dependency_id) pairs from the\n        legacy_dependencies table into self.processed_pairs using a\n        server-side cursor to handle potentially large datasets efficiently.\n        \"\"\"\n        self.logger.log(\"Loading existing legacy dependencies...\")\n        query = \"SELECT package_id, dependency_id FROM legacy_dependencies\"\n        cursor_name = \"existing_deps_cursor\"\n        total_loaded = 0\n\n        # Use a transaction context for the server-side cursor\n        with self.conn, self.conn.cursor(name=cursor_name) as named_cursor:\n            named_cursor.execute(query)\n            while True:\n                batch = named_cursor.fetchmany(batch_size)\n                if not batch:\n                    break\n                # Convert batch of tuples to set for efficient update\n                self.processed_pairs.update(batch)\n                total_loaded += len(batch)\n                if total_loaded % (batch_size * 20000) == 0:\n                    self.logger.debug(\n                        f\"Loaded {total_loaded} existing dependency pairs...\"\n                    )\n\n        self.logger.log(\n            f\"Finished loading {total_loaded} existing 
dependency pairs into memory.\"\n        )\n\n    def init_copy_expert(self) -> None:\n        \"\"\"Initialize a StringIO object to collect CSV data for copy operation\"\"\"\n        self.csv_data = io.StringIO()\n        self.columns_str = \", \".join(self.legacy_dependency_columns)\n        self.logger.debug(\"Copy buffer initialized\")\n\n    def add_rows_to_copy_expert(self, rows: list[tuple]) -> int:\n        \"\"\"Add rows to the StringIO buffer for later COPY operation\"\"\"\n        rows_added = 0\n        for row in rows:\n            package_id = self.package_map.get(row[0])\n            dependency_id = self.package_map.get(row[1])\n\n            # if package or dependency are not found, skip the row\n            if not package_id or not dependency_id:\n                # skipping because maybe the package or dependency is\n                #  not in legacy chai\n                #  marked as spam\n                continue\n\n            # if the pair has already been processed, skip the row\n            if (package_id, dependency_id) in self.processed_pairs:\n                continue\n\n            # add the pair to the processed pairs\n            self.processed_pairs.add((package_id, dependency_id))\n\n            # get the dependency type and semver range\n            # not available from the sources table in the legacy db\n            # assume everything is a runtime dependency, and use the semver range *\n            dependency_type_id = self.config.dependency_types.runtime\n            semver_range = \"*\"\n\n            csv_line = (\n                f\"{package_id},{dependency_id},{dependency_type_id},{semver_range}\"\n            )\n            self.csv_data.write(csv_line + \"\\n\")\n            rows_added += 1\n\n        return rows_added\n\n    def add_rows_with_flush(self, rows: list[tuple], max_buffer_size=100000) -> int:\n        \"\"\"Add rows to the StringIO buffer for later COPY operation\"\"\"\n        rows_added = self.add_rows_to_copy_expert(rows)\n        self.logger.log(f\"Added {rows_added} rows to the copy expert\")\n\n        # if the buffer is too large, flush it\n        if self.csv_data.tell() > max_buffer_size:\n            self.complete_copy_expert()\n            # reinitialize the buffer\n            self.init_copy_expert()\n\n        return rows_added\n\n    def complete_copy_expert(self):\n        \"\"\"Execute the COPY operation with collected data\"\"\"\n        # Reset buffer position to start\n        self.csv_data.seek(0)\n\n        # Execute the COPY FROM operation\n        try:\n            self.cursor.copy_expert(\n                f\"COPY legacy_dependencies ({self.columns_str}) FROM STDIN WITH CSV\",\n                self.csv_data,\n            )\n            self.conn.commit()\n            self.logger.log(f\"{len(self.processed_pairs)} total rows copied\")\n        except psycopg2.errors.BadCopyFileFormat as e:\n            self.logger.log(f\"Error copying data to database: {e}\")\n            # write the csv data to a file\n            with open(\"bad_copy_file.csv\", \"w\") as f:\n                f.write(self.csv_data.getvalue())\n            self.conn.rollback()\n            raise e\n\n\ndef main(\n    logger: Logger,\n    config: Config,\n    input_package_manager: PackageManager,\n    stop: int | None,\n) -> None:\n    legacy_db = LegacyDB(input_package_manager)\n    chai_db = ChaiDB(config)\n\n    # initialize the copy expert\n    chai_db.init_copy_expert()\n\n    # set up the legacy db\n    cursor_name = 
\"legacy_dependencies_cursor\"\n    legacy_db.create_server_cursor(\"dependencies.sql\", cursor_name)\n\n    logger.log(\"Starting dependency loop process\")\n    total_rows = 0\n    try:\n        while True:\n            rows = legacy_db.fetch_batch(cursor_name, BATCH_SIZE)\n\n            # break if we have no more rows\n            if not rows:\n                break\n\n            # keep adding the rows to the copy expert\n            rows_added = chai_db.add_rows_with_flush(rows)\n\n            # update the total rows processed\n            total_rows += rows_added\n\n            # break if we have processed the stop number of rows\n            if stop and total_rows >= stop:\n                break\n\n        # complete the copy expert\n        logger.log(\"Completing copy expert for the last batch\")\n        chai_db.complete_copy_expert()\n\n    except KeyboardInterrupt:\n        logger.log(\"Keyboard interrupt detected\")\n        chai_db.complete_copy_expert()\n        logger.log(f\"Total rows processed: {total_rows}\")\n\n    finally:\n        logger.log(f\"Total rows processed: {total_rows}\")\n        legacy_db.close_cursor(cursor_name)\n        legacy_db.conn.close()\n        chai_db.cursor.close()\n        chai_db.conn.close()\n        logger.log(\"Database connections closed\")\n\n\nif __name__ == \"__main__\":\n    parser = argparse.ArgumentParser()\n    parser.add_argument(\n        \"--package-manager\",\n        type=PackageManager,\n        choices=list(PackageManager),\n        required=True,\n    )\n    parser.add_argument(\n        \"--stop\",\n        type=int,\n        default=None,\n        help=\"Stop after processing a certain number of rows\",\n    )\n    args = parser.parse_args()\n\n    input_package_manager: PackageManager = args.package_manager\n    stop: int | None = args.stop\n    logger = Logger(\"chai_legacy_loader\")\n    config = Config(input_package_manager)\n\n    logger.log(f\"Importing legacy dependencies for {args.package_manager}\")\n    main(\n        logger,\n        config,\n        input_package_manager,\n        stop,\n    )\n"
  },
  {
    "path": "scripts/chai-legacy-loader/pkgx.yaml",
    "content": "dependencies:\n  - python@3.11\n  - postgresql.org@16\n  - astral.sh/uv\n"
  },
  {
    "path": "scripts/chai-legacy-loader/sql/dependencies.sql",
    "content": "-- from old CHAI's structure, the sources table stores dependencies from package to \n-- package\n-- the projects tables stores the package managers themselves, which is where we apply\n-- the where clause\nselect s.start_id, s.end_id \nfrom public.sources s \njoin public.projects p \non s.start_id = p.id \nand $1 = any(p.package_managers)\n;"
  },
  {
    "path": "scripts/chai-legacy-loader/sql/packages.sql",
    "content": "-- TODO: swap npm for $1, and update the scripts\nselect \n\tconcat('npm', '/', project_name) as \"derived_id\",\n\tproject_name as \"name\", \n\tid as \"import_id\"   \nfrom projects \nwhere \n\t'npm' = any(package_managers)\n\tand created_at < '2024-01-01'::timestamp -- before ITN\n\tand is_spam is false -- use legacy spam filter\n;"
  },
  {
    "path": "scripts/chai-legacy-loader/sql/urls.sql",
    "content": "select \n\tid as import_id,\n\t\"source\", \n\thomepage \nfrom projects\nwhere \n\t'npm' = any(package_managers)\n\tand created_at < '2024-01-01'::timestamp -- before ITN\n\tand is_spam is false -- use legacy spam filter"
  },
  {
    "path": "scripts/npm-singleton/README.md",
    "content": "# NPM Singleton Package Loader\n\nA utility script for loading a single NPM package and its metadata into the CHAI database.\n\n## Purpose\n\nThis script allows you to:\n\n1. Check if an NPM package exists in the CHAI database\n2. Fetch package metadata from the NPM registry\n3. Verify package URLs (homepage, repository, source)\n4. Check dependencies and their existence in CHAI\n5. Add the package to the CHAI database\n\n## Usage\n\n1. You must either run this script from the project root directory or specify\n   `PYTHONPATH` to point to the root directory, since it imports modules from the `core` library.\n2. You must also specify a `CHAI_DATABASE_URL` string\n\n### Method 1: Using pkgx (recommended)\n\n```bash\n# Make the script executable\nchmod +x scripts/npm-singleton/single.py\n\n# Run with PYTHONPATH set\nPYTHONPATH=. scripts/npm-singleton/single.py <package_name> [--dry-run]\n```\n\n### Method 2: Using [uv](https://github.com/astral-sh/uv) directly\n\n```bash\nPYTHONPATH=. uv run scripts/npm-singleton/single.py <package_name> [--dry-run]\n```\n\n## Arguments\n\n- `package_name`: Name of the NPM package to load (required)\n- `--dry-run`: Run in read-only mode without committing to the database\n\n> [!NOTE]\n> Strongly recommend running with the `--dry-run` flag first, to see what changes\n> you're about to implement. The output looks like:\n\n    ```bash\n    ---------------------------------------------\n    Package: @types/jest\n    ---------------------------------------------\n    ✅ @types/jest doesn't exist on CHAI\n    ---------------------------------------------\n    ✅ OK from NPM\n    ---------------------------------------------\n    ✅ has homepage: github.com/DefinitelyTyped/DefinitelyTyped\n    ✅ has repository: github.com/DefinitelyTyped/DefinitelyTyped.git\n    ✅ has source: github.com/DefinitelyTyped/DefinitelyTyped.git\n    ---------------------------------------------\n    Runtime Dependencies:\n    ✅ expect / ^29.0.0 on CHAI\n    ✅ pretty-format / ^29.0.0 on CHAI\n    ---------------------------------------------\n    Dev Dependencies:\n    (none)\n    ---------------------------------------------\n    DRY RUN: Would create the following rows:\n    - 1 Package\n    - 3 URLs\n    - 3 PackageURLs\n    - 2 Runtime Dependencies\n    - 0 Dev Dependencies\n    ---------------------------------------------\n    ℹ️ Dry run: No changes committed to database\n    ```\n\n> If a dependency doesn't exist on CHAI, you can just run the script for that\n> dependency, and then run it for your main package\n\n## Output\n\nThe script provides detailed status information about the package:\n\n```\n---------------------------------------------\nPackage: <package_name>\n---------------------------------------------\n❌ Exiting bc <package_name> exists on CHAI | ✅ <package_name> doesn't exist on CHAI\n---------------------------------------------\n❌ Exiting bc response error from registry | ✅ OK from NPM\n---------------------------------------------\n✅ has homepage: <homepage> | ❌ no homepage\n✅ has repository: <repository> | ❌ no repository\n✅ has source: <source> | ❌ no source\n---------------------------------------------\n✅ <dependency> / <semver> on CHAI | ❌ <dependency> / <semver> not on CHAI\n... 
for each dependency\n---------------------------------------------\n```\n\nIn dry-run mode, the script will show what changes would be made without committing them to the database.\n\n## Examples\n\nCheck a package without adding it to the database:\n\n```bash\nPYTHONPATH=. ./scripts/npm-singleton/single.py react --dry-run\n```\n\nAdd a package to the database:\n\n```bash\nPYTHONPATH=. ./scripts/npm-singleton/single.py lodash\n```\n\n## Tasks\n\nThe tasks below can be run using [xc](https://xcfile.dev) from within the\n`scripts/npm-singleton` directory.\n\n### check\n\nEnv: PYTHONPATH=../..\nInputs: PACKAGE\n\n```bash\n./single.py $PACKAGE --dry-run\n```\n\n### add\n\nEnv: PYTHONPATH=../..\nInputs: PACKAGE\n\n```bash\n./single.py $PACKAGE\n```\n"
  },
  {
    "path": "scripts/npm-singleton/pkgx.yaml",
    "content": "dependencies:\n  xcfile.dev: \"*\"\n  python.org: ^3.11\n"
  },
  {
    "path": "scripts/npm-singleton/single.py",
    "content": "#!/usr/bin/env pkgx +python@3.11 uv run --with requests==2.31.0 --with permalint==0.1.15\nimport argparse\nimport sys\nfrom uuid import UUID, uuid4\n\nimport requests\nfrom permalint import normalize_url\n\nfrom core.config import Config, PackageManager\nfrom core.db import DB\nfrom core.models import URL, LegacyDependency, Package, PackageURL\n\nNPM_API_URL = \"https://registry.npmjs.org/{name}\"\n\n\nclass ChaiDB(DB):\n    def __init__(self):\n        super().__init__(\"chai-singleton\")\n\n    def check_package_exists(self, derived_id: str) -> bool:\n        with self.session() as session:\n            return (\n                session.query(Package).filter(Package.derived_id == derived_id).first()\n                is not None\n            )\n\n    def get_package_by_derived_id(self, derived_id: str) -> Package:\n        with self.session() as session:\n            return (\n                session.query(Package).filter(Package.derived_id == derived_id).first()\n            )\n\n    def load(\n        self,\n        pkg: Package,\n        urls: list[URL],\n        runtime_deps: list[LegacyDependency],\n        dev_deps: list[LegacyDependency],\n    ) -> None:\n        \"\"\"Load a package and its URLs into the database. Uses the same session to avoid\n        transactional inconsistencies.\n\n        Args:\n            pkg: The package to load.\n            urls: The URLs to load.\n        \"\"\"\n        with self.session() as session:\n            # Load the package first\n            session.add(pkg)\n            session.flush()  # to create the id\n            pkg_id = pkg.id\n\n            # Load the URLs\n            for url in urls:\n                session.add(url)\n            session.flush()  # to create the id\n            url_ids = [url.id for url in urls]\n\n            # Create the package URL relationships\n            for url_id in url_ids:\n                session.add(PackageURL(package_id=pkg_id, url_id=url_id))\n\n            # Create the legacy dependencies\n            for dep in runtime_deps:\n                session.add(dep)\n            for dep in dev_deps:\n                session.add(dep)\n            session.commit()\n\n\ndef get_package_info(npm_package: str) -> tuple[bool, dict, str | None]:\n    url = NPM_API_URL.format(name=npm_package)\n    try:\n        response = requests.get(url)\n        if response.status_code != 200:\n            return (\n                False,\n                {},\n                f\"Failed with status {response.status_code}: {response.text}\",\n            )\n        return True, response.json(), None\n    except Exception as e:\n        return False, {}, f\"Request failed: {e!s}\"\n\n\ndef get_homepage(package_info: dict) -> tuple[bool, str | None]:\n    try:\n        return True, canonicalize(package_info[\"homepage\"])\n    except KeyError:\n        return False, None\n    except Exception as e:\n        return False, str(e)\n\n\ndef get_repository_url(package_info: dict) -> tuple[bool, str | None]:\n    try:\n        return True, canonicalize(package_info[\"repository\"][\"url\"])\n    except KeyError:\n        return False, None\n    except Exception as e:\n        return False, str(e)\n\n\ndef get_source_url(package_info: dict) -> tuple[bool, str | None]:\n    try:\n        repository_obj = package_info[\"repository\"]\n        if repository_obj[\"type\"] == \"git\":\n            return True, canonicalize(repository_obj[\"url\"])\n        else:\n            return False, f\"Repository is not a git URL: 
{repository_obj}\"\n    except KeyError:\n        return False, None\n    except Exception as e:\n        return False, str(e)\n\n\ndef canonicalize(url: str) -> str:\n    return normalize_url(url)\n\n\ndef get_latest_version(package_info: dict) -> tuple[bool, str | None]:\n    try:\n        dist_tags = package_info[\"dist-tags\"]\n        return True, dist_tags[\"latest\"]\n    except KeyError:\n        return False, None\n\n\ndef get_version_info(package_info: dict, version: str) -> tuple[bool, dict | None]:\n    try:\n        return True, package_info[\"versions\"][version]\n    except KeyError:\n        return False, None\n\n\ndef get_latest_version_dependencies(\n    latest_version: dict,\n) -> tuple[bool, dict[str, str]]:\n    \"\"\"Gets the dependencies from a version object from NPM's Registry API\n\n    Returns:\n      - a tuple of (success, dependencies) where dependencies is a dictionary\n        keyed by dependency, with semver range as the value\n    \"\"\"\n    try:\n        deps = latest_version.get(\"dependencies\", {})\n        return True, deps\n    except Exception:\n        return False, {}\n\n\ndef get_latest_version_dev_dependencies(\n    latest_version: dict,\n) -> tuple[bool, dict[str, str]]:\n    \"\"\"Gets the development dependencies from a version object from NPM's Registry API\n\n    Returns:\n      - a tuple of (success, dependencies) where dependencies is a dictionary\n        keyed by dependency, with semver range as the value\n    \"\"\"\n    try:\n        deps = latest_version.get(\"devDependencies\", {})\n        return True, deps\n    except Exception:\n        return False, {}\n\n\ndef check_dependencies_on_chai(\n    db: ChaiDB, deps: dict[str, str]\n) -> list[tuple[str, str, bool]]:\n    \"\"\"Check if dependencies exist on CHAI\n\n    Args:\n        db: ChaiDB instance\n        deps: Dependencies to check\n\n    Returns:\n        List of tuples (dependency_name, semver_range, exists_on_chai)\n    \"\"\"\n    results = []\n    for dep_name, dep_range in deps.items():\n        derived_id = f\"npm/{dep_name}\"\n        exists = db.get_package_by_derived_id(derived_id) is not None\n        results.append((dep_name, dep_range, exists))\n\n    return results\n\n\ndef generate_url(url_type_id: UUID, url: str) -> URL:\n    return URL(id=uuid4(), url=url, url_type_id=url_type_id)\n\n\ndef generate_legacy_dependencies(\n    db: ChaiDB, pkg: Package, deps: dict[str, str], dependency_type_id: UUID\n) -> tuple[list[LegacyDependency], list[tuple[str, str, bool]]]:\n    legacy_deps: list[LegacyDependency] = []\n    dep_status: list[tuple[str, str, bool]] = []\n\n    for dep_name, dep_range in deps.items():\n        derived_id = f\"npm/{dep_name}\"\n        chai_dep: Package | None = db.get_package_by_derived_id(derived_id)\n        exists = chai_dep is not None\n        dep_status.append((dep_name, dep_range, exists))\n\n        if not exists:\n            continue\n\n        dependency = LegacyDependency(\n            package_id=pkg.id,\n            dependency_id=chai_dep.id,\n            dependency_type_id=dependency_type_id,\n            semver_range=dep_range,\n        )\n        legacy_deps.append(dependency)\n\n    return legacy_deps, dep_status\n\n\ndef print_status_report(\n    package_name: str,\n    exists_on_chai: bool,\n    npm_response_ok: bool,\n    npm_error: str | None,\n    homepage_result: tuple[bool, str | None],\n    repository_result: tuple[bool, str | None],\n    source_result: tuple[bool, str | None],\n    runtime_deps: list[tuple[str, str, 
bool]],\n    dev_deps: list[tuple[str, str, bool]],\n    changes_summary: dict[str, int] | None = None,\n    dry_run: bool = False,\n):\n    \"\"\"Print a formatted status report of the package processing\"\"\"\n    divider = \"-\" * 45\n\n    print(divider)\n    print(f\"Package: {package_name}\")\n    print(divider)\n\n    if exists_on_chai:\n        print(f\"❌ Exiting bc {package_name} exists on CHAI\")\n    else:\n        print(f\"✅ {package_name} doesn't exist on CHAI\")\n\n    print(divider)\n\n    if npm_response_ok:\n        print(\"✅ OK from NPM\")\n    else:\n        print(f\"❌ Exiting bc response error from registry: {npm_error}\")\n\n    print(divider)\n\n    homepage_ok, homepage = homepage_result\n    if homepage_ok:\n        print(f\"✅ has homepage: {homepage}\")\n    else:\n        print(\"❌ no homepage\")\n\n    repository_ok, repository = repository_result\n    if repository_ok:\n        print(f\"✅ has repository: {repository}\")\n    else:\n        print(\"❌ no repository\")\n\n    source_ok, source = source_result\n    if source_ok:\n        print(f\"✅ has source: {source}\")\n    else:\n        print(\"❌ no source\")\n\n    print(divider)\n    print(\"Runtime Dependencies:\")\n    if not runtime_deps:\n        print(\"(none)\")\n    else:\n        for dep, semver, exists in runtime_deps:\n            if exists:\n                print(f\"✅ {dep} / {semver} on CHAI\")\n            else:\n                print(f\"❌ {dep} / {semver} not on CHAI\")\n\n    print(divider)\n    print(\"Dev Dependencies:\")\n    if not dev_deps:\n        print(\"(none)\")\n    else:\n        for dep, semver, exists in dev_deps:\n            if exists:\n                print(f\"✅ {dep} / {semver} on CHAI\")\n            else:\n                print(f\"❌ {dep} / {semver} not on CHAI\")\n\n    print(divider)\n\n    if changes_summary:\n        if dry_run:\n            print(\"DRY RUN: Would create the following rows:\")\n        else:\n            print(\"Created the following rows:\")\n\n        for entity_type, count in changes_summary.items():\n            print(f\"  - {count} {entity_type}\")\n    else:\n        print(\"Won't even create any rows\")\n\n    print(divider)\n\n\ndef process_package(package_name: str, dry_run: bool = False) -> bool:\n    \"\"\"Process a package and return True if successful, False otherwise\"\"\"\n    config = Config(PackageManager.NPM)\n    chai_db = ChaiDB()\n\n    # Check if package exists\n    derived_id = f\"npm/{package_name}\"\n    exists_on_chai = chai_db.check_package_exists(derived_id)\n\n    # Get Package Info from NPM\n    npm_response_ok, package_info, npm_error = get_package_info(package_name)\n\n    # Check URLs\n    homepage_result = get_homepage(package_info) if npm_response_ok else (False, None)\n    repository_result = (\n        get_repository_url(package_info) if npm_response_ok else (False, None)\n    )\n    source_result = get_source_url(package_info) if npm_response_ok else (False, None)\n\n    # Check latest version\n    latest_version_result = (\n        get_latest_version(package_info) if npm_response_ok else (False, None)\n    )\n\n    # Get version info\n    version_info_result = (False, None)\n    if npm_response_ok and latest_version_result[0]:\n        version_info_result = get_version_info(package_info, latest_version_result[1])\n\n    # Get dependencies\n    runtime_deps_result = (False, {})\n    dev_deps_result = (False, {})\n    if npm_response_ok and version_info_result[0]:\n        runtime_deps_result = 
get_latest_version_dependencies(version_info_result[1])\n        dev_deps_result = get_latest_version_dev_dependencies(version_info_result[1])\n\n    # Check dependencies on CHAI\n    runtime_deps_status = check_dependencies_on_chai(chai_db, runtime_deps_result[1])\n    dev_deps_status = check_dependencies_on_chai(chai_db, dev_deps_result[1])\n\n    # Create entities to add to database if not in dry run mode and all checks pass\n    changes_summary = {\n        \"Package\": 1,\n        \"URLs\": 0,\n        \"PackageURLs\": 0,\n        \"Runtime Dependencies\": 0,\n        \"Dev Dependencies\": 0,\n    }\n\n    # Early exit if necessary conditions aren't met\n    if exists_on_chai or not npm_response_ok:\n        print_status_report(\n            package_name,\n            exists_on_chai,\n            npm_response_ok,\n            npm_error,\n            homepage_result,\n            repository_result,\n            source_result,\n            runtime_deps_status,\n            dev_deps_status,\n            None,\n            dry_run,\n        )\n        return False\n\n    # Create Package\n    derived_id = f\"npm/{package_name}\"\n    package_manager_id = config.pm_config.pm_id\n    import_id = f\"npm-singleton/{package_name}\"\n    readme = package_info.get(\"readme\", \"\")\n\n    pkg = Package(\n        id=uuid4(),\n        name=package_name,\n        derived_id=derived_id,\n        package_manager_id=package_manager_id,\n        import_id=import_id,\n        readme=readme,\n    )\n\n    # URLs\n    urls = []\n    if homepage_result[0]:\n        urls.append(\n            generate_url(config.url_types.homepage, normalize_url(homepage_result[1]))\n        )\n    if repository_result[0]:\n        urls.append(\n            generate_url(\n                config.url_types.repository, normalize_url(repository_result[1])\n            )\n        )\n    if source_result[0]:\n        urls.append(\n            generate_url(config.url_types.source, normalize_url(source_result[1]))\n        )\n\n    changes_summary[\"URLs\"] = len(urls)\n    changes_summary[\"PackageURLs\"] = len(urls)\n\n    # Dependencies\n    runtime_deps, _ = generate_legacy_dependencies(\n        chai_db, pkg, runtime_deps_result[1], config.dependency_types.runtime\n    )\n    dev_deps, _ = generate_legacy_dependencies(\n        chai_db, pkg, dev_deps_result[1], config.dependency_types.development\n    )\n\n    changes_summary[\"Runtime Dependencies\"] = len(runtime_deps)\n    changes_summary[\"Dev Dependencies\"] = len(dev_deps)\n\n    # Print status report\n    print_status_report(\n        package_name,\n        exists_on_chai,\n        npm_response_ok,\n        npm_error,\n        homepage_result,\n        repository_result,\n        source_result,\n        runtime_deps_status,\n        dev_deps_status,\n        changes_summary,\n        dry_run,\n    )\n\n    # Load the package into the database (unless in dry run mode)\n    if not dry_run:\n        chai_db.load(pkg, urls, runtime_deps, dev_deps)\n        print(\"✅ Successfully committed changes to database\")\n    else:\n        print(\"🌵 Dry run: No changes committed to database\")\n\n    return True\n\n\nif __name__ == \"__main__\":\n    parser = argparse.ArgumentParser(\n        description=\"Load a single NPM package by name into CHAI\"\n    )\n    parser.add_argument(\"name\", help=\"Name of the NPM package\")\n    parser.add_argument(\n        \"--dry-run\",\n        action=\"store_true\",\n        help=\"Check package without committing to database\",\n    )\n    
args = parser.parse_args()\n\n    success = process_package(args.name, args.dry_run)\n    if not success:\n        sys.exit(1)\n"
  },
  {
    "path": "scripts/package_to_package/package_dependencies.py",
    "content": "#! /usr/bin/env pkgx +python@3.11 uv run\nimport argparse\nimport re\nimport sys\nfrom typing import Any\n\nfrom packaging import version as packaging_version\nfrom sqlalchemy.dialects.postgresql import insert as pg_insert\nfrom sqlalchemy.exc import IntegrityError\nfrom sqlalchemy.orm import Session, sessionmaker\n\nfrom core.config import Config, PackageManager\nfrom core.db import DB\nfrom core.logger import Logger\nfrom core.models import DependsOn, LegacyDependency, Package, Version\n\n# --- Constants ---\nINSERT_BATCH_SIZE = 5000\nDEFAULT_SEMVER_RANGE = \"*\"\n\nlogger = Logger(\"package_dependency_migration\")\n\n# --- Helper Functions ---\n\n\ndef preprocess_version_string(version_str: str) -> str:\n    \"\"\"\n    Transforms known non-PEP440 version strings into a parseable format.\n    Handles specific date formats, build tags, and common non-standard separators.\n    \"\"\"\n    # Replace underscores between digits or letters/digits\n    version_str = re.sub(r\"(?<=[a-zA-Z\\d])_(?=[a-zA-Z\\d])\", \".\", version_str)\n\n    # === Pattern Matching & Transformation (Order Matters!) ===\n\n    # --- Specific Patterns First ---\n    # Handle X.Y.Z-M<number> -> X.Y.Z+M<number> (Milestone)\n    match_milestone = re.fullmatch(r\"(\\d+(\\.\\d+)*)-M(\\d+)\", version_str)\n    if match_milestone:\n        return f\"{match_milestone.group(1)}+M{match_milestone.group(3)}\"\n\n    # Handle X.Y.Z-<string>.<number> -> X.Y.Z+<string>.<number> (Vendor Build)\n    match_vendor_build = re.fullmatch(r\"(\\d+(\\.\\d+)+)-([a-zA-Z]+)\\.(\\d+)\", version_str)\n    if match_vendor_build:\n        return f\"{match_vendor_build.group(1)}+{match_vendor_build.group(3)}.{match_vendor_build.group(4)}\"\n\n    # Handle X.Y.Z-git<build> -> X.Y.Z+git<build>\n    match_git_build = re.fullmatch(r\"(\\d+(\\.\\d+)+)-(git[\\da-zA-Z]+)\", version_str)\n    if match_git_build:\n        return f\"{match_git_build.group(1)}+{match_git_build.group(2)}\"\n\n    # Handle X.Y.Z-p<number> / X.Y.Zp<number> -> X.Y.Z+p<number>\n    match_p_patch1 = re.fullmatch(r\"(\\d+(\\.\\d+)+)-p(\\d+)\", version_str)\n    if match_p_patch1:\n        return f\"{match_p_patch1.group(1)}+p{match_p_patch1.group(3)}\"\n    match_p_patch2 = re.fullmatch(r\"(\\d+(\\.\\d+)+)p(\\d+)\", version_str)\n    if match_p_patch2:\n        return f\"{match_p_patch2.group(1)}+p{match_p_patch2.group(3)}\"\n\n    # --- Date Formats ---\n    # YYYY-MM-DD -> YYYY.MM.DD\n    if re.fullmatch(r\"\\d{4}-\\d{2}-\\d{2}\", version_str):\n        return version_str.replace(\"-\", \".\")\n\n    # YYYY.MM.DD.<commit_hash> -> YYYY.MM.DD+commit_hash\n    # TODO: Hashes on the same date are compared lexicographically, which might not\n    # reflect actual order.\n    match_dot_date_hash = re.fullmatch(\n        r\"(\\d{4}\\.\\d{2}\\.\\d{2})\\.([a-zA-Z0-9]+)\", version_str\n    )\n    if match_dot_date_hash:\n        # Ensure the suffix isn't just a standard version number or time-like\n        suffix = match_dot_date_hash.group(2)\n        try:\n            # If packaging can parse \"0.<suffix>\", it's likely not a hash\n            packaging_version.parse(f\"0.{suffix}\")\n            # Also check if it looks like HH.MM.SS\n            if not re.fullmatch(r\"\\d{2}\\.\\d{2}\\.\\d{2}\", suffix):\n                return f\"{match_dot_date_hash.group(1)}+{suffix}\"  # Treat as hash\n        except packaging_version.InvalidVersion:\n            return f\"{match_dot_date_hash.group(1)}+{suffix}\"  # Treat as hash\n        except Exception:\n            return 
f\"{match_dot_date_hash.group(1)}+{suffix}\"  # Treat as hash\n\n    # YYYYMMDDTHHMMSS -> YYYYMMDD.HHMMSS\n    match_ymdt_compact = re.fullmatch(r\"(\\d{8})T(\\d{6})\", version_str)\n    if match_ymdt_compact:\n        return f\"{match_ymdt_compact.group(1)}.{match_ymdt_compact.group(2)}\"\n\n    # YYYY.MM.DD-HH.MM.SS -> YYYY.MM.DD+HHMMSS\n    match_ymd_time_hyphen = re.fullmatch(\n        r\"(\\d{4}\\.\\d{2}\\.\\d{2})-(\\d{2}\\.\\d{2}\\.\\d{2})\", version_str\n    )\n    if match_ymd_time_hyphen:\n        time_part = match_ymd_time_hyphen.group(2).replace(\".\", \"\")\n        return f\"{match_ymd_time_hyphen.group(1)}+{time_part}\"\n\n    # ISO 8601 subset: YYYY-MM-DDTHH-MM-SSZ -> YYYY.MM.DD+HHMMSSZ\n    match_iso_subset = re.fullmatch(\n        r\"(\\d{4})-(\\d{2})-(\\d{2})T(\\d{2})-(\\d{2})-(\\d{2})Z\", version_str\n    )\n    if match_iso_subset:\n        date_part = f\"{match_iso_subset.group(1)}.{match_iso_subset.group(2)}.{match_iso_subset.group(3)}\"\n        time_part = f\"{match_iso_subset.group(4)}{match_iso_subset.group(5)}{match_iso_subset.group(6)}Z\"\n        return f\"{date_part}+{time_part}\"\n\n    # YYYY_MM_DD.commit_hash -> YYYY.MM.DD+commit_hash\n    match_commit_hash = re.fullmatch(\n        r\"(\\d{4}_\\d{2}_\\d{2})\\.([a-zA-Z0-9]+)\", version_str\n    )\n    if match_commit_hash:\n        return f\"{match_commit_hash.group(1)}+{match_commit_hash.group(2)}\"\n\n    # <datestamp>-<string|version> -> <datestamp>+<string|version>\n    match_date_suffix = re.fullmatch(r\"(\\d{8})-?(.*)\", version_str)\n    if match_date_suffix and match_date_suffix.group(2):  # Ensure there is a suffix\n        # Check if suffix looks like a simple version number itself,\n        # otherwise treat as string\n        suffix = match_date_suffix.group(2)\n        # Normalize suffix by removing dots if it looks like a version part\n        # This helps comparison e.g., update1 vs 3.1 -> update1 vs 31\n        normalized_suffix = suffix.replace(\".\", \"\")\n        return f\"{match_date_suffix.group(1)}+{normalized_suffix}\"\n\n    # --- More General Build/Patch Identifiers ---\n    # Handle X.Y.Z.v<build> -> X.Y.Z+v<build>\n    match_v_build = re.fullmatch(r\"(\\d+(\\.\\d+)+)\\.v(.*)\", version_str)\n    if match_v_build:\n        return f\"{match_v_build.group(1)}+v{match_v_build.group(3)}\"\n\n    # Handle X.Yrel.<number> -> X.Y+rel.<number>\n    match_rel_build = re.fullmatch(r\"(\\d+(\\.\\d+)+)rel\\.(.*)\", version_str)\n    if match_rel_build:\n        return f\"{match_rel_build.group(1)}+rel.{match_rel_build.group(3)}\"\n\n    # Handle X.Yga<number> -> X.Y+ga<number>\n    match_ga_build = re.fullmatch(r\"(\\d+(\\.\\d+)+)ga(\\d+)\", version_str)\n    if match_ga_build:\n        return f\"{match_ga_build.group(1)}+ga{match_ga_build.group(3)}\"\n\n    # Handle <major>-<build> (comes after more specific hyphenated patterns)\n    match_major_build = re.fullmatch(r\"(\\d+)-([\\da-zA-Z]+)\", version_str)\n    if match_major_build:\n        return f\"{match_major_build.group(1)}+{match_major_build.group(2)}\"\n\n    # Handle r<number> -> 0+r<number>\n    match_revision = re.fullmatch(r\"r(\\d+)\", version_str)\n    if match_revision:\n        return f\"0+r{match_revision.group(1)}\"\n\n    # Handle X.Y.Z...<letter_suffix> -> X.Y.Z...+suffix (openssl@1.1.1w)\n    match_version_letter_suffix = re.fullmatch(r\"(\\d+(\\.\\d+)+)([a-zA-Z]+)\", version_str)\n    if match_version_letter_suffix:\n        base_version_part = match_version_letter_suffix.group(1)\n        if 
base_version_part.count(\".\") > 0:  # Ensures at least X.Y.Z format\n            return f\"{match_version_letter_suffix.group(1)}+{match_version_letter_suffix.group(3)}\"\n\n    # Handle X.Y<single_letter_suffix> / X.Y<two_letter_suffix> -> X.Y+suffix\n    match_letter_suffix = re.fullmatch(r\"(\\d+\\.\\d+)([a-zA-Z]{1,2})\", version_str)\n    if match_letter_suffix:\n        return f\"{match_letter_suffix.group(1)}+{match_letter_suffix.group(2)}\"\n\n    # Handle leading 'p' if it looks like p<version>\n    if version_str.startswith(\"p\") and re.match(r\"p\\d\", version_str):\n        potential_version = version_str[1:]\n        try:\n            packaging_version.parse(potential_version)\n            return potential_version\n        except packaging_version.InvalidVersion:\n            pass\n\n    # --- Fallback ---\n    return version_str\n\n\ndef get_latest_version_info(versions: list[Version]) -> Version | None:\n    \"\"\"\n    Identifies the latest version from a list using packaging.version for robust parsing\n    unless there is only one version provided.\n\n    Args:\n        versions: A list of Version objects for a single package.\n\n    Returns:\n        - None if the list is empty, or;\n        - The single Version object if only one is provided, or;\n        - The Version object corresponding to the latest parseable version.\n    \"\"\"\n    # Handle empty list\n    if not versions:\n        return None\n\n    # If there's only one version, return it directly without parsing\n    if len(versions) == 1:\n        return versions[0]\n\n    # Proceed with parsing and comparison if more than one version exists\n    latest_parsed_version = None\n    latest_version_obj = None\n\n    for version_obj in versions:\n        original_version_str = version_obj.version\n        preprocessed_str = preprocess_version_string(original_version_str)\n        try:\n            current_parsed_version = packaging_version.parse(preprocessed_str)\n            if (\n                latest_parsed_version is None\n                or current_parsed_version > latest_parsed_version\n            ):\n                latest_parsed_version = current_parsed_version\n                latest_version_obj = version_obj\n        except packaging_version.InvalidVersion as e_invalid:\n            logger.warn(\n                f\"Invalid version: '{original_version_str}' -> '{preprocessed_str}' -> {e_invalid}\"\n            )\n            continue\n        except Exception as e_general:\n            logger.error(\n                f\"Unexpected error: '{original_version_str}' -> '{preprocessed_str}' -> {e_general}\"\n            )\n            continue\n\n    # If no versions were successfully processed\n    if latest_version_obj is None:\n        import_id = versions[0].import_id\n        versions_str = \", \".join([v.version for v in versions])\n        logger.warn(f\"No versions for {import_id}: {versions_str}\")\n\n    return latest_version_obj\n\n\ndef insert_legacy_dependencies(\n    session: Session, data_batch: list[dict[str, Any]]\n) -> None:\n    \"\"\"\n    Inserts a batch of legacy dependency records into the database,\n    ignoring duplicates based on the (package_id, dependency_id) unique constraint.\n\n    Args:\n        session: The SQLAlchemy session object.\n        data_batch: A list of dictionaries, each representing a LegacyDependency row.\n    \"\"\"\n    if not data_batch:\n        return\n\n    try:\n        # Get the target table object\n        legacy_table = LegacyDependency.__table__\n\n    
    # Construct the PostgreSQL INSERT...ON CONFLICT DO NOTHING statement\n        stmt = pg_insert(legacy_table).values(data_batch)\n        # Specify the columns involved in the unique constraint\n        # The constraint name 'uq_package_dependency' is defined in the model\n        stmt = stmt.on_conflict_do_nothing(\n            index_elements=[\"package_id\", \"dependency_id\"]\n        )\n\n        # Execute the statement\n        session.execute(stmt)\n        session.commit()\n\n    except IntegrityError as e:\n        logger.error(f\"Database Integrity Error during insert: {e}\")\n        session.rollback()\n        raise e\n    except Exception as e:\n        logger.error(f\"An unexpected error occurred during bulk insert: {e}\")\n        session.rollback()\n        raise e\n\n\ndef process_package_dependencies(config: Config, session: Session) -> None:\n    legacy_deps_to_insert: list[dict[str, Any]] = []\n    total_packages_processed = 0\n    total_dependencies_found = 0\n    default_dependency_type_id = config.dependency_types.runtime\n\n    logger.log(f\"Starting migration for package manager ID: {config.pm_config.pm_id}\")\n\n    # --- Fetch ALL packages for the manager ---\n    logger.log(\"Fetching all packages for the specified manager...\")\n    all_packages: list[Package] = (\n        session.query(Package)\n        .filter(Package.package_manager_id == config.pm_config.pm_id)\n        .all()\n    )\n    logger.log(f\"Fetched {len(all_packages)} packages.\")\n\n    # --- Process all fetched packages ---\n    for pkg in all_packages:\n        total_packages_processed += 1\n\n        # debug\n        if total_packages_processed % 1000 == 0:\n            logger.debug(\n                f\"Processed {total_packages_processed}/{len(all_packages)} packages...\"\n            )\n\n        versions = session.query(Version).filter(Version.package_id == pkg.id).all()\n\n        # skip if no versions\n        if not versions:\n            continue\n\n        # grab the latest version\n        latest_version = get_latest_version_info(versions)\n        if latest_version is None:\n            continue\n\n        # grab the dependencies for the latest version\n        dependencies = (\n            session.query(DependsOn)\n            .filter(DependsOn.version_id == latest_version.id)\n            .all()\n        )\n\n        # construct the load object\n        for dependency in dependencies:\n            dep_data = {\n                \"package_id\": pkg.id,\n                \"dependency_id\": dependency.dependency_id,\n                \"dependency_type_id\": dependency.dependency_type_id\n                or default_dependency_type_id,\n                \"semver_range\": dependency.semver_range or DEFAULT_SEMVER_RANGE,\n            }\n            legacy_deps_to_insert.append(dep_data)\n            total_dependencies_found += 1\n\n        # --- Insert if batch is full ---\n        if len(legacy_deps_to_insert) >= INSERT_BATCH_SIZE:\n            logger.log(f\"Reached insert batch size ({INSERT_BATCH_SIZE}). 
Inserting...\")\n            insert_legacy_dependencies(session, legacy_deps_to_insert)\n            legacy_deps_to_insert = []\n\n    # --- Final Insert ---\n    if legacy_deps_to_insert:\n        logger.log(\n            f\"Inserting final batch of {len(legacy_deps_to_insert)} dependency records.\"\n        )\n        insert_legacy_dependencies(session, legacy_deps_to_insert)\n\n    logger.log(\"--- Migration Summary ---\")\n    logger.log(f\"Total packages processed: {total_packages_processed}\")\n    logger.log(f\"Total dependencies found: {total_dependencies_found}\")\n    logger.log(\"Migration process completed.\")\n\n\n# --- Main Execution ---\n\nif __name__ == \"__main__\":\n    desc = \"\"\"Migrate version-specific dependencies to package-level dependencies based \n    on the latest version.\"\"\"\n    parser = argparse.ArgumentParser(description=desc)\n    parser.add_argument(\n        \"--package-manager\",\n        type=lambda pm: PackageManager[pm.upper()],\n        choices=list(PackageManager),\n        required=True,\n        help=\"The package manager to process (e.g., NPM, CRATES).\",\n    )\n\n    args = parser.parse_args()\n\n    logger.log(\n        f\"Starting package dependency migration for: {args.package_manager.name}\"\n    )\n\n    SessionLocal = None\n    try:\n        config = Config(args.package_manager)\n        db = DB(\"db_logger\")\n        SessionLocal = sessionmaker(bind=db.engine)\n\n        with SessionLocal() as session:\n            process_package_dependencies(config, session)\n\n    except Exception as e:\n        logger.error(f\"An critical error occurred: {e}\")\n        sys.exit(1)\n    finally:\n        logger.log(\"Script finished.\")\n"
  },
  {
    "path": "scripts/upgrade_canons/.gitignore",
    "content": "*.csv"
  },
  {
    "path": "scripts/upgrade_canons/README.md",
    "content": "# Upgrade Canons Scripts\n\nCollection of scripts for managing canonical URLs and Canon IDs in CHAI database.\n\n## Scripts Overview\n\n| Script                     | Purpose                                                            | Usage                                                         | Sample Output                               |\n| -------------------------- | ------------------------------------------------------------------ | ------------------------------------------------------------- | ------------------------------------------- |\n| `main.py`                  | Creates canonical URL entries for non-standardized homepage URLs   | `./main.py --homepage-id <uuid> [--dry-run]`                  | `✅ Inserted 45678 URLs, 52341 PackageURLs` |\n| `registered_projects.py`   | Updates Canon IDs for registered projects to restore old canon IDs | `cat canon_ids.txt \\| ./registered_projects.py [--dry-run]`   | `✅ Success: 150`<br>`❌ Failure: 25`       |\n| `create_deleted_canons.py` | Creates canons for registered projects that were deleted           | `./create_deleted_canons.py --csv-file input.csv [--dry-run]` | `✅ Success: 75`<br>`❌ Failure: 12`        |\n\n## Requirements\n\n- pkgx (or uv)\n- CHAI_DATABASE_URL environment variable\n- Python dependencies: `psycopg2==2.9.10`, `permalint==0.1.14`\n\n## Common Options\n\n- `--dry-run`: Show what would be done without making changes\n- Input failures are written to CSV files for review\n\n## Database Schema Dependencies\n\nScripts interact with these tables:\n\n- `urls`, `url_types`, `package_urls`\n- `canons`, `canon_packages`, `canon_packages_old`\n- `tea_ranks`, `packages`\n"
  },
  {
    "path": "scripts/upgrade_canons/create_deleted_canons.py",
    "content": "#!/usr/bin/env pkgx uv run\n\nimport argparse\nimport csv\nimport sys\nfrom uuid import UUID\n\nfrom scripts.upgrade_canons.db import DB\n\n\ndef read_package_data_from_csv(filename: str) -> list[tuple[str, UUID]]:\n    \"\"\"Read package names and canon IDs from CSV file and return as list of tuples.\"\"\"\n    package_data = []\n    try:\n        with open(filename, newline=\"\") as csvfile:\n            reader = csv.DictReader(csvfile)\n            for row in reader:\n                try:\n                    package_name = row[\"package_name\"]\n                    canon_id = UUID(row[\"canon_id\"])\n                    package_data.append((package_name, canon_id))\n                except (ValueError, KeyError) as e:\n                    print(\n                        f\"Warning: Invalid row in CSV '{row}': {e}\",\n                        file=sys.stderr,\n                    )\n    except FileNotFoundError:\n        print(f\"Error: File '{filename}' not found\", file=sys.stderr)\n        sys.exit(1)\n    return package_data\n\n\ndef process_deleted_package(\n    db: DB, package_name: str, target_canon_id: UUID, dry_run: bool\n) -> tuple[bool, str]:\n    \"\"\"\n    Process a single package name for deleted registered projects.\n    Returns (success, reason) tuple.\n    \"\"\"\n    # Step 1: Prepend 'npm/' to the name to create derived_id\n    derived_id = f\"npm/{package_name}\"\n\n    # Step 2: Search by derived_id to get the package_id\n    db.cursor.execute(\n        \"\"\"\n        SELECT id \n        FROM packages \n        WHERE derived_id = %s\n    \"\"\",\n        (derived_id,),\n    )\n\n    package_result = db.cursor.fetchone()\n    if not package_result:\n        return False, \"could not find derived_id\"\n\n    package_id = package_result[0]\n\n    # Step 3: Join to canon_packages to retrieve the current canon_id\n    db.cursor.execute(\n        \"\"\"\n        SELECT canon_id \n        FROM canon_packages \n        WHERE package_id = %s\n    \"\"\",\n        (package_id,),\n    )\n\n    current_result = db.cursor.fetchone()\n    if not current_result:\n        return False, \"could not find current canon_id\"\n\n    current_canon_id = current_result[0]\n\n    if dry_run:\n        print(\n            f\"DRY RUN: Would update canon_id {current_canon_id} to {target_canon_id} for package {derived_id} (package_id: {package_id})\"\n        )\n        return True, \"\"\n\n    try:\n        # Run the three update statements using target_canon_id from CSV\n        # 1. Update canons table\n        db.cursor.execute(\n            \"\"\"\n            UPDATE canons\n            SET id = %s\n            WHERE id = %s\n        \"\"\",\n            (target_canon_id, current_canon_id),\n        )\n\n        # 2. Update canon_packages table\n        db.cursor.execute(\n            \"\"\"\n            UPDATE canon_packages\n            SET canon_id = %s\n            WHERE canon_id = %s\n        \"\"\",\n            (target_canon_id, current_canon_id),\n        )\n\n        # 3. 
Update tea_ranks table\n        db.cursor.execute(\n            \"\"\"\n            UPDATE tea_ranks\n            SET canon_id = %s\n            WHERE canon_id = %s\n        \"\"\",\n            (target_canon_id, current_canon_id),\n        )\n\n        return True, \"\"\n\n    except Exception as e:\n        print(\n            f\"Error updating canon_id for package {package_name}: {e}\", file=sys.stderr\n        )\n        return False, f\"database error: {e!s}\"\n\n\ndef write_failures_csv(\n    failures: list[tuple[str, str]], filename: str = \"deleted_canons_failures.csv\"\n):\n    \"\"\"Write failures to a CSV file.\"\"\"\n    with open(filename, \"w\", newline=\"\") as csvfile:\n        writer = csv.writer(csvfile)\n        writer.writerow([\"package_name\", \"reason\"])\n        for package_name, reason in failures:\n            writer.writerow([package_name, reason])\n\n\ndef main():\n    parser = argparse.ArgumentParser(\n        description=\"Create canons for registered projects that were deleted\"\n    )\n    parser.add_argument(\n        \"--dry-run\",\n        action=\"store_true\",\n        help=\"Show what would be done without making changes\",\n    )\n    parser.add_argument(\n        \"--csv-file\",\n        type=str,\n        required=True,\n        help=\"CSV file containing package_name and canon_id columns\",\n    )\n    args = parser.parse_args()\n\n    # Read package data from CSV\n    package_data = read_package_data_from_csv(args.csv_file)\n\n    if not package_data:\n        print(\"No package data provided\", file=sys.stderr)\n        sys.exit(1)\n\n    print(\n        f\"Processing {len(package_data)} package records for deleted registered projects...\"\n    )\n\n    # Initialize database connection\n    db = DB()\n\n    success_count = 0\n    failure_count = 0\n    failures = []\n\n    try:\n        for package_name, target_canon_id in package_data:\n            success, reason = process_deleted_package(\n                db, package_name, target_canon_id, args.dry_run\n            )\n\n            if success:\n                success_count += 1\n            else:\n                failure_count += 1\n                failures.append((package_name, reason))\n                print(f\"Warning: Failed to process package {package_name}: {reason}\")\n\n        # Commit changes if not dry run\n        if not args.dry_run and success_count > 0:\n            db.conn.commit()\n            print(\"Database changes committed.\")\n\n        # Write failures to CSV if any\n        if failures:\n            write_failures_csv(failures)\n            print(\"Failures written to deleted_canons_failures.csv\")\n\n    finally:\n        db.close()\n\n    # Print final summary\n    print(\"--------------------------------------------------\")\n    print(f\"✅ Success: {success_count}\")\n    print(f\"❌ Failure: {failure_count}\")\n    print(\"--------------------------------------------------\")\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "scripts/upgrade_canons/db.py",
    "content": "from collections import defaultdict\nfrom os import getenv\nfrom uuid import UUID\n\nimport psycopg2\nfrom psycopg2.extras import execute_values, register_uuid\nfrom psycopg2.sql import SQL, Identifier\n\nfrom scripts.upgrade_canons.structs import URL, PackageURL\n\nCHAI_DATABASE_URL = getenv(\"CHAI_DATABASE_URL\")\n\n\nclass DB:\n    def __init__(self):\n        if not CHAI_DATABASE_URL:\n            raise Exception(\"CHAI_DATABASE_URL is not set\")\n\n        self.conn = psycopg2.connect(CHAI_DATABASE_URL)\n        self.cursor = self.conn.cursor()\n        register_uuid(self.conn)\n\n    def get_urls_by_type(\n        self, url_type_name: str\n    ) -> tuple[set[str], dict[UUID, list[str]]]:\n        \"\"\"\n        Returns a set of ALL URLs of the specified type (including orphans), and a map\n        of package ID to list of URL strings for URLs that are attached to packages\n\n        Args:\n            url_type_name: The name of the URL type to filter by (e.g., 'homepage',\n            'repository', 'source')\n        \"\"\"\n        self.cursor.execute(\n            \"\"\"\n            SELECT \n                u.url, \n                pu.package_id\n            FROM urls u \n            JOIN url_types ut ON ut.id = u.url_type_id \n            LEFT JOIN package_urls pu ON pu.url_id = u.id \n            WHERE \n                ut.name = %s;\"\"\",\n            (url_type_name,),\n        )\n\n        package_url_map: dict[UUID, list[str]] = defaultdict(list)\n        all_urls: set[str] = set()\n\n        for url, package_id in self.cursor.fetchall():\n            all_urls.add(url)  # Add all URLs (including orphans)\n            if (\n                package_id is not None\n            ):  # Only add to package map if attached to a package\n                package_url_map[package_id].append(url)\n\n        return all_urls, package_url_map\n\n    def db_execute_values(\n        self, table_name: str, columns: list[str], values: list[tuple]\n    ):\n        query = (\n            SQL(\"INSERT INTO {table_name} ({columns}) VALUES %s\")\n            .format(\n                table_name=Identifier(table_name),\n                columns=SQL(\", \").join(Identifier(column) for column in columns),\n            )\n            .as_string(self.conn)\n        )\n        try:\n            execute_values(self.cursor, query, values)\n            print(f\"Inserted {len(values)} rows into {table_name}\")\n        except Exception as e:\n            print(f\"Error inserting {table_name}: {e}\")\n            raise\n\n    def ingest(\n        self,\n        urls_to_add: list[URL],\n        package_urls_to_add: list[PackageURL],\n        dry_run: bool,\n    ):\n        \"\"\"\n        inserts into the db using psycopg2's execute_values\n\n        execute_values expects the data to be formatted as a list of tuples\n        \"\"\"\n        if urls_to_add:\n            table_name = \"urls\"\n            columns = [\"id\", \"url\", \"url_type_id\", \"created_at\", \"updated_at\"]\n            values = [\n                (url.id, url.url, url.url_type_id, url.created_at, url.updated_at)\n                for url in urls_to_add\n            ]\n            self.db_execute_values(table_name, columns, values)\n\n        if package_urls_to_add:\n            table_name = \"package_urls\"\n            columns = [\"id\", \"package_id\", \"url_id\", \"created_at\", \"updated_at\"]\n            values = [\n                (\n                    package_url.id,\n                    package_url.package_id,\n  
                  package_url.url_id,\n                    package_url.created_at,\n                    package_url.updated_at,\n                )\n                for package_url in package_urls_to_add\n            ]\n            self.db_execute_values(table_name, columns, values)\n\n        if not dry_run:\n            self.conn.commit()\n\n    def close(self):\n        self.cursor.close()\n        self.conn.close()\n\n    def get_canons_by_url_ids(self, url_ids: list[UUID]) -> list[tuple[UUID, UUID]]:\n        if not url_ids:\n            return []\n\n        # Build one placeholder per UUID so the list can be passed safely to the IN clause\n        placeholders = \",\".join([\"%s\"] * len(url_ids))\n        self.cursor.execute(\n            f\"\"\"\n            SELECT id, url_id FROM canons WHERE url_id IN ({placeholders});\n            \"\"\",\n            url_ids,\n        )\n        return self.cursor.fetchall()\n"
  },
  {
    "path": "scripts/upgrade_canons/delete_non_canonical_urls.py",
    "content": "#!/usr/bin/env pkgx uv run\n\nimport argparse\nimport csv\nimport sys\nimport warnings\nfrom uuid import UUID\n\nfrom permalint import is_canonical_url\n\nfrom scripts.upgrade_canons.db import DB\n\n\ndef write_to_csv(filename: str, headers: list[str], data: list[tuple]):\n    with open(filename, \"w\") as f:\n        writer = csv.writer(f)\n        writer.writerow(headers)\n        writer.writerows(data)\n\n\ndef get_all_urls(db: DB) -> list[tuple[UUID, str]]:\n    \"\"\"\n    Query all URLs from the urls table.\n    Returns list of tuples (url_id, url_string).\n    \"\"\"\n    db.cursor.execute(\n        \"\"\"\n        SELECT id, url\n        FROM urls\n        ORDER BY id\n        \"\"\"\n    )\n    return db.cursor.fetchall()\n\n\ndef identify_non_canonical_urls(urls: list[tuple[UUID, str]]) -> list[UUID]:\n    \"\"\"\n    Check each URL for canonicality using permalint.\n    Returns list of URL IDs that are not canonical.\n    \"\"\"\n    non_canonical_ids = []\n\n    for url_id, url_string in urls:\n        try:\n            if not is_canonical_url(url_string):\n                non_canonical_ids.append(url_id)\n        except Exception as e:\n            print(f\"Warning: Error checking URL {url_string}: {e}\")\n            # Treat URLs that can't be checked as non-canonical\n            non_canonical_ids.append(url_id)\n\n    return non_canonical_ids\n\n\ndef delete_urls_from_database(db: DB, url_ids: list[UUID], dry_run: bool) -> None:\n    \"\"\"\n    Delete URLs and their package_urls entries from the database.\n    \"\"\"\n    if not url_ids:\n        print(\"No URLs to delete.\")\n        return\n\n    if dry_run:\n        print(\n            f\"DRY RUN: Would delete {len(url_ids)} URLs and their package_urls entries\"\n        )\n        return\n\n    # Batch delete operations for efficiency\n    placeholders = \",\".join([\"%s\"] * len(url_ids))\n\n    # Delete from canons first (if any exist)\n    db.cursor.execute(f\"DELETE FROM canons WHERE url_id IN ({placeholders})\", url_ids)\n\n    # Delete from package_urls (foreign key constraint)\n    db.cursor.execute(\n        f\"DELETE FROM package_urls WHERE url_id IN ({placeholders})\", url_ids\n    )\n\n    # Then delete from urls\n    db.cursor.execute(f\"DELETE FROM urls WHERE id IN ({placeholders})\", url_ids)\n\n    # Commit the transaction\n    db.conn.commit()\n    print(f\"Successfully deleted {len(url_ids)} URLs and their package_urls entries\")\n\n\ndef main(dry_run: bool = False):\n    \"\"\"Main function to delete non-canonical URLs.\"\"\"\n    print(\"Starting deletion of non-canonical URLs...\")\n\n    db = DB()\n    try:\n        # Get all URLs from database\n        print(\"Fetching all URLs from database...\")\n        all_urls = get_all_urls(db)\n        print(f\"Found {len(all_urls)} total URLs\")\n\n        # Identify non-canonical URLs\n        print(\"Checking URLs for canonicality...\")\n        non_canonical_ids = identify_non_canonical_urls(all_urls)\n        canonical_count = len(all_urls) - len(non_canonical_ids)\n\n        print(f\"Found {len(non_canonical_ids)} non-canonical URLs\")\n        print(f\"Found {canonical_count} canonical URLs\")\n\n        # Delete non-canonical URLs\n        if non_canonical_ids:\n            canons = db.get_canons_by_url_ids(non_canonical_ids)\n\n            if canons:\n                print(f\"WARNING: Found {len(canons)} - delete them urself\")\n                write_to_csv(\n                    \"non_canonical_urls_that_have_canons.csv\",\n         
           [\"canon_id\", \"url_id\"],\n                    canons,\n                )\n                sys.exit(1)\n\n            print(\"Deleting non-canonical URLs...\")\n            delete_urls_from_database(db, non_canonical_ids, dry_run)\n        else:\n            print(\"No non-canonical URLs found to delete.\")\n\n        # Summary\n        print(\"-\" * 50)\n        if dry_run:\n            print(\"DRY RUN SUMMARY:\")\n            print(f\"Would delete: {len(non_canonical_ids)} URLs\")\n            print(f\"Would keep: {canonical_count} URLs\")\n        else:\n            print(\"DELETION SUMMARY:\")\n            print(f\"✅ Deleted: {len(non_canonical_ids)} URLs\")\n            print(f\"✅ Kept: {canonical_count} URLs\")\n        print(\"-\" * 50)\n\n    finally:\n        db.close()\n\n\nif __name__ == \"__main__\":\n    parser = argparse.ArgumentParser(\n        description=\"Delete non-canonical URLs from the database\"\n    )\n    parser.add_argument(\n        \"--dry-run\",\n        action=\"store_true\",\n        help=\"Run in dry-run mode without making database changes\",\n    )\n    args = parser.parse_args()\n\n    with warnings.catch_warnings(action=\"ignore\"):\n        main(args.dry_run)\n"
  },
  {
    "path": "scripts/upgrade_canons/main.py",
    "content": "#!/usr/bin/env uv run --with psycopg2==2.9.10 --with permalint==0.1.14\n\nimport argparse\nimport warnings\nfrom datetime import datetime\nfrom uuid import UUID, uuid4\n\nfrom permalint import is_canonical_url, normalize_url\n\nfrom scripts.upgrade_canons.db import DB\nfrom scripts.upgrade_canons.structs import URL, PackageURL\n\n# Valid URL types that can be canonicalized\nVALID_URL_TYPES = {\"homepage\", \"repository\", \"source\"}\n\n\ndef is_one_url_canonical(urls: list[str]) -> bool:\n    \"\"\"Returns True if at least one of the URLs is canonical\"\"\"\n    return any(is_canonical_url(url) for url in urls)\n\n\ndef generate_canonical_url(urls: list[str]) -> str:\n    \"\"\"\n    Returns the canonical URL for the given list of URLs\n\n      - TODO: we should be smart about which one to pick, like most recent perhaps?\n    \"\"\"\n    return normalize_url(urls[0])\n\n\ndef generate_new_url(url: str, url_type_id: UUID, now: datetime) -> URL:\n    \"\"\"Creates a new URL object for the given URL.\"\"\"\n    return URL(uuid4(), url, url_type_id, now, now)\n\n\ndef generate_new_package_url(\n    package_id: UUID, url_id: UUID, now: datetime\n) -> PackageURL:\n    \"\"\"Creates a new PackageURL object for the given package and URL\"\"\"\n    return PackageURL(uuid4(), package_id, url_id, now, now)\n\n\n# Pure functions for business logic - highly testable\ndef analyze_packages_needing_canonicalization(\n    package_url_map: dict[UUID, list[str]],\n    existing_urls: set[str],\n) -> dict[UUID, str]:\n    \"\"\"\n    Analyze which packages need canonical URLs created.\n    Returns a mapping of package_id to the canonical URL that should be created.\n    \"\"\"\n    packages_needing_canon: dict[UUID, str] = {}\n    canonical_urls_to_create: set[str] = set()\n\n    for package_id, urls in package_url_map.items():\n        # Skip if package already has at least one canonical URL\n        if is_one_url_canonical(urls):\n            continue\n\n        canonical_url = generate_canonical_url(urls)\n\n        # Skip if canonical URL already exists in database\n        if canonical_url in existing_urls:\n            continue\n\n        # Skip if we're already planning to create this canonical URL\n        if canonical_url in canonical_urls_to_create:\n            continue\n\n        # This package needs a canonical URL created\n        packages_needing_canon[package_id] = canonical_url\n        canonical_urls_to_create.add(canonical_url)\n\n    return packages_needing_canon\n\n\ndef create_url_and_package_url_objects(\n    packages_needing_canon: dict[UUID, str],\n    url_type_id: UUID,\n    now: datetime,\n) -> tuple[list[URL], list[PackageURL]]:\n    \"\"\"\n    Create URL and PackageURL objects for the packages that need canonicalization.\n    \"\"\"\n    new_urls: list[URL] = []\n    new_package_urls: list[PackageURL] = []\n\n    for package_id, canonical_url in packages_needing_canon.items():\n        new_url = generate_new_url(canonical_url, url_type_id, now)\n        new_package_url = generate_new_package_url(package_id, new_url.id, now)\n\n        new_urls.append(new_url)\n        new_package_urls.append(new_package_url)\n\n    return new_urls, new_package_urls\n\n\ndef main(db: DB, url_type: str, url_type_id: UUID, dry_run: bool):\n    now = datetime.now()\n    print(f\"Starting main for URL type '{url_type}': {now}\")\n\n    # Get data from database\n    all_urls, package_url_map = db.get_urls_by_type(url_type)\n    print(f\"Found {len(all_urls)} {url_type} URLs\")\n    
print(f\"Found {len(package_url_map)} packages with {url_type} URLs\")\n\n    # Analyze which packages need canonicalization\n    packages_needing_canon = analyze_packages_needing_canonicalization(\n        package_url_map, all_urls\n    )\n\n    # Create objects\n    new_urls, new_package_urls = create_url_and_package_url_objects(\n        packages_needing_canon, url_type_id, now\n    )\n\n    print(\"-\" * 100)\n    print(\"Going to insert:\")\n    print(f\"  {len(new_urls)} URLs\")\n    print(f\"  {len(new_package_urls)} PackageURLs\")\n    print(\"-\" * 100)\n\n    # Ingest to database\n    db.ingest(new_urls, new_package_urls, dry_run)\n\n\nif __name__ == \"__main__\":\n    parser = argparse.ArgumentParser(description=\"Canonicalize URLs of specified type\")\n    parser.add_argument(\n        \"--url-type\",\n        type=str,\n        required=True,\n        choices=VALID_URL_TYPES,\n        help=f\"Type of URLs to canonicalize. Valid options: {', '.join(VALID_URL_TYPES)}\",\n    )\n    parser.add_argument(\n        \"--url-type-id\",\n        type=UUID,\n        required=True,\n        help=\"UUID of the URL type in the database\",\n    )\n    parser.add_argument(\n        \"--dry-run\",\n        action=\"store_true\",\n        help=\"Run in dry-run mode without making database changes\",\n    )\n    args = parser.parse_args()\n\n    db = DB()\n    try:\n        with warnings.catch_warnings(action=\"ignore\"):\n            main(db, args.url_type, args.url_type_id, args.dry_run)\n    finally:\n        db.close()\n"
  },
  {
    "path": "scripts/upgrade_canons/registered_projects.py",
    "content": "#!/usr/bin/env pkgx uv run\n\nimport argparse\nimport csv\nimport sys\nfrom uuid import UUID\n\nfrom scripts.upgrade_canons.db import DB\n\n\ndef read_canon_ids_from_stdin() -> list[UUID]:\n    \"\"\"Read canon IDs from stdin and return as list of UUIDs.\"\"\"\n    canon_ids = []\n    for line in sys.stdin:\n        line = line.strip()\n        if line:\n            try:\n                canon_ids.append(UUID(line))\n            except ValueError as e:\n                print(f\"Warning: Invalid UUID format '{line}': {e}\", file=sys.stderr)\n    return canon_ids\n\n\ndef process_canon_id(db: DB, canon_id: UUID, dry_run: bool) -> tuple[bool, str]:\n    \"\"\"\n    Process a single canon ID and perform the updates.\n    Returns (success, reason) tuple.\n    \"\"\"\n    # First, join to canon_packages_old to get package_id\n    db.cursor.execute(\n        \"\"\"\n        SELECT package_id \n        FROM canon_packages_old \n        WHERE canon_id = %s\n    \"\"\",\n        (canon_id,),\n    )\n\n    old_result = db.cursor.fetchone()\n    if not old_result:\n        return False, \"could not find package_id\"\n\n    package_id = old_result[0]\n\n    # Next, join to canon_packages to get current canon_id\n    db.cursor.execute(\n        \"\"\"\n        SELECT canon_id \n        FROM canon_packages \n        WHERE package_id = %s\n    \"\"\",\n        (package_id,),\n    )\n\n    current_result = db.cursor.fetchone()\n    if not current_result:\n        return False, \"could not find new canon_id\"\n\n    new_canon_id = current_result[0]\n\n    if dry_run:\n        print(\n            f\"DRY RUN: Would update canon_id {new_canon_id} to {canon_id} for package {package_id}\"\n        )\n        return True, \"\"\n\n    try:\n        # Run the three update statements\n        # 1. Update canons table\n        db.cursor.execute(\n            \"\"\"\n            UPDATE canons\n            SET id = %s\n            WHERE id = %s\n        \"\"\",\n            (canon_id, new_canon_id),\n        )\n\n        # 2. Update canon_packages table\n        db.cursor.execute(\n            \"\"\"\n            UPDATE canon_packages\n            SET canon_id = %s\n            WHERE canon_id = %s\n        \"\"\",\n            (canon_id, new_canon_id),\n        )\n\n        # 3. 
Update tea_ranks table\n        db.cursor.execute(\n            \"\"\"\n            UPDATE tea_ranks\n            SET canon_id = %s\n            WHERE canon_id = %s\n        \"\"\",\n            (canon_id, new_canon_id),\n        )\n\n        return True, \"\"\n\n    except Exception as e:\n        print(f\"Error updating canon_id {canon_id}: {e}\", file=sys.stderr)\n        return False, f\"database error: {e!s}\"\n\n\ndef write_failures_csv(\n    failures: list[tuple[UUID, str]], filename: str = \"canon_update_failures.csv\"\n):\n    \"\"\"Write failures to a CSV file.\"\"\"\n    with open(filename, \"w\", newline=\"\") as csvfile:\n        writer = csv.writer(csvfile)\n        writer.writerow([\"canon_id\", \"reason\"])\n        for canon_id, reason in failures:\n            writer.writerow([str(canon_id), reason])\n\n\ndef main():\n    parser = argparse.ArgumentParser(\n        description=\"Update Canon IDs for registered projects\"\n    )\n    parser.add_argument(\n        \"--dry-run\",\n        action=\"store_true\",\n        help=\"Show what would be done without making changes\",\n    )\n    args = parser.parse_args()\n\n    # Read canon IDs from stdin\n    canon_ids = read_canon_ids_from_stdin()\n\n    if not canon_ids:\n        print(\"No canon IDs provided via stdin\", file=sys.stderr)\n        sys.exit(1)\n\n    print(f\"Processing {len(canon_ids)} canon IDs...\")\n\n    # Initialize database connection\n    db = DB()\n\n    success_count = 0\n    failure_count = 0\n    failures = []\n\n    try:\n        for canon_id in canon_ids:\n            success, reason = process_canon_id(db, canon_id, args.dry_run)\n\n            if success:\n                success_count += 1\n            else:\n                failure_count += 1\n                failures.append((canon_id, reason))\n                print(f\"Warning: Failed to process canon_id {canon_id}: {reason}\")\n\n        # Commit changes if not dry run\n        if not args.dry_run and success_count > 0:\n            db.conn.commit()\n            print(\"Database changes committed.\")\n\n        # Write failures to CSV if any\n        if failures:\n            write_failures_csv(failures)\n            print(\"Failures written to canon_update_failures.csv\")\n\n    finally:\n        db.close()\n\n    # Print final summary\n    print(\"--------------------------------------------------\")\n    print(f\"✅ Success: {success_count}\")\n    print(f\"❌ Failure: {failure_count}\")\n    print(\"--------------------------------------------------\")\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "scripts/upgrade_canons/structs.py",
    "content": "from dataclasses import dataclass\nfrom datetime import datetime\nfrom uuid import UUID\n\n\n# let's make classes defining the data models, since scripts can't really access ./core\n@dataclass\nclass URL:\n    id: UUID\n    url: str\n    url_type_id: UUID\n    created_at: datetime\n    updated_at: datetime\n\n\n@dataclass\nclass PackageURL:\n    id: UUID\n    package_id: UUID\n    url_id: UUID\n    created_at: datetime\n    updated_at: datetime\n"
  },
  {
    "path": "tests/README.md",
    "content": "# CHAI Test Suite\n\nThis directory contains the test suite for the CHAI package indexer. All tests are written using [pytest](https://docs.pytest.org/) and follow a consistent structure to ensure maintainability and ease of use.\n\n## Table of Contents\n\n- [Directory Structure](#directory-structure)\n- [Running Tests](#running-tests)\n- [Adding New Tests](#adding-new-tests)\n- [Test Coverage](#test-coverage)\n- [Fixtures and Mocking](#fixtures-and-mocking)\n- [Test Markers](#test-markers)\n\n## Directory Structure\n\nThe test suite is organized to mirror the main project structure:\n\n```\ntests/\n├── conftest.py              # Common fixtures and configuration\n├── requirements.txt         # Test dependencies\n├── unit/                    # Unit tests for core functionality\n│   ├── test_debian_parser.py\n│   └── test_pkgx_load_urls.py\n├── package_managers/        # Tests for package manager indexers\n│   ├── crates/\n│   │   ├── test_diff_deps.py\n│   │   └── test_special_case.py\n│   └── homebrew/\n│       └── test_diff_dep.py\n└── ranker/                  # Tests for ranking functionality\n    ├── test_dedupe.py\n    └── test_rx_graph.py\n```\n\n## Running Tests\n\n### Prerequisites\n\nInstall test dependencies:\n\n```bash\nuv pip install -r tests/requirements.txt\n```\n\n### Running All Tests\n\nTo run all tests:\n\n```bash\npytest tests/\n```\n\n### Running Specific Tests\n\nRun tests for a specific module:\n\n```bash\n# Run all tests in a directory\npytest tests/package_managers/crates/\n\n# Run a specific test file\npytest tests/unit/test_debian_parser.py\n\n# Run a specific test class\npytest tests/unit/test_debian_parser.py::TestDebianParser\n\n# Run a specific test method\npytest tests/unit/test_debian_parser.py::TestDebianParser::test_parse_package_data\n```\n\n### Running Tests by Marker\n\nTests are categorized with markers. To run tests for a specific category:\n\n```bash\n# Run only unit tests\npytest -m unit\n\n# Run only parser tests\npytest -m parser\n\n# Run only transformer tests\npytest -m transformer\n\n# Run only ranker tests\npytest -m ranker\n\n# Run all tests except slow ones\npytest -m \"not slow\"\n```\n\n### Verbose Output\n\nFor more detailed output:\n\n```bash\npytest -v tests/\n\n# Show captured print statements\npytest -s tests/\n\n# Show local variables in tracebacks\npytest -l tests/\n```\n\n## Adding New Tests\n\n### 1. Create a Test File\n\nTest files should:\n\n- Be placed in the appropriate directory based on what's being tested\n- Follow the naming convention `test_*.py`\n- Include a module docstring explaining what's being tested\n\nExample:\n\n```python\n\"\"\"\nTest the package parsing functionality for NewPackageManager.\n\nThis module tests the Parser class which extracts package information\nfrom the package manager's data format.\n\"\"\"\n\nimport pytest\n\nfrom package_managers.newpm.parser import Parser\n```\n\n### 2. Use Fixtures for Common Setup\n\nInstead of setUp/tearDown methods, use pytest fixtures:\n\n```python\n@pytest.fixture\ndef sample_package_data():\n    \"\"\"Provides sample package data for testing.\"\"\"\n    return {\n        \"name\": \"example-package\",\n        \"version\": \"1.0.0\",\n        \"dependencies\": [\"dep1\", \"dep2\"],\n    }\n\ndef test_parse_package(sample_package_data):\n    \"\"\"Test parsing a package with valid data.\"\"\"\n    parser = Parser()\n    result = parser.parse(sample_package_data)\n    assert result.name == \"example-package\"\n```\n\n### 3. 
Use Markers for Test Categories\n\nApply appropriate markers to your tests:\n\n```python\n@pytest.mark.parser\n@pytest.mark.unit\nclass TestNewParser:\n    \"\"\"Test the new package manager parser.\"\"\"\n\n    def test_parse_valid_data(self):\n        \"\"\"Test parsing valid package data.\"\"\"\n        # test implementation\n```\n\n### 4. Mock External Dependencies\n\nUse the fixtures from `conftest.py` or create specific mocks:\n\n```python\ndef test_with_mocked_config(mock_config):\n    \"\"\"Test using the common mock_config fixture.\"\"\"\n    # mock_config is automatically injected from conftest.py\n    transformer = Transformer(mock_config)\n    # test implementation\n```\n\n### 5. Write Clear Assertions\n\nUse clear, descriptive assertions:\n\n```python\n# Good\nassert len(packages) == 3, \"Should parse exactly 3 packages from the data\"\n\n# Less clear\nassert len(packages) == 3\n```\n\n## Test Coverage\n\n### Running Tests with Coverage\n\nTo generate a coverage report:\n\n```bash\n# Run with coverage and generate terminal report\npytest --cov=. --cov-report=term tests/\n\n# Generate HTML coverage report\npytest --cov=. --cov-report=html tests/\n# Open htmlcov/index.html in a browser\n\n# Generate coverage for specific modules\npytest --cov=package_managers.crates --cov=ranker tests/\n\n# Show missing lines in terminal\npytest --cov=. --cov-report=term-missing tests/\n```\n\n### Coverage by Docker Service\n\nTo check coverage for specific Docker services defined in `docker-compose.yml`:\n\n```bash\n# Coverage for crates indexer\npytest --cov=package_managers.crates --cov-report=term-missing tests/package_managers/crates/\n\n# Coverage for homebrew indexer\npytest --cov=package_managers.homebrew --cov-report=term-missing tests/package_managers/homebrew/\n\n# Coverage for debian indexer\npytest --cov=package_managers.debian --cov-report=term-missing tests/unit/test_debian_parser.py\n\n# Coverage for pkgx indexer\npytest --cov=package_managers.pkgx --cov-report=term-missing tests/unit/test_pkgx_load_urls.py\n\n# Coverage for ranker\npytest --cov=ranker --cov-report=term-missing tests/ranker/\n```\n\n### Setting Coverage Thresholds\n\nTo fail tests if coverage drops below a threshold:\n\n```bash\npytest --cov=. 
--cov-fail-under=80 tests/\n```\n\n## Fixtures and Mocking\n\n### Common Fixtures\n\nThe `conftest.py` file provides several reusable fixtures:\n\n- **`mock_config`**: A mocked Config object with all sub-configurations\n- **`mock_url_types`**: Mocked URL types (homepage, repository, etc.)\n- **`mock_dependency_types`**: Mocked dependency types (runtime, build, dev, test)\n- **`mock_package_managers`**: Mocked package manager configurations\n- **`sample_package_data`**: Sample data for different package managers\n\n### Using Fixtures\n\nFixtures are automatically injected into test functions:\n\n```python\ndef test_example(mock_config, sample_package_data):\n    \"\"\"Example test using multiple fixtures.\"\"\"\n    # mock_config and sample_package_data are automatically available\n    crates_data = sample_package_data[\"crates\"]\n    # test implementation\n```\n\n### Creating Test-Specific Fixtures\n\nFor test-specific setup, create local fixtures:\n\n```python\n@pytest.fixture\ndef special_cache():\n    \"\"\"Create a cache with specific test data.\"\"\"\n    return Cache(\n        package_map={\"test\": Package(id=uuid4(), name=\"test\")},\n        url_map={},\n        dependencies={},\n    )\n\ndef test_with_special_cache(special_cache):\n    \"\"\"Test using the special cache.\"\"\"\n    # test implementation\n```\n\n## Test Markers\n\nAvailable markers (defined in `conftest.py`):\n\n- **`@pytest.mark.unit`**: Unit tests\n- **`@pytest.mark.integration`**: Integration tests\n- **`@pytest.mark.slow`**: Slow-running tests\n- **`@pytest.mark.parser`**: Parser tests\n- **`@pytest.mark.transformer`**: Transformer tests\n- **`@pytest.mark.loader`**: Loader tests\n- **`@pytest.mark.ranker`**: Ranker tests\n\nTo list all available markers:\n\n```bash\npytest --markers\n```\n\n## Best Practices\n\n1. **Test One Thing**: Each test should verify a single behavior\n2. **Use Descriptive Names**: Test names should clearly indicate what they test\n3. **Keep Tests Independent**: Tests should not depend on each other\n4. **Use Fixtures**: Leverage fixtures for common setup instead of duplicating code\n5. **Mock External Dependencies**: Don't make actual database or network calls\n6. **Test Edge Cases**: Include tests for error conditions and edge cases\n7. **Document Complex Tests**: Add docstrings explaining complex test scenarios\n\n## Troubleshooting\n\n### Common Issues\n\n1. **Import Errors**: Ensure `PYTHONPATH` includes the project root:\n\n   ```bash\n   export PYTHONPATH=/workspace:$PYTHONPATH\n   ```\n\n2. **Missing Dependencies**: Install test requirements:\n\n   ```bash\n   uv sync --all-groups\n   source .venv/bin/activate\n   ```\n\n3. **Database Connection Errors**: Tests should not require `CHAI_DATABASE_URL`. If a\n   test fails due to database issues, it likely needs better mocking.\n\n### Debugging Tests\n\nTo debug a failing test:\n\n```bash\n# Drop into debugger on failure\npytest --pdb tests/failing_test.py\n\n# Show local variables in traceback\npytest -l tests/failing_test.py\n\n# Increase verbosity\npytest -vv tests/failing_test.py\n```\n"
  },
  {
    "path": "tests/conftest.py",
    "content": "\"\"\"\nCommon test fixtures and configurations for pytest.\n\nThis module provides reusable fixtures for testing the CHAI package indexer.\nInstead of mocking database operations, these fixtures focus on providing\ntest data and mock objects for testing the core logic of transformers,\nparsers, and other components.\n\"\"\"\n\nimport uuid\nfrom unittest.mock import MagicMock, Mock\n\nimport pytest\n\nfrom core.config import (\n    Config,\n    DependencyTypes,\n    PackageManagers,\n    PMConf,\n    URLTypes,\n    UserTypes,\n)\nfrom core.db import DB\nfrom core.logger import Logger\n\n\n@pytest.fixture\ndef mock_logger():\n    \"\"\"Mock logger for testing.\"\"\"\n    logger = MagicMock(spec=Logger)\n    logger.debug.side_effect = lambda x: print(f\"DEBUG: {x}\")\n    logger.warn.side_effect = lambda x: print(f\"WARN: {x}\")\n    logger.log.side_effect = lambda x: print(x)\n\n    return logger\n\n\n@pytest.fixture\ndef mock_url_types():\n    \"\"\"\n    Mock URL types with consistent UUIDs for testing.\n\n    Returns a mock URLTypes object that returns consistent URL type objects\n    for common URL type names.\n    \"\"\"\n    url_types = MagicMock(spec=URLTypes)\n\n    # Set up URL type attributes directly\n    url_types.homepage = uuid.UUID(\"00000000-0000-0000-0000-000000000001\")\n    url_types.repository = uuid.UUID(\"00000000-0000-0000-0000-000000000002\")\n    url_types.documentation = uuid.UUID(\"00000000-0000-0000-0000-000000000003\")\n    url_types.source = uuid.UUID(\"00000000-0000-0000-0000-000000000004\")\n\n    return url_types\n\n\n@pytest.fixture\ndef mock_dependency_types():\n    \"\"\"\n    Mock dependency types for testing.\n\n    Returns a mock DependencyTypes object with common dependency types.\n    \"\"\"\n    dep_types = MagicMock(spec=DependencyTypes)\n\n    # Set up dependency type attributes directly\n    dep_types.runtime = uuid.UUID(\"00000000-0000-0000-0000-000000000010\")\n    dep_types.build = uuid.UUID(\"00000000-0000-0000-0000-000000000011\")\n    dep_types.dev = uuid.UUID(\"00000000-0000-0000-0000-000000000012\")\n    dep_types.test = uuid.UUID(\"00000000-0000-0000-0000-000000000013\")\n    dep_types.development = dep_types.dev  # Alias for development\n    dep_types.recommended = uuid.UUID(\"00000000-0000-0000-0000-000000000014\")\n    dep_types.optional = uuid.UUID(\"00000000-0000-0000-0000-000000000015\")\n\n    return dep_types\n\n\n@pytest.fixture\ndef mock_sources():\n    \"\"\"\n    Mock sources with consistent UUIDs for testing.\n\n    Returns a dict mapping source names to mock Source objects.\n    \"\"\"\n    return {\n        \"github\": uuid.UUID(\"00000000-0000-0000-0000-000000000020\"),\n        \"crates\": uuid.UUID(\"00000000-0000-0000-0000-000000000021\"),\n        \"homebrew\": uuid.UUID(\"00000000-0000-0000-0000-000000000022\"),\n        \"debian\": uuid.UUID(\"00000000-0000-0000-0000-000000000023\"),\n        \"pkgx\": uuid.UUID(\"00000000-0000-0000-0000-000000000024\"),\n    }\n\n\n@pytest.fixture\ndef mock_package_managers():\n    \"\"\"\n    Mock package managers for testing.\n\n    Returns a mock PackageManagers object.\n    \"\"\"\n    package_managers = MagicMock(spec=PackageManagers)\n\n    # Set up package manager attributes directly\n    package_managers.crates = uuid.UUID(\"00000000-0000-0000-0000-000000000030\")\n    package_managers.homebrew = uuid.UUID(\"00000000-0000-0000-0000-000000000031\")\n    package_managers.debian = uuid.UUID(\"00000000-0000-0000-0000-000000000032\")\n    package_managers.pkgx = 
uuid.UUID(\"00000000-0000-0000-0000-000000000033\")\n\n    return package_managers\n\n\n@pytest.fixture\ndef mock_pm_config(mock_package_managers):\n    \"\"\"\n    Mock PMConf (Package Manager Configuration) for testing.\n\n    Returns a mock PMConf object with a default package manager ID.\n    \"\"\"\n    pm_config = MagicMock(spec=PMConf)\n    pm_config.pm_id = mock_package_managers.crates\n    return pm_config\n\n\n@pytest.fixture\ndef mock_config(\n    mock_url_types,\n    mock_dependency_types,\n    mock_package_managers,\n    mock_pm_config,\n    mock_sources,\n):\n    \"\"\"\n    Mock Config object with all necessary sub-configurations.\n\n    This is the main configuration fixture that most tests will use.\n    \"\"\"\n    config = MagicMock(spec=Config)\n\n    # Set up execution configuration\n    config.exec_config = MagicMock()\n    config.exec_config.test = True\n    config.exec_config.no_cache = True\n    config.exec_config.debug = False\n\n    # Set up sub-configurations\n    config.url_types = mock_url_types\n    config.dependency_types = mock_dependency_types\n    config.package_managers = mock_package_managers\n    config.pm_config = mock_pm_config\n\n    # Mock DB that returns consistent source objects\n    mock_db = MagicMock()\n    mock_db.select_source_by_name.side_effect = lambda name: mock_sources.get(name)\n    mock_db.select_url_types_by_name.side_effect = lambda name: mock_url_types.get(name)\n\n    # Set the db for config to be a minimal mock db created\n    config.db = mock_db\n\n    return config\n\n\n@pytest.fixture\ndef mock_user_types():\n    \"\"\"\n    Mock user types for testing.\n\n    Returns a mock UserTypes object.\n    \"\"\"\n    user_types = MagicMock(spec=UserTypes)\n\n    # Set up user type attributes directly\n    user_types.admin = Mock(id=uuid.UUID(\"00000000-0000-0000-0000-000000000040\"))\n    user_types.maintainer = Mock(id=uuid.UUID(\"00000000-0000-0000-0000-000000000041\"))\n    user_types.contributor = Mock(id=uuid.UUID(\"00000000-0000-0000-0000-000000000042\"))\n\n    return user_types\n\n\n@pytest.fixture\ndef sample_package_data():\n    \"\"\"\n    Provides sample package data for testing transformers and parsers.\n\n    Returns a dict with sample data for different package managers.\n    \"\"\"\n    return {\n        \"crates\": {\n            \"name\": \"serde\",\n            \"version\": \"1.0.130\",\n            \"description\": \"A generic serialization/deserialization framework\",\n            \"homepage\": \"https://serde.rs\",\n            \"repository\": \"https://github.com/serde-rs/serde\",\n            \"dependencies\": {\"serde_derive\": \"1.0.130\"},\n        },\n        \"homebrew\": {\n            \"name\": \"wget\",\n            \"version\": \"1.21.2\",\n            \"description\": \"Internet file retriever\",\n            \"homepage\": \"https://www.gnu.org/software/wget/\",\n            \"dependencies\": [\"gettext\", \"libidn2\", \"openssl@1.1\"],\n        },\n        \"debian\": {\n            \"package\": \"curl\",\n            \"version\": \"7.74.0-1.3+deb11u1\",\n            \"maintainer\": \"Alessandro Ghedini <ghedo@debian.org>\",\n            \"depends\": [\"libc6\", \"libcurl4\", \"zlib1g\"],\n        },\n        \"pkgx\": {\n            \"full_name\": \"gnu.org/wget\",\n            \"version\": \"1.21.2\",\n            \"homepage\": \"https://www.gnu.org/software/wget/\",\n            \"dependencies\": {\"gnu.org/gettext\": \"^0.21\", \"openssl.org\": \"^1.1\"},\n        },\n    
}\n\n\n@pytest.fixture\ndef mock_csv_reader():\n    \"\"\"\n    Creates a mock CSV reader for testing transformers that read CSV files.\n\n    Returns a function that creates mock readers with specific data.\n    \"\"\"\n\n    def create_mock_reader(data):\n        \"\"\"\n        Create a mock reader that returns the specified data.\n\n        Args:\n            data: List of rows to return from the CSV reader\n\n        Returns:\n            A mock function that returns an iterator over the data\n        \"\"\"\n\n        def mock_reader(file_key):\n            return iter([data])\n\n        return mock_reader\n\n    return create_mock_reader\n\n\n# Markers for categorizing tests\ndef pytest_configure(config):\n    \"\"\"Register custom markers for test categorization.\"\"\"\n    config.addinivalue_line(\"markers\", \"unit: Unit tests\")\n    config.addinivalue_line(\"markers\", \"integration: Integration tests\")\n    config.addinivalue_line(\"markers\", \"slow: Slow running tests\")\n    config.addinivalue_line(\"markers\", \"parser: Parser tests\")\n    config.addinivalue_line(\"markers\", \"transformer: Transformer tests\")\n    config.addinivalue_line(\"markers\", \"loader: Loader tests\")\n    config.addinivalue_line(\"markers\", \"ranker: Ranker tests\")\n\n\n@pytest.fixture\ndef mock_db():\n    return MagicMock(spec=DB)\n"
  },
  {
    "path": "tests/package_managers/crates/conftest.py",
    "content": "from datetime import datetime\nfrom uuid import uuid4\n\nimport pytest\n\nfrom core.models import Package\nfrom core.structs import Cache\nfrom package_managers.crates.main import Diff\nfrom package_managers.crates.structs import (\n    Crate,\n    CrateLatestVersion,\n)\n\n\n@pytest.fixture\ndef package_ids():\n    \"\"\"Fixture providing consistent package IDs for testing.\"\"\"\n    return {\"main\": uuid4(), \"dep\": uuid4()}\n\n\n@pytest.fixture\ndef packages(package_ids):\n    \"\"\"Fixture providing test packages.\"\"\"\n    return {\n        \"main\": Package(\n            id=package_ids[\"main\"],\n            name=\"main_pkg\",\n            package_manager_id=1,\n            import_id=\"1048221\",\n            created_at=datetime.now(),\n            updated_at=datetime.now(),\n        ),\n        \"dep\": Package(\n            id=package_ids[\"dep\"],\n            name=\"dep_pkg\",\n            package_manager_id=1,\n            import_id=\"271975\",\n            created_at=datetime.now(),\n            updated_at=datetime.now(),\n        ),\n    }\n\n\n@pytest.fixture\ndef diff_instance(mock_config):\n    \"\"\"\n    Factory fixture to create Diff instances with specific cache configurations.\n\n    Returns a function that creates Diff instances.\n    \"\"\"\n\n    def create_diff(package_map, dependencies=None, url_map=None, package_urls=None):\n        cache = Cache(\n            package_map=package_map,\n            url_map=url_map or {},\n            package_urls=package_urls or {},\n            dependencies=dependencies or {},\n        )\n        return Diff(mock_config, cache)\n\n    return create_diff\n\n\n@pytest.fixture\ndef crate_with_dependencies():\n    \"\"\"\n    Factory fixture to create Crate objects with specified dependencies.\n\n    Returns a function that creates Crate objects.\n    \"\"\"\n\n    def create_crate(crate_id=\"1048221\", dependencies=None):\n        latest_version = CrateLatestVersion(\n            id=9337571,\n            checksum=\"some-checksum\",\n            downloads=1000,\n            license=\"MIT\",\n            num=\"1.0.0\",\n            published_by=None,\n            published_at=\"2023-01-01\",\n        )\n\n        if dependencies:\n            latest_version.dependencies = dependencies\n        else:\n            latest_version.dependencies = []\n\n        crate = Crate(\n            id=int(crate_id),\n            name=\"main_pkg\",\n            readme=\"Test readme\",\n            homepage=\"\",\n            repository=\"\",\n            documentation=\"\",\n            source=None,\n        )\n        crate.latest_version = latest_version\n\n        return crate\n\n    return create_crate\n"
  },
  {
    "path": "tests/package_managers/crates/test_crates_diff_deps.py",
    "content": "\"\"\"\nTest the diff_deps functionality for the crates package manager.\n\nThis module tests the Diff.diff_deps method which determines what dependencies\nneed to be added or removed when processing crate updates.\n\"\"\"\n\nfrom datetime import datetime\n\nimport pytest\n\nfrom core.models import LegacyDependency\nfrom package_managers.crates.structs import CrateDependency, DependencyType\n\n\n@pytest.mark.transformer\nclass TestDiffDeps:\n    \"\"\"Tests for the diff_deps method in the Diff class for crates.\"\"\"\n\n    def test_existing_dependency_no_changes(\n        self, packages, package_ids, diff_instance, crate_with_dependencies, mock_config\n    ):\n        \"\"\"\n        Test that when a dependency already exists in the database and also appears in\n        the crate object, it is neither added to new_deps nor removed_deps.\n        \"\"\"\n        # Create an existing runtime dependency\n        existing_dep = LegacyDependency(\n            id=1,\n            package_id=package_ids[\"main\"],\n            dependency_id=package_ids[\"dep\"],\n            dependency_type_id=mock_config.dependency_types.runtime,\n            created_at=datetime.now(),\n            updated_at=datetime.now(),\n        )\n\n        # Create diff with existing dependency\n        diff = diff_instance(\n            package_map={\"1048221\": packages[\"main\"], \"271975\": packages[\"dep\"]},\n            dependencies={package_ids[\"main\"]: {existing_dep}},\n        )\n\n        # Create crate with the same dependency\n        dependency = CrateDependency(\n            crate_id=1048221,\n            dependency_id=271975,\n            dependency_type=DependencyType.NORMAL,  # Runtime dependency\n            semver_range=\"^0.26.1\",\n        )\n        crate = crate_with_dependencies(dependencies=[dependency])\n\n        # Execute\n        new_deps, removed_deps = diff.diff_deps(crate)\n\n        # Assert\n        assert len(new_deps) == 0, \"No new deps should be added\"\n        assert len(removed_deps) == 0, \"No deps should be removed\"\n\n    def test_dependency_changed_type(\n        self, packages, package_ids, diff_instance, crate_with_dependencies, mock_config\n    ):\n        \"\"\"\n        Test that when a dependency exists but its type changes, it is both\n        added to new_deps and removed_deps.\n        \"\"\"\n        # Create an existing build dependency\n        existing_dep = LegacyDependency(\n            id=1,\n            package_id=package_ids[\"main\"],\n            dependency_id=package_ids[\"dep\"],\n            dependency_type_id=mock_config.dependency_types.build,  # BUILD type\n            created_at=datetime.now(),\n            updated_at=datetime.now(),\n        )\n\n        # Create diff with existing dependency\n        diff = diff_instance(\n            package_map={\"1048221\": packages[\"main\"], \"271975\": packages[\"dep\"]},\n            dependencies={package_ids[\"main\"]: {existing_dep}},\n        )\n\n        # Create crate with dependency changed to runtime\n        dependency = CrateDependency(\n            crate_id=1048221,\n            dependency_id=271975,\n            dependency_type=DependencyType.NORMAL,  # Changed to runtime\n            semver_range=\"^0.26.1\",\n        )\n        crate = crate_with_dependencies(dependencies=[dependency])\n\n        # Execute\n        new_deps, removed_deps = diff.diff_deps(crate)\n\n        # Assert\n        assert len(new_deps) == 1, \"One new dep should be added (new type)\"\n        assert 
len(removed_deps) == 1, \"One dep should be removed (old type)\"\n\n        # Verify new dep is runtime\n        new_dep = new_deps[0]\n        assert new_dep.package_id == package_ids[\"main\"]\n        assert new_dep.dependency_id == package_ids[\"dep\"]\n        assert new_dep.dependency_type_id == mock_config.dependency_types.runtime\n\n        # Verify removed dep is build\n        removed_dep = removed_deps[0]\n        assert removed_dep.package_id == package_ids[\"main\"]\n        assert removed_dep.dependency_id == package_ids[\"dep\"]\n        assert removed_dep.dependency_type_id == mock_config.dependency_types.build\n\n    def test_new_dependency(\n        self, packages, package_ids, diff_instance, crate_with_dependencies, mock_config\n    ):\n        \"\"\"\n        Test that when a dependency doesn't exist in the cache but appears in the\n        crate object, it is added to new_deps.\n        \"\"\"\n        # Create diff with no existing dependencies\n        diff = diff_instance(\n            package_map={\"1048221\": packages[\"main\"], \"271975\": packages[\"dep\"]}\n        )\n\n        # Create crate with a new dependency\n        dependency = CrateDependency(\n            crate_id=1048221,\n            dependency_id=271975,\n            dependency_type=DependencyType.NORMAL,\n            semver_range=\"^0.26.1\",\n        )\n        crate = crate_with_dependencies(dependencies=[dependency])\n\n        # Execute\n        new_deps, removed_deps = diff.diff_deps(crate)\n\n        # Assert\n        assert len(new_deps) == 1, \"One new dep should be added\"\n        assert len(removed_deps) == 0, \"No deps should be removed\"\n\n        # Verify new dep\n        new_dep = new_deps[0]\n        assert new_dep.package_id == package_ids[\"main\"]\n        assert new_dep.dependency_id == package_ids[\"dep\"]\n        assert new_dep.dependency_type_id == mock_config.dependency_types.runtime\n\n    def test_removed_dependency(\n        self, packages, package_ids, diff_instance, crate_with_dependencies, mock_config\n    ):\n        \"\"\"\n        Test that when a dependency exists in the cache but doesn't appear in the\n        crate object, it is added to removed_deps.\n        \"\"\"\n        # Create an existing dependency\n        existing_dep = LegacyDependency(\n            id=1,\n            package_id=package_ids[\"main\"],\n            dependency_id=package_ids[\"dep\"],\n            dependency_type_id=mock_config.dependency_types.runtime,\n            created_at=datetime.now(),\n            updated_at=datetime.now(),\n        )\n\n        # Create diff with existing dependency\n        diff = diff_instance(\n            package_map={\"1048221\": packages[\"main\"], \"271975\": packages[\"dep\"]},\n            dependencies={package_ids[\"main\"]: {existing_dep}},\n        )\n\n        # Create crate with no dependencies\n        crate = crate_with_dependencies(dependencies=[])\n\n        # Execute\n        new_deps, removed_deps = diff.diff_deps(crate)\n\n        # Assert\n        assert len(new_deps) == 0, \"No new deps should be added\"\n        assert len(removed_deps) == 1, \"One dep should be removed\"\n\n        # Verify removed dep\n        removed_dep = removed_deps[0]\n        assert removed_dep.package_id == package_ids[\"main\"]\n        assert removed_dep.dependency_id == package_ids[\"dep\"]\n        assert removed_dep.dependency_type_id == mock_config.dependency_types.runtime\n\n    def test_multiple_dependency_types_same_package(\n        self, packages, 
package_ids, diff_instance, crate_with_dependencies, mock_config\n    ):\n        \"\"\"\n        Test that when a package depends on the same dependency package with\n        multiple dependency types (e.g., both runtime and build), we handle\n        the unique constraint on (package_id, dependency_id) properly.\n\n        This test exposes the bug where multiple LegacyDependency records with\n        the same package_id and dependency_id but different dependency_type_id\n        would violate the DB constraint.\n        \"\"\"\n        # Create diff with no existing dependencies\n        diff = diff_instance(\n            package_map={\"1048221\": packages[\"main\"], \"271975\": packages[\"dep\"]}\n        )\n\n        # Create crate with multiple dependency types to the same package\n        runtime_dep = CrateDependency(\n            crate_id=1048221,\n            dependency_id=271975,\n            dependency_type=DependencyType.NORMAL,  # Runtime\n            semver_range=\"^0.26.1\",\n        )\n        build_dep = CrateDependency(\n            crate_id=1048221,\n            dependency_id=271975,\n            dependency_type=DependencyType.BUILD,  # Build\n            semver_range=\"^0.26.1\",\n        )\n\n        crate = crate_with_dependencies(dependencies=[runtime_dep, build_dep])\n\n        # Execute\n        new_deps, removed_deps = diff.diff_deps(crate)\n\n        # Assert\n        assert len(removed_deps) == 0, \"No deps should be removed\"\n\n        # With the fix, only create 1 dependency with highest priority type\n        # Priority: NORMAL > BUILD > DEV\n        assert len(new_deps) == 1, \"Should create only 1 dep with highest priority type\"\n\n        # Should have runtime type (NORMAL has highest priority)\n        new_dep = new_deps[0]\n        assert new_dep.package_id == package_ids[\"main\"]\n        assert new_dep.dependency_id == package_ids[\"dep\"]\n        assert new_dep.dependency_type_id == mock_config.dependency_types.runtime, (\n            \"Should choose NORMAL (runtime) over BUILD as it has higher priority\"\n        )\n\n    def test_multiple_dependency_types_build_vs_dev(\n        self, packages, package_ids, diff_instance, crate_with_dependencies, mock_config\n    ):\n        \"\"\"\n        Test that when a package depends on the same dependency package with\n        BUILD and DEV types (no NORMAL), BUILD type takes precedence.\n\n        Priority order: NORMAL > BUILD > DEV\n        \"\"\"\n        # Create diff with no existing dependencies\n        diff = diff_instance(\n            package_map={\"1048221\": packages[\"main\"], \"271975\": packages[\"dep\"]}\n        )\n\n        # Create crate with BUILD and DEV dependencies to the same package\n        build_dep = CrateDependency(\n            crate_id=1048221,\n            dependency_id=271975,\n            dependency_type=DependencyType.BUILD,\n            semver_range=\"^0.26.1\",\n        )\n        dev_dep = CrateDependency(\n            crate_id=1048221,\n            dependency_id=271975,\n            dependency_type=DependencyType.DEV,\n            semver_range=\"^0.26.1\",\n        )\n\n        # Add DEV first to test ordering doesn't matter\n        crate = crate_with_dependencies(dependencies=[dev_dep, build_dep])\n\n        # Execute\n        new_deps, removed_deps = diff.diff_deps(crate)\n\n        # Assert\n        assert len(removed_deps) == 0, \"No deps should be removed\"\n        assert len(new_deps) == 1, \"Should create only 1 dep with highest priority type\"\n\n        # 
Should have build type (BUILD > DEV)\n        new_dep = new_deps[0]\n        assert new_dep.package_id == package_ids[\"main\"]\n        assert new_dep.dependency_id == package_ids[\"dep\"]\n        assert new_dep.dependency_type_id == mock_config.dependency_types.build, (\n            \"Should choose BUILD over DEV as it has higher priority\"\n        )\n"
  },
  {
    "path": "tests/package_managers/debian/conftest.py",
    "content": "from package_managers.debian.parser import DebianData, Depends\n\n\ndef create_debian_package(\n    package: str = \"test-package\",\n    description: str = \"Test package\",\n    homepage: str = \"\",\n    vcs_git: str = \"\",\n    vcs_browser: str = \"\",\n    directory: str = \"\",\n    filename: str = \"\",\n    depends: list[str] | None = None,\n    build_depends: list[str] | None = None,\n    recommends: list[str] | None = None,\n    suggests: list[str] | None = None,\n) -> DebianData:\n    \"\"\"Helper to create DebianData instances for testing\"\"\"\n\n    debian_data = DebianData()\n    debian_data.package = package\n    debian_data.description = description\n    debian_data.homepage = homepage\n    debian_data.vcs_git = vcs_git\n    debian_data.vcs_browser = vcs_browser\n    debian_data.directory = directory\n    debian_data.filename = filename\n\n    # Convert string dependencies to Depends objects\n    if depends:\n        debian_data.depends = [Depends(package=dep, semver=\"*\") for dep in depends]\n    if build_depends:\n        # build_depends is now list[Depends] like other dependency fields\n        debian_data.build_depends = [\n            Depends(package=dep, semver=\"*\") for dep in build_depends\n        ]\n    if recommends:\n        debian_data.recommends = [\n            Depends(package=dep, semver=\"*\") for dep in recommends\n        ]\n    if suggests:\n        debian_data.suggests = [Depends(package=dep, semver=\"*\") for dep in suggests]\n\n    return debian_data\n"
  },
  {
    "path": "tests/package_managers/debian/test_debian_diff.py",
    "content": "from uuid import uuid4\n\nfrom core.models import URL, LegacyDependency, Package, PackageURL\nfrom core.structs import Cache, URLKey\nfrom package_managers.debian.diff import DebianDiff\nfrom package_managers.debian.main import diff as main_diff\nfrom tests.package_managers.debian.conftest import create_debian_package\n\n\nclass TestDebianDifferentialLoading:\n    \"\"\"Test cases for debian differential loading scenarios\"\"\"\n\n    def test_package_exists_url_update(self, mock_config, mock_logger, mock_db):\n        \"\"\"Tests that Diff updates URLs when the package exists and the URL changes\"\"\"\n\n        # Setup existing package and URL\n        existing_pkg_id = uuid4()\n        existing_url_id = uuid4()\n        existing_package_url_id = uuid4()\n\n        existing_package = Package(\n            id=existing_pkg_id,\n            derived_id=\"debian/url-pkg\",\n            name=\"url-pkg\",\n            package_manager_id=mock_config.pm_config.pm_id,\n            import_id=\"url-pkg\",\n            readme=\"Test package\",\n        )\n\n        existing_url = URL(\n            id=existing_url_id,\n            url=\"https://old-homepage.com\",\n            url_type_id=mock_config.url_types.homepage,\n        )\n\n        existing_package_url = PackageURL(\n            id=existing_package_url_id,\n            package_id=existing_pkg_id,\n            url_id=existing_url_id,\n        )\n\n        # Create cache\n        cache = Cache(\n            package_map={\"url-pkg\": existing_package},\n            url_map={\n                URLKey(\n                    \"https://old-homepage.com\", mock_config.url_types.homepage\n                ): existing_url\n            },\n            package_urls={existing_pkg_id: {existing_package_url}},\n            dependencies={},\n        )\n\n        # Create package data with new URL\n        new_pkg_data = create_debian_package(\n            package=\"url-pkg\",\n            homepage=\"https://new-homepage.com\",\n        )\n        new_urls = {}  # this tracks all the new URLs we've created so far\n\n        # Test the diff\n        diff = DebianDiff(mock_config, cache, mock_db, mock_logger)\n        resolved_urls = diff.diff_url(\"url-pkg\", new_pkg_data, new_urls)\n        new_links, _ = diff.diff_pkg_url(existing_pkg_id, resolved_urls)\n\n        # Assertions\n        assert len(new_links) == 1  # New URL should be created\n        new_link = new_links[0]\n        assert new_link.package_id == existing_pkg_id\n\n        # The URL should be created in new_urls dict and the link should reference it\n        assert len(new_urls) == 1  # One new URL should be created\n        new_url_key = next(iter(new_urls.keys()))\n        new_url = new_urls[new_url_key]\n        assert new_link.url_id == new_url.id  # Link should reference the new URL\n        assert new_url_key.url == \"https://new-homepage.com\"\n        assert new_url_key.url_type_id == mock_config.url_types.homepage\n\n    def test_package_exists_dependency_change(self, mock_config, mock_logger, mock_db):\n        \"\"\"\n        Tests that diff correctly records:\n\n          - New dependency\n          - Changes to existing dependencies\n          - Removed dependencies\n        \"\"\"\n\n        # Setup existing package and dependencies\n        existing_pkg_id = uuid4()\n        dep1_id = uuid4()\n        dep2_id = uuid4()\n        dep3_id = uuid4()\n\n        existing_import_id = \"debian/dep-pkg\"\n        existing_package = Package(\n            
id=existing_pkg_id,\n            derived_id=existing_import_id,\n            name=\"dep-pkg\",\n            package_manager_id=mock_config.pm_config.pm_id,\n            import_id=existing_import_id,\n            readme=\"\",\n        )\n\n        # Create dependency packages\n        dep1_pkg = Package(\n            id=dep1_id, derived_id=\"debian/dep1\", name=\"dep1\", import_id=\"debian/dep1\"\n        )\n        dep2_pkg = Package(\n            id=dep2_id, derived_id=\"debian/dep2\", name=\"dep2\", import_id=\"debian/dep2\"\n        )\n        dep3_pkg = Package(\n            id=dep3_id, derived_id=\"debian/dep3\", name=\"dep3\", import_id=\"debian/dep3\"\n        )\n\n        # Create existing dependencies (dep1 as runtime, dep2 as build)\n        existing_dep1 = LegacyDependency(\n            package_id=existing_pkg_id,\n            dependency_id=dep1_id,\n            dependency_type_id=mock_config.dependency_types.runtime,\n        )\n        existing_dep2 = LegacyDependency(\n            package_id=existing_pkg_id,\n            dependency_id=dep2_id,\n            dependency_type_id=mock_config.dependency_types.build,\n        )\n\n        # Create cache\n        cache = Cache(\n            package_map={\n                existing_import_id: existing_package,\n                \"debian/dep1\": dep1_pkg,\n                \"debian/dep2\": dep2_pkg,\n                \"debian/dep3\": dep3_pkg,\n            },\n            url_map={},\n            package_urls={},\n            dependencies={existing_pkg_id: {existing_dep1, existing_dep2}},\n        )\n\n        # Create new package data with changed dependencies\n        # Remove dep2, keep dep1, add dep3 as runtime\n        new_pkg_data = create_debian_package(\n            package=\"dep-pkg\",\n            depends=[\"dep1\", \"dep3\"],  # runtime deps\n            build_depends=[],  # no build deps (removes dep2)\n        )\n\n        # Test the diff\n        diff = DebianDiff(mock_config, cache, mock_db, mock_logger)\n        new_deps, removed_deps = diff.diff_deps(existing_import_id, new_pkg_data)\n\n        # Assertions\n        assert len(new_deps) == 1  # dep3 should be added\n        assert new_deps[0].dependency_id == dep3_id\n        assert new_deps[0].dependency_type_id == mock_config.dependency_types.runtime\n\n        assert len(removed_deps) == 1  # dep2 should be removed\n        assert removed_deps[0].dependency_id == dep2_id\n        assert removed_deps[0].dependency_type_id == mock_config.dependency_types.build\n\n    def test_completely_new_package(self, mock_config, mock_logger, mock_db):\n        \"\"\"Tests the addition of completely new packages & new URLs\"\"\"\n\n        # Create empty cache (no existing packages)\n        cache = Cache(package_map={}, url_map={}, package_urls={}, dependencies={})\n\n        # Create new package data\n        new_pkg_data = create_debian_package(\n            package=\"new-pkg\",\n            description=\"A new package\",\n            homepage=\"https://github.com/example/new-pkg\",\n            depends=[\"some-dep\"],\n            build_depends=[\"build-tool\"],\n        )\n\n        # Test the diff\n        diff = DebianDiff(mock_config, cache, mock_db, mock_logger)\n        pkg_id, pkg_obj, update_payload = diff.diff_pkg(\"debian/new-pkg\", new_pkg_data)\n\n        # Assertions\n        assert pkg_obj is not None  # New package should be created\n        assert pkg_obj.derived_id == \"debian/new-pkg\"\n        assert pkg_obj.name == \"new-pkg\"\n        assert pkg_obj.import_id 
== \"debian/new-pkg\"\n        assert pkg_obj.package_manager_id == mock_config.pm_config.pm_id\n        assert pkg_obj.readme == \"A new package\"\n        assert update_payload == {}  # No updates for new package\n\n        # Test URL creation\n        new_urls = {}\n        resolved_urls = diff.diff_url(\"new-pkg\", new_pkg_data, new_urls)\n        new_links, updated_links = diff.diff_pkg_url(pkg_id, resolved_urls)\n\n        # Should create URL for homepage\n        assert len(new_urls) >= 1  # At least homepage\n        assert len(new_links) >= 1  # At least homepage link\n        assert len(updated_links) == 0  # No existing links to update\n\n        # Check that homepage URL was created\n        homepage_url_found = False\n        for url_key, _url in new_urls.items():\n            if url_key.url_type_id == mock_config.url_types.homepage:\n                assert url_key.url == \"https://github.com/example/new-pkg\"\n                homepage_url_found = True\n                break\n        assert homepage_url_found\n\n    def test_no_changes_scenario(self, mock_config, mock_logger, mock_db):\n        \"\"\"Tests where package exists but has no changes\"\"\"\n\n        # Setup existing package\n        existing_pkg_id = uuid4()\n        existing_package = Package(\n            id=existing_pkg_id,\n            derived_id=\"debian/unchanged-pkg\",\n            name=\"unchanged-pkg\",\n            package_manager_id=mock_config.pm_config.pm_id,\n            import_id=\"unchanged-pkg\",\n            readme=\"Unchanged description\",\n        )\n\n        cache = Cache(\n            package_map={\"unchanged-pkg\": existing_package},\n            url_map={},\n            package_urls={},\n            dependencies={},\n        )\n\n        # Create package data with same description\n        pkg_data = create_debian_package(\n            package=\"unchanged-pkg\", description=\"Unchanged description\"\n        )\n\n        # Test the diff\n        diff = DebianDiff(mock_config, cache, mock_db, mock_logger)\n        pkg_id, pkg_obj, update_payload = diff.diff_pkg(\"unchanged-pkg\", pkg_data)\n\n        # Assertions\n        assert pkg_id == existing_pkg_id\n        assert pkg_obj is None  # No new package\n        assert update_payload is None  # No changes\n\n    def test_package_description_update(self, mock_config, mock_logger, mock_db):\n        \"\"\"Test scenario where package exists but description has changed\"\"\"\n\n        # Setup existing package\n        existing_pkg_id = uuid4()\n        existing_package = Package(\n            id=existing_pkg_id,\n            derived_id=\"debian/desc-pkg\",\n            name=\"desc-pkg\",\n            package_manager_id=mock_config.pm_config.pm_id,\n            import_id=\"desc-pkg\",\n            readme=\"Old description\",\n        )\n\n        cache = Cache(\n            package_map={\"desc-pkg\": existing_package},\n            url_map={},\n            package_urls={},\n            dependencies={},\n        )\n\n        # Create package data with new description\n        pkg_data = create_debian_package(\n            package=\"desc-pkg\", description=\"New description\"\n        )\n\n        # Test the diff\n        diff = DebianDiff(mock_config, cache, mock_db, mock_logger)\n        pkg_id, pkg_obj, update_payload = diff.diff_pkg(\"desc-pkg\", pkg_data)\n\n        # Assertions\n        assert pkg_id == existing_pkg_id\n        assert pkg_obj is None  # No new package\n        assert update_payload is not None  # Should have changes\n      
  assert update_payload[\"id\"] == existing_pkg_id\n        assert update_payload[\"readme\"] == \"New description\"\n\n    def test_missing_dependency_handling(self, mock_config, mock_logger, mock_db):\n        \"\"\"Tests the case that we DON'T add dependencies for new packages\"\"\"\n\n        existing_pkg_id = uuid4()\n        existing_package = Package(\n            id=existing_pkg_id,\n            derived_id=\"debian/missing-dep-pkg\",\n            name=\"missing-dep-pkg\",\n            import_id=\"missing-dep-pkg\",\n        )\n\n        cache = Cache(\n            package_map={\"missing-dep-pkg\": existing_package},\n            url_map={},\n            package_urls={},\n            dependencies={},\n        )\n\n        # Create package with dependency that doesn't exist in cache\n        pkg_data = create_debian_package(\n            package=\"missing-dep-pkg\", depends=[\"non-existent-dep\"]\n        )\n\n        diff = DebianDiff(mock_config, cache, mock_db, mock_logger)\n        new_deps, removed_deps = diff.diff_deps(\"missing-dep-pkg\", pkg_data)\n\n        # Should handle gracefully - no deps added for missing packages\n        assert len(new_deps) == 0\n        assert len(removed_deps) == 0\n\n    def test_dependency_type_priority_no_change(\n        self, mock_config, mock_logger, mock_db\n    ):\n        \"\"\"\n        Scenario:\n          - p1 has runtime dependency to p2 in cache\n          - p1 depends on p2 as both runtime and build in parsed data\n\n        Expect no change (runtime has priority).\n        \"\"\"\n\n        # Setup existing package and dependencies\n        p1_id = uuid4()\n        p2_id = uuid4()\n\n        p1_pkg = Package(id=p1_id, derived_id=\"debian/p1\", name=\"p1\", import_id=\"p1\")\n        p2_pkg = Package(id=p2_id, derived_id=\"debian/p2\", name=\"p2\", import_id=\"p2\")\n\n        # Existing runtime dependency in cache\n        existing_runtime_dep = LegacyDependency(\n            package_id=p1_id,\n            dependency_id=p2_id,\n            dependency_type_id=mock_config.dependency_types.runtime,\n        )\n\n        cache = Cache(\n            package_map={\"debian/p1\": p1_pkg, \"debian/p2\": p2_pkg},\n            url_map={},\n            package_urls={},\n            dependencies={p1_id: {existing_runtime_dep}},\n        )\n\n        # Parsed data has p2 as both runtime and build dependency\n        new_pkg_data = create_debian_package(\n            package=\"p1\",\n            depends=[\"p2\"],  # runtime\n            build_depends=[\"p2\"],  # build\n        )\n\n        diff = DebianDiff(mock_config, cache, mock_db, mock_logger)\n        new_deps, removed_deps = diff.diff_deps(\"debian/p1\", new_pkg_data)\n\n        # Should have no changes - runtime priority means no change needed\n        assert len(new_deps) == 0\n        assert len(removed_deps) == 0\n\n    def test_dependency_type_change_runtime_to_build(\n        self, mock_config, mock_logger, mock_db\n    ):\n        \"\"\"\n        Scenario\n          - p1 has runtime dependency to p2 in cache\n          - p1 has build dependency to p2 in parsed data.\n\n        Expect removed runtime dependency and new build dependency\n        \"\"\"\n\n        p1_id = uuid4()\n        p2_id = uuid4()\n\n        p1_pkg = Package(id=p1_id, derived_id=\"debian/p1\", name=\"p1\", import_id=\"p1\")\n        p2_pkg = Package(id=p2_id, derived_id=\"debian/p2\", name=\"p2\", import_id=\"p2\")\n\n        # Existing runtime dependency\n        existing_runtime_dep = LegacyDependency(\n      
      package_id=p1_id,\n            dependency_id=p2_id,\n            dependency_type_id=mock_config.dependency_types.runtime,\n        )\n\n        cache = Cache(\n            package_map={\"debian/p1\": p1_pkg, \"debian/p2\": p2_pkg},\n            url_map={},\n            package_urls={},\n            dependencies={p1_id: {existing_runtime_dep}},\n        )\n\n        # Parsed data only has build dependency\n        new_pkg_data = create_debian_package(\n            package=\"p1\",\n            depends=[],  # no runtime deps\n            build_depends=[\"p2\"],  # only build\n        )\n\n        diff = DebianDiff(mock_config, cache, mock_db, mock_logger)\n        new_deps, removed_deps = diff.diff_deps(\"debian/p1\", new_pkg_data)\n\n        # Should remove runtime and add build\n        assert len(removed_deps) == 1\n        assert removed_deps[0].dependency_id == p2_id\n        assert (\n            removed_deps[0].dependency_type_id == mock_config.dependency_types.runtime\n        )\n\n        assert len(new_deps) == 1\n        assert new_deps[0].dependency_id == p2_id\n        assert new_deps[0].dependency_type_id == mock_config.dependency_types.build\n\n    def test_dependency_type_change_build_to_runtime(\n        self, mock_config, mock_logger, mock_db\n    ):\n        \"\"\"\n        Scenario:\n          - p1 has build dependency to p2 in cache\n          - p1 has runtime dependency to p2 in parsed data.\n\n        Expect removed build dependency and new runtime dependency\n        \"\"\"\n\n        p1_id = uuid4()\n        p2_id = uuid4()\n\n        p1_pkg = Package(id=p1_id, derived_id=\"debian/p1\", name=\"p1\", import_id=\"p1\")\n        p2_pkg = Package(id=p2_id, derived_id=\"debian/p2\", name=\"p2\", import_id=\"p2\")\n\n        # Existing build dependency\n        existing_build_dep = LegacyDependency(\n            package_id=p1_id,\n            dependency_id=p2_id,\n            dependency_type_id=mock_config.dependency_types.build,\n        )\n\n        cache = Cache(\n            package_map={\"debian/p1\": p1_pkg, \"debian/p2\": p2_pkg},\n            url_map={},\n            package_urls={},\n            dependencies={p1_id: {existing_build_dep}},\n        )\n\n        # Parsed data only has runtime dependency\n        new_pkg_data = create_debian_package(\n            package=\"p1\",\n            depends=[\"p2\"],  # runtime\n            build_depends=[],  # no build deps\n        )\n\n        diff = DebianDiff(mock_config, cache, mock_db, mock_logger)\n        new_deps, removed_deps = diff.diff_deps(\"debian/p1\", new_pkg_data)\n\n        # Should remove build and add runtime\n        assert len(removed_deps) == 1\n        assert removed_deps[0].dependency_id == p2_id\n        assert removed_deps[0].dependency_type_id == mock_config.dependency_types.build\n\n        assert len(new_deps) == 1\n        assert new_deps[0].dependency_id == p2_id\n        assert new_deps[0].dependency_type_id == mock_config.dependency_types.runtime\n\n    def test_dependency_type_priority_new_package(\n        self, mock_config, mock_logger, mock_db\n    ):\n        \"\"\"\n        Scenario:\n          - p1 has no dependencies to p2 in cache\n          - p1 has both runtime and build dependencies to p2 in parsed data\n\n        Expect one new runtime dependency (priority over build).\n        \"\"\"\n\n        p1_id = uuid4()\n        p2_id = uuid4()\n\n        p1_pkg = Package(id=p1_id, derived_id=\"debian/p1\", name=\"p1\", import_id=\"p1\")\n        p2_pkg = Package(id=p2_id, 
derived_id=\"debian/p2\", name=\"p2\", import_id=\"p2\")\n\n        cache = Cache(\n            package_map={\"debian/p1\": p1_pkg, \"debian/p2\": p2_pkg},\n            url_map={},\n            package_urls={},\n            dependencies={},  # No existing dependencies\n        )\n\n        # Parsed data has both runtime and build dependencies to p2\n        new_pkg_data = create_debian_package(\n            package=\"p1\",\n            depends=[\"p2\"],  # runtime\n            build_depends=[\"p2\"],  # build\n        )\n\n        diff = DebianDiff(mock_config, cache, mock_db, mock_logger)\n        new_deps, removed_deps = diff.diff_deps(\"debian/p1\", new_pkg_data)\n\n        # Should only create one new dependency - runtime (higher priority)\n        assert len(removed_deps) == 0\n        assert len(new_deps) == 1\n        assert new_deps[0].dependency_id == p2_id\n        assert new_deps[0].dependency_type_id == mock_config.dependency_types.runtime\n\n    def test_debian_specific_dependencies(self, mock_config, mock_logger, mock_db):\n        \"\"\"Test Debian-specific dependency types: recommends, suggests\"\"\"\n\n        p1_id = uuid4()\n        p2_id = uuid4()\n        p3_id = uuid4()\n\n        p1_pkg = Package(id=p1_id, derived_id=\"debian/p1\", name=\"p1\")\n        p2_pkg = Package(id=p2_id, derived_id=\"debian/p2\", name=\"p2\")\n        p3_pkg = Package(id=p3_id, derived_id=\"debian/p3\", name=\"p3\")\n\n        cache = Cache(\n            package_map={\"debian/p1\": p1_pkg, \"debian/p2\": p2_pkg, \"debian/p3\": p3_pkg},\n            url_map={},\n            package_urls={},\n            dependencies={},\n        )\n\n        # Parsed data with recommends and suggests (mapped to runtime)\n        new_pkg_data = create_debian_package(\n            package=\"p1\",\n            recommends=[\"p2\"],\n            suggests=[\"p3\"],\n        )\n\n        diff = DebianDiff(mock_config, cache, mock_db, mock_logger)\n        new_deps, removed_deps = diff.diff_deps(\"debian/p1\", new_pkg_data)\n\n        # Should create runtime dependencies for both recommends and suggests\n        assert len(removed_deps) == 0\n        assert len(new_deps) == 2\n\n        # Both should be runtime dependencies\n        for dep in new_deps:\n            assert dep.dependency_type_id == mock_config.dependency_types.runtime\n            assert dep.dependency_id in [p2_id, p3_id]\n\n\nclass TestDebianDiffFunction:\n    \"\"\"Test cases for the main.diff function\"\"\"\n\n    def test_duplicate_package_paragraphs(self, mock_config, mock_logger, mock_db):\n        \"\"\"Tests the case when the Debian Packages file contains duplicate packages\"\"\"\n        d1 = Package(id=uuid4(), derived_id=\"debian/d1\", name=\"d1\", import_id=\"d1\")\n        d2 = Package(id=uuid4(), derived_id=\"debian/d2\", name=\"d2\", import_id=\"d2\")\n        p1 = create_debian_package(\n            package=\"linux-doc\", homepage=\"homepage.org\", depends=[\"d1\"]\n        )\n        p2 = create_debian_package(\n            package=\"linux-doc\", homepage=\"homepage.org\", depends=[\"d2\"]\n        )\n        cache = Cache(\n            package_map={\"debian/d1\": d1, \"debian/d2\": d2},\n            url_map={},\n            package_urls={},\n            dependencies={},\n        )\n\n        data = [p1, p2]\n\n        result = main_diff(data, mock_config, cache, mock_db, mock_logger)\n\n        assert len(result.new_packages) == 1\n        assert len(result.new_package_urls) == 1\n        assert len(result.new_deps) == 0  # bc we 
don't load dependencies of new pkgs\n"
  },
  {
    "path": "tests/package_managers/debian/test_debian_parser.py",
    "content": "\"\"\"\nTest Debian package parser functionality.\n\nThis module tests the DebianParser class which parses Debian package\nand source entries from Packages and Sources files.\n\"\"\"\n\nimport pytest\n\nfrom package_managers.debian.parser import DebianParser\n\n\n@pytest.fixture\ndef simple_package():\n    return \"\"\"Package: 0ad\nVersion: 0.0.26-1\nInstalled-Size: 19162\nMaintainer: Debian Games Team <pkg-games-devel@lists.alioth.debian.org>\nArchitecture: amd64\nDepends: 0ad-data (>= 0.0.26), 0ad-data-common (>= 0.0.26), libc6 (>= 2.29), libcurl4 (>= 7.16.2), libenet7 (>= 1.3.13), libgloox18, libjsoncpp25 (>= 1.9.5), libminiupnpc17 (>= 1.9.20140610), libnspr4 (>= 2:4.9.2), libnss3 (>= 2:3.22)\nRecommends: fonts-freefont-ttf, fonts-texgyre\nSuggests: 0ad-dbg\nDescription: Real-time strategy game of ancient warfare\nHomepage: https://play0ad.com/\nSection: games\nPriority: optional\nFilename: pool/main/0/0ad/0ad_0.0.26-1_amd64.deb\nSize: 6050744\nMD5sum: a777ddf01c18dbdef15c589f8325d7a3\nSHA256: 9da19833c1a51e890aa8a11f82ec1e383c0e79410c3d2f6845fd2ec3e23249b8\n\n\n\"\"\"\n\n\n@pytest.fixture\ndef simple_source():\n    return \"\"\"Package: 0ad\nBinary: 0ad, 0ad-dbg, 0ad-data, 0ad-data-common\nVersion: 0.0.26-1\nMaintainer: Debian Games Team <pkg-games-devel@lists.alioth.debian.org>\nUploaders: Vincent Cheng <vcheng@debian.org>, Euan Kemp <euank@euank.com>\nBuild-Depends: debhelper-compat (= 13), cmake, dpkg-dev (>= 1.15.5), libboost-dev, libenet-dev (>= 1.3), libopenal-dev, libpng-dev, libsdl2-dev, libtiff5-dev, libvorbis-dev, libxcursor-dev, pkg-config, zlib1g-dev, libcurl4-gnutls-dev, libgloox-dev, libjsoncpp-dev, libminiupnpc-dev, libnspr4-dev, libnss3-dev, libsodium-dev, libwxgtk3.0-gtk3-dev | libwxgtk3.0-dev, python3, python3-dev, libxml2-dev, rust-gdb [amd64 i386 ppc64el]\nArchitecture: any all\nStandards-Version: 4.5.1\nFormat: 3.0 (quilt)\nFiles:\n 2fc0f38b8a4cf56fea7040fcf5f79ca3 2414 0ad_0.0.26-1.dsc\n 35ca57e781448c69ba31323313e972af 31463733 0ad_0.0.26.orig.tar.xz\n f78de44c8a9c32e6be3ae99f2747c330 71948 0ad_0.0.26-1.debian.tar.xz\nVcs-Browser: https://salsa.debian.org/games-team/0ad\nVcs-Git: https://salsa.debian.org/games-team/0ad.git\nDirectory: pool/main/0/0ad\nPriority: optional\nSection: games\nTestsuite: autopkgtest\nTestsuite-Triggers: g++, pyrex\n\n\n\"\"\"\n\n\n@pytest.fixture\ndef multiline_binary():\n    \"\"\"Fixture for binary fields, specifically multi-lines ones\"\"\"\n    return \"\"\"\nPackage: binutils\nBinary: binutils-for-host, binutils-for-build,\n binutils-ia64-linux-gnu-dbg, binutils-m68k-linux-gnu,\n binutils-mips64el-linux-gnuabin32-dbg, binutils-mipsisa64r6-linux-gnuabin32,\n binutils-mipsisa64r6el-linux-gnuabi64-dbg\n\n\"\"\"\n\n\n@pytest.fixture\ndef build_depends():\n    \"\"\"Fixture for all kinds of build depends.\"\"\"\n    return \"\"\"\nPackage: example\nBuild-Depends: gcc-11-source (>= 11.3.0-11~), gawk, lib32gcc1-amd64-cross [amd64 arm64 i386 ppc64el x32], g++-11, gm2-11 [!powerpc !ppc64 !x32]\n\"\"\"\n\n\n@pytest.mark.parser\nclass TestDebianParser:\n    \"\"\"Test the Debian parser functionality.\"\"\"\n\n    def test_build_depends(self, build_depends):\n        \"\"\"Test parsing build depends.\"\"\"\n        parser = DebianParser(build_depends)\n        sources = list(parser.parse())\n        assert len(sources) == 1\n        source = sources[0]\n        assert len(source.build_depends) == 5\n        assert any(dep.package == \"gcc-11-source\" for dep in source.build_depends)\n        assert any(dep.package == \"gawk\" 
for dep in source.build_depends)\n        assert any(\n            dep.package == \"lib32gcc1-amd64-cross\" for dep in source.build_depends\n        )\n        assert any(dep.package == \"g++-11\" for dep in source.build_depends)\n        assert any(dep.package == \"gm2-11\" for dep in source.build_depends)\n\n    def test_multiline_binary(self, multiline_binary):\n        \"\"\"Test handling of multiline binaries.\"\"\"\n        parser = DebianParser(multiline_binary)\n        sources = list(parser.parse())\n        assert len(sources) == 1\n        source = sources[0]\n        assert source.package == \"binutils\"\n        assert source.binary == [\n            \"binutils-for-host\",\n            \"binutils-for-build\",\n            \"binutils-ia64-linux-gnu-dbg\",\n            \"binutils-m68k-linux-gnu\",\n            \"binutils-mips64el-linux-gnuabin32-dbg\",\n            \"binutils-mipsisa64r6-linux-gnuabin32\",\n            \"binutils-mipsisa64r6el-linux-gnuabi64-dbg\",\n        ]\n\n    def test_parse_package_data(self, simple_package):\n        \"\"\"Test parsing a typical package entry from Packages file.\"\"\"\n        # Sample package data from a Packages file\n        package_data = simple_package\n\n        # Parse the package data\n        parser = DebianParser(package_data)\n        packages = list(parser.parse())\n\n        # Validate we have one package\n        assert len(packages) == 1\n        package = packages[0]\n\n        # Test basic fields\n        assert package.package == \"0ad\"\n        assert package.version == \"0.0.26-1\"\n        assert package.installed_size == 19162\n        assert package.architecture == \"amd64\"\n\n        # Test maintainer parsing\n        assert package.maintainer.name == \"Debian Games Team\"\n        assert package.maintainer.email == \"pkg-games-devel@lists.alioth.debian.org\"\n\n        # Test dependency parsing\n        assert len(package.depends) == 10\n        assert package.depends[0].package == \"0ad-data\"\n        assert package.depends[0].semver == \">= 0.0.26\"\n\n        # Test recommends parsing\n        assert len(package.recommends) == 2\n        assert package.recommends[0].package == \"fonts-freefont-ttf\"\n\n        # Test suggests parsing\n        assert len(package.suggests) == 1\n        assert package.suggests[0].package == \"0ad-dbg\"\n\n    def test_parse_source_data(self, simple_source):\n        \"\"\"Test parsing a typical source entry from Sources file.\"\"\"\n        source_data = simple_source\n        parser = DebianParser(source_data)\n        sources = list(parser.parse())\n\n        # Validate we have one source package\n        assert len(sources) == 1\n        source = sources[0]\n\n        # Test basic fields\n        assert source.package == \"0ad\"\n        assert source.version == \"0.0.26-1\"\n\n        # Test binary field\n        assert isinstance(source.binary, list)  # Fixed: binary should be a list\n        assert \"0ad\" in source.binary\n        assert \"0ad-dbg\" in source.binary\n        assert \"0ad-data\" in source.binary\n        assert \"0ad-data-common\" in source.binary\n\n        # Test maintainer parsing\n        assert source.maintainer.name == \"Debian Games Team\"\n        assert source.maintainer.email == \"pkg-games-devel@lists.alioth.debian.org\"\n\n        # Test uploaders parsing\n        assert len(source.uploaders) == 2\n        assert source.uploaders[0].name == \"Vincent Cheng\"\n        assert source.uploaders[0].email == \"vcheng@debian.org\"\n        assert 
source.uploaders[1].name == \"Euan Kemp\"\n        assert source.uploaders[1].email == \"euank@euank.com\"\n\n        # Test build depends parsing\n        assert len(source.build_depends) == 25\n        assert any(dep.package == \"debhelper-compat\" for dep in source.build_depends)\n\n        # Test other source fields\n        assert source.format == \"3.0 (quilt)\"\n        assert source.vcs_browser == \"salsa.debian.org/games-team/0ad\"\n        assert source.vcs_git == \"salsa.debian.org/games-team/0ad\"\n        assert source.testsuite == \"autopkgtest\"\n        assert source.testsuite_triggers == \"g++, pyrex\"\n"
  },
  {
    "path": "tests/package_managers/debian/test_debian_sources.py",
    "content": "from package_managers.debian.main import (\n    build_package_to_source_mapping,\n    enrich_package_with_source,\n)\nfrom tests.package_managers.debian.conftest import create_debian_package\n\n\nclass TestPackageSourceMapping:\n    \"\"\"Test cases for package to source mapping functionality\"\"\"\n\n    def test_build_package_to_source_mapping_with_binary_list(\n        self, tmp_path, mock_logger\n    ):\n        \"\"\"Test building mapping when source has explicit binary list\"\"\"\n\n        # Create a test sources file\n        sources_content = \"\"\"Package: test-source\nBinary: test-pkg1, test-pkg2, test-pkg3\nVcs-Git: https://github.com/test/test-source.git\nHomepage: https://example.com/test-source\n\nPackage: another-source\nBinary: another-pkg\nVcs-Browser: https://github.com/test/another-source\n\"\"\"\n\n        sources_file = tmp_path / \"sources\"\n        sources_file.write_text(sources_content)\n\n        # Build mapping\n        mapping = build_package_to_source_mapping(str(sources_file), mock_logger)\n\n        # Verify mapping\n        assert len(mapping) == 4  # 3 packages from first source + 1 from second\n        assert \"test-pkg1\" in mapping\n        assert \"test-pkg2\" in mapping\n        assert \"test-pkg3\" in mapping\n        assert \"another-pkg\" in mapping\n\n        # Verify source data is correctly associated\n        assert mapping[\"test-pkg1\"].package == \"test-source\"\n        # URLs are normalized by the parser - expect normalized format\n        assert mapping[\"test-pkg1\"].vcs_git == \"github.com/test/test-source\"\n        assert mapping[\"test-pkg2\"].package == \"test-source\"\n        assert mapping[\"another-pkg\"].package == \"another-source\"\n        assert mapping[\"another-pkg\"].vcs_browser == \"github.com/test/another-source\"\n\n    def test_build_package_to_source_mapping_no_binary_list(\n        self, tmp_path, mock_logger\n    ):\n        \"\"\"Test building mapping when source has no explicit binary list\"\"\"\n\n        # Create a test sources file with no Binary field\n        sources_content = \"\"\"Package: single-source\nVcs-Git: https://github.com/test/single-source.git\nHomepage: https://example.com/single-source\n\"\"\"\n\n        sources_file = tmp_path / \"sources\"\n        sources_file.write_text(sources_content)\n\n        # Build mapping\n        mapping = build_package_to_source_mapping(str(sources_file), mock_logger)\n\n        # Verify mapping - should use source package name as binary name\n        assert len(mapping) == 1\n        assert \"single-source\" in mapping\n        assert mapping[\"single-source\"].package == \"single-source\"\n        # URLs are normalized by the parser - expect normalized format\n        assert mapping[\"single-source\"].vcs_git == \"github.com/test/single-source\"\n\n    def test_enrich_package_with_explicit_source(self, mock_logger):\n        \"\"\"Test enriching package that has explicit source reference\"\"\"\n\n        # Create package data with explicit source reference\n        package_data = create_debian_package(\n            package=\"binary-pkg\",\n            description=\"A binary package\",\n        )\n        package_data.source = \"source-pkg\"\n\n        # Create source mapping\n        source_data = create_debian_package(\n            package=\"source-pkg\",\n            vcs_git=\"github.com/test/source-pkg\",  # Already normalized format\n            homepage=\"example.com/source-pkg\",  # Already normalized format\n            
build_depends=[\"build-dep1\", \"build-dep2\"],\n        )\n        source_mapping = {\"binary-pkg\": source_data}\n\n        # Enrich package\n        enriched = enrich_package_with_source(package_data, source_mapping, mock_logger)\n\n        # Verify enrichment\n        assert enriched.package == \"binary-pkg\"\n        assert enriched.description == \"A binary package\"\n        assert enriched.vcs_git == \"github.com/test/source-pkg\"\n        assert enriched.homepage == \"example.com/source-pkg\"\n        assert len(enriched.build_depends) == 2\n\n        build_depend_names = [item.package for item in enriched.build_depends]\n        assert build_depend_names == [\"build-dep1\", \"build-dep2\"]\n\n    def test_enrich_package_no_explicit_source(self, mock_logger):\n        \"\"\"Test enriching package with no explicit source reference\"\"\"\n\n        # Create package data with no explicit source\n        package_data = create_debian_package(\n            package=\"self-source-pkg\",\n            description=\"A self-sourced package\",\n        )\n\n        # Create source mapping with same name as package\n        source_data = create_debian_package(\n            package=\"self-source-pkg\",\n            vcs_browser=\"github.com/test/self-source-pkg\",  # Already normalized format\n            directory=\"pool/main/s/self-source-pkg\",\n        )\n        source_mapping = {\"self-source-pkg\": source_data}\n\n        # Enrich package\n        enriched = enrich_package_with_source(package_data, source_mapping, mock_logger)\n\n        # Verify enrichment\n        assert enriched.package == \"self-source-pkg\"\n        assert enriched.vcs_browser == \"github.com/test/self-source-pkg\"\n        assert enriched.directory == \"pool/main/s/self-source-pkg\"\n\n    def test_enrich_package_missing_source_warning(self, caplog, mock_logger):\n        \"\"\"Test warning when package references missing source\"\"\"\n        from package_managers.debian.main import enrich_package_with_source\n\n        # Create package data with source that doesn't exist in mapping\n        package_data = create_debian_package(\n            package=\"orphan-pkg\",\n            description=\"An orphaned package\",\n        )\n        package_data.source = \"missing-source\"\n\n        # Empty source mapping\n        source_mapping = {}\n\n        # Enrich package (this should log a warning)\n        enriched = enrich_package_with_source(package_data, source_mapping, mock_logger)\n\n        # The warning should be present in the function execution output\n        # Check the logged warning message directly\n        # Note: The warning is logged by our function, so we check the expected behavior\n\n        # Package should remain unchanged\n        assert enriched.package == \"orphan-pkg\"\n        assert enriched.description == \"An orphaned package\"\n        assert not enriched.vcs_git\n        assert not enriched.vcs_browser\n\n    def test_enrich_package_preserves_existing_fields(self, mock_logger):\n        \"\"\"Test that existing package fields are not overwritten\"\"\"\n        # Create package data with existing homepage\n        package_data = create_debian_package(\n            package=\"pkg-with-homepage\",\n            homepage=\"pkg-homepage.com\",  # Normalized format\n        )\n\n        # Create source data with different homepage\n        source_data = create_debian_package(\n            package=\"pkg-with-homepage\",\n            homepage=\"source-homepage.com\",  # Normalized format\n            
vcs_git=\"github.com/test/pkg\",  # Normalized format\n        )\n        source_mapping = {\"pkg-with-homepage\": source_data}\n\n        # Enrich package\n        enriched = enrich_package_with_source(package_data, source_mapping, mock_logger)\n\n        # Verify package homepage is preserved, but source info is added\n        assert enriched.homepage == \"pkg-homepage.com\"  # Package value preserved\n        assert enriched.vcs_git == \"github.com/test/pkg\"  # Source value added\n"
  },
  {
    "path": "tests/package_managers/homebrew/conftest.py",
    "content": "from datetime import datetime\nfrom uuid import UUID, uuid4\n\nimport pytest\n\nfrom core.models import Package\nfrom core.structs import Cache\nfrom package_managers.homebrew.diff import Diff\nfrom package_managers.homebrew.structs import Actual\n\n\n@pytest.fixture\ndef package_ids() -> dict[str, UUID]:\n    \"\"\"Fixture providing consistent package IDs for testing.\"\"\"\n    return {\"foo\": uuid4(), \"bar\": uuid4(), \"baz\": uuid4(), \"qux\": uuid4()}\n\n\n@pytest.fixture\ndef packages(package_ids) -> dict[str, Package]:\n    \"\"\"Fixture providing test packages.\"\"\"\n    return {\n        \"foo\": Package(\n            id=package_ids[\"foo\"],\n            name=\"foo\",\n            package_manager_id=1,\n            import_id=\"foo\",\n            created_at=datetime.now(),\n            updated_at=datetime.now(),\n        ),\n        \"bar\": Package(\n            id=package_ids[\"bar\"],\n            name=\"bar\",\n            package_manager_id=1,\n            import_id=\"bar\",\n            created_at=datetime.now(),\n            updated_at=datetime.now(),\n        ),\n        \"baz\": Package(\n            id=package_ids[\"baz\"],\n            name=\"baz\",\n            package_manager_id=1,\n            import_id=\"baz\",\n            created_at=datetime.now(),\n            updated_at=datetime.now(),\n        ),\n        \"qux\": Package(\n            id=package_ids[\"qux\"],\n            name=\"qux\",\n            package_manager_id=1,\n            import_id=\"qux\",\n            created_at=datetime.now(),\n            updated_at=datetime.now(),\n        ),\n    }\n\n\n@pytest.fixture\ndef diff_instance(mock_config):\n    \"\"\"\n    Factory fixture to create Diff instances with specific cache configurations.\n\n    Returns a function that creates Diff instances.\n    \"\"\"\n\n    def create_diff(\n        package_map, dependencies=None, url_map=None, package_urls=None\n    ) -> Diff:\n        cache = Cache(\n            package_map=package_map,\n            url_map=url_map or {},\n            package_urls=package_urls or {},\n            dependencies=dependencies or {},\n        )\n        return Diff(mock_config, cache)\n\n    return create_diff\n\n\n@pytest.fixture\ndef homebrew_formula():\n    \"\"\"\n    Factory fixture to create Actual homebrew formula objects.\n\n    Returns a function that creates Actual objects.\n    \"\"\"\n\n    def create_formula(\n        formula_name,\n        dependencies=None,\n        build_dependencies=None,\n        test_dependencies=None,\n        recommended_dependencies=None,\n        optional_dependencies=None,\n    ):\n        return Actual(\n            formula=formula_name,\n            description=\"Test formula\",\n            license=\"MIT\",\n            homepage=\"\",\n            source=\"\",\n            repository=\"\",\n            dependencies=dependencies or [],\n            build_dependencies=build_dependencies or [],\n            test_dependencies=test_dependencies or [],\n            recommended_dependencies=recommended_dependencies or [],\n            optional_dependencies=optional_dependencies or [],\n        )\n\n    return create_formula\n"
  },
  {
    "path": "tests/package_managers/homebrew/test_homebrew_diff_deps.py",
    "content": "\"\"\"\nTest the diff_deps functionality for the homebrew package manager.\n\nThis module tests the Diff.diff_deps method which determines what dependencies\nneed to be added or removed when processing homebrew formula updates.\n\"\"\"\n\nfrom datetime import datetime\nfrom uuid import UUID, uuid4\n\nimport pytest\n\nfrom core.models import LegacyDependency, Package\nfrom package_managers.homebrew.diff import Diff\nfrom package_managers.homebrew.structs import Actual\n\n\n@pytest.mark.transformer\nclass TestDiffDeps:\n    \"\"\"Tests for the diff_deps method in the Diff class.\"\"\"\n\n    def test_new_package_not_in_cache(self, packages, diff_instance, homebrew_formula):\n        \"\"\"\n        If the package is not even in the package cache, that means it is new.\n        Since we won't know the ID of the package during dependency loading,\n        we're going to continue to the next package and write a warning.\n        \"\"\"\n        # Create cache without the package we'll look for\n        diff = diff_instance(\n            package_map={\"bar\": packages[\"bar\"], \"baz\": packages[\"baz\"]}\n        )\n\n        # Create an Actual package that's not in the cache\n        new_pkg = homebrew_formula(\n            \"new_package\", dependencies=[\"baz\"], build_dependencies=[\"bar\"]\n        )\n\n        # Execute\n        new_deps, removed_deps = diff.diff_deps(new_pkg)\n\n        # Assert\n        assert len(new_deps) == 0, \"No new deps for new pkg\"\n        assert len(removed_deps) == 0, \"No removed deps for new pkg\"\n\n    def test_existing_package_adding_dependency(\n        self,\n        packages: dict[str, Package],\n        package_ids: dict[str, UUID],\n        diff_instance: Diff,\n        homebrew_formula: Actual,\n        mock_config,\n    ):\n        \"\"\"Test diff_deps when adding a new dependency to an existing package.\"\"\"\n        # Create existing dependency\n        existing_dep = LegacyDependency(\n            id=uuid4(),\n            package_id=package_ids[\"foo\"],\n            dependency_id=package_ids[\"bar\"],\n            dependency_type_id=mock_config.dependency_types.runtime,\n            created_at=datetime.now(),\n            updated_at=datetime.now(),\n        )\n\n        # Create diff with existing dependency\n        diff: Diff = diff_instance(\n            package_map=packages, dependencies={package_ids[\"foo\"]: {existing_dep}}\n        )\n        for k, s in diff.caches.dependencies.items():\n            print(f\"Package: {k}\")\n            for v in s:\n                print(v.dependency_id)\n                print(v.dependency_type_id)\n\n        # Create formula with existing dependency plus a new one\n        pkg = homebrew_formula(\n            \"foo\",\n            dependencies=[\"bar\"],  # existing dependency\n            build_dependencies=[\"baz\"],  # new dependency\n        )\n\n        # Execute\n        new_deps, removed_deps = diff.diff_deps(pkg)\n\n        for dep in new_deps:\n            print(\n                f\"{dep.package_id} -> {dep.dependency_id} as {dep.dependency_type_id}\"\n            )\n\n        # Assert\n        assert len(new_deps) == 1, \"One new dependency should be added\"\n        assert len(removed_deps) == 0, \"No dependencies should be removed\"\n\n        # Verify new dep is a build dep on baz\n        new_dep = new_deps[0]\n        assert new_dep.package_id == package_ids[\"foo\"]\n        assert new_dep.dependency_id == package_ids[\"baz\"]\n        assert new_dep.dependency_type_id 
== mock_config.dependency_types.build\n\n    def test_existing_package_removing_dependency(\n        self, packages, package_ids, diff_instance, homebrew_formula, mock_config\n    ):\n        \"\"\"Test diff_deps when removing a dependency from an existing package.\"\"\"\n        # Create existing dependencies\n        existing_dep = LegacyDependency(\n            id=uuid4(),\n            package_id=package_ids[\"foo\"],\n            dependency_id=package_ids[\"bar\"],\n            dependency_type_id=mock_config.dependency_types.runtime,\n            created_at=datetime.now(),\n            updated_at=datetime.now(),\n        )\n\n        to_be_removed_dep = LegacyDependency(\n            id=uuid4(),\n            package_id=package_ids[\"foo\"],\n            dependency_id=package_ids[\"baz\"],\n            dependency_type_id=mock_config.dependency_types.build,\n            created_at=datetime.now(),\n            updated_at=datetime.now(),\n        )\n\n        # Create diff with both dependencies\n        diff = diff_instance(\n            package_map=packages,\n            dependencies={package_ids[\"foo\"]: {existing_dep, to_be_removed_dep}},\n        )\n\n        # Create formula with only one of the previous dependencies\n        pkg = homebrew_formula(\n            \"foo\",\n            dependencies=[\"bar\"],  # only keep this dependency\n        )\n\n        # Execute\n        new_deps, removed_deps = diff.diff_deps(pkg)\n\n        # Assert\n        assert len(new_deps) == 0, \"No new deps should be added\"\n        assert len(removed_deps) == 1, \"One dep should be removed\"\n\n        # Verify removed dep is a build dep on baz\n        removed_dep = removed_deps[0]\n        assert removed_dep.package_id == package_ids[\"foo\"]\n        assert removed_dep.dependency_id == package_ids[\"baz\"]\n        assert removed_dep.dependency_type_id == mock_config.dependency_types.build\n\n    def test_existing_package_changing_dependency_type(\n        self, packages, package_ids, diff_instance, homebrew_formula, mock_config\n    ):\n        \"\"\"\n        If the dependency types for a specific package to package relationship change,\n        then Diff sees two changes: one removal and one addition.\n        \"\"\"\n        # Create existing runtime dependency\n        existing_dep = LegacyDependency(\n            id=uuid4(),\n            package_id=package_ids[\"foo\"],\n            dependency_id=package_ids[\"bar\"],\n            dependency_type_id=mock_config.dependency_types.runtime,\n            created_at=datetime.now(),\n            updated_at=datetime.now(),\n        )\n\n        # Create diff with existing dependency\n        diff = diff_instance(\n            package_map={\"foo\": packages[\"foo\"], \"bar\": packages[\"bar\"]},\n            dependencies={package_ids[\"foo\"]: {existing_dep}},\n        )\n\n        # Create formula with same dependency but changed type\n        pkg = homebrew_formula(\n            \"foo\",\n            build_dependencies=[\"bar\"],  # Changed from runtime to build\n        )\n\n        # Execute\n        new_deps, removed_deps = diff.diff_deps(pkg)\n\n        # Assert\n        assert len(new_deps) == 1, \"One new dep should be added (new type)\"\n        assert len(removed_deps) == 1, \"One dep should be removed (old type)\"\n\n        # Verify removed dep is runtime\n        removed_dep = removed_deps[0]\n        assert removed_dep.package_id == package_ids[\"foo\"]\n        assert removed_dep.dependency_id == package_ids[\"bar\"]\n        assert 
removed_dep.dependency_type_id == mock_config.dependency_types.runtime\n\n        # Verify new dep is build\n        new_dep = new_deps[0]\n        assert new_dep.package_id == package_ids[\"foo\"]\n        assert new_dep.dependency_id == package_ids[\"bar\"]\n        assert new_dep.dependency_type_id == mock_config.dependency_types.build\n\n    def test_existing_package_no_dependency_changes(\n        self, packages, package_ids, diff_instance, homebrew_formula, mock_config\n    ):\n        \"\"\"\n        Test a case where there's no changes to be made, because the database and\n        Homebrew's JSON response indicate the same data.\n        \"\"\"\n        # Create existing dependency\n        existing_dep = LegacyDependency(\n            id=uuid4(),\n            package_id=package_ids[\"foo\"],\n            dependency_id=package_ids[\"bar\"],\n            dependency_type_id=mock_config.dependency_types.runtime,\n            created_at=datetime.now(),\n            updated_at=datetime.now(),\n        )\n\n        # Create diff with existing dependency\n        diff = diff_instance(\n            package_map={\"foo\": packages[\"foo\"], \"bar\": packages[\"bar\"]},\n            dependencies={package_ids[\"foo\"]: {existing_dep}},\n        )\n\n        # Create formula with same dependency and type\n        pkg = homebrew_formula(\n            \"foo\",\n            dependencies=[\"bar\"],  # same dependency with same type\n        )\n\n        # Execute\n        new_deps, removed_deps = diff.diff_deps(pkg)\n\n        # Assert\n        assert len(new_deps) == 0, \"No new deps should be added\"\n        assert len(removed_deps) == 0, \"No deps should be removed\"\n\n    def test_existing_package_same_dependency_multiple_times_no_changes(\n        self, packages, package_ids, diff_instance, homebrew_formula, mock_config\n    ):\n        \"\"\"\n        The case here is that the formula specifies a runtime and build dependency,\n        and the db already captured the runtime dependency. 
Since the Diff class has\n        a hierarchy of which dependency to choose, and runtime is on top, we should\n        see no changes.\n        \"\"\"\n        # Create existing runtime dependency\n        existing_dep = LegacyDependency(\n            id=uuid4(),\n            package_id=package_ids[\"foo\"],\n            dependency_id=package_ids[\"bar\"],\n            dependency_type_id=mock_config.dependency_types.runtime,\n            created_at=datetime.now(),\n            updated_at=datetime.now(),\n        )\n\n        # Create diff with existing dependency\n        diff = diff_instance(\n            package_map={\"foo\": packages[\"foo\"], \"bar\": packages[\"bar\"]},\n            dependencies={package_ids[\"foo\"]: {existing_dep}},\n        )\n\n        # Create formula with same dependency multiple times\n        pkg = homebrew_formula(\"foo\", dependencies=[\"bar\"], build_dependencies=[\"bar\"])\n\n        # Execute\n        new_deps, removed_deps = diff.diff_deps(pkg)\n\n        # Assert\n        # Since runtime is encountered first and that's in the DB/cache,\n        # we should see no new dependencies\n        assert len(new_deps) == 0, \"No new deps should be added\"\n        assert len(removed_deps) == 0, \"No deps should be removed\"\n\n    def test_existing_package_same_dependency_multiple_times_yes_changes(\n        self, packages, package_ids, diff_instance, homebrew_formula, mock_config\n    ):\n        \"\"\"\n        In this case, suppose the DB maintained a build relationship between foo and bar\n        and actually there is a runtime and build dependency according to Homebrew. Here\n        CHAI updates this record to a runtime dependency.\n        \"\"\"\n        # Create existing build dependency\n        existing_dep = LegacyDependency(\n            id=uuid4(),\n            package_id=package_ids[\"foo\"],\n            dependency_id=package_ids[\"bar\"],\n            dependency_type_id=mock_config.dependency_types.build,\n            created_at=datetime.now(),\n            updated_at=datetime.now(),\n        )\n\n        # Create diff with existing dependency\n        diff = diff_instance(\n            package_map={\"foo\": packages[\"foo\"], \"bar\": packages[\"bar\"]},\n            dependencies={package_ids[\"foo\"]: {existing_dep}},\n        )\n\n        # Create formula with same dependency multiple times\n        pkg = homebrew_formula(\n            \"foo\",\n            dependencies=[\"bar\"],  # runtime has higher priority\n            build_dependencies=[\"bar\"],\n        )\n\n        # Execute\n        new_deps, removed_deps = diff.diff_deps(pkg)\n\n        # Assert\n        assert len(new_deps) == 1, \"One new dependency should be added\"\n        assert new_deps[0].dependency_type_id == mock_config.dependency_types.runtime, (\n            \"The new dependency should be runtime\"\n        )\n\n        assert len(removed_deps) == 1, \"The build dependency should be removed\"\n        assert (\n            removed_deps[0].dependency_type_id == mock_config.dependency_types.build\n        ), \"The removed dependency should be build\"\n"
  },
  {
    "path": "tests/package_managers/pkgx/test_pkgx_diff.py",
    "content": "#!/usr/bin/env pkgx uv run\n\nfrom unittest.mock import patch\nfrom uuid import uuid4\n\nfrom core.models import URL, LegacyDependency, Package, PackageURL\nfrom core.structs import Cache, URLKey\nfrom package_managers.pkgx.diff import PkgxDiff\nfrom package_managers.pkgx.parser import (\n    Dependency,\n    DependencyBlock,\n    Distributable,\n    PkgxPackage,\n    Version,\n)\n\n\ndef create_pkgx_package(\n    distributables: list[str] | None = None,\n    dependencies: list[str] | None = None,\n    build_deps: list[str] | None = None,\n    test_deps: list[str] | None = None,\n) -> PkgxPackage:\n    \"\"\"Helper to create PkgxPackage instances for testing\"\"\"\n\n    # Create distributable blocks\n    distributable_blocks = []\n    if distributables:\n        for url in distributables:\n            distributable_blocks.append(Distributable(url=url))\n\n    # Create dependency objects\n    dep_objects = [\n        DependencyBlock(\n            platform=\"all\",\n            dependencies=[\n                Dependency(name=dep, semver=\"*\") for dep in (dependencies or [])\n            ],\n        )\n    ]\n    build_dep_objects = [\n        DependencyBlock(\n            platform=\"all\",\n            dependencies=[\n                Dependency(name=dep, semver=\"*\") for dep in (build_deps or [])\n            ],\n        )\n    ]\n    test_dep_objects = [\n        DependencyBlock(\n            platform=\"all\",\n            dependencies=[\n                Dependency(name=dep, semver=\"*\") for dep in (test_deps or [])\n            ],\n        )\n    ]\n\n    # Create version object\n    version = Version()\n\n    return PkgxPackage(\n        distributable=distributable_blocks,\n        versions=version,\n        dependencies=dep_objects,\n        build=DependencyBlock(platform=\"linux\", dependencies=build_dep_objects),\n        test=DependencyBlock(platform=\"linux\", dependencies=test_dep_objects),\n    )\n\n\nclass TestPkgxDifferentialLoading:\n    \"\"\"Test cases for pkgx differential loading scenarios\"\"\"\n\n    def test_package_exists_url_update(self, mock_config, mock_logger, mock_db):\n        \"\"\"Test scenario 2: Package existed in database and needed a URL update\"\"\"\n\n        # Setup existing package and URL\n        existing_pkg_id = uuid4()\n        existing_url_id = uuid4()\n        existing_package_url_id = uuid4()\n\n        existing_package = Package(\n            id=existing_pkg_id,\n            derived_id=\"pkgx/url-pkg\",\n            name=\"url-pkg\",\n            package_manager_id=mock_config.pm_config.pm_id,\n            import_id=\"url-pkg\",\n            readme=\"Test package\",\n        )\n\n        existing_url = URL(\n            id=existing_url_id,\n            url=\"https://old-source.com/file.tar.gz\",\n            url_type_id=mock_config.url_types.source,\n        )\n\n        existing_package_url = PackageURL(\n            id=existing_package_url_id,\n            package_id=existing_pkg_id,\n            url_id=existing_url_id,\n        )\n\n        # Create cache\n        cache = Cache(\n            package_map={\"url-pkg\": existing_package},\n            url_map={\n                URLKey(\n                    \"https://old-source.com/file.tar.gz\", mock_config.url_types.source\n                ): existing_url\n            },\n            package_urls={existing_pkg_id: {existing_package_url}},\n            dependencies={},\n        )\n\n        # Create package data with new URL\n        new_pkg_data = create_pkgx_package(\n       
     distributables=[\"https://new-source.com/file.tar.gz\"],\n        )\n        new_generated_urls = [\n            URLKey(\"https://new-source.com/file.tar.gz\", mock_config.url_types.source)\n        ]\n        new_urls = {}  # this tracks all the new URLs we've created so far -\n        # let it be empty for this test\n\n        # Test the diff\n        diff = PkgxDiff(mock_config, cache, mock_db, mock_logger)\n\n        # Mock the URL retrieval step\n        with (\n            patch(\n                \"package_managers.pkgx.diff.generate_chai_urls\",\n                return_value=new_generated_urls,\n            ),\n        ):\n            resolved_urls = diff.diff_url(\"url-pkg\", new_pkg_data, new_urls)\n            new_links, _ = diff.diff_pkg_url(existing_pkg_id, resolved_urls)\n\n        # Assertions\n        assert len(new_links) == 1  # New URL should be created\n        new_link = new_links[0]\n        assert new_link.package_id == existing_pkg_id\n\n        # The URL should be created in new_urls dict and the link should reference it\n        assert len(new_urls) == 1  # One new URL should be created\n        new_url_key = next(iter(new_urls.keys()))\n        new_url = new_urls[new_url_key]\n        assert new_link.url_id == new_url.id  # Link should reference the new URL\n        assert new_url_key.url == \"https://new-source.com/file.tar.gz\"\n        assert new_url_key.url_type_id == mock_config.url_types.source\n\n    def test_package_exists_dependency_change(self, mock_config, mock_logger, mock_db):\n        \"\"\"Test scenario 3: Package existed in database and changed its dependencies\"\"\"\n\n        # Setup existing package and dependencies\n        existing_pkg_id = uuid4()\n        dep1_id = uuid4()\n        dep2_id = uuid4()\n        dep3_id = uuid4()\n\n        existing_package = Package(\n            id=existing_pkg_id,\n            derived_id=\"pkgx/dep-pkg\",\n            name=\"dep-pkg\",\n            package_manager_id=mock_config.pm_config.pm_id,\n            import_id=\"dep-pkg\",\n            readme=\"\",\n        )\n\n        # Create dependency packages\n        dep1_pkg = Package(\n            id=dep1_id, derived_id=\"pkgx/dep1\", name=\"dep1\", import_id=\"dep1\"\n        )\n        dep2_pkg = Package(\n            id=dep2_id, derived_id=\"pkgx/dep2\", name=\"dep2\", import_id=\"dep2\"\n        )\n        dep3_pkg = Package(\n            id=dep3_id, derived_id=\"pkgx/dep3\", name=\"dep3\", import_id=\"dep3\"\n        )\n\n        # Create existing dependencies (dep1 as runtime, dep2 as build)\n        existing_dep1 = LegacyDependency(\n            package_id=existing_pkg_id,\n            dependency_id=dep1_id,\n            dependency_type_id=mock_config.dependency_types.runtime,\n        )\n        existing_dep2 = LegacyDependency(\n            package_id=existing_pkg_id,\n            dependency_id=dep2_id,\n            dependency_type_id=mock_config.dependency_types.build,\n        )\n\n        # Create cache\n        cache = Cache(\n            package_map={\n                \"dep-pkg\": existing_package,\n                \"dep1\": dep1_pkg,\n                \"dep2\": dep2_pkg,\n                \"dep3\": dep3_pkg,\n            },\n            url_map={},\n            package_urls={},\n            dependencies={existing_pkg_id: {existing_dep1, existing_dep2}},\n        )\n\n        # Create new package data with changed dependencies\n        # Remove dep2, keep dep1, add dep3 as runtime\n        new_pkg_data = create_pkgx_package(\n            
dependencies=[\"dep1\", \"dep3\"],  # runtime deps\n            build_deps=[],  # no build deps (removes dep2)\n        )\n\n        # Test the diff\n        diff = PkgxDiff(mock_config, cache, mock_db, mock_logger)\n        new_deps, removed_deps = diff.diff_deps(\"dep-pkg\", new_pkg_data)\n\n        # Assertions\n        assert len(new_deps) == 1  # dep3 should be added\n        assert new_deps[0].dependency_id == dep3_id\n        assert new_deps[0].dependency_type_id == mock_config.dependency_types.runtime\n\n        assert len(removed_deps) == 1  # dep2 should be removed\n        assert removed_deps[0].dependency_id == dep2_id\n        assert removed_deps[0].dependency_type_id == mock_config.dependency_types.build\n\n    def test_completely_new_package(self, mock_config, mock_logger, mock_db):\n        \"\"\"Test scenario 4: Package was completely new to the database\"\"\"\n\n        # Create empty cache (no existing packages)\n        cache = Cache(package_map={}, url_map={}, package_urls={}, dependencies={})\n\n        # Create new package data\n        new_pkg_data = create_pkgx_package(\n            distributables=[\"https://github.com/example/new-pkg/archive/v1.0.tar.gz\"],\n            dependencies=[\"some-dep\"],\n            build_deps=[\"build-tool\"],\n        )\n\n        # Test the diff\n        diff = PkgxDiff(mock_config, cache, mock_db, mock_logger)\n        pkg_id, pkg_obj, update_payload = diff.diff_pkg(\"new-pkg\", new_pkg_data)\n\n        # Assertions\n        assert pkg_obj is not None  # New package should be created\n        assert pkg_obj.derived_id == \"pkgx/new-pkg\"\n        assert pkg_obj.name == \"new-pkg\"\n        assert pkg_obj.import_id == \"new-pkg\"\n        assert pkg_obj.package_manager_id == mock_config.pm_config.pm_id\n        assert update_payload == {}  # No updates for new package\n\n        # Test URL creation\n        new_urls = {}\n        # Mock generate_chai_urls to return predictable URLs\n        mock_urls = [\n            URLKey(\n                \"https://github.com/example/new-pkg\", mock_config.url_types.homepage\n            ),\n            URLKey(\n                \"https://github.com/example/new-pkg/archive/v1.0.tar.gz\",\n                mock_config.url_types.source,\n            ),\n        ]\n        with patch(\n            \"package_managers.pkgx.diff.generate_chai_urls\", return_value=mock_urls\n        ):\n            resolved_urls = diff.diff_url(\"new-pkg\", new_pkg_data, new_urls)\n            new_links, updated_links = diff.diff_pkg_url(pkg_id, resolved_urls)\n\n        # Should create URLs for homepage, source, and repository (GitHub)\n        assert len(new_urls) >= 2  # At least source and homepage\n        assert len(new_links) >= 2  # At least source and homepage links\n        assert len(updated_links) == 0  # No existing links to update\n\n    def test_no_changes_scenario(self, mock_config, mock_logger, mock_db):\n        \"\"\"Test scenario where package exists but has no changes\"\"\"\n\n        # Setup existing package\n        existing_pkg_id = uuid4()\n        existing_package = Package(\n            id=existing_pkg_id,\n            derived_id=\"pkgx/unchanged-pkg\",\n            name=\"unchanged-pkg\",\n            package_manager_id=mock_config.pm_config.pm_id,\n            import_id=\"unchanged-pkg\",\n            readme=\"Unchanged description\",\n        )\n\n        cache = Cache(\n            package_map={\"unchanged-pkg\": existing_package},\n            url_map={},\n            package_urls={},\n     
       dependencies={},\n        )\n\n        # Create package data with same description\n        pkg_data = create_pkgx_package()\n\n        # Test the diff\n        diff = PkgxDiff(mock_config, cache, mock_db, mock_logger)\n        pkg_id, pkg_obj, update_payload = diff.diff_pkg(\"unchanged-pkg\", pkg_data)\n\n        # Assertions\n        assert pkg_id == existing_pkg_id\n        assert pkg_obj is None  # No new package\n        assert update_payload is None  # No changes\n\n    def test_missing_dependency_handling(self, mock_config, mock_logger, mock_db):\n        \"\"\"Test how missing dependencies are handled\"\"\"\n\n        existing_pkg_id = uuid4()\n        existing_package = Package(\n            id=existing_pkg_id,\n            derived_id=\"pkgx/missing-dep-pkg\",\n            name=\"missing-dep-pkg\",\n            import_id=\"missing-dep-pkg\",\n        )\n\n        cache = Cache(\n            package_map={\"missing-dep-pkg\": existing_package},\n            url_map={},\n            package_urls={},\n            dependencies={},\n        )\n\n        # Create package with dependency that doesn't exist in cache\n        pkg_data = create_pkgx_package(dependencies=[\"non-existent-dep\"])\n\n        diff = PkgxDiff(mock_config, cache, mock_db, mock_logger)\n        new_deps, removed_deps = diff.diff_deps(\"missing-dep-pkg\", pkg_data)\n\n        # Should handle gracefully - no deps added for missing packages\n        assert len(new_deps) == 0\n        assert len(removed_deps) == 0\n\n    def test_dependency_type_priority_no_change(\n        self, mock_config, mock_logger, mock_db\n    ):\n        \"\"\"Test case 1: p1 has runtime dependency to p2 in cache,\n        p1 depends on p2 as both runtime and build in parsed data.\n        Expect no change (runtime has priority).\"\"\"\n\n        # Setup existing package and dependencies\n        p1_id = uuid4()\n        p2_id = uuid4()\n\n        p1_pkg = Package(id=p1_id, derived_id=\"pkgx/p1\", name=\"p1\", import_id=\"p1\")\n        p2_pkg = Package(id=p2_id, derived_id=\"pkgx/p2\", name=\"p2\", import_id=\"p2\")\n\n        # Existing runtime dependency in cache\n        existing_runtime_dep = LegacyDependency(\n            package_id=p1_id,\n            dependency_id=p2_id,\n            dependency_type_id=mock_config.dependency_types.runtime,\n        )\n\n        cache = Cache(\n            package_map={\"p1\": p1_pkg, \"p2\": p2_pkg},\n            url_map={},\n            package_urls={},\n            dependencies={p1_id: {existing_runtime_dep}},\n        )\n\n        # Parsed data has p2 as both runtime and build dependency\n        new_pkg_data = create_pkgx_package(\n            dependencies=[\"p2\"],  # runtime\n            build_deps=[\"p2\"],  # build\n        )\n\n        diff = PkgxDiff(mock_config, cache, mock_db, mock_logger)\n        new_deps, removed_deps = diff.diff_deps(\"p1\", new_pkg_data)\n\n        # Should have no changes - runtime priority means no change needed\n        assert len(new_deps) == 0\n        assert len(removed_deps) == 0\n\n    def test_dependency_type_change_runtime_to_build(\n        self, mock_config, mock_logger, mock_db\n    ):\n        \"\"\"Test case 2: p1 has runtime dependency to p2 in cache,\n        p1 has build dependency to p2 in parsed data.\n        Expect removed runtime dependency and new build dependency.\"\"\"\n\n        p1_id = uuid4()\n        p2_id = uuid4()\n\n        p1_pkg = Package(id=p1_id, derived_id=\"pkgx/p1\", name=\"p1\", import_id=\"p1\")\n        p2_pkg = 
Package(id=p2_id, derived_id=\"pkgx/p2\", name=\"p2\", import_id=\"p2\")\n\n        # Existing runtime dependency\n        existing_runtime_dep = LegacyDependency(\n            package_id=p1_id,\n            dependency_id=p2_id,\n            dependency_type_id=mock_config.dependency_types.runtime,\n        )\n\n        cache = Cache(\n            package_map={\"p1\": p1_pkg, \"p2\": p2_pkg},\n            url_map={},\n            package_urls={},\n            dependencies={p1_id: {existing_runtime_dep}},\n        )\n\n        # Parsed data only has build dependency\n        new_pkg_data = create_pkgx_package(\n            dependencies=[],  # no runtime deps\n            build_deps=[\"p2\"],  # only build\n        )\n\n        diff = PkgxDiff(mock_config, cache, mock_db, mock_logger)\n        new_deps, removed_deps = diff.diff_deps(\"p1\", new_pkg_data)\n\n        # Should remove runtime and add build\n        assert len(removed_deps) == 1\n        assert removed_deps[0].dependency_id == p2_id\n        assert (\n            removed_deps[0].dependency_type_id == mock_config.dependency_types.runtime\n        )\n\n        assert len(new_deps) == 1\n        assert new_deps[0].dependency_id == p2_id\n        assert new_deps[0].dependency_type_id == mock_config.dependency_types.build\n\n    def test_dependency_type_change_build_to_runtime(\n        self, mock_config, mock_logger, mock_db\n    ):\n        \"\"\"Test case 3: p1 has build dependency to p2 in cache,\n        p1 has runtime dependency to p2 in parsed data.\n        Expect removed build dependency and new runtime dependency.\"\"\"\n\n        p1_id = uuid4()\n        p2_id = uuid4()\n\n        p1_pkg = Package(id=p1_id, derived_id=\"pkgx/p1\", name=\"p1\", import_id=\"p1\")\n        p2_pkg = Package(id=p2_id, derived_id=\"pkgx/p2\", name=\"p2\", import_id=\"p2\")\n\n        # Existing build dependency\n        existing_build_dep = LegacyDependency(\n            package_id=p1_id,\n            dependency_id=p2_id,\n            dependency_type_id=mock_config.dependency_types.build,\n        )\n\n        cache = Cache(\n            package_map={\"p1\": p1_pkg, \"p2\": p2_pkg},\n            url_map={},\n            package_urls={},\n            dependencies={p1_id: {existing_build_dep}},\n        )\n\n        # Parsed data only has runtime dependency\n        new_pkg_data = create_pkgx_package(\n            dependencies=[\"p2\"],  # runtime\n            build_deps=[],  # no build deps\n        )\n\n        diff = PkgxDiff(mock_config, cache, mock_db, mock_logger)\n        new_deps, removed_deps = diff.diff_deps(\"p1\", new_pkg_data)\n\n        # Should remove build and add runtime\n        assert len(removed_deps) == 1\n        assert removed_deps[0].dependency_id == p2_id\n        assert removed_deps[0].dependency_type_id == mock_config.dependency_types.build\n\n        assert len(new_deps) == 1\n        assert new_deps[0].dependency_id == p2_id\n        assert new_deps[0].dependency_type_id == mock_config.dependency_types.runtime\n\n    def test_dependency_type_priority_new_package(\n        self, mock_config, mock_logger, mock_db\n    ):\n        \"\"\"Test case 4: p1 has no dependencies to p2 in cache,\n        p1 has both runtime and build dependencies to p2 in parsed data.\n        Expect one new runtime dependency (priority over build).\"\"\"\n\n        p1_id = uuid4()\n        p2_id = uuid4()\n\n        p1_pkg = Package(id=p1_id, derived_id=\"pkgx/p1\", name=\"p1\", import_id=\"p1\")\n        p2_pkg = Package(id=p2_id, 
derived_id=\"pkgx/p2\", name=\"p2\", import_id=\"p2\")\n\n        cache = Cache(\n            package_map={\"p1\": p1_pkg, \"p2\": p2_pkg},\n            url_map={},\n            package_urls={},\n            dependencies={},  # No existing dependencies\n        )\n\n        # Parsed data has both runtime and build dependencies to p2\n        new_pkg_data = create_pkgx_package(\n            dependencies=[\"p2\"],  # runtime\n            build_deps=[\"p2\"],  # build\n        )\n\n        diff = PkgxDiff(mock_config, cache, mock_db, mock_logger)\n        new_deps, removed_deps = diff.diff_deps(\"p1\", new_pkg_data)\n\n        # Should only create one new dependency - runtime (higher priority)\n        assert len(removed_deps) == 0\n        assert len(new_deps) == 1\n        assert new_deps[0].dependency_id == p2_id\n        assert new_deps[0].dependency_type_id == mock_config.dependency_types.runtime\n\n    def test_dependency_type_priority_with_test(\n        self, mock_config, mock_logger, mock_db\n    ):\n        \"\"\"Test priority handling with test dependencies: Runtime > Build > Test\"\"\"\n\n        p1_id = uuid4()\n        p2_id = uuid4()\n        p3_id = uuid4()\n        p4_id = uuid4()\n\n        p1_pkg = Package(id=p1_id, derived_id=\"pkgx/p1\", name=\"p1\", import_id=\"p1\")\n        p2_pkg = Package(id=p2_id, derived_id=\"pkgx/p2\", name=\"p2\", import_id=\"p2\")\n        p3_pkg = Package(id=p3_id, derived_id=\"pkgx/p3\", name=\"p3\", import_id=\"p3\")\n        p4_pkg = Package(id=p4_id, derived_id=\"pkgx/p4\", name=\"p4\", import_id=\"p4\")\n\n        cache = Cache(\n            package_map={\"p1\": p1_pkg, \"p2\": p2_pkg, \"p3\": p3_pkg, \"p4\": p4_pkg},\n            url_map={},\n            package_urls={},\n            dependencies={},\n        )\n\n        # Parsed data with overlapping dependencies across different types\n        new_pkg_data = create_pkgx_package(\n            dependencies=[\"p2\", \"p3\"],  # runtime: p2, p3\n            build_deps=[\"p2\", \"p4\"],  # build: p2, p4\n            test_deps=[\"p2\", \"p3\", \"p4\"],  # test: p2, p3, p4\n        )\n\n        diff = PkgxDiff(mock_config, cache, mock_db, mock_logger)\n        new_deps, removed_deps = diff.diff_deps(\"p1\", new_pkg_data)\n\n        # Should create dependencies based on priority:\n        # p2: runtime (highest priority among runtime/build/test)\n        # p3: runtime (highest priority among runtime/test)\n        # p4: build (highest priority among build/test)\n        assert len(removed_deps) == 0\n        assert len(new_deps) == 3\n\n        # Sort by dependency_id for consistent testing\n        new_deps_sorted = sorted(new_deps, key=lambda d: str(d.dependency_id))\n\n        # p2 should be runtime (highest priority)\n        p2_dep = next(d for d in new_deps_sorted if d.dependency_id == p2_id)\n        assert p2_dep.dependency_type_id == mock_config.dependency_types.runtime\n\n        # p3 should be runtime (highest priority)\n        p3_dep = next(d for d in new_deps_sorted if d.dependency_id == p3_id)\n        assert p3_dep.dependency_type_id == mock_config.dependency_types.runtime\n\n        # p4 should be build (highest available priority)\n        p4_dep = next(d for d in new_deps_sorted if d.dependency_id == p4_id)\n        assert p4_dep.dependency_type_id == mock_config.dependency_types.build\n"
  },
  {
    "path": "tests/package_managers/pkgx/test_special_case.py",
    "content": "\"\"\"\nTest special case URL handling in PkgxTransformer.\n\nThis module tests the special_case method which handles URL transformations\nfor specific package sources like crates.io, x.org, and pkgx.sh.\n\"\"\"\n\nimport pytest\n\nfrom package_managers.pkgx.url import special_case\n\n\n@pytest.mark.transformer\nclass TestSpecialCase:\n    \"\"\"Test special case URL transformations.\"\"\"\n\n    def test_special_case_crates_io(self, mock_logger):\n        \"\"\"Test that crates.io URLs are properly transformed.\"\"\"\n        assert (\n            special_case(\"crates.io/pkgx\", mock_logger)\n            == \"https://crates.io/crates/pkgx\"\n        )\n\n    def test_special_case_x_org(self, mock_logger):\n        \"\"\"Test that x.org URLs are normalized.\"\"\"\n        assert special_case(\"x.org/ice\", mock_logger) == \"https://x.org\"\n        assert special_case(\"x.org/xxf86vm\", mock_logger) == \"https://x.org\"\n\n    def test_special_case_pkgx_sh(self, mock_logger):\n        \"\"\"Test that pkgx.sh URLs are redirected to GitHub.\"\"\"\n        assert (\n            special_case(\"pkgx.sh/pkgx\", mock_logger)\n            == \"https://github.com/pkgxdev/pkgx\"\n        )\n\n    def test_special_case_no_slashes(self, mock_logger):\n        \"\"\"Test that URLs without slashes are returned as-is.\"\"\"\n        assert special_case(\"abseil.io\", mock_logger) == \"abseil.io\"\n\n    def test_special_case_double_slashes(self, mock_logger):\n        \"\"\"Test that URLs with double slashes are returned as-is.\"\"\"\n        assert (\n            special_case(\"github.com/awslabs/llrt\", mock_logger)\n            == \"github.com/awslabs/llrt\"\n        )\n"
  },
  {
    "path": "tests/ranker/test_compute_canon_name.py",
    "content": "#!/usr/bin/env uv run --with pytest\nimport pytest\n\nfrom ranker.naming import (\n    check_if_better,\n    compute_canon_name,\n    extract_repo_name_from_url,\n    score_name,\n)\n\n\n@pytest.mark.parametrize(\n    \"url, best_guess\",\n    [\n        (\"github.com/user/repo\", \"repo\"),\n        (\"gitlab.com/user/repo\", \"repo\"),\n        (\"bitbucket.org/user/repo\", \"repo\"),\n        (\"not-a-valid-url\", \"not-a-valid-url\"),\n        (\"\", \"\"),\n    ],\n)\ndef test_extract_repo_name_from_url(url, best_guess):\n    assert extract_repo_name_from_url(url) == best_guess\n\n\n@pytest.mark.parametrize(\n    \"name, best_guess, expected_score\",\n    [\n        (\"@user/repo\", \"repo\", 3),\n        (\"test3js\", \"web3.js\", 8),\n        (\"web3\", \"web3.js\", 11),\n        (\"@platonenterprise/web3\", \"web3.js\", -3),\n        (\"eleventy-plugin-embed-everything\", \"embed-everything\", 1),\n        (\"eleventy-plugin-embed-ted\", \"embed-everything\", 0),\n    ],\n)\ndef test_score_name(name, best_guess, expected_score):\n    assert score_name(name, best_guess) == expected_score\n\n\n@pytest.mark.parametrize(\n    \"name, best_guess, package_name, expected\",\n    [\n        (\n            \"web3.js\",\n            \"test3js\",\n            \"https://github.com/ethereum/web3.js#readmeweb3.js\",\n            \"test3js\",\n        ),\n        (\"web3.js\", \"web3\", \"test3js\", \"web3\"),\n        (\"web3.js\", \"@platonenterprise/web3\", \"web3\", \"web3\"),\n    ],\n)\ndef test_check_if_better(name, best_guess, package_name, expected):\n    assert check_if_better(name, best_guess, package_name) == expected\n\n\n@pytest.mark.parametrize(\n    \"url, package_name, existing_name, expected\",\n    [\n        # new canon, we should always have the package_name\n        (\"github.com/user/repo\", \"repo\", \"\", \"repo\"),\n        (\n            \"github.com/user/repo\",\n            \"@scoped/random-name-123\",\n            \"@scoped/random-name-123\",\n            \"@scoped/random-name-123\",\n        ),\n        (\n            \"gfscott.com/embed-everything\",\n            \"eleventy-plugin-embed-everything\",\n            \"gfscott.com/embed-everything\",\n            \"eleventy-plugin-embed-everything\",\n        ),\n        (\n            \"gfscott.com/embed-everything\",\n            \"eleventy-plugin-embed-ted\",\n            \"eleventy-plugin-embed-everything\",\n            \"eleventy-plugin-embed-everything\",\n        ),\n    ],\n)\ndef test_compute_canon_name(url, package_name, existing_name, expected):\n    assert compute_canon_name(url, package_name, existing_name) == expected\n"
  },
  {
    "path": "tests/ranker/test_dedupe.py",
    "content": "\"\"\"\nTest the package deduplication functionality in the ranker.\n\nThis module tests the dedupe.main function which handles deduplication of packages\nbased on their homepage URLs, creating and managing canonical package representations.\n\"\"\"\n\nfrom datetime import datetime\nfrom unittest.mock import MagicMock, patch\nfrom uuid import uuid4\n\nimport pytest\n\nfrom core.models import URL, Canon, Package\nfrom ranker.config import DedupeConfig\nfrom ranker.dedupe import DedupeDB, main\n\n\n@pytest.fixture\ndef ids():\n    \"\"\"Fixture providing consistent IDs for testing.\"\"\"\n    return {\n        \"homepage_url_type\": uuid4(),\n        \"package_manager\": uuid4(),\n        \"pkg1\": uuid4(),\n        \"pkg2\": uuid4(),\n        \"pkg3\": uuid4(),\n        \"canon1\": uuid4(),\n        \"canon2\": uuid4(),\n        \"canon3\": uuid4(),\n        \"url1\": uuid4(),\n        \"url2\": uuid4(),\n        \"url3\": uuid4(),\n    }\n\n\n@pytest.fixture\ndef test_packages(ids):\n    \"\"\"Fixture providing test package objects.\"\"\"\n    return {\n        \"package1\": Package(\n            id=ids[\"pkg1\"],\n            name=\"package1\",\n            package_manager_id=ids[\"package_manager\"],\n            import_id=\"pkg1\",\n            derived_id=\"npm/package1\",\n            created_at=datetime.now(),\n            updated_at=datetime.now(),\n        ),\n        \"package2\": Package(\n            id=ids[\"pkg2\"],\n            name=\"package2\",\n            package_manager_id=ids[\"package_manager\"],\n            import_id=\"pkg2\",\n            derived_id=\"npm/package2\",\n            created_at=datetime.now(),\n            updated_at=datetime.now(),\n        ),\n        \"package3\": Package(\n            id=ids[\"pkg3\"],\n            name=\"package3\",\n            package_manager_id=ids[\"package_manager\"],\n            import_id=\"pkg3\",\n            derived_id=\"npm/package3\",\n            created_at=datetime.now(),\n            updated_at=datetime.now(),\n        ),\n    }\n\n\n@pytest.fixture\ndef test_urls(ids):\n    \"\"\"Fixture providing test URL objects.\"\"\"\n    canonical_url = \"github.com/example/repo\"\n    non_canonical_url = \"https://github.com/example/repo\"\n    different_url = \"https://gitlab.com/example/repo\"\n\n    return {\n        \"canonical\": URL(\n            id=ids[\"url1\"],\n            url=canonical_url,\n            url_type_id=ids[\"homepage_url_type\"],\n            created_at=datetime.now(),\n            updated_at=datetime.now(),\n        ),\n        \"non_canonical\": URL(\n            id=ids[\"url2\"],\n            url=non_canonical_url,\n            url_type_id=ids[\"homepage_url_type\"],\n            created_at=datetime.now(),\n            updated_at=datetime.now(),\n        ),\n        \"different\": URL(\n            id=ids[\"url3\"],\n            url=different_url,\n            url_type_id=ids[\"homepage_url_type\"],\n            created_at=datetime.now(),\n            updated_at=datetime.now(),\n        ),\n    }\n\n\n@pytest.fixture\ndef mock_dedupe_config(ids):\n    \"\"\"Fixture providing mock DedupeConfig.\"\"\"\n    config = MagicMock(spec=DedupeConfig)\n    config.load = True\n    config.homepage_url_type_id = ids[\"homepage_url_type\"]\n    return config\n\n\n@pytest.fixture\ndef mock_db():\n    \"\"\"Fixture providing mock DedupeDB.\"\"\"\n    return MagicMock(spec=DedupeDB)\n\n\ndef capture_ingest_calls(mock_db):\n    \"\"\"Helper function to capture arguments passed to db.ingest.\"\"\"\n    
ingest_calls = []\n\n    def capture_ingest(\n        new_canons, updated_canons, new_canon_packages, updated_canon_packages\n    ):\n        ingest_calls.append(\n            (new_canons, updated_canons, new_canon_packages, updated_canon_packages)\n        )\n\n    mock_db.ingest.side_effect = capture_ingest\n    return ingest_calls\n\n\n@pytest.mark.ranker\nclass TestDedupe:\n    \"\"\"Test the deduplication of packages - focused on different cases.\"\"\"\n\n    def test_new_canon_new_mapping(\n        self, ids, test_packages, test_urls, mock_dedupe_config, mock_db\n    ):\n        \"\"\"\n        Test: URL has no canon AND package has no existing mapping\n\n        Expected: Create new canon + create new mapping\n        \"\"\"\n        # Arrange\n        package = test_packages[\"package1\"]\n        homepage_url = test_urls[\"canonical\"]\n\n        # Current state: no canons exist for this URL, no package mapping exists\n        mock_db.get_current_canons.return_value = {}  # URL has no canon\n        mock_db.get_current_canon_packages.return_value = {}  # Package has no mapping\n        mock_db.get_packages_with_homepages.return_value = [(package, homepage_url)]\n        mock_db.get_all_package_names.return_value = {ids[\"pkg1\"]: \"package1\"}\n\n        ingest_calls = capture_ingest_calls(mock_db)\n\n        # Act\n        with patch.dict(\"os.environ\", {\"LOAD\": \"true\", \"TEST\": \"false\"}):\n            main(mock_dedupe_config, mock_db)\n\n        # Assert\n        assert len(ingest_calls) == 1, \"Should call ingest exactly once\"\n\n        new_canons, updated_canons, new_canon_packages, updated_canon_packages = (\n            ingest_calls[0]\n        )\n\n        # Verify canon creation\n        assert len(new_canons) == 1, \"Should create exactly one new canon\"\n        assert len(updated_canons) == 0, \"Should not update any canons\"\n        assert len(new_canon_packages) == 1, \"Should create exactly one new mapping\"\n        assert len(updated_canon_packages) == 0, \"Should not update any mappings\"\n\n        created_canon = new_canons[0]\n        assert (\n            created_canon.url_id == ids[\"url1\"]\n        ), \"Canon should reference correct URL ID\"\n        assert created_canon.name == \"package1\", \"Canon name should be the package name\"\n\n        # Verify mapping creation\n        created_mapping = new_canon_packages[0]\n        assert created_mapping.package_id == ids[\"pkg1\"], \"Should map correct package\"\n        assert created_mapping.canon_id == created_canon.id, \"Should map to new canon\"\n\n    def test_new_canon_update_mapping(\n        self, ids, test_packages, test_urls, mock_dedupe_config, mock_db\n    ):\n        \"\"\"\n        Test: URL has no canon AND package has existing mapping to different canon\n\n        Expected: Create new canon + update existing mapping\n        \"\"\"\n        # Arrange\n        package = test_packages[\"package1\"]\n        homepage_url = test_urls[\"canonical\"]\n\n        # Create existing canon for different URL\n        existing_canon = Canon(\n            id=ids[\"canon2\"],\n            url_id=ids[\"url2\"],  # Different URL\n            name=\"old-canon\",\n            created_at=datetime.now(),\n            updated_at=datetime.now(),\n        )\n\n        # Current state: no canon for this URL, but package is mapped to different canon\n        mock_db.get_current_canons.return_value = {ids[\"url2\"]: existing_canon}\n        mock_db.get_current_canon_packages.return_value = {\n            
ids[\"pkg1\"]: {\"id\": uuid4(), \"canon_id\": existing_canon.id}\n        }\n        mock_db.get_packages_with_homepages.return_value = [(package, homepage_url)]\n        mock_db.get_all_package_names.return_value = {ids[\"pkg1\"]: \"package1\"}\n\n        ingest_calls = capture_ingest_calls(mock_db)\n\n        # Act\n        with patch.dict(\"os.environ\", {\"LOAD\": \"true\", \"TEST\": \"false\"}):\n            main(mock_dedupe_config, mock_db)\n\n        # Assert\n        assert len(ingest_calls) == 1, \"Should call ingest exactly once\"\n\n        new_canons, updated_canons, new_canon_packages, updated_canon_packages = (\n            ingest_calls[0]\n        )\n\n        # Verify canon creation\n        assert len(new_canons) == 1, \"Should create exactly one new canon\"\n        assert len(updated_canons) == 0, \"Should not update any canons\"\n        assert len(new_canon_packages) == 0, \"Should not create new mappings\"\n        assert len(updated_canon_packages) == 1, \"Should update exactly one mapping\"\n\n        created_canon = new_canons[0]\n        assert (\n            created_canon.url_id == ids[\"url1\"]\n        ), \"Canon should reference correct URL ID\"\n        assert created_canon.name == \"package1\", \"Canon name should be the package name\"\n\n        # Verify mapping update (should point to NEW canon, not old one)\n        updated_mapping = updated_canon_packages[0]\n        assert \"id\" in updated_mapping, \"Update should include canon package ID\"\n        assert (\n            updated_mapping[\"canon_id\"] == created_canon.id\n        ), \"Should update to NEW canon\"\n        assert (\n            updated_mapping[\"canon_id\"] != ids[\"canon2\"]\n        ), \"Should NOT point to old canon\"\n        assert \"updated_at\" in updated_mapping, \"Update should include timestamp\"\n\n    def test_no_changes_needed(\n        self, ids, test_packages, test_urls, mock_dedupe_config, mock_db\n    ):\n        \"\"\"\n        Test: URL has canon AND package already linked to that canon\n\n        Expected: Do nothing (no changes)\n        \"\"\"\n        # Arrange\n        package = test_packages[\"package1\"]\n        homepage_url = test_urls[\"canonical\"]\n\n        existing_canon = Canon(\n            id=ids[\"canon1\"],\n            url_id=ids[\"url1\"],\n            name=\"existing-canon\",\n            created_at=datetime.now(),\n            updated_at=datetime.now(),\n        )\n\n        # Current state: URL has canon, package linked to that same canon\n        mock_db.get_current_canons.return_value = {ids[\"url1\"]: existing_canon}\n        mock_db.get_current_canon_packages.return_value = {\n            ids[\"pkg1\"]: {\"id\": uuid4(), \"canon_id\": ids[\"canon1\"]}\n        }\n        mock_db.get_packages_with_homepages.return_value = [(package, homepage_url)]\n        mock_db.get_all_package_names.return_value = {ids[\"pkg1\"]: \"existing-canon\"}\n\n        ingest_calls = capture_ingest_calls(mock_db)\n\n        # Act\n        with patch.dict(\"os.environ\", {\"LOAD\": \"true\", \"TEST\": \"false\"}):\n            main(mock_dedupe_config, mock_db)\n\n        # Assert - should call ingest with empty lists (no changes)\n        assert len(ingest_calls) == 1, \"Should call ingest exactly once\"\n\n        new_canons, updated_canons, new_canon_packages, updated_canon_packages = (\n            ingest_calls[0]\n        )\n\n        assert len(new_canons) == 0, \"Should not create any canons\"\n        assert len(updated_canons) == 0, \"Should not update any 
canons\"\n        assert len(new_canon_packages) == 0, \"Should not create any mappings\"\n        assert len(updated_canon_packages) == 0, \"Should not update any mappings\"\n\n    def test_update_existing_mapping(\n        self, ids, test_packages, test_urls, mock_dedupe_config, mock_db\n    ):\n        \"\"\"\n        Test: URL has canon AND package linked to different canon\n\n        Expected: Update mapping to correct canon\n        \"\"\"\n        # Arrange\n        package = test_packages[\"package1\"]\n        homepage_url = test_urls[\"canonical\"]\n\n        correct_canon = Canon(\n            id=ids[\"canon1\"],\n            url_id=ids[\"url1\"],  # This URL's canon\n            name=\"correct-name\",\n            created_at=datetime.now(),\n            updated_at=datetime.now(),\n        )\n\n        wrong_canon = Canon(\n            id=ids[\"canon2\"],\n            url_id=ids[\"url2\"],  # Different URL's canon\n            name=\"correct-name\",\n            created_at=datetime.now(),\n            updated_at=datetime.now(),\n        )\n\n        # Current state: URL has canon, but package linked to wrong canon\n        mock_db.get_current_canons.return_value = {\n            ids[\"url1\"]: correct_canon,\n            ids[\"url2\"]: wrong_canon,\n        }\n        mock_db.get_current_canon_packages.return_value = {\n            ids[\"pkg1\"]: {\n                \"id\": uuid4(),\n                \"canon_id\": ids[\"canon2\"],\n            }  # Linked to wrong canon\n        }\n        mock_db.get_packages_with_homepages.return_value = [(package, homepage_url)]\n        mock_db.get_all_package_names.return_value = {ids[\"pkg1\"]: \"correct-name\"}\n\n        ingest_calls = capture_ingest_calls(mock_db)\n\n        # Act\n        with patch.dict(\"os.environ\", {\"LOAD\": \"true\", \"TEST\": \"false\"}):\n            main(mock_dedupe_config, mock_db)\n\n        # Assert\n        assert len(ingest_calls) == 1, \"Should call ingest exactly once\"\n\n        new_canons, updated_canons, new_canon_packages, updated_canon_packages = (\n            ingest_calls[0]\n        )\n\n        # Should only update mapping, no new creations\n        assert len(new_canons) == 0, \"Should not create any canons\"\n        assert len(updated_canons) == 0, \"Should not update any canons\"\n        assert len(new_canon_packages) == 0, \"Should not create any new mappings\"\n        assert len(updated_canon_packages) == 1, \"Should update exactly one mapping\"\n\n        # Verify mapping update points to correct canon\n        updated_mapping = updated_canon_packages[0]\n        assert \"id\" in updated_mapping, \"Update should include canon package ID\"\n        assert (\n            updated_mapping[\"canon_id\"] == ids[\"canon1\"]\n        ), \"Should update to correct canon\"\n        assert (\n            updated_mapping[\"canon_id\"] != ids[\"canon2\"]\n        ), \"Should NOT point to wrong canon\"\n        assert \"updated_at\" in updated_mapping, \"Update should include timestamp\"\n\n    def test_create_new_mapping(\n        self, ids, test_packages, test_urls, mock_dedupe_config, mock_db\n    ):\n        \"\"\"\n        Test: URL has canon AND package has no mapping\n\n        Expected: Create new mapping to existing canon\n        \"\"\"\n        # Arrange\n        package = test_packages[\"package1\"]\n        homepage_url = test_urls[\"canonical\"]\n\n        existing_canon = Canon(\n            id=ids[\"canon1\"],\n            url_id=ids[\"url1\"],\n            name=\"existing-canon\",\n 
           created_at=datetime.now(),\n            updated_at=datetime.now(),\n        )\n\n        # Current state: URL has canon, but package has no mapping\n        mock_db.get_current_canons.return_value = {ids[\"url1\"]: existing_canon}\n        mock_db.get_current_canon_packages.return_value = {}  # Package not linked\n        mock_db.get_packages_with_homepages.return_value = [(package, homepage_url)]\n        mock_db.get_all_package_names.return_value = {ids[\"pkg1\"]: \"existing-canon\"}\n\n        ingest_calls = capture_ingest_calls(mock_db)\n\n        # Act\n        with patch.dict(\"os.environ\", {\"LOAD\": \"true\", \"TEST\": \"false\"}):\n            main(mock_dedupe_config, mock_db)\n\n        # Assert\n        assert len(ingest_calls) == 1, \"Should call ingest exactly once\"\n\n        new_canons, updated_canons, new_canon_packages, updated_canon_packages = (\n            ingest_calls[0]\n        )\n\n        # Should only create new mapping, no updates or new canons\n        assert len(new_canons) == 0, \"Should not create any canons\"\n        assert len(updated_canons) == 0, \"Should not update any canons\"\n        assert len(new_canon_packages) == 1, \"Should create exactly one new mapping\"\n        assert len(updated_canon_packages) == 0, \"Should not update any mappings\"\n\n        # Verify mapping creation points to existing canon\n        created_mapping = new_canon_packages[0]\n        assert created_mapping.package_id == ids[\"pkg1\"], \"Should map correct package\"\n        assert created_mapping.canon_id == ids[\"canon1\"], \"Should map to existing canon\"\n\n    def test_multiple_packages_same_homepage_creates_single_canon(\n        self, ids, test_packages, test_urls, mock_dedupe_config, mock_db\n    ):\n        \"\"\"\n        Test deduplication: Multiple packages with same homepage URL should create only\n        one canon\n\n        This tests the core deduplication logic where:\n        - Package 1 points to URL X (no existing canon)\n        - Package 2 also points to URL X\n        - Should create only ONE canon for URL X\n        - Both packages should be linked to the same canon\n        \"\"\"\n        # Arrange\n        package1 = test_packages[\"package1\"]\n        package2 = test_packages[\"package2\"]\n        shared_homepage_url = test_urls[\"canonical\"]\n\n        # Current state: no canons exist for this URL, no package mappings exist\n        mock_db.get_current_canons.return_value = {}  # URL has no canon\n        mock_db.get_current_canon_packages.return_value = {}  # No mappings\n        mock_db.get_packages_with_homepages.return_value = [\n            (package1, shared_homepage_url),  # Both packages point to same URL\n            (package2, shared_homepage_url),\n        ]\n        mock_db.get_all_package_names.return_value = {\n            ids[\"pkg1\"]: \"package1\",\n            ids[\"pkg2\"]: \"package2\",\n        }\n\n        ingest_calls = capture_ingest_calls(mock_db)\n\n        # Act\n        with patch.dict(\"os.environ\", {\"LOAD\": \"true\", \"TEST\": \"false\"}):\n            main(mock_dedupe_config, mock_db)\n\n        # Assert\n        assert len(ingest_calls) == 1, \"Should call ingest exactly once\"\n\n        new_canons, updated_canons, new_canon_packages, updated_canon_packages = (\n            ingest_calls[0]\n        )\n\n        # Should create only ONE canon for the shared URL\n        assert len(new_canons) == 1, \"Should create exactly one canon for shared URL\"\n        assert len(updated_canons) == 0, 
\"Should not update any canons\"\n        assert len(new_canon_packages) == 2, \"Should create mappings for both packages\"\n        assert len(updated_canon_packages) == 0, \"Should not update any mappings\"\n\n        # Verify single canon creation\n        created_canon = new_canons[0]\n        assert created_canon.url_id == ids[\"url1\"], \"Canon should reference shared URL\"\n\n        # Verify both packages map to the same canon\n        canon_ids = {mapping.canon_id for mapping in new_canon_packages}\n        assert len(canon_ids) == 1, \"Both packages should map to same canon\"\n        assert (\n            canon_ids.pop() == created_canon.id\n        ), \"Both should map to the created canon\"\n\n        # Verify package IDs\n        package_ids = {mapping.package_id for mapping in new_canon_packages}\n        assert package_ids == {ids[\"pkg1\"], ids[\"pkg2\"]}, \"Should map both packages\"\n\n        # Verify the name. Heuristics wouldn't rank \"package2\" higher than \"package1\"\n        assert created_canon.name == \"package1\", \"Canon name should be the package name\"\n\n    def test_empty_urls_no_deduplication(\n        self, ids, test_packages, test_urls, mock_dedupe_config, mock_db\n    ):\n        \"\"\"\n        Test that packages with empty URLs are not deduplicated with each other\n\n        A lot of packages in CHAI have no URLs, and we should not deduplicate them\n        with each other. This test case ensures that if two packages have no URLs,\n        they do not get deduplicated.\n        \"\"\"\n        # Arrange\n        package1 = test_packages[\"package1\"]\n        package2 = test_packages[\"package2\"]\n\n        empty_url1 = URL(\n            id=ids[\"url1\"],\n            url=\"\",\n            url_type_id=ids[\"homepage_url_type\"],\n            created_at=datetime.now(),\n            updated_at=datetime.now(),\n        )\n\n        empty_url2 = URL(\n            id=ids[\"url2\"],\n            url=\"\",\n            url_type_id=ids[\"homepage_url_type\"],\n            created_at=datetime.now(),\n            updated_at=datetime.now(),\n        )\n\n        # Current state: no canons exist for these URLs, no package mappings exist\n        mock_db.get_current_canons.return_value = {}  # No canons\n        mock_db.get_current_canon_packages.return_value = {}  # No mappings\n        mock_db.get_packages_with_homepages.return_value = [\n            (package1, empty_url1),  # Both packages have empty URLs\n            (package2, empty_url2),\n        ]\n\n        ingest_calls = capture_ingest_calls(mock_db)\n\n        # Act\n        with patch.dict(\"os.environ\", {\"LOAD\": \"true\", \"TEST\": \"false\"}):\n            main(mock_dedupe_config, mock_db)\n\n        # Assert\n        assert len(ingest_calls) == 1, \"Should call ingest exactly once\"\n\n        new_canons, updated_canons, new_canon_packages, updated_canon_packages = (\n            ingest_calls[0]\n        )\n\n        # Should not create any canons or mappings for empty URLs\n        assert len(new_canons) == 0, \"Should not create any canons for empty URLs\"\n        assert len(updated_canons) == 0, \"Should not update any canons\"\n        assert (\n            len(new_canon_packages) == 0\n        ), \"Should not create any mappings for empty URLs\"\n        assert len(updated_canon_packages) == 0, \"Should not update any mappings\"\n\n    def test_canon_name_update_when_url_changes(\n        self, ids, test_packages, test_urls, mock_dedupe_config, mock_db\n    ):\n        \"\"\"\n        
Test that canon name gets updated when the canonical URL changes\n\n        This tests canon update functionality when:\n        - A canon exists with name \"old-url\"\n        - The canonical URL for that canon changes to \"new-url\"\n        - The canon name should be updated to match the new URL\n        \"\"\"\n        # Arrange\n        package = test_packages[\"package1\"]\n\n        # URL with updated canonical form\n        updated_url = URL(\n            id=ids[\"url1\"],\n            url=\"github.com/example/new-repo\",  # Changed URL\n            url_type_id=ids[\"homepage_url_type\"],\n            created_at=datetime.now(),\n            updated_at=datetime.now(),\n        )\n\n        # Existing canon with old name\n        existing_canon = Canon(\n            id=ids[\"canon1\"],\n            url_id=ids[\"url1\"],\n            name=\"github.com/example/old-repo\",  # Old name\n            created_at=datetime.now(),\n            updated_at=datetime.now(),\n        )\n\n        # Current state: canon exists but name doesn't match current URL\n        mock_db.get_current_canons.return_value = {ids[\"url1\"]: existing_canon}\n        mock_db.get_current_canon_packages.return_value = {\n            ids[\"pkg1\"]: {\"id\": uuid4(), \"canon_id\": ids[\"canon1\"]}\n        }\n        mock_db.get_packages_with_homepages.return_value = [(package, updated_url)]\n        mock_db.get_all_package_names.return_value = {\n            ids[\"pkg1\"]: \"github.com/example/new-repo\"\n        }\n\n        ingest_calls = capture_ingest_calls(mock_db)\n\n        # Act\n        with patch.dict(\"os.environ\", {\"LOAD\": \"true\", \"TEST\": \"false\"}):\n            main(mock_dedupe_config, mock_db)\n\n        # Assert\n        assert len(ingest_calls) == 1, \"Should call ingest exactly once\"\n\n        new_canons, updated_canons, new_canon_packages, updated_canon_packages = (\n            ingest_calls[0]\n        )\n\n        # Should update the existing canon name\n        assert len(new_canons) == 0, \"Should not create any new canons\"\n        assert len(updated_canons) == 1, \"Should update exactly one canon\"\n        assert len(new_canon_packages) == 0, \"Should not create any new mappings\"\n        assert len(updated_canon_packages) == 0, \"Should not update any mappings\"\n\n        # Verify canon update\n        updated_canon = updated_canons[0]\n        assert \"id\" in updated_canon, \"Update should include canon ID\"\n        assert updated_canon[\"id\"] == ids[\"canon1\"], \"Should update correct canon\"\n        assert (\n            updated_canon[\"name\"] == \"github.com/example/new-repo\"\n        ), \"Should update name to new URL\"\n        assert \"updated_at\" in updated_canon, \"Update should include timestamp\"\n\n    def test_canon_update_with_multiple_packages(\n        self, ids, test_packages, test_urls, mock_dedupe_config, mock_db\n    ):\n        \"\"\"\n        Test canon update when multiple packages share the same canon\n\n        This ensures that when a canon's URL changes, the canon is updated\n        but all existing package mappings remain intact.\n        \"\"\"\n        # Arrange\n        package1 = test_packages[\"package1\"]\n        package2 = test_packages[\"package2\"]\n\n        # URL with updated canonical form\n        updated_url = URL(\n            id=ids[\"url1\"],\n            url=\"github.com/example/updated-repo\",\n            url_type_id=ids[\"homepage_url_type\"],\n            created_at=datetime.now(),\n            
updated_at=datetime.now(),\n        )\n\n        # Existing canon with old name\n        existing_canon = Canon(\n            id=ids[\"canon1\"],\n            url_id=ids[\"url1\"],\n            name=\"github.com/example/old-repo\",  # Different from current URL\n            created_at=datetime.now(),\n            updated_at=datetime.now(),\n        )\n\n        # Current state: both packages mapped to same canon\n        mock_db.get_current_canons.return_value = {ids[\"url1\"]: existing_canon}\n        mock_db.get_current_canon_packages.return_value = {\n            ids[\"pkg1\"]: {\"id\": uuid4(), \"canon_id\": ids[\"canon1\"]},\n            ids[\"pkg2\"]: {\"id\": uuid4(), \"canon_id\": ids[\"canon1\"]},\n        }\n        mock_db.get_packages_with_homepages.return_value = [\n            (package1, updated_url),\n            (package2, updated_url),\n        ]\n        mock_db.get_all_package_names.return_value = {\n            ids[\"pkg1\"]: \"github.com/example/updated-repo\",\n            ids[\"pkg2\"]: \"github.com/example/updated-repo\",\n        }\n\n        ingest_calls = capture_ingest_calls(mock_db)\n\n        # Act\n        with patch.dict(\"os.environ\", {\"LOAD\": \"true\", \"TEST\": \"false\"}):\n            main(mock_dedupe_config, mock_db)\n\n        # Assert\n        assert len(ingest_calls) == 1, \"Should call ingest exactly once\"\n\n        new_canons, updated_canons, new_canon_packages, updated_canon_packages = (\n            ingest_calls[0]\n        )\n\n        # Should only update the canon, no new mappings or updates to mappings\n        assert len(new_canons) == 0, \"Should not create any new canons\"\n        assert len(updated_canons) == 1, \"Should update exactly one canon\"\n        assert len(new_canon_packages) == 0, \"Should not create any new mappings\"\n        assert len(updated_canon_packages) == 0, \"Should not update any mappings\"\n\n        # Verify canon update\n        updated_canon = updated_canons[0]\n        assert updated_canon[\"id\"] == ids[\"canon1\"], \"Should update correct canon\"\n        assert (\n            updated_canon[\"name\"] == \"github.com/example/updated-repo\"\n        ), \"Should update name to new URL\"\n\n    def test_skip_when_load_disabled(self, mock_dedupe_config, mock_db):\n        \"\"\"\n        Test that no processing occurs when load is disabled\n\n        Expected: db.ingest should not be called\n        \"\"\"\n        # Arrange\n        mock_dedupe_config.load = False\n\n        # Act\n        with patch.dict(\"os.environ\", {\"LOAD\": \"false\", \"TEST\": \"false\"}):\n            main(mock_dedupe_config, mock_db)\n\n        # Assert\n        mock_db.ingest.assert_not_called()\n"
  },
  {
    "path": "tests/ranker/test_rx_graph.py",
    "content": "\"\"\"\nTest the CHAI graph ranking algorithm.\n\nThis module tests the rx_graph module which implements a custom graph-based\nranking algorithm for packages. The tests focus on verifying that the\ndistribute function conserves weight appropriately.\n\"\"\"\n\nimport random\nimport uuid\nfrom decimal import Decimal\n\nimport pytest\n\nfrom ranker.rx_graph import CHAI, PackageNode\n\n# Constants for the test\nNUM_NODES = 100000\nEDGE_PROBABILITY = 0.001\nSPLIT_RATIO = Decimal(\"0.85\")\nTOLERANCE = Decimal(\"1e-6\")\nMAX_ITER = 10000000\n\n\n@pytest.fixture\ndef large_chai_graph() -> tuple[CHAI, dict[uuid.UUID, Decimal]]:\n    \"\"\"Creates a large CHAI graph with random edges and personalization.\"\"\"\n    G = CHAI()\n    nodes = []\n    initial_personalization_raw = {}\n\n    # Create nodes\n    for _i in range(NUM_NODES):\n        canon_id = uuid.uuid4()\n        node = PackageNode(canon_id=canon_id)\n        node.index = G.add_node(node)\n        nodes.append(node)\n        # Assign random initial weight for personalization\n        initial_personalization_raw[canon_id] = Decimal(random.random())\n\n    # Normalize personalization to sum to 1\n    total_weight = sum(initial_personalization_raw.values())\n    personalization = {\n        uid: weight / total_weight\n        for uid, weight in initial_personalization_raw.items()\n    }\n    assert (\n        abs(sum(personalization.values()) - Decimal(\"1.0\")) <= TOLERANCE\n    ), f\"Initial personalization should sum to 1 within tolerance: {sum(personalization.values())}\"\n\n    # Add random edges (potential cycles)\n    node_indices = list(G.node_indices())\n    for u_idx in node_indices:\n        for v_idx in node_indices:\n            if u_idx != v_idx and random.random() < EDGE_PROBABILITY:\n                G.add_edge(u_idx, v_idx, None)  # Edge data is not used in distribute\n\n    return G, personalization\n"
  },
  {
    "path": "tests/scripts/upgrade_canons/test_analyze_packages_needing_canonicalization.py",
    "content": "#!/usr/bin/env pkgx uv run\n\nfrom unittest.mock import call, patch\nfrom uuid import UUID\n\nimport pytest\n\nfrom scripts.upgrade_canons.main import analyze_packages_needing_canonicalization\n\n\nclass TestAnalyzePackagesNeedingCanonicalization:\n    \"\"\"Test the analyze_packages_needing_canonicalization function\"\"\"\n\n    def setup_method(self):\n        \"\"\"Setup test fixtures\"\"\"\n        self.package_id_1 = UUID(\"11111111-1111-1111-1111-111111111111\")\n        self.package_id_2 = UUID(\"22222222-2222-2222-2222-222222222222\")\n        self.package_id_3 = UUID(\"33333333-3333-3333-3333-333333333333\")\n        self.package_id_4 = UUID(\"44444444-4444-4444-4444-444444444444\")\n\n    @patch(\"scripts.upgrade_canons.main.is_canonical_url\")\n    @patch(\"scripts.upgrade_canons.main.normalize_url\")\n    def test_case_1_should_create_canonical_url(\n        self, mock_normalize, mock_is_canonical\n    ):\n        \"\"\"\n        Test Case 1: Package has non-canonical URLs, canonical doesn't exist\n        Expected: Should return this package in the result\n        \"\"\"\n        # Setup mocks\n        mock_is_canonical.return_value = False\n        mock_normalize.return_value = \"github.com/org/repo\"\n\n        # Test data\n        package_url_map = {\n            self.package_id_1: [\n                \"https://github.com/org/repo\",\n                \"https://github.com/org/repo/tree/main\",\n                \"https://github.com/org/repo/blob/main/README.md\",\n            ]\n        }\n        existing_homepages = {\n            \"https://github.com/org/repo\",\n            \"https://github.com/org/repo/tree/main\",\n            \"https://github.com/org/repo/blob/main/README.md\",\n        }  # no canon\n\n        # Execute\n        result = analyze_packages_needing_canonicalization(\n            package_url_map, existing_homepages\n        )\n\n        # Verify\n        assert len(result) == 1\n        assert self.package_id_1 in result\n        assert result[self.package_id_1] == \"github.com/org/repo\"\n\n        # Verify mocks were called correctly\n        # is_canonical should be called once for each URL until it finds a canonical one (or all if none are canonical)\n        expected_calls = [\n            call(\"https://github.com/org/repo\"),\n            call(\"https://github.com/org/repo/tree/main\"),\n            call(\"https://github.com/org/repo/blob/main/README.md\"),\n        ]\n        mock_is_canonical.assert_has_calls(expected_calls)\n        assert mock_is_canonical.call_count == 3\n\n        # normalize should only be called once with the first URL\n        mock_normalize.assert_called_once_with(\"https://github.com/org/repo\")\n\n    @patch(\"scripts.upgrade_canons.main.is_canonical_url\")\n    @patch(\"scripts.upgrade_canons.main.normalize_url\")\n    def test_case_2_canonical_exists_in_database(\n        self, mock_normalize, mock_is_canonical\n    ):\n        \"\"\"\n        Test Case 2: Package has non-canonical URLs, but canonical already exists in DB\n        Expected: Should not return this package (skip it)\n        \"\"\"\n        # Setup mocks\n        mock_is_canonical.return_value = False\n        mock_normalize.return_value = \"https://example.com\"\n\n        # Test data\n        package_url_map = {\n            self.package_id_1: [\"http://example.com\", \"https://www.example.com\"]\n        }\n        existing_homepages = {\"https://example.com\"}  # Canonical already exists\n\n        # Execute\n        result = 
analyze_packages_needing_canonicalization(\n            package_url_map, existing_homepages\n        )\n\n        # Verify\n        assert len(result) == 0\n        assert self.package_id_1 not in result\n\n    @patch(\"scripts.upgrade_canons.main.is_canonical_url\")\n    @patch(\"scripts.upgrade_canons.main.normalize_url\")\n    def test_case_3_canonical_already_planned(self, mock_normalize, mock_is_canonical):\n        \"\"\"\n        Test Case 3: Two packages would create the same canonical URL\n        Expected: Only the first package should be included, second should be skipped\n        \"\"\"\n        # Setup mocks\n        mock_is_canonical.return_value = False\n        mock_normalize.return_value = (\n            \"https://example.com\"  # Both packages normalize to same URL\n        )\n\n        # Test data - both packages would create the same canonical URL\n        package_url_map = {\n            self.package_id_1: [\"http://example.com\"],\n            self.package_id_2: [\n                \"https://www.example.com\"\n            ],  # Different input, same canonical\n        }\n        existing_homepages = set()  # Empty - canonical doesn't exist\n\n        # Execute\n        result = analyze_packages_needing_canonicalization(\n            package_url_map, existing_homepages\n        )\n\n        # Verify - only one package should be included (whichever was processed first)\n        assert len(result) == 1\n        assert \"https://example.com\" in result.values()\n\n        # Verify that exactly one of the packages was included\n        included_packages = list(result.keys())\n        assert len(included_packages) == 1\n        assert included_packages[0] in [self.package_id_1, self.package_id_2]\n\n    @patch(\"scripts.upgrade_canons.main.is_canonical_url\")\n    def test_case_4_package_already_has_canonical(self, mock_is_canonical):\n        \"\"\"\n        Test Case 4: Package already has at least one canonical URL\n        Expected: Should not return this package (skip it)\n        \"\"\"\n        # Setup mocks - return True for canonical check\n        mock_is_canonical.return_value = True\n\n        # Test data\n        package_url_map = {\n            self.package_id_1: [\n                \"https://example.com\",\n                \"http://example.com\",\n            ]  # First URL is canonical\n        }\n        existing_homepages = set()\n\n        # Execute\n        result = analyze_packages_needing_canonicalization(\n            package_url_map, existing_homepages\n        )\n\n        # Verify\n        assert len(result) == 0\n        assert self.package_id_1 not in result\n\n        # Verify that we never tried to normalize (because we skipped early)\n        mock_is_canonical.assert_called_once_with(\"https://example.com\")\n\n    @patch(\"scripts.upgrade_canons.main.is_canonical_url\")\n    @patch(\"scripts.upgrade_canons.main.normalize_url\")\n    def test_mixed_scenarios(self, mock_normalize, mock_is_canonical):\n        \"\"\"\n        Test with multiple packages covering different scenarios\n        \"\"\"\n\n        # Setup mocks with side effects for different URLs\n        def mock_is_canonical_side_effect(url):\n            return url == \"https://canonical.com\"  # Only this URL is canonical\n\n        def mock_normalize_side_effect(url):\n            if \"example\" in url:\n                return \"https://example.com\"\n            elif \"test\" in url:\n                return \"https://test.com\"\n            else:\n                return 
f\"https://{url.split('://')[1]}\"\n\n        mock_is_canonical.side_effect = mock_is_canonical_side_effect\n        mock_normalize.side_effect = mock_normalize_side_effect\n\n        # Test data\n        package_url_map = {\n            self.package_id_1: [\"http://example.com\"],  # Should create canonical\n            self.package_id_2: [\"https://canonical.com\"],  # Already canonical - skip\n            self.package_id_3: [\"http://test.com\"],  # Should create canonical\n            self.package_id_4: [\n                \"https://www.example.com\"\n            ],  # Same canonical as package_id_1 - skip\n        }\n        existing_homepages = set()\n\n        # Execute\n        result = analyze_packages_needing_canonicalization(\n            package_url_map, existing_homepages\n        )\n\n        # Verify\n        assert len(result) == 2\n\n        # Package 1 should be included (creates https://example.com)\n        assert self.package_id_1 in result\n        assert result[self.package_id_1] == \"https://example.com\"\n\n        # Package 2 should be skipped (already canonical)\n        assert self.package_id_2 not in result\n\n        # Package 3 should be included (creates https://test.com)\n        assert self.package_id_3 in result\n        assert result[self.package_id_3] == \"https://test.com\"\n\n        # Package 4 should be skipped (duplicate canonical URL)\n        assert self.package_id_4 not in result\n\n    def test_empty_inputs(self):\n        \"\"\"Test with empty inputs\"\"\"\n        result = analyze_packages_needing_canonicalization({}, set())\n        assert result == {}\n\n    @patch(\"scripts.upgrade_canons.main.is_canonical_url\")\n    @patch(\"scripts.upgrade_canons.main.normalize_url\")\n    def test_edge_case_empty_url_list(self, mock_normalize, mock_is_canonical):\n        \"\"\"Test with package that has empty URL list\"\"\"\n        # This shouldn't happen in practice, but let's handle it gracefully\n        package_url_map = {\n            self.package_id_1: []  # Empty URL list\n        }\n        existing_homepages = set()\n\n        # This will raise an IndexError when trying to access urls[0] in generate_canonical_url\n        # Let's verify this behavior is expected\n        with pytest.raises(IndexError):\n            analyze_packages_needing_canonicalization(\n                package_url_map, existing_homepages\n            )\n\n\nif __name__ == \"__main__\":\n    pytest.main([__file__])\n"
  }
]