[
  {
    "path": ".docker/minio/setup.sh",
    "content": "#!/bin/sh\n\n# Simple script to set up MinIO bucket and user\n# Based on example from MinIO issues\n\n# Format bucket name to ensure compatibility\nBUCKET_NAME=$(echo \"${S3_BUCKET_NAME}\" | tr '[:upper:]' '[:lower:]' | tr '_' '-')\n\n# Configure MinIO client\nmc alias set myminio http://minio:9000 ${MINIO_ROOT_USER} ${MINIO_ROOT_PASSWORD}\n\n# Remove bucket if it exists (for clean setup)\nmc rm -r --force myminio/${BUCKET_NAME} || true\n\n# Create bucket\nmc mb myminio/${BUCKET_NAME}\n\n# Set bucket policy to allow downloads\nmc anonymous set download myminio/${BUCKET_NAME}\n\n# Create user with access and secret keys\nmc admin user add myminio ${S3_ACCESS_KEY} ${S3_SECRET_KEY} || echo \"User already exists\"\n\n# Create policy for the bucket\necho '{\"Version\":\"2012-10-17\",\"Statement\":[{\"Effect\":\"Allow\",\"Action\":[\"s3:*\"],\"Resource\":[\"arn:aws:s3:::'${BUCKET_NAME}'/*\",\"arn:aws:s3:::'${BUCKET_NAME}'\"]}]}' > /tmp/policy.json\n\n# Apply policy\nmc admin policy create myminio gitingest-policy /tmp/policy.json || echo \"Policy already exists\"\nmc admin policy attach myminio gitingest-policy --user ${S3_ACCESS_KEY}\n\necho \"MinIO setup completed successfully\"\necho \"Bucket: ${BUCKET_NAME}\"\necho \"Access via console: http://localhost:9001\"\n"
  },
  {
    "path": ".dockerignore",
    "content": "# -------------------------------------------------\n# Base: reuse patterns from .gitignore\n# -------------------------------------------------\n\n# Operating-system\n.DS_Store\nThumbs.db\n\n# Editor / IDE settings\n.vscode/\n!.vscode/launch.json\n.idea/\n*.swp\n\n# Python virtual-envs & tooling\n.venv*/\n.python-version\n__pycache__/\n*.egg-info/\n*.egg\n.ruff_cache/\n\n# Test artifacts & coverage\n.pytest_cache/\n.coverage\ncoverage.xml\nhtmlcov/\n\n# Build, distribution & docs\nbuild/\ndist/\n*.wheel\n\n# Logs & runtime output\n*.log\nlogs/\n*.tmp\ntmp/\n\n# Project-specific files\nhistory.txt\ndigest.txt\n\n\n# -------------------------------------------------\n# Extra for Docker\n# -------------------------------------------------\n\n# Git history\n.git/\n.gitignore\n\n# Tests\ntests/\n\n# Docs\ndocs/\n*.md\nLICENSE\n\n# Local overrides & secrets\n.env\n\n# Docker files\n.dockerignore\nDockerfile*\n\n# -------------------------------------------------\n# Files required during build\n# -------------------------------------------------\n!pyproject.toml\n!src/\n"
  },
  {
    "path": ".github/ISSUE_TEMPLATE/bug_report.yml",
    "content": "name: Bug report 🐞\ndescription: Report a bug or internal server error when using Gitingest\ntitle: \"(bug): \"\nlabels: [\"bug\"]\nbody:\n  - type: markdown\n    attributes:\n      value: |\n        Thanks for taking the time to report a bug! :lady_beetle:\n\n        Please fill out the following details to help us reproduce and fix the issue. :point_down:\n\n  - type: dropdown\n    id: interface\n    attributes:\n      label: Which interface did you use?\n      default: 0\n      options:\n        - \"Select one...\"\n        - Web UI\n        - CLI\n        - PyPI package\n    validations:\n      required: true\n\n  - type: input\n    id: repo_url\n    attributes:\n      label: Repository URL (if public)\n      placeholder: e.g., https://github.com/<username>/<repo>/commit_branch_or_tag/blob_or_tree/subdir\n\n  - type: dropdown\n    id: git_host\n    attributes:\n      label: Git host\n      description: The Git host of the repository.\n      default: 0\n      options:\n        - \"Select one...\"\n        - GitHub (github.com)\n        - GitLab (gitlab.com)\n        - Bitbucket (bitbucket.org)\n        - Gitea (gitea.com)\n        - Codeberg (codeberg.org)\n        - Gist (gist.github.com)\n        - Kaggle (kaggle.com)\n        - GitHub Enterprise (github.company.com)\n        - Other (specify below)\n    validations:\n      required: true\n\n  - type: input\n    id: git_host_other\n    attributes:\n      label: Other Git host\n      placeholder: If you selected \"Other\", please specify the Git host here.\n\n  - type: dropdown\n    id: repo_visibility\n    attributes:\n      label: Repository visibility\n      default: 0\n      options:\n        - \"Select one...\"\n        - public\n        - private\n    validations:\n      required: true\n\n  - type: dropdown\n    id: revision\n    attributes:\n      label: Commit, branch, or tag\n      default: 0\n      options:\n        - \"Select one...\"\n        - default branch\n        - commit\n      
  - branch\n        - tag\n    validations:\n      required: true\n\n  - type: dropdown\n    id: ingest_scope\n    attributes:\n      label: Did you ingest the full repository or a subdirectory?\n      default: 0\n      options:\n        - \"Select one...\"\n        - full repository\n        - subdirectory\n    validations:\n      required: true\n\n  - type: dropdown\n    id: os\n    attributes:\n      label: Operating system\n      default: 0\n      options:\n        - \"Select one...\"\n        - Not relevant (Web UI)\n        - macOS\n        - Windows\n        - Linux\n    validations:\n      required: true\n\n  - type: dropdown\n    id: browser\n    attributes:\n      label: Browser (Web UI only)\n      default: 0\n      options:\n        - \"Select one...\"\n        - Not relevant (CLI / PyPI)\n        - Chrome\n        - Firefox\n        - Safari\n        - Edge\n        - Other (specify below)\n    validations:\n      required: true\n\n  - type: input\n    id: browser_other\n    attributes:\n      label: Other browser\n      placeholder: If you selected \"Other\", please specify the browser here.\n\n  - type: input\n    id: gitingest_version\n    attributes:\n      label: Gitingest version\n      placeholder: e.g., v0.1.5\n      description: Not required if you used the Web UI.\n\n  - type: input\n    id: python_version\n    attributes:\n      label: Python version\n      placeholder: e.g., 3.11.5\n      description: Not required if you used the Web UI.\n\n  - type: textarea\n    id: bug_description\n    attributes:\n      label: Bug description\n      placeholder: Describe the bug here.\n      description: A detailed but concise description of the bug.\n    validations:\n      required: true\n\n\n  - type: textarea\n    id: steps_to_reproduce\n    attributes:\n      label: Steps to reproduce\n      placeholder: Include the exact commands or actions that led to the error.\n      description: Include the exact commands or actions that led to the error *(if 
relevant)*.\n      render: shell\n\n  - type: textarea\n    id: expected_behavior\n    attributes:\n      label: Expected behavior\n      placeholder: Describe what you expected to happen.\n      description: Describe what you expected to happen *(if relevant)*.\n\n  - type: textarea\n    id: actual_behavior\n    attributes:\n      label: Actual behavior\n      description: Paste the full error message or stack trace here.\n\n  - type: textarea\n    id: additional_context\n    attributes:\n      label: Additional context, logs, or screenshots\n      placeholder: Add any other context, links, or screenshots about the issue here.\n"
  },
  {
    "path": ".github/ISSUE_TEMPLATE/feature_request.yml",
    "content": "name: Feature request 💡\ndescription: Suggest a new feature or improvement for Gitingest\ntitle: \"(feat): \"\nlabels: [\"enhancement\"]\nbody:\n  - type: markdown\n    attributes:\n      value: |\n        Thanks for taking the time to help us improve **Gitingest**! :sparkles:\n\n        Please fill in the sections below to describe your idea. The more detail you provide, the easier it is for us to evaluate and plan the work. :point_down:\n\n  - type: input\n    id: summary\n    attributes:\n      label: Feature summary\n      placeholder: One-sentence description of the feature.\n    validations:\n      required: true\n\n  - type: textarea\n    id: problem\n    attributes:\n      label: Problem / motivation\n      description: What problem does this feature solve? How does it affect your workflow?\n      placeholder: Why is this feature important? Describe the pain point or limitation you're facing.\n    validations:\n      required: true\n\n  - type: textarea\n    id: proposal\n    attributes:\n      label: Proposed solution\n      placeholder: Describe what you would like to see happen.\n      description: Outline the feature as you imagine it. 
*(optional)*\n\n\n  - type: textarea\n    id: alternatives\n    attributes:\n      label: Alternatives considered\n      placeholder: List other approaches you've considered or work-arounds you use today.\n      description: Feel free to mention why those alternatives don't fully solve the problem.\n\n  - type: dropdown\n    id: interface\n    attributes:\n      label: Which interface would this affect?\n      default: 0\n      options:\n        - \"Select one...\"\n        - Web UI\n        - CLI\n        - PyPI package\n        - CLI + PyPI package\n        - All\n    validations:\n      required: true\n\n  - type: dropdown\n    id: priority\n    attributes:\n      label: How important is this to you?\n      default: 0\n      options:\n        - \"Select one...\"\n        - Nice to have\n        - Important\n        - Critical\n    validations:\n      required: true\n\n  - type: dropdown\n    id: willingness\n    attributes:\n      label: Would you like to work on this feature yourself?\n      default: 0\n      options:\n        - \"Select one...\"\n        - Yes, I'd like to implement it\n        - Maybe, if I get some guidance\n        - No, just requesting (absolutely fine!)\n    validations:\n      required: true\n\n  - type: dropdown\n    id: support_needed\n    attributes:\n      label: Would you need support from the maintainers (if you're implementing it yourself)?\n      default: 0\n      options:\n        - \"Select one...\"\n        - No, I can handle it solo\n        - Yes, I'd need some guidance\n        - Not sure yet\n        - This is just a suggestion, I'm not planning to implement it myself (absolutely fine!)\n\n  - type: textarea\n    id: additional_context\n    attributes:\n      label: Additional context, screenshots, or examples\n      placeholder: Add links, sketches, or any other context that would help us understand and implement the feature.\n"
  },
  {
    "path": ".github/workflows/ci.yml",
    "content": "name: CI\n\non:\n  push:\n    branches: [main]\n  pull_request:\n    branches: [main]\n\nconcurrency:\n  group: ${{ github.workflow }}-${{ github.ref }}\n  cancel-in-progress: true\n\npermissions:\n  contents: read\n\njobs:\n  test:\n    runs-on: ${{ matrix.os }}\n    strategy:\n      fail-fast: false\n      matrix:\n        os: [ubuntu-latest, macos-latest, windows-latest]\n        python-version: [\"3.8\", \"3.13\"]\n        include:\n          - os: ubuntu-latest\n            python-version: \"3.13\"\n            coverage: true\n\n    steps:\n      - name: Harden the runner (Audit all outbound calls)\n        uses: step-security/harden-runner@ec9f2d5744a09debf3a187a3f4f675c53b671911 # v2.13.0\n        with:\n          egress-policy: audit\n\n      - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0\n\n      - name: Set up Python\n        uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0\n        with:\n          python-version: ${{ matrix.python-version }}\n          cache: 'pip'\n\n      - name: Install dependencies\n        run: |\n          python -m pip install --upgrade pip\n          python -m pip install \".[dev,server]\"\n\n      - name: Cache pytest results\n        uses: actions/cache@v4\n        with:\n          path: .pytest_cache\n          key: ${{ runner.os }}-pytest-${{ matrix.python-version }}-${{ hashFiles('**/pytest.ini') }}\n          restore-keys: |\n            ${{ runner.os }}-pytest-${{ matrix.python-version }}-\n\n      - name: Run tests\n        if: ${{ matrix.coverage != true }}\n        run: pytest\n\n      - name: Run tests\n        if: ${{ matrix.coverage == true }}\n        run: pytest\n\n\n\n      - name: Run pre-commit hooks\n        uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # v3.0.1\n        if: ${{ matrix.python-version == '3.13' && matrix.os == 'ubuntu-latest' }}\n"
  },
  {
    "path": ".github/workflows/codeql.yml",
    "content": "# For most projects, this workflow file will not need changing; you simply need\n# to commit it to your repository.\n#\n# You may wish to alter this file to override the set of languages analyzed,\n# or to provide custom queries or build logic.\n#\n# ******** NOTE ********\n# We have attempted to detect the languages in your repository. Please check\n# the `language` matrix defined below to confirm you have the correct set of\n# supported CodeQL languages.\n#\nname: \"CodeQL\"\n\non:\n  push:\n    branches: [\"main\"]\n  pull_request:\n    # The branches below must be a subset of the branches above\n    branches: [\"main\"]\n  schedule:\n    - cron: \"0 0 * * 1\"\n\npermissions:\n  contents: read\n\njobs:\n  analyze:\n    name: Analyze\n    runs-on: ubuntu-latest\n    permissions:\n      actions: read\n      contents: read\n      security-events: write\n\n    strategy:\n      fail-fast: false\n      matrix:\n        language: [\"javascript\", \"python\"]\n        # CodeQL supports [ $supported-codeql-languages ]\n        # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support\n\n    steps:\n      - name: Harden the runner (Audit all outbound calls)\n        uses: step-security/harden-runner@ec9f2d5744a09debf3a187a3f4f675c53b671911 # v2.13.0\n        with:\n          egress-policy: audit\n\n      - name: Checkout repository\n        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0\n\n      # Initializes the CodeQL tools for scanning.\n      - name: Initialize CodeQL\n        uses: github/codeql-action/init@df559355d593797519d70b90fc8edd5db049e7a2 # v3.29.9\n        with:\n          languages: ${{ matrix.language }}\n          # If you wish to specify custom queries, you can do so here or in a config file.\n          # By default, queries listed here will override any specified in a config file.\n          # Prefix the list here with \"+\" to use these queries and those in the config file.\n\n  
    # Autobuild attempts to build any compiled languages (C/C++, C#, or Java).\n      # If this step fails, then you should remove it and run the build manually (see below)\n      - name: Autobuild\n        uses: github/codeql-action/autobuild@df559355d593797519d70b90fc8edd5db049e7a2 # v3.29.9\n\n      # ℹ️ Command-line programs to run using the OS shell.\n      # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun\n\n      #   If the Autobuild fails above, remove it and uncomment the following three lines.\n      #   Modify them (or add more) to build your code if your project requires it; please refer to the EXAMPLE below for guidance.\n\n      # - run: |\n      #   echo \"Run, Build Application using script\"\n      #   ./location_of_script_within_repo/buildscript.sh\n\n      - name: Perform CodeQL Analysis\n        uses: github/codeql-action/analyze@df559355d593797519d70b90fc8edd5db049e7a2 # v3.29.9\n        with:\n          category: \"/language:${{matrix.language}}\"\n"
  },
  {
    "path": ".github/workflows/dependency-review.yml",
    "content": "# Dependency Review Action\n#\n# This Action will scan dependency manifest files that change as part of a Pull Request,\n# surfacing known-vulnerable versions of the packages declared or updated in the PR.\n# Once installed, if the workflow run is marked as required,\n# PRs introducing known-vulnerable packages will be blocked from merging.\n#\n# Source repository: https://github.com/actions/dependency-review-action\nname: 'Dependency Review'\non: [pull_request]\n\npermissions:\n  contents: read\n\njobs:\n  dependency-review:\n    runs-on: ubuntu-latest\n    steps:\n      - name: Harden the runner (Audit all outbound calls)\n        uses: step-security/harden-runner@ec9f2d5744a09debf3a187a3f4f675c53b671911 # v2.13.0\n        with:\n          egress-policy: audit\n\n      - name: 'Checkout Repository'\n        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0\n      - name: 'Dependency Review'\n        uses: actions/dependency-review-action@da24556b548a50705dd671f47852072ea4c105d9 # v4.7.1\n"
  },
  {
    "path": ".github/workflows/deploy-pr.yml",
    "content": "name: Manage PR Temp Envs\n'on':\n  pull_request:\n    types:\n      - labeled\n      - unlabeled\n      - closed\n\npermissions:\n  contents: read\n  pull-requests: write\n\nenv:\n  APP_NAME: gitingest\n  FLUX_OWNER: '${{ github.repository_owner }}'\n  FLUX_REPO: '${{ secrets.CR_FLUX_REPO }}'\n\njobs:\n  deploy-pr-env:\n    if: >-\n      ${{ github.event.action == 'labeled' && github.event.label.name ==\n      'deploy-pr-temp-env' }}\n    runs-on: ubuntu-latest\n    steps:\n      - name: Create GitHub App token\n        uses: actions/create-github-app-token@v2\n        id: app-token\n        with:\n          app-id: '${{ secrets.CR_APP_CI_APP_ID }}'\n          private-key: '${{ secrets.CR_APP_CI_PRIVATE_KEY }}'\n          owner: '${{ env.FLUX_OWNER }}'\n          repositories: '${{ env.FLUX_REPO }}'\n\n      - name: Checkout Flux repo\n        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0\n        with:\n          repository: '${{ env.FLUX_OWNER }}/${{ env.FLUX_REPO }}'\n          token: '${{ steps.app-token.outputs.token }}'\n          path: flux-repo\n          persist-credentials: false\n\n      - name: Export PR ID\n        shell: bash\n        run: 'echo \"PR_ID=${{ github.event.pull_request.number }}\" >> $GITHUB_ENV'\n\n      - name: Ensure template exists\n        shell: bash\n        run: >\n          T=\"flux-repo/pr-template/${APP_NAME}\"\n\n          [[ -d \"$T\" ]] || { echo \"Missing $T\"; exit 1; }\n\n          [[ $(find \"$T\" -type f | wc -l) -gt 0 ]] || { echo \"No files in $T\";\n          exit 1; }\n\n      - name: Render & copy template\n        shell: bash\n        run: |\n          SRC=\"flux-repo/pr-template/${APP_NAME}\"\n          DST=\"flux-repo/deployments/prs-${APP_NAME}/${PR_ID}\"\n          mkdir -p \"$DST\"\n          cp -r \"$SRC/.\" \"$DST/\"\n          find \"$DST\" -type f -print0 \\\n            | xargs -0 -n1 sed -i \"s|@PR-ID@|${PR_ID}|g\"\n\n      - name: Sanity‑check rendered 
output\n        shell: bash\n        run: >\n          E=$(find \"flux-repo/pr-template/${APP_NAME}\" -type f | wc -l)\n\n          G=$(find \"flux-repo/deployments/prs-${APP_NAME}/${PR_ID}\" -type f | wc\n          -l)\n\n          (( G == E )) || { echo \"Expected $E files, got $G\"; exit 1; }\n\n      - name: Commit & push creation\n        shell: bash\n        run: >\n          cd flux-repo\n\n          git config user.name  \"${{ steps.app-token.outputs.app-slug }}[bot]\"\n\n          git config user.email \"${{ steps.app-token.outputs.app-slug\n          }}[bot]@users.noreply.github.com\"\n\n          git add .\n\n          git commit -m \"chore(prs-${APP_NAME}): create temp env for PR #${{\n          env.PR_ID }} [skip ci]\" || echo \"Nothing to commit\"\n\n          git remote set-url origin \\\n            https://x-access-token:${{ steps.app-token.outputs.token }}@github.com/${{ env.FLUX_OWNER }}/${{ env.FLUX_REPO }}.git\n          git push origin HEAD:main\n\n      - name: Comment preview URL on PR\n        uses: thollander/actions-comment-pull-request@v3\n        with:\n          github-token: '${{ secrets.GITHUB_TOKEN }}'\n          pr-number: '${{ github.event.pull_request.number }}'\n          comment-tag: 'pr-preview'\n          create-if-not-exists: 'true'\n          message: |\n            🌐 [Preview environment](https://pr-${{ env.PR_ID }}.${{ env.APP_NAME }}.coderamp.dev/) for PR #${{ env.PR_ID }}\n\n            📊 [Log viewer](https://app.datadoghq.eu/logs?query=kube_namespace%3Aprs-gitingest%20version%3Apr-${{ env.PR_ID }})\n\n  remove-pr-env:\n    if: >-\n      (github.event.action == 'unlabeled' && github.event.label.name ==\n      'deploy-pr-temp-env') || (github.event.action == 'closed')\n    runs-on: ubuntu-latest\n    steps:\n      - name: Create GitHub App token\n        uses: actions/create-github-app-token@v2\n        id: app-token\n        with:\n          app-id: '${{ secrets.CR_APP_CI_APP_ID }}'\n          private-key: '${{ 
secrets.CR_APP_CI_PRIVATE_KEY }}'\n          owner: '${{ env.FLUX_OWNER }}'\n          repositories: '${{ env.FLUX_REPO }}'\n\n      - name: Checkout Flux repo\n        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0\n        with:\n          repository: '${{ env.FLUX_OWNER }}/${{ env.FLUX_REPO }}'\n          token: '${{ steps.app-token.outputs.token }}'\n          path: flux-repo\n          persist-credentials: false\n\n      - name: Export PR ID\n        shell: bash\n        run: 'echo \"PR_ID=${{ github.event.pull_request.number }}\" >> $GITHUB_ENV'\n\n      - name: Remove deployed directory\n        shell: bash\n        run: |\n          DST=\"flux-repo/deployments/prs-${APP_NAME}/${PR_ID}\"\n          if [[ -d \"$DST\" ]]; then\n            rm -rf \"$DST\"\n            echo \"✅ Deleted $DST\"\n          else\n            echo \"⏭️ Nothing to delete at $DST\"\n          fi\n\n      - name: Commit & push deletion\n        shell: bash\n        run: >\n          cd flux-repo\n\n          git config user.name  \"${{ steps.app-token.outputs.app-slug }}[bot]\"\n\n          git config user.email \"${{ steps.app-token.outputs.app-slug\n          }}[bot]@users.noreply.github.com\"\n\n          git add -A\n\n          git commit -m \"chore(prs-${APP_NAME}): remove temp env for PR #${{\n          env.PR_ID }} [skip ci]\" || echo \"Nothing to commit\"\n\n          git remote set-url origin \\\n            https://x-access-token:${{ steps.app-token.outputs.token }}@github.com/${{ env.FLUX_OWNER }}/${{ env.FLUX_REPO }}.git\n          git push origin HEAD:main\n\n      - name: Comment undeploy notice on PR\n        uses: thollander/actions-comment-pull-request@v3\n        with:\n          github-token: '${{ secrets.GITHUB_TOKEN }}'\n          pr-number: '${{ github.event.pull_request.number }}'\n          comment-tag: 'pr-preview'\n          create-if-not-exists: 'true'\n          message: |\n            ⚙️ Preview environment was undeployed.\n"
  },
  {
    "path": ".github/workflows/docker-build.ecr.yml",
    "content": "name: Build & Push Container\n\non:\n  push:\n    branches:\n      - 'main'\n    tags:\n      - '*'\n  merge_group:\n  pull_request:\n    types: [labeled, synchronize, reopened, ready_for_review, opened]\n\nenv:\n  PUSH_FROM_PR: >-\n    ${{ github.event_name == 'pull_request' &&\n       (\n         contains(github.event.pull_request.labels.*.name, 'push-container') ||\n         contains(github.event.pull_request.labels.*.name, 'deploy-pr-temp-env')\n       )\n    }}\n\njobs:\n  terraform:\n    name: \"ECR\"\n    runs-on: ubuntu-latest\n    if: github.repository == 'coderamp-labs/gitingest'\n\n    permissions:\n      id-token: write\n      contents: read\n      pull-requests: write\n\n    steps:\n      - name: Checkout\n        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0\n        with:\n          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}\n\n      - name: configure aws credentials\n        uses: aws-actions/configure-aws-credentials@v4\n        with:\n          role-to-assume: ${{ secrets.CODERAMP_AWS_ECR_REGISTRY_PUSH_ROLE_ARN }}\n          role-session-name: GitHub_to_AWS_via_FederatedOIDC\n          aws-region: eu-west-1\n\n      - name: Set current timestamp\n        id: vars\n        run: |\n          echo \"timestamp=$(date +%s)\" >> $GITHUB_OUTPUT\n          echo \"sha_short=$(git rev-parse --short HEAD)\" >> $GITHUB_OUTPUT\n          echo \"sha_full=$(git rev-parse HEAD)\" >> $GITHUB_OUTPUT\n\n      - name: Determine version and deployment context\n        id: version\n        run: |\n          REPO_URL=\"https://github.com/${{ github.repository }}\"\n\n          if [[ \"${{ github.ref_type }}\" == \"tag\" ]]; then\n            # Tag deployment - display version, link to release\n            echo \"version=${{ github.ref_name }}\" >> $GITHUB_OUTPUT\n            echo \"app_version=${{ github.ref_name }}\" >> $GITHUB_OUTPUT\n            echo 
\"app_version_url=${REPO_URL}/releases/tag/${{ github.ref_name }}\" >> $GITHUB_OUTPUT\n          elif [[ \"${{ github.event_name }}\" == \"pull_request\" ]]; then\n            # PR deployment - display pr-XXX, link to PR commit\n            PR_NUMBER=\"${{ github.event.pull_request.number }}\"\n            COMMIT_HASH=\"${{ steps.vars.outputs.sha_full }}\"\n            echo \"version=${PR_NUMBER}/merge-${COMMIT_HASH}\" >> $GITHUB_OUTPUT\n            echo \"app_version=pr-${PR_NUMBER}\" >> $GITHUB_OUTPUT\n            echo \"app_version_url=${REPO_URL}/pull/${PR_NUMBER}/commits/${COMMIT_HASH}\" >> $GITHUB_OUTPUT\n          else\n            # Branch deployment - display branch name, link to commit\n            BRANCH_NAME=\"${{ github.ref_name }}\"\n            COMMIT_HASH=\"${{ steps.vars.outputs.sha_full }}\"\n            echo \"app_version=${BRANCH_NAME}\" >> $GITHUB_OUTPUT\n            echo \"app_version_url=${REPO_URL}/commit/${COMMIT_HASH}\" >> $GITHUB_OUTPUT\n          fi\n\n      - name: Login to Amazon ECR\n        id: login-ecr\n        uses: aws-actions/amazon-ecr-login@v2\n\n      - name: Docker Meta\n        id: meta\n        uses: docker/metadata-action@v5\n        with:\n          images: |\n            ${{ secrets.ECR_REGISTRY_URL }}\n          flavor: |\n            latest=false\n          tags: |\n            type=ref,event=branch,branch=main,suffix=-${{ steps.vars.outputs.sha_short }}-${{ steps.vars.outputs.timestamp }}\n            type=ref,event=pr,suffix=-${{ steps.vars.outputs.sha_short }}-${{ steps.vars.outputs.timestamp }}\n            type=pep440,pattern={{raw}}\n\n      - name: Set up QEMU\n        uses: docker/setup-qemu-action@v3\n\n      - name: Set up Docker Buildx\n        uses: docker/setup-buildx-action@v3\n\n      - name: Build and push\n        uses: docker/build-push-action@v6\n        with:\n          context: .\n          platforms: linux/amd64, linux/arm64\n          push: ${{ github.event_name != 'pull_request' || 
env.PUSH_FROM_PR == 'true' }}\n          tags: ${{ steps.meta.outputs.tags }}\n          labels: ${{ steps.meta.outputs.labels }}\n          build-args: |\n            APP_REPOSITORY=https://github.com/${{ github.repository }}\n            APP_VERSION=${{ steps.version.outputs.app_version }}\n            APP_VERSION_URL=${{ steps.version.outputs.app_version_url }}\n          cache-from: type=gha\n          cache-to: type=gha,mode=max\n"
  },
  {
    "path": ".github/workflows/docker-build.ghcr.yml",
    "content": "name: Build & Push Container\n\non:\n  push:\n    branches:\n      - 'main'\n    tags:\n      - '*'\n  merge_group:\n  pull_request:\n    types: [labeled, synchronize, reopened, ready_for_review, opened]\n\nconcurrency:\n  group: ${{ github.workflow }}-${{ github.head_ref || github.ref }}\n  cancel-in-progress: true\n\nenv:\n  REGISTRY: ghcr.io\n  IMAGE_NAME: ${{ github.repository }}\n  PUSH_FROM_PR: >-\n    ${{ github.event_name == 'pull_request' &&\n       (\n         contains(github.event.pull_request.labels.*.name, 'push-container') ||\n         contains(github.event.pull_request.labels.*.name, 'deploy-pr-temp-env')\n       )\n    }}\n\npermissions:\n  contents: read\n\njobs:\n  docker-build:\n    name: \"GHCR\"\n    runs-on: ubuntu-latest\n    permissions:\n      contents: read\n      packages: write\n      attestations: write\n      id-token: write\n    steps:\n      - name: Harden the runner (Audit all outbound calls)\n        uses: step-security/harden-runner@ec9f2d5744a09debf3a187a3f4f675c53b671911 # v2.13.0\n        with:\n          egress-policy: audit\n\n      - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0\n        with:\n          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}\n\n      - name: Set current timestamp\n        id: vars\n        run: |\n          echo \"timestamp=$(date +%s)\" >> $GITHUB_OUTPUT\n          echo \"sha_short=$(git rev-parse --short HEAD)\" >> $GITHUB_OUTPUT\n          echo \"sha_full=$(git rev-parse HEAD)\" >> $GITHUB_OUTPUT\n\n      - name: Determine version and deployment context\n        id: version\n        run: |\n          REPO_URL=\"https://github.com/${{ github.repository }}\"\n\n          if [[ \"${{ github.ref_type }}\" == \"tag\" ]]; then\n            # Tag deployment - display version, link to release\n            echo \"version=${{ github.ref_name }}\" >> $GITHUB_OUTPUT\n            echo \"app_version=${{ 
github.ref_name }}\" >> $GITHUB_OUTPUT\n            echo \"app_version_url=${REPO_URL}/releases/tag/${{ github.ref_name }}\" >> $GITHUB_OUTPUT\n          elif [[ \"${{ github.event_name }}\" == \"pull_request\" ]]; then\n            # PR deployment - display pr-XXX, link to PR commit\n            PR_NUMBER=\"${{ github.event.pull_request.number }}\"\n            COMMIT_HASH=\"${{ steps.vars.outputs.sha_full }}\"\n            echo \"version=${PR_NUMBER}/merge-${COMMIT_HASH}\" >> $GITHUB_OUTPUT\n            echo \"app_version=pr-${PR_NUMBER}\" >> $GITHUB_OUTPUT\n            echo \"app_version_url=${REPO_URL}/pull/${PR_NUMBER}/commits/${COMMIT_HASH}\" >> $GITHUB_OUTPUT\n          else\n            # Branch deployment - display branch name, link to commit\n            BRANCH_NAME=\"${{ github.ref_name }}\"\n            COMMIT_HASH=\"${{ steps.vars.outputs.sha_full }}\"\n            echo \"app_version=${BRANCH_NAME}\" >> $GITHUB_OUTPUT\n            echo \"app_version_url=${REPO_URL}/commit/${COMMIT_HASH}\" >> $GITHUB_OUTPUT\n          fi\n\n      - name: Log in to the Container registry\n        uses: docker/login-action@184bdaa0721073962dff0199f1fb9940f07167d1 # v3.5.0\n        with:\n          registry: ${{ env.REGISTRY }}\n          username: ${{ github.actor }}\n          password: ${{ secrets.GITHUB_TOKEN }}\n\n      - name: Docker Meta\n        id: meta\n        uses: docker/metadata-action@c1e51972afc2121e065aed6d45c65596fe445f3f # v5.8.0\n        with:\n          images: |\n            ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}\n          flavor: |\n            latest=false\n          tags: |\n            type=ref,event=branch,branch=main\n            type=ref,event=branch,branch=main,suffix=-${{ steps.vars.outputs.sha_short }}-${{ steps.vars.outputs.timestamp }}\n            type=pep440,pattern={{raw}}\n            type=ref,event=pr,suffix=-${{ steps.vars.outputs.sha_short }}-${{ steps.vars.outputs.timestamp }}\n\n      - name: Set up QEMU\n        uses: 
docker/setup-qemu-action@29109295f81e9208d7d86ff1c6c12d2833863392 # v3.6.0\n\n      - name: Set up Docker Buildx\n        uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # v3.11.1\n\n      - name: Build and push\n        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # v6.18.0\n        id: push\n        with:\n          context: .\n          platforms: linux/amd64, linux/arm64\n          push: ${{ github.event_name != 'pull_request' || env.PUSH_FROM_PR == 'true' }}\n          tags: ${{ steps.meta.outputs.tags }}\n          labels: ${{ steps.meta.outputs.labels }}\n          build-args: |\n            APP_REPOSITORY=https://github.com/${{ github.repository }}\n            APP_VERSION=${{ steps.version.outputs.app_version }}\n            APP_VERSION_URL=${{ steps.version.outputs.app_version_url }}\n          cache-from: type=gha\n          cache-to: type=gha,mode=max\n\n      - name: Generate artifact attestation\n        if: github.event_name != 'pull_request' || env.PUSH_FROM_PR == 'true'\n        uses: actions/attest-build-provenance@e8998f949152b193b063cb0ec769d69d929409be # v2.4.0\n        with:\n          subject-name: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME}}\n          subject-digest: ${{ steps.push.outputs.digest }}\n          push-to-registry: true\n"
  },
  {
    "path": ".github/workflows/pr-title-check.yml",
    "content": "name: PR Conventional Commit Validation\n\non:\n  pull_request:\n    types: [opened, synchronize, reopened, edited]\n\njobs:\n  validate-pr-title:\n    runs-on: ubuntu-latest\n    steps:\n      - name: Harden the runner (Audit all outbound calls)\n        uses: step-security/harden-runner@ec9f2d5744a09debf3a187a3f4f675c53b671911 # v2.13.0\n        with:\n          egress-policy: audit\n\n      - name: PR Conventional Commit Validation\n        uses:  ytanikin/pr-conventional-commits@b72758283dcbee706975950e96bc4bf323a8d8c0 # 1.4.2\n        with:\n          task_types: '[\"feat\",\"fix\",\"docs\",\"test\",\"ci\",\"refactor\",\"perf\",\"chore\",\"revert\"]'\n          add_label: 'false'\n"
  },
  {
    "path": ".github/workflows/publish_to_pypi.yml",
    "content": "name: Publish to PyPI\n\non:\n  release:\n    types: [created] # Run when you click \"Publish release\"\n  workflow_dispatch: # ... or run it manually from the Actions tab\n\npermissions:\n  contents: read\n\njobs:\n  release-build:\n    runs-on: ubuntu-latest\n\n    steps:\n      - name: Harden the runner (Audit all outbound calls)\n        uses: step-security/harden-runner@ec9f2d5744a09debf3a187a3f4f675c53b671911 # v2.13.0\n        with:\n          egress-policy: audit\n\n      - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0\n\n      - name: Set up Python 3.13\n        uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0\n        with:\n          python-version: \"3.13\"\n          cache: pip\n          cache-dependency-path: pyproject.toml\n\n      - name: Build package\n        run: |\n          python -m pip install --upgrade pip\n          python -m pip install build twine\n          python -m build\n          twine check dist/*\n      - name: Upload dist artefact\n        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2\n        with:\n          name: dist\n          path: dist/\n\n# Publish to PyPI (only if \"dist/\" succeeded)\n  pypi-publish:\n    needs: release-build\n    runs-on: ubuntu-latest\n    environment: pypi\n\n    permissions:\n      id-token: write # OIDC token for trusted publishing\n\n    steps:\n      - name: Harden the runner (Audit all outbound calls)\n        uses: step-security/harden-runner@ec9f2d5744a09debf3a187a3f4f675c53b671911 # v2.13.0\n        with:\n          egress-policy: audit\n\n      - uses: actions/download-artifact@634f93cb2916e3fdff6788551b99b062d0335ce0 # v5.0.0\n        with:\n          name: dist\n          path: dist/\n\n      - uses: pypa/gh-action-pypi-publish@76f52bc884231f62b9a034ebfe128415bbaabdfc # release/v1\n        with:\n          verbose: true\n"
  },
  {
    "path": ".github/workflows/rebase-needed.yml",
    "content": "name: PR Needs Rebase\n\non:\n  workflow_dispatch: {}\n  schedule:\n    - cron: '0 * * * *'\n\npermissions:\n  pull-requests: write\n\njobs:\n  label-rebase-needed:\n    runs-on: ubuntu-latest\n    if: github.repository == 'coderamp-labs/gitingest'\n\n    concurrency:\n      group: ${{ github.workflow }}-${{ github.ref }}\n      cancel-in-progress: true\n\n    steps:\n      - name: Check for merge conflicts\n        uses: eps1lon/actions-label-merge-conflict@v3\n        with:\n          dirtyLabel: 'rebase needed :construction:'\n          repoToken: '${{ secrets.GITHUB_TOKEN }}'\n          commentOnClean: This pull request has resolved merge conflicts and is ready for review.\n          commentOnDirty: This pull request has merge conflicts that must be resolved before it can be merged.\n          retryMax: 30\n          continueOnMissingPermissions: false\n"
  },
  {
    "path": ".github/workflows/release-please.yml",
    "content": "name: release-please\non:\n  push:\n    branches:\n      - main\n\npermissions:\n  contents: write\n  pull-requests: write\n\njobs:\n  release:\n    runs-on: ubuntu-latest\n    steps:\n      - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0\n\n      - name: Create GitHub App token\n        uses: actions/create-github-app-token@v2\n        id: app-token\n        with:\n          app-id: '${{ secrets.CR_APP_CI_APP_ID }}'\n          private-key: '${{ secrets.CR_APP_CI_PRIVATE_KEY }}'\n          owner: '${{ env.FLUX_OWNER }}'\n          repositories: '${{ env.FLUX_REPO }}'\n\n      - name: Release Please\n        uses: googleapis/release-please-action@v4\n        with:\n          token: '${{ steps.app-token.outputs.token }}'\n"
  },
  {
    "path": ".github/workflows/scorecard.yml",
    "content": "name: OSSF Scorecard\non:\n  branch_protection_rule:\n  schedule:\n    - cron: '33 11 * * 2'  # Every Tuesday at 11:33 AM UTC\n  push:\n    branches: [ main ]\n\npermissions: read-all\n\nconcurrency: # avoid overlapping runs\n  group: scorecard-${{ github.ref }}\n  cancel-in-progress: true\n\njobs:\n  analysis:\n    name: Scorecard analysis\n    runs-on: ubuntu-latest\n    permissions:\n      security-events: write # upload SARIF to code-scanning\n      id-token: write # publish results for the badge\n\n    steps:\n      - name: Harden the runner (Audit all outbound calls)\n        uses: step-security/harden-runner@ec9f2d5744a09debf3a187a3f4f675c53b671911 # v2.13.0\n        with:\n          egress-policy: audit\n\n      - name: Checkout\n        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0\n        with:\n          persist-credentials: false\n\n      - name: Run Scorecard\n        uses: ossf/scorecard-action@f35c64557cf912815708bb1126d9948f3e459487\n        with:\n          results_file: results.sarif\n          results_format: sarif\n          publish_results: true  # enables the public badge\n\n      - name: Upload to code-scanning\n        uses: github/codeql-action/upload-sarif@df559355d593797519d70b90fc8edd5db049e7a2 # v3.29.9\n        with:\n          sarif_file: results.sarif\n"
  },
  {
    "path": ".github/workflows/stale.yml",
    "content": "name: \"Close stale issues and PRs\"\n\non:\n  schedule:\n    - cron: \"0 6 * * *\"\n  workflow_dispatch: {}\n\npermissions:\n  issues: write\n  pull-requests: write\n\njobs:\n  stale:\n    runs-on: ubuntu-latest\n    steps:\n      - uses: actions/stale@v9\n        with:\n          repo-token: ${{ secrets.GITHUB_TOKEN }}\n          days-before-stale: 45\n          days-before-close: 10\n          stale-issue-label: stale\n          stale-pr-label: stale\n          stale-issue-message: |\n            Hi there! We haven’t seen activity here for 45 days, so I’m marking this issue as stale.\n            If you’d like to keep it open, please leave a comment within 10 days. Thanks!\n          stale-pr-message: |\n            Hi there! We haven’t seen activity on this pull request for 45 days, so I’m marking it as stale.\n            If you’d like to keep it open, please leave a comment within 10 days. Thanks!\n          close-issue-message: |\n            Hi there! We haven’t heard anything for 10 days, so I’m closing this issue. Feel free to reopen if you’d like to continue the discussion. Thanks!\n          close-pr-message: |\n            Hi there! We haven’t heard anything for 10 days, so I’m closing this pull request. Feel free to reopen if you’d like to continue working on it. Thanks!\n"
  },
  {
    "path": ".gitignore",
    "content": "# Operating-system\n.DS_Store\nThumbs.db\n\n# Editor / IDE settings\n.vscode/\n!.vscode/launch.json\n.idea/\n*.swp\n\n# Python virtual-envs & tooling\n.venv*/\nvenv/\n.python-version\n__pycache__/\n*.egg-info/\n*.egg\n.ruff_cache/\n\n# Test artifacts & coverage\n.pytest_cache/\n.coverage\ncoverage.xml\nhtmlcov/\n\n# Build, distribution & docs\nbuild/\ndist/\n*.wheel\n\n\n\n# Logs & runtime output\n*.log\nlogs/\n*.tmp\ntmp/\n\n# Project-specific files\nhistory.txt\ndigest.txt\n\n# Environment variables\n.env\n"
  },
  {
    "path": ".pre-commit-config.yaml",
    "content": "repos:\n  - repo: https://github.com/pre-commit/pre-commit-hooks\n    rev: v5.0.0\n    hooks:\n      - id: check-added-large-files\n        description: 'Prevent large files from being committed.'\n        args: ['--maxkb=10000']\n\n      - id: check-case-conflict\n        description: 'Check for files that would conflict in case-insensitive filesystems.'\n\n      - id: fix-byte-order-marker\n        description: 'Remove utf-8 byte order marker.'\n\n      - id: mixed-line-ending\n        description: 'Replace mixed line ending.'\n\n      - id: destroyed-symlinks\n        description: 'Detect symlinks which are changed to regular files with a content of a path which that symlink was pointing to.'\n\n      - id: check-ast\n        description: 'Check for parseable syntax.'\n\n      - id: end-of-file-fixer\n        description: 'Ensure that a file is either empty, or ends with one newline.'\n\n      - id: trailing-whitespace\n        description: 'Trim trailing whitespace.'\n        exclude: CHANGELOG.md\n\n      - id: check-docstring-first\n        description: 'Check a common error of defining a docstring after code.'\n\n      - id: requirements-txt-fixer\n        description: 'Sort entries in requirements.txt.'\n\n  - repo: https://github.com/MarcoGorelli/absolufy-imports\n    rev: v0.3.1\n    hooks:\n      - id: absolufy-imports\n        description: 'Automatically convert relative imports to absolute. 
(Use `args: [--never]` to revert.)'\n\n  - repo: https://github.com/asottile/pyupgrade\n    rev: v3.20.0\n    hooks:\n      - id: pyupgrade\n        description: 'Automatically upgrade syntax for newer versions.'\n        args: [--py3-plus, --py36-plus]\n\n  - repo: https://github.com/pre-commit/pygrep-hooks\n    rev: v1.10.0\n    hooks:\n      - id: python-check-blanket-noqa\n        description: 'Enforce that `# noqa` annotations always occur with specific codes.'\n\n      - id: python-check-blanket-type-ignore\n        description: 'Enforce that `# type: ignore` annotations always occur with specific codes.'\n\n      - id: python-use-type-annotations\n        description: 'Enforce that python3.6+ type annotations are used instead of type comments.'\n\n  - repo: https://github.com/PyCQA/isort\n    rev: 6.0.1\n    hooks:\n      - id: isort\n        description: 'Sort imports alphabetically, and automatically separated into sections and by type.'\n\n  - repo: https://github.com/pre-commit/mirrors-eslint\n    rev: v9.30.1\n    hooks:\n      - id: eslint\n        description: 'Lint javascript files.'\n        files: \\.js$\n        args: [--max-warnings=0, --fix]\n        additional_dependencies:\n          [\n            'eslint@9.30.1',\n            '@eslint/js@9.30.1',\n            'eslint-plugin-import@2.32.0',\n            'globals@16.3.0',\n          ]\n\n  - repo: https://github.com/djlint/djLint\n    rev: v1.36.4\n    hooks:\n      - id: djlint-reformat-jinja\n\n  - repo: https://github.com/igorshubovych/markdownlint-cli\n    rev: v0.45.0\n    hooks:\n      - id: markdownlint\n        description: 'Lint markdown files.'\n        args: ['--disable=line-length', '--ignore=CHANGELOG.md']\n\n  - repo: https://github.com/astral-sh/ruff-pre-commit\n    rev: v0.12.2\n    hooks:\n      - id: ruff-check\n      - id: ruff-format\n\n  - repo: https://github.com/jsh9/pydoclint\n    rev: 0.6.7\n    hooks:\n      - id: pydoclint\n        name: pydoclint for source\n        
args: [--style=numpy]\n        files: ^src/\n\n  - repo: https://github.com/pycqa/pylint\n    rev: v3.3.7\n    hooks:\n      - id: pylint\n        name: pylint for source\n        files: ^src/\n        additional_dependencies:\n          [\n            boto3>=1.28.0,\n            click>=8.0.0,\n            'fastapi[standard]>=0.109.1',\n            gitpython>=3.1.0,\n            httpx,\n            loguru>=0.7.0,\n            pathspec>=0.12.1,\n            prometheus-client,\n            pydantic,\n            pytest-asyncio,\n            pytest-mock,\n            python-dotenv,\n            'sentry-sdk[fastapi]',\n            slowapi,\n            starlette>=0.40.0,\n            strenum; python_version < '3.11',\n            tiktoken>=0.7.0,\n            typing_extensions>= 4.0.0; python_version < '3.10',\n            uvicorn>=0.11.7,\n          ]\n\n      - id: pylint\n        name: pylint for tests\n        files: ^tests/\n        args:\n          - --rcfile=tests/.pylintrc\n        additional_dependencies:\n          [\n            boto3>=1.28.0,\n            click>=8.0.0,\n            'fastapi[standard]>=0.109.1',\n            gitpython>=3.1.0,\n            httpx,\n            loguru>=0.7.0,\n            pathspec>=0.12.1,\n            prometheus-client,\n            pydantic,\n            pytest-asyncio,\n            pytest-mock,\n            python-dotenv,\n            'sentry-sdk[fastapi]',\n            slowapi,\n            starlette>=0.40.0,\n            strenum; python_version < '3.11',\n            tiktoken>=0.7.0,\n            typing_extensions>= 4.0.0; python_version < '3.10',\n            uvicorn>=0.11.7,\n          ]\n\n  - repo: meta\n    hooks:\n      - id: check-hooks-apply\n      - id: check-useless-excludes\n  - repo: https://github.com/gitleaks/gitleaks\n    rev: v8.16.3\n    hooks:\n      - id: gitleaks\n"
  },
  {
    "path": ".release-please-manifest.json",
    "content": "{\".\":\"0.3.1\"}\n"
  },
  {
    "path": ".vscode/launch.json",
    "content": "{\n    \"configurations\": [\n        {\n            \"name\": \"Python Debugger: Module\",\n            \"type\": \"debugpy\",\n            \"request\": \"launch\",\n            \"module\": \"server\",\n            \"args\": [],\n            \"cwd\": \"${workspaceFolder}/src\"\n        }\n    ]\n}\n"
  },
  {
    "path": "CHANGELOG.md",
    "content": "# Changelog\n\n## [0.3.1](https://github.com/coderamp-labs/gitingest/compare/v0.3.0...v0.3.1) (2025-07-31)\n\n\n### Bug Fixes\n\n* make cache aware of subpaths ([#481](https://github.com/coderamp-labs/gitingest/issues/481)) ([8b59bef](https://github.com/coderamp-labs/gitingest/commit/8b59bef541f858ef44eba8fce6ace77df9dea01c))\n\n## [0.3.0](https://github.com/coderamp-labs/gitingest/compare/v0.2.1...v0.3.0) (2025-07-30)\n\n\n### Features\n\n* **logging:** implement loguru ([#473](https://github.com/coderamp-labs/gitingest/issues/473)) ([d061b48](https://github.com/coderamp-labs/gitingest/commit/d061b4877a253ba3f0480d329f025427c7f70177))\n* serve cached digest if available ([#462](https://github.com/coderamp-labs/gitingest/issues/462)) ([efe5a26](https://github.com/coderamp-labs/gitingest/commit/efe5a2686142b5ee4984061ebcec23c3bf3495d5))\n\n\n### Bug Fixes\n\n* handle network errors gracefully in token count estimation ([#437](https://github.com/coderamp-labs/gitingest/issues/437)) ([5fbb445](https://github.com/coderamp-labs/gitingest/commit/5fbb445cd8725e56972f43ec8b5e12cb299e9e83))\n* improved server side cleanup after ingest ([#477](https://github.com/coderamp-labs/gitingest/issues/477)) ([2df0eb4](https://github.com/coderamp-labs/gitingest/commit/2df0eb43989731ae40a9dd82d310ff76a794a46d))\n\n\n### Documentation\n\n* **contributing:** update PR title guidelines to enforce convention ([#476](https://github.com/coderamp-labs/gitingest/issues/476)) ([d1f8a80](https://github.com/coderamp-labs/gitingest/commit/d1f8a80826ca38ec105a1878742fe351d4939d6e))\n\n## [0.2.1](https://github.com/coderamp-labs/gitingest/compare/v0.2.0...v0.2.1) (2025-07-27)\n\n\n### Bug Fixes\n\n* remove logarithm conversion from the backend and correctly process max file size in kb ([#464](https://github.com/coderamp-labs/gitingest/issues/464)) ([932bfef](https://github.com/coderamp-labs/gitingest/commit/932bfef85db66704985c83f3f7c427756bd14023))\n\n## 
[0.2.0](https://github.com/coderamp-labs/gitingest/compare/v0.1.5...v0.2.0) (2025-07-26)\n\n### Features\n\n* `include_submodules` option ([#313](https://github.com/coderamp-labs/gitingest/issues/313)) ([38c2317](https://github.com/coderamp-labs/gitingest/commit/38c23171a14556a2cdd05c0af8219f4dc789defd))\n* add Tailwind CSS pipeline, tag-aware cloning & overhaul CI/CD ([#352](https://github.com/coderamp-labs/gitingest/issues/352)) ([b683e59](https://github.com/coderamp-labs/gitingest/commit/b683e59b5b1a31d27cc5c6ce8fb62da9b660613b))\n* add Tailwind CSS pipeline, tag-aware cloning & overhaul CI/CD ([#352](https://github.com/coderamp-labs/gitingest/issues/352)) ([016817d](https://github.com/coderamp-labs/gitingest/commit/016817d5590c1412498b7532f6e854d20239c6be))\n* **ci:** build Docker Image on PRs ([#382](https://github.com/coderamp-labs/gitingest/issues/382)) ([bc8cdb4](https://github.com/coderamp-labs/gitingest/commit/bc8cdb459482948c27e780b733ac7216d822529a))\n* implement prometheus exporter ([#406](https://github.com/coderamp-labs/gitingest/issues/406)) ([1016f6e](https://github.com/coderamp-labs/gitingest/commit/1016f6ecb3b1b066d541d1eba1ddffec49b15f16))\n* implement S3 integration for storing and retrieving digest files ([#427](https://github.com/coderamp-labs/gitingest/issues/427)) ([414e851](https://github.com/coderamp-labs/gitingest/commit/414e85189fb9055491530ba8c0665c798474451e))\n* integrate Sentry for error tracking and performance monitoring ([#408](https://github.com/coderamp-labs/gitingest/issues/408)) ([590e55a](https://github.com/coderamp-labs/gitingest/commit/590e55a4d28a4f5c0beafbd12c525828fa79e221))\n* Refactor backend to a rest api ([#346](https://github.com/coderamp-labs/gitingest/issues/346)) ([2b1f228](https://github.com/coderamp-labs/gitingest/commit/2b1f228ae1f6d1f7ee471794d258b13fcac25a96))\n* **ui:** add inline PAT info tooltip inside token field ([#348](https://github.com/coderamp-labs/gitingest/issues/348)) 
([2592303](https://github.com/coderamp-labs/gitingest/commit/25923037ea6cd2f8ef33a6cf1f0406c2b4f0c9b6))\n\n\n### Bug Fixes\n\n* enable metrics if env var is defined instead of being \"True\" ([#407](https://github.com/coderamp-labs/gitingest/issues/407)) ([fa2e192](https://github.com/coderamp-labs/gitingest/commit/fa2e192c05864c8db90bda877e9efb9b03caf098))\n* fix docker container not launching ([#449](https://github.com/coderamp-labs/gitingest/issues/449)) ([998cea1](https://github.com/coderamp-labs/gitingest/commit/998cea15b4f79c5d6f840b5d3d916f83c8be3a07))\n* frontend directory tree ([#363](https://github.com/coderamp-labs/gitingest/issues/363)) ([0fcf8a9](https://github.com/coderamp-labs/gitingest/commit/0fcf8a956f7ec8403a025177f998f92ddee96de0))\n* gitignore and gitingestignore files are now correctly processed … ([#416](https://github.com/coderamp-labs/gitingest/issues/416)) ([74e503f](https://github.com/coderamp-labs/gitingest/commit/74e503fa1140feb74aa5350a32f0025c43097da1))\n* Potential fix for code scanning alert no. 
75: Uncontrolled data used in path expression ([#421](https://github.com/coderamp-labs/gitingest/issues/421)) ([9ceaf6c](https://github.com/coderamp-labs/gitingest/commit/9ceaf6cbbb0cdefbc79f78c5285406b9188b2d3d))\n* reset pattern form when switching between include/exclude patterns ([#417](https://github.com/coderamp-labs/gitingest/issues/417)) ([7085e13](https://github.com/coderamp-labs/gitingest/commit/7085e138a74099b1df189b3bf9b8a333c8769380))\n* temp files cleanup after ingest([#309](https://github.com/coderamp-labs/gitingest/issues/309)) ([e669e44](https://github.com/coderamp-labs/gitingest/commit/e669e444fa1e6130f3f22952dd81f0ca3fe08fa5))\n* **ui:** update layout in PAT section to avoid overlaps & overflows ([#331](https://github.com/coderamp-labs/gitingest/issues/331)) ([b39ef54](https://github.com/coderamp-labs/gitingest/commit/b39ef5416c1f8a7993a8249161d2a898b7387595))\n* **windows:** warn if Git long path support is disabled, do not fail ([b8e375f](https://github.com/coderamp-labs/gitingest/commit/b8e375f71cae7d980cf431396c4414a6dbd0588c))\n\n\n### Documentation\n\n* add GitHub Issue Form for bug reports ([#403](https://github.com/coderamp-labs/gitingest/issues/403)) ([4546449](https://github.com/coderamp-labs/gitingest/commit/4546449bbc1e4a7ad0950c4b831b8855a98628fd))\n* add GitHub Issue Form for feature requests ([#404](https://github.com/coderamp-labs/gitingest/issues/404)) ([9b1fc58](https://github.com/coderamp-labs/gitingest/commit/9b1fc58900ae18a3416fe3cf9b5e301a65a8e9fd))\n* Fix CLI help text accuracy ([#332](https://github.com/coderamp-labs/gitingest/issues/332)) ([fdcbc53](https://github.com/coderamp-labs/gitingest/commit/fdcbc53cadde6a5dc3c3626120df1935b63693b2))\n\n\n### Code Refactoring\n\n* centralize PAT validation, streamline repo checks & misc cleanup ([#349](https://github.com/coderamp-labs/gitingest/issues/349)) ([cea0edd](https://github.com/coderamp-labs/gitingest/commit/cea0eddce8c6846bc6271cb3a8d15320e103214c))\n* centralize PAT 
validation, streamline repo checks & misc cleanup ([#349](https://github.com/coderamp-labs/gitingest/issues/349)) ([f8d397e](https://github.com/coderamp-labs/gitingest/commit/f8d397e66e3382d12f8a0ed05d291a39db830bda))\n"
  },
  {
    "path": "CODE_OF_CONDUCT.md",
    "content": "# Contributor Covenant Code of Conduct\n\n## Our Pledge\n\nWe as members, contributors, and leaders pledge to make participation in our\ncommunity a harassment-free experience for everyone, regardless of age, body\nsize, visible or invisible disability, ethnicity, sex characteristics, gender\nidentity and expression, level of experience, education, socio-economic status,\nnationality, personal appearance, race, religion, or sexual identity\nand orientation.\n\nWe pledge to act and interact in ways that contribute to an open, welcoming,\ndiverse, inclusive, and healthy community.\n\n## Our Standards\n\nExamples of behavior that contributes to a positive environment for our\ncommunity include:\n\n* Demonstrating empathy and kindness toward other people\n* Being respectful of differing opinions, viewpoints, and experiences\n* Giving and gracefully accepting constructive feedback\n* Accepting responsibility and apologizing to those affected by our mistakes,\n  and learning from the experience\n* Focusing on what is best not just for us as individuals, but for the\n  overall community\n\nExamples of unacceptable behavior include:\n\n* The use of sexualized language or imagery, and sexual attention or\n  advances of any kind\n* Trolling, insulting or derogatory comments, and personal or political attacks\n* Public or private harassment\n* Publishing others' private information, such as a physical or email\n  address, without their explicit permission\n* Other conduct which could reasonably be considered inappropriate in a\n  professional setting\n\n## Enforcement Responsibilities\n\nCommunity leaders are responsible for clarifying and enforcing our standards of\nacceptable behavior and will take appropriate and fair corrective action in\nresponse to any behavior that they deem inappropriate, threatening, offensive,\nor harmful.\n\nCommunity leaders have the right and responsibility to remove, edit, or reject\ncomments, commits, code, wiki edits, issues, 
and other contributions that are\nnot aligned to this Code of Conduct, and will communicate reasons for moderation\ndecisions when appropriate.\n\n## Scope\n\nThis Code of Conduct applies within all community spaces, and also applies when\nan individual is officially representing the community in public spaces.\nExamples of representing our community include using an official e-mail address,\nposting via an official social media account, or acting as an appointed\nrepresentative at an online or offline event.\n\n## Enforcement\n\nInstances of abusive, harassing, or otherwise unacceptable behavior may be\nreported to the community leaders responsible for enforcement at\n<romain@coderamp.io>.\nAll complaints will be reviewed and investigated promptly and fairly.\n\nAll community leaders are obligated to respect the privacy and security of the\nreporter of any incident.\n\n## Enforcement Guidelines\n\nCommunity leaders will follow these Community Impact Guidelines in determining\nthe consequences for any action they deem in violation of this Code of Conduct:\n\n### 1. Correction\n\n**Community Impact**: Use of inappropriate language or other behavior deemed\nunprofessional or unwelcome in the community.\n\n**Consequence**: A private, written warning from community leaders, providing\nclarity around the nature of the violation and an explanation of why the\nbehavior was inappropriate. A public apology may be requested.\n\n### 2. Warning\n\n**Community Impact**: A violation through a single incident or series\nof actions.\n\n**Consequence**: A warning with consequences for continued behavior. No\ninteraction with the people involved, including unsolicited interaction with\nthose enforcing the Code of Conduct, for a specified period of time. This\nincludes avoiding interactions in community spaces as well as external channels\nlike social media. Violating these terms may lead to a temporary or\npermanent ban.\n\n### 3. 
Temporary Ban\n\n**Community Impact**: A serious violation of community standards, including\nsustained inappropriate behavior.\n\n**Consequence**: A temporary ban from any sort of interaction or public\ncommunication with the community for a specified period of time. No public or\nprivate interaction with the people involved, including unsolicited interaction\nwith those enforcing the Code of Conduct, is allowed during this period.\nViolating these terms may lead to a permanent ban.\n\n### 4. Permanent Ban\n\n**Community Impact**: Demonstrating a pattern of violation of community\nstandards, including sustained inappropriate behavior, harassment of an\nindividual, or aggression toward or disparagement of classes of individuals.\n\n**Consequence**: A permanent ban from any sort of public interaction within\nthe community.\n\n## Attribution\n\nThis Code of Conduct is adapted from the [Contributor Covenant](https://www.contributor-covenant.org),\nversion 2.0, available at\n<https://www.contributor-covenant.org/version/2/0/code_of_conduct.html>.\n\nCommunity Impact Guidelines were inspired by [Mozilla's code of conduct\nenforcement ladder](https://github.com/mozilla/diversity).\n\nFor answers to common questions about this code of conduct, see the FAQ at\n<https://www.contributor-covenant.org/faq>. Translations are available at\n<https://www.contributor-covenant.org/translations>.\n"
  },
  {
    "path": "CONTRIBUTING.md",
    "content": "# Contributing to Gitingest\n\nThanks for your interest in contributing to **Gitingest** 🚀 Our goal is to keep the codebase friendly to first-time\ncontributors.\nIf you ever get stuck, reach out on [Discord](https://discord.com/invite/zerRaGK9EC).\n\n---\n\n## How to Contribute (non-technical)\n\n- **Create an Issue** – found a bug or have a feature idea?\n  [Open an issue](https://github.com/coderamp-labs/gitingest/issues/new).\n- **Spread the Word** – tweet, blog, or tell a friend.\n- **Use Gitingest** – real-world usage gives the best feedback. File issues or ping us\n  on [Discord](https://discord.com/invite/zerRaGK9EC) with anything you notice.\n\n---\n\n## How to submit a Pull Request\n\n> **Prerequisites**: The project uses **Python 3.9+** and `pre-commit` for development.\n\n1. **Fork** the repository.\n\n2. **Clone** your fork:\n\n   ```bash\n   git clone https://github.com/<your-username>/gitingest.git\n   cd gitingest\n   ```\n\n3. **Set up the dev environment**:\n\n   ```bash\n   python -m venv .venv\n   source .venv/bin/activate\n   pip install -e \".[dev,server]\"\n   pre-commit install\n   ```\n\n4. **Create a branch** for your changes:\n\n   ```bash\n   git checkout -b your-branch\n   ```\n\n5. **Make your changes** (and add tests when relevant).\n\n6. **Stage** the changes:\n\n   ```bash\n   git add .\n   ```\n\n7. **Run the backend test suite**:\n\n   ```bash\n   pytest\n   ```\n\n8. *(Optional)* **Run `pre-commit` on all files** to check hooks without committing:\n\n   ```bash\n   pre-commit run --all-files\n   ```\n\n9. **Run the local server** to sanity-check:\n\n    ```bash\n    python -m server\n    ```\n\n   Open [http://localhost:8000](http://localhost:8000) to confirm everything works.\n\n10. **Commit** (signed):\n\n    ```bash\n    git commit -S -m \"Your commit message\"\n    ```\n\n    If *pre-commit* complains, fix the problems and repeat **5 – 9**.\n\n11. **Push** your branch:\n\n    ```bash\n    git push origin your-branch\n    ```\n\n12. **Open a pull request** on GitHub with a clear description.\n\n    > **Important:** Pull request titles **must follow\n    the [Conventional Commits](https://www.conventionalcommits.org/en/v1.0.0/) specification**. This helps with\n    changelogs and automated releases.\n\n13. **Iterate** on any review feedback—update your branch and repeat **6 – 11** as needed.\n\n*(Optional) Invite a maintainer to your branch for easier collaboration.*\n"
  },
  {
    "path": "Dockerfile",
    "content": "# Stage 1: Install Python dependencies\nFROM python:3.13.5-slim@sha256:4c2cf9917bd1cbacc5e9b07320025bdb7cdf2df7b0ceaccb55e9dd7e30987419 AS python-builder\n\nWORKDIR /build\n\nRUN set -eux; \\\n    apt-get update; \\\n    apt-get install -y --no-install-recommends gcc python3-dev; \\\n    rm -rf /var/lib/apt/lists/*\n\nCOPY pyproject.toml .\nCOPY src/ ./src/\n\nRUN set -eux; \\\n    pip install --no-cache-dir --upgrade pip; \\\n    pip install --no-cache-dir --timeout 1000 .[server,mcp]\n\n# Stage 2: Runtime image\nFROM python:3.13.5-slim@sha256:4c2cf9917bd1cbacc5e9b07320025bdb7cdf2df7b0ceaccb55e9dd7e30987419\n\nARG UID=1000\nARG GID=1000\nARG APP_REPOSITORY=https://github.com/coderamp-labs/gitingest\nARG APP_VERSION=unknown\nARG APP_VERSION_URL=https://github.com/coderamp-labs/gitingest\n\nENV PYTHONUNBUFFERED=1 \\\n    PYTHONDONTWRITEBYTECODE=1 \\\n    APP_REPOSITORY=${APP_REPOSITORY} \\\n    APP_VERSION=${APP_VERSION} \\\n    APP_VERSION_URL=${APP_VERSION_URL}\n\nRUN set -eux; \\\n    apt-get update; \\\n    apt-get install -y --no-install-recommends git curl; \\\n    apt-get clean; \\\n    rm -rf /var/lib/apt/lists/*\n\nWORKDIR /app\nRUN set -eux; \\\n    groupadd -g \"$GID\" appuser; \\\n    useradd -m -u \"$UID\" -g \"$GID\" appuser\n\nCOPY --from=python-builder --chown=$UID:$GID /usr/local/lib/python3.13/site-packages/ /usr/local/lib/python3.13/site-packages/\nCOPY --chown=$UID:$GID src/ ./\n\nRUN set -eux; \\\n    chown -R appuser:appuser /app\nUSER appuser\n\nEXPOSE 8000\nEXPOSE 9090\nCMD [\"python\", \"-m\", \"server\"]\n"
  },
  {
    "path": "LICENSE",
    "content": "MIT License\n\nCopyright (c) 2024 Romain Courtois\n\nPermission is hereby granted, free of charge, to any person obtaining a copy\nof this software and associated documentation files (the \"Software\"), to deal\nin the Software without restriction, including without limitation the rights\nto use, copy, modify, merge, publish, distribute, sublicense, and/or sell\ncopies of the Software, and to permit persons to whom the Software is\nfurnished to do so, subject to the following conditions:\n\nThe above copyright notice and this permission notice shall be included in all\ncopies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\nIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\nFITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\nAUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\nLIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\nOUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\nSOFTWARE.\n"
  },
  {
    "path": "README.md",
    "content": "# Gitingest\n\n[![Screenshot of Gitingest front page](https://raw.githubusercontent.com/coderamp-labs/gitingest/refs/heads/main/docs/frontpage.png)](https://gitingest.com)\n\n<!-- Badges -->\n<!-- markdownlint-disable MD033 -->\n<p align=\"center\">\n  <!-- row 1 — install & compat -->\n  <a href=\"https://pypi.org/project/gitingest\"><img src=\"https://img.shields.io/pypi/v/gitingest.svg\" alt=\"PyPI\"></a>\n  <a href=\"https://pypi.org/project/gitingest\"><img src=\"https://img.shields.io/pypi/pyversions/gitingest.svg\" alt=\"Python Versions\"></a>\n  <br>\n  <!-- row 2 — quality & community -->\n  <a href=\"https://github.com/coderamp-labs/gitingest/actions/workflows/ci.yml?query=branch%3Amain\"><img src=\"https://github.com/coderamp-labs/gitingest/actions/workflows/ci.yml/badge.svg?branch=main\" alt=\"CI\"></a>\n\n  <a href=\"https://github.com/astral-sh/ruff\"><img src=\"https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json\" alt=\"Ruff\"></a>\n  <a href=\"https://scorecard.dev/viewer/?uri=github.com/coderamp-labs/gitingest\"><img src=\"https://api.scorecard.dev/projects/github.com/coderamp-labs/gitingest/badge\" alt=\"OpenSSF Scorecard\"></a>\n  <br>\n  <a href=\"https://github.com/coderamp-labs/gitingest/blob/main/LICENSE\"><img src=\"https://img.shields.io/github/license/coderamp-labs/gitingest.svg\" alt=\"License\"></a>\n  <a href=\"https://pepy.tech/project/gitingest\"><img src=\"https://pepy.tech/badge/gitingest\" alt=\"Downloads\"></a>\n  <a href=\"https://github.com/coderamp-labs/gitingest\"><img src=\"https://img.shields.io/github/stars/coderamp-labs/gitingest\" alt=\"GitHub Stars\"></a>\n  <a href=\"https://discord.com/invite/zerRaGK9EC\"><img src=\"https://img.shields.io/badge/Discord-Join_chat-5865F2?logo=discord&logoColor=white\" alt=\"Discord\"></a>\n  <br>\n  <a href=\"https://trendshift.io/repositories/13519\"><img src=\"https://trendshift.io/api/badge/repositories/13519\" 
alt=\"Trendshift\" height=\"50\"></a>\n</p>\n<!-- markdownlint-enable MD033 -->\n\nTurn any Git repository into a prompt-friendly text ingest for LLMs.\n\nYou can also replace `hub` with `ingest` in any GitHub URL to access the corresponding digest.\n\n<!-- Extensions -->\n[gitingest.com](https://gitingest.com) · [Chrome Extension](https://chromewebstore.google.com/detail/adfjahbijlkjfoicpjkhjicpjpjfaood) · [Firefox Add-on](https://addons.mozilla.org/firefox/addon/gitingest)\n\n<!-- Languages -->\n[Deutsch](https://www.readme-i18n.com/coderamp-labs/gitingest?lang=de) |\n[Español](https://www.readme-i18n.com/coderamp-labs/gitingest?lang=es) |\n[Français](https://www.readme-i18n.com/coderamp-labs/gitingest?lang=fr) |\n[日本語](https://www.readme-i18n.com/coderamp-labs/gitingest?lang=ja) |\n[한국어](https://www.readme-i18n.com/coderamp-labs/gitingest?lang=ko) |\n[Português](https://www.readme-i18n.com/coderamp-labs/gitingest?lang=pt) |\n[Русский](https://www.readme-i18n.com/coderamp-labs/gitingest?lang=ru) |\n[中文](https://www.readme-i18n.com/coderamp-labs/gitingest?lang=zh)\n\n## 🚀 Features\n\n- **Easy code context**: Get a text digest from a Git repository URL or a directory\n- **Smart Formatting**: Optimized output format for LLM prompts\n- **Statistics about**:\n  - File and directory structure\n  - Size of the extract\n  - Token count\n- **CLI tool**: Run it as a shell command\n- **Python package**: Import it in your code\n\n## 📚 Requirements\n\n- Python 3.8+\n- For private repositories: A GitHub Personal Access Token (PAT). 
[Generate your token **here**!](https://github.com/settings/tokens/new?description=gitingest&scopes=repo)\n\n### 📦 Installation\n\nGitingest is available on [PyPI](https://pypi.org/project/gitingest/).\nYou can install it using `pip`:\n\n```bash\npip install gitingest\n```\n\nor\n\n```bash\npip install gitingest[server]\n```\n\nto include server dependencies for self-hosting.\n\nHowever, it might be a good idea to use `pipx` to install it.\nYou can install `pipx` using your preferred package manager.\n\n```bash\nbrew install pipx\napt install pipx\nscoop install pipx\n...\n```\n\nIf you are using pipx for the first time, run:\n\n```bash\npipx ensurepath\n```\n\n```bash\n# install gitingest\npipx install gitingest\n```\n\n## 🧩 Browser Extension Usage\n\n<!-- markdownlint-disable MD033 -->\n<a href=\"https://chromewebstore.google.com/detail/adfjahbijlkjfoicpjkhjicpjpjfaood\" target=\"_blank\" title=\"Get Gitingest Extension from Chrome Web Store\"><img height=\"48\" src=\"https://github.com/user-attachments/assets/20a6e44b-fd46-4e6c-8ea6-aad436035753\" alt=\"Available in the Chrome Web Store\" /></a>\n<a href=\"https://addons.mozilla.org/firefox/addon/gitingest\" target=\"_blank\" title=\"Get Gitingest Extension from Firefox Add-ons\"><img height=\"48\" src=\"https://github.com/user-attachments/assets/c0e99e6b-97cf-4af2-9737-099db7d3538b\" alt=\"Get The Add-on for Firefox\" /></a>\n<a href=\"https://microsoftedge.microsoft.com/addons/detail/nfobhllgcekbmpifkjlopfdfdmljmipf\" target=\"_blank\" title=\"Get Gitingest Extension from Microsoft Edge Add-ons\"><img height=\"48\" src=\"https://github.com/user-attachments/assets/204157eb-4cae-4c0e-b2cb-db514419fd9e\" alt=\"Get from the Edge Add-ons\" /></a>\n<!-- markdownlint-enable MD033 -->\n\nThe extension is open source at [lcandy2/gitingest-extension](https://github.com/lcandy2/gitingest-extension).\n\nIssues and feature requests are welcome to the repo.\n\n## 💡 Command line usage\n\nThe `gitingest` command line tool 
allows you to analyze codebases and create a text dump of their contents.\n\n```bash\n# Basic usage (writes to digest.txt by default)\ngitingest /path/to/directory\n\n# From URL\ngitingest https://github.com/coderamp-labs/gitingest\n\n# or from specific subdirectory\ngitingest https://github.com/coderamp-labs/gitingest/tree/main/src/gitingest/utils\n```\n\nFor private repositories, use the `--token/-t` option.\n\n```bash\n# Get your token from https://github.com/settings/personal-access-tokens\ngitingest https://github.com/username/private-repo --token github_pat_...\n\n# Or set it as an environment variable\nexport GITHUB_TOKEN=github_pat_...\ngitingest https://github.com/username/private-repo\n\n# Include repository submodules\ngitingest https://github.com/username/repo-with-submodules --include-submodules\n```\n\nBy default, files listed in `.gitignore` are skipped. Use `--include-gitignored` if you\nneed those files in the digest.\n\nBy default, the digest is written to a text file (`digest.txt`) in your current working directory. 
You can customize the output in two ways:\n\n- Use `--output/-o <filename>` to write to a specific file.\n- Use `--output/-o -` to output directly to `STDOUT` (useful for piping to other tools).\n\nSee more options and usage details with:\n\n```bash\ngitingest --help\n```\n\n## 🐍 Python package usage\n\n```python\n# Synchronous usage\nfrom gitingest import ingest\n\nsummary, tree, content = ingest(\"path/to/directory\")\n\n# or from URL\nsummary, tree, content = ingest(\"https://github.com/coderamp-labs/gitingest\")\n\n# or from a specific subdirectory\nsummary, tree, content = ingest(\"https://github.com/coderamp-labs/gitingest/tree/main/src/gitingest/utils\")\n```\n\nFor private repositories, you can pass a token:\n\n```python\n# Using token parameter\nsummary, tree, content = ingest(\"https://github.com/username/private-repo\", token=\"github_pat_...\")\n\n# Or set it as an environment variable\nimport os\nos.environ[\"GITHUB_TOKEN\"] = \"github_pat_...\"\nsummary, tree, content = ingest(\"https://github.com/username/private-repo\")\n\n# Include repository submodules\nsummary, tree, content = ingest(\"https://github.com/username/repo-with-submodules\", include_submodules=True)\n```\n\nBy default, this won't write a file but can be enabled with the `output` argument.\n\n```python\n# Asynchronous usage\nfrom gitingest import ingest_async\nimport asyncio\n\nresult = asyncio.run(ingest_async(\"path/to/directory\"))\n```\n\n### Jupyter notebook usage\n\n```python\nfrom gitingest import ingest_async\n\n# Use await directly in Jupyter\nsummary, tree, content = await ingest_async(\"path/to/directory\")\n\n```\n\nThis is because Jupyter notebooks are asynchronous by default.\n\n## 🐳 Self-host\n\n### Using Docker\n\n1. Build the image:\n\n   ``` bash\n   docker build -t gitingest .\n   ```\n\n2. 
Run the container:\n\n   ``` bash\n   docker run -d --name gitingest -p 8000:8000 gitingest\n   ```\n\nThe application will be available at `http://localhost:8000`.\n\nIf you are hosting it on a domain, you can specify the allowed hostnames via env variable `ALLOWED_HOSTS`.\n\n   ```bash\n   # Default: \"gitingest.com, *.gitingest.com, localhost, 127.0.0.1\".\n   ALLOWED_HOSTS=\"example.com, localhost, 127.0.0.1\"\n   ```\n\n### Environment Variables\n\nThe application can be configured using the following environment variables:\n\n- **ALLOWED_HOSTS**: Comma-separated list of allowed hostnames (default: \"gitingest.com, *.gitingest.com, localhost, 127.0.0.1\")\n- **GITINGEST_METRICS_ENABLED**: Enable Prometheus metrics server (set to any value to enable)\n- **GITINGEST_METRICS_HOST**: Host for the metrics server (default: \"127.0.0.1\")\n- **GITINGEST_METRICS_PORT**: Port for the metrics server (default: \"9090\")\n- **GITINGEST_SENTRY_ENABLED**: Enable Sentry error tracking (set to any value to enable)\n- **GITINGEST_SENTRY_DSN**: Sentry DSN (required if Sentry is enabled)\n- **GITINGEST_SENTRY_TRACES_SAMPLE_RATE**: Sampling rate for performance data (default: \"1.0\", range: 0.0-1.0)\n- **GITINGEST_SENTRY_PROFILE_SESSION_SAMPLE_RATE**: Sampling rate for profile sessions (default: \"1.0\", range: 0.0-1.0)\n- **GITINGEST_SENTRY_PROFILE_LIFECYCLE**: Profile lifecycle mode (default: \"trace\")\n- **GITINGEST_SENTRY_SEND_DEFAULT_PII**: Send default personally identifiable information (default: \"true\")\n- **S3_ALIAS_HOST**: Public URL/CDN for accessing S3 resources (default: \"127.0.0.1:9000/gitingest-bucket\")\n- **S3_DIRECTORY_PREFIX**: Optional prefix for S3 file paths (if set, prefixes all S3 paths with this value)\n\n### Using Docker Compose\n\nThe project includes a `compose.yml` file that allows you to easily run the application in both development and production environments.\n\n#### Compose File Structure\n\nThe `compose.yml` file uses YAML anchoring with 
`&app-base` and `<<: *app-base` to define common configuration that is shared between services:\n\n```yaml\n# Common base configuration for all services\nx-app-base: &app-base\n  build:\n    context: .\n    dockerfile: Dockerfile\n  ports:\n    - \"${APP_WEB_BIND:-8000}:8000\"  # Main application port\n    - \"${GITINGEST_METRICS_HOST:-127.0.0.1}:${GITINGEST_METRICS_PORT:-9090}:9090\"  # Metrics port\n  # ... other common configurations\n```\n\n#### Services\n\nThe file defines three services:\n\n1. **app**: Production service configuration\n   - Uses the `prod` profile\n   - Sets the Sentry environment to \"production\"\n   - Configured for stable operation with `restart: unless-stopped`\n\n2. **app-dev**: Development service configuration\n   - Uses the `dev` profile\n   - Enables debug mode\n   - Mounts the source code for live development\n   - Uses hot reloading for faster development\n\n3. **minio**: S3-compatible object storage for development\n   - Uses the `dev` profile (only available in development mode)\n   - Provides S3-compatible storage for local development\n   - Accessible via:\n     - API: Port 9000 ([localhost:9000](http://localhost:9000))\n     - Web Console: Port 9001 ([localhost:9001](http://localhost:9001))\n   - Default admin credentials:\n     - Username: `minioadmin`\n     - Password: `minioadmin`\n   - Configurable via environment variables:\n     - `MINIO_ROOT_USER`: Custom admin username (default: minioadmin)\n     - `MINIO_ROOT_PASSWORD`: Custom admin password (default: minioadmin)\n   - Includes persistent storage via Docker volume\n   - Auto-creates a bucket and application-specific credentials:\n     - Bucket name: `gitingest-bucket` (configurable via `S3_BUCKET_NAME`)\n     - Access key: `gitingest` (configurable via `S3_ACCESS_KEY`)\n     - Secret key: `gitingest123` (configurable via `S3_SECRET_KEY`)\n   - These credentials are automatically passed to the app-dev service via environment variables:\n     - `S3_ENDPOINT`: URL of 
the MinIO server\n     - `S3_ACCESS_KEY`: Access key for the S3 bucket\n     - `S3_SECRET_KEY`: Secret key for the S3 bucket\n     - `S3_BUCKET_NAME`: Name of the S3 bucket\n     - `S3_REGION`: Region for the S3 bucket (default: us-east-1)\n     - `S3_ALIAS_HOST`: Public URL/CDN for accessing S3 resources (default: \"127.0.0.1:9000/gitingest-bucket\")\n\n#### Usage Examples\n\nTo run the application in development mode:\n\n```bash\ndocker compose --profile dev up\n```\n\nTo run the application in production mode:\n\n```bash\ndocker compose --profile prod up -d\n```\n\nTo build and run the application:\n\n```bash\ndocker compose --profile prod build\ndocker compose --profile prod up -d\n```\n\n## 🤝 Contributing\n\n### Non-technical ways to contribute\n\n- **Create an Issue**: If you find a bug or have an idea for a new feature, please [create an issue](https://github.com/coderamp-labs/gitingest/issues/new) on GitHub. This will help us track and prioritize your request.\n- **Spread the Word**: If you like Gitingest, please share it with your friends, colleagues, and on social media. This will help us grow the community and make Gitingest even better.\n- **Use Gitingest**: The best feedback comes from real-world usage! If you encounter any issues or have ideas for improvement, please let us know by [creating an issue](https://github.com/coderamp-labs/gitingest/issues/new) on GitHub or by reaching out to us on [Discord](https://discord.com/invite/zerRaGK9EC).\n\n### Technical ways to contribute\n\nGitingest aims to be friendly for first time contributors, with a simple Python and HTML codebase. If you need any help while working with the code, reach out to us on [Discord](https://discord.com/invite/zerRaGK9EC). 
For detailed instructions on how to make a pull request, see [CONTRIBUTING.md](./CONTRIBUTING.md).\n\n## 🛠️ Stack\n\n- [Tailwind CSS](https://tailwindcss.com) - Frontend\n- [FastAPI](https://github.com/fastapi/fastapi) - Backend framework\n- [Jinja2](https://jinja.palletsprojects.com) - HTML templating\n- [tiktoken](https://github.com/openai/tiktoken) - Token estimation\n- [posthog](https://github.com/PostHog/posthog) - Amazing analytics\n- [Sentry](https://sentry.io) - Error tracking and performance monitoring\n\n### Looking for a JavaScript/Node package?\n\nCheck out the NPM alternative 📦 Repomix: <https://github.com/yamadashy/repomix>\n\n## 🚀 Project Growth\n\n[![Star History Chart](https://api.star-history.com/svg?repos=coderamp-labs/gitingest&type=Date)](https://star-history.com/#coderamp-labs/gitingest&Date)\n"
  },
  {
    "path": "SECURITY.md",
    "content": "# Security Policy\n\n## Reporting a Vulnerability\n\nIf you have discovered a vulnerability inside the project, report it privately at <romain@coderamp.io>. This way the maintainer can work on a proper fix without disclosing the problem to the public before it has been solved.\n"
  },
  {
    "path": "compose.yml",
    "content": "x-base-environment: &base-environment\n  # Python Configuration\n  PYTHONUNBUFFERED: \"1\"\n  PYTHONDONTWRITEBYTECODE: \"1\"\n  # Host Configuration\n  ALLOWED_HOSTS: ${ALLOWED_HOSTS:-gitingest.com,*.gitingest.com,localhost,127.0.0.1}\n  # Metrics Configuration\n  GITINGEST_METRICS_ENABLED: ${GITINGEST_METRICS_ENABLED:-true}\n  GITINGEST_METRICS_HOST: ${GITINGEST_METRICS_HOST:-0.0.0.0}\n  GITINGEST_METRICS_PORT: ${GITINGEST_METRICS_PORT:-9090}\n  # Sentry Configuration\n  GITINGEST_SENTRY_ENABLED: ${GITINGEST_SENTRY_ENABLED:-false}\n  GITINGEST_SENTRY_DSN: ${GITINGEST_SENTRY_DSN:-}\n  GITINGEST_SENTRY_TRACES_SAMPLE_RATE: ${GITINGEST_SENTRY_TRACES_SAMPLE_RATE:-1.0}\n  GITINGEST_SENTRY_PROFILE_SESSION_SAMPLE_RATE: ${GITINGEST_SENTRY_PROFILE_SESSION_SAMPLE_RATE:-1.0}\n  GITINGEST_SENTRY_PROFILE_LIFECYCLE: ${GITINGEST_SENTRY_PROFILE_LIFECYCLE:-trace}\n  GITINGEST_SENTRY_SEND_DEFAULT_PII: ${GITINGEST_SENTRY_SEND_DEFAULT_PII:-true}\n\nx-prod-environment: &prod-environment\n  GITINGEST_SENTRY_ENVIRONMENT: ${GITINGEST_SENTRY_ENVIRONMENT:-production}\n\nx-dev-environment: &dev-environment\n  DEBUG: \"true\"\n  LOG_LEVEL: \"DEBUG\"\n  RELOAD: \"true\"\n  GITINGEST_SENTRY_ENVIRONMENT: ${GITINGEST_SENTRY_ENVIRONMENT:-development}\n  # S3 Configuration for development\n  S3_ENABLED: \"true\"\n  S3_ENDPOINT: http://minio:9000\n  S3_ACCESS_KEY: ${S3_ACCESS_KEY:-gitingest}\n  S3_SECRET_KEY: ${S3_SECRET_KEY:-gitingest123}\n  S3_BUCKET_NAME: ${S3_BUCKET_NAME:-gitingest-bucket}\n  S3_REGION: ${S3_REGION:-us-east-1}\n  S3_DIRECTORY_PREFIX: ${S3_DIRECTORY_PREFIX:-dev}\n  S3_ALIAS_HOST: ${S3_ALIAS_HOST:-http://127.0.0.1:9000/${S3_BUCKET_NAME:-gitingest-bucket}}\n\nx-app-base: &app-base\n  ports:\n    - \"${APP_WEB_BIND:-8000}:8000\"  # Main application port\n    - \"${GITINGEST_METRICS_HOST:-127.0.0.1}:${GITINGEST_METRICS_PORT:-9090}:9090\"  # Metrics port\n  user: \"1000:1000\"\n  command: [\"python\", \"-m\", \"server\"]\n\nservices:\n  # Production service 
configuration\n  app:\n    <<: *app-base\n    image: ghcr.io/coderamp-labs/gitingest:latest\n    profiles:\n      - prod\n    environment:\n      <<: [*base-environment, *prod-environment]\n    restart: unless-stopped\n\n  # Development service configuration\n  app-dev:\n    <<: *app-base\n    build:\n      context: .\n      dockerfile: Dockerfile\n    profiles:\n      - dev\n    environment:\n      <<: [*base-environment, *dev-environment]\n    volumes:\n      # Mount source code for live development\n      - ./src:/app:ro\n    # NOTE: no --reload flag is passed here; hot reloading presumably relies on RELOAD=true from dev-environment (verify in server)\n    command: [\"python\", \"-m\", \"server\"]\n    depends_on:\n      minio-setup:\n        condition: service_completed_successfully\n\n  # MinIO S3-compatible object storage for development\n  minio:\n    image: minio/minio:latest\n    profiles:\n      - dev\n    ports:\n      - \"9000:9000\"  # API port\n      - \"9001:9001\"  # Console port\n    environment: &minio-environment\n      MINIO_ROOT_USER: ${MINIO_ROOT_USER:-minioadmin}\n      MINIO_ROOT_PASSWORD: ${MINIO_ROOT_PASSWORD:-minioadmin}\n    volumes:\n      - minio-data:/data\n    command: server /data --console-address \":9001\"\n    restart: unless-stopped\n    healthcheck:\n      test: [\"CMD\", \"curl\", \"-f\", \"http://localhost:9000/minio/health/live\"]\n      interval: 30s\n      timeout: 30s\n      start_period: 30s\n      start_interval: 1s\n\n  # MinIO setup service to create bucket and user\n  minio-setup:\n    image: minio/mc\n    profiles:\n      - dev\n    depends_on:\n      minio:\n        condition: service_healthy\n    environment:\n      <<: *minio-environment\n      S3_ACCESS_KEY: ${S3_ACCESS_KEY:-gitingest}\n      S3_SECRET_KEY: ${S3_SECRET_KEY:-gitingest123}\n      S3_BUCKET_NAME: ${S3_BUCKET_NAME:-gitingest-bucket}\n    volumes:\n      - ./.docker/minio/setup.sh:/setup.sh:ro\n    entrypoint: sh\n    command: -c /setup.sh\n\nvolumes:\n  minio-data:\n    driver: local\n"
  },
  {
    "path": "eslint.config.cjs",
    "content": "const js = require('@eslint/js');\nconst globals = require('globals');\nconst importPlugin = require('eslint-plugin-import');\n\nmodule.exports = [\n  js.configs.recommended,\n\n  {\n    files: ['src/static/js/**/*.js'],\n\n    languageOptions: {\n      parserOptions: { ecmaVersion: 2021, sourceType: 'module' },\n      globals: {\n        ...globals.browser,\n        changePattern: 'readonly',\n        copyFullDigest: 'readonly',\n        copyText: 'readonly',\n        downloadFullDigest: 'readonly',\n        handleSubmit: 'readonly',\n        posthog: 'readonly',\n        submitExample: 'readonly',\n        toggleAccessSettings: 'readonly',\n        toggleFile: 'readonly',\n      },\n    },\n\n    plugins: { import: importPlugin },\n\n    rules: {\n      // Import hygiene (eslint-plugin-import)\n      'import/no-extraneous-dependencies': 'error',\n      'import/no-unresolved': 'error',\n      'import/order': ['warn', { alphabetize: { order: 'asc' } }],\n\n      // Safety & bug-catchers\n      'consistent-return': 'error',\n      'default-case': 'error',\n      'no-implicit-globals': 'error',\n      'no-shadow': 'error',\n\n      // Maintainability / complexity\n      complexity: ['warn', 10],\n      'max-depth': ['warn', 4],\n      'max-lines': ['warn', 500],\n      'max-params': ['warn', 5],\n\n      // Stylistic consistency (auto-fixable)\n      'arrow-parens': ['error', 'always'],\n      curly: ['error', 'all'],\n      indent: ['error', 4, { SwitchCase: 2 }],\n      'newline-per-chained-call': ['warn', { ignoreChainWithDepth: 2 }],\n      'no-multi-spaces': 'error',\n      'object-shorthand': ['error', 'always'],\n      'padding-line-between-statements': [\n        'warn',\n        { blankLine: 'always', prev: '*', next: 'return' },\n        { blankLine: 'always', prev: ['const', 'let', 'var'], next: '*' },\n        { blankLine: 'any', prev: ['const', 'let', 'var'], next: ['const', 'let', 'var'] },\n      ],\n      'quote-props': ['error', 
'consistent-as-needed'],\n      quotes: ['error', 'single', { avoidEscape: true }],\n      semi: 'error',\n\n      // Modern / performance tips\n      'arrow-body-style': ['warn', 'as-needed'],\n      'prefer-arrow-callback': 'error',\n      'prefer-exponentiation-operator': 'error',\n      'prefer-numeric-literals': 'error',\n      'prefer-object-has-own': 'warn',\n      'prefer-object-spread': 'error',\n      'prefer-template': 'error',\n    },\n  },\n];\n"
  },
  {
    "path": "pyproject.toml",
    "content": "[project]\nname = \"gitingest\"\nversion = \"0.3.1\"\ndescription=\"CLI tool to analyze and create text dumps of codebases for LLMs\"\nreadme = {file = \"README.md\", content-type = \"text/markdown\" }\nrequires-python = \">= 3.8\"\ndependencies = [\n    \"click>=8.0.0\",\n    \"gitpython>=3.1.0\",\n    \"httpx\",\n    \"loguru>=0.7.0\",\n    \"pathspec>=0.12.1\",\n    \"pydantic\",\n    \"python-dotenv\",\n    \"starlette>=0.40.0\",  # Minimum safe release (https://osv.dev/vulnerability/GHSA-f96h-pmfr-66vw)\n    \"strenum; python_version < '3.11'\",\n    \"tiktoken>=0.7.0\",  # Support for o200k_base encoding\n    \"typing_extensions>= 4.0.0; python_version < '3.10'\",\n]\n\nlicense = {file = \"LICENSE\"}\nauthors = [\n    { name = \"Romain Courtois\", email = \"romain@coderamp.io\" },\n    { name = \"Filip Christiansen\"},\n]\nclassifiers=[\n    \"Development Status :: 3 - Alpha\",\n    \"Intended Audience :: Developers\",\n    \"License :: OSI Approved :: MIT License\",\n    \"Programming Language :: Python :: 3.8\",\n    \"Programming Language :: Python :: 3.9\",\n    \"Programming Language :: Python :: 3.10\",\n    \"Programming Language :: Python :: 3.11\",\n    \"Programming Language :: Python :: 3.12\",\n    \"Programming Language :: Python :: 3.13\",\n]\n\n[project.optional-dependencies]\ndev = [\n    \"eval-type-backport\",\n    \"pre-commit\",\n    \"pytest\",\n    \"pytest-asyncio\",\n    \"pytest-mock\",\n]\n\nserver = [\n    \"boto3>=1.28.0\",  # AWS SDK for S3 support\n    \"fastapi[standard]>=0.109.1\",  # Minimum safe release (https://osv.dev/vulnerability/PYSEC-2024-38)\n    \"prometheus-client\",\n    \"sentry-sdk[fastapi]\",\n    \"slowapi\",\n    \"uvicorn>=0.11.7\",  # Minimum safe release (https://osv.dev/vulnerability/PYSEC-2020-150)\n]\n\n[project.scripts]\ngitingest = \"gitingest.__main__:main\"\n\n[project.urls]\nhomepage = \"https://gitingest.com\"\ngithub = 
\"https://github.com/coderamp-labs/gitingest\"\n\n[build-system]\nrequires = [\"setuptools>=61.0\", \"wheel\"]\nbuild-backend = \"setuptools.build_meta\"\n\n[tool.setuptools]\npackages = {find = {where = [\"src\"]}}\ninclude-package-data = true\n\n# Linting configuration\n[tool.pylint.format]\nmax-line-length = 119\n\n[tool.pylint.'MESSAGES CONTROL']\ndisable = [\n    \"too-many-arguments\",\n    \"too-many-positional-arguments\",\n    \"too-many-locals\",\n    \"too-few-public-methods\",\n    \"broad-exception-caught\",\n    \"duplicate-code\",\n    \"fixme\",\n]\n\n[tool.ruff]\nline-length = 119\nfix = true\n\n[tool.ruff.lint]\nselect = [\"ALL\"]\nignore = [  # https://docs.astral.sh/ruff/rules/...\n    \"D107\", # undocumented-public-init\n    \"FIX002\", # line-contains-todo\n    \"TD002\", # missing-todo-author\n    \"PLR0913\", # too-many-arguments,\n\n    # TODO: fix the following issues:\n    \"TD003\", # missing-todo-link, TODO: add issue links\n    \"S108\", # hardcoded-temp-file, TODO: replace with tempfile\n    \"BLE001\", # blind-except, TODO: replace with specific exceptions\n    \"FAST003\", # fast-api-unused-path-parameter, TODO: fix\n]\nper-file-ignores = { \"tests/**/*.py\" = [\"S101\"] } # Skip the \"assert used\" warning\n\n[tool.ruff.lint.pylint]\nmax-returns = 10\n\n[tool.ruff.lint.isort]\norder-by-type = true\ncase-sensitive = true\n\n[tool.pycln]\nall = true\n\n# TODO: Remove this once we figure out how to use ruff-isort\n[tool.isort]\nprofile = \"black\"\nline_length = 119\nremove_redundant_aliases = true\nfloat_to_top = true  # https://github.com/astral-sh/ruff/issues/6514\norder_by_type = true\nfilter_files = true\n\n# Test configuration\n[tool.pytest.ini_options]\npythonpath = [\"src\"]\ntestpaths = [\"tests/\"]\npython_files = \"test_*.py\"\nasyncio_mode = \"auto\"\nasyncio_default_fixture_loop_scope = \"function\"\npython_classes = \"Test*\"\npython_functions = \"test_*\"\n"
  },
  {
    "path": "release-please-config.json",
    "content": "{\n  \"$schema\": \"https://raw.githubusercontent.com/googleapis/release-please/main/schemas/config.json\",\n  \"packages\": {\n    \".\": {\n      \"release-type\": \"python\",\n      \"bump-minor-pre-major\": true\n    }\n  }\n}\n"
  },
  {
    "path": "renovate.json",
    "content": "{\n  \"$schema\": \"https://docs.renovatebot.com/renovate-schema.json\",\n  \"extends\": [\n    \"config:recommended\"\n  ]\n}\n"
  },
  {
    "path": "requirements-dev.txt",
    "content": "-r requirements.txt\neval-type-backport\npre-commit\npytest\npytest-asyncio\npytest-cov\npytest-mock\n"
  },
  {
    "path": "requirements.txt",
    "content": "boto3>=1.28.0  # AWS SDK for S3 support\nclick>=8.0.0\nfastapi[standard]>=0.109.1  # Minimum safe release (https://osv.dev/vulnerability/PYSEC-2024-38)\nhttpx\nloguru>=0.7.0\npathspec>=0.12.1\nprometheus-client\npydantic\npython-dotenv\nsentry-sdk[fastapi]\nslowapi\nstarlette>=0.40.0  # Minimum safe release (https://osv.dev/vulnerability/GHSA-f96h-pmfr-66vw)\ntiktoken>=0.7.0  # Support for o200k_base encoding\nuvicorn>=0.11.7  # Minimum safe release (https://osv.dev/vulnerability/PYSEC-2020-150)\n"
  },
  {
    "path": "src/gitingest/__init__.py",
    "content": "\"\"\"Gitingest: A package for ingesting data from Git repositories.\"\"\"\n\nfrom gitingest.entrypoint import ingest, ingest_async\n\n__all__ = [\"ingest\", \"ingest_async\"]\n"
  },
  {
    "path": "src/gitingest/__main__.py",
    "content": "\"\"\"Command-line interface (CLI) for Gitingest.\"\"\"\n\n# pylint: disable=no-value-for-parameter\nfrom __future__ import annotations\n\nimport asyncio\nfrom typing import TypedDict\n\nimport click\nfrom typing_extensions import Unpack\n\nfrom gitingest.config import MAX_FILE_SIZE, OUTPUT_FILE_NAME\nfrom gitingest.entrypoint import ingest_async\n\n# Import logging configuration first to intercept all logging\nfrom gitingest.utils.logging_config import get_logger\n\n# Initialize logger for this module\nlogger = get_logger(__name__)\n\n\nclass _CLIArgs(TypedDict):\n    source: str\n    max_size: int\n    exclude_pattern: tuple[str, ...]\n    include_pattern: tuple[str, ...]\n    branch: str | None\n    include_gitignored: bool\n    include_submodules: bool\n    token: str | None\n    output: str | None\n\n\n@click.command()\n@click.argument(\"source\", type=str, default=\".\")\n@click.option(\n    \"--max-size\",\n    \"-s\",\n    default=MAX_FILE_SIZE,\n    show_default=True,\n    help=\"Maximum file size to process in bytes\",\n)\n@click.option(\"--exclude-pattern\", \"-e\", multiple=True, help=\"Shell-style patterns to exclude.\")\n@click.option(\n    \"--include-pattern\",\n    \"-i\",\n    multiple=True,\n    help=\"Shell-style patterns to include.\",\n)\n@click.option(\"--branch\", \"-b\", default=None, help=\"Branch to clone and ingest\")\n@click.option(\n    \"--include-gitignored\",\n    is_flag=True,\n    default=False,\n    help=\"Include files matched by .gitignore and .gitingestignore\",\n)\n@click.option(\n    \"--include-submodules\",\n    is_flag=True,\n    help=\"Include repository's submodules in the analysis\",\n    default=False,\n)\n@click.option(\n    \"--token\",\n    \"-t\",\n    envvar=\"GITHUB_TOKEN\",\n    default=None,\n    help=(\n        \"GitHub personal access token (PAT) for accessing private repositories. 
\"\n        \"If omitted, the CLI will look for the GITHUB_TOKEN environment variable.\"\n    ),\n)\n@click.option(\n    \"--output\",\n    \"-o\",\n    default=None,\n    help=\"Output file path (default: digest.txt in current directory). Use '-' for stdout.\",\n)\ndef main(**cli_kwargs: Unpack[_CLIArgs]) -> None:\n    \"\"\"Run the CLI entry point to analyze a repo / directory and dump its contents.\n\n    Parameters\n    ----------\n    **cli_kwargs : Unpack[_CLIArgs]\n        A dictionary of keyword arguments forwarded to ``ingest_async``.\n\n    Notes\n    -----\n    See ``ingest_async`` for a detailed description of each argument.\n\n    Examples\n    --------\n    Basic usage:\n        $ gitingest\n        $ gitingest /path/to/repo\n        $ gitingest https://github.com/user/repo\n\n    Output to stdout:\n        $ gitingest -o -\n        $ gitingest https://github.com/user/repo --output -\n\n    With filtering:\n        $ gitingest -i \"*.py\" -e \"*.log\"\n        $ gitingest --include-pattern \"*.js\" --exclude-pattern \"node_modules/*\"\n\n    Private repositories:\n        $ gitingest https://github.com/user/private-repo -t ghp_token\n        $ GITHUB_TOKEN=ghp_token gitingest https://github.com/user/private-repo\n\n    Include submodules:\n        $ gitingest https://github.com/user/repo --include-submodules\n\n    \"\"\"\n    asyncio.run(_async_main(**cli_kwargs))\n\n\nasync def _async_main(\n    source: str,\n    *,\n    max_size: int = MAX_FILE_SIZE,\n    exclude_pattern: tuple[str, ...] | None = None,\n    include_pattern: tuple[str, ...] 
| None = None,\n    branch: str | None = None,\n    include_gitignored: bool = False,\n    include_submodules: bool = False,\n    token: str | None = None,\n    output: str | None = None,\n) -> None:\n    \"\"\"Analyze a directory or repository and create a text dump of its contents.\n\n    This command scans the specified ``source`` (a local directory or Git repo),\n    applies custom include and exclude patterns, and generates a text summary of\n    the analysis.  The summary is written to an output file or printed to ``stdout``.\n\n    Parameters\n    ----------\n    source : str\n        A directory path or a Git repository URL.\n    max_size : int\n        Maximum file size in bytes to ingest (default: 10 MB).\n    exclude_pattern : tuple[str, ...] | None\n        Glob patterns for pruning the file set.\n    include_pattern : tuple[str, ...] | None\n        Glob patterns for including files in the output.\n    branch : str | None\n        Git branch to ingest. If ``None``, the repository's default branch is used.\n    include_gitignored : bool\n        If ``True``, also ingest files matched by ``.gitignore`` or ``.gitingestignore`` (default: ``False``).\n    include_submodules : bool\n        If ``True``, recursively include all Git submodules within the repository (default: ``False``).\n    token : str | None\n        GitHub personal access token (PAT) for accessing private repositories.\n        Can also be set via the ``GITHUB_TOKEN`` environment variable.\n    output : str | None\n        The path where the output file will be written (default: ``digest.txt`` in current directory).\n        Use ``\"-\"`` to write to ``stdout``.\n\n    Raises\n    ------\n    click.Abort\n        Raised if an error occurs during execution and the command must be aborted.\n\n    \"\"\"\n    try:\n        # Normalise pattern containers (the ingest layer expects sets)\n        exclude_patterns = set(exclude_pattern) if exclude_pattern else set()\n        include_patterns = 
set(include_pattern) if include_pattern else set()\n\n        output_target = output if output is not None else OUTPUT_FILE_NAME\n\n        if output_target == \"-\":\n            click.echo(\"Analyzing source, preparing output for stdout...\", err=True)\n        else:\n            click.echo(f\"Analyzing source, output will be written to '{output_target}'...\", err=True)\n\n        summary, _, _ = await ingest_async(\n            source,\n            max_file_size=max_size,\n            include_patterns=include_patterns,\n            exclude_patterns=exclude_patterns,\n            branch=branch,\n            include_gitignored=include_gitignored,\n            include_submodules=include_submodules,\n            token=token,\n            output=output_target,\n        )\n    except Exception as exc:\n        # Convert any exception into Click.Abort so that exit status is non-zero\n        click.echo(f\"Error: {exc}\", err=True)\n        raise click.Abort from exc\n\n    if output_target == \"-\":  # stdout\n        click.echo(\"\\n--- Summary ---\", err=True)\n        click.echo(summary, err=True)\n        click.echo(\"--- End Summary ---\", err=True)\n        click.echo(\"Analysis complete! Output sent to stdout.\", err=True)\n    else:  # file\n        click.echo(f\"Analysis complete! Output written to: {output_target}\")\n        click.echo(\"\\nSummary:\")\n        click.echo(summary)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "src/gitingest/clone.py",
    "content": "\"\"\"Module containing functions for cloning a Git repository to a local path.\"\"\"\n\nfrom __future__ import annotations\n\nfrom pathlib import Path\nfrom typing import TYPE_CHECKING\n\nimport git\n\nfrom gitingest.config import DEFAULT_TIMEOUT\nfrom gitingest.utils.git_utils import (\n    check_repo_exists,\n    checkout_partial_clone,\n    create_git_repo,\n    ensure_git_installed,\n    git_auth_context,\n    is_github_host,\n    resolve_commit,\n)\nfrom gitingest.utils.logging_config import get_logger\nfrom gitingest.utils.os_utils import ensure_directory_exists_or_create\nfrom gitingest.utils.timeout_wrapper import async_timeout\n\nif TYPE_CHECKING:\n    from gitingest.schemas import CloneConfig\n\n# Initialize logger for this module\nlogger = get_logger(__name__)\n\n\n@async_timeout(DEFAULT_TIMEOUT)\nasync def clone_repo(config: CloneConfig, *, token: str | None = None) -> None:\n    \"\"\"Clone a repository to a local path based on the provided configuration.\n\n    This function handles the process of cloning a Git repository to the local file system.\n    It can clone a specific branch, tag, or commit if provided, and it raises exceptions if\n    any errors occur during the cloning process.\n\n    Parameters\n    ----------\n    config : CloneConfig\n        The configuration for cloning the repository.\n    token : str | None\n        GitHub personal access token (PAT) for accessing private repositories.\n\n    Raises\n    ------\n    ValueError\n        If the repository is not found, if the provided URL is invalid, or if the token format is invalid.\n    RuntimeError\n        If Git operations fail during the cloning process.\n\n    \"\"\"\n    # Extract and validate query parameters\n    url: str = config.url\n    local_path: str = config.local_path\n    partial_clone: bool = config.subpath != \"/\"\n\n    logger.info(\n        \"Starting git clone operation\",\n        extra={\n            \"url\": url,\n            \"local_path\": 
local_path,\n            \"partial_clone\": partial_clone,\n            \"subpath\": config.subpath,\n            \"branch\": config.branch,\n            \"tag\": config.tag,\n            \"commit\": config.commit,\n            \"include_submodules\": config.include_submodules,\n        },\n    )\n\n    logger.debug(\"Ensuring git is installed\")\n    await ensure_git_installed()\n\n    logger.debug(\"Creating local directory\", extra={\"parent_path\": str(Path(local_path).parent)})\n    await ensure_directory_exists_or_create(Path(local_path).parent)\n\n    logger.debug(\"Checking if repository exists\", extra={\"url\": url})\n    if not await check_repo_exists(url, token=token):\n        logger.error(\"Repository not found\", extra={\"url\": url})\n        msg = \"Repository not found. Make sure it is public or that you have provided a valid token.\"\n        raise ValueError(msg)\n\n    logger.debug(\"Resolving commit reference\")\n    commit = await resolve_commit(config, token=token)\n    logger.debug(\"Resolved commit\", extra={\"commit\": commit})\n\n    # Clone the repository using GitPython with proper authentication\n    logger.info(\"Executing git clone operation\", extra={\"url\": \"<redacted>\", \"local_path\": local_path})\n    try:\n        clone_kwargs = {\n            \"single_branch\": True,\n            \"no_checkout\": True,\n            \"depth\": 1,\n        }\n\n        with git_auth_context(url, token) as (git_cmd, auth_url):\n            if partial_clone:\n                # For partial clones, use git.Git() with filter and sparse options\n                cmd_args = [\"--single-branch\", \"--no-checkout\", \"--depth=1\"]\n                cmd_args.extend([\"--filter=blob:none\", \"--sparse\"])\n                cmd_args.extend([auth_url, local_path])\n                git_cmd.clone(*cmd_args)\n            elif token and is_github_host(url):\n                # For authenticated GitHub repos, use git_cmd with auth URL\n                cmd_args = 
[\"--single-branch\", \"--no-checkout\", \"--depth=1\", auth_url, local_path]\n                git_cmd.clone(*cmd_args)\n            else:\n                # For non-authenticated repos, use the standard GitPython method\n                git.Repo.clone_from(url, local_path, **clone_kwargs)\n\n        logger.info(\"Git clone completed successfully\")\n    except git.GitCommandError as exc:\n        msg = f\"Git clone failed: {exc}\"\n        raise RuntimeError(msg) from exc\n\n    # Checkout the subpath if it is a partial clone\n    if partial_clone:\n        logger.info(\"Setting up partial clone for subpath\", extra={\"subpath\": config.subpath})\n        await checkout_partial_clone(config, token=token)\n        logger.debug(\"Partial clone setup completed\")\n\n    # Perform post-clone operations\n    await _perform_post_clone_operations(config, local_path, url, token, commit)\n\n    logger.info(\"Git clone operation completed successfully\", extra={\"local_path\": local_path})\n\n\nasync def _perform_post_clone_operations(\n    config: CloneConfig,\n    local_path: str,\n    url: str,\n    token: str | None,\n    commit: str,\n) -> None:\n    \"\"\"Perform post-clone operations like fetching, checkout, and submodule updates.\n\n    Parameters\n    ----------\n    config : CloneConfig\n        The configuration for cloning the repository.\n    local_path : str\n        The local path where the repository was cloned.\n    url : str\n        The repository URL.\n    token : str | None\n        GitHub personal access token (PAT) for accessing private repositories.\n    commit : str\n        The commit SHA to checkout.\n\n    Raises\n    ------\n    RuntimeError\n        If any Git operation fails.\n\n    \"\"\"\n    try:\n        repo = create_git_repo(local_path, url, token)\n\n        # Ensure the commit is locally available\n        logger.debug(\"Fetching specific commit\", extra={\"commit\": commit})\n        repo.git.fetch(\"--depth=1\", \"origin\", 
commit)\n\n        # Write the work-tree at that commit\n        logger.info(\"Checking out commit\", extra={\"commit\": commit})\n        repo.git.checkout(commit)\n\n        # Update submodules\n        if config.include_submodules:\n            logger.info(\"Updating submodules\")\n            repo.git.submodule(\"update\", \"--init\", \"--recursive\", \"--depth=1\")\n            logger.debug(\"Submodules updated successfully\")\n    except git.GitCommandError as exc:\n        msg = f\"Git operation failed: {exc}\"\n        raise RuntimeError(msg) from exc\n"
  },
  {
    "path": "src/gitingest/config.py",
    "content": "\"\"\"Configuration file for the project.\"\"\"\n\nimport tempfile\nfrom pathlib import Path\n\nMAX_FILE_SIZE = 10 * 1024 * 1024  # Maximum size of a single file to process (10 MB)\nMAX_DIRECTORY_DEPTH = 20  # Maximum depth of directory traversal\nMAX_FILES = 10_000  # Maximum number of files to process\nMAX_TOTAL_SIZE_BYTES = 500 * 1024 * 1024  # Maximum cumulative size of all ingested files (500 MB)\nDEFAULT_TIMEOUT = 60  # Default timeout for long-running operations (e.g. cloning), in seconds\n\nOUTPUT_FILE_NAME = \"digest.txt\"\n\nTMP_BASE_PATH = Path(tempfile.gettempdir()) / \"gitingest\"\n"
  },
  {
    "path": "src/gitingest/entrypoint.py",
    "content": "\"\"\"Main entry point for ingesting a source and processing its contents.\"\"\"\n\nfrom __future__ import annotations\n\nimport asyncio\nimport errno\nimport shutil\nimport stat\nimport sys\nfrom contextlib import asynccontextmanager\nfrom pathlib import Path\nfrom typing import TYPE_CHECKING, AsyncGenerator, Callable\nfrom urllib.parse import urlparse\n\nfrom gitingest.clone import clone_repo\nfrom gitingest.config import MAX_FILE_SIZE\nfrom gitingest.ingestion import ingest_query\nfrom gitingest.query_parser import parse_local_dir_path, parse_remote_repo\nfrom gitingest.utils.auth import resolve_token\nfrom gitingest.utils.compat_func import removesuffix\nfrom gitingest.utils.ignore_patterns import load_ignore_patterns\nfrom gitingest.utils.logging_config import get_logger\nfrom gitingest.utils.pattern_utils import process_patterns\nfrom gitingest.utils.query_parser_utils import KNOWN_GIT_HOSTS\n\nif TYPE_CHECKING:\n    from types import TracebackType\n\n    from gitingest.schemas import IngestionQuery\n\n# Initialize logger for this module\nlogger = get_logger(__name__)\n\n\nasync def ingest_async(\n    source: str,\n    *,\n    max_file_size: int = MAX_FILE_SIZE,\n    include_patterns: str | set[str] | None = None,\n    exclude_patterns: str | set[str] | None = None,\n    branch: str | None = None,\n    tag: str | None = None,\n    include_gitignored: bool = False,\n    include_submodules: bool = False,\n    token: str | None = None,\n    output: str | None = None,\n) -> tuple[str, str, str]:\n    \"\"\"Ingest a source and process its contents.\n\n    This function analyzes a source (URL or local path), clones the corresponding repository (if applicable),\n    and processes its files according to the specified query parameters. It returns a summary, a tree-like\n    structure of the files, and the content of the files. 
The results can optionally be written to an output file.\n\n    Parameters\n    ----------\n    source : str\n        The source to analyze, which can be a URL (for a Git repository) or a local directory path.\n    max_file_size : int\n        Maximum allowed file size for file ingestion. Files larger than this size are ignored (default: 10 MB).\n    include_patterns : str | set[str] | None\n        Pattern or set of patterns specifying which files to include. If ``None``, all files are included.\n    exclude_patterns : str | set[str] | None\n        Pattern or set of patterns specifying which files to exclude. If ``None``, no files are excluded.\n    branch : str | None\n        The branch to clone and ingest (default: the default branch).\n    tag : str | None\n        The tag to clone and ingest. If ``None``, no tag is used.\n    include_gitignored : bool\n        If ``True``, include files ignored by ``.gitignore`` and ``.gitingestignore`` (default: ``False``).\n    include_submodules : bool\n        If ``True``, recursively include all Git submodules within the repository (default: ``False``).\n    token : str | None\n        GitHub personal access token (PAT) for accessing private repositories.\n        Can also be set via the ``GITHUB_TOKEN`` environment variable.\n    output : str | None\n        File path where the summary and content should be written.\n        If ``\"-\"`` (dash), the results are written to ``stdout``.\n        If ``None``, the results are not written to a file.\n\n    Returns\n    -------\n    tuple[str, str, str]\n        A tuple containing:\n        - A summary string of the analyzed repository or directory.\n        - A tree-like string representation of the file structure.\n        - The content of the files in the repository or directory.\n\n    \"\"\"\n    logger.info(\"Starting ingestion process\", extra={\"source\": source})\n\n    token = resolve_token(token)\n\n    source = removesuffix(source.strip(), \".git\")\n\n    # 
Determine the parsing method based on the source type\n    if urlparse(source).scheme in (\"https\", \"http\") or any(h in source for h in KNOWN_GIT_HOSTS):\n        # We either have a full URL or a domain-less slug\n        logger.info(\"Parsing remote repository\", extra={\"source\": source})\n        query = await parse_remote_repo(source, token=token)\n        query.include_submodules = include_submodules\n        _override_branch_and_tag(query, branch=branch, tag=tag)\n\n    else:\n        # Local path scenario\n        logger.info(\"Processing local directory\", extra={\"source\": source})\n        query = parse_local_dir_path(source)\n\n    query.max_file_size = max_file_size\n    query.ignore_patterns, query.include_patterns = process_patterns(\n        exclude_patterns=exclude_patterns,\n        include_patterns=include_patterns,\n    )\n\n    if query.url:\n        _override_branch_and_tag(query, branch=branch, tag=tag)\n\n    query.include_submodules = include_submodules\n\n    logger.debug(\n        \"Configuration completed\",\n        extra={\n            \"max_file_size\": query.max_file_size,\n            \"include_submodules\": query.include_submodules,\n            \"include_gitignored\": include_gitignored,\n            \"has_include_patterns\": bool(query.include_patterns),\n            \"has_exclude_patterns\": bool(query.ignore_patterns),\n        },\n    )\n\n    async with _clone_repo_if_remote(query, token=token):\n        if query.url:\n            logger.info(\"Repository cloned, starting file processing\")\n        else:\n            logger.info(\"Starting local directory processing\")\n\n        if not include_gitignored:\n            logger.debug(\"Applying gitignore patterns\")\n            _apply_gitignores(query)\n\n        logger.info(\"Processing files and generating output\")\n        summary, tree, content = ingest_query(query)\n\n        if output:\n            logger.debug(\"Writing output to file\", extra={\"output_path\": 
output})\n        await _write_output(tree, content=content, target=output)\n\n        logger.info(\"Ingestion completed successfully\")\n        return summary, tree, content\n\n\ndef ingest(\n    source: str,\n    *,\n    max_file_size: int = MAX_FILE_SIZE,\n    include_patterns: str | set[str] | None = None,\n    exclude_patterns: str | set[str] | None = None,\n    branch: str | None = None,\n    tag: str | None = None,\n    include_gitignored: bool = False,\n    include_submodules: bool = False,\n    token: str | None = None,\n    output: str | None = None,\n) -> tuple[str, str, str]:\n    \"\"\"Provide a synchronous wrapper around ``ingest_async``.\n\n    This function analyzes a source (URL or local path), clones the corresponding repository (if applicable),\n    and processes its files according to the specified query parameters. It returns a summary, a tree-like\n    structure of the files, and the content of the files. The results can optionally be written to an output file.\n\n    Parameters\n    ----------\n    source : str\n        The source to analyze, which can be a URL (for a Git repository) or a local directory path.\n    max_file_size : int\n        Maximum allowed file size for file ingestion. Files larger than this size are ignored (default: 10 MB).\n    include_patterns : str | set[str] | None\n        Pattern or set of patterns specifying which files to include. If ``None``, all files are included.\n    exclude_patterns : str | set[str] | None\n        Pattern or set of patterns specifying which files to exclude. If ``None``, no files are excluded.\n    branch : str | None\n        The branch to clone and ingest (default: the default branch).\n    tag : str | None\n        The tag to clone and ingest. 
If ``None``, no tag is used.\n    include_gitignored : bool\n        If ``True``, include files ignored by ``.gitignore`` and ``.gitingestignore`` (default: ``False``).\n    include_submodules : bool\n        If ``True``, recursively include all Git submodules within the repository (default: ``False``).\n    token : str | None\n        GitHub personal access token (PAT) for accessing private repositories.\n        Can also be set via the ``GITHUB_TOKEN`` environment variable.\n    output : str | None\n        File path where the summary and content should be written.\n        If ``\"-\"`` (dash), the results are written to ``stdout``.\n        If ``None``, the results are not written to a file.\n\n    Returns\n    -------\n    tuple[str, str, str]\n        A tuple containing:\n        - A summary string of the analyzed repository or directory.\n        - A tree-like string representation of the file structure.\n        - The content of the files in the repository or directory.\n\n    See Also\n    --------\n    ``ingest_async`` : The asynchronous version of this function.\n\n    \"\"\"\n    return asyncio.run(\n        ingest_async(\n            source=source,\n            max_file_size=max_file_size,\n            include_patterns=include_patterns,\n            exclude_patterns=exclude_patterns,\n            branch=branch,\n            tag=tag,\n            include_gitignored=include_gitignored,\n            include_submodules=include_submodules,\n            token=token,\n            output=output,\n        ),\n    )\n\n\ndef _override_branch_and_tag(query: IngestionQuery, branch: str | None, tag: str | None) -> None:\n    \"\"\"Compare the caller-supplied ``branch`` and ``tag`` with the ones already in ``query``.\n\n    If they differ, update ``query`` to the chosen values and issue a warning.\n    If both are specified, the tag wins over the branch.\n\n    Parameters\n    ----------\n    query : IngestionQuery\n        The query to update.\n    branch : str | 
None\n        The branch to use.\n    tag : str | None\n        The tag to use.\n\n    \"\"\"\n    if tag and query.tag and tag != query.tag:\n        msg = f\"Warning: The specified tag '{tag}' overrides the tag found in the URL '{query.tag}'.\"\n        logger.warning(msg)\n\n    query.tag = tag or query.tag\n\n    if branch and query.branch and branch != query.branch:\n        msg = f\"Warning: The specified branch '{branch}' overrides the branch found in the URL '{query.branch}'.\"\n        logger.warning(msg)\n\n    query.branch = branch or query.branch\n\n    if tag and branch:\n        msg = \"Warning: Both tag and branch are specified. The tag will be used.\"\n        logger.warning(msg)\n\n    # Tag wins over branch if both supplied\n    if query.tag:\n        query.branch = None\n\n\ndef _apply_gitignores(query: IngestionQuery) -> None:\n    \"\"\"Update ``query.ignore_patterns`` in-place.\n\n    Parameters\n    ----------\n    query : IngestionQuery\n        The query to update.\n\n    \"\"\"\n    for fname in (\".gitignore\", \".gitingestignore\"):\n        query.ignore_patterns.update(load_ignore_patterns(query.local_path, filename=fname))\n\n\n@asynccontextmanager\nasync def _clone_repo_if_remote(query: IngestionQuery, *, token: str | None) -> AsyncGenerator[None]:\n    \"\"\"Async context-manager that clones ``query.url`` if present.\n\n    If ``query.url`` is set, the repo is cloned, control is yielded, and the temp directory is removed on exit.\n    If no URL is given, the function simply yields immediately.\n\n    Parameters\n    ----------\n    query : IngestionQuery\n        Parsed query describing the source to ingest.\n    token : str | None\n        GitHub personal access token (PAT) for accessing private repositories.\n\n    \"\"\"\n    kwargs = {}\n    if sys.version_info >= (3, 12):\n        kwargs[\"onexc\"] = _handle_remove_readonly\n    else:\n        kwargs[\"onerror\"] = _handle_remove_readonly\n\n    if query.url:\n        
clone_config = query.extract_clone_config()\n        await clone_repo(clone_config, token=token)\n        try:\n            yield\n        finally:\n            shutil.rmtree(query.local_path.parent, **kwargs)\n    else:\n        yield\n\n\ndef _handle_remove_readonly(\n    func: Callable,\n    path: str,\n    exc_info: BaseException | tuple[type[BaseException], BaseException, TracebackType],\n) -> None:\n    \"\"\"Handle permission errors raised by ``shutil.rmtree()``.\n\n    * Makes the target writable (removes the read-only attribute).\n    * Retries the original operation (``func``) once.\n\n    \"\"\"\n    # 'onerror' passes a (type, value, tb) tuple; 'onexc' passes the exception\n    if isinstance(exc_info, tuple):  # 'onerror' (Python <3.12)\n        exc: BaseException = exc_info[1]\n    else:  # 'onexc' (Python 3.12+)\n        exc = exc_info\n\n    # Handle only 'Permission denied' and 'Operation not permitted'\n    if not isinstance(exc, OSError) or exc.errno not in {errno.EACCES, errno.EPERM}:\n        raise exc\n\n    # Make the target writable\n    Path(path).chmod(stat.S_IWRITE)\n    func(path)\n\n\nasync def _write_output(tree: str, content: str, target: str | None) -> None:\n    \"\"\"Write combined output to ``target`` (``\"-\"`` ⇒ stdout).\n\n    Parameters\n    ----------\n    tree : str\n        The tree-like string representation of the file structure.\n    content : str\n        The content of the files in the repository or directory.\n    target : str | None\n        The path to the output file. If ``None``, the results are not written to a file.\n\n    \"\"\"\n    data = f\"{tree}\\n{content}\"\n    loop = asyncio.get_running_loop()\n    if target == \"-\":\n        await loop.run_in_executor(None, sys.stdout.write, data)\n        await loop.run_in_executor(None, sys.stdout.flush)\n    elif target is not None:\n        await loop.run_in_executor(None, Path(target).write_text, data, \"utf-8\")\n"
  },
  {
    "path": "src/gitingest/ingestion.py",
    "content": "\"\"\"Functions to ingest and analyze a codebase directory or single file.\"\"\"\n\nfrom __future__ import annotations\n\nfrom pathlib import Path\nfrom typing import TYPE_CHECKING\n\nfrom gitingest.config import MAX_DIRECTORY_DEPTH, MAX_FILES, MAX_TOTAL_SIZE_BYTES\nfrom gitingest.output_formatter import format_node\nfrom gitingest.schemas import FileSystemNode, FileSystemNodeType, FileSystemStats\nfrom gitingest.utils.ingestion_utils import _should_exclude, _should_include\nfrom gitingest.utils.logging_config import get_logger\n\nif TYPE_CHECKING:\n    from gitingest.schemas import IngestionQuery\n\n# Initialize logger for this module\nlogger = get_logger(__name__)\n\n\ndef ingest_query(query: IngestionQuery) -> tuple[str, str, str]:\n    \"\"\"Run the ingestion process for a parsed query.\n\n    This is the main entry point for analyzing a codebase directory or single file. It processes the query\n    parameters, reads the file or directory content, and generates a summary, directory structure, and file content,\n    along with token estimations.\n\n    Parameters\n    ----------\n    query : IngestionQuery\n        The parsed query object containing information about the repository and query parameters.\n\n    Returns\n    -------\n    tuple[str, str, str]\n        A tuple containing the summary, directory structure, and file contents.\n\n    Raises\n    ------\n    ValueError\n        If the path cannot be found, is not a file, or the file has no content.\n\n    \"\"\"\n    logger.info(\n        \"Starting file ingestion\",\n        extra={\n            \"slug\": query.slug,\n            \"subpath\": query.subpath,\n            \"local_path\": str(query.local_path),\n            \"max_file_size\": query.max_file_size,\n        },\n    )\n\n    subpath = Path(query.subpath.strip(\"/\")).as_posix()\n    path = query.local_path / subpath\n\n    if not path.exists():\n        logger.error(\"Path not found\", extra={\"path\": str(path), \"slug\": 
query.slug})\n        msg = f\"{query.slug} cannot be found\"\n        raise ValueError(msg)\n\n    if (query.type and query.type == \"blob\") or query.local_path.is_file():\n        # TODO: We do this wrong! We should still check the branch and commit!\n        logger.info(\"Processing single file\", extra={\"file_path\": str(path)})\n\n        if not path.is_file():\n            logger.error(\"Expected file but found non-file\", extra={\"path\": str(path)})\n            msg = f\"Path {path} is not a file\"\n            raise ValueError(msg)\n\n        relative_path = path.relative_to(query.local_path)\n\n        file_node = FileSystemNode(\n            name=path.name,\n            type=FileSystemNodeType.FILE,\n            size=path.stat().st_size,\n            file_count=1,\n            path_str=str(relative_path),\n            path=path,\n        )\n\n        if not file_node.content:\n            logger.error(\"File has no content\", extra={\"file_name\": file_node.name})\n            msg = f\"File {file_node.name} has no content\"\n            raise ValueError(msg)\n\n        logger.info(\n            \"Single file processing completed\",\n            extra={\n                \"file_name\": file_node.name,\n                \"file_size\": file_node.size,\n            },\n        )\n        return format_node(file_node, query=query)\n\n    logger.info(\"Processing directory\", extra={\"directory_path\": str(path)})\n\n    root_node = FileSystemNode(\n        name=path.name,\n        type=FileSystemNodeType.DIRECTORY,\n        path_str=str(path.relative_to(query.local_path)),\n        path=path,\n    )\n\n    stats = FileSystemStats()\n\n    _process_node(node=root_node, query=query, stats=stats)\n\n    logger.info(\n        \"Directory processing completed\",\n        extra={\n            \"total_files\": root_node.file_count,\n            \"total_directories\": root_node.dir_count,\n            \"total_size_bytes\": root_node.size,\n            
\"stats_total_files\": stats.total_files,\n            \"stats_total_size\": stats.total_size,\n        },\n    )\n\n    return format_node(root_node, query=query)\n\n\ndef _process_node(node: FileSystemNode, query: IngestionQuery, stats: FileSystemStats) -> None:\n    \"\"\"Process a file or directory item within a directory.\n\n    This function handles each file or directory item, checking if it should be included or excluded based on the\n    provided patterns. It handles symlinks, directories, and files accordingly.\n\n    Parameters\n    ----------\n    node : FileSystemNode\n        The current directory or file node being processed.\n    query : IngestionQuery\n        The parsed query object containing information about the repository and query parameters.\n    stats : FileSystemStats\n        Statistics tracking object for the total file count and size.\n\n    \"\"\"\n    if limit_exceeded(stats, depth=node.depth):\n        return\n\n    for sub_path in node.path.iterdir():\n        if query.ignore_patterns and _should_exclude(sub_path, query.local_path, query.ignore_patterns):\n            continue\n\n        if query.include_patterns and not _should_include(sub_path, query.local_path, query.include_patterns):\n            continue\n\n        if sub_path.is_symlink():\n            _process_symlink(path=sub_path, parent_node=node, stats=stats, local_path=query.local_path)\n        elif sub_path.is_file():\n            if sub_path.stat().st_size > query.max_file_size:\n                logger.debug(\n                    \"Skipping file: would exceed max file size limit\",\n                    extra={\n                        \"file_path\": str(sub_path),\n                        \"file_size\": sub_path.stat().st_size,\n                        \"max_file_size\": query.max_file_size,\n                    },\n                )\n                continue\n            _process_file(path=sub_path, parent_node=node, stats=stats, local_path=query.local_path)\n       
 elif sub_path.is_dir():\n            child_directory_node = FileSystemNode(\n                name=sub_path.name,\n                type=FileSystemNodeType.DIRECTORY,\n                path_str=str(sub_path.relative_to(query.local_path)),\n                path=sub_path,\n                depth=node.depth + 1,\n            )\n\n            _process_node(node=child_directory_node, query=query, stats=stats)\n\n            if not child_directory_node.children:\n                continue\n\n            node.children.append(child_directory_node)\n            node.size += child_directory_node.size\n            node.file_count += child_directory_node.file_count\n            node.dir_count += 1 + child_directory_node.dir_count\n        else:\n            logger.warning(\"Unknown file type, skipping\", extra={\"file_path\": str(sub_path)})\n\n    node.sort_children()\n\n\ndef _process_symlink(path: Path, parent_node: FileSystemNode, stats: FileSystemStats, local_path: Path) -> None:\n    \"\"\"Process a symlink in the file system.\n\n    This function checks the symlink's target.\n\n    Parameters\n    ----------\n    path : Path\n        The full path of the symlink.\n    parent_node : FileSystemNode\n        The parent directory node.\n    stats : FileSystemStats\n        Statistics tracking object for the total file count and size.\n    local_path : Path\n        The base path of the repository or directory being processed.\n\n    \"\"\"\n    child = FileSystemNode(\n        name=path.name,\n        type=FileSystemNodeType.SYMLINK,\n        path_str=str(path.relative_to(local_path)),\n        path=path,\n        depth=parent_node.depth + 1,\n    )\n    stats.total_files += 1\n    parent_node.children.append(child)\n    parent_node.file_count += 1\n\n\ndef _process_file(path: Path, parent_node: FileSystemNode, stats: FileSystemStats, local_path: Path) -> None:\n    \"\"\"Process a file in the file system.\n\n    This function checks the file's size, increments the statistics, 
and reads its content.\n    If the file size exceeds the maximum allowed, it raises an error.\n\n    Parameters\n    ----------\n    path : Path\n        The full path of the file.\n    parent_node : FileSystemNode\n        The dictionary to accumulate the results.\n    stats : FileSystemStats\n        Statistics tracking object for the total file count and size.\n    local_path : Path\n        The base path of the repository or directory being processed.\n\n    \"\"\"\n    if stats.total_files + 1 > MAX_FILES:\n        logger.warning(\n            \"Maximum file limit reached\",\n            extra={\n                \"current_files\": stats.total_files,\n                \"max_files\": MAX_FILES,\n                \"file_path\": str(path),\n            },\n        )\n        return\n\n    file_size = path.stat().st_size\n    if stats.total_size + file_size > MAX_TOTAL_SIZE_BYTES:\n        logger.warning(\n            \"Skipping file: would exceed total size limit\",\n            extra={\n                \"file_path\": str(path),\n                \"file_size\": file_size,\n                \"current_total_size\": stats.total_size,\n                \"max_total_size\": MAX_TOTAL_SIZE_BYTES,\n            },\n        )\n        return\n\n    stats.total_files += 1\n    stats.total_size += file_size\n\n    child = FileSystemNode(\n        name=path.name,\n        type=FileSystemNodeType.FILE,\n        size=file_size,\n        file_count=1,\n        path_str=str(path.relative_to(local_path)),\n        path=path,\n        depth=parent_node.depth + 1,\n    )\n\n    parent_node.children.append(child)\n    parent_node.size += file_size\n    parent_node.file_count += 1\n\n\ndef limit_exceeded(stats: FileSystemStats, depth: int) -> bool:\n    \"\"\"Check if any of the traversal limits have been exceeded.\n\n    This function checks if the current traversal has exceeded any of the configured limits:\n    maximum directory depth, maximum number of files, or maximum total size in 
bytes.\n\n    Parameters\n    ----------\n    stats : FileSystemStats\n        Statistics tracking object for the total file count and size.\n    depth : int\n        The current depth of directory traversal.\n\n    Returns\n    -------\n    bool\n        ``True`` if any limit has been exceeded, ``False`` otherwise.\n\n    \"\"\"\n    if depth > MAX_DIRECTORY_DEPTH:\n        logger.warning(\n            \"Maximum directory depth limit reached\",\n            extra={\n                \"current_depth\": depth,\n                \"max_depth\": MAX_DIRECTORY_DEPTH,\n            },\n        )\n        return True\n\n    if stats.total_files >= MAX_FILES:\n        logger.warning(\n            \"Maximum file limit reached\",\n            extra={\n                \"current_files\": stats.total_files,\n                \"max_files\": MAX_FILES,\n            },\n        )\n        return True  # TODO: end recursion\n\n    if stats.total_size >= MAX_TOTAL_SIZE_BYTES:\n        logger.warning(\n            \"Maximum total size limit reached\",\n            extra={\n                \"current_size_mb\": stats.total_size / 1024 / 1024,\n                \"max_size_mb\": MAX_TOTAL_SIZE_BYTES / 1024 / 1024,\n            },\n        )\n        return True  # TODO: end recursion\n\n    return False\n"
  },
  {
    "path": "src/gitingest/output_formatter.py",
    "content": "\"\"\"Functions to ingest and analyze a codebase directory or single file.\"\"\"\n\nfrom __future__ import annotations\n\nimport ssl\nfrom typing import TYPE_CHECKING\n\nimport requests.exceptions\nimport tiktoken\n\nfrom gitingest.schemas import FileSystemNode, FileSystemNodeType\nfrom gitingest.utils.compat_func import readlink\nfrom gitingest.utils.logging_config import get_logger\n\nif TYPE_CHECKING:\n    from gitingest.schemas import IngestionQuery\n\n# Initialize logger for this module\nlogger = get_logger(__name__)\n\n_TOKEN_THRESHOLDS: list[tuple[int, str]] = [\n    (1_000_000, \"M\"),\n    (1_000, \"k\"),\n]\n\n\ndef format_node(node: FileSystemNode, query: IngestionQuery) -> tuple[str, str, str]:\n    \"\"\"Generate a summary, directory structure, and file contents for a given file system node.\n\n    If the node represents a directory, the function will recursively process its contents.\n\n    Parameters\n    ----------\n    node : FileSystemNode\n        The file system node to be summarized.\n    query : IngestionQuery\n        The parsed query object containing information about the repository and query parameters.\n\n    Returns\n    -------\n    tuple[str, str, str]\n        A tuple containing the summary, directory structure, and file contents.\n\n    \"\"\"\n    is_single_file = node.type == FileSystemNodeType.FILE\n    summary = _create_summary_prefix(query, single_file=is_single_file)\n\n    if node.type == FileSystemNodeType.DIRECTORY:\n        summary += f\"Files analyzed: {node.file_count}\\n\"\n    elif node.type == FileSystemNodeType.FILE:\n        summary += f\"File: {node.name}\\n\"\n        summary += f\"Lines: {len(node.content.splitlines()):,}\\n\"\n\n    tree = \"Directory structure:\\n\" + _create_tree_structure(query, node=node)\n\n    content = _gather_file_contents(node)\n\n    token_estimate = _format_token_count(tree + content)\n    if token_estimate:\n        summary += f\"\\nEstimated tokens: 
{token_estimate}\"\n\n    return summary, tree, content\n\n\ndef _create_summary_prefix(query: IngestionQuery, *, single_file: bool = False) -> str:\n    \"\"\"Create a prefix string for summarizing a repository or local directory.\n\n    Includes repository name (if provided), commit/branch details, and subpath if relevant.\n\n    Parameters\n    ----------\n    query : IngestionQuery\n        The parsed query object containing information about the repository and query parameters.\n    single_file : bool\n        A flag indicating whether the summary is for a single file (default: ``False``).\n\n    Returns\n    -------\n    str\n        A summary prefix string containing repository, commit, branch, and subpath details.\n\n    \"\"\"\n    parts = []\n\n    if query.user_name:\n        parts.append(f\"Repository: {query.user_name}/{query.repo_name}\")\n    else:\n        # Local scenario\n        parts.append(f\"Directory: {query.slug}\")\n\n    if query.tag:\n        parts.append(f\"Tag: {query.tag}\")\n    elif query.branch and query.branch not in (\"main\", \"master\"):\n        parts.append(f\"Branch: {query.branch}\")\n\n    if query.commit:\n        parts.append(f\"Commit: {query.commit}\")\n\n    if query.subpath != \"/\" and not single_file:\n        parts.append(f\"Subpath: {query.subpath}\")\n\n    return \"\\n\".join(parts) + \"\\n\"\n\n\ndef _gather_file_contents(node: FileSystemNode) -> str:\n    \"\"\"Recursively gather contents of all files under the given node.\n\n    This function recursively processes a directory node and gathers the contents of all files\n    under that node. 
It returns the concatenated content of all files as a single string.\n\n    Parameters\n    ----------\n    node : FileSystemNode\n        The current directory or file node being processed.\n\n    Returns\n    -------\n    str\n        The concatenated content of all files under the given node.\n\n    \"\"\"\n    if node.type != FileSystemNodeType.DIRECTORY:\n        return node.content_string\n\n    # Recursively gather contents of all files under the current directory\n    return \"\\n\".join(_gather_file_contents(child) for child in node.children)\n\n\ndef _create_tree_structure(\n    query: IngestionQuery,\n    *,\n    node: FileSystemNode,\n    prefix: str = \"\",\n    is_last: bool = True,\n) -> str:\n    \"\"\"Generate a tree-like string representation of the file structure.\n\n    This function generates a string representation of the directory structure, formatted\n    as a tree with appropriate indentation for nested directories and files.\n\n    Parameters\n    ----------\n    query : IngestionQuery\n        The parsed query object containing information about the repository and query parameters.\n    node : FileSystemNode\n        The current directory or file node being processed.\n    prefix : str\n        A string used for indentation and formatting of the tree structure (default: ``\"\"``).\n    is_last : bool\n        A flag indicating whether the current node is the last in its directory (default: ``True``).\n\n    Returns\n    -------\n    str\n        A string representing the directory structure formatted as a tree.\n\n    \"\"\"\n    if not node.name:\n        # If no name is present, use the slug as the top-level directory name\n        node.name = query.slug\n\n    tree_str = \"\"\n    current_prefix = \"└── \" if is_last else \"├── \"\n\n    # Indicate directories with a trailing slash\n    display_name = node.name\n    if node.type == FileSystemNodeType.DIRECTORY:\n        display_name += \"/\"\n    elif node.type == 
FileSystemNodeType.SYMLINK:\n        display_name += \" -> \" + readlink(node.path).name\n\n    tree_str += f\"{prefix}{current_prefix}{display_name}\\n\"\n\n    if node.type == FileSystemNodeType.DIRECTORY and node.children:\n        prefix += \"    \" if is_last else \"│   \"\n        for i, child in enumerate(node.children):\n            tree_str += _create_tree_structure(query, node=child, prefix=prefix, is_last=i == len(node.children) - 1)\n    return tree_str\n\n\ndef _format_token_count(text: str) -> str | None:\n    \"\"\"Return a human-readable token-count string (e.g. 1.2k, 1.2M).\n\n    Parameters\n    ----------\n    text : str\n        The text string for which the token count is to be estimated.\n\n    Returns\n    -------\n    str | None\n        The formatted number of tokens as a string (e.g., ``\"1.2k\"``, ``\"1.2M\"``), or ``None`` if an error occurs.\n\n    \"\"\"\n    try:\n        encoding = tiktoken.get_encoding(\"o200k_base\")  # gpt-4o, gpt-4o-mini\n        total_tokens = len(encoding.encode(text, disallowed_special=()))\n    except (ValueError, UnicodeEncodeError) as exc:\n        logger.warning(\"Failed to estimate token size\", extra={\"error\": str(exc)})\n        return None\n    except (requests.exceptions.RequestException, ssl.SSLError) as exc:\n        # If network errors, skip token count estimation instead of erroring out\n        logger.warning(\"Failed to download tiktoken model\", extra={\"error\": str(exc)})\n        return None\n\n    for threshold, suffix in _TOKEN_THRESHOLDS:\n        if total_tokens >= threshold:\n            return f\"{total_tokens / threshold:.1f}{suffix}\"\n\n    return str(total_tokens)\n"
  },
  {
    "path": "src/gitingest/query_parser.py",
    "content": "\"\"\"Module containing functions to parse and validate input sources and patterns.\"\"\"\n\nfrom __future__ import annotations\n\nimport uuid\nfrom pathlib import Path\nfrom typing import Literal\n\nfrom gitingest.config import TMP_BASE_PATH\nfrom gitingest.schemas import IngestionQuery\nfrom gitingest.utils.git_utils import fetch_remote_branches_or_tags, resolve_commit\nfrom gitingest.utils.logging_config import get_logger\nfrom gitingest.utils.query_parser_utils import (\n    PathKind,\n    _fallback_to_root,\n    _get_user_and_repo_from_path,\n    _is_valid_git_commit_hash,\n    _normalise_source,\n)\n\n# Initialize logger for this module\nlogger = get_logger(__name__)\n\n\nasync def parse_remote_repo(source: str, token: str | None = None) -> IngestionQuery:\n    \"\"\"Parse a repository URL and return an ``IngestionQuery`` object.\n\n    If source is:\n      - A fully qualified URL ('https://gitlab.com/...'), parse & verify that domain\n      - A URL missing 'https://' ('gitlab.com/...'), add 'https://' and parse\n      - A *slug* ('pandas-dev/pandas'), attempt known domains until we find one that exists.\n\n    Parameters\n    ----------\n    source : str\n        The URL or domain-less slug to parse.\n    token : str | None\n        GitHub personal access token (PAT) for accessing private repositories.\n\n    Returns\n    -------\n    IngestionQuery\n        A dictionary containing the parsed details of the repository.\n\n    \"\"\"\n    parsed_url = await _normalise_source(source, token=token)\n    host = parsed_url.netloc\n    user, repo = _get_user_and_repo_from_path(parsed_url.path)\n\n    _id = uuid.uuid4()\n    slug = f\"{user}-{repo}\"\n    local_path = TMP_BASE_PATH / str(_id) / slug\n    url = f\"https://{host}/{user}/{repo}\"\n\n    query = IngestionQuery(\n        host=host,\n        user_name=user,\n        repo_name=repo,\n        url=url,\n        local_path=local_path,\n        slug=slug,\n        id=_id,\n    )\n\n    
path_parts = parsed_url.path.strip(\"/\").split(\"/\")[2:]\n\n    # main branch\n    if not path_parts:\n        return await _fallback_to_root(query, token=token)\n\n    kind = PathKind(path_parts.pop(0))  # may raise ValueError\n    query.type = kind\n\n    # TODO: Handle issues and pull requests\n    if query.type in {PathKind.ISSUES, PathKind.PULL}:\n        msg = f\"Warning: Issues and pull requests are not yet supported: {url}. Returning repository root.\"\n        return await _fallback_to_root(query, token=token, warn_msg=msg)\n\n    # If no extra path parts, just return\n    if not path_parts:\n        msg = f\"Warning: No extra path parts: {url}. Returning repository root.\"\n        return await _fallback_to_root(query, token=token, warn_msg=msg)\n\n    if query.type not in {PathKind.TREE, PathKind.BLOB}:\n        # TODO: Handle other types\n        msg = f\"Warning: Type '{query.type}' is not yet supported: {url}. Returning repository root.\"\n        return await _fallback_to_root(query, token=token, warn_msg=msg)\n\n    # Commit, branch, or tag\n    ref = path_parts[0]\n\n    if _is_valid_git_commit_hash(ref):  # Commit\n        query.commit = ref\n        path_parts.pop(0)  # Consume the commit hash\n    else:  # Branch or tag\n        # Try to resolve a tag\n        query.tag = await _configure_branch_or_tag(\n            path_parts,\n            url=url,\n            ref_type=\"tags\",\n            token=token,\n        )\n\n        # If no tag found, try to resolve a branch\n        if not query.tag:\n            query.branch = await _configure_branch_or_tag(\n                path_parts,\n                url=url,\n                ref_type=\"branches\",\n                token=token,\n            )\n\n    # Only configure subpath if we have identified a commit, branch, or tag.\n    if path_parts and (query.commit or query.branch or query.tag):\n        query.subpath += \"/\".join(path_parts)\n\n    query.commit = await 
resolve_commit(query.extract_clone_config(), token=token)\n\n    return query\n\n\ndef parse_local_dir_path(path_str: str) -> IngestionQuery:\n    \"\"\"Parse the given file path into a structured query dictionary.\n\n    Parameters\n    ----------\n    path_str : str\n        The file path to parse.\n\n    Returns\n    -------\n    IngestionQuery\n        A dictionary containing the parsed details of the file path.\n\n    \"\"\"\n    path_obj = Path(path_str).resolve()\n    slug = path_obj.name if path_str == \".\" else path_str.strip(\"/\")\n    return IngestionQuery(local_path=path_obj, slug=slug, id=uuid.uuid4())\n\n\nasync def _configure_branch_or_tag(\n    path_parts: list[str],\n    *,\n    url: str,\n    ref_type: Literal[\"branches\", \"tags\"],\n    token: str | None = None,\n) -> str | None:\n    \"\"\"Configure the branch or tag based on the remaining parts of the URL.\n\n    Parameters\n    ----------\n    path_parts : list[str]\n        The path parts of the URL.\n    url : str\n        The URL of the repository.\n    ref_type : Literal[\"branches\", \"tags\"]\n        The type of reference to configure. 
Can be \"branches\" or \"tags\".\n    token : str | None\n        GitHub personal access token (PAT) for accessing private repositories.\n\n    Returns\n    -------\n    str | None\n        The branch or tag name if found, otherwise ``None``.\n\n    \"\"\"\n    _ref_type = \"tags\" if ref_type == \"tags\" else \"branches\"\n\n    try:\n        # Fetch the list of branches or tags from the remote repository\n        branches_or_tags: list[str] = await fetch_remote_branches_or_tags(url, ref_type=_ref_type, token=token)\n    except RuntimeError as exc:\n        # If remote discovery fails, we optimistically treat the first path segment as the branch/tag.\n        msg = f\"Warning: Failed to fetch {_ref_type}: {exc}\"\n        logger.warning(msg)\n        return path_parts.pop(0) if path_parts else None\n\n    # Iterate over the path components and try to find a matching branch/tag\n    candidate_parts: list[str] = []\n\n    for part in path_parts:\n        candidate_parts.append(part)\n        candidate_name = \"/\".join(candidate_parts)\n        if candidate_name in branches_or_tags:\n            # We found a match — now consume exactly the parts that form the branch/tag\n            del path_parts[: len(candidate_parts)]\n            return candidate_name\n\n    # No match found; leave path_parts intact\n    return None\n"
  },
  {
    "path": "src/gitingest/schemas/__init__.py",
    "content": "\"\"\"Module containing the schemas for the Gitingest package.\"\"\"\n\nfrom gitingest.schemas.cloning import CloneConfig\nfrom gitingest.schemas.filesystem import FileSystemNode, FileSystemNodeType, FileSystemStats\nfrom gitingest.schemas.ingestion import IngestionQuery\n\n__all__ = [\"CloneConfig\", \"FileSystemNode\", \"FileSystemNodeType\", \"FileSystemStats\", \"IngestionQuery\"]\n"
  },
  {
    "path": "src/gitingest/schemas/cloning.py",
    "content": "\"\"\"Schema for the cloning process.\"\"\"\n\nfrom __future__ import annotations\n\nfrom pydantic import BaseModel, Field\n\n\nclass CloneConfig(BaseModel):  # pylint: disable=too-many-instance-attributes\n    \"\"\"Configuration for cloning a Git repository.\n\n    This model holds the necessary parameters for cloning a repository to a local path, including\n    the repository's URL, the target local path, and optional parameters for a specific commit, branch, or tag.\n\n    Attributes\n    ----------\n    url : str\n        The URL of the Git repository to clone.\n    local_path : str\n        The local directory where the repository will be cloned.\n    commit : str | None\n        The specific commit hash to check out after cloning.\n    branch : str | None\n        The branch to clone.\n    tag : str | None\n        The tag to clone.\n    subpath : str\n        The subpath to clone from the repository (default: ``\"/\"``).\n    blob : bool\n        Whether the repository is a blob (default: ``False``).\n    include_submodules : bool\n        Whether to clone submodules (default: ``False``).\n\n    \"\"\"\n\n    url: str\n    local_path: str\n    commit: str | None = None\n    branch: str | None = None\n    tag: str | None = None\n    subpath: str = Field(default=\"/\")\n    blob: bool = Field(default=False)\n    include_submodules: bool = Field(default=False)\n"
  },
  {
    "path": "src/gitingest/schemas/filesystem.py",
    "content": "\"\"\"Schema for the filesystem representation.\"\"\"\n\nfrom __future__ import annotations\n\nimport os\nfrom dataclasses import dataclass, field\nfrom enum import Enum, auto\nfrom typing import TYPE_CHECKING\n\nfrom gitingest.utils.compat_func import readlink\nfrom gitingest.utils.file_utils import _decodes, _get_preferred_encodings, _read_chunk\nfrom gitingest.utils.notebook import process_notebook\n\nif TYPE_CHECKING:\n    from pathlib import Path\n\nSEPARATOR = \"=\" * 48  # Tiktoken, the tokenizer openai uses, counts 2 tokens if we have more than 48\n\n\nclass FileSystemNodeType(Enum):\n    \"\"\"Enum representing the type of a file system node (directory or file).\"\"\"\n\n    DIRECTORY = auto()\n    FILE = auto()\n    SYMLINK = auto()\n\n\n@dataclass\nclass FileSystemStats:\n    \"\"\"Class for tracking statistics during file system traversal.\"\"\"\n\n    total_files: int = 0\n    total_size: int = 0\n\n\n@dataclass\nclass FileSystemNode:  # pylint: disable=too-many-instance-attributes\n    \"\"\"Class representing a node in the file system (either a file or directory).\n\n    Tracks properties of files/directories for comprehensive analysis.\n    \"\"\"\n\n    name: str\n    type: FileSystemNodeType\n    path_str: str\n    path: Path\n    size: int = 0\n    file_count: int = 0\n    dir_count: int = 0\n    depth: int = 0\n    children: list[FileSystemNode] = field(default_factory=list)\n\n    def sort_children(self) -> None:\n        \"\"\"Sort the children nodes of a directory according to a specific order.\n\n        Order of sorting:\n          1. README files (``README`` or ``README.*``, case-insensitive)\n          2. Regular files (not starting with dot)\n          3. Hidden files (starting with dot)\n          4. Regular directories (not starting with dot)\n          5. 
Hidden directories (starting with dot)\n\n        All groups are sorted alphanumerically within themselves.\n\n        Raises\n        ------\n        ValueError\n            If the node is not a directory.\n\n        \"\"\"\n        if self.type != FileSystemNodeType.DIRECTORY:\n            msg = \"Cannot sort children of a non-directory node\"\n            raise ValueError(msg)\n\n        def _sort_key(child: FileSystemNode) -> tuple[int, str]:\n            # returns the priority order for the sort function, 0 is first\n            # Groups: 0=README, 1=regular file, 2=hidden file, 3=regular dir, 4=hidden dir\n            name = child.name.lower()\n            if child.type == FileSystemNodeType.FILE:\n                if name == \"readme\" or name.startswith(\"readme.\"):\n                    return (0, name)\n                return (1 if not name.startswith(\".\") else 2, name)\n            return (3 if not name.startswith(\".\") else 4, name)\n\n        self.children.sort(key=_sort_key)\n\n    @property\n    def content_string(self) -> str:\n        \"\"\"Return the content of the node as a string, including path and content.\n\n        Returns\n        -------\n        str\n            A string representation of the node's content.\n\n        \"\"\"\n        parts = [\n            SEPARATOR,\n            f\"{self.type.name}: {str(self.path_str).replace(os.sep, '/')}\"\n            + (f\" -> {readlink(self.path).name}\" if self.type == FileSystemNodeType.SYMLINK else \"\"),\n            SEPARATOR,\n            f\"{self.content}\",\n        ]\n\n        return \"\\n\".join(parts) + \"\\n\\n\"\n\n    @property\n    def content(self) -> str:  # pylint: disable=too-many-return-statements\n        \"\"\"Return file content (if text / notebook) or an explanatory placeholder.\n\n        Heuristically decides whether the file is text or binary by decoding a small chunk of the file\n        with multiple encodings and checking for common binary markers.\n\n        
Returns\n        -------\n        str\n            The content of the file, or an error message if the file could not be read.\n\n        Raises\n        ------\n        ValueError\n            If the node is a directory.\n\n        \"\"\"\n        if self.type == FileSystemNodeType.DIRECTORY:\n            msg = \"Cannot read content of a directory node\"\n            raise ValueError(msg)\n\n        if self.type == FileSystemNodeType.SYMLINK:\n            return \"\"  # TODO: are we including the empty content of symlinks?\n\n        if self.path.suffix == \".ipynb\":  # Notebook\n            try:\n                return process_notebook(self.path)\n            except Exception as exc:\n                return f\"Error processing notebook: {exc}\"\n\n        chunk = _read_chunk(self.path)\n\n        if chunk is None:\n            return \"Error reading file\"\n\n        if chunk == b\"\":\n            return \"[Empty file]\"\n\n        if not _decodes(chunk, \"utf-8\"):\n            return \"[Binary file]\"\n\n        # Find the first encoding that decodes the sample\n        good_enc: str | None = next(\n            (enc for enc in _get_preferred_encodings() if _decodes(chunk, encoding=enc)),\n            None,\n        )\n\n        if good_enc is None:\n            return \"Error: Unable to decode file with available encodings\"\n\n        try:\n            with self.path.open(encoding=good_enc) as fp:\n                return fp.read()\n        except (OSError, UnicodeDecodeError) as exc:\n            return f\"Error reading file with {good_enc!r}: {exc}\"\n"
  },
  {
    "path": "src/gitingest/schemas/ingestion.py",
    "content": "\"\"\"Module containing the dataclasses for the ingestion process.\"\"\"\n\nfrom __future__ import annotations\n\nfrom pathlib import Path  # noqa: TC003 (typing-only-standard-library-import) needed for type checking (pydantic)\nfrom uuid import UUID  # noqa: TC003 (typing-only-standard-library-import) needed for type checking (pydantic)\n\nfrom pydantic import BaseModel, Field\n\nfrom gitingest.config import MAX_FILE_SIZE\nfrom gitingest.schemas.cloning import CloneConfig\n\n\nclass IngestionQuery(BaseModel):  # pylint: disable=too-many-instance-attributes\n    \"\"\"Pydantic model to store the parsed details of the repository or file path.\n\n    Attributes\n    ----------\n    host : str | None\n        The host of the repository.\n    user_name : str | None\n        The username or owner of the repository.\n    repo_name : str | None\n        The name of the repository.\n    local_path : Path\n        The local path to the repository or file.\n    url : str | None\n        The URL of the repository.\n    slug : str\n        The slug of the repository.\n    id : UUID\n        The ID of the repository.\n    subpath : str\n        The subpath to the repository or file (default: ``\"/\"``).\n    type : str | None\n        The type of the repository or file.\n    branch : str | None\n        The branch of the repository.\n    commit : str | None\n        The commit of the repository.\n    tag : str | None\n        The tag of the repository.\n    max_file_size : int\n        The maximum file size to ingest in bytes (default: 10 MB).\n    ignore_patterns : set[str]\n        The patterns to ignore (default: ``set()``).\n    include_patterns : set[str] | None\n        The patterns to include.\n    include_submodules : bool\n        Whether to include all Git submodules within the repository. 
(default: ``False``)\n    s3_url : str | None\n        The S3 URL where the digest is stored if S3 is enabled.\n\n    \"\"\"\n\n    host: str | None = None\n    user_name: str | None = None\n    repo_name: str | None = None\n    local_path: Path\n    url: str | None = None\n    slug: str\n    id: UUID\n    subpath: str = Field(default=\"/\")\n    type: str | None = None\n    branch: str | None = None\n    commit: str | None = None\n    tag: str | None = None\n    max_file_size: int = Field(default=MAX_FILE_SIZE)\n    ignore_patterns: set[str] = Field(default_factory=set)  # TODO: same type for ignore_* and include_* patterns\n    include_patterns: set[str] | None = None\n    include_submodules: bool = Field(default=False)\n    s3_url: str | None = None\n\n    def extract_clone_config(self) -> CloneConfig:\n        \"\"\"Extract the relevant fields for the CloneConfig object.\n\n        Returns\n        -------\n        CloneConfig\n            A CloneConfig object containing the relevant fields.\n\n        Raises\n        ------\n        ValueError\n            If the ``url`` parameter is not provided.\n\n        \"\"\"\n        if not self.url:\n            msg = \"The 'url' parameter is required.\"\n            raise ValueError(msg)\n\n        return CloneConfig(\n            url=self.url,\n            local_path=str(self.local_path),\n            commit=self.commit,\n            branch=self.branch,\n            tag=self.tag,\n            subpath=self.subpath,\n            blob=self.type == \"blob\",\n            include_submodules=self.include_submodules,\n        )\n"
  },
  {
    "path": "src/gitingest/utils/__init__.py",
    "content": "\"\"\"Utility functions for the gitingest package.\"\"\"\n"
  },
  {
    "path": "src/gitingest/utils/auth.py",
    "content": "\"\"\"Utilities for handling authentication.\"\"\"\n\nfrom __future__ import annotations\n\nimport os\n\nfrom gitingest.utils.git_utils import validate_github_token\n\n\ndef resolve_token(token: str | None) -> str | None:\n    \"\"\"Resolve the token to use for the query.\n\n    Parameters\n    ----------\n    token : str | None\n        GitHub personal access token (PAT) for accessing private repositories.\n\n    Returns\n    -------\n    str | None\n        The resolved token.\n\n    \"\"\"\n    token = token or os.getenv(\"GITHUB_TOKEN\")\n    if token:\n        validate_github_token(token)\n    return token\n"
  },
  {
    "path": "src/gitingest/utils/compat_func.py",
    "content": "\"\"\"Compatibility functions for Python 3.8.\"\"\"\n\nimport os\nfrom pathlib import Path\n\n\ndef readlink(path: Path) -> Path:\n    \"\"\"Read the target of a symlink.\n\n    Compatible with Python 3.8.\n\n    Parameters\n    ----------\n    path : Path\n        Path to the symlink.\n\n    Returns\n    -------\n    Path\n        The target of the symlink.\n\n    \"\"\"\n    return Path(os.readlink(path))\n\n\ndef removesuffix(s: str, suffix: str) -> str:\n    \"\"\"Remove a suffix from a string.\n\n    Compatible with Python 3.8.\n\n    Parameters\n    ----------\n    s : str\n        String to remove suffix from.\n    suffix : str\n        Suffix to remove.\n\n    Returns\n    -------\n    str\n        String with suffix removed.\n\n    \"\"\"\n    return s[: -len(suffix)] if s.endswith(suffix) else s\n"
  },
  {
    "path": "src/gitingest/utils/compat_typing.py",
    "content": "\"\"\"Compatibility layer for typing.\"\"\"\n\ntry:\n    from enum import StrEnum  # type: ignore[attr-defined]  # Py ≥ 3.11\nexcept ImportError:\n    from strenum import StrEnum  # type: ignore[import-untyped] # Py ≤ 3.10\n\ntry:\n    from typing import ParamSpec, TypeAlias  # type: ignore[attr-defined]  # Py ≥ 3.10\nexcept ImportError:\n    from typing_extensions import ParamSpec, TypeAlias  # type: ignore[attr-defined]  # Py ≤ 3.9\n\ntry:\n    from typing import Annotated  # type: ignore[attr-defined]  # Py ≥ 3.9\nexcept ImportError:\n    from typing_extensions import Annotated  # type: ignore[attr-defined]  # Py ≤ 3.8\n\n__all__ = [\"Annotated\", \"ParamSpec\", \"StrEnum\", \"TypeAlias\"]\n"
  },
  {
    "path": "src/gitingest/utils/exceptions.py",
    "content": "\"\"\"Custom exceptions for the Gitingest package.\"\"\"\n\n\nclass AsyncTimeoutError(Exception):\n    \"\"\"Exception raised when an async operation exceeds its timeout limit.\n\n    This exception is used by the ``async_timeout`` decorator to signal that the wrapped\n    asynchronous function has exceeded the specified time limit for execution.\n    \"\"\"\n\n\nclass InvalidNotebookError(Exception):\n    \"\"\"Exception raised when a Jupyter notebook is invalid or cannot be processed.\"\"\"\n\n    def __init__(self, message: str) -> None:\n        super().__init__(message)\n\n\nclass InvalidGitHubTokenError(ValueError):\n    \"\"\"Exception raised when a GitHub Personal Access Token is malformed.\"\"\"\n\n    def __init__(self) -> None:\n        msg = (\n            \"Invalid GitHub token format. To generate a token, go to \"\n            \"https://github.com/settings/tokens/new?description=gitingest&scopes=repo.\"\n        )\n        super().__init__(msg)\n"
  },
  {
    "path": "src/gitingest/utils/file_utils.py",
    "content": "\"\"\"Utility functions for working with files and directories.\"\"\"\n\nfrom __future__ import annotations\n\nimport locale\nimport platform\nfrom typing import TYPE_CHECKING\n\nif TYPE_CHECKING:\n    from pathlib import Path\n\ntry:\n    locale.setlocale(locale.LC_ALL, \"\")\nexcept locale.Error:\n    locale.setlocale(locale.LC_ALL, \"C\")\n\n_CHUNK_SIZE = 1024  # bytes\n\n\ndef _get_preferred_encodings() -> list[str]:\n    \"\"\"Get list of encodings to try, prioritized for the current platform.\n\n    Returns\n    -------\n    list[str]\n        List of encoding names to try in priority order, starting with the\n        platform's default encoding followed by common fallback encodings.\n\n    \"\"\"\n    encodings = [locale.getpreferredencoding(), \"utf-8\", \"utf-16\", \"utf-16le\", \"utf-8-sig\", \"latin\"]\n    if platform.system() == \"Windows\":\n        encodings += [\"cp1252\", \"iso-8859-1\"]\n    return list(dict.fromkeys(encodings))\n\n\ndef _read_chunk(path: Path) -> bytes | None:\n    \"\"\"Attempt to read the first ``_CHUNK_SIZE`` bytes of ``path`` in binary mode.\n\n    Parameters\n    ----------\n    path : Path\n        The path to the file to read.\n\n    Returns\n    -------\n    bytes | None\n        The first ``_CHUNK_SIZE`` bytes of ``path``, or ``None`` on any ``OSError``.\n\n    \"\"\"\n    try:\n        with path.open(\"rb\") as fp:\n            return fp.read(_CHUNK_SIZE)\n    except OSError:\n        return None\n\n\ndef _decodes(chunk: bytes, encoding: str) -> bool:\n    \"\"\"Return ``True`` if ``chunk`` decodes cleanly with ``encoding``.\n\n    Parameters\n    ----------\n    chunk : bytes\n        The chunk of bytes to decode.\n    encoding : str\n        The encoding to use to decode the chunk.\n\n    Returns\n    -------\n    bool\n        ``True`` if the chunk decodes cleanly with the encoding, ``False`` otherwise.\n\n    \"\"\"\n    try:\n        chunk.decode(encoding)\n    except UnicodeDecodeError:\n        return 
False\n    return True\n"
  },
  {
    "path": "src/gitingest/utils/git_utils.py",
    "content": "\"\"\"Utility functions for interacting with Git repositories.\"\"\"\n\nfrom __future__ import annotations\n\nimport asyncio\nimport base64\nimport re\nimport sys\nfrom contextlib import contextmanager\nfrom pathlib import Path\nfrom typing import TYPE_CHECKING, Final, Generator, Iterable\nfrom urllib.parse import urlparse, urlunparse\n\nimport git\n\nfrom gitingest.utils.compat_func import removesuffix\nfrom gitingest.utils.exceptions import InvalidGitHubTokenError\nfrom gitingest.utils.logging_config import get_logger\n\nif TYPE_CHECKING:\n    from gitingest.schemas import CloneConfig\n\n# Initialize logger for this module\nlogger = get_logger(__name__)\n\n# GitHub Personal-Access tokens (classic + fine-grained).\n#   - ghp_ / gho_ / ghu_ / ghs_ / ghr_  → 36 alphanumerics\n#   - github_pat_                       → 22 alphanumerics + \"_\" + 59 alphanumerics\n_GITHUB_PAT_PATTERN: Final[str] = r\"^(?:gh[pousr]_[A-Za-z0-9]{36}|github_pat_[A-Za-z0-9]{22}_[A-Za-z0-9]{59})$\"\n\n\ndef is_github_host(url: str) -> bool:\n    \"\"\"Check if a URL is from a GitHub host (github.com or GitHub Enterprise).\n\n    Parameters\n    ----------\n    url : str\n        The URL to check\n\n    Returns\n    -------\n    bool\n        True if the URL is from a GitHub host, False otherwise\n\n    \"\"\"\n    hostname = urlparse(url).hostname or \"\"\n    return hostname.startswith(\"github.\")\n\n\nasync def run_command(*args: str) -> tuple[bytes, bytes]:\n    \"\"\"Execute a shell command asynchronously and return (stdout, stderr) bytes.\n\n    This function is kept for backward compatibility with non-git commands.\n    Git operations should use GitPython directly.\n\n    Parameters\n    ----------\n    *args : str\n        The command and its arguments to execute.\n\n    Returns\n    -------\n    tuple[bytes, bytes]\n        A tuple containing the stdout and stderr of the command.\n\n    Raises\n    ------\n    RuntimeError\n        If command exits with a non-zero 
status.\n\n    \"\"\"\n    # Execute the requested command\n    proc = await asyncio.create_subprocess_exec(\n        *args,\n        stdout=asyncio.subprocess.PIPE,\n        stderr=asyncio.subprocess.PIPE,\n    )\n    stdout, stderr = await proc.communicate()\n    if proc.returncode != 0:\n        msg = f\"Command failed: {' '.join(args)}\\nError: {stderr.decode().strip()}\"\n        raise RuntimeError(msg)\n\n    return stdout, stderr\n\n\nasync def ensure_git_installed() -> None:\n    \"\"\"Ensure Git is installed and accessible on the system.\n\n    On Windows, this also checks whether Git is configured to support long file paths.\n\n    Raises\n    ------\n    RuntimeError\n        If Git is not installed or not accessible.\n\n    \"\"\"\n    try:\n        # Use GitPython to check git availability\n        git_cmd = git.Git()\n        git_cmd.version()\n    except git.GitCommandError as exc:\n        msg = \"Git is not installed or not accessible. Please install Git first.\"\n        raise RuntimeError(msg) from exc\n    except Exception as exc:\n        msg = \"Git is not installed or not accessible. Please install Git first.\"\n        raise RuntimeError(msg) from exc\n\n    if sys.platform == \"win32\":\n        try:\n            longpaths_value = git_cmd.config(\"core.longpaths\")\n            if longpaths_value.lower() != \"true\":\n                logger.warning(\n                    \"Git clone may fail on Windows due to long file paths. \"\n                    \"Consider enabling long path support with: 'git config --global core.longpaths true'. 
\"\n                    \"Note: This command may require administrator privileges.\",\n                    extra={\"platform\": \"windows\", \"longpaths_enabled\": False},\n                )\n        except git.GitCommandError:\n            # Ignore if checking 'core.longpaths' fails.\n            pass\n\n\nasync def check_repo_exists(url: str, token: str | None = None) -> bool:\n    \"\"\"Check whether a remote Git repository is reachable.\n\n    Parameters\n    ----------\n    url : str\n        URL of the Git repository to check.\n    token : str | None\n        GitHub personal access token (PAT) for accessing private repositories.\n\n    Returns\n    -------\n    bool\n        ``True`` if the repository exists, ``False`` otherwise.\n\n    \"\"\"\n    try:\n        # Try to resolve HEAD - if repo exists, this will work\n        await _resolve_ref_to_sha(url, \"HEAD\", token=token)\n    except (ValueError, Exception):\n        # Repository doesn't exist, is private without proper auth, or other error\n        return False\n\n    return True\n\n\ndef _parse_github_url(url: str) -> tuple[str, str, str]:\n    \"\"\"Parse a GitHub URL and return (hostname, owner, repo).\n\n    Parameters\n    ----------\n    url : str\n        The URL of the GitHub repository to parse.\n\n    Returns\n    -------\n    tuple[str, str, str]\n        A tuple containing the hostname, owner, and repository name.\n\n    Raises\n    ------\n    ValueError\n        If the URL is not a valid GitHub repository URL.\n\n    \"\"\"\n    parsed = urlparse(url)\n    if parsed.scheme not in {\"http\", \"https\"}:\n        msg = f\"URL must start with http:// or https://: {url!r}\"\n        raise ValueError(msg)\n\n    if not parsed.hostname or not parsed.hostname.startswith(\"github.\"):\n        msg = f\"Un-recognised GitHub hostname: {parsed.hostname!r}\"\n        raise ValueError(msg)\n\n    parts = removesuffix(parsed.path, \".git\").strip(\"/\").split(\"/\")\n    expected_path_length = 2\n    
if len(parts) != expected_path_length:\n        msg = f\"Path must look like /<owner>/<repo>: {parsed.path!r}\"\n        raise ValueError(msg)\n\n    owner, repo = parts\n    return parsed.hostname, owner, repo\n\n\nasync def fetch_remote_branches_or_tags(url: str, *, ref_type: str, token: str | None = None) -> list[str]:\n    \"\"\"Fetch the list of branches or tags from a remote Git repository.\n\n    Parameters\n    ----------\n    url : str\n        The URL of the Git repository to fetch branches or tags from.\n    ref_type : str\n        The type of reference to fetch. Can be \"branches\" or \"tags\".\n    token : str | None\n        GitHub personal access token (PAT) for accessing private repositories.\n\n    Returns\n    -------\n    list[str]\n        A list of branch or tag names available in the remote repository.\n\n    Raises\n    ------\n    ValueError\n        If the ``ref_type`` parameter is not \"branches\" or \"tags\".\n    RuntimeError\n        If fetching branches or tags from the remote repository fails.\n\n    \"\"\"\n    if ref_type not in (\"branches\", \"tags\"):\n        msg = f\"Invalid fetch type: {ref_type}\"\n        raise ValueError(msg)\n\n    await ensure_git_installed()\n\n    # Use GitPython to get remote references\n    try:\n        fetch_tags = ref_type == \"tags\"\n        to_fetch = \"tags\" if fetch_tags else \"heads\"\n\n        # Build ls-remote command\n        cmd_args = [f\"--{to_fetch}\"]\n        if fetch_tags:\n            cmd_args.append(\"--refs\")  # Filter out peeled tag objects\n        cmd_args.append(url)\n\n        # Run the command with proper authentication\n        with git_auth_context(url, token) as (git_cmd, auth_url):\n            # Replace the URL in cmd_args with the authenticated URL\n            cmd_args[-1] = auth_url  # URL is the last argument\n            output = git_cmd.ls_remote(*cmd_args)\n\n        # Parse output\n        return [\n            line.split(f\"refs/{to_fetch}/\", 1)[1]\n            
for line in output.splitlines()\n            if line.strip() and f\"refs/{to_fetch}/\" in line\n        ]\n    except git.GitCommandError as exc:\n        msg = f\"Failed to fetch {ref_type} from {url}: {exc}\"\n        raise RuntimeError(msg) from exc\n\n\ndef create_git_repo(local_path: str, url: str, token: str | None = None) -> git.Repo:\n    \"\"\"Create a GitPython Repo object with authentication if needed.\n\n    Parameters\n    ----------\n    local_path : str\n        The local path where the git repository is located.\n    url : str\n        The repository URL to check if it's a GitHub repository.\n    token : str | None\n        GitHub personal access token (PAT) for accessing private repositories.\n\n    Returns\n    -------\n    git.Repo\n        A GitPython Repo object configured with authentication.\n\n    Raises\n    ------\n    ValueError\n        If the local path is not a valid git repository.\n\n    \"\"\"\n    try:\n        repo = git.Repo(local_path)\n\n        # Configure authentication if needed\n        if token and is_github_host(url):\n            auth_header = create_git_auth_header(token, url=url)\n            # Set the auth header in git config for this repo\n            key, value = auth_header.split(\"=\", 1)\n            repo.git.config(key, value)\n\n    except git.InvalidGitRepositoryError as exc:\n        msg = f\"Invalid git repository at {local_path}\"\n        raise ValueError(msg) from exc\n\n    return repo\n\n\ndef create_git_auth_header(token: str, url: str = \"https://github.com\") -> str:\n    \"\"\"Create a Basic authentication header for GitHub git operations.\n\n    Parameters\n    ----------\n    token : str\n        GitHub personal access token (PAT) for accessing private repositories.\n    url : str\n        The GitHub URL to create the authentication header for.\n        Defaults to \"https://github.com\" if not provided.\n\n    Returns\n    -------\n    str\n        The git config command for setting the 
authentication header.\n\n    Raises\n    ------\n    ValueError\n        If the URL is not a valid GitHub repository URL.\n\n    \"\"\"\n    hostname = urlparse(url).hostname\n    if not hostname:\n        msg = f\"Invalid GitHub URL: {url!r}\"\n        raise ValueError(msg)\n\n    basic = base64.b64encode(f\"x-oauth-basic:{token}\".encode()).decode()\n    return f\"http.https://{hostname}/.extraheader=Authorization: Basic {basic}\"\n\n\ndef create_authenticated_url(url: str, token: str | None = None) -> str:\n    \"\"\"Create an authenticated URL for Git operations.\n\n    This is the safest approach for multi-user environments - no global state.\n\n    Parameters\n    ----------\n    url : str\n        The repository URL.\n    token : str | None\n        GitHub personal access token (PAT) for accessing private repositories.\n\n    Returns\n    -------\n    str\n        The URL with authentication embedded (for GitHub) or original URL.\n\n    \"\"\"\n    if not (token and is_github_host(url)):\n        return url\n\n    parsed = urlparse(url)\n    # Add token as username in URL (GitHub supports this)\n    netloc = f\"x-oauth-basic:{token}@{parsed.hostname}\"\n    if parsed.port:\n        netloc += f\":{parsed.port}\"\n\n    return urlunparse(\n        (\n            parsed.scheme,\n            netloc,\n            parsed.path,\n            parsed.params,\n            parsed.query,\n            parsed.fragment,\n        ),\n    )\n\n\n@contextmanager\ndef git_auth_context(url: str, token: str | None = None) -> Generator[tuple[git.Git, str]]:\n    \"\"\"Context manager that provides Git command and authenticated URL.\n\n    Returns both a Git command object and the authenticated URL to use.\n    This avoids any global state contamination between users.\n\n    Parameters\n    ----------\n    url : str\n        The repository URL to check if authentication is needed.\n    token : str | None\n        GitHub personal access token (PAT) for accessing private 
repositories.\n\n    Yields\n    ------\n    Generator[tuple[git.Git, str]]\n        Tuple of (Git command object, authenticated URL to use).\n\n    \"\"\"\n    git_cmd = git.Git()\n    auth_url = create_authenticated_url(url, token)\n    yield git_cmd, auth_url\n\n\ndef validate_github_token(token: str) -> None:\n    \"\"\"Validate the format of a GitHub Personal Access Token.\n\n    Parameters\n    ----------\n    token : str\n        GitHub personal access token (PAT) for accessing private repositories.\n\n    Raises\n    ------\n    InvalidGitHubTokenError\n        If the token format is invalid.\n\n    \"\"\"\n    if not re.fullmatch(_GITHUB_PAT_PATTERN, token):\n        raise InvalidGitHubTokenError\n\n\nasync def checkout_partial_clone(config: CloneConfig, token: str | None) -> None:\n    \"\"\"Configure sparse-checkout for a partially cloned repository.\n\n    Parameters\n    ----------\n    config : CloneConfig\n        The configuration for cloning the repository, including subpath and blob flag.\n    token : str | None\n        GitHub personal access token (PAT) for accessing private repositories.\n\n    Raises\n    ------\n    RuntimeError\n        If the sparse-checkout configuration fails.\n\n    \"\"\"\n    subpath = config.subpath.lstrip(\"/\")\n    if config.blob:\n        # Remove the file name from the subpath when ingesting from a file url (e.g. 
blob/branch/path/file.txt)\n        subpath = str(Path(subpath).parent.as_posix())\n\n    try:\n        repo = create_git_repo(config.local_path, config.url, token)\n        repo.git.sparse_checkout(\"set\", subpath)\n    except git.GitCommandError as exc:\n        msg = f\"Failed to configure sparse-checkout: {exc}\"\n        raise RuntimeError(msg) from exc\n\n\nasync def resolve_commit(config: CloneConfig, token: str | None) -> str:\n    \"\"\"Resolve the commit to use for the clone.\n\n    Parameters\n    ----------\n    config : CloneConfig\n        The configuration for cloning the repository.\n    token : str | None\n        GitHub personal access token (PAT) for accessing private repositories.\n\n    Returns\n    -------\n    str\n        The commit SHA.\n\n    \"\"\"\n    if config.commit:\n        commit = config.commit\n    elif config.tag:\n        commit = await _resolve_ref_to_sha(config.url, pattern=f\"refs/tags/{config.tag}*\", token=token)\n    elif config.branch:\n        commit = await _resolve_ref_to_sha(config.url, pattern=f\"refs/heads/{config.branch}\", token=token)\n    else:\n        commit = await _resolve_ref_to_sha(config.url, pattern=\"HEAD\", token=token)\n    return commit\n\n\nasync def _resolve_ref_to_sha(url: str, pattern: str, token: str | None = None) -> str:\n    \"\"\"Return the commit SHA that <kind>/<ref> points to in <url>.\n\n    * Branch → first line from ``git ls-remote``.\n    * Tag    → if annotated, prefer the peeled ``^{}`` line (commit).\n\n    Parameters\n    ----------\n    url : str\n        The URL of the remote repository.\n    pattern : str\n        The pattern to use to resolve the commit SHA.\n    token : str | None\n        GitHub personal access token (PAT) for accessing private repositories.\n\n    Returns\n    -------\n    str\n        The commit SHA.\n\n    Raises\n    ------\n    ValueError\n        If the ref does not exist in the remote repository.\n\n    \"\"\"\n    try:\n        # Execute ls-remote 
command with proper authentication\n        with git_auth_context(url, token) as (git_cmd, auth_url):\n            output = git_cmd.ls_remote(auth_url, pattern)\n        lines = output.splitlines()\n\n        sha = _pick_commit_sha(lines)\n        if not sha:\n            msg = f\"{pattern!r} not found in {url}\"\n            raise ValueError(msg)\n\n    except git.GitCommandError as exc:\n        msg = f\"Failed to resolve {pattern} in {url}:\\n{exc}\"\n        raise ValueError(msg) from exc\n\n    return sha\n\n\ndef _pick_commit_sha(lines: Iterable[str]) -> str | None:\n    \"\"\"Return a commit SHA from ``git ls-remote`` output.\n\n    • Annotated tag            →  prefer the peeled line (<sha> refs/tags/x^{})\n    • Branch / lightweight tag →  first non-peeled line\n\n\n    Parameters\n    ----------\n    lines : Iterable[str]\n        The lines of a ``git ls-remote`` output.\n\n    Returns\n    -------\n    str | None\n        The commit SHA, or ``None`` if no commit SHA is found.\n\n    \"\"\"\n    first_non_peeled: str | None = None\n\n    for ln in lines:\n        if not ln.strip():\n            continue\n\n        sha, ref = ln.split(maxsplit=1)\n\n        if ref.endswith(\"^{}\"):  # peeled commit of annotated tag\n            return sha  # ← best match, done\n\n        if first_non_peeled is None:  # remember the first ordinary line\n            first_non_peeled = sha\n\n    return first_non_peeled  # branch or lightweight tag (or None)\n"
  },
  {
    "path": "src/gitingest/utils/ignore_patterns.py",
    "content": "\"\"\"Default ignore patterns for Gitingest.\"\"\"\n\nfrom __future__ import annotations\n\nfrom pathlib import Path\n\nDEFAULT_IGNORE_PATTERNS: set[str] = {\n    # Python\n    \"*.pyc\",\n    \"*.pyo\",\n    \"*.pyd\",\n    \"__pycache__\",\n    \".pytest_cache\",\n    \".coverage\",\n    \".tox\",\n    \".nox\",\n    \".mypy_cache\",\n    \".ruff_cache\",\n    \".hypothesis\",\n    \"poetry.lock\",\n    \"Pipfile.lock\",\n    # JavaScript/Node.js\n    \"node_modules\",\n    \"bower_components\",\n    \"package-lock.json\",\n    \"yarn.lock\",\n    \".npm\",\n    \".yarn\",\n    \".pnpm-store\",\n    \"bun.lock\",\n    \"bun.lockb\",\n    # Java\n    \"*.class\",\n    \"*.jar\",\n    \"*.war\",\n    \"*.ear\",\n    \"*.nar\",\n    \".gradle/\",\n    \"build/\",\n    \".settings/\",\n    \".classpath\",\n    \"gradle-app.setting\",\n    \"*.gradle\",\n    # IDEs and editors / Java\n    \".project\",\n    # C/C++\n    \"*.o\",\n    \"*.obj\",\n    \"*.dll\",\n    \"*.dylib\",\n    \"*.exe\",\n    \"*.lib\",\n    \"*.out\",\n    \"*.a\",\n    \"*.pdb\",\n    # Binary\n    \"*.bin\",\n    # Swift/Xcode\n    \".build/\",\n    \"*.xcodeproj/\",\n    \"*.xcworkspace/\",\n    \"*.pbxuser\",\n    \"*.mode1v3\",\n    \"*.mode2v3\",\n    \"*.perspectivev3\",\n    \"*.xcuserstate\",\n    \"xcuserdata/\",\n    \".swiftpm/\",\n    # Ruby\n    \"*.gem\",\n    \".bundle/\",\n    \"vendor/bundle\",\n    \"Gemfile.lock\",\n    \".ruby-version\",\n    \".ruby-gemset\",\n    \".rvmrc\",\n    # Rust\n    \"Cargo.lock\",\n    \"**/*.rs.bk\",\n    # Java / Rust\n    \"target/\",\n    # Go\n    \"pkg/\",\n    # .NET/C#\n    \"obj/\",\n    \"*.suo\",\n    \"*.user\",\n    \"*.userosscache\",\n    \"*.sln.docstates\",\n    \"*.nupkg\",\n    # Go / .NET / C#\n    \"bin/\",\n    # Version control\n    \".git\",\n    \".svn\",\n    \".hg\",\n    \".gitignore\",\n    \".gitattributes\",\n    \".gitmodules\",\n    # Images and media\n    \"*.svg\",\n    \"*.png\",\n    
\"*.jpg\",\n    \"*.jpeg\",\n    \"*.gif\",\n    \"*.ico\",\n    \"*.pdf\",\n    \"*.mov\",\n    \"*.mp4\",\n    \"*.mp3\",\n    \"*.wav\",\n    # Virtual environments\n    \"venv\",\n    \".venv\",\n    \"env\",\n    \".env\",\n    \"virtualenv\",\n    # IDEs and editors\n    \".idea\",\n    \".vscode\",\n    \".vs\",\n    \"*.swo\",\n    \"*.swn\",\n    \".settings\",\n    \"*.sublime-*\",\n    # Temporary and cache files\n    \"*.log\",\n    \"*.bak\",\n    \"*.swp\",\n    \"*.tmp\",\n    \"*.temp\",\n    \".cache\",\n    \".sass-cache\",\n    \".eslintcache\",\n    \".DS_Store\",\n    \"Thumbs.db\",\n    \"desktop.ini\",\n    # Build directories and artifacts\n    \"build\",\n    \"dist\",\n    \"target\",\n    \"out\",\n    \"*.egg-info\",\n    \"*.egg\",\n    \"*.whl\",\n    \"*.so\",\n    # Documentation\n    \"site-packages\",\n    \".docusaurus\",\n    \".next\",\n    \".nuxt\",\n    # Database\n    \"*.db\",\n    \"*.sqlite\",\n    \"*.sqlite3\",\n    # Other common patterns\n    ## Minified files\n    \"*.min.js\",\n    \"*.min.css\",\n    ## Source maps\n    \"*.map\",\n    ## Terraform\n    \"*.tfstate*\",\n    ## Dependencies in various languages\n    \"vendor/\",\n    # Gitingest\n    \"digest.txt\",\n}\n\n\ndef load_ignore_patterns(root: Path, filename: str) -> set[str]:\n    \"\"\"Load ignore patterns from ``filename`` found under ``root``.\n\n    The loader walks the directory tree, looks for the supplied ``filename``,\n    and returns a unified set of patterns. 
It implements the same parsing rules\n    we use for ``.gitignore`` and ``.gitingestignore`` (git-wildmatch syntax with\n    support for negation and root-relative paths).\n\n    Parameters\n    ----------\n    root : Path\n        Directory to walk.\n    filename : str\n        The filename to look for in each directory.\n\n    Returns\n    -------\n    set[str]\n        A set of ignore patterns extracted from the ``filename`` file found under the ``root`` directory.\n\n    \"\"\"\n    patterns: set[str] = set()\n\n    for ignore_file in root.rglob(filename):\n        if ignore_file.is_file():\n            patterns.update(_parse_ignore_file(ignore_file, root))\n    return patterns\n\n\ndef _parse_ignore_file(ignore_file: Path, root: Path) -> set[str]:\n    \"\"\"Parse an ignore file and return a set of ignore patterns.\n\n    Parameters\n    ----------\n    ignore_file : Path\n        The path to the ignore file.\n    root : Path\n        The root directory of the repository.\n\n    Returns\n    -------\n    set[str]\n        A set of ignore patterns.\n\n    \"\"\"\n    patterns: set[str] = set()\n\n    # Directory containing the ignore file, relative to the repository root\n    rel_dir = ignore_file.parent.relative_to(root)\n    base_dir = Path() if rel_dir == Path() else rel_dir\n\n    with ignore_file.open(encoding=\"utf-8\") as fh:\n        for raw in fh:\n            line = raw.strip()\n            if not line or line.startswith(\"#\"):  # comments / blank lines\n                continue\n\n            # Handle negation (\"!foobar\")\n            negated = line.startswith(\"!\")\n            if negated:\n                line = line[1:]\n\n            # Handle leading slash (\"/foobar\")\n            if line.startswith(\"/\"):\n                line = line.lstrip(\"/\")\n\n            pattern_body = (base_dir / line).as_posix()\n            patterns.add(f\"!{pattern_body}\" if negated else pattern_body)\n\n    return patterns\n"
  },
  {
    "path": "src/gitingest/utils/ingestion_utils.py",
    "content": "\"\"\"Utility functions for the ingestion process.\"\"\"\n\nfrom __future__ import annotations\n\nfrom typing import TYPE_CHECKING\n\nfrom pathspec import PathSpec\n\nif TYPE_CHECKING:\n    from pathlib import Path\n\n\ndef _should_include(path: Path, base_path: Path, include_patterns: set[str]) -> bool:\n    \"\"\"Return ``True`` if ``path`` matches any of ``include_patterns``.\n\n    Parameters\n    ----------\n    path : Path\n        The absolute path of the file or directory to check.\n\n    base_path : Path\n        The base directory from which the relative path is calculated.\n\n    include_patterns : set[str]\n        A set of patterns to check against the relative path.\n\n    Returns\n    -------\n    bool\n        ``True`` if the path matches any of the include patterns, ``False`` otherwise.\n\n    \"\"\"\n    rel_path = _relative_or_none(path, base_path)\n    if rel_path is None:  # outside repo → do *not* include\n        return False\n    if path.is_dir():  # keep directories so children are visited\n        return True\n\n    spec = PathSpec.from_lines(\"gitwildmatch\", include_patterns)\n    return spec.match_file(str(rel_path))\n\n\ndef _should_exclude(path: Path, base_path: Path, ignore_patterns: set[str]) -> bool:\n    \"\"\"Return ``True`` if ``path`` matches any of ``ignore_patterns``.\n\n    Parameters\n    ----------\n    path : Path\n        The absolute path of the file or directory to check.\n    base_path : Path\n        The base directory from which the relative path is calculated.\n    ignore_patterns : set[str]\n        A set of patterns to check against the relative path.\n\n    Returns\n    -------\n    bool\n        ``True`` if the path matches any of the ignore patterns, ``False`` otherwise.\n\n    \"\"\"\n    rel_path = _relative_or_none(path, base_path)\n    if rel_path is None:  # outside repo → already \"excluded\"\n        return True\n\n    spec = PathSpec.from_lines(\"gitwildmatch\", ignore_patterns)\n    
return spec.match_file(str(rel_path))\n\n\ndef _relative_or_none(path: Path, base: Path) -> Path | None:\n    \"\"\"Return *path* relative to *base* or ``None`` if *path* is outside *base*.\n\n    Parameters\n    ----------\n    path : Path\n        The absolute path of the file or directory to check.\n    base : Path\n        The base directory from which the relative path is calculated.\n\n    Returns\n    -------\n    Path | None\n        The relative path of ``path`` to ``base``, or ``None`` if ``path`` is outside ``base``.\n\n    \"\"\"\n    try:\n        return path.relative_to(base)\n    except ValueError:  # path is not a sub-path of base\n        return None\n"
  },
  {
    "path": "src/gitingest/utils/logging_config.py",
    "content": "\"\"\"Logging configuration for gitingest using loguru.\n\nThis module provides structured JSON logging suitable for Kubernetes deployments\nwhile also supporting human-readable logging for development.\n\"\"\"\n\nfrom __future__ import annotations\n\nimport json\nimport logging\nimport os\nimport sys\nfrom typing import Any\n\nfrom loguru import logger\n\n\ndef json_sink(message: Any) -> None:  # noqa: ANN401\n    \"\"\"Create JSON formatted log output.\n\n    Parameters\n    ----------\n    message : Any\n        The loguru message record\n\n    \"\"\"\n    record = message.record\n\n    log_entry = {\n        \"timestamp\": record[\"time\"].isoformat(),\n        \"level\": record[\"level\"].name.upper(),\n        \"logger\": record[\"name\"],\n        \"module\": record[\"module\"],\n        \"function\": record[\"function\"],\n        \"line\": record[\"line\"],\n        \"message\": record[\"message\"],\n    }\n\n    # Add exception info if present\n    if record[\"exception\"]:\n        log_entry[\"exception\"] = {\n            \"type\": record[\"exception\"].type.__name__,\n            \"value\": str(record[\"exception\"].value),\n            \"traceback\": record[\"exception\"].traceback,\n        }\n\n    # Add extra fields if present\n    if record[\"extra\"]:\n        log_entry.update(record[\"extra\"])\n\n    sys.stdout.write(json.dumps(log_entry, ensure_ascii=False, separators=(\",\", \":\")) + \"\\n\")\n\n\ndef format_extra_fields(record: dict) -> str:\n    \"\"\"Format extra fields as JSON string.\n\n    Parameters\n    ----------\n    record : dict\n        The loguru record dictionary\n\n    Returns\n    -------\n    str\n        JSON formatted extra fields or empty string\n\n    \"\"\"\n    if not record.get(\"extra\"):\n        return \"\"\n\n    # Filter out loguru's internal extra fields\n    filtered_extra = {k: v for k, v in record[\"extra\"].items() if not k.startswith(\"_\") and k not in [\"name\"]}\n\n    # Handle nested 
extra structure - if there's an 'extra' key, use its contents\n    if \"extra\" in filtered_extra and isinstance(filtered_extra[\"extra\"], dict):\n        filtered_extra = filtered_extra[\"extra\"]\n\n    if filtered_extra:\n        extra_json = json.dumps(filtered_extra, ensure_ascii=False, separators=(\",\", \":\"))\n        return f\" | {extra_json}\"\n\n    return \"\"\n\n\ndef extra_filter(record: dict) -> dict:\n    \"\"\"Filter function to add extra fields to the message.\n\n    Parameters\n    ----------\n    record : dict\n        The loguru record dictionary\n\n    Returns\n    -------\n    dict\n        Modified record with extra fields appended to message\n\n    \"\"\"\n    extra_str = format_extra_fields(record)\n    if extra_str:\n        record[\"message\"] = record[\"message\"] + extra_str\n    return record\n\n\nclass InterceptHandler(logging.Handler):\n    \"\"\"Intercept standard library logging and redirect to loguru.\"\"\"\n\n    def emit(self, record: logging.LogRecord) -> None:\n        \"\"\"Emit a record to loguru.\"\"\"\n        # Get corresponding loguru level\n        try:\n            level = logger.level(record.levelname).name\n        except ValueError:\n            level = record.levelno\n\n        # Find caller from where originated the logged message\n        frame, depth = logging.currentframe(), 2\n        while frame.f_code.co_filename == logging.__file__:\n            frame = frame.f_back\n            depth += 1\n\n        logger.opt(depth=depth, exception=record.exc_info).log(\n            level,\n            record.getMessage(),\n        )\n\n\ndef configure_logging() -> None:\n    \"\"\"Configure loguru for the application.\n\n    Sets up JSON logging for production/Kubernetes environments\n    or human-readable logging for development.\n    Intercepts all standard library logging including uvicorn.\n    \"\"\"\n    # Remove default handler\n    logger.remove()\n\n    # Check if we're in Kubernetes or production 
environment\n    is_k8s = os.getenv(\"KUBERNETES_SERVICE_HOST\") is not None\n    log_format = os.getenv(\"LOG_FORMAT\", \"json\" if is_k8s else \"human\")\n    log_level = os.getenv(\"LOG_LEVEL\", \"INFO\")\n\n    if log_format.lower() == \"json\":\n        # JSON format for structured logging (Kubernetes/production)\n        logger.add(\n            json_sink,\n            level=log_level,\n            enqueue=True,  # Async logging for better performance\n            diagnose=False,  # Don't include variable values in exceptions (security)\n            backtrace=True,  # Include full traceback\n            serialize=True,  # Ensure proper serialization\n        )\n    else:\n        # Human-readable format for development\n        logger_format = (\n            \"<green>{time:YYYY-MM-DD HH:mm:ss.SSS}</green> | \"\n            \"<level>{level: <8}</level> | \"\n            \"<cyan>{name}</cyan>:<cyan>{function}</cyan>:<cyan>{line}</cyan> | \"\n            \"{message}\"\n        )\n        logger.add(\n            sys.stderr,\n            format=logger_format,\n            filter=extra_filter,\n            level=log_level,\n            enqueue=True,\n            diagnose=True,  # Include variable values in development\n            backtrace=True,\n        )\n\n    # Intercept all standard library logging\n    logging.basicConfig(handlers=[InterceptHandler()], level=0, force=True)\n\n    # Intercept specific loggers that might bypass basicConfig\n    for name in logging.root.manager.loggerDict:  # pylint: disable=no-member\n        logging.getLogger(name).handlers = []\n        logging.getLogger(name).propagate = True\n\n\ndef get_logger(name: str | None = None) -> logger.__class__:\n    \"\"\"Get a configured logger instance.\n\n    Parameters\n    ----------\n    name : str | None, optional\n        Logger name, defaults to the calling module name\n\n    Returns\n    -------\n    logger.__class__\n        Configured logger instance\n\n    \"\"\"\n    if name:\n   
     return logger.bind(name=name)\n    return logger\n\n\n# Initialize logging when module is imported\nconfigure_logging()\n"
  },
  {
    "path": "src/gitingest/utils/notebook.py",
    "content": "\"\"\"Utilities for processing Jupyter notebooks.\"\"\"\n\nfrom __future__ import annotations\n\nimport json\nfrom itertools import chain\nfrom typing import TYPE_CHECKING, Any\n\nfrom gitingest.utils.exceptions import InvalidNotebookError\nfrom gitingest.utils.logging_config import get_logger\n\nif TYPE_CHECKING:\n    from pathlib import Path\n\n# Initialize logger for this module\nlogger = get_logger(__name__)\n\n\ndef process_notebook(file: Path, *, include_output: bool = True) -> str:\n    \"\"\"Process a Jupyter notebook file and return an executable Python script as a string.\n\n    Parameters\n    ----------\n    file : Path\n        The path to the Jupyter notebook file.\n    include_output : bool\n        Whether to include cell outputs in the generated script (default: ``True``).\n\n    Returns\n    -------\n    str\n        The executable Python script as a string.\n\n    Raises\n    ------\n    InvalidNotebookError\n        If the notebook file is invalid or cannot be processed.\n\n    \"\"\"\n    try:\n        with file.open(encoding=\"utf-8\") as f:\n            notebook: dict[str, Any] = json.load(f)\n    except json.JSONDecodeError as exc:\n        msg = f\"Invalid JSON in notebook: {file}\"\n        raise InvalidNotebookError(msg) from exc\n\n    # Check if the notebook contains worksheets\n    worksheets = notebook.get(\"worksheets\")\n    if worksheets:\n        logger.warning(\n            \"Worksheets are deprecated as of IPEP-17. Consider updating the notebook. \"\n            \"(See: https://github.com/jupyter/nbformat and \"\n            \"https://github.com/ipython/ipython/wiki/IPEP-17:-Notebook-Format-4#remove-multiple-worksheets \"\n            \"for more information.)\",\n        )\n\n        if len(worksheets) > 1:\n            logger.warning(\n                \"Multiple worksheets detected. 
Combining all worksheets into a single script.\",\n            )\n\n        cells = list(chain.from_iterable(ws[\"cells\"] for ws in worksheets))\n\n    else:\n        cells = notebook[\"cells\"]\n\n    result = [\"# Jupyter notebook converted to Python script.\"]\n\n    for cell in cells:\n        cell_str = _process_cell(cell, include_output=include_output)\n        if cell_str:\n            result.append(cell_str)\n\n    return \"\\n\\n\".join(result) + \"\\n\"\n\n\ndef _process_cell(cell: dict[str, Any], *, include_output: bool) -> str | None:\n    \"\"\"Process a Jupyter notebook cell and return the cell content as a string.\n\n    Parameters\n    ----------\n    cell : dict[str, Any]\n        The cell dictionary from a Jupyter notebook.\n    include_output : bool\n        Whether to include cell outputs in the generated script.\n\n    Returns\n    -------\n    str | None\n        The cell content as a string, or ``None`` if the cell is empty.\n\n    Raises\n    ------\n    ValueError\n        If an unexpected cell type is encountered.\n\n    \"\"\"\n    cell_type = cell[\"cell_type\"]\n\n    # Validate cell type and handle unexpected types\n    if cell_type not in (\"markdown\", \"code\", \"raw\"):\n        msg = f\"Unknown cell type: {cell_type}\"\n        raise ValueError(msg)\n\n    cell_str = \"\".join(cell[\"source\"])\n\n    # Skip empty cells\n    if not cell_str:\n        return None\n\n    # Convert Markdown and raw cells to multi-line comments\n    if cell_type in (\"markdown\", \"raw\"):\n        return f'\"\"\"\\n{cell_str}\\n\"\"\"'\n\n    # Add cell output as comments\n    outputs = cell.get(\"outputs\")\n    if include_output and outputs:\n        # Include cell outputs as comments\n        raw_lines: list[str] = []\n        for output in outputs:\n            raw_lines += _extract_output(output)\n\n        cell_str += \"\\n# Output:\\n#   \" + \"\\n#   \".join(raw_lines)\n\n    return cell_str\n\n\ndef _extract_output(output: dict[str, Any]) 
-> list[str]:\n    \"\"\"Extract the output from a Jupyter notebook cell.\n\n    Parameters\n    ----------\n    output : dict[str, Any]\n        The output dictionary from a Jupyter notebook cell.\n\n    Returns\n    -------\n    list[str]\n        The output as a list of strings.\n\n    Raises\n    ------\n    ValueError\n        If an unknown output type is encountered.\n\n    \"\"\"\n    output_type = output[\"output_type\"]\n\n    if output_type == \"stream\":\n        return output[\"text\"]\n\n    if output_type in (\"execute_result\", \"display_data\"):\n        return output[\"data\"][\"text/plain\"]\n\n    if output_type == \"error\":\n        return [f\"Error: {output['ename']}: {output['evalue']}\"]\n\n    msg = f\"Unknown output type: {output_type}\"\n    raise ValueError(msg)\n"
  },
  {
    "path": "src/gitingest/utils/os_utils.py",
    "content": "\"\"\"Utility functions for working with the operating system.\"\"\"\n\nfrom pathlib import Path\n\n\nasync def ensure_directory_exists_or_create(path: Path) -> None:\n    \"\"\"Ensure the directory exists, creating it if necessary.\n\n    Parameters\n    ----------\n    path : Path\n        The path to ensure exists.\n\n    Raises\n    ------\n    OSError\n        If the directory cannot be created.\n\n    \"\"\"\n    try:\n        path.mkdir(parents=True, exist_ok=True)\n    except OSError as exc:\n        msg = f\"Failed to create directory {path}: {exc}\"\n        raise OSError(msg) from exc\n"
  },
  {
    "path": "src/gitingest/utils/pattern_utils.py",
    "content": "\"\"\"Pattern utilities for the Gitingest package.\"\"\"\n\nfrom __future__ import annotations\n\nimport re\nfrom typing import Iterable\n\nfrom gitingest.utils.ignore_patterns import DEFAULT_IGNORE_PATTERNS\n\n_PATTERN_SPLIT_RE = re.compile(r\"[,\\s]+\")\n\n\ndef process_patterns(\n    exclude_patterns: str | set[str] | None = None,\n    include_patterns: str | set[str] | None = None,\n) -> tuple[set[str], set[str] | None]:\n    \"\"\"Process include and exclude patterns.\n\n    Parameters\n    ----------\n    exclude_patterns : str | set[str] | None\n        Exclude patterns to process.\n    include_patterns : str | set[str] | None\n        Include patterns to process.\n\n    Returns\n    -------\n    tuple[set[str], set[str] | None]\n        A tuple containing the processed ignore patterns and include patterns.\n\n    \"\"\"\n    # Combine default ignore patterns + custom patterns\n    ignore_patterns_set = DEFAULT_IGNORE_PATTERNS.copy()\n    if exclude_patterns:\n        ignore_patterns_set.update(_parse_patterns(exclude_patterns))\n\n    # Process include patterns and override ignore patterns accordingly\n    if include_patterns:\n        parsed_include = _parse_patterns(include_patterns)\n        # Override ignore patterns with include patterns\n        ignore_patterns_set = set(ignore_patterns_set) - set(parsed_include)\n    else:\n        parsed_include = None\n\n    return ignore_patterns_set, parsed_include\n\n\ndef _parse_patterns(patterns: str | Iterable[str]) -> set[str]:\n    \"\"\"Normalize a collection of file or directory patterns.\n\n    Parameters\n    ----------\n    patterns : str | Iterable[str]\n        One pattern string or an iterable of pattern strings. Each pattern may contain multiple comma- or\n        whitespace-separated sub-patterns, e.g. 
\"src/*, tests *.md\".\n\n    Returns\n    -------\n    set[str]\n        Normalized patterns with Windows back-slashes converted to forward-slashes and duplicates removed.\n\n    \"\"\"\n    # Treat a lone string as the iterable [string]\n    if isinstance(patterns, str):\n        patterns = [patterns]\n\n    # Flatten, split on commas/whitespace, strip empties, normalise slashes\n    return {\n        part.replace(\"\\\\\", \"/\")\n        for pat in patterns\n        for part in _PATTERN_SPLIT_RE.split(pat.strip())\n        if part  # discard empty tokens\n    }\n"
  },
  {
    "path": "src/gitingest/utils/query_parser_utils.py",
    "content": "\"\"\"Utility functions for parsing and validating query parameters.\"\"\"\n\nfrom __future__ import annotations\n\nimport string\nfrom typing import TYPE_CHECKING, cast\nfrom urllib.parse import ParseResult, unquote, urlparse\n\nfrom gitingest.utils.compat_typing import StrEnum\nfrom gitingest.utils.git_utils import _resolve_ref_to_sha, check_repo_exists\nfrom gitingest.utils.logging_config import get_logger\n\nif TYPE_CHECKING:\n    from gitingest.schemas import IngestionQuery\n\n# Initialize logger for this module\nlogger = get_logger(__name__)\n\nHEX_DIGITS: set[str] = set(string.hexdigits)\n\nKNOWN_GIT_HOSTS: list[str] = [\n    \"github.com\",\n    \"gitlab.com\",\n    \"bitbucket.org\",\n    \"gitea.com\",\n    \"codeberg.org\",\n    \"gist.github.com\",\n]\n\n\nclass PathKind(StrEnum):\n    \"\"\"Path kind enum.\"\"\"\n\n    TREE = \"tree\"\n    BLOB = \"blob\"\n    ISSUES = \"issues\"\n    PULL = \"pull\"\n\n\nasync def _fallback_to_root(query: IngestionQuery, token: str | None, warn_msg: str | None = None) -> IngestionQuery:\n    \"\"\"Fallback to the root of the repository if no extra path parts are provided.\n\n    Parameters\n    ----------\n    query : IngestionQuery\n        The query to fallback to the root of the repository.\n    token : str | None\n        The token to use to access the repository.\n    warn_msg : str | None\n        The message to warn.\n\n    Returns\n    -------\n    IngestionQuery\n        The query with the fallback to the root of the repository.\n\n    \"\"\"\n    url = cast(\"str\", query.url)\n    query.commit = await _resolve_ref_to_sha(url, pattern=\"HEAD\", token=token)\n    if warn_msg:\n        logger.warning(warn_msg)\n    return query\n\n\nasync def _normalise_source(raw: str, token: str | None) -> ParseResult:\n    \"\"\"Return a fully-qualified ParseResult or raise.\n\n    Parameters\n    ----------\n    raw : str\n        The raw URL to parse.\n    token : str | None\n        The token to use to 
access the repository.\n\n    Returns\n    -------\n    ParseResult\n        The parsed URL.\n\n    \"\"\"\n    raw = unquote(raw)\n    parsed = urlparse(raw)\n\n    if parsed.scheme:\n        _validate_url_scheme(parsed.scheme)\n        _validate_host(parsed.netloc)\n        return parsed\n\n    # no scheme ('host/user/repo' or 'user/repo')\n    host = raw.split(\"/\", 1)[0].lower()\n    if \".\" in host:\n        _validate_host(host)\n        return urlparse(f\"https://{raw}\")\n\n    # \"user/repo\" slug\n    host = await _try_domains_for_user_and_repo(*_get_user_and_repo_from_path(raw), token=token)\n\n    return urlparse(f\"https://{host}/{raw}\")\n\n\nasync def _try_domains_for_user_and_repo(user_name: str, repo_name: str, token: str | None = None) -> str:\n    \"\"\"Attempt to find a valid repository host for the given ``user_name`` and ``repo_name``.\n\n    Parameters\n    ----------\n    user_name : str\n        The username or owner of the repository.\n    repo_name : str\n        The name of the repository.\n    token : str | None\n        GitHub personal access token (PAT) for accessing private repositories.\n\n    Returns\n    -------\n    str\n        The domain of the valid repository host.\n\n    Raises\n    ------\n    ValueError\n        If no valid repository host is found for the given ``user_name`` and ``repo_name``.\n\n    \"\"\"\n    for domain in KNOWN_GIT_HOSTS:\n        candidate = f\"https://{domain}/{user_name}/{repo_name}\"\n        if await check_repo_exists(candidate, token=token if domain.startswith(\"github.\") else None):\n            return domain\n\n    msg = f\"Could not find a valid repository host for '{user_name}/{repo_name}'.\"\n    raise ValueError(msg)\n\n\ndef _is_valid_git_commit_hash(commit: str) -> bool:\n    \"\"\"Validate if the provided string is a valid Git commit hash.\n\n    This function checks if the commit hash is a 40-character string consisting only\n    of hexadecimal digits, which is the standard format 
for Git commit hashes.\n\n    Parameters\n    ----------\n    commit : str\n        The string to validate as a Git commit hash.\n\n    Returns\n    -------\n    bool\n        ``True`` if the string is a valid 40-character Git commit hash, otherwise ``False``.\n\n    \"\"\"\n    sha_hex_length = 40\n    return len(commit) == sha_hex_length and all(c in HEX_DIGITS for c in commit)\n\n\ndef _validate_host(host: str) -> None:\n    \"\"\"Validate a hostname.\n\n    The host is accepted if it is either present in the hard-coded ``KNOWN_GIT_HOSTS`` list or if it satisfies the\n    simple heuristics in ``_looks_like_git_host``, which try to recognise common self-hosted Git services (e.g. GitLab\n    instances on sub-domains such as 'gitlab.example.com' or 'git.example.com').\n\n    Parameters\n    ----------\n    host : str\n        Hostname (case-insensitive).\n\n    Raises\n    ------\n    ValueError\n        If the host cannot be recognised as a probable Git hosting domain.\n\n    \"\"\"\n    host = host.lower()\n    if host not in KNOWN_GIT_HOSTS and not _looks_like_git_host(host):\n        msg = f\"Unknown domain '{host}' in URL\"\n        raise ValueError(msg)\n\n\ndef _looks_like_git_host(host: str) -> bool:\n    \"\"\"Check if the given host looks like a Git host.\n\n    The current heuristic returns ``True`` when the host starts with ``git.`` (e.g. 'git.example.com'), starts with\n    'gitlab.' (e.g. 'gitlab.company.com'), or starts with 'github.' (e.g. 
'github.company.com' for GitHub Enterprise).\n\n    Parameters\n    ----------\n    host : str\n        Hostname (case-insensitive).\n\n    Returns\n    -------\n    bool\n        ``True`` if the host looks like a Git host, otherwise ``False``.\n\n    \"\"\"\n    host = host.lower()\n    return host.startswith((\"git.\", \"gitlab.\", \"github.\"))\n\n\ndef _validate_url_scheme(scheme: str) -> None:\n    \"\"\"Validate the given scheme against the known schemes.\n\n    Parameters\n    ----------\n    scheme : str\n        The scheme to validate.\n\n    Raises\n    ------\n    ValueError\n        If the scheme is not 'http' or 'https'.\n\n    \"\"\"\n    scheme = scheme.lower()\n    if scheme not in (\"https\", \"http\"):\n        msg = f\"Invalid URL scheme '{scheme}' in URL\"\n        raise ValueError(msg)\n\n\ndef _get_user_and_repo_from_path(path: str) -> tuple[str, str]:\n    \"\"\"Extract the user and repository names from a given path.\n\n    Parameters\n    ----------\n    path : str\n        The path to extract the user and repository names from.\n\n    Returns\n    -------\n    tuple[str, str]\n        A tuple containing the user and repository names.\n\n    Raises\n    ------\n    ValueError\n        If the path does not contain at least two parts.\n\n    \"\"\"\n    min_path_parts = 2\n    path_parts = path.lower().strip(\"/\").split(\"/\")\n    if len(path_parts) < min_path_parts:\n        msg = f\"Invalid repository URL '{path}'\"\n        raise ValueError(msg)\n    return path_parts[0], path_parts[1]\n"
  },
  {
    "path": "src/gitingest/utils/timeout_wrapper.py",
    "content": "\"\"\"Utility functions for the Gitingest package.\"\"\"\n\nimport asyncio\nimport functools\nfrom typing import Awaitable, Callable, TypeVar\n\nfrom gitingest.utils.compat_typing import ParamSpec\nfrom gitingest.utils.exceptions import AsyncTimeoutError\n\nT = TypeVar(\"T\")\nP = ParamSpec(\"P\")\n\n\ndef async_timeout(seconds: int) -> Callable[[Callable[P, Awaitable[T]]], Callable[P, Awaitable[T]]]:\n    \"\"\"Async Timeout decorator.\n\n    This decorator wraps an asynchronous function and ensures it does not run for\n    longer than the specified number of seconds. If the function execution exceeds\n    this limit, it raises an ``AsyncTimeoutError``.\n\n    Parameters\n    ----------\n    seconds : int\n        The maximum allowed time (in seconds) for the asynchronous function to complete.\n\n    Returns\n    -------\n    Callable[[Callable[P, Awaitable[T]]], Callable[P, Awaitable[T]]]\n        A decorator that, when applied to an async function, ensures the function\n        completes within the specified time limit. If the function takes too long,\n        an ``AsyncTimeoutError`` is raised.\n\n    \"\"\"\n\n    def decorator(func: Callable[P, Awaitable[T]]) -> Callable[P, Awaitable[T]]:\n        @functools.wraps(func)\n        async def wrapper(*args: P.args, **kwargs: P.kwargs) -> T:\n            try:\n                return await asyncio.wait_for(func(*args, **kwargs), timeout=seconds)\n            except asyncio.TimeoutError as exc:\n                msg = f\"Operation timed out after {seconds} seconds\"\n                raise AsyncTimeoutError(msg) from exc\n\n        return wrapper\n\n    return decorator\n"
  },
  {
    "path": "src/server/__init__.py",
    "content": "\"\"\"Server module.\"\"\"\n"
  },
  {
    "path": "src/server/__main__.py",
    "content": "\"\"\"Server module entry point for running with python -m server.\"\"\"\n\nimport os\n\nimport uvicorn\n\n# Import logging configuration first to intercept all logging\nfrom gitingest.utils.logging_config import get_logger\n\nlogger = get_logger(__name__)\n\nif __name__ == \"__main__\":\n    # Get configuration from environment variables\n    host = os.getenv(\"HOST\", \"0.0.0.0\")  # noqa: S104\n    port = int(os.getenv(\"PORT\", \"8000\"))\n    reload = os.getenv(\"RELOAD\", \"false\").lower() == \"true\"\n\n    logger.info(\n        \"Starting Gitingest server\",\n        extra={\n            \"host\": host,\n            \"port\": port,\n        },\n    )\n\n    uvicorn.run(\n        \"server.main:app\",\n        host=host,\n        port=port,\n        reload=reload,\n        log_config=None,  # Disable uvicorn's default logging config\n    )\n"
  },
  {
    "path": "src/server/form_types.py",
    "content": "\"\"\"Reusable form type aliases for FastAPI form parameters.\"\"\"\n\nfrom __future__ import annotations\n\nfrom typing import TYPE_CHECKING, Optional\n\nfrom fastapi import Form\n\nfrom gitingest.utils.compat_typing import Annotated\n\nif TYPE_CHECKING:\n    from gitingest.utils.compat_typing import TypeAlias\n\nStrForm: TypeAlias = Annotated[str, Form(...)]\nIntForm: TypeAlias = Annotated[int, Form(...)]\nOptStrForm: TypeAlias = Annotated[Optional[str], Form()]\n"
  },
  {
    "path": "src/server/main.py",
    "content": "\"\"\"Main module for the FastAPI application.\"\"\"\n\nfrom __future__ import annotations\n\nimport os\nimport threading\nfrom pathlib import Path\n\nimport sentry_sdk\nfrom dotenv import load_dotenv\nfrom fastapi import FastAPI, Request\nfrom fastapi.responses import FileResponse, HTMLResponse, JSONResponse\nfrom fastapi.staticfiles import StaticFiles\nfrom slowapi.errors import RateLimitExceeded\nfrom starlette.middleware.trustedhost import TrustedHostMiddleware\n\n# Import logging configuration first to intercept all logging\nfrom gitingest.utils.logging_config import get_logger\nfrom server.metrics_server import start_metrics_server\nfrom server.routers import dynamic, index, ingest\nfrom server.server_config import get_version_info, templates\nfrom server.server_utils import limiter, rate_limit_exception_handler\n\n# Load environment variables from .env file\nload_dotenv()\n\n# Initialize logger for this module\nlogger = get_logger(__name__)\n\n# Initialize Sentry SDK if enabled\nif os.getenv(\"GITINGEST_SENTRY_ENABLED\") is not None:\n    sentry_dsn = os.getenv(\"GITINGEST_SENTRY_DSN\")\n\n    # Only initialize Sentry if DSN is provided\n    if sentry_dsn:\n        # Configure Sentry options from environment variables\n        traces_sample_rate = float(os.getenv(\"GITINGEST_SENTRY_TRACES_SAMPLE_RATE\", \"1.0\"))\n        profile_session_sample_rate = float(os.getenv(\"GITINGEST_SENTRY_PROFILE_SESSION_SAMPLE_RATE\", \"1.0\"))\n        profile_lifecycle_raw = os.getenv(\"GITINGEST_SENTRY_PROFILE_LIFECYCLE\", \"trace\")\n        profile_lifecycle = profile_lifecycle_raw if profile_lifecycle_raw in (\"manual\", \"trace\") else \"trace\"\n        send_default_pii = os.getenv(\"GITINGEST_SENTRY_SEND_DEFAULT_PII\", \"true\").lower() == \"true\"\n        sentry_environment = os.getenv(\"GITINGEST_SENTRY_ENVIRONMENT\", \"\")\n\n        sentry_sdk.init(\n            dsn=sentry_dsn,\n            # Add data like request headers and IP for users\n        
    send_default_pii=send_default_pii,\n            # Set traces_sample_rate to capture transactions for tracing\n            traces_sample_rate=traces_sample_rate,\n            # Set profile_session_sample_rate to profile sessions\n            profile_session_sample_rate=profile_session_sample_rate,\n            # Set profile_lifecycle to automatically run the profiler\n            profile_lifecycle=profile_lifecycle,\n            # Set environment name\n            environment=sentry_environment,\n        )\n\n# Initialize the FastAPI application\napp = FastAPI(docs_url=None, redoc_url=None)\napp.state.limiter = limiter\n\n# Register the custom exception handler for rate limits\napp.add_exception_handler(RateLimitExceeded, rate_limit_exception_handler)\n\n# Start metrics server in a separate thread if enabled\nif os.getenv(\"GITINGEST_METRICS_ENABLED\") is not None:\n    metrics_host = os.getenv(\"GITINGEST_METRICS_HOST\", \"127.0.0.1\")\n    metrics_port = int(os.getenv(\"GITINGEST_METRICS_PORT\", \"9090\"))\n    metrics_thread = threading.Thread(\n        target=start_metrics_server,\n        args=(metrics_host, metrics_port),\n        daemon=True,\n    )\n    metrics_thread.start()\n\n\n# Mount static files dynamically to serve CSS, JS, and other static assets\nstatic_dir = Path(__file__).parent.parent / \"static\"\napp.mount(\"/static\", StaticFiles(directory=static_dir), name=\"static\")\n\n\n# Fetch allowed hosts from the environment or use the default values\nallowed_hosts = os.getenv(\"ALLOWED_HOSTS\")\nif allowed_hosts:\n    allowed_hosts = allowed_hosts.split(\",\")\nelse:\n    # Define the default allowed hosts for the application\n    default_allowed_hosts = [\"gitingest.com\", \"*.gitingest.com\", \"localhost\", \"127.0.0.1\"]\n    allowed_hosts = default_allowed_hosts\n\n# Add middleware to enforce allowed hosts\napp.add_middleware(TrustedHostMiddleware, allowed_hosts=allowed_hosts)\n\n\n@app.get(\"/health\")\nasync def health_check() -> dict[str, 
str]:\n    \"\"\"Health check endpoint to verify that the server is running.\n\n    **Returns**\n\n    - **dict[str, str]**: A JSON object with a \"status\" key indicating the server's health status.\n\n    \"\"\"\n    return {\"status\": \"healthy\"}\n\n\n@app.head(\"/\", include_in_schema=False)\nasync def head_root() -> HTMLResponse:\n    \"\"\"Respond to HTTP HEAD requests for the root URL.\n\n    **This endpoint mirrors the headers and status code of the index page**\n    for HTTP HEAD requests, providing a lightweight way to check if the server\n    is responding without downloading the full page content.\n\n    **Returns**\n\n    - **HTMLResponse**: An empty HTML response with appropriate headers\n\n    \"\"\"\n    return HTMLResponse(content=None, headers={\"content-type\": \"text/html; charset=utf-8\"})\n\n\n@app.get(\"/robots.txt\", include_in_schema=False)\nasync def robots() -> FileResponse:\n    \"\"\"Serve the robots.txt file to guide search engine crawlers.\n\n    **This endpoint serves the ``robots.txt`` file located in the static directory**\n    to provide instructions to search engine crawlers about which parts of the site\n    they should or should not index.\n\n    **Returns**\n\n    - **FileResponse**: The ``robots.txt`` file located in the static directory\n\n    \"\"\"\n    return FileResponse(\"static/robots.txt\")\n\n\n@app.get(\"/llms.txt\")\nasync def llm_txt() -> FileResponse:\n    \"\"\"Serve the llms.txt file to provide information about the site to LLMs.\n\n    **This endpoint serves the ``llms.txt`` file located in the static directory**\n    to provide information about the site to Large Language Models (LLMs)\n    and other AI systems that may be crawling the site.\n\n    **Returns**\n\n    - **FileResponse**: The ``llms.txt`` file located in the static directory\n\n    \"\"\"\n    return FileResponse(\"static/llms.txt\")\n\n\n@app.get(\"/docs\", response_class=HTMLResponse, include_in_schema=False)\nasync def 
custom_swagger_ui(request: Request) -> HTMLResponse:\n    \"\"\"Serve custom Swagger UI documentation.\n\n    **This endpoint serves a custom Swagger UI interface**\n    for the API documentation, providing an interactive way to explore\n    and test the available endpoints.\n\n    **Parameters**\n\n    - **request** (`Request`): The incoming HTTP request\n\n    **Returns**\n\n    - **HTMLResponse**: Custom Swagger UI documentation page\n\n    \"\"\"\n    context = {\"request\": request}\n    context.update(get_version_info())\n    return templates.TemplateResponse(\"swagger_ui.jinja\", context)\n\n\n@app.get(\"/api\", include_in_schema=True)\ndef openapi_json_get() -> JSONResponse:\n    \"\"\"Return the OpenAPI schema.\n\n    **This endpoint returns the OpenAPI schema (openapi.json)**\n    that describes the API structure, endpoints, and data models\n    for documentation and client generation purposes.\n\n    **Returns**\n\n    - **JSONResponse**: The OpenAPI schema as JSON\n\n    \"\"\"\n    return JSONResponse(app.openapi())\n\n\n@app.api_route(\"/api\", methods=[\"POST\", \"PUT\", \"DELETE\", \"OPTIONS\", \"HEAD\"], include_in_schema=False)\n@app.api_route(\"/api/\", methods=[\"GET\", \"POST\", \"PUT\", \"DELETE\", \"OPTIONS\", \"HEAD\"], include_in_schema=False)\ndef openapi_json() -> JSONResponse:\n    \"\"\"Return the OpenAPI schema for various HTTP methods.\n\n    **This endpoint returns the OpenAPI schema (openapi.json)**\n    for multiple HTTP methods, providing API documentation\n    for clients that may use different request methods.\n\n    **Returns**\n\n    - **JSONResponse**: The OpenAPI schema as JSON\n\n    \"\"\"\n    return JSONResponse(app.openapi())\n\n\n# Include routers for modular endpoints\napp.include_router(index)\napp.include_router(ingest)\napp.include_router(dynamic)\n"
  },
  {
    "path": "src/server/metrics_server.py",
    "content": "\"\"\"Prometheus metrics server running on a separate port.\"\"\"\n\nimport uvicorn\nfrom fastapi import FastAPI\nfrom fastapi.responses import HTMLResponse\nfrom prometheus_client import REGISTRY, generate_latest\n\nfrom gitingest.utils.logging_config import get_logger\n\n# Create a logger for this module\nlogger = get_logger(__name__)\n\n# Create a separate FastAPI app for metrics\nmetrics_app = FastAPI(\n    title=\"Gitingest Metrics\",\n    description=\"Prometheus metrics for Gitingest\",\n    docs_url=None,\n    redoc_url=None,\n)\n\n\n@metrics_app.get(\"/metrics\")\nasync def metrics() -> HTMLResponse:\n    \"\"\"Serve Prometheus metrics without authentication.\n\n    This endpoint is only accessible from the local network.\n\n    Returns\n    -------\n    HTMLResponse\n        Prometheus metrics in text format\n\n    \"\"\"\n    return HTMLResponse(\n        content=generate_latest(REGISTRY),\n        status_code=200,\n        media_type=\"text/plain\",\n    )\n\n\ndef start_metrics_server(host: str = \"127.0.0.1\", port: int = 9090) -> None:\n    \"\"\"Start the metrics server on a separate port.\n\n    Parameters\n    ----------\n    host : str\n        The host to bind to (default: 127.0.0.1 for local network only)\n    port : int\n        The port to bind to (default: 9090)\n\n    Returns\n    -------\n    None\n\n    \"\"\"\n    logger.info(\"Starting metrics server\", extra={\"host\": host, \"port\": port})\n\n    # Configure uvicorn to suppress startup messages to avoid duplicates\n    # since the main server already shows similar messages\n    uvicorn.run(\n        metrics_app,\n        host=host,\n        port=port,\n        log_config=None,  # Disable uvicorn's default logging config\n        access_log=False,  # Disable access logging for metrics server\n        # Suppress uvicorn's startup messages by setting log level higher\n        log_level=\"warning\",\n    )\n"
  },
  {
    "path": "src/server/models.py",
    "content": "\"\"\"Pydantic models for the query form.\"\"\"\n\nfrom __future__ import annotations\n\nfrom enum import Enum\nfrom typing import TYPE_CHECKING, Union\n\nfrom pydantic import BaseModel, Field, field_validator\n\nfrom gitingest.utils.compat_func import removesuffix\nfrom server.server_config import MAX_FILE_SIZE_KB\n\n# needed for type checking (pydantic)\nif TYPE_CHECKING:\n    from server.form_types import IntForm, OptStrForm, StrForm\n\n\nclass PatternType(str, Enum):\n    \"\"\"Enumeration for pattern types used in file filtering.\"\"\"\n\n    INCLUDE = \"include\"\n    EXCLUDE = \"exclude\"\n\n\nclass IngestRequest(BaseModel):\n    \"\"\"Request model for the /api/ingest endpoint.\n\n    Attributes\n    ----------\n    input_text : str\n        The Git repository URL or slug to ingest.\n    max_file_size : int\n        Maximum file size slider position (0-500) for filtering files.\n    pattern_type : PatternType\n        Type of pattern to use for file filtering (include or exclude).\n    pattern : str\n        Glob/regex pattern string for file filtering.\n    token : str | None\n        GitHub personal access token (PAT) for accessing private repositories.\n\n    \"\"\"\n\n    input_text: str = Field(..., description=\"Git repository URL or slug to ingest\")\n    max_file_size: int = Field(..., ge=1, le=MAX_FILE_SIZE_KB, description=\"File size in KB\")\n    pattern_type: PatternType = Field(default=PatternType.EXCLUDE, description=\"Pattern type for file filtering\")\n    pattern: str = Field(default=\"\", description=\"Glob/regex pattern for file filtering\")\n    token: str | None = Field(default=None, description=\"GitHub PAT for private repositories\")\n\n    @field_validator(\"input_text\")\n    @classmethod\n    def validate_input_text(cls, v: str) -> str:\n        \"\"\"Validate that ``input_text`` is not empty.\"\"\"\n        if not v.strip():\n            err = \"input_text cannot be empty\"\n            raise ValueError(err)\n      
  return removesuffix(v.strip(), \".git\")\n\n    @field_validator(\"pattern\")\n    @classmethod\n    def validate_pattern(cls, v: str) -> str:\n        \"\"\"Validate ``pattern`` field.\"\"\"\n        return v.strip()\n\n\nclass IngestSuccessResponse(BaseModel):\n    \"\"\"Success response model for the /api/ingest endpoint.\n\n    Attributes\n    ----------\n    repo_url : str\n        The original repository URL that was processed.\n    short_repo_url : str\n        Short form of repository URL (user/repo).\n    summary : str\n        Summary of the ingestion process including token estimates.\n    digest_url : str\n        URL to download the full digest content (either S3 URL or local download endpoint).\n    tree : str\n        File tree structure of the repository.\n    content : str\n        Processed content from the repository files.\n    default_max_file_size : int\n        The file size slider position used.\n    pattern_type : str\n        The pattern type used for filtering.\n    pattern : str\n        The pattern used for filtering.\n\n    \"\"\"\n\n    repo_url: str = Field(..., description=\"Original repository URL\")\n    short_repo_url: str = Field(..., description=\"Short repository URL (user/repo)\")\n    summary: str = Field(..., description=\"Ingestion summary with token estimates\")\n    digest_url: str = Field(..., description=\"URL to download the full digest content\")\n    tree: str = Field(..., description=\"File tree structure\")\n    content: str = Field(..., description=\"Processed file content\")\n    default_max_file_size: int = Field(..., description=\"File size slider position used\")\n    pattern_type: str = Field(..., description=\"Pattern type used\")\n    pattern: str = Field(..., description=\"Pattern used\")\n\n\nclass IngestErrorResponse(BaseModel):\n    \"\"\"Error response model for the /api/ingest endpoint.\n\n    Attributes\n    ----------\n    error : str\n        Error message describing what went wrong.\n\n    
\"\"\"\n\n    error: str = Field(..., description=\"Error message\")\n\n\n# Union type for API responses\nIngestResponse = Union[IngestSuccessResponse, IngestErrorResponse]\n\n\nclass S3Metadata(BaseModel):\n    \"\"\"Model for S3 metadata structure.\n\n    Attributes\n    ----------\n    summary : str\n        Summary of the ingestion process including token estimates.\n    tree : str\n        File tree structure of the repository.\n    content : str\n        Processed content from the repository files.\n\n    \"\"\"\n\n    summary: str = Field(..., description=\"Ingestion summary with token estimates\")\n    tree: str = Field(..., description=\"File tree structure\")\n    content: str = Field(..., description=\"Processed file content\")\n\n\nclass QueryForm(BaseModel):\n    \"\"\"Form data for the query.\n\n    Attributes\n    ----------\n    input_text : str\n        Text or URL supplied in the form.\n    max_file_size : int\n        The maximum allowed file size for the input, specified by the user.\n    pattern_type : str\n        The type of pattern used for the query (``include`` or ``exclude``).\n    pattern : str\n        Glob/regex pattern string.\n    token : str | None\n        GitHub personal access token (PAT) for accessing private repositories.\n\n    \"\"\"\n\n    input_text: str\n    max_file_size: int\n    pattern_type: str\n    pattern: str\n    token: str | None = None\n\n    @classmethod\n    def as_form(\n        cls,\n        input_text: StrForm,\n        max_file_size: IntForm,\n        pattern_type: StrForm,\n        pattern: StrForm,\n        token: OptStrForm,\n    ) -> QueryForm:\n        \"\"\"Create a QueryForm from FastAPI form parameters.\n\n        Parameters\n        ----------\n        input_text : StrForm\n            The input text provided by the user.\n        max_file_size : IntForm\n            The maximum allowed file size for the input.\n        pattern_type : StrForm\n            The type of pattern used for the query 
(``include`` or ``exclude``).\n        pattern : StrForm\n            Glob/regex pattern string.\n        token : OptStrForm\n            GitHub personal access token (PAT) for accessing private repositories.\n\n        Returns\n        -------\n        QueryForm\n            The QueryForm instance.\n\n        \"\"\"\n        return cls(\n            input_text=input_text,\n            max_file_size=max_file_size,\n            pattern_type=pattern_type,\n            pattern=pattern,\n            token=token,\n        )\n"
  },
  {
    "path": "src/server/query_processor.py",
    "content": "\"\"\"Process a query by parsing input, cloning a repository, and generating a summary.\"\"\"\n\nfrom __future__ import annotations\n\nimport shutil\nfrom pathlib import Path\nfrom typing import TYPE_CHECKING, cast\n\nfrom gitingest.clone import clone_repo\nfrom gitingest.ingestion import ingest_query\nfrom gitingest.query_parser import parse_remote_repo\nfrom gitingest.utils.git_utils import resolve_commit, validate_github_token\nfrom gitingest.utils.logging_config import get_logger\nfrom gitingest.utils.pattern_utils import process_patterns\nfrom server.models import IngestErrorResponse, IngestResponse, IngestSuccessResponse, PatternType, S3Metadata\nfrom server.s3_utils import (\n    _build_s3_url,\n    check_s3_object_exists,\n    generate_s3_file_path,\n    get_metadata_from_s3,\n    is_s3_enabled,\n    upload_metadata_to_s3,\n    upload_to_s3,\n)\nfrom server.server_config import MAX_DISPLAY_SIZE\n\n# Initialize logger for this module\nlogger = get_logger(__name__)\n\nif TYPE_CHECKING:\n    from gitingest.schemas.cloning import CloneConfig\n    from gitingest.schemas.ingestion import IngestionQuery\n\n\ndef _cleanup_repository(clone_config: CloneConfig) -> None:\n    \"\"\"Clean up the cloned repository after processing.\"\"\"\n    try:\n        local_path = Path(clone_config.local_path)\n        if local_path.exists():\n            shutil.rmtree(local_path)\n            logger.info(\"Successfully cleaned up repository\", extra={\"local_path\": str(local_path)})\n    except (PermissionError, OSError):\n        logger.exception(\"Could not delete repository\", extra={\"local_path\": str(clone_config.local_path)})\n\n\nasync def _check_s3_cache(\n    query: IngestionQuery,\n    input_text: str,\n    max_file_size: int,\n    pattern_type: str,\n    pattern: str,\n    token: str | None,\n) -> IngestSuccessResponse | None:\n    \"\"\"Check if digest already exists on S3 and return response if found.\n\n    Parameters\n    ----------\n    query : 
IngestionQuery\n        The parsed query object.\n    input_text : str\n        Original input text.\n    max_file_size : int\n        Maximum file size in KB.\n    pattern_type : str\n        Pattern type (include/exclude).\n    pattern : str\n        Pattern string.\n    token : str | None\n        GitHub token.\n\n    Returns\n    -------\n    IngestSuccessResponse | None\n        Response if file exists on S3, None otherwise.\n\n    \"\"\"\n    if not is_s3_enabled():\n        return None\n\n    try:\n        # Use git ls-remote to get commit SHA without cloning\n        clone_config = query.extract_clone_config()\n        logger.info(\"Resolving commit for S3 cache check\", extra={\"repo_url\": query.url})\n        query.commit = await resolve_commit(clone_config, token=token)\n        logger.info(\"Commit resolved successfully\", extra={\"repo_url\": query.url, \"commit\": query.commit})\n\n        # Generate S3 file path using the resolved commit\n        s3_file_path = generate_s3_file_path(\n            source=query.url,\n            user_name=cast(\"str\", query.user_name),\n            repo_name=cast(\"str\", query.repo_name),\n            commit=query.commit,\n            subpath=query.subpath,\n            include_patterns=query.include_patterns,\n            ignore_patterns=query.ignore_patterns,\n        )\n\n        # Check if file exists on S3\n        if check_s3_object_exists(s3_file_path):\n            # File exists on S3, serve it directly without cloning\n            s3_url = _build_s3_url(s3_file_path)\n            query.s3_url = s3_url\n\n            short_repo_url = f\"{query.user_name}/{query.repo_name}\"\n\n            # Try to get cached metadata\n            metadata = get_metadata_from_s3(s3_file_path)\n\n            if metadata:\n                # Use cached metadata if available\n                summary = metadata.summary\n                tree = metadata.tree\n                content = metadata.content\n            else:\n            
    # Fallback to placeholder messages if metadata not available\n                summary = \"Digest served from cache (S3). Download the full digest to see content details.\"\n                tree = \"Digest served from cache. Download the full digest to see the file tree.\"\n                content = \"Digest served from cache. Download the full digest to see the content.\"\n\n            return IngestSuccessResponse(\n                repo_url=input_text,\n                short_repo_url=short_repo_url,\n                summary=summary,\n                digest_url=s3_url,\n                tree=tree,\n                content=content,\n                default_max_file_size=max_file_size,\n                pattern_type=pattern_type,\n                pattern=pattern,\n            )\n    except Exception as exc:\n        # Log the exception but don't fail the entire request\n        logger.warning(\"S3 cache check failed, falling back to normal cloning\", extra={\"error\": str(exc)})\n\n    logger.info(\"Digest not found in S3 cache, proceeding with normal cloning\", extra={\"repo_url\": query.url})\n    return None\n\n\ndef _store_digest_content(\n    query: IngestionQuery,\n    clone_config: CloneConfig,\n    digest_content: str,\n    summary: str,\n    tree: str,\n    content: str,\n) -> None:\n    \"\"\"Store digest content either to S3 or locally based on configuration.\n\n    Parameters\n    ----------\n    query : IngestionQuery\n        The query object containing repository information.\n    clone_config : CloneConfig\n        The clone configuration object.\n    digest_content : str\n        The complete digest content to store.\n    summary : str\n        The summary content for metadata.\n    tree : str\n        The tree content for metadata.\n    content : str\n        The file content for metadata.\n\n    \"\"\"\n    if is_s3_enabled():\n        # Upload to S3 instead of storing locally\n        s3_file_path = generate_s3_file_path(\n            
source=query.url,\n            user_name=cast(\"str\", query.user_name),\n            repo_name=cast(\"str\", query.repo_name),\n            commit=query.commit,\n            subpath=query.subpath,\n            include_patterns=query.include_patterns,\n            ignore_patterns=query.ignore_patterns,\n        )\n        s3_url = upload_to_s3(content=digest_content, s3_file_path=s3_file_path, ingest_id=query.id)\n\n        # Also upload metadata JSON for caching\n        metadata = S3Metadata(\n            summary=summary,\n            tree=tree,\n            content=content,\n        )\n        try:\n            upload_metadata_to_s3(metadata=metadata, s3_file_path=s3_file_path, ingest_id=query.id)\n            logger.info(\"Successfully uploaded metadata to S3\")\n        except Exception as metadata_exc:\n            # Log the error but don't fail the entire request\n            logger.warning(\"Failed to upload metadata to S3\", extra={\"error\": str(metadata_exc)})\n\n        # Store S3 URL in query for later use\n        query.s3_url = s3_url\n    else:\n        # Store locally\n        local_txt_file = Path(clone_config.local_path).with_suffix(\".txt\")\n        with local_txt_file.open(\"w\", encoding=\"utf-8\") as f:\n            f.write(digest_content)\n\n\ndef _generate_digest_url(query: IngestionQuery) -> str:\n    \"\"\"Generate the digest URL based on S3 configuration.\n\n    Parameters\n    ----------\n    query : IngestionQuery\n        The query object containing repository information.\n\n    Returns\n    -------\n    str\n        The digest URL.\n\n    Raises\n    ------\n    RuntimeError\n        If S3 is enabled but no S3 URL was generated.\n\n    \"\"\"\n    if is_s3_enabled():\n        digest_url = getattr(query, \"s3_url\", None)\n        if not digest_url:\n            # This should not happen if S3 upload was successful\n            msg = \"S3 is enabled but no S3 URL was generated\"\n            raise RuntimeError(msg)\n        return 
digest_url\n    return f\"/api/download/file/{query.id}\"\n\n\nasync def process_query(\n    input_text: str,\n    max_file_size: int,\n    pattern_type: PatternType,\n    pattern: str,\n    token: str | None = None,\n) -> IngestResponse:\n    \"\"\"Process a query by parsing input, cloning a repository, and generating a summary.\n\n    Handle user input, process Git repository data, and prepare\n    a response for rendering a template with the processed results or an error message.\n\n    Parameters\n    ----------\n    input_text : str\n        Input text provided by the user, typically a Git repository URL or slug.\n    max_file_size : int\n        Max file size in KB to be included in the digest.\n    pattern_type : PatternType\n        Type of pattern to use (either \"include\" or \"exclude\")\n    pattern : str\n        Pattern to include or exclude in the query, depending on the pattern type.\n    token : str | None\n        GitHub personal access token (PAT) for accessing private repositories.\n\n    Returns\n    -------\n    IngestResponse\n        A union type, corresponding to IngestErrorResponse or IngestSuccessResponse\n\n    Raises\n    ------\n    RuntimeError\n        If the commit hash is not found (should never happen).\n\n    \"\"\"\n    if token:\n        validate_github_token(token)\n\n    try:\n        query = await parse_remote_repo(input_text, token=token)\n    except Exception as exc:\n        logger.warning(\"Failed to parse remote repository\", extra={\"input_text\": input_text, \"error\": str(exc)})\n        return IngestErrorResponse(error=str(exc))\n\n    query.url = cast(\"str\", query.url)\n    query.max_file_size = max_file_size * 1024  # Convert to bytes since we currently use KB in higher levels\n    query.ignore_patterns, query.include_patterns = process_patterns(\n        exclude_patterns=pattern if pattern_type == PatternType.EXCLUDE else None,\n        include_patterns=pattern if pattern_type == PatternType.INCLUDE else None,\n 
   )\n\n    # Check if digest already exists on S3 before cloning\n    s3_response = await _check_s3_cache(\n        query=query,\n        input_text=input_text,\n        max_file_size=max_file_size,\n        pattern_type=pattern_type.value,\n        pattern=pattern,\n        token=token,\n    )\n    if s3_response:\n        return s3_response\n\n    clone_config = query.extract_clone_config()\n    await clone_repo(clone_config, token=token)\n\n    short_repo_url = f\"{query.user_name}/{query.repo_name}\"\n\n    # The commit hash should always be available at this point\n    if not query.commit:\n        msg = \"Unexpected error: no commit hash found\"\n        raise RuntimeError(msg)\n\n    try:\n        summary, tree, content = ingest_query(query)\n        digest_content = tree + \"\\n\" + content\n        _store_digest_content(query, clone_config, digest_content, summary, tree, content)\n    except Exception as exc:\n        _print_error(query.url, exc, max_file_size, pattern_type, pattern)\n        # Clean up repository even if processing failed\n        _cleanup_repository(clone_config)\n        return IngestErrorResponse(error=f\"{exc!s}\")\n\n    if len(content) > MAX_DISPLAY_SIZE:\n        content = (\n            f\"(Files content cropped to {int(MAX_DISPLAY_SIZE / 1_000)}k characters, \"\n            \"download full ingest to see more)\\n\" + content[:MAX_DISPLAY_SIZE]\n        )\n\n    _print_success(\n        url=query.url,\n        max_file_size=max_file_size,\n        pattern_type=pattern_type,\n        pattern=pattern,\n        summary=summary,\n    )\n\n    digest_url = _generate_digest_url(query)\n\n    # Clean up the repository after successful processing\n    _cleanup_repository(clone_config)\n\n    return IngestSuccessResponse(\n        repo_url=input_text,\n        short_repo_url=short_repo_url,\n        summary=summary,\n        digest_url=digest_url,\n        tree=tree,\n        content=content,\n        default_max_file_size=max_file_size,\n 
       pattern_type=pattern_type,\n        pattern=pattern,\n    )\n\n\ndef _print_query(url: str, max_file_size: int, pattern_type: str, pattern: str) -> None:\n    \"\"\"Print a formatted summary of the query details for debugging.\n\n    Parameters\n    ----------\n    url : str\n        The URL associated with the query.\n    max_file_size : int\n        The maximum file size allowed for the query, in bytes.\n    pattern_type : str\n        Specifies the type of pattern to use, either \"include\" or \"exclude\".\n    pattern : str\n        The actual pattern string to include or exclude in the query.\n\n    \"\"\"\n    default_max_file_kb = 50\n    logger.info(\n        \"Processing query\",\n        extra={\n            \"url\": url,\n            \"max_file_size_kb\": int(max_file_size / 1024),\n            \"pattern_type\": pattern_type,\n            \"pattern\": pattern,\n            \"custom_size\": int(max_file_size / 1024) != default_max_file_kb,\n        },\n    )\n\n\ndef _print_error(url: str, exc: Exception, max_file_size: int, pattern_type: str, pattern: str) -> None:\n    \"\"\"Print a formatted error message for debugging.\n\n    Parameters\n    ----------\n    url : str\n        The URL associated with the query that caused the error.\n    exc : Exception\n        The exception raised during the query or process.\n    max_file_size : int\n        The maximum file size allowed for the query, in bytes.\n    pattern_type : str\n        Specifies the type of pattern to use, either \"include\" or \"exclude\".\n    pattern : str\n        The actual pattern string to include or exclude in the query.\n\n    \"\"\"\n    logger.error(\n        \"Query processing failed\",\n        extra={\n            \"url\": url,\n            \"max_file_size_kb\": int(max_file_size / 1024),\n            \"pattern_type\": pattern_type,\n            \"pattern\": pattern,\n            \"error\": str(exc),\n        },\n    )\n\n\ndef _print_success(url: str, max_file_size: 
int, pattern_type: str, pattern: str, summary: str) -> None:\n    \"\"\"Print a formatted success message for debugging.\n\n    Parameters\n    ----------\n    url : str\n        The URL associated with the successful query.\n    max_file_size : int\n        The maximum file size allowed for the query, in bytes.\n    pattern_type : str\n        Specifies the type of pattern to use, either \"include\" or \"exclude\".\n    pattern : str\n        The actual pattern string to include or exclude in the query.\n    summary : str\n        A summary of the query result, including details like estimated tokens.\n\n    \"\"\"\n    estimated_tokens = summary[summary.index(\"Estimated tokens:\") + len(\"Estimated \") :]\n    logger.info(\n        \"Query processing completed successfully\",\n        extra={\n            \"url\": url,\n            \"max_file_size_kb\": int(max_file_size / 1024),\n            \"pattern_type\": pattern_type,\n            \"pattern\": pattern,\n            \"estimated_tokens\": estimated_tokens,\n        },\n    )\n"
  },
  {
    "path": "src/server/routers/__init__.py",
    "content": "\"\"\"Module containing the routers for the FastAPI application.\"\"\"\n\nfrom server.routers.dynamic import router as dynamic\nfrom server.routers.index import router as index\nfrom server.routers.ingest import router as ingest\n\n__all__ = [\"dynamic\", \"index\", \"ingest\"]\n"
  },
  {
    "path": "src/server/routers/dynamic.py",
    "content": "\"\"\"The dynamic router module defines handlers for dynamic path requests.\"\"\"\n\nfrom fastapi import APIRouter, Request\nfrom fastapi.responses import HTMLResponse\n\nfrom server.server_config import get_version_info, templates\n\nrouter = APIRouter()\n\n\n@router.get(\"/{full_path:path}\", include_in_schema=False)\nasync def catch_all(request: Request, full_path: str) -> HTMLResponse:\n    \"\"\"Render a page with a Git URL based on the provided path.\n\n    This endpoint catches all GET requests with a dynamic path, constructs a Git URL\n    using the ``full_path`` parameter, and renders the ``git.jinja`` template with that URL.\n\n    Parameters\n    ----------\n    request : Request\n        The incoming request object, which provides context for rendering the response.\n    full_path : str\n        The full path extracted from the URL, which is used to build the Git URL.\n\n    Returns\n    -------\n    HTMLResponse\n        An HTML response containing the rendered template, with the Git URL\n        and other default parameters such as file size.\n\n    \"\"\"\n    context = {\n        \"request\": request,\n        \"repo_url\": full_path,\n        \"default_max_file_size\": 243,\n    }\n    context.update(get_version_info())\n\n    return templates.TemplateResponse(\"git.jinja\", context)\n"
  },
  {
    "path": "src/server/routers/index.py",
    "content": "\"\"\"Module defining the FastAPI router for the home page of the application.\"\"\"\n\nfrom fastapi import APIRouter, Request\nfrom fastapi.responses import HTMLResponse\n\nfrom server.server_config import EXAMPLE_REPOS, get_version_info, templates\n\nrouter = APIRouter()\n\n\n@router.get(\"/\", response_class=HTMLResponse, include_in_schema=False)\nasync def home(request: Request) -> HTMLResponse:\n    \"\"\"Render the home page with example repositories and default parameters.\n\n    This endpoint serves the home page of the application, rendering the ``index.jinja`` template\n    and providing it with a list of example repositories and default file size values.\n\n    Parameters\n    ----------\n    request : Request\n        The incoming request object, which provides context for rendering the response.\n\n    Returns\n    -------\n    HTMLResponse\n        An HTML response containing the rendered home page template, with example repositories\n        and other default parameters such as file size.\n\n    \"\"\"\n    context = {\n        \"request\": request,\n        \"examples\": EXAMPLE_REPOS,\n        \"default_max_file_size\": 243,\n    }\n    context.update(get_version_info())\n\n    return templates.TemplateResponse(\"index.jinja\", context)\n"
  },
  {
    "path": "src/server/routers/ingest.py",
    "content": "\"\"\"Ingest endpoint for the API.\"\"\"\n\nfrom typing import Union\nfrom uuid import UUID\n\nfrom fastapi import APIRouter, HTTPException, Request, status\nfrom fastapi.responses import FileResponse, JSONResponse, RedirectResponse\nfrom prometheus_client import Counter\n\nfrom gitingest.config import TMP_BASE_PATH\nfrom server.models import IngestRequest\nfrom server.routers_utils import COMMON_INGEST_RESPONSES, _perform_ingestion\nfrom server.s3_utils import is_s3_enabled\nfrom server.server_config import DEFAULT_FILE_SIZE_KB\nfrom server.server_utils import limiter\n\ningest_counter = Counter(\"gitingest_ingest_total\", \"Number of ingests\", [\"status\", \"url\"])\n\nrouter = APIRouter()\n\n\n@router.post(\"/api/ingest\", responses=COMMON_INGEST_RESPONSES)\n@limiter.limit(\"10/minute\")\nasync def api_ingest(\n    request: Request,  # noqa: ARG001 (unused-function-argument) # pylint: disable=unused-argument\n    ingest_request: IngestRequest,\n) -> JSONResponse:\n    \"\"\"Ingest a Git repository and return processed content.\n\n    **This endpoint processes a Git repository by cloning it, analyzing its structure,**\n    and returning a summary with the repository's content. 
The response includes\n    file tree structure, processed content, and metadata about the ingestion.\n\n    **Parameters**\n\n    - **ingest_request** (`IngestRequest`): Pydantic model containing ingestion parameters\n\n    **Returns**\n\n    - **JSONResponse**: Success response with ingestion results or error response with appropriate HTTP status code\n\n    \"\"\"\n    response = await _perform_ingestion(\n        input_text=ingest_request.input_text,\n        max_file_size=ingest_request.max_file_size,\n        pattern_type=ingest_request.pattern_type.value,\n        pattern=ingest_request.pattern,\n        token=ingest_request.token,\n    )\n    # limit URL to 255 characters\n    ingest_counter.labels(status=response.status_code, url=ingest_request.input_text[:255]).inc()\n    return response\n\n\n@router.get(\"/api/{user}/{repository}\", responses=COMMON_INGEST_RESPONSES)\n@limiter.limit(\"10/minute\")\nasync def api_ingest_get(\n    request: Request,  # noqa: ARG001 (unused-function-argument) # pylint: disable=unused-argument\n    user: str,\n    repository: str,\n    max_file_size: int = DEFAULT_FILE_SIZE_KB,\n    pattern_type: str = \"exclude\",\n    pattern: str = \"\",\n    token: str = \"\",\n) -> JSONResponse:\n    \"\"\"Ingest a GitHub repository via GET and return processed content.\n\n    **This endpoint processes a GitHub repository by analyzing its structure and returning a summary**\n    with the repository's content. The response includes file tree structure, processed content, and\n    metadata about the ingestion. 
All ingestion parameters are optional and can be provided as query parameters.\n\n    **Path Parameters**\n    - **user** (`str`): GitHub username or organization\n    - **repository** (`str`): GitHub repository name\n\n    **Query Parameters**\n    - **max_file_size** (`int`, optional): Maximum file size in KB to include in the digest (default: 5120 KB)\n    - **pattern_type** (`str`, optional): Type of pattern to use (\"include\" or \"exclude\", default: \"exclude\")\n    - **pattern** (`str`, optional): Pattern to include or exclude in the query (default: \"\")\n    - **token** (`str`, optional): GitHub personal access token for private repositories (default: \"\")\n\n    **Returns**\n    - **JSONResponse**: Success response with ingestion results or error response with appropriate HTTP status code\n    \"\"\"\n    response = await _perform_ingestion(\n        input_text=f\"{user}/{repository}\",\n        max_file_size=max_file_size,\n        pattern_type=pattern_type,\n        pattern=pattern,\n        token=token or None,\n    )\n    # limit URL to 255 characters\n    ingest_counter.labels(status=response.status_code, url=f\"{user}/{repository}\"[:255]).inc()\n    return response\n\n\n@router.get(\"/api/download/file/{ingest_id}\", response_model=None)\nasync def download_ingest(\n    ingest_id: UUID,\n) -> Union[RedirectResponse, FileResponse]:  # noqa: FA100 (future-rewritable-type-annotation) (pydantic)\n    \"\"\"Download the first text file produced for an ingest ID.\n\n    **This endpoint retrieves the first ``*.txt`` file produced during the ingestion process**\n    and returns it as a downloadable file. 
When S3 is enabled, this endpoint is disabled\n    and clients should use the S3 URL provided in the ingest response instead.\n\n    **Parameters**\n\n    - **ingest_id** (`UUID`): Identifier that the ingest step emitted\n\n    **Returns**\n\n    - **FileResponse**: Streamed response with media type ``text/plain`` for local files\n\n    **Raises**\n\n    - **HTTPException**: **503** - endpoint is disabled when S3 is enabled\n    - **HTTPException**: **404** - digest directory is missing or contains no ``*.txt`` file\n    - **HTTPException**: **403** - the process lacks permission to read the directory or file\n\n    \"\"\"\n    # Disable download endpoint when S3 is enabled\n    if is_s3_enabled():\n        raise HTTPException(\n            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,\n            detail=\"Download endpoint is disabled when S3 is enabled. \"\n            \"Use the S3 URL provided in the ingest response instead.\",\n        )\n\n    # Fall back to local file serving\n    # Normalize and validate the directory path\n    directory = (TMP_BASE_PATH / str(ingest_id)).resolve()\n    if not str(directory).startswith(str(TMP_BASE_PATH.resolve())):\n        raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail=f\"Invalid ingest ID: {ingest_id!r}\")\n\n    if not directory.is_dir():\n        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f\"Digest {ingest_id!r} not found\")\n\n    try:\n        first_txt_file = next(directory.glob(\"*.txt\"))\n    except StopIteration as exc:\n        raise HTTPException(\n            status_code=status.HTTP_404_NOT_FOUND,\n            detail=f\"No .txt file found for digest {ingest_id!r}\",\n        ) from exc\n\n    try:\n        return FileResponse(path=first_txt_file, media_type=\"text/plain\", filename=first_txt_file.name)\n    except PermissionError as exc:\n        raise HTTPException(\n            status_code=status.HTTP_403_FORBIDDEN,\n            detail=f\"Permission denied for 
{first_txt_file}\",\n        ) from exc\n"
  },
  {
    "path": "src/server/routers_utils.py",
    "content": "\"\"\"Utility functions for the ingest endpoints.\"\"\"\n\nfrom __future__ import annotations\n\nfrom typing import Any\n\nfrom fastapi import status\nfrom fastapi.responses import JSONResponse\n\nfrom server.models import IngestErrorResponse, IngestSuccessResponse, PatternType\nfrom server.query_processor import process_query\n\nCOMMON_INGEST_RESPONSES: dict[int | str, dict[str, Any]] = {\n    status.HTTP_200_OK: {\"model\": IngestSuccessResponse, \"description\": \"Successful ingestion\"},\n    status.HTTP_400_BAD_REQUEST: {\"model\": IngestErrorResponse, \"description\": \"Bad request or processing error\"},\n    status.HTTP_500_INTERNAL_SERVER_ERROR: {\"model\": IngestErrorResponse, \"description\": \"Internal server error\"},\n}\n\n\nasync def _perform_ingestion(\n    input_text: str,\n    max_file_size: int,\n    pattern_type: str,\n    pattern: str,\n    token: str | None,\n) -> JSONResponse:\n    \"\"\"Run ``process_query`` and wrap the result in a ``FastAPI`` ``JSONResponse``.\n\n    Consolidates error handling shared by the ``POST`` and ``GET`` ingest endpoints.\n    \"\"\"\n    try:\n        pattern_type = PatternType(pattern_type)\n\n        result = await process_query(\n            input_text=input_text,\n            max_file_size=max_file_size,\n            pattern_type=pattern_type,\n            pattern=pattern,\n            token=token,\n        )\n\n        if isinstance(result, IngestErrorResponse):\n            # Return structured error response with 400 status code\n            return JSONResponse(status_code=status.HTTP_400_BAD_REQUEST, content=result.model_dump())\n\n        # Return structured success response with 200 status code\n        return JSONResponse(status_code=status.HTTP_200_OK, content=result.model_dump())\n\n    except ValueError as ve:\n        # Handle validation errors with 400 status code\n        error_response = IngestErrorResponse(error=f\"Validation error: {ve!s}\")\n        return 
JSONResponse(status_code=status.HTTP_400_BAD_REQUEST, content=error_response.model_dump())\n\n    except Exception as exc:\n        # Handle unexpected errors with 500 status code\n        error_response = IngestErrorResponse(error=f\"Internal server error: {exc!s}\")\n        return JSONResponse(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, content=error_response.model_dump())\n"
  },
  {
    "path": "src/server/s3_utils.py",
    "content": "\"\"\"S3 utility functions for uploading and managing digest files.\"\"\"\n\nfrom __future__ import annotations\n\nimport hashlib\nimport os\nfrom typing import TYPE_CHECKING\nfrom urllib.parse import urlparse\nfrom uuid import UUID  # noqa: TC003 (typing-only-standard-library-import) needed for type checking (pydantic)\n\nimport boto3\nfrom botocore.exceptions import ClientError\nfrom prometheus_client import Counter\n\nfrom gitingest.utils.logging_config import get_logger\nfrom server.models import S3Metadata\n\nif TYPE_CHECKING:\n    from botocore.client import BaseClient\n\n\n# Initialize logger for this module\nlogger = get_logger(__name__)\n\n_s3_ingest_lookup_counter = Counter(\"gitingest_s3_ingest_lookup\", \"Number of S3 ingest file lookups\")\n_s3_ingest_hit_counter = Counter(\"gitingest_s3_ingest_hit\", \"Number of S3 ingest file cache hits\")\n_s3_ingest_miss_counter = Counter(\"gitingest_s3_ingest_miss\", \"Number of S3 ingest file cache misses\")\n\n\nclass S3UploadError(Exception):\n    \"\"\"Custom exception for S3 upload failures.\"\"\"\n\n\ndef is_s3_enabled() -> bool:\n    \"\"\"Check if S3 is enabled via environment variables.\"\"\"\n    return os.getenv(\"S3_ENABLED\", \"false\").lower() == \"true\"\n\n\ndef get_s3_config() -> dict[str, str | None]:\n    \"\"\"Get S3 configuration from environment variables.\"\"\"\n    config = {\n        \"endpoint_url\": os.getenv(\"S3_ENDPOINT\"),\n        \"aws_access_key_id\": os.getenv(\"S3_ACCESS_KEY\"),\n        \"aws_secret_access_key\": os.getenv(\"S3_SECRET_KEY\"),\n        \"region_name\": os.getenv(\"S3_REGION\") or os.getenv(\"AWS_REGION\", \"us-east-1\"),\n    }\n    return {k: v for k, v in config.items() if v is not None}\n\n\ndef get_s3_bucket_name() -> str:\n    \"\"\"Get S3 bucket name from environment variables.\"\"\"\n    return os.getenv(\"S3_BUCKET_NAME\", \"gitingest-bucket\")\n\n\ndef get_s3_alias_host() -> str | None:\n    \"\"\"Get S3 alias host for public 
URLs.\"\"\"\n    return os.getenv(\"S3_ALIAS_HOST\")\n\n\ndef generate_s3_file_path(\n    source: str,\n    user_name: str,\n    repo_name: str,\n    commit: str,\n    subpath: str,\n    include_patterns: set[str] | None,\n    ignore_patterns: set[str],\n) -> str:\n    \"\"\"Generate S3 file path with proper naming convention.\n\n    The file path is formatted as:\n    [<S3_DIRECTORY_PREFIX>/]ingest/<provider>/<repo-owner>/<repo-name>/<commit-ID>/\n    <exclude&include hash>/<owner>-<repo-name>-<subpath-hash>.txt\n\n    If S3_DIRECTORY_PREFIX environment variable is set, it will be prefixed to the path.\n    The commit-ID is always included in the URL.\n    If no specific commit is provided, the actual commit hash from the cloned repository is used.\n\n    Parameters\n    ----------\n    source : str\n        Repository URL; its hostname (e.g., github.com, gitlab.com) forms the <provider> path segment.\n    user_name : str\n        Repository owner or user.\n    repo_name : str\n        Repository name.\n    commit : str\n        Commit hash.\n    subpath : str\n        Subpath of the repository.\n    include_patterns : set[str] | None\n        Set of patterns specifying which files to include.\n    ignore_patterns : set[str]\n        Set of patterns specifying which files to exclude.\n\n    Returns\n    -------\n    str\n        S3 file path string.\n\n    Raises\n    ------\n    ValueError\n        If the source URL is invalid.\n\n    \"\"\"\n    hostname = urlparse(source).hostname\n    if hostname is None:\n        msg = \"Invalid source URL\"\n        logger.error(msg)\n        raise ValueError(msg)\n\n    # Create hash of exclude/include patterns for uniqueness\n    patterns_str = f\"include:{sorted(include_patterns) if include_patterns else []}\"\n    patterns_str += f\"exclude:{sorted(ignore_patterns)}\"\n    patterns_hash = hashlib.sha256(patterns_str.encode()).hexdigest()[:16]\n    subpath_hash = hashlib.sha256(subpath.encode()).hexdigest()[:16]\n\n    file_name = 
f\"{user_name}-{repo_name}-{subpath_hash}.txt\"\n    base_path = f\"ingest/{hostname}/{user_name}/{repo_name}/{commit}/{patterns_hash}/{file_name}\"\n\n    # Check for S3_DIRECTORY_PREFIX environment variable\n    s3_directory_prefix = os.getenv(\"S3_DIRECTORY_PREFIX\")\n\n    if not s3_directory_prefix:\n        return base_path\n\n    # Remove trailing slash if present and add the prefix\n    s3_directory_prefix = s3_directory_prefix.rstrip(\"/\")\n    return f\"{s3_directory_prefix}/{base_path}\"\n\n\ndef create_s3_client() -> BaseClient:\n    \"\"\"Create and return an S3 client with configuration from environment.\"\"\"\n    config = get_s3_config()\n    # Log S3 client creation (excluding sensitive info)\n    log_config = config.copy()\n    has_credentials = bool(log_config.pop(\"aws_access_key_id\", None) or log_config.pop(\"aws_secret_access_key\", None))\n    logger.debug(\n        \"Creating S3 client\",\n        extra={\n            \"s3_config\": log_config,\n            \"has_credentials\": has_credentials,\n        },\n    )\n    return boto3.client(\"s3\", **config)\n\n\ndef upload_to_s3(content: str, s3_file_path: str, ingest_id: UUID) -> str:\n    \"\"\"Upload content to S3 and return the public URL.\n\n    This function uploads the provided content to an S3 bucket and returns the public URL for the uploaded file.\n    The ingest ID is stored as an S3 object tag.\n\n    Parameters\n    ----------\n    content : str\n        The digest content to upload.\n    s3_file_path : str\n        The S3 file path where the content will be stored.\n    ingest_id : UUID\n        The ingest ID to store as an S3 object tag.\n\n    Returns\n    -------\n    str\n        Public URL to access the uploaded file.\n\n    Raises\n    ------\n    ValueError\n        If S3 is not enabled.\n    S3UploadError\n        If the upload to S3 fails.\n\n    \"\"\"\n    if not is_s3_enabled():\n        msg = \"S3 is not enabled\"\n        logger.error(msg)\n        raise 
ValueError(msg)\n\n    s3_client = create_s3_client()\n    bucket_name = get_s3_bucket_name()\n\n    extra_fields = {\n        \"bucket_name\": bucket_name,\n        \"s3_file_path\": s3_file_path,\n        \"ingest_id\": str(ingest_id),\n        \"content_size\": len(content),\n    }\n\n    # Log upload attempt\n    logger.info(\"Starting S3 upload\", extra=extra_fields)\n\n    try:\n        # Upload the content with ingest_id as tag\n        s3_client.put_object(\n            Bucket=bucket_name,\n            Key=s3_file_path,\n            Body=content.encode(\"utf-8\"),\n            ContentType=\"text/plain\",\n            Tagging=f\"ingest_id={ingest_id!s}\",\n        )\n    except ClientError as err:\n        # Log upload failure\n        logger.exception(\n            \"S3 upload failed\",\n            extra={\n                \"bucket_name\": bucket_name,\n                \"s3_file_path\": s3_file_path,\n                \"ingest_id\": str(ingest_id),\n                \"error_code\": err.response.get(\"Error\", {}).get(\"Code\"),\n                \"error_message\": str(err),\n            },\n        )\n        msg = f\"Failed to upload to S3: {err}\"\n        raise S3UploadError(msg) from err\n\n    # Generate public URL\n    alias_host = get_s3_alias_host()\n    if alias_host:\n        # Use alias host if configured\n        public_url = f\"{alias_host.rstrip('/')}/{s3_file_path}\"\n    else:\n        # Fallback to direct S3 URL\n        endpoint = get_s3_config().get(\"endpoint_url\")\n        if endpoint:\n            public_url = f\"{endpoint.rstrip('/')}/{bucket_name}/{s3_file_path}\"\n        else:\n            public_url = f\"https://{bucket_name}.s3.{get_s3_config()['region_name']}.amazonaws.com/{s3_file_path}\"\n\n    # Log successful upload\n    logger.info(\n        \"S3 upload completed successfully\",\n        extra={\n            \"bucket_name\": bucket_name,\n            \"s3_file_path\": s3_file_path,\n            \"ingest_id\": 
str(ingest_id),\n            \"public_url\": public_url,\n        },\n    )\n\n    return public_url\n\n\ndef upload_metadata_to_s3(metadata: S3Metadata, s3_file_path: str, ingest_id: UUID) -> str:\n    \"\"\"Upload metadata JSON to S3 alongside the digest file.\n\n    Parameters\n    ----------\n    metadata : S3Metadata\n        The metadata struct containing summary, tree, and content.\n    s3_file_path : str\n        The S3 file path for the digest (metadata will use .json extension).\n    ingest_id : UUID\n        The ingest ID to store as an S3 object tag.\n\n    Returns\n    -------\n    str\n        Public URL to access the uploaded metadata file.\n\n    Raises\n    ------\n    ValueError\n        If S3 is not enabled.\n    S3UploadError\n        If the upload to S3 fails.\n\n    \"\"\"\n    if not is_s3_enabled():\n        msg = \"S3 is not enabled\"\n        logger.error(msg)\n        raise ValueError(msg)\n\n    # Generate metadata file path by replacing .txt with .json\n    metadata_file_path = s3_file_path.replace(\".txt\", \".json\")\n\n    s3_client = create_s3_client()\n    bucket_name = get_s3_bucket_name()\n\n    extra_fields = {\n        \"bucket_name\": bucket_name,\n        \"metadata_file_path\": metadata_file_path,\n        \"ingest_id\": str(ingest_id),\n        \"metadata_size\": len(metadata.model_dump_json()),\n    }\n\n    # Log upload attempt\n    logger.info(\"Starting S3 metadata upload\", extra=extra_fields)\n\n    try:\n        # Upload the metadata with ingest_id as tag\n        s3_client.put_object(\n            Bucket=bucket_name,\n            Key=metadata_file_path,\n            Body=metadata.model_dump_json(indent=2).encode(\"utf-8\"),\n            ContentType=\"application/json\",\n            Tagging=f\"ingest_id={ingest_id!s}\",\n        )\n    except ClientError as err:\n        # Log upload failure\n        logger.exception(\n            \"S3 metadata upload failed\",\n            extra={\n                \"bucket_name\": 
bucket_name,\n                \"metadata_file_path\": metadata_file_path,\n                \"ingest_id\": str(ingest_id),\n                \"error_code\": err.response.get(\"Error\", {}).get(\"Code\"),\n                \"error_message\": str(err),\n            },\n        )\n        msg = f\"Failed to upload metadata to S3: {err}\"\n        raise S3UploadError(msg) from err\n\n    # Generate public URL\n    alias_host = get_s3_alias_host()\n    if alias_host:\n        # Use alias host if configured\n        public_url = f\"{alias_host.rstrip('/')}/{metadata_file_path}\"\n    else:\n        # Fallback to direct S3 URL\n        endpoint = get_s3_config().get(\"endpoint_url\")\n        if endpoint:\n            public_url = f\"{endpoint.rstrip('/')}/{bucket_name}/{metadata_file_path}\"\n        else:\n            public_url = (\n                f\"https://{bucket_name}.s3.{get_s3_config()['region_name']}.amazonaws.com/{metadata_file_path}\"\n            )\n\n    # Log successful upload\n    logger.info(\n        \"S3 metadata upload completed successfully\",\n        extra={\n            \"bucket_name\": bucket_name,\n            \"metadata_file_path\": metadata_file_path,\n            \"ingest_id\": str(ingest_id),\n            \"public_url\": public_url,\n        },\n    )\n\n    return public_url\n\n\ndef get_metadata_from_s3(s3_file_path: str) -> S3Metadata | None:\n    \"\"\"Retrieve metadata JSON from S3.\n\n    Parameters\n    ----------\n    s3_file_path : str\n        The S3 file path for the digest (metadata will use .json extension).\n\n    Returns\n    -------\n    S3Metadata | None\n        The metadata struct if found, None otherwise.\n\n    \"\"\"\n    if not is_s3_enabled():\n        return None\n\n    # Generate metadata file path by replacing .txt with .json\n    metadata_file_path = s3_file_path.replace(\".txt\", \".json\")\n\n    try:\n        s3_client = create_s3_client()\n        bucket_name = get_s3_bucket_name()\n\n        # Get the metadata 
object\n        response = s3_client.get_object(Bucket=bucket_name, Key=metadata_file_path)\n        metadata_content = response[\"Body\"].read().decode(\"utf-8\")\n\n        return S3Metadata.model_validate_json(metadata_content)\n    except ClientError as err:\n        # Object doesn't exist if we get a 404 error\n        error_code = err.response.get(\"Error\", {}).get(\"Code\")\n        if error_code == \"404\":\n            logger.info(\"Metadata file not found\", extra={\"metadata_file_path\": metadata_file_path})\n            return None\n        # Log other errors but don't fail\n        logger.warning(\"Failed to retrieve metadata from S3\", extra={\"error\": str(err)})\n        return None\n    except Exception as exc:\n        # For any other exception, log and return None\n        logger.warning(\"Unexpected error retrieving metadata from S3\", extra={\"error\": str(exc)})\n        return None\n\n\ndef _build_s3_url(key: str) -> str:\n    \"\"\"Build S3 URL for a given key.\"\"\"\n    alias_host = get_s3_alias_host()\n    if alias_host:\n        return f\"{alias_host.rstrip('/')}/{key}\"\n\n    bucket_name = get_s3_bucket_name()\n    config = get_s3_config()\n\n    endpoint = config[\"endpoint_url\"]\n    if endpoint:\n        return f\"{endpoint.rstrip('/')}/{bucket_name}/{key}\"\n\n    return f\"https://{bucket_name}.s3.{config['region_name']}.amazonaws.com/{key}\"\n\n\ndef _check_object_tags(s3_client: BaseClient, bucket_name: str, key: str, target_ingest_id: UUID) -> bool:\n    \"\"\"Check if an S3 object has the matching ingest_id tag.\"\"\"\n    try:\n        tags_response = s3_client.get_object_tagging(Bucket=bucket_name, Key=key)\n        tags = {tag[\"Key\"]: tag[\"Value\"] for tag in tags_response.get(\"TagSet\", [])}\n        return tags.get(\"ingest_id\") == str(target_ingest_id)\n    except ClientError:\n        return False\n\n\ndef check_s3_object_exists(s3_file_path: str) -> bool:\n    \"\"\"Check if an S3 object exists at the given 
path.\n\n    Parameters\n    ----------\n    s3_file_path : str\n        The S3 file path to check.\n\n    Returns\n    -------\n    bool\n        True if the object exists, False otherwise.\n\n    Raises\n    ------\n    ClientError\n        If there's an S3 error other than 404 (not found).\n\n    \"\"\"\n    if not is_s3_enabled():\n        logger.info(\"S3 not enabled, skipping object existence check\", extra={\"s3_file_path\": s3_file_path})\n        return False\n\n    logger.info(\"Checking S3 object existence\", extra={\"s3_file_path\": s3_file_path})\n    _s3_ingest_lookup_counter.inc()\n    try:\n        s3_client = create_s3_client()\n        bucket_name = get_s3_bucket_name()\n\n        # Use head_object to check if the object exists without downloading it\n        s3_client.head_object(Bucket=bucket_name, Key=s3_file_path)\n    except ClientError as err:\n        # Object doesn't exist if we get a 404 error\n        error_code = err.response.get(\"Error\", {}).get(\"Code\")\n        if error_code == \"404\":\n            logger.info(\n                \"S3 object not found\",\n                extra={\n                    \"s3_file_path\": s3_file_path,\n                    \"bucket_name\": get_s3_bucket_name(),\n                    \"error_code\": error_code,\n                },\n            )\n            _s3_ingest_miss_counter.inc()\n            return False\n        # Re-raise other errors (permissions, etc.)\n        raise\n    except Exception as exc:\n        # For any other exception, assume object doesn't exist\n        logger.info(\n            \"S3 object check failed with exception, assuming not found\",\n            extra={\n                \"s3_file_path\": s3_file_path,\n                \"bucket_name\": get_s3_bucket_name(),\n                \"exception\": str(exc),\n            },\n        )\n        _s3_ingest_miss_counter.inc()\n        return False\n    else:\n        logger.info(\n            \"S3 object found\",\n            
extra={\n                \"s3_file_path\": s3_file_path,\n                \"bucket_name\": get_s3_bucket_name(),\n            },\n        )\n        _s3_ingest_hit_counter.inc()\n        return True\n\n\ndef get_s3_url_for_ingest_id(ingest_id: UUID) -> str | None:\n    \"\"\"Get S3 URL for a given ingest ID if it exists.\n\n    Search for files in S3 using object tags to find the matching ingest_id and returns the S3 URL if found.\n    Used by the download endpoint to redirect to S3 if available.\n\n    Parameters\n    ----------\n    ingest_id : UUID\n        The ingest ID to search for in S3 object tags.\n\n    Returns\n    -------\n    str | None\n        S3 URL if file exists, None otherwise.\n\n    \"\"\"\n    if not is_s3_enabled():\n        logger.debug(\"S3 not enabled, skipping URL lookup\", extra={\"ingest_id\": str(ingest_id)})\n        return None\n\n    logger.info(\"Starting S3 URL lookup for ingest ID\", extra={\"ingest_id\": str(ingest_id)})\n\n    try:\n        s3_client = create_s3_client()\n        bucket_name = get_s3_bucket_name()\n\n        # List all objects in the ingest/ prefix and check their tags\n        paginator = s3_client.get_paginator(\"list_objects_v2\")\n        page_iterator = paginator.paginate(Bucket=bucket_name, Prefix=\"ingest/\")\n\n        objects_checked = 0\n        for page in page_iterator:\n            if \"Contents\" not in page:\n                continue\n\n            for obj in page[\"Contents\"]:\n                key = obj[\"Key\"]\n                objects_checked += 1\n                if _check_object_tags(\n                    s3_client=s3_client,\n                    bucket_name=bucket_name,\n                    key=key,\n                    target_ingest_id=ingest_id,\n                ):\n                    s3_url = _build_s3_url(key)\n                    logger.info(\n                        \"Found S3 object for ingest ID\",\n                        extra={\n                            \"ingest_id\": 
str(ingest_id),\n                            \"s3_key\": key,\n                            \"s3_url\": s3_url,\n                            \"objects_checked\": objects_checked,\n                        },\n                    )\n                    return s3_url\n\n        logger.info(\n            \"No S3 object found for ingest ID\",\n            extra={\n                \"ingest_id\": str(ingest_id),\n                \"objects_checked\": objects_checked,\n            },\n        )\n\n    except ClientError as err:\n        logger.exception(\n            \"Error during S3 URL lookup\",\n            extra={\n                \"ingest_id\": str(ingest_id),\n                \"error_code\": err.response.get(\"Error\", {}).get(\"Code\"),\n                \"error_message\": str(err),\n            },\n        )\n\n    return None\n"
  },
  {
    "path": "src/server/server_config.py",
    "content": "\"\"\"Configuration for the server.\"\"\"\n\nfrom __future__ import annotations\n\nimport os\nfrom pathlib import Path\n\nfrom fastapi.templating import Jinja2Templates\n\nMAX_DISPLAY_SIZE: int = 300_000\n\n# Slider configuration (if updated, update the logSliderToSize function in src/static/js/utils.js)\nDEFAULT_FILE_SIZE_KB: int = 5 * 1024  # 5 mb\nMAX_FILE_SIZE_KB: int = 100 * 1024  # 100 mb\n\nEXAMPLE_REPOS: list[dict[str, str]] = [\n    {\"name\": \"Gitingest\", \"url\": \"https://github.com/coderamp-labs/gitingest\"},\n    {\"name\": \"FastAPI\", \"url\": \"https://github.com/fastapi/fastapi\"},\n    {\"name\": \"Flask\", \"url\": \"https://github.com/pallets/flask\"},\n    {\"name\": \"Excalidraw\", \"url\": \"https://github.com/excalidraw/excalidraw\"},\n    {\"name\": \"ApiAnalytics\", \"url\": \"https://github.com/tom-draper/api-analytics\"},\n]\n\n\n# Version and repository configuration\nAPP_REPOSITORY = os.getenv(\"APP_REPOSITORY\", \"https://github.com/coderamp-labs/gitingest\")\nAPP_VERSION = os.getenv(\"APP_VERSION\", \"unknown\")\nAPP_VERSION_URL = os.getenv(\"APP_VERSION_URL\", \"https://github.com/coderamp-labs/gitingest\")\n\n\ndef get_version_info() -> dict[str, str]:\n    \"\"\"Get version information including display version and link.\n\n    Returns\n    -------\n    dict[str, str]\n        Dictionary containing 'version' and 'version_link' keys.\n\n    \"\"\"\n    # Use pre-computed values from GitHub Actions\n    display_version = APP_VERSION\n    version_link = APP_VERSION_URL\n\n    # Fallback to repository root if no URL is provided\n    if version_link == APP_REPOSITORY or not version_link:\n        version_link = f\"{APP_REPOSITORY.rstrip('/')}/tree/main\"\n\n    return {\n        \"version\": display_version,\n        \"version_link\": version_link,\n    }\n\n\n# Use absolute path to templates directory\ntemplates_dir = Path(__file__).parent / \"templates\"\ntemplates = Jinja2Templates(directory=templates_dir)\n"
  },
  {
    "path": "src/server/server_utils.py",
    "content": "\"\"\"Utility functions for the server.\"\"\"\n\nfrom fastapi import Request\nfrom fastapi.responses import Response\nfrom slowapi import Limiter, _rate_limit_exceeded_handler\nfrom slowapi.errors import RateLimitExceeded\nfrom slowapi.util import get_remote_address\n\nfrom gitingest.utils.logging_config import get_logger\n\n# Initialize logger for this module\nlogger = get_logger(__name__)\n\n# Initialize a rate limiter\nlimiter = Limiter(key_func=get_remote_address)\n\n\nasync def rate_limit_exception_handler(request: Request, exc: Exception) -> Response:\n    \"\"\"Handle rate-limiting errors with a custom exception handler.\n\n    Parameters\n    ----------\n    request : Request\n        The incoming HTTP request.\n    exc : Exception\n        The exception raised, expected to be RateLimitExceeded.\n\n    Returns\n    -------\n    Response\n        A response indicating that the rate limit has been exceeded.\n\n    Raises\n    ------\n    exc\n        If the exception is not a RateLimitExceeded error, it is re-raised.\n\n    \"\"\"\n    if isinstance(exc, RateLimitExceeded):\n        # Delegate to the default rate limit handler\n        return _rate_limit_exceeded_handler(request, exc)\n    # Re-raise other exceptions\n    raise exc\n\n\n## Color printing utility\nclass Colors:\n    \"\"\"ANSI color codes.\"\"\"\n\n    BLACK = \"\\033[0;30m\"\n    RED = \"\\033[0;31m\"\n    GREEN = \"\\033[0;32m\"\n    BROWN = \"\\033[0;33m\"\n    BLUE = \"\\033[0;34m\"\n    PURPLE = \"\\033[0;35m\"\n    CYAN = \"\\033[0;36m\"\n    LIGHT_GRAY = \"\\033[0;37m\"\n    DARK_GRAY = \"\\033[1;30m\"\n    LIGHT_RED = \"\\033[1;31m\"\n    LIGHT_GREEN = \"\\033[1;32m\"\n    YELLOW = \"\\033[1;33m\"\n    LIGHT_BLUE = \"\\033[1;34m\"\n    LIGHT_PURPLE = \"\\033[1;35m\"\n    LIGHT_CYAN = \"\\033[1;36m\"\n    WHITE = \"\\033[1;37m\"\n    BOLD = \"\\033[1m\"\n    FAINT = \"\\033[2m\"\n    ITALIC = \"\\033[3m\"\n    UNDERLINE = \"\\033[4m\"\n    BLINK = \"\\033[5m\"\n    
NEGATIVE = \"\\033[7m\"\n    CROSSED = \"\\033[9m\"\n    END = \"\\033[0m\"\n"
  },
  {
    "path": "src/server/templates/base.jinja",
    "content": "<!DOCTYPE html>\n<html lang=\"en\">\n    <head>\n        <meta charset=\"UTF-8\">\n        <meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\">\n        {# Favicons #}\n        <link rel=\"icon\" type=\"image/x-icon\" href=\"/static/favicons/favicon.ico\">\n        <link rel=\"icon\" type=\"image/svg+xml\" href=\"/static/favicons/favicon.svg\">\n        <link rel=\"icon\"\n              type=\"image/png\"\n              href=\"/static/favicons/favicon-64.png\"\n              sizes=\"64x64\">\n        <link rel=\"apple-touch-icon\"\n              type=\"image/png\"\n              href=\"/static/favicons/apple-touch-icon.png\"\n              sizes=\"180x180\">\n        {# Search Engine Meta Tags #}\n        <meta name=\"title\"       content=\"Gitingest\">\n        <meta name=\"description\"\n              content=\"Replace 'hub' with 'ingest' in any GitHub URL for a prompt-friendly text.\">\n        <meta name=\"keywords\"\n              content=\"Gitingest, AI tools, LLM integration, Ingest, Digest, Context, Prompt, Git workflow, codebase extraction, Git repository, Git automation, Summarize, prompt-friendly\">\n        <meta name=\"robots\"      content=\"index, follow\">\n        {# Open Graph Meta Tags #}\n        <meta property=\"og:title\"       content=\"Gitingest\">\n        <meta property=\"og:description\"\n              content=\"Replace 'hub' with 'ingest' in any GitHub URL for a prompt-friendly text.\">\n        <meta property=\"og:type\"        content=\"website\">\n        <meta property=\"og:url\"         content=\"{{ request.url }}\">\n        <meta property=\"og:image\"       content=\"/static/og-image.png\">\n        {# Web App Meta #}\n        <meta name=\"apple-mobile-web-app-title\"            content=\"Gitingest\">\n        <meta name=\"application-name\"                      content=\"Gitingest\">\n        <meta name=\"theme-color\"                           content=\"#FCA847\">\n        <meta 
name=\"mobile-web-app-capable\"                content=\"yes\">\n        <meta name=\"apple-mobile-web-app-status-bar-style\" content=\"default\">\n        {# Twitter card #}\n        <meta name=\"twitter:card\"        content=\"summary_large_image\">\n        <meta name=\"twitter:title\"       content=\"Gitingest\">\n        <meta name=\"twitter:description\"\n              content=\"Replace 'hub' with 'ingest' in any GitHub URL for a prompt-friendly text.\">\n        <meta name=\"twitter:image\"       content=\"/static/og-image.png\">\n        {# Title #}\n        <title>\n            {% block title %}\n                {% if short_repo_url %}\n                    Gitingest - {{ short_repo_url }}\n                {% else %}\n                    Gitingest\n                {% endif %}\n            {% endblock %}\n        </title>\n        <script src=\"https://cdn.tailwindcss.com\"></script>\n        {% include 'components/tailwind_components.html' %}\n    </head>\n    <body class=\"bg-[#FFFDF8] min-h-screen flex flex-col\">\n        {% include 'components/navbar.jinja' %}\n        {# Main content wrapper #}\n        <main class=\"flex-1 w-full\">\n            <div class=\"max-w-4xl mx-auto px-4 py-8\">\n                {% block content %}{% endblock %}\n            </div>\n        </main>\n        {# Footer #}\n        {% include 'components/footer.jinja' %}\n        {# Scripts #}\n        <script defer src=\"/static/js/index.js\"></script>\n        <script defer src=\"/static/js/utils.js\"></script>\n        <script defer src=\"/static/js/posthog.js\"></script>\n    </body>\n</html>\n"
  },
  {
    "path": "src/server/templates/components/_macros.jinja",
    "content": "{# Icon link #}\n{% macro footer_icon_link(href, icon, label) -%}\n    <a href=\"{{ href }}\"\n       target=\"_blank\"\n       rel=\"noopener noreferrer\"\n       class=\"hover:underline flex items-center\">\n        <img src=\"/static/{{ icon }}\" alt=\"{{ label }} logo\" class=\"w-4 h-4 mr-1\">\n        {{ label }}\n    </a>\n{%- endmacro %}\n"
  },
  {
    "path": "src/server/templates/components/footer.jinja",
    "content": "{% from 'components/_macros.jinja' import footer_icon_link %}\n<footer class=\"w-full border-t-[3px] border-gray-900 mt-auto\">\n    <div class=\"max-w-4xl mx-auto px-4 py-4\">\n        <div class=\"grid grid-cols-3 items-center text-gray-900 text-sm\">\n            {# Left column — Chrome + PyPI #}\n            <div class=\"flex items-center space-x-4\">\n                {{ footer_icon_link('https://chromewebstore.google.com/detail/adfjahbijlkjfoicpjkhjicpjpjfaood',\n                                'icons/chrome.svg',\n                                'Chrome Extension') }}\n                {{ footer_icon_link('https://pypi.org/project/gitingest',\n                                'icons/python.svg',\n                                'Python Package') }}\n            </div>\n            {# Middle column - Version information #}\n            <div class=\"flex justify-center\">\n                <span>Version:&nbsp;</span>\n                {% if version != \"unknown\" %}\n                    <a href=\"{{ version_link }}\"\n                       target=\"_blank\"\n                       rel=\"noopener noreferrer\"\n                       class=\"text-blue-600 hover:text-blue-800 underline\">{{ version }}</a>\n                {% else %}\n                    <span>{{ version }}</span>\n                {% endif %}\n            </div>\n            {# Right column - Discord #}\n            <div class=\"flex justify-end\">\n                {{ footer_icon_link('https://discord.gg/zerRaGK9EC',\n                                'icons/discord.svg',\n                                'Discord') }}\n            </div>\n        </div>\n    </div>\n</footer>\n"
  },
  {
    "path": "src/server/templates/components/git_form.jinja",
    "content": "<div class=\"relative\">\n    <div class=\"w-full h-full absolute inset-0 bg-gray-900 rounded-xl translate-y-2 translate-x-2\"></div>\n    <div class=\"rounded-xl relative z-20 p-8 sm:p-10 border-[3px] border-gray-900 bg-[#fff4da]\">\n        <img src=\"https://cdn.devdojo.com/images/january2023/shape-1.png\"\n             class=\"absolute md:block hidden left-0 h-[4.5rem] w-[4.5rem] bottom-0 -translate-x-full ml-3\">\n        <!-- Ingest Form -->\n        <form id=\"ingestForm\" method=\"post\" onsubmit=\"handleSubmit(event, true)\">\n            <!-- Top row: repo URL + Ingest button -->\n            <div class=\"flex md:flex-row flex-col w-full h-full justify-center items-stretch space-y-5 md:space-y-0 md:space-x-5\">\n                <!-- Repository URL Input -->\n                <div class=\"relative w-full h-full\">\n                    <div class=\"w-full h-full rounded bg-gray-900 translate-y-1 translate-x-1 absolute inset-0 z-10\"></div>\n                    <input type=\"text\"\n                           name=\"input_text\"\n                           id=\"input_text\"\n                           placeholder=\"https://github.com/...\"\n                           value=\"{{ repo_url if repo_url else '' }}\"\n                           required\n                           class=\"border-[3px] w-full relative z-20 border-gray-900 placeholder-gray-600 text-lg font-medium focus:outline-none py-3.5 px-6 rounded bg-[#E8F0FE]\">\n                </div>\n                <!-- Ingest button -->\n                <div class=\"relative w-auto flex-shrink-0 h-full group\">\n                    <div class=\"w-full h-full rounded bg-gray-800 translate-y-1 translate-x-1 absolute inset-0 z-10\"></div>\n                    <button type=\"submit\"\n                            class=\"py-3.5 rounded px-6 group-hover:-translate-y-px group-hover:-translate-x-px ease-out duration-300 z-20 relative w-full border-[3px] border-gray-900 font-medium bg-[#ffc480] 
tracking-wide text-lg flex-shrink-0 text-gray-900\">\n                        Ingest\n                    </button>\n                </div>\n            </div>\n            <!-- Hidden fields -->\n            <input type=\"hidden\" name=\"pattern_type\" value=\"exclude\">\n            <input type=\"hidden\" name=\"pattern\" value=\"\">\n            <!-- Controls row: pattern selector, file size slider, PAT checkbox with PAT field below -->\n            <div id=\"controlsRow\"\n                 class=\"mt-7 grid gap-6 grid-cols-1 sm:grid-cols-[3fr_2fr] md:gap-x-10 lg:grid-cols-[5fr_4fr_4fr] lg:gap-y-0\">\n                <!-- Pattern selector -->\n                <div class=\"w-full relative self-center\">\n                    <div class=\"w-full h-full rounded bg-gray-900 translate-y-1 translate-x-1 absolute inset-0 z-10\"></div>\n                    <div class=\"flex relative z-20 border-[3px] border-gray-900 rounded bg-white\">\n                        <!-- Pattern type selector -->\n                        <div class=\"relative flex items-center\">\n                            <select id=\"pattern_type\"\n                                    name=\"pattern_type\"\n                                    onchange=\"changePattern()\"\n                                    class=\"pattern-select\">\n                                <option value=\"exclude\"\n                                        {% if pattern_type == 'exclude' or not pattern_type %}selected{% endif %}>\n                                    Exclude\n                                </option>\n                                <option value=\"include\" {% if pattern_type == 'include' %}selected{% endif %}>Include</option>\n                            </select>\n                            <svg class=\"absolute right-2 w-4 h-4 pointer-events-none\"\n                                 xmlns=\"http://www.w3.org/2000/svg\"\n                                 viewBox=\"0 0 24 24\"\n                                 
fill=\"none\"\n                                 stroke=\"currentColor\"\n                                 stroke-width=\"2\"\n                                 stroke-linecap=\"round\"\n                                 stroke-linejoin=\"round\">\n                                <polyline points=\"6 9 12 15 18 9\" />\n                            </svg>\n                        </div>\n                        <!-- Pattern input field -->\n                        <input type=\"text\"\n                               id=\"pattern\"\n                               name=\"pattern\"\n                               placeholder=\"*.md, src/ \"\n                               value=\"{{ pattern if pattern else '' }}\"\n                               class=\" py-2 px-2 bg-[#E8F0FE] focus:outline-none w-full\">\n                    </div>\n                </div>\n                <!-- File size selector -->\n                <div class=\"w-full self-center\">\n                    <label for=\"file_size\" class=\"block text-gray-700 mb-1\">\n                        Include files under: <span id=\"size_value\" class=\"font-bold\">50kB</span>\n                    </label>\n                    <input type=\"range\"\n                           id=\"file_size\"\n                           min=\"1\"\n                           max=\"500\"\n                           required\n                           value=\"{{ default_max_file_size }}\"\n                           class=\"w-full h-3 bg-[#FAFAFA] bg-no-repeat bg-[length:50%_100%] bg-[#ebdbb7] appearance-none border-[3px] border-gray-900 rounded-sm focus:outline-none bg-gradient-to-r from-[#FE4A60] to-[#FE4A60] [&::-webkit-slider-thumb]:w-5 [&::-webkit-slider-thumb]:h-7 [&::-webkit-slider-thumb]:appearance-none [&::-webkit-slider-thumb]:bg-white [&::-webkit-slider-thumb]:rounded-sm [&::-webkit-slider-thumb]:cursor-pointer [&::-webkit-slider-thumb]:border-solid [&::-webkit-slider-thumb]:border-[3px] 
[&::-webkit-slider-thumb]:border-gray-900 [&::-webkit-slider-thumb]:shadow-[3px_3px_0_#000]\">\n                    <input type=\"hidden\" id=\"max_file_size_kb\" name=\"max_file_size\" value=\"\">\n                </div>\n                <!-- PAT checkbox with PAT field below -->\n                <div class=\"flex flex-col items-start w-full sm:col-span-2 lg:col-span-1 lg:row-span-2 lg:pt-3.5\">\n                    <!-- PAT checkbox -->\n                    <div class=\"flex items-center space-x-2\">\n                        <label for=\"showAccessSettings\"\n                               class=\"flex gap-2 text-gray-900 cursor-pointer\">\n                            <div class=\"relative w-6 h-6\">\n                                <input type=\"checkbox\"\n                                       id=\"showAccessSettings\"\n                                       onchange=\"toggleAccessSettings()\"\n                                       {% if token %}checked{% endif %}\n                                       class=\"cursor-pointer peer appearance-none w-full h-full rounded-sm border-[3px] border-current bg-white m-0 text-current shadow-[3px_3px_0_currentColor]\" />\n                                <span class=\"absolute inset-0 w-3 h-3 m-auto scale-0 transition-transform duration-150 ease-in-out shadow-[inset_1rem_1rem_#FE4A60] bg-[CanvasText] origin-bottom-left peer-checked:scale-100\"\n                                      style=\"clip-path: polygon(14% 44%, 0 65%, 50% 100%, 100% 16%, 80% 0%, 43% 62%)\"></span>\n                            </div>\n                            Private Repository\n                        </label>\n                        <span class=\"badge-new\">NEW</span>\n                    </div>\n                    <!-- PAT field -->\n                    <div id=\"accessSettingsContainer\"\n                         class=\"{% if not token %}hidden {% endif %}mt-3 w-full\">\n                        <div class=\"relative w-full\">\n            
                <div class=\"w-full h-full rounded bg-gray-900 translate-y-1 translate-x-1 absolute inset-0 z-10\"></div>\n                            <div class=\"flex relative z-20 border-[3px] border-gray-900 rounded bg-white\">\n                                <input id=\"token\"\n                                       type=\"password\"\n                                       name=\"token\"\n                                       placeholder=\"Personal Access Token\"\n                                       value=\"{{ token if token else '' }}\"\n                                       class=\"py-2 pl-2 pr-8 bg-[#E8F0FE] focus:outline-none w-full rounded\">\n                                <!-- Info icon with tooltip -->\n                                <span class=\"absolute right-3 top-1/2 -translate-y-1/2\">\n                                    <!-- Icon -->\n                                    <svg class=\"w-4 h-4 text-gray-600 cursor-pointer peer\"\n                                         xmlns=\"http://www.w3.org/2000/svg\"\n                                         fill=\"none\"\n                                         viewBox=\"0 0 24 24\"\n                                         stroke=\"currentColor\"\n                                         stroke-width=\"2\">\n                                        <circle cx=\"12\" cy=\"12\" r=\"10\" />\n                                        <path stroke-linecap=\"round\" stroke-linejoin=\"round\" d=\"M12 16v-4m0-4h.01\" />\n                                    </svg>\n                                    <!-- Tooltip (tooltip listens to peer-hover) -->\n                                    <div class=\"absolute bottom-full mb-2 left-1/2 -translate-x-1/2 bg-gray-900 text-white text-xs leading-tight py-1 px-2 rounded shadow-lg opacity-0 pointer-events-none peer-hover:opacity-100 peer-hover:pointer-events-auto transition-opacity duration-200 whitespace-nowrap\">\n                                        <ul 
class=\"list-disc pl-4\">\n                                            <li>PAT is never stored in the backend</li>\n                                            <li>Used once for cloning, then discarded from memory</li>\n                                            <li>No browser caching</li>\n                                            <li>Cloned repos are deleted after processing</li>\n                                        </ul>\n                                    </div>\n                                </span>\n                            </div>\n                        </div>\n                        <!-- Help section -->\n                        <div class=\"mt-2 flex items-center space-x-1\">\n                            <a href=\"https://github.com/settings/tokens/new?description=gitingest&scopes=repo\"\n                               target=\"_blank\"\n                               rel=\"noopener noreferrer\"\n                               class=\"text-sm text-gray-600 hover:text-gray-800 flex items-center space-x-1 underline\">\n                                <span>Get your token</span>\n                                <svg class=\"w-3 h-3\"\n                                     fill=\"none\"\n                                     stroke=\"currentColor\"\n                                     viewBox=\"0 0 24 24\"\n                                     xmlns=\"http://www.w3.org/2000/svg\">\n                                    <path stroke-linecap=\"round\" stroke-linejoin=\"round\" stroke-width=\"2\" d=\"M10 6H6a2 2 0 00-2 2v10a2 2 0 002 2h10a2 2 0 002-2v-4M14 4h6m0 0v6m0-6L10 14\" />\n                                </svg>\n                            </a>\n                        </div>\n                    </div>\n                </div>\n            </div>\n        </form>\n        <!-- Example repositories section -->\n        {% if show_examples %}\n            <div id=\"exampleRepositories\"\n                 class=\"{% if token %}lg:mt-0 {% endif 
%} mt-4\">\n                <p class=\"opacity-70 mb-1\">Try these example repositories:</p>\n                <div class=\"flex flex-wrap gap-2\">\n                    {% for example in examples %}\n                        <button onclick=\"submitExample('{{ example.url }}')\"\n                                class=\"px-4 py-1 bg-[#EBDBB7] hover:bg-[#FFC480] text-gray-900 rounded transition-colors duration-200 border-[3px] border-gray-900 relative hover:-translate-y-px hover:-translate-x-px\">\n                            {{ example.name }}\n                        </button>\n                    {% endfor %}\n                </div>\n            </div>\n        {% endif %}\n    </div>\n</div>\n<script defer src=\"/static/js/git.js\"></script>\n<script defer src=\"/static/js/git_form.js\"></script>\n"
  },
  {
    "path": "src/server/templates/components/navbar.jinja",
    "content": "<header class=\"sticky top-0 bg-[#FFFDF8] border-b-[3px] border-gray-900 z-50\">\n    <div class=\"max-w-4xl mx-auto px-4\">\n        <div class=\"flex justify-between items-center h-16\">\n            {# Logo #}\n            <div class=\"flex items-center gap-4\">\n                <h1 class=\"text-2xl font-bold tracking-tight\">\n                    <a href=\"/\" class=\"hover:opacity-80 transition-opacity\">\n                        <span class=\"text-gray-900\">Git</span><span class=\"text-[#FE4A60]\">ingest</span>\n                    </a>\n                </h1>\n            </div>\n            {# Navigation with updated styling #}\n            <nav class=\"flex items-center space-x-6\">\n                <a href=\"/llms.txt\" class=\"link-bounce flex items-center text-gray-900\">\n                    <span class=\"badge-new\">NEW</span>\n                    /llms.txt\n                </a>\n                {# GitHub link #}\n                <div class=\"flex items-center gap-2\">\n                    <a href=\"https://github.com/coderamp-labs/gitingest\"\n                       target=\"_blank\"\n                       rel=\"noopener noreferrer\"\n                       class=\"link-bounce flex items-center gap-1.5 text-gray-900\">\n                        <img src=\"/static/icons/github.svg\" class=\"w-4 h-4\" alt=\"GitHub logo\">\n                        GitHub\n                    </a>\n                    {# Star counter #}\n                    <div class=\"no-drag flex items-center text-sm text-gray-600\">\n                        <img src=\"/static/svg/github-star.svg\"\n                             class=\"w-4 h-4 mr-1\"\n                             alt=\"GitHub star icon\">\n                        <span id=\"github-stars\">0</span>\n                    </div>\n                </div>\n            </nav>\n        </div>\n    </div>\n</header>\n{# Load GitHub stars script #}\n<script defer src=\"/static/js/navbar.js\"></script>\n"
  },
  {
    "path": "src/server/templates/components/result.jinja",
    "content": "<div class=\"mt-10\">\n    <!-- Error Message (hidden by default) -->\n    <div id=\"results-error\" style=\"display:none\"></div>\n    <!-- Loading Spinner (hidden by default) -->\n    <div id=\"results-loading\" style=\"display:none\">\n        <div class=\"relative mt-10\">\n            <div class=\"w-full h-full absolute inset-0 bg-black rounded-xl translate-y-2 translate-x-2\"></div>\n            <div class=\"bg-[#fafafa] rounded-xl border-[3px] border-gray-900 p-6 relative z-20 flex flex-col items-center space-y-4\">\n                <div class=\"loader border-8 border-[#fff4da] border-t-8 border-t-[#ffc480] rounded-full w-16 h-16 animate-spin\"></div>\n                <p class=\"text-lg font-bold text-gray-900\">Loading...</p>\n            </div>\n        </div>\n    </div>\n    <!-- Results Section (hidden by default) -->\n    <div id=\"results-section\" style=\"display:none\">\n        <div class=\"relative\">\n            <div class=\"w-full h-full absolute inset-0 bg-gray-900 rounded-xl translate-y-2 translate-x-2\"></div>\n            <div class=\"bg-[#fafafa] rounded-xl border-[3px] border-gray-900 p-6 relative z-20 space-y-6\">\n                <div class=\"grid grid-cols-1 md:grid-cols-12 gap-6\">\n                    <div class=\"md:col-span-5\">\n                        <div class=\"flex justify-between items-center mb-4 py-2\">\n                            <h3 class=\"text-lg font-bold text-gray-900\">Summary</h3>\n                        </div>\n                        <div class=\"relative\">\n                            <div class=\"w-full h-full rounded bg-gray-900 translate-y-1 translate-x-1 absolute inset-0\"></div>\n                            <textarea id=\"result-summary\"\n                                      class=\"w-full h-[160px] p-4 bg-[#fff4da] border-[3px] border-gray-900 rounded font-mono text-sm resize-none focus:outline-none relative z-10\"\n                                      readonly></textarea>\n           
             </div>\n                        <div class=\"relative mt-4 inline-block group ml-4\">\n                            <div class=\"w-full h-full rounded bg-gray-900 translate-y-1 translate-x-1 absolute inset-0\"></div>\n                            <button onclick=\"copyFullDigest()\"\n                                    class=\"inline-flex items-center px-4 py-2 bg-[#ffc480] border-[3px] border-gray-900 text-gray-900 rounded group-hover:-translate-y-px group-hover:-translate-x-px transition-transform relative z-10\">\n                                <svg class=\"w-4 h-4 mr-2\"\n                                     fill=\"none\"\n                                     stroke=\"currentColor\"\n                                     viewBox=\"0 0 24 24\">\n                                    <path stroke-linecap=\"round\" stroke-linejoin=\"round\" stroke-width=\"2\" d=\"M8 5H6a2 2 0 00-2 2v12a2 2 0 002 2h10a2 2 0 002-2v-1M8 5a2 2 0 002 2h2a2 2 0 002-2M8 5a2 2 0 012-2h2a2 2 0 012 2m0 0h2a2 2 0 012 2v3m2 4H10m0 0l3-3m-3 3l3 3\" />\n                                </svg>\n                                Copy all\n                            </button>\n                        </div>\n                        <div class=\"relative mt-4 inline-block group ml-4\">\n                            <div class=\"w-full h-full rounded bg-gray-900 translate-y-1 translate-x-1 absolute inset-0\"></div>\n                            <button onclick=\"downloadFullDigest()\"\n                                    class=\"inline-flex items-center px-4 py-2 bg-[#ffc480] border-[3px] border-gray-900 text-gray-900 rounded group-hover:-translate-y-px group-hover:-translate-x-px transition-transform relative z-10\">\n                                <svg class=\"w-4 h-4 mr-2\"\n                                     fill=\"none\"\n                                     stroke=\"currentColor\"\n                                     viewBox=\"0 0 24 24\">\n                                    <path 
stroke-linecap=\"round\" stroke-linejoin=\"round\" stroke-width=\"2\" d=\"M12 10v6m0 0l-3-3m3 3l3-3m2 8H7a2 2 0 01-2-2V5a2 2 0 012-2h5.586a1 1 0 01.707.293l5.414 5.414a1 1 0 01.293.707V19a2 2 0 01-2 2z\" />\n                                </svg>\n                                Download\n                            </button>\n                        </div>\n                    </div>\n                    <div class=\"md:col-span-7\">\n                        <div class=\"flex justify-between items-center mb-4\">\n                            <h3 class=\"text-lg font-bold text-gray-900\">Directory Structure</h3>\n                            <div class=\"relative group\">\n                                <div class=\"w-full h-full rounded bg-gray-900 translate-y-1 translate-x-1 absolute inset-0\"></div>\n                                <button onclick=\"copyText('directory-structure')\"\n                                        class=\"px-4 py-2 bg-[#ffc480] border-[3px] border-gray-900 text-gray-900 rounded group-hover:-translate-y-px group-hover:-translate-x-px transition-transform relative z-10 flex items-center gap-2\">\n                                    <svg class=\"w-4 h-4\" fill=\"none\" stroke=\"currentColor\" viewBox=\"0 0 24 24\">\n                                        <path stroke-linecap=\"round\" stroke-linejoin=\"round\" stroke-width=\"2\" d=\"M8 5H6a2 2 0 00-2 2v12a2 2 0 002 2h10a2 2 0 002-2v-1M8 5a2 2 0 002 2h2a2 2 0 002-2M8 5a2 2 0 012-2h2a2 2 0 012 2m0 0h2a2 2 0 012 2v3m2 4H10m0 0l3-3m-3 3l3 3\" />\n                                    </svg>\n                                    Copy\n                                </button>\n                            </div>\n                        </div>\n                        <div class=\"relative\">\n                            <div class=\"w-full h-full rounded bg-gray-900 translate-y-1 translate-x-1 absolute inset-0\"></div>\n                            <div class=\"directory-structure w-full p-4 
bg-[#fff4da] border-[3px] border-gray-900 rounded font-mono text-sm resize-y focus:outline-none relative z-10 h-[215px] overflow-auto\"\n                                 id=\"directory-structure-container\"\n                                 readonly>\n                                <input type=\"hidden\" id=\"directory-structure-content\" value=\"\" />\n                                <pre id=\"directory-structure-pre\"></pre>\n                            </div>\n                        </div>\n                    </div>\n                </div>\n                <div>\n                    <div class=\"flex justify-between items-center mb-4\">\n                        <h3 class=\"text-lg font-bold text-gray-900\">Files Content</h3>\n                        <div class=\"relative group\">\n                            <div class=\"w-full h-full rounded bg-gray-900 translate-y-1 translate-x-1 absolute inset-0\"></div>\n                            <button onclick=\"copyText('result-text')\"\n                                    class=\"px-4 py-2 bg-[#ffc480] border-[3px] border-gray-900 text-gray-900 rounded group-hover:-translate-y-px group-hover:-translate-x-px transition-transform relative z-10 flex items-center gap-2\">\n                                <svg class=\"w-4 h-4\" fill=\"none\" stroke=\"currentColor\" viewBox=\"0 0 24 24\">\n                                    <path stroke-linecap=\"round\" stroke-linejoin=\"round\" stroke-width=\"2\" d=\"M8 5H6a2 2 0 00-2 2v12a2 2 0 002 2h10a2 2 0 002-2v-1M8 5a2 2 0 002 2h2a2 2 0 002-2M8 5a2 2 0 012-2h2a2 2 0 012 2m0 0h2a2 2 0 012 2v3m2 4H10m0 0l3-3m-3 3l3 3\" />\n                                </svg>\n                                Copy\n                            </button>\n                        </div>\n                    </div>\n                    <div class=\"relative\">\n                        <div class=\"w-full h-full rounded bg-gray-900 translate-y-1 translate-x-1 absolute inset-0\"></div>\n                 
       <textarea id=\"result-content\"\n                                  class=\"result-text w-full p-4 bg-[#fff4da] border-[3px] border-gray-900 rounded font-mono text-sm resize-y focus:outline-none relative z-10\"\n                                  style=\"min-height: 600px\"\n                                  readonly></textarea>\n                    </div>\n                </div>\n            </div>\n        </div>\n    </div>\n</div>\n"
  },
  {
    "path": "src/server/templates/components/tailwind_components.html",
    "content": "<style type=\"text/tailwindcss\">\n  @layer components {\n    .badge-new {\n      @apply inline-block -rotate-6 -translate-y-1 mx-1 px-1 bg-[#FE4A60] border border-gray-900 text-white text-[10px] font-bold shadow-[2px_2px_0_0_rgba(0,0,0,1)];\n    }\n    .landing-page-title {\n      @apply inline-block w-full relative text-center text-4xl sm:text-5xl md:text-6xl lg:text-7xl sm:pt-20 lg:pt-5 font-bold tracking-tighter;\n    }\n    .intro-text {\n      @apply text-center text-gray-600 text-lg max-w-2xl mx-auto;\n    }\n    .sparkle-red {\n      @apply absolute flex-shrink-0 h-auto w-14 sm:w-20 md:w-24 p-2 left-0 lg:ml-32 -translate-x-2 md:translate-x-10 lg:-translate-x-full -translate-y-4 sm:-translate-y-8 md:-translate-y-0 lg:-translate-y-10;\n    }\n    .sparkle-green {\n      @apply absolute flex-shrink-0 right-0 bottom-0 w-10 sm:w-16 lg:w-20 -translate-x-10 lg:-translate-x-12 translate-y-4 sm:translate-y-10 md:translate-y-2 lg:translate-y-4;\n    }\n    .pattern-select {\n      @apply min-w-max appearance-none pr-6 pl-2 py-2 bg-[#e6e8eb] border-r-[3px] border-gray-900 cursor-pointer focus:outline-none;\n    }\n  }\n\n  @layer utilities {\n    .no-drag {\n      @apply pointer-events-none select-none;\n      -webkit-user-drag: none;\n    }\n    .link-bounce {\n      @apply transition-transform hover:-translate-y-0.5;\n    }\n  }\n</style>\n"
  },
  {
    "path": "src/server/templates/git.jinja",
    "content": "{% extends \"base.jinja\" %}\n{% block content %}\n    {% if error_message %}\n        <div class=\"mb-6 p-4 bg-red-50 border border-red-200 rounded-lg text-red-700\"\n             id=\"error-message\"\n             data-message=\"{{ error_message }}\">{{ error_message }}</div>\n    {% endif %}\n    {% with show_examples=false %}\n        {% include 'components/git_form.jinja' %}\n    {% endwith %}\n    {% include 'components/result.jinja' %}\n{% endblock content %}\n"
  },
  {
    "path": "src/server/templates/index.jinja",
    "content": "{% extends \"base.jinja\" %}\n{% block content %}\n    <div class=\"mb-8\">\n        <div class=\"relative w-full flex sm:flex-row flex-col justify-center sm:items-center\">\n            {# Title & Sparkles #}\n            <h1 class=\"landing-page-title\">\n                Prompt-friendly\n                <br>\n                codebase&nbsp;\n            </h1>\n            <img src=\"/static/svg/sparkle-red.svg\" class=\"sparkle-red no-drag\">\n            <img src=\"/static/svg/sparkle-green.svg\" class=\"sparkle-green no-drag\">\n        </div>\n        <p class=\"intro-text mt-8\">Turn any Git repository into a simple text digest of its codebase.</p>\n        <p class=\"intro-text mt-0\">This is useful for feeding a codebase into any LLM.</p>\n    </div>\n    {% if error_message %}\n        <div class=\"mb-6 p-4 bg-red-50 border border-red-200 rounded-lg text-red-700\"\n             id=\"error-message\"\n             data-message=\"{{ error_message }}\">{{ error_message }}</div>\n    {% endif %}\n    {% with show_examples=true %}\n        {% include 'components/git_form.jinja' %}\n    {% endwith %}\n    <p class=\"text-gray-600 text-sm max-w-2xl mx-auto text-center mt-4\">\n        You can also replace 'hub' with 'ingest' in any GitHub URL.\n    </p>\n    {% include 'components/result.jinja' %}\n{% endblock %}\n"
  },
  {
    "path": "src/server/templates/swagger_ui.jinja",
    "content": "{% extends \"base.jinja\" %}\n{% block title %}GitIngest API{% endblock %}\n{% block content %}\n    <div class=\"mb-8\">\n        <div class=\"relative w-full flex sm:flex-row flex-col justify-center sm:items-center\">\n            {# Title & Sparkles #}\n            <h1 class=\"landing-page-title\">\n                GitIngest\n                <br>\n                API&nbsp;\n            </h1>\n            <img src=\"/static/svg/sparkle-red.svg\" class=\"sparkle-red no-drag\">\n            <img src=\"/static/svg/sparkle-green.svg\" class=\"sparkle-green no-drag\">\n        </div>\n        <p class=\"intro-text mt-8\">Turn any Git repository into a simple text digest of its codebase.</p>\n        <p class=\"intro-text mt-0\">This is useful for feeding a codebase into any LLM.</p>\n    </div>\n    <div class=\"bg-[#fff4da] rounded-xl border-[3px] border-gray-900 p-4 md:p-8 relative z-20\">\n        <div id=\"swagger-ui\"></div>\n    </div>\n    <link rel=\"stylesheet\"\n          href=\"https://unpkg.com/swagger-ui-dist@5/swagger-ui.css\">\n    <script src=\"https://unpkg.com/swagger-ui-dist@5/swagger-ui-bundle.js\"></script>\n    <script>\n  window.onload = function() {\n    SwaggerUIBundle({\n      url: \"/openapi.json\",\n      dom_id: '#swagger-ui',\n      presets: [\n        SwaggerUIBundle.presets.apis,\n        SwaggerUIBundle.SwaggerUIStandalonePreset\n      ],\n      layout: \"BaseLayout\",\n      deepLinking: true,\n    });\n  }\n    </script>\n{% endblock %}\n"
  },
  {
    "path": "src/static/js/git.js",
    "content": "function waitForStars() {\n    return new Promise((resolve) => {\n        const check = () => {\n            const stars = document.getElementById('github-stars');\n\n            if (stars && stars.textContent !== '0') {resolve();}\n            else {setTimeout(check, 10);}\n        };\n\n        check();\n    });\n}\n\ndocument.addEventListener('DOMContentLoaded', () => {\n    const urlInput = document.getElementById('input_text');\n    const form = document.getElementById('ingestForm');\n\n    if (urlInput && urlInput.value.trim() && form) {\n    // Wait for stars to be loaded before submitting\n        waitForStars().then(() => {\n            const submitEvent = new SubmitEvent('submit', {\n                cancelable: true,\n                bubbles: true\n            });\n\n            Object.defineProperty(submitEvent, 'target', {\n                value: form,\n                enumerable: true\n            });\n            handleSubmit(submitEvent, true);\n        });\n    }\n});\n"
  },
  {
    "path": "src/static/js/git_form.js",
    "content": "// Strike-through / un-strike file lines when the pattern-type menu flips.\nfunction changePattern() {\n    const dirPre = document.getElementById('directory-structure-pre');\n\n    if (!dirPre) {return;}\n\n    const treeLineElements = Array.from(dirPre.querySelectorAll('pre[name=\"tree-line\"]'));\n\n    // Skip the first tree line element\n    treeLineElements.slice(2).forEach((element) => {\n        element.classList.remove('line-through');\n        element.classList.remove('text-gray-500');\n    });\n\n    // Reset the pattern input field\n    const patternInput = document.getElementById('pattern');\n\n    if (patternInput) {\n        patternInput.value = '';\n    }\n}\n\n// Show/hide the Personal-Access-Token section when the \"Private repository\" checkbox is toggled.\nfunction toggleAccessSettings() {\n    const container = document.getElementById('accessSettingsContainer');\n    const examples = document.getElementById('exampleRepositories');\n    const show = document.getElementById('showAccessSettings')?.checked;\n\n    container?.classList.toggle('hidden', !show);\n    examples?.classList.toggle('lg:mt-0', show);\n}\n\n\n\ndocument.addEventListener('DOMContentLoaded', () => {\n    toggleAccessSettings();\n    changePattern();\n});\n\n\n// Make them available to existing inline attributes\nwindow.changePattern = changePattern;\nwindow.toggleAccessSettings = toggleAccessSettings;\n"
  },
  {
    "path": "src/static/js/index.js",
    "content": "function submitExample(repoName) {\n    const input = document.getElementById('input_text');\n\n    if (input) {\n        input.value = repoName;\n        input.focus();\n    }\n}\n\n// Make it visible to inline onclick handlers\nwindow.submitExample = submitExample;\n"
  },
  {
    "path": "src/static/js/navbar.js",
    "content": "// Fetch GitHub stars\nfunction formatStarCount(count) {\n    if (count >= 1000) {return `${ (count / 1000).toFixed(1) }k`;}\n\n    return count.toString();\n}\n\nasync function fetchGitHubStars() {\n    try {\n        const res = await fetch('https://api.github.com/repos/coderamp-labs/gitingest');\n\n        if (!res.ok) {throw new Error(`${res.status} ${res.statusText}`);}\n        const data = await res.json();\n\n        document.getElementById('github-stars').textContent =\n        formatStarCount(data.stargazers_count);\n    } catch (err) {\n        console.error('Error fetching GitHub stars:', err);\n        const el = document.getElementById('github-stars').parentElement;\n\n        if (el) {el.style.display = 'none';}\n    }\n}\n\n// auto-run when script loads\nfetchGitHubStars();\n"
  },
  {
    "path": "src/static/js/posthog.js",
    "content": "/* eslint-disable */\n!function (t, e) {\n    let o, n, p, r;\n    if (e.__SV) {return;}                 // already loaded\n\n    window.posthog = e;\n    e._i = [];\n    e.init = function (i, s, a) {\n        function g(t, e) {\n            const o = e.split(\".\");\n            if (o.length === 2) {\n                t = t[o[0]];\n                e = o[1];\n            }\n            t[e] = function () {\n                t.push([e].concat(Array.prototype.slice.call(arguments, 0)));\n            };\n        }\n\n        p = t.createElement(\"script\");\n        p.type = \"text/javascript\";\n        p.crossOrigin = \"anonymous\";\n        p.async = true;\n        p.src = `${ s.api_host.replace(\".i.posthog.com\", \"-assets.i.posthog.com\") }/static/array.js`;\n\n        r = t.getElementsByTagName(\"script\")[0];\n        r.parentNode.insertBefore(p, r);\n\n        let u = e;\n        if (a !== undefined) {\n            u = e[a] = [];\n        } else {\n            a = \"posthog\";\n        }\n\n        u.people = u.people || [];\n        u.toString = function (t) {\n            let e = \"posthog\";\n            if (a !== \"posthog\") {e += `.${ a }`;}\n            if (!t) {e += \" (stub)\";}\n            return e;\n        };\n        u.people.toString = function () {\n            return `${ u.toString(1) }.people (stub)`;\n        };\n\n\n        o = [\n            \"init\", \"capture\", \"register\", \"register_once\", \"register_for_session\", \"unregister\",\n            \"unregister_for_session\", \"getFeatureFlag\", \"getFeatureFlagPayload\", \"isFeatureEnabled\",\n            \"reloadFeatureFlags\", \"updateEarlyAccessFeatureEnrollment\", \"getEarlyAccessFeatures\",\n            \"on\", \"onFeatureFlags\", \"onSessionId\", \"getSurveys\", \"getActiveMatchingSurveys\",\n            \"renderSurvey\", \"canRenderSurvey\", \"getNextSurveyStep\", \"identify\", \"setPersonProperties\",\n            \"group\", \"resetGroups\", 
\"setPersonPropertiesForFlags\", \"resetPersonPropertiesForFlags\",\n            \"setGroupPropertiesForFlags\", \"resetGroupPropertiesForFlags\", \"reset\", \"get_distinct_id\",\n            \"getGroups\", \"get_session_id\", \"get_session_replay_url\", \"alias\", \"set_config\",\n            \"startSessionRecording\", \"stopSessionRecording\", \"sessionRecordingStarted\",\n            \"captureException\", \"loadToolbar\", \"get_property\", \"getSessionProperty\",\n            \"createPersonProfile\", \"opt_in_capturing\", \"opt_out_capturing\",\n            \"has_opted_in_capturing\", \"has_opted_out_capturing\", \"clear_opt_in_out_capturing\",\n            \"debug\", \"getPageViewId\"\n        ];\n\n        for (n = 0; n < o.length; n++) {g(u, o[n]);}\n        e._i.push([i, s, a]);\n    };\n\n    e.__SV = 1;\n}(document, window.posthog || []);\n\n/* Initialise PostHog */\nposthog.init('phc_9aNpiIVH2zfTWeY84vdTWxvrJRCQQhP5kcVDXUvcdou', {\n    api_host: 'https://eu.i.posthog.com',\n    person_profiles: 'always',\n});\n"
  },
  {
    "path": "src/static/js/utils.js",
    "content": "function getFileName(element) {\n    const indentSize = 4;\n    let path = '';\n    let prevIndentLevel = null;\n\n    while (element) {\n        const line = element.textContent;\n        const index = line.search(/[a-zA-Z0-9_.-]/);\n        const indentLevel = index / indentSize;\n\n        // Stop when we reach or go above the top-level directory\n        if (indentLevel <= 1) {\n            break;\n        }\n\n        // Only include directories that are one level above the previous\n        if (prevIndentLevel === null || indentLevel === prevIndentLevel - 1) {\n            const fileName = line.substring(index).trim();\n\n            path = fileName + path;\n            prevIndentLevel = indentLevel;\n        }\n\n        element = element.previousElementSibling;\n    }\n\n    return path;\n}\n\nfunction toggleFile(element) {\n    const patternInput = document.getElementById('pattern');\n    const patternFiles = patternInput.value ? patternInput.value.split(',').map((item) => item.trim()) : [];\n\n    const directoryContainer = document.getElementById('directory-structure-container');\n    const treeLineElements = Array.from(directoryContainer.children).filter((child) => child.tagName === 'PRE');\n\n    // Skip the first two tree lines (header and repository name)\n    if (treeLineElements[0] === element || treeLineElements[1] === element) {\n        return;\n    }\n\n    element.classList.toggle('line-through');\n    element.classList.toggle('text-gray-500');\n\n    const fileName = getFileName(element);\n    const fileIndex = patternFiles.indexOf(fileName);\n\n    if (fileIndex !== -1) {\n        patternFiles.splice(fileIndex, 1);\n    } else {\n        patternFiles.push(fileName);\n    }\n\n    patternInput.value = patternFiles.join(', ');\n}\n\n// Copy functionality\nfunction copyText(className) {\n    let textToCopy;\n\n    if (className === 'directory-structure') {\n    // For directory structure, get the hidden input value\n        
const hiddenInput = document.getElementById('directory-structure-content');\n\n        if (!hiddenInput) {return;}\n        textToCopy = hiddenInput.value;\n    } else {\n    // For other elements, get the textarea value\n        const textarea = document.querySelector(`.${ className }`);\n\n        if (!textarea) {return;}\n        textToCopy = textarea.value;\n    }\n\n    const button = document.querySelector(`button[onclick=\"copyText('${className}')\"]`);\n\n    if (!button) {return;}\n\n    // Copy text\n    navigator.clipboard.writeText(textToCopy)\n        .then(() => {\n            // Store original content\n            const originalContent = button.innerHTML;\n\n            // Change button content\n            button.innerHTML = 'Copied!';\n\n            // Reset after 1 second\n            setTimeout(() => {\n                button.innerHTML = originalContent;\n            }, 1000);\n        })\n        .catch((err) => {\n            console.error('Failed to copy text:', err);\n            const originalContent = button.innerHTML;\n\n            button.innerHTML = 'Failed to copy';\n            setTimeout(() => {\n                button.innerHTML = originalContent;\n            }, 1000);\n        });\n}\n\n// Helper functions for toggling result blocks\nfunction showLoading() {\n    document.getElementById('results-loading').style.display = 'block';\n    document.getElementById('results-section').style.display = 'none';\n    document.getElementById('results-error').style.display = 'none';\n}\nfunction showResults() {\n    document.getElementById('results-loading').style.display = 'none';\n    document.getElementById('results-section').style.display = 'block';\n    document.getElementById('results-error').style.display = 'none';\n}\nfunction showError(msg) {\n    document.getElementById('results-loading').style.display = 'none';\n    document.getElementById('results-section').style.display = 'none';\n    const errorDiv = 
document.getElementById('results-error');\n\n    errorDiv.innerHTML = msg;\n    errorDiv.style.display = 'block';\n}\n\n// Helper function to collect form data\nfunction collectFormData(form) {\n    const json_data = {};\n    const inputText = form.querySelector('[name=\"input_text\"]');\n    const token = form.querySelector('[name=\"token\"]');\n    const hiddenInput = document.getElementById('max_file_size_kb');\n    const patternType = document.getElementById('pattern_type');\n    const pattern = document.getElementById('pattern');\n\n    if (inputText) {json_data.input_text = inputText.value;}\n    if (token) {json_data.token = token.value;}\n    if (hiddenInput) {json_data.max_file_size = hiddenInput.value;}\n    if (patternType) {json_data.pattern_type = patternType.value;}\n    if (pattern) {json_data.pattern = pattern.value;}\n\n    return json_data;\n}\n\n// Helper function to manage button loading state\nfunction setButtonLoadingState(submitButton, isLoading) {\n    if (!isLoading) {\n        submitButton.disabled = false;\n        submitButton.innerHTML = submitButton.getAttribute('data-original-content') || 'Submit';\n        submitButton.classList.remove('bg-[#ffb14d]');\n\n        return;\n    }\n\n    // Store original content if not already stored\n    if (!submitButton.getAttribute('data-original-content')) {\n        submitButton.setAttribute('data-original-content', submitButton.innerHTML);\n    }\n\n    submitButton.disabled = true;\n    submitButton.innerHTML = `\n        <div class=\"flex items-center justify-center\">\n            <svg class=\"animate-spin h-5 w-5 text-gray-900\" xmlns=\"http://www.w3.org/2000/svg\" fill=\"none\" viewBox=\"0 0 24 24\">\n                <circle class=\"opacity-25\" cx=\"12\" cy=\"12\" r=\"10\" stroke=\"currentColor\" stroke-width=\"4\"></circle>\n                <path class=\"opacity-75\" fill=\"currentColor\" d=\"M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 
7.938l3-2.647z\"></path>\n            </svg>\n            <span class=\"ml-2\">Processing...</span>\n        </div>\n    `;\n    submitButton.classList.add('bg-[#ffb14d]');\n}\n\n// Helper function to handle successful response\nfunction handleSuccessfulResponse(data) {\n    // Show results section\n    showResults();\n\n    // Store the digest_url for download functionality\n    window.currentDigestUrl = data.digest_url;\n\n    // Set plain text content for summary, tree, and content\n    document.getElementById('result-summary').value = data.summary || '';\n    document.getElementById('directory-structure-content').value = data.tree || '';\n    document.getElementById('result-content').value = data.content || '';\n\n    // Populate directory structure lines as clickable <pre> elements\n    const dirPre = document.getElementById('directory-structure-pre');\n\n    if (dirPre && data.tree) {\n        dirPre.innerHTML = '';\n        data.tree.split('\\n').forEach((line) => {\n            const pre = document.createElement('pre');\n\n            pre.setAttribute('name', 'tree-line');\n            pre.className = 'cursor-pointer hover:line-through hover:text-gray-500';\n            pre.textContent = line;\n            pre.onclick = function () { toggleFile(this); };\n            dirPre.appendChild(pre);\n        });\n    }\n\n    // Scroll to results\n    document.getElementById('results-section').scrollIntoView({ behavior: 'smooth', block: 'start' });\n}\n\nfunction handleSubmit(event, showLoadingSpinner = false) {\n    event.preventDefault();\n    const form = event.target || document.getElementById('ingestForm');\n\n    if (!form) {return;}\n\n    // Ensure hidden input is updated before collecting form data\n    const slider = document.getElementById('file_size');\n    const hiddenInput = document.getElementById('max_file_size_kb');\n\n    if (slider && hiddenInput) {\n        hiddenInput.value = logSliderToSize(slider.value);\n    }\n\n    if (showLoadingSpinner) 
{\n        showLoading();\n    }\n\n    const submitButton = form.querySelector('button[type=\"submit\"]');\n\n    if (!submitButton) {return;}\n\n    const json_data = collectFormData(form);\n\n    if (showLoadingSpinner) {\n        setButtonLoadingState(submitButton, true);\n    }\n\n    // Submit the form to /api/ingest as JSON\n    fetch('/api/ingest', {\n        method: 'POST',\n        headers: { 'Content-Type': 'application/json' },\n        body: JSON.stringify(json_data)\n    })\n        .then(async (response) => {\n            let data;\n\n            try {\n                data = await response.json();\n            } catch {\n                data = {};\n            }\n            setButtonLoadingState(submitButton, false);\n\n            if (!response.ok) {\n                // Show all error details if present\n                if (Array.isArray(data.detail)) {\n                    const details = data.detail.map((d) => `<li>${d.msg || JSON.stringify(d)}</li>`).join('');\n\n                    showError(`<div class='mb-6 p-4 bg-red-50 border border-red-200 rounded-lg text-red-700'><b>Error(s):</b><ul>${details}</ul></div>`);\n\n                    return;\n                }\n                // Other errors\n                showError(`<div class='mb-6 p-4 bg-red-50 border border-red-200 rounded-lg text-red-700'>${data.error || JSON.stringify(data) || 'An error occurred.'}</div>`);\n\n                return;\n            }\n\n            // Handle error in data\n            if (data.error) {\n                showError(`<div class='mb-6 p-4 bg-red-50 border border-red-200 rounded-lg text-red-700'>${data.error}</div>`);\n\n                return;\n            }\n\n            handleSuccessfulResponse(data);\n        })\n        .catch((error) => {\n            setButtonLoadingState(submitButton, false);\n            showError(`<div class='mb-6 p-4 bg-red-50 border border-red-200 rounded-lg text-red-700'>${error}</div>`);\n        });\n}\n\nfunction 
copyFullDigest() {\n    const directoryStructure = document.getElementById('directory-structure-content').value;\n    const filesContent = document.querySelector('.result-text').value;\n    const fullDigest = `${directoryStructure}\\n\\nFiles Content:\\n\\n${filesContent}`;\n    const button = document.querySelector('[onclick=\"copyFullDigest()\"]');\n    const originalText = button.innerHTML;\n\n    navigator.clipboard.writeText(fullDigest).then(() => {\n        button.innerHTML = `\n            <svg class=\"w-4 h-4 mr-2\" fill=\"none\" stroke=\"currentColor\" viewBox=\"0 0 24 24\">\n                <path stroke-linecap=\"round\" stroke-linejoin=\"round\" stroke-width=\"2\" d=\"M5 13l4 4L19 7\"></path>\n            </svg>\n            Copied!\n        `;\n\n        setTimeout(() => {\n            button.innerHTML = originalText;\n        }, 2000);\n    })\n        .catch((err) => {\n            console.error('Failed to copy text: ', err);\n        });\n}\n\nfunction downloadFullDigest() {\n    // Check if we have a digest_url\n    if (!window.currentDigestUrl) {\n        console.error('No digest_url available for download');\n\n        return;\n    }\n\n    // Show feedback on the button\n    const button = document.querySelector('[onclick=\"downloadFullDigest()\"]');\n    const originalText = button.innerHTML;\n\n    button.innerHTML = `\n        <svg class=\"w-4 h-4 mr-2\" fill=\"none\" stroke=\"currentColor\" viewBox=\"0 0 24 24\">\n            <path stroke-linecap=\"round\" stroke-linejoin=\"round\" stroke-width=\"2\" d=\"M4 4v5h.582m15.356 2A8.001 8.001 0 004.582 9m0 0H9m11 11v-5h-.581m0 0a8.003 8.003 0 01-15.357-2m15.357 2H15\"></path>\n        </svg>\n        Downloading...\n    `;\n\n    // Create a download link using the digest_url\n    const a = document.createElement('a');\n\n    a.href = window.currentDigestUrl;\n    a.download = 'digest.txt';\n    document.body.appendChild(a);\n    a.click();\n\n    // Clean up\n    document.body.removeChild(a);\n\n  
  // Update button to show success\n    button.innerHTML = `\n        <svg class=\"w-4 h-4 mr-2\" fill=\"none\" stroke=\"currentColor\" viewBox=\"0 0 24 24\">\n            <path stroke-linecap=\"round\" stroke-linejoin=\"round\" stroke-width=\"2\" d=\"M5 13l4 4L19 7\"></path>\n        </svg>\n        Downloaded!\n    `;\n\n    setTimeout(() => {\n        button.innerHTML = originalText;\n    }, 2000);\n}\n\n// Add the logSliderToSize helper function\nfunction logSliderToSize(position) {\n    const maxPosition = 500;\n    const maxValue = Math.log(102400); // 100 MB\n\n    const value = Math.exp(maxValue * (position / maxPosition)**1.5);\n\n    return Math.round(value);\n}\n\n// Move slider initialization to a separate function\nfunction initializeSlider() {\n    const slider = document.getElementById('file_size');\n    const sizeValue = document.getElementById('size_value');\n    const hiddenInput = document.getElementById('max_file_size_kb');\n\n    if (!slider || !sizeValue || !hiddenInput) {return;}\n\n    function updateSlider() {\n        const value = logSliderToSize(slider.value);\n\n        sizeValue.textContent = formatSize(value);\n        slider.style.backgroundSize = `${(slider.value / slider.max) * 100}% 100%`;\n        hiddenInput.value = value; // Set hidden input to KB value\n    }\n\n    // Update on slider change\n    slider.addEventListener('input', updateSlider);\n\n    // Initialize slider position\n    updateSlider();\n}\n\n// Add helper function for formatting size\nfunction formatSize(sizeInKB) {\n    if (sizeInKB >= 1024) {\n        return `${ Math.round(sizeInKB / 1024) }MB`;\n    }\n\n    return `${ Math.round(sizeInKB) }kB`;\n}\n\n// Add this new function\nfunction setupGlobalEnterHandler() {\n    document.addEventListener('keydown', (event) => {\n        if (event.key === 'Enter' && !event.target.matches('textarea')) {\n            const form = document.getElementById('ingestForm');\n\n            if (form) {\n                
handleSubmit(new Event('submit'), true);\n            }\n        }\n    });\n}\n\n// Add to the DOMContentLoaded event listener\ndocument.addEventListener('DOMContentLoaded', () => {\n    initializeSlider();\n    setupGlobalEnterHandler();\n});\n\n\n// Make sure these are available globally\nwindow.handleSubmit = handleSubmit;\nwindow.toggleFile = toggleFile;\nwindow.copyText = copyText;\nwindow.copyFullDigest = copyFullDigest;\nwindow.downloadFullDigest = downloadFullDigest;\n"
  },
  {
    "path": "src/static/llms.txt",
    "content": "# GitIngest – **AI Agent Integration Guide**\n\nTurn any Git repository into a prompt-ready text digest. GitIngest fetches, cleans, and formats source code so AI agents and Large Language Models can reason over complete projects programmatically.\n\n**🤖 For AI Agents**: Use CLI or Python package for automated integration. Web UI is designed for human interaction only.\n\n---\n## 1. Installation\n\n### 1.1 CLI Installation (Recommended for Scripts & Automation)\n```bash\n# Best practice: Use pipx for CLI tools (isolated environment)\npipx install gitingest\n\n# Alternative: Use pip (may conflict with other packages)\npip install gitingest\n\n# Verify installation\ngitingest --help\n```\n\n### 1.2 Python Package Installation (For Code Integration)\n```bash\n# For projects/notebooks: Use pip in virtual environment\npython -m venv gitingest-env\nsource gitingest-env/bin/activate  # On Windows: gitingest-env\\Scripts\\activate\npip install gitingest\n\n# Or add to requirements.txt\necho \"gitingest\" >> requirements.txt\npip install -r requirements.txt\n\n# For self-hosting: Install with server dependencies\npip install gitingest[server]\n\n# For development: Install with dev dependencies\npip install gitingest[dev,server]\n```\n\n### 1.3 Installation Verification\n```bash\n# Test CLI installation\ngitingest --version\n\n# Test Python package\npython -c \"from gitingest import ingest; print('GitIngest installed successfully')\"\n\n# Quick functionality test\ngitingest https://github.com/octocat/Hello-World -o test_output.txt\n```\n\n---\n## 2. 
Quick-Start for AI Agents\n| Method | Best for | One-liner |\n|--------|----------|-----------|\n| **CLI** | Scripts, automation, pipelines | `gitingest https://github.com/user/repo -o - \\| your-llm` |\n| **Python** | Code integration, notebooks, async tasks | `from gitingest import ingest; s,t,c = ingest('repo-url'); process(c)` |\n| **URL Hack** | Quick web scraping (limited) | Replace `github.com` → `gitingest.com` in any GitHub URL |\n| **Web UI** | **Human use only** | ~~Not recommended for AI agents~~ |\n\n---\n## 3. Output Format for AI Processing\nGitIngest returns **structured plain-text** optimized for LLM consumption with three distinct sections:\n\n### 3.1 Repository Summary\n```\nRepository: owner/repo-name\nFiles analyzed: 42\nEstimated tokens: 15.2k\n```\nContains basic metadata: repository name, file count, and token estimation for LLM planning.\n\n### 3.2 Directory Structure\n```\nDirectory structure:\n└── project-name/\n    ├── src/\n    │   ├── main.py\n    │   └── utils.py\n    ├── tests/\n    │   └── test_main.py\n    └── README.md\n```\nHierarchical tree view showing the complete project structure for context and navigation.\n\n### 3.3 File Contents\nEach file is wrapped with clear delimiters:\n```\n================================================\nFILE: src/main.py\n================================================\ndef hello_world():\n    print(\"Hello, World!\")\n\nif __name__ == \"__main__\":\n    hello_world()\n\n\n================================================\nFILE: README.md\n================================================\n# Project Title\n\nThis is a sample project...\n```\n\n### 3.4 Usage Example\n```python\n# Python package usage\nfrom gitingest import ingest\n\nsummary, tree, content = ingest(\"https://github.com/octocat/Hello-World\")\n\n# Returns exactly:\n# summary = \"Repository: octocat/hello-world\\nFiles analyzed: 1\\nEstimated tokens: 29\"\n# tree = \"Directory structure:\\n└── octocat-hello-world/\\n    └── README\"\n# 
content = \"================================================\\nFILE: README\\n================================================\\nHello World!\\n\\n\\n\"\n\n# For AI processing, combine all sections:\nfull_context = f\"{summary}\\n\\n{tree}\\n\\n{content}\"\n```\n\n```bash\n# CLI usage - pipe directly to your AI system\ngitingest https://github.com/octocat/Hello-World -o - | your_llm_processor\n\n# Output streams the complete formatted text:\n# Repository: octocat/hello-world\n# Files analyzed: 1\n# Estimated tokens: 29\n#\n# Directory structure:\n# └── octocat-hello-world/\n#     └── README\n#\n# ================================================\n# FILE: README\n# ================================================\n# Hello World!\n```\n\n\n\n---\n## 4. AI Agent Integration Methods\n\n### 4.1 CLI Integration (Recommended for Automation)\n```bash\n# Basic usage - pipe directly to your AI system\ngitingest https://github.com/user/repo -o - | your_ai_processor\n\n# Advanced filtering for focused analysis (long flags)\ngitingest https://github.com/user/repo \\\n  --include-pattern \"*.py\" --include-pattern \"*.js\" --include-pattern \"*.md\" \\\n  --max-size 102400 \\\n  -o - | python your_analyzer.py\n\n# Same command with short flags (more concise)\ngitingest https://github.com/user/repo \\\n  -i \"*.py\" -i \"*.js\" -i \"*.md\" \\\n  -s 102400 \\\n  -o - | python your_analyzer.py\n\n# Exclude unwanted files and directories (long flags)\ngitingest https://github.com/user/repo \\\n  --exclude-pattern \"node_modules/*\" --exclude-pattern \"*.log\" \\\n  --exclude-pattern \"dist/*\" \\\n  -o - | your_analyzer\n\n# Same with short flags\ngitingest https://github.com/user/repo \\\n  -e \"node_modules/*\" -e \"*.log\" -e \"dist/*\" \\\n  -o - | your_analyzer\n\n# Private repositories with token (short flag)\nexport GITHUB_TOKEN=\"ghp_your_token_here\"\ngitingest https://github.com/user/private-repo -t $GITHUB_TOKEN -o -\n\n# Specific branch analysis (short flag)\ngitingest 
https://github.com/user/repo -b main -o -\n\n# Save to file (default: digest.txt in current directory)\ngitingest https://github.com/user/repo -o my_analysis.txt\n\n# Ultra-concise example for small files only\ngitingest https://github.com/user/repo -i \"*.py\" -s 51200 -o -\n```\n\n**Key Parameters for AI Agents**:\n- `-s` / `--max-size`: Maximum file size in bytes to process (default: no limit)\n- `-i` / `--include-pattern`: Include files matching Unix shell-style wildcards\n- `-e` / `--exclude-pattern`: Exclude files matching Unix shell-style wildcards\n- `-b` / `--branch`: Specify branch to analyze (defaults to repository's default branch)\n- `-t` / `--token`: GitHub personal access token for private repositories\n- `-o` / `--output`: Stream to STDOUT with `-` (default saves to `digest.txt`)\n\n### 4.2 Python Package (Best for Code Integration)\n```python\nfrom gitingest import ingest, ingest_async\nimport asyncio\n\n# Synchronous processing\ndef analyze_repository(repo_url: str):\n    summary, tree, content = ingest(repo_url)\n\n    # Process metadata\n    repo_info = parse_summary(summary)\n\n    # Analyze structure\n    file_structure = parse_tree(tree)\n\n    # Process code content\n    return analyze_code(content)\n\n# Asynchronous processing (recommended for AI services)\nasync def batch_analyze_repos(repo_urls: list):\n    tasks = [ingest_async(url) for url in repo_urls]\n    results = await asyncio.gather(*tasks)\n    return [process_repo_data(*result) for result in results]\n\n# Memory-efficient processing for large repos\ndef stream_process_repo(repo_url: str):\n    summary, tree, content = ingest(\n        repo_url,\n        max_file_size=51200,  # 50KB max per file\n        include_patterns=[\"*.py\", \"*.js\"],  # Focus on code files\n    )\n\n    # Process in chunks to manage memory\n    for file_content in split_content(content):\n        yield analyze_file(file_content)\n\n# Filtering with exclude patterns\ndef analyze_without_deps(repo_url: 
str):\n    summary, tree, content = ingest(\n        repo_url,\n        exclude_patterns=[\n            \"node_modules/*\", \"*.lock\", \"dist/*\",\n            \"build/*\", \"*.min.js\", \"*.log\"\n        ]\n    )\n    return analyze_code(content)\n```\n\n**Python Integration Patterns**:\n- **Batch Processing**: Use `ingest_async` for multiple repositories\n- **Memory Management**: Use `max_file_size` and pattern filtering for large repos\n- **Error Handling**: Wrap calls in `try`/`except` for network/auth issues\n- **Caching**: Store results to avoid repeated API calls\n- **Pattern Filtering**: Use `include_patterns` and `exclude_patterns` lists\n\n### 4.3 Web UI (❌ Not for AI Agents)\nThe web interface at `https://gitingest.com` is designed for **human interaction only**.\n\n**Why AI agents should avoid the web UI**:\n- Requires manual interaction and browser automation\n- No programmatic access to results\n- Rate limiting and CAPTCHA protection\n- Inefficient for automated workflows\n\n**Use CLI or Python package instead** for all AI agent integrations.\n\n---\n## 5. 
AI Agent Best Practices\n\n### 5.1 Repository Analysis Workflows\n```python\n# Pattern 1: Full repository analysis\ndef full_repo_analysis(repo_url: str):\n    summary, tree, content = ingest(repo_url)\n    return {\n        'metadata': extract_metadata(summary),\n        'structure': analyze_structure(tree),\n        'code_analysis': analyze_all_files(content),\n        'insights': generate_insights(summary, tree, content)\n    }\n\n# Pattern 2: Selective file processing\ndef selective_analysis(repo_url: str, file_patterns: list):\n    summary, tree, content = ingest(\n        repo_url,\n        include_patterns=file_patterns\n    )\n    return focused_analysis(content)\n\n# Pattern 3: Streaming for large repos\ndef stream_analysis(repo_url: str):\n    # First pass: get structure and metadata only\n    summary, tree, _ = ingest(\n        repo_url,\n        include_patterns=[\"*.md\", \"*.txt\"],\n        max_file_size=10240  # 10KB limit for docs\n    )\n\n    # Then process code files selectively by language\n    for pattern in [\"*.py\", \"*.js\", \"*.go\", \"*.rs\"]:\n        _, _, content = ingest(\n            repo_url,\n            include_patterns=[pattern],\n            max_file_size=51200  # 50KB limit for code\n        )\n        yield process_language_specific(content, pattern)\n```\n\n### 5.2 Error Handling for AI Agents\n```python\nfrom gitingest import ingest\nfrom gitingest.utils.exceptions import GitIngestError\nimport time\n\ndef robust_ingest(repo_url: str, retries: int = 3):\n    for attempt in range(retries):\n        try:\n            return ingest(repo_url)\n        except GitIngestError as e:\n            if attempt == retries - 1:\n                return None, None, f\"Failed to ingest: {e}\"\n            time.sleep(2 ** attempt)  # Exponential backoff\n```\n\n### 5.3 Private Repository Access\n```python\nimport os\nfrom gitingest import ingest\n\n# Method 1: Environment variable\ndef ingest_private_repo(repo_url: str):\n    token = 
os.getenv('GITHUB_TOKEN')\n    if not token:\n        raise ValueError(\"GITHUB_TOKEN environment variable required\")\n    return ingest(repo_url, token=token)\n\n# Method 2: Secure token management\ndef ingest_with_token_rotation(repo_url: str, token_manager):\n    token = token_manager.get_active_token()\n    try:\n        return ingest(repo_url, token=token)\n    except AuthenticationError:\n        token = token_manager.rotate_token()\n        return ingest(repo_url, token=token)\n```\n\n---\n## 6. Integration Scenarios for AI Agents\n\n| Use Case | Recommended Method | Example Implementation |\n|----------|-------------------|----------------------|\n| **Code Review Bot** | Python async | `await ingest_async(pr_repo)` → analyze changes |\n| **Documentation Generator** | CLI with filtering | `gitingest repo -i \"*.py\" -i \"*.md\" -o -` |\n| **Vulnerability Scanner** | Python with error handling | Batch process multiple repos |\n| **Code Search Engine** | CLI → Vector DB | `gitingest repo -o - \\| embed \\| store` |\n| **AI Coding Assistant** | Python integration | Load repo context into conversation |\n| **CI/CD Analysis** | CLI integration | `gitingest repo -o - \\| analyze_pipeline` |\n| **Repository Summarization** | Python with streaming | Process large repos in chunks |\n| **Dependency Analysis** | CLI exclude patterns | `gitingest repo -e \"node_modules/*\" -e \"*.lock\" -o -` |\n| **Security Audit** | CLI with size limits | `gitingest repo -i \"*.py\" -i \"*.js\" -s 204800 -o -` |\n\n---\n## 7. Support & Resources for AI Developers\n* **Web UI official instance**: https://gitingest.com\n* **GitHub Repository**: https://github.com/coderamp-labs/gitingest\n* **Python Package**: https://pypi.org/project/gitingest/\n* **Community Support**: https://discord.gg/zerRaGK9EC\n\n_GitIngest – Purpose-built for AI agents to understand entire codebases programmatically._\n"
  },
  {
    "path": "src/static/robots.txt",
    "content": "User-agent: *\nAllow: /\nAllow: /api/\nAllow: /coderamp-labs/gitingest/\n"
  },
  {
    "path": "tests/.pylintrc",
    "content": "[MASTER]\ninit-hook=\n    import sys\n    sys.path.append('./src')\n\n[MESSAGES CONTROL]\ndisable=missing-class-docstring,missing-function-docstring,protected-access,fixme\n\n[FORMAT]\nmax-line-length=119\n"
  },
  {
    "path": "tests/__init__.py",
    "content": "\"\"\"Tests for the gitingest package.\"\"\"\n"
  },
  {
    "path": "tests/conftest.py",
    "content": "\"\"\"Fixtures for tests.\n\nThis file provides shared fixtures for creating sample queries, a temporary directory structure, and a helper function\nto write ``.ipynb`` notebooks for testing notebook utilities.\n\"\"\"\n\nfrom __future__ import annotations\n\nimport json\nimport sys\nimport uuid\nfrom pathlib import Path\nfrom typing import TYPE_CHECKING, Any, Callable, Dict\nfrom unittest.mock import AsyncMock, MagicMock\n\nimport pytest\n\nfrom gitingest.query_parser import IngestionQuery\n\nif TYPE_CHECKING:\n    from pytest_mock import MockerFixture\n\nWriteNotebookFunc = Callable[[str, Dict[str, Any]], Path]\n\nDEMO_URL = \"https://github.com/user/repo\"\nLOCAL_REPO_PATH = \"/tmp/repo\"\nDEMO_COMMIT = \"deadbeefdeadbeefdeadbeefdeadbeefdeadbeef\"\n\n\ndef get_ensure_git_installed_call_count() -> int:\n    \"\"\"Get the number of calls made by ensure_git_installed based on platform.\n\n    On Windows, ensure_git_installed makes 2 calls:\n    1. git --version\n    2. git config core.longpaths\n\n    On other platforms, it makes 1 call:\n    1. 
git --version\n\n    Returns\n    -------\n    int\n        The number of calls made by ensure_git_installed\n\n    \"\"\"\n    return 2 if sys.platform == \"win32\" else 1\n\n\n@pytest.fixture\ndef sample_query() -> IngestionQuery:\n    \"\"\"Provide a default ``IngestionQuery`` object for use in tests.\n\n    This fixture returns a ``IngestionQuery`` pre-populated with typical fields and some default ignore patterns.\n\n    Returns\n    -------\n    IngestionQuery\n        The sample ``IngestionQuery`` object.\n\n    \"\"\"\n    return IngestionQuery(\n        user_name=\"test_user\",\n        repo_name=\"test_repo\",\n        local_path=Path(\"/tmp/test_repo\").resolve(),\n        slug=\"test_user/test_repo\",\n        id=uuid.uuid4(),\n        branch=\"main\",\n        max_file_size=1_000_000,\n        ignore_patterns={\"*.pyc\", \"__pycache__\", \".git\"},\n    )\n\n\n@pytest.fixture\ndef temp_directory(tmp_path: Path) -> Path:\n    \"\"\"Create a temporary directory structure for testing repository scanning.\n\n    The structure includes:\n    test_repo/\n    ├── file1.txt\n    ├── file2.py\n    ├── src/\n    │   ├── subfile1.txt\n    │   ├── subfile2.py\n    │   └── subdir/\n    │       ├── file_subdir.txt\n    │       └── file_subdir.py\n    ├── dir1/\n    │   └── file_dir1.txt\n    └── dir2/\n        └── file_dir2.txt\n\n    Parameters\n    ----------\n    tmp_path : Path\n        The temporary directory path provided by the ``tmp_path`` fixture.\n\n    Returns\n    -------\n    Path\n        The path to the created ``test_repo`` directory.\n\n    \"\"\"\n    test_dir = tmp_path / \"test_repo\"\n    test_dir.mkdir()\n\n    # Root files\n    (test_dir / \"file1.txt\").write_text(\"Hello World\")\n    (test_dir / \"file2.py\").write_text(\"print('Hello')\")\n\n    # src directory and its files\n    src_dir = test_dir / \"src\"\n    src_dir.mkdir()\n    (src_dir / \"subfile1.txt\").write_text(\"Hello from src\")\n    (src_dir / 
\"subfile2.py\").write_text(\"print('Hello from src')\")\n\n    # src/subdir and its files\n    subdir = src_dir / \"subdir\"\n    subdir.mkdir()\n    (subdir / \"file_subdir.txt\").write_text(\"Hello from subdir\")\n    (subdir / \"file_subdir.py\").write_text(\"print('Hello from subdir')\")\n\n    # dir1 and its file\n    dir1 = test_dir / \"dir1\"\n    dir1.mkdir()\n    (dir1 / \"file_dir1.txt\").write_text(\"Hello from dir1\")\n\n    # dir2 and its file\n    dir2 = test_dir / \"dir2\"\n    dir2.mkdir()\n    (dir2 / \"file_dir2.txt\").write_text(\"Hello from dir2\")\n\n    return test_dir\n\n\n@pytest.fixture\ndef write_notebook(tmp_path: Path) -> WriteNotebookFunc:\n    \"\"\"Provide a helper function to write a ``.ipynb`` notebook file with the given content.\n\n    Parameters\n    ----------\n    tmp_path : Path\n        The temporary directory path provided by the ``tmp_path`` fixture.\n\n    Returns\n    -------\n    WriteNotebookFunc\n        A callable that accepts a filename and a dictionary (representing JSON notebook data), writes it to a\n        ``.ipynb`` file, and returns the path to the file.\n\n    \"\"\"\n\n    def _write_notebook(name: str, content: dict[str, Any]) -> Path:\n        notebook_path = tmp_path / name\n        with notebook_path.open(mode=\"w\", encoding=\"utf-8\") as f:\n            json.dump(content, f)\n        return notebook_path\n\n    return _write_notebook\n\n\n@pytest.fixture\ndef stub_resolve_sha(mocker: MockerFixture) -> dict[str, AsyncMock]:\n    \"\"\"Patch *both* async helpers that hit the network.\n\n    Include this fixture *only* in tests that should stay offline.\n    \"\"\"\n    head_mock = mocker.patch(\n        \"gitingest.utils.query_parser_utils._resolve_ref_to_sha\",\n        new_callable=mocker.AsyncMock,\n        return_value=DEMO_COMMIT,\n    )\n    ref_mock = mocker.patch(\n        \"gitingest.utils.git_utils._resolve_ref_to_sha\",\n        new_callable=mocker.AsyncMock,\n        
return_value=DEMO_COMMIT,\n    )\n    # return whichever you want to assert on; here we return the dict\n    return {\"head\": head_mock, \"ref\": ref_mock}\n\n\n@pytest.fixture\ndef stub_branches(mocker: MockerFixture) -> Callable[[list[str]], None]:\n    \"\"\"Return a function that stubs git branch discovery to *branches*.\"\"\"\n\n    def _factory(branches: list[str]) -> None:\n        # Patch the GitPython fetch function\n        mocker.patch(\n            \"gitingest.utils.git_utils.fetch_remote_branches_or_tags\",\n            new_callable=AsyncMock,\n            return_value=branches,\n        )\n\n        # Patch GitPython's ls_remote method to return the mocked output\n        ls_remote_output = \"\\n\".join(f\"{DEMO_COMMIT[:12]}{i:02d}\\trefs/heads/{b}\" for i, b in enumerate(branches))\n        mock_git_cmd = mocker.patch(\"git.Git\")\n        mock_git_cmd.return_value.ls_remote.return_value = ls_remote_output\n\n        # Also patch the git module imports in our utils\n        mocker.patch(\"gitingest.utils.git_utils.git.Git\", return_value=mock_git_cmd.return_value)\n\n    return _factory\n\n\n@pytest.fixture\ndef repo_exists_true(mocker: MockerFixture) -> AsyncMock:\n    \"\"\"Patch ``gitingest.clone.check_repo_exists`` to always return ``True``.\"\"\"\n    return mocker.patch(\"gitingest.clone.check_repo_exists\", return_value=True)\n\n\n@pytest.fixture\ndef run_command_mock(mocker: MockerFixture) -> AsyncMock:\n    \"\"\"Patch ``gitingest.utils.git_utils.run_command`` with an ``AsyncMock``.\n\n    The mocked function returns a dummy process whose ``communicate`` method yields generic\n    ``stdout`` / ``stderr`` bytes. 
Tests can still access / tweak the mock via the fixture argument.\n    \"\"\"\n    mock = AsyncMock(side_effect=_fake_run_command)\n    mocker.patch(\"gitingest.utils.git_utils.run_command\", mock)\n\n    # Mock GitPython components\n    _setup_gitpython_mocks(mocker)\n\n    return mock\n\n\n@pytest.fixture\ndef gitpython_mocks(mocker: MockerFixture) -> dict[str, MagicMock]:\n    \"\"\"Provide comprehensive GitPython mocks for testing.\"\"\"\n    return _setup_gitpython_mocks(mocker)\n\n\ndef _setup_gitpython_mocks(mocker: MockerFixture) -> dict[str, MagicMock]:\n    \"\"\"Set up comprehensive GitPython mocks.\"\"\"\n    # Mock git.Git class\n    mock_git_cmd = MagicMock()\n    mock_git_cmd.version.return_value = \"git version 2.34.1\"\n    mock_git_cmd.config.return_value = \"true\"\n    mock_git_cmd.execute.return_value = f\"{DEMO_COMMIT}\\trefs/heads/main\\n\"\n    mock_git_cmd.ls_remote.return_value = f\"{DEMO_COMMIT}\\trefs/heads/main\\n\"\n    mock_git_cmd.clone.return_value = \"\"\n\n    # Mock git.Repo class\n    mock_repo = MagicMock()\n    mock_repo.git = MagicMock()\n    mock_repo.git.fetch = MagicMock()\n    mock_repo.git.checkout = MagicMock()\n    mock_repo.git.submodule = MagicMock()\n    mock_repo.git.execute = MagicMock()\n    mock_repo.git.config = MagicMock()\n    mock_repo.git.sparse_checkout = MagicMock()\n\n    # Mock git.Repo.clone_from\n    mock_clone_from = MagicMock(return_value=mock_repo)\n\n    git_git_mock = mocker.patch(\"git.Git\", return_value=mock_git_cmd)\n    git_repo_mock = mocker.patch(\"git.Repo\", return_value=mock_repo)\n    mocker.patch(\"git.Repo.clone_from\", mock_clone_from)\n\n    # Patch imports in our modules\n    mocker.patch(\"gitingest.utils.git_utils.git.Git\", return_value=mock_git_cmd)\n    mocker.patch(\"gitingest.utils.git_utils.git.Repo\", return_value=mock_repo)\n    mocker.patch(\"gitingest.clone.git.Git\", return_value=mock_git_cmd)\n    mocker.patch(\"gitingest.clone.git.Repo\", return_value=mock_repo)\n   
 mocker.patch(\"gitingest.clone.git.Repo.clone_from\", mock_clone_from)\n\n    return {\n        \"git_cmd\": mock_git_cmd,\n        \"repo\": mock_repo,\n        \"clone_from\": mock_clone_from,\n        \"git_git_mock\": git_git_mock,\n        \"git_repo_mock\": git_repo_mock,\n    }\n\n\nasync def _fake_run_command(*args: str) -> tuple[bytes, bytes]:\n    if \"ls-remote\" in args:\n        # single match: <sha> <tab>refs/heads/main\n        return (f\"{DEMO_COMMIT}\\trefs/heads/main\\n\".encode(), b\"\")\n    return (b\"output\", b\"error\")\n"
  },
  {
    "path": "tests/query_parser/__init__.py",
    "content": "\"\"\"Tests for the query parser.\"\"\"\n"
  },
  {
    "path": "tests/query_parser/test_git_host_agnostic.py",
    "content": "\"\"\"Tests to verify that the query parser is Git host agnostic.\n\nThese tests confirm that ``parse_query`` correctly identifies user/repo pairs and canonical URLs for GitHub, GitLab,\nBitbucket, Gitea, and Codeberg, even if the host is omitted.\n\"\"\"\n\nfrom __future__ import annotations\n\nimport pytest\n\nfrom gitingest.config import MAX_FILE_SIZE\nfrom gitingest.query_parser import parse_remote_repo\nfrom gitingest.utils.query_parser_utils import KNOWN_GIT_HOSTS, _is_valid_git_commit_hash\n\n# Repository matrix: (host, user, repo)\n_REPOS: list[tuple[str, str, str]] = [\n    (\"github.com\", \"fastapi\", \"fastapi\"),\n    (\"gitlab.com\", \"gitlab-org\", \"gitlab-runner\"),\n    (\"bitbucket.org\", \"na-dna\", \"llm-knowledge-share\"),\n    (\"gitea.com\", \"xorm\", \"xorm\"),\n    (\"codeberg.org\", \"forgejo\", \"forgejo\"),\n    (\"git.rwth-aachen.de\", \"medialab\", \"19squared\"),\n    (\"gitlab.alpinelinux.org\", \"alpine\", \"apk-tools\"),\n]\n\n\n# Generate cartesian product of repository tuples with URL variants.\n@pytest.mark.parametrize((\"host\", \"user\", \"repo\"), _REPOS, ids=[f\"{h}:{u}/{r}\" for h, u, r in _REPOS])\n@pytest.mark.parametrize(\"variant\", [\"full\", \"noscheme\", \"slug\"])\n@pytest.mark.asyncio\nasync def test_parse_query_without_host(\n    host: str,\n    user: str,\n    repo: str,\n    variant: str,\n) -> None:\n    \"\"\"Verify that ``parse_remote_repo`` handles URLs, host-omitted URLs and raw slugs.\"\"\"\n    # Build the input URL based on the selected variant\n    if variant == \"full\":\n        url = f\"https://{host}/{user}/{repo}\"\n    elif variant == \"noscheme\":\n        url = f\"{host}/{user}/{repo}\"\n    else:  # \"slug\"\n        url = f\"{user}/{repo}\"\n\n    expected_url = f\"https://{host}/{user}/{repo}\"\n\n    # For slug form with a custom host (not in KNOWN_GIT_HOSTS) we expect a failure,\n    # because the parser cannot guess which domain to use.\n    if variant == \"slug\" and host 
not in KNOWN_GIT_HOSTS:\n        with pytest.raises(ValueError, match=\"Could not find a valid repository host\"):\n            await parse_remote_repo(url)\n        return\n\n    query = await parse_remote_repo(url)\n\n    # Compare against the canonical dict while ignoring unpredictable fields.\n    actual = query.model_dump(exclude={\"id\", \"local_path\", \"ignore_patterns\", \"s3_url\"})\n\n    assert \"commit\" in actual\n    assert _is_valid_git_commit_hash(actual[\"commit\"])\n    del actual[\"commit\"]\n\n    expected = {\n        \"host\": host,\n        \"user_name\": user,\n        \"repo_name\": repo,\n        \"url\": expected_url,\n        \"slug\": f\"{user}-{repo}\",\n        \"subpath\": \"/\",\n        \"type\": None,\n        \"branch\": None,\n        \"tag\": None,\n        \"max_file_size\": MAX_FILE_SIZE,\n        \"include_patterns\": None,\n        \"include_submodules\": False,\n    }\n\n    assert actual == expected\n"
  },
  {
    "path": "tests/query_parser/test_query_parser.py",
    "content": "\"\"\"Tests for the ``query_parser`` module.\n\nThese tests cover URL parsing, pattern parsing, and handling of branches/subpaths for HTTP(S) repositories and local\npaths.\n\"\"\"\n\n# pylint: disable=too-many-arguments, too-many-positional-arguments\nfrom __future__ import annotations\n\nfrom pathlib import Path\nfrom typing import TYPE_CHECKING, Callable\n\nimport pytest\n\nfrom gitingest.query_parser import parse_local_dir_path, parse_remote_repo\nfrom gitingest.utils.query_parser_utils import _is_valid_git_commit_hash\nfrom tests.conftest import DEMO_URL\n\nif TYPE_CHECKING:\n    from unittest.mock import AsyncMock\n\n    from gitingest.schemas import IngestionQuery\n\n\nURLS_HTTPS: list[str] = [\n    DEMO_URL,\n    \"https://gitlab.com/user/repo\",\n    \"https://bitbucket.org/user/repo\",\n    \"https://gitea.com/user/repo\",\n    \"https://codeberg.org/user/repo\",\n    \"https://gist.github.com/user/repo\",\n    \"https://git.example.com/user/repo\",\n    \"https://gitlab.example.com/user/repo\",\n    \"https://gitlab.example.se/user/repo\",\n]\n\nURLS_HTTP: list[str] = [url.replace(\"https://\", \"http://\") for url in URLS_HTTPS]\n\n\n@pytest.mark.parametrize(\"url\", URLS_HTTPS, ids=lambda u: u)\n@pytest.mark.asyncio\nasync def test_parse_url_valid_https(url: str, stub_resolve_sha: dict[str, AsyncMock]) -> None:\n    \"\"\"Valid HTTPS URLs parse correctly and ``query.url`` equals the input.\"\"\"\n    query = await _assert_basic_repo_fields(url, stub_resolve_sha[\"head\"])\n\n    assert query.url == url  # HTTPS: canonical URL should equal input\n\n\n@pytest.mark.parametrize(\"url\", URLS_HTTP, ids=lambda u: u)\n@pytest.mark.asyncio\nasync def test_parse_url_valid_http(url: str, stub_resolve_sha: dict[str, AsyncMock]) -> None:\n    \"\"\"Valid HTTP URLs parse correctly (slug check only).\"\"\"\n    await _assert_basic_repo_fields(url, stub_resolve_sha[\"head\"])\n\n\n@pytest.mark.asyncio\nasync def 
test_parse_url_invalid(stub_resolve_sha: dict[str, AsyncMock]) -> None:\n    \"\"\"Test ``parse_remote_repo`` with an invalid URL.\n\n    Given an HTTPS URL lacking a repository structure (e.g., \"https://github.com\"),\n    When ``parse_remote_repo`` is called,\n    Then a ValueError should be raised indicating an invalid repository URL.\n    \"\"\"\n    url = \"https://github.com\"\n\n    with pytest.raises(ValueError, match=\"Invalid repository URL\"):\n        await parse_remote_repo(url)\n\n    stub_resolve_sha[\"head\"].assert_not_awaited()\n\n\n@pytest.mark.asyncio\n@pytest.mark.parametrize(\"url\", [DEMO_URL, \"https://gitlab.com/user/repo\"])\nasync def test_parse_query_basic(url: str, stub_resolve_sha: dict[str, AsyncMock]) -> None:\n    \"\"\"Test ``parse_remote_repo`` with a basic valid repository URL.\n\n    Given an HTTPS URL:\n    When ``parse_remote_repo`` is called,\n    Then user/repo, URL should be parsed correctly.\n    \"\"\"\n    query = await parse_remote_repo(url)\n\n    stub_resolve_sha[\"head\"].assert_awaited_once()\n    assert query.user_name == \"user\"\n    assert query.repo_name == \"repo\"\n    assert query.url == url\n\n\n@pytest.mark.asyncio\nasync def test_parse_query_mixed_case(stub_resolve_sha: dict[str, AsyncMock]) -> None:\n    \"\"\"Test ``parse_remote_repo`` with mixed-case URLs.\n\n    Given a URL with mixed-case parts (e.g. 
\"Https://GitHub.COM/UsEr/rEpO\"):\n    When ``parse_remote_repo`` is called,\n    Then the user and repo names should be normalized to lowercase.\n    \"\"\"\n    url = \"Https://GitHub.COM/UsEr/rEpO\"\n    query = await parse_remote_repo(url)\n\n    stub_resolve_sha[\"head\"].assert_awaited_once()\n    assert query.user_name == \"user\"\n    assert query.repo_name == \"repo\"\n\n\n@pytest.mark.asyncio\nasync def test_parse_url_with_subpaths(\n    stub_branches: Callable[[list[str]], None],\n    stub_resolve_sha: dict[str, AsyncMock],\n) -> None:\n    \"\"\"Test ``parse_remote_repo`` with a URL containing branch and subpath.\n\n    Given a URL referencing a branch (\"main\") and a subdir (\"subdir/file\"):\n    When ``parse_remote_repo`` is called with remote branch fetching,\n    Then user, repo, branch, and subpath should be identified correctly.\n    \"\"\"\n    url = DEMO_URL + \"/tree/main/subdir/file\"\n\n    stub_branches([\"main\", \"dev\", \"feature-branch\"])\n\n    query = await _assert_basic_repo_fields(url, stub_resolve_sha[\"ref\"])\n\n    assert query.user_name == \"user\"\n    assert query.repo_name == \"repo\"\n    assert query.branch == \"main\"\n    assert query.subpath == \"/subdir/file\"\n\n\n@pytest.mark.asyncio\nasync def test_parse_url_invalid_repo_structure(stub_resolve_sha: dict[str, AsyncMock]) -> None:\n    \"\"\"Test ``parse_remote_repo`` with a URL missing a repository name.\n\n    Given a URL like \"https://github.com/user\":\n    When ``parse_remote_repo`` is called,\n    Then a ValueError should be raised indicating an invalid repository URL.\n    \"\"\"\n    url = \"https://github.com/user\"\n\n    with pytest.raises(ValueError, match=\"Invalid repository URL\"):\n        await parse_remote_repo(url)\n\n    stub_resolve_sha[\"head\"].assert_not_awaited()\n\n\nasync def test_parse_local_dir_path_local_path() -> None:\n    \"\"\"Test ``parse_local_dir_path``.\n\n    Given \"/home/user/project\":\n    When ``parse_local_dir_path`` is 
called,\n    Then the local path should be set, id generated, and slug formed accordingly.\n    \"\"\"\n    path = \"/home/user/project\"\n    query = parse_local_dir_path(path)\n    tail = Path(\"home/user/project\")\n\n    assert query.local_path.parts[-len(tail.parts) :] == tail.parts\n    assert query.id is not None\n    assert query.slug == \"home/user/project\"\n\n\nasync def test_parse_local_dir_path_relative_path() -> None:\n    \"\"\"Test ``parse_local_dir_path`` with a relative path.\n\n    Given \"./project\":\n    When ``parse_local_dir_path`` is called,\n    Then ``local_path`` resolves relatively, and ``slug`` ends with \"project\".\n    \"\"\"\n    path = \"./project\"\n    query = parse_local_dir_path(path)\n    tail = Path(\"project\")\n\n    assert query.local_path.parts[-len(tail.parts) :] == tail.parts\n    assert query.slug.endswith(\"project\")\n\n\n@pytest.mark.asyncio\nasync def test_parse_remote_repo_empty_source(stub_resolve_sha: dict[str, AsyncMock]) -> None:\n    \"\"\"Test ``parse_remote_repo`` with an empty string.\n\n    Given an empty source string:\n    When ``parse_remote_repo`` is called,\n    Then a ValueError should be raised indicating an invalid repository URL.\n    \"\"\"\n    url = \"\"\n\n    with pytest.raises(ValueError, match=\"Invalid repository URL\"):\n        await parse_remote_repo(url)\n\n    stub_resolve_sha[\"head\"].assert_not_awaited()\n\n\n@pytest.mark.asyncio\n@pytest.mark.parametrize(\n    (\"path\", \"expected_branch\", \"mock_name\"),\n    [\n        (\"/tree/main\", \"main\", \"ref\"),\n        (\"/tree/abcd1234abcd1234abcd1234abcd1234abcd1234\", None, \"ref\"),\n    ],\n)\nasync def test_parse_url_branch_and_commit_distinction(\n    path: str,\n    expected_branch: str,\n    stub_branches: Callable[[list[str]], None],\n    stub_resolve_sha: dict[str, AsyncMock],\n    mock_name: str,\n) -> None:\n    \"\"\"Test ``parse_remote_repo`` distinguishing branch vs. 
commit hash.\n\n    Given either a branch URL (e.g., \".../tree/main\") or a 40-character commit URL:\n    When ``parse_remote_repo`` is called with branch fetching,\n    Then the function should correctly set ``branch`` or ``commit`` based on the URL content.\n    \"\"\"\n    stub_branches([\"main\", \"dev\", \"feature-branch\"])\n\n    url = DEMO_URL + path\n    query = await _assert_basic_repo_fields(url, stub_resolve_sha[mock_name])\n\n    assert query.branch == expected_branch\n    assert query.commit is not None\n    assert _is_valid_git_commit_hash(query.commit)\n\n\nasync def test_parse_local_dir_path_uuid_uniqueness() -> None:\n    \"\"\"Test ``parse_local_dir_path`` for unique UUID generation.\n\n    Given the same path twice:\n    When ``parse_local_dir_path`` is called repeatedly,\n    Then each call should produce a different query id.\n    \"\"\"\n    path = \"/home/user/project\"\n    query_1 = parse_local_dir_path(path)\n    query_2 = parse_local_dir_path(path)\n\n    assert query_1.id != query_2.id\n\n\n@pytest.mark.asyncio\nasync def test_parse_url_with_query_and_fragment(stub_resolve_sha: dict[str, AsyncMock]) -> None:\n    \"\"\"Test ``parse_remote_repo`` with query parameters and a fragment.\n\n    Given a URL like \"https://github.com/user/repo?arg=value#fragment\":\n    When ``parse_remote_repo`` is called,\n    Then those parts should be stripped, leaving a clean user/repo URL.\n    \"\"\"\n    url = DEMO_URL + \"?arg=value#fragment\"\n    query = await parse_remote_repo(url)\n\n    stub_resolve_sha[\"head\"].assert_awaited_once()\n    assert query.user_name == \"user\"\n    assert query.repo_name == \"repo\"\n    assert query.url == DEMO_URL  # URL should be cleaned\n\n\n@pytest.mark.asyncio\nasync def test_parse_url_unsupported_host(stub_resolve_sha: dict[str, AsyncMock]) -> None:\n    \"\"\"Test ``parse_remote_repo`` with an unsupported host.\n\n    Given \"https://only-domain.com\":\n    When ``parse_remote_repo`` is called,\n    Then a 
ValueError should be raised for the unknown domain.\n    \"\"\"\n    url = \"https://only-domain.com\"\n\n    with pytest.raises(ValueError, match=\"Unknown domain 'only-domain.com' in URL\"):\n        await parse_remote_repo(url)\n\n    stub_resolve_sha[\"head\"].assert_not_awaited()\n\n\n@pytest.mark.asyncio\nasync def test_parse_query_with_branch() -> None:\n    \"\"\"Test ``parse_remote_repo`` when a branch is specified in a blob path.\n\n    Given \"https://github.com/pandas-dev/pandas/blob/2.2.x/...\":\n    When ``parse_remote_repo`` is called,\n    Then the branch should be identified, subpath set, and commit resolved to a valid commit hash.\n    \"\"\"\n    url = \"https://github.com/pandas-dev/pandas/blob/2.2.x/.github/ISSUE_TEMPLATE/documentation_improvement.yaml\"\n    query = await parse_remote_repo(url)\n\n    assert query.user_name == \"pandas-dev\"\n    assert query.repo_name == \"pandas\"\n    assert query.url == \"https://github.com/pandas-dev/pandas\"\n    assert query.slug == \"pandas-dev-pandas\"\n    assert query.id is not None\n    assert query.subpath == \"/.github/ISSUE_TEMPLATE/documentation_improvement.yaml\"\n    assert query.branch == \"2.2.x\"\n    assert query.commit is not None\n    assert _is_valid_git_commit_hash(query.commit)\n    assert query.type == \"blob\"\n\n\n@pytest.mark.asyncio\n@pytest.mark.parametrize(\n    (\"path\", \"expected_branch\", \"expected_subpath\", \"mock_name\"),\n    [\n        (\"/tree/feature/fix1/src\", \"feature/fix1\", \"/src\", \"ref\"),\n        (\"/tree/main/src\", \"main\", \"/src\", \"ref\"),\n        (\"\", None, \"/\", \"head\"),\n        (\"/tree/nonexistent-branch/src\", None, \"/\", \"ref\"),\n        (\"/tree/fix\", \"fix\", \"/\", \"ref\"),\n        (\"/blob/fix/page.html\", \"fix\", \"/page.html\", \"ref\"),\n    ],\n)\nasync def test_parse_repo_source_with_various_url_patterns(\n    path: str,\n    expected_branch: str | None,\n    expected_subpath: str,\n    stub_branches: Callable[[list[str]], None],\n    
stub_resolve_sha: dict[str, AsyncMock],\n    mock_name: str,\n) -> None:\n    \"\"\"Test ``parse_remote_repo`` with various GitHub-style URL permutations.\n\n    Given various GitHub-style URL permutations:\n    When ``parse_remote_repo`` is called,\n    Then it should detect (or reject) a branch and resolve the sub-path.\n\n    Branch discovery is stubbed so that only names passed to ``stub_branches`` are considered \"remote\".\n    \"\"\"\n    stub_branches([\"feature/fix1\", \"main\", \"feature-branch\", \"fix\"])\n\n    url = DEMO_URL + path\n    query = await _assert_basic_repo_fields(url, stub_resolve_sha[mock_name])\n\n    assert query.branch == expected_branch\n    assert query.subpath == expected_subpath\n\n\n@pytest.mark.asyncio\nasync def _assert_basic_repo_fields(url: str, sha_mock: AsyncMock) -> IngestionQuery:\n    \"\"\"Run ``parse_remote_repo`` and assert user, repo and slug are parsed.\"\"\"\n    query = await parse_remote_repo(url)\n\n    assert query.commit is not None\n    assert _is_valid_git_commit_hash(query.commit)\n\n    if query.commit in url:\n        sha_mock.assert_not_awaited()\n    else:\n        sha_mock.assert_awaited_once()\n\n    assert query.user_name == \"user\"\n    assert query.repo_name == \"repo\"\n    assert query.slug == \"user-repo\"\n\n    return query\n"
  },
  {
    "path": "tests/server/__init__.py",
    "content": "\"\"\"Tests for the server.\"\"\"\n"
  },
  {
    "path": "tests/server/test_flow_integration.py",
    "content": "\"\"\"Integration tests covering core functionalities, edge cases, and concurrency handling.\"\"\"\n\nimport shutil\nimport sys\nfrom concurrent.futures import ThreadPoolExecutor\nfrom pathlib import Path\nfrom typing import Generator\n\nimport pytest\nfrom fastapi import status\nfrom fastapi.testclient import TestClient\nfrom pytest_mock import MockerFixture\n\nfrom src.server.main import app\n\nBASE_DIR = Path(__file__).resolve().parent.parent\nTEMPLATE_DIR = BASE_DIR / \"src\" / \"templates\"\n\n\n@pytest.fixture(scope=\"module\")\ndef test_client() -> Generator[TestClient, None, None]:\n    \"\"\"Create a test client fixture.\"\"\"\n    with TestClient(app) as client_instance:\n        client_instance.headers.update({\"Host\": \"localhost\"})\n        yield client_instance\n\n\n@pytest.fixture(autouse=True)\ndef mock_static_files(mocker: MockerFixture) -> None:\n    \"\"\"Mock the static file mount to avoid directory errors.\"\"\"\n    mock_static = mocker.patch(\"src.server.main.StaticFiles\", autospec=True)\n    mock_static.return_value = None\n    return mock_static\n\n\n@pytest.fixture(scope=\"module\", autouse=True)\ndef cleanup_tmp_dir() -> Generator[None, None, None]:\n    \"\"\"Remove ``/tmp/gitingest`` after this test-module is done.\"\"\"\n    yield  # run tests\n    temp_dir = Path(\"/tmp/gitingest\")\n    if temp_dir.exists():\n        try:\n            shutil.rmtree(temp_dir)\n        except PermissionError as exc:\n            sys.stderr.write(f\"Error cleaning up {temp_dir}: {exc}\\n\")\n\n\n@pytest.mark.asyncio\nasync def test_remote_repository_analysis(request: pytest.FixtureRequest) -> None:\n    \"\"\"Test the complete flow of analyzing a remote repository.\"\"\"\n    client = request.getfixturevalue(\"test_client\")\n    form_data = {\n        \"input_text\": \"https://github.com/octocat/Hello-World\",\n        \"max_file_size\": 243,\n        \"pattern_type\": \"exclude\",\n        \"pattern\": \"\",\n        \"token\": 
\"\",\n    }\n\n    response = client.post(\"/api/ingest\", json=form_data)\n    assert response.status_code == status.HTTP_200_OK, f\"Form submission failed: {response.text}\"\n\n    # Check that response is JSON\n    response_data = response.json()\n    assert \"content\" in response_data\n    assert response_data[\"content\"]\n    assert \"repo_url\" in response_data\n    assert \"summary\" in response_data\n    assert \"tree\" in response_data\n    assert \"content\" in response_data\n\n\n@pytest.mark.asyncio\nasync def test_invalid_repository_url(request: pytest.FixtureRequest) -> None:\n    \"\"\"Test handling of an invalid repository URL.\"\"\"\n    client = request.getfixturevalue(\"test_client\")\n    form_data = {\n        \"input_text\": \"https://github.com/nonexistent/repo\",\n        \"max_file_size\": 243,\n        \"pattern_type\": \"exclude\",\n        \"pattern\": \"\",\n        \"token\": \"\",\n    }\n\n    response = client.post(\"/api/ingest\", json=form_data)\n    # Should return 400 for invalid repository\n    assert response.status_code == status.HTTP_400_BAD_REQUEST, f\"Request failed: {response.text}\"\n\n    # Check that response is JSON error\n    response_data = response.json()\n    assert \"error\" in response_data\n\n\n@pytest.mark.asyncio\nasync def test_large_repository(request: pytest.FixtureRequest) -> None:\n    \"\"\"Simulate analysis of a large repository with nested folders.\"\"\"\n    client = request.getfixturevalue(\"test_client\")\n    # TODO: ingesting a large repo take too much time (eg: godotengine/godot repository)\n    form_data = {\n        \"input_text\": \"https://github.com/octocat/hello-world\",\n        \"max_file_size\": 10,\n        \"pattern_type\": \"exclude\",\n        \"pattern\": \"\",\n        \"token\": \"\",\n    }\n\n    response = client.post(\"/api/ingest\", json=form_data)\n    assert response.status_code == status.HTTP_200_OK, f\"Request failed: {response.text}\"\n\n    response_data = 
response.json()\n    if response.status_code == status.HTTP_200_OK:\n        assert \"content\" in response_data\n        assert response_data[\"content\"]\n    else:\n        assert \"error\" in response_data\n\n\n@pytest.mark.asyncio\nasync def test_concurrent_requests(request: pytest.FixtureRequest) -> None:\n    \"\"\"Test handling of multiple concurrent requests.\"\"\"\n    client = request.getfixturevalue(\"test_client\")\n\n    def make_request() -> None:\n        form_data = {\n            \"input_text\": \"https://github.com/octocat/hello-world\",\n            \"max_file_size\": 243,\n            \"pattern_type\": \"exclude\",\n            \"pattern\": \"\",\n            \"token\": \"\",\n        }\n        response = client.post(\"/api/ingest\", json=form_data)\n        assert response.status_code == status.HTTP_200_OK, f\"Request failed: {response.text}\"\n\n        response_data = response.json()\n        if response.status_code == status.HTTP_200_OK:\n            assert \"content\" in response_data\n            assert response_data[\"content\"]\n        else:\n            assert \"error\" in response_data\n\n    with ThreadPoolExecutor(max_workers=5) as executor:\n        futures = [executor.submit(make_request) for _ in range(5)]\n        for future in futures:\n            future.result()\n\n\n@pytest.mark.asyncio\nasync def test_large_file_handling(request: pytest.FixtureRequest) -> None:\n    \"\"\"Test handling of repositories with large files.\"\"\"\n    client = request.getfixturevalue(\"test_client\")\n    form_data = {\n        \"input_text\": \"https://github.com/octocat/Hello-World\",\n        \"max_file_size\": 1,\n        \"pattern_type\": \"exclude\",\n        \"pattern\": \"\",\n        \"token\": \"\",\n    }\n\n    response = client.post(\"/api/ingest\", json=form_data)\n    assert response.status_code == status.HTTP_200_OK, f\"Request failed: {response.text}\"\n\n    response_data = response.json()\n    if response.status_code == 
status.HTTP_200_OK:\n        assert \"content\" in response_data\n        assert response_data[\"content\"]\n    else:\n        assert \"error\" in response_data\n\n\n@pytest.mark.asyncio\nasync def test_repository_with_patterns(request: pytest.FixtureRequest) -> None:\n    \"\"\"Test repository analysis with include/exclude patterns.\"\"\"\n    client = request.getfixturevalue(\"test_client\")\n    form_data = {\n        \"input_text\": \"https://github.com/octocat/Hello-World\",\n        \"max_file_size\": 243,\n        \"pattern_type\": \"include\",\n        \"pattern\": \"*.md\",\n        \"token\": \"\",\n    }\n\n    response = client.post(\"/api/ingest\", json=form_data)\n    assert response.status_code == status.HTTP_200_OK, f\"Request failed: {response.text}\"\n\n    response_data = response.json()\n    if response.status_code == status.HTTP_200_OK:\n        assert \"content\" in response_data\n        assert \"pattern_type\" in response_data\n        assert response_data[\"pattern_type\"] == \"include\"\n        assert \"pattern\" in response_data\n        assert response_data[\"pattern\"] == \"*.md\"\n    else:\n        assert \"error\" in response_data\n"
  },
  {
    "path": "tests/test_cli.py",
    "content": "\"\"\"Tests for the Gitingest CLI.\"\"\"\n\nfrom __future__ import annotations\n\nfrom inspect import signature\nfrom pathlib import Path\n\nimport pytest\nfrom click.testing import CliRunner, Result\n\nfrom gitingest.__main__ import main\nfrom gitingest.config import MAX_FILE_SIZE, OUTPUT_FILE_NAME\n\n\n@pytest.mark.parametrize(\n    (\"cli_args\", \"expect_file\"),\n    [\n        pytest.param([\"./\"], True, id=\"default-options\"),\n        pytest.param(\n            [\n                \"./\",\n                \"--output\",\n                str(OUTPUT_FILE_NAME),\n                \"--max-size\",\n                str(MAX_FILE_SIZE),\n                \"--exclude-pattern\",\n                \"tests/\",\n                \"--include-pattern\",\n                \"src/\",\n                \"--include-submodules\",\n            ],\n            True,\n            id=\"custom-options\",\n        ),\n    ],\n)\ndef test_cli_writes_file(\n    tmp_path: Path,\n    monkeypatch: pytest.MonkeyPatch,\n    *,\n    cli_args: list[str],\n    expect_file: bool,\n) -> None:\n    \"\"\"Run the CLI and verify that the SARIF file is created (or not).\"\"\"\n    expectes_exit_code = 0\n    # Work inside an isolated temp directory\n    monkeypatch.chdir(tmp_path)\n\n    result = _invoke_isolated_cli_runner(cli_args)\n\n    assert result.exit_code == expectes_exit_code, result.stderr\n\n    # Summary line should be on STDOUT\n    stdout_lines = result.stdout.splitlines()\n    assert f\"Analysis complete! 
Output written to: {OUTPUT_FILE_NAME}\" in stdout_lines\n\n    # File side-effect\n    sarif_file = tmp_path / OUTPUT_FILE_NAME\n    assert sarif_file.exists() is expect_file, f\"{OUTPUT_FILE_NAME} existence did not match expectation\"\n\n\ndef test_cli_with_stdout_output() -> None:\n    \"\"\"Test CLI invocation with output directed to STDOUT.\"\"\"\n    output_file = Path(OUTPUT_FILE_NAME)\n    # Clean up any existing digest.txt file before test\n    if output_file.exists():\n        output_file.unlink()\n\n    try:\n        result = _invoke_isolated_cli_runner([\"./\", \"--output\", \"-\", \"--exclude-pattern\", \"tests/\"])\n\n        # ─── core expectations (stdout) ────────────────────────────────────-\n        assert result.exit_code == 0, f\"CLI exited with code {result.exit_code}, stderr: {result.stderr}\"\n        assert \"---\" in result.stdout, \"Expected file separator '---' not found in STDOUT\"\n        assert \"src/gitingest/__main__.py\" in result.stdout, (\n            \"Expected content (e.g., src/gitingest/__main__.py) not found in STDOUT\"\n        )\n        assert not output_file.exists(), f\"Output file {output_file} was unexpectedly created.\"\n\n        # ─── the summary must *not* pollute STDOUT, must appear on STDERR ───\n        summary = \"Analysis complete! 
Output sent to stdout.\"\n        stdout_lines = result.stdout.splitlines()\n        stderr_lines = result.stderr.splitlines()\n        assert summary not in stdout_lines, \"Unexpected summary message found in STDOUT\"\n        assert summary in stderr_lines, \"Expected summary message not found in STDERR\"\n        assert f\"Output written to: {output_file.name}\" not in stderr_lines\n    finally:\n        # Clean up any digest.txt file that might have been created during test\n        if output_file.exists():\n            output_file.unlink()\n\n\ndef _invoke_isolated_cli_runner(args: list[str]) -> Result:\n    \"\"\"Invoke ``main`` with a ``CliRunner`` that keeps ``stderr`` separate on Click 8.0-8.1.\"\"\"\n    kwargs = {}\n    if \"mix_stderr\" in signature(CliRunner.__init__).parameters:\n        kwargs[\"mix_stderr\"] = False  # Click 8.0-8.1\n    runner = CliRunner(**kwargs)\n    return runner.invoke(main, args)\n"
  },
  {
    "path": "tests/test_clone.py",
    "content": "\"\"\"Tests for the ``clone`` module.\n\nThese tests cover various scenarios for cloning repositories, verifying that the appropriate Git commands are invoked\nand handling edge cases such as nonexistent URLs, timeouts, redirects, and specific commits or branches.\n\"\"\"\n\nfrom __future__ import annotations\n\nimport sys\nfrom typing import TYPE_CHECKING\n\nimport pytest\n\nfrom gitingest.clone import clone_repo\nfrom gitingest.schemas import CloneConfig\nfrom gitingest.utils.git_utils import check_repo_exists\nfrom tests.conftest import DEMO_URL, LOCAL_REPO_PATH\n\nif TYPE_CHECKING:\n    from pathlib import Path\n    from unittest.mock import AsyncMock\n\n    from pytest_mock import MockerFixture\n\n\n# All cloning-related tests assume (unless explicitly overridden) that the repository exists.\n# Apply the check-repo patch automatically so individual tests don't need to repeat it.\npytestmark = pytest.mark.usefixtures(\"repo_exists_true\")\n\nGIT_INSTALLED_CALLS = 2 if sys.platform == \"win32\" else 1\n\n\n@pytest.mark.asyncio\nasync def test_clone_with_commit(repo_exists_true: AsyncMock, gitpython_mocks: dict) -> None:\n    \"\"\"Test cloning a repository with a specific commit hash.\n\n    Given a valid URL and a commit hash:\n    When ``clone_repo`` is called,\n    Then the repository should be cloned and checked out at that commit.\n    \"\"\"\n    commit_hash = \"a\" * 40  # Simulating a valid commit hash\n    clone_config = CloneConfig(\n        url=DEMO_URL,\n        local_path=LOCAL_REPO_PATH,\n        commit=commit_hash,\n        branch=\"main\",\n    )\n\n    await clone_repo(clone_config)\n\n    repo_exists_true.assert_any_call(clone_config.url, token=None)\n\n    # Verify GitPython calls were made\n    mock_git_cmd = gitpython_mocks[\"git_cmd\"]\n    mock_repo = gitpython_mocks[\"repo\"]\n    mock_clone_from = gitpython_mocks[\"clone_from\"]\n\n    # Should have called version (for ensure_git_installed)\n    
mock_git_cmd.version.assert_called()\n\n    # Should have called clone_from (since partial_clone=False)\n    mock_clone_from.assert_called_once()\n\n    # Should have called fetch and checkout on the repo\n    mock_repo.git.fetch.assert_called()\n    mock_repo.git.checkout.assert_called_with(commit_hash)\n\n\n@pytest.mark.asyncio\nasync def test_clone_nonexistent_repository(repo_exists_true: AsyncMock) -> None:\n    \"\"\"Test cloning a nonexistent repository URL.\n\n    Given an invalid or nonexistent URL:\n    When ``clone_repo`` is called,\n    Then a ValueError should be raised with an appropriate error message.\n    \"\"\"\n    clone_config = CloneConfig(\n        url=\"https://github.com/user/nonexistent-repo\",\n        local_path=LOCAL_REPO_PATH,\n        commit=None,\n        branch=\"main\",\n    )\n    # Override the default fixture behaviour for this test\n    repo_exists_true.return_value = False\n\n    with pytest.raises(ValueError, match=\"Repository not found\"):\n        await clone_repo(clone_config)\n\n    repo_exists_true.assert_any_call(clone_config.url, token=None)\n\n\n@pytest.mark.asyncio\n@pytest.mark.parametrize(\n    (\"git_command_succeeds\", \"expected\"),\n    [\n        (True, True),  # git ls-remote succeeds -> repo exists\n        (False, False),  # git ls-remote fails -> repo doesn't exist or no access\n    ],\n)\nasync def test_check_repo_exists(\n    git_command_succeeds: bool,  # noqa: FBT001\n    *,\n    expected: bool,\n    mocker: MockerFixture,\n) -> None:\n    \"\"\"Verify that ``check_repo_exists`` works by using _resolve_ref_to_sha.\"\"\"\n    mock_resolve = mocker.patch(\"gitingest.utils.git_utils._resolve_ref_to_sha\")\n\n    if git_command_succeeds:\n        mock_resolve.return_value = \"abc123def456\"  # Mock SHA\n    else:\n        mock_resolve.side_effect = ValueError(\"Repository not found\")\n\n    result = await check_repo_exists(DEMO_URL)\n\n    assert result is expected\n    
mock_resolve.assert_called_once_with(DEMO_URL, \"HEAD\", token=None)\n\n\n@pytest.mark.asyncio\nasync def test_clone_without_commit(repo_exists_true: AsyncMock, gitpython_mocks: dict) -> None:\n    \"\"\"Test cloning a repository when no commit hash is provided.\n\n    Given a valid URL and no commit hash:\n    When ``clone_repo`` is called,\n    Then the repository should be cloned and checked out at the resolved commit.\n    \"\"\"\n    clone_config = CloneConfig(url=DEMO_URL, local_path=LOCAL_REPO_PATH, commit=None, branch=\"main\")\n\n    await clone_repo(clone_config)\n\n    repo_exists_true.assert_any_call(clone_config.url, token=None)\n\n    # Verify GitPython calls were made\n    mock_git_cmd = gitpython_mocks[\"git_cmd\"]\n    mock_repo = gitpython_mocks[\"repo\"]\n    mock_clone_from = gitpython_mocks[\"clone_from\"]\n\n    # Should have resolved the commit via ls_remote\n    mock_git_cmd.ls_remote.assert_called()\n    # Should have cloned the repo\n    mock_clone_from.assert_called_once()\n    # Should have fetched and checked out\n    mock_repo.git.fetch.assert_called()\n    mock_repo.git.checkout.assert_called()\n\n\n@pytest.mark.asyncio\nasync def test_clone_creates_parent_directory(tmp_path: Path, gitpython_mocks: dict) -> None:\n    \"\"\"Test that ``clone_repo`` creates parent directories if they don't exist.\n\n    Given a local path with non-existent parent directories:\n    When ``clone_repo`` is called,\n    Then it should create the parent directories before attempting to clone.\n    \"\"\"\n    nested_path = tmp_path / \"deep\" / \"nested\" / \"path\" / \"repo\"\n    clone_config = CloneConfig(url=DEMO_URL, local_path=str(nested_path))\n\n    await clone_repo(clone_config)\n\n    # Verify parent directories were created\n    assert nested_path.parent.exists()\n\n    # Verify clone operation happened\n    mock_clone_from = gitpython_mocks[\"clone_from\"]\n    mock_clone_from.assert_called_once()\n\n\n@pytest.mark.asyncio\nasync def 
test_clone_with_specific_subpath(gitpython_mocks: dict) -> None:\n    \"\"\"Test cloning a repository with a specific subpath.\n\n    Given a valid repository URL and a specific subpath:\n    When ``clone_repo`` is called,\n    Then the repository should be cloned with sparse checkout enabled.\n    \"\"\"\n    subpath = \"src/docs\"\n    clone_config = CloneConfig(url=DEMO_URL, local_path=LOCAL_REPO_PATH, subpath=subpath)\n\n    await clone_repo(clone_config)\n\n    # Verify partial clone (using git.clone instead of Repo.clone_from)\n    mock_git_cmd = gitpython_mocks[\"git_cmd\"]\n    mock_git_cmd.clone.assert_called()\n\n    # Verify sparse checkout was configured\n    mock_repo = gitpython_mocks[\"repo\"]\n    mock_repo.git.sparse_checkout.assert_called()\n\n\n@pytest.mark.asyncio\nasync def test_clone_with_include_submodules(gitpython_mocks: dict) -> None:\n    \"\"\"Test cloning a repository with submodules included.\n\n    Given a valid URL and ``include_submodules=True``:\n    When ``clone_repo`` is called,\n    Then the repository should update submodules after cloning.\n    \"\"\"\n    clone_config = CloneConfig(url=DEMO_URL, local_path=LOCAL_REPO_PATH, branch=\"main\", include_submodules=True)\n\n    await clone_repo(clone_config)\n\n    # Verify submodule update was called\n    mock_repo = gitpython_mocks[\"repo\"]\n    mock_repo.git.submodule.assert_called_with(\"update\", \"--init\", \"--recursive\", \"--depth=1\")\n\n\n@pytest.mark.asyncio\nasync def test_check_repo_exists_with_auth_token(mocker: MockerFixture) -> None:\n    \"\"\"Test ``check_repo_exists`` with authentication token.\n\n    Given a GitHub URL and a token:\n    When ``check_repo_exists`` is called,\n    Then it should pass the token to _resolve_ref_to_sha.\n    \"\"\"\n    mock_resolve = mocker.patch(\"gitingest.utils.git_utils._resolve_ref_to_sha\")\n    mock_resolve.return_value = \"abc123def456\"  # Mock SHA\n\n    test_token = \"token123\"  # noqa: S105\n    result = await 
check_repo_exists(\"https://github.com/test/repo\", token=test_token)\n\n    assert result is True\n    mock_resolve.assert_called_once_with(\"https://github.com/test/repo\", \"HEAD\", token=test_token)\n"
  },
  {
    "path": "tests/test_git_utils.py",
    "content": "\"\"\"Tests for the ``git_utils`` module.\n\nThese tests validate the ``validate_github_token`` function, which ensures that\nGitHub personal access tokens (PATs) are properly formatted.\n\"\"\"\n\nfrom __future__ import annotations\n\nimport base64\nfrom typing import TYPE_CHECKING\n\nimport pytest\n\nfrom gitingest.utils.exceptions import InvalidGitHubTokenError\nfrom gitingest.utils.git_utils import create_git_auth_header, create_git_repo, is_github_host, validate_github_token\n\nif TYPE_CHECKING:\n    from pathlib import Path\n\n    from pytest_mock import MockerFixture\n\n\n@pytest.mark.parametrize(\n    \"token\",\n    [\n        # Valid tokens: correct prefixes and at least 36 allowed characters afterwards\n        \"github_pat_\" + \"a\" * 22 + \"_\" + \"b\" * 59,\n        \"ghp_\" + \"A\" * 36,\n        \"ghu_\" + \"B\" * 36,\n        \"ghs_\" + \"C\" * 36,\n        \"ghr_\" + \"D\" * 36,\n        \"gho_\" + \"E\" * 36,\n    ],\n)\ndef test_validate_github_token_valid(token: str) -> None:\n    \"\"\"validate_github_token should accept properly-formatted tokens.\"\"\"\n    # Should not raise any exception\n    validate_github_token(token)\n\n\n@pytest.mark.parametrize(\n    \"token\",\n    [\n        \"github_pat_short\",  # Too short after prefix\n        \"ghp_\" + \"b\" * 35,  # one character short\n        \"invalidprefix_\" + \"c\" * 36,  # Wrong prefix\n        \"github_pat_\" + \"!\" * 36,  # Disallowed characters\n        \"github_pat_\" + \"a\" * 36,  # Too short after 'github_pat_' prefix\n        \"\",  # Empty string\n    ],\n)\ndef test_validate_github_token_invalid(token: str) -> None:\n    \"\"\"Test that ``validate_github_token`` raises ``InvalidGitHubTokenError`` on malformed tokens.\"\"\"\n    with pytest.raises(InvalidGitHubTokenError):\n        validate_github_token(token)\n\n\n@pytest.mark.parametrize(\n    (\"local_path\", \"url\", \"token\", \"should_configure_auth\"),\n    [\n        (\n            \"/some/path\",\n   
         \"https://github.com/owner/repo.git\",\n            None,\n            False,  # No auth configuration expected when token is None\n        ),\n        (\n            \"/some/path\",\n            \"https://github.com/owner/repo.git\",\n            \"ghp_\" + \"d\" * 36,\n            True,  # Auth configuration expected for GitHub URL + token\n        ),\n        (\n            \"/some/path\",\n            \"https://gitlab.com/owner/repo.git\",\n            \"ghp_\" + \"e\" * 36,\n            False,  # No auth configuration for non-GitHub URL even if token provided\n        ),\n    ],\n)\ndef test_create_git_repo(\n    local_path: str,\n    url: str,\n    token: str | None,\n    should_configure_auth: bool,  # noqa: FBT001\n    mocker: MockerFixture,\n) -> None:\n    \"\"\"Test that ``create_git_repo`` creates a proper Git repo object.\"\"\"\n    # Mock git.Repo to avoid actual filesystem operations\n    mock_repo = mocker.MagicMock()\n    mock_repo_class = mocker.patch(\"git.Repo\", return_value=mock_repo)\n\n    repo = create_git_repo(local_path, url, token)\n\n    # Should create repo with correct path\n    mock_repo_class.assert_called_once_with(local_path)\n    assert repo == mock_repo\n\n    # Check auth configuration\n    if should_configure_auth:\n        mock_repo.git.config.assert_called_once()\n    else:\n        mock_repo.git.config.assert_not_called()\n\n\n@pytest.mark.parametrize(\n    \"token\",\n    [\n        \"ghp_abcdefghijklmnopqrstuvwxyz012345\",  # typical ghp_ token\n        \"github_pat_1234567890abcdef1234567890abcdef1234\",\n    ],\n)\ndef test_create_git_auth_header(token: str) -> None:\n    \"\"\"Test that ``create_git_auth_header`` produces correct base64-encoded header.\"\"\"\n    header = create_git_auth_header(token)\n    expected_basic = base64.b64encode(f\"x-oauth-basic:{token}\".encode()).decode()\n    expected = f\"http.https://github.com/.extraheader=Authorization: Basic {expected_basic}\"\n    assert header == 
expected\n\n\n@pytest.mark.parametrize(\n    (\"url\", \"token\", \"should_call\"),\n    [\n        (\"https://github.com/foo/bar.git\", \"ghp_\" + \"f\" * 36, True),\n        (\"https://github.com/foo/bar.git\", None, False),\n        (\"https://gitlab.com/foo/bar.git\", \"ghp_\" + \"g\" * 36, False),\n    ],\n)\ndef test_create_git_repo_helper_calls(\n    mocker: MockerFixture,\n    tmp_path: Path,\n    *,\n    url: str,\n    token: str | None,\n    should_call: bool,\n) -> None:\n    \"\"\"Test that ``create_git_auth_header`` is invoked only when appropriate.\"\"\"\n    work_dir = tmp_path / \"repo\"\n    header_mock = mocker.patch(\"gitingest.utils.git_utils.create_git_auth_header\", return_value=\"key=value\")\n    mock_repo = mocker.MagicMock()\n    mocker.patch(\"git.Repo\", return_value=mock_repo)\n\n    create_git_repo(str(work_dir), url, token)\n\n    if should_call:\n        header_mock.assert_called_once_with(token, url=url)\n        mock_repo.git.config.assert_called_once_with(\"key\", \"value\")\n    else:\n        header_mock.assert_not_called()\n        mock_repo.git.config.assert_not_called()\n\n\n@pytest.mark.parametrize(\n    (\"url\", \"expected\"),\n    [\n        # GitHub.com URLs\n        (\"https://github.com/owner/repo.git\", True),\n        (\"http://github.com/owner/repo.git\", True),\n        (\"https://github.com/owner/repo\", True),\n        # GitHub Enterprise URLs\n        (\"https://github.company.com/owner/repo.git\", True),\n        (\"https://github.enterprise.org/owner/repo.git\", True),\n        (\"http://github.internal/owner/repo.git\", True),\n        (\"https://github.example.co.uk/owner/repo.git\", True),\n        # Non-GitHub URLs\n        (\"https://gitlab.com/owner/repo.git\", False),\n        (\"https://bitbucket.org/owner/repo.git\", False),\n        (\"https://git.example.com/owner/repo.git\", False),\n        (\"https://mygithub.com/owner/repo.git\", False),  # doesn't start with \"github.\"\n        
(\"https://subgithub.com/owner/repo.git\", False),\n        (\"https://example.com/github/repo.git\", False),\n        # Edge cases\n        (\"\", False),\n        (\"not-a-url\", False),\n        (\"ftp://github.com/owner/repo.git\", True),  # Different protocol but still github.com\n    ],\n)\ndef test_is_github_host(url: str, *, expected: bool) -> None:\n    \"\"\"Test that ``is_github_host`` correctly identifies GitHub and GitHub Enterprise URLs.\"\"\"\n    assert is_github_host(url) == expected\n\n\n@pytest.mark.parametrize(\n    (\"token\", \"url\", \"expected_hostname\"),\n    [\n        # GitHub.com URLs (default)\n        (\"ghp_\" + \"a\" * 36, \"https://github.com\", \"github.com\"),\n        (\"ghp_\" + \"a\" * 36, \"https://github.com/owner/repo.git\", \"github.com\"),\n        # GitHub Enterprise URLs\n        (\"ghp_\" + \"b\" * 36, \"https://github.company.com\", \"github.company.com\"),\n        (\"ghp_\" + \"c\" * 36, \"https://github.enterprise.org/owner/repo.git\", \"github.enterprise.org\"),\n        (\"ghp_\" + \"d\" * 36, \"http://github.internal\", \"github.internal\"),\n    ],\n)\ndef test_create_git_auth_header_with_ghe_url(token: str, url: str, expected_hostname: str) -> None:\n    \"\"\"Test that ``create_git_auth_header`` handles GitHub Enterprise URLs correctly.\"\"\"\n    header = create_git_auth_header(token, url=url)\n    expected_basic = base64.b64encode(f\"x-oauth-basic:{token}\".encode()).decode()\n    expected = f\"http.https://{expected_hostname}/.extraheader=Authorization: Basic {expected_basic}\"\n    assert header == expected\n\n\n@pytest.mark.parametrize(\n    (\"local_path\", \"url\", \"token\", \"expected_auth_hostname\"),\n    [\n        # GitHub.com URLs - should use default hostname\n        (\n            \"/some/path\",\n            \"https://github.com/owner/repo.git\",\n            \"ghp_\" + \"a\" * 36,\n            \"github.com\",\n        ),\n        # GitHub Enterprise URLs - should use custom hostname\n       
 (\n            \"/some/path\",\n            \"https://github.company.com/owner/repo.git\",\n            \"ghp_\" + \"b\" * 36,\n            \"github.company.com\",\n        ),\n        (\n            \"/some/path\",\n            \"https://github.enterprise.org/owner/repo.git\",\n            \"ghp_\" + \"c\" * 36,\n            \"github.enterprise.org\",\n        ),\n        (\n            \"/some/path\",\n            \"http://github.internal/owner/repo.git\",\n            \"ghp_\" + \"d\" * 36,\n            \"github.internal\",\n        ),\n    ],\n)\ndef test_create_git_repo_with_ghe_urls(\n    local_path: str,\n    url: str,\n    token: str,\n    expected_auth_hostname: str,\n    mocker: MockerFixture,\n) -> None:\n    \"\"\"Test that ``create_git_repo`` handles GitHub Enterprise URLs correctly.\"\"\"\n    mock_repo = mocker.MagicMock()\n    mocker.patch(\"git.Repo\", return_value=mock_repo)\n\n    create_git_repo(local_path, url, token)\n\n    # Should configure auth with the correct hostname\n    mock_repo.git.config.assert_called_once()\n    auth_config_call = mock_repo.git.config.call_args[0]\n\n    # The first argument should contain the hostname\n    assert expected_auth_hostname in auth_config_call[0]\n\n\n@pytest.mark.parametrize(\n    (\"local_path\", \"url\", \"token\"),\n    [\n        # Should NOT configure auth for non-GitHub URLs\n        (\"/some/path\", \"https://gitlab.com/owner/repo.git\", \"ghp_\" + \"a\" * 36),\n        (\"/some/path\", \"https://bitbucket.org/owner/repo.git\", \"ghp_\" + \"b\" * 36),\n        (\"/some/path\", \"https://git.example.com/owner/repo.git\", \"ghp_\" + \"c\" * 36),\n    ],\n)\ndef test_create_git_repo_ignores_non_github_urls(\n    local_path: str,\n    url: str,\n    token: str,\n    mocker: MockerFixture,\n) -> None:\n    \"\"\"Test that ``create_git_repo`` does not configure auth for non-GitHub URLs.\"\"\"\n    mock_repo = mocker.MagicMock()\n    mocker.patch(\"git.Repo\", return_value=mock_repo)\n\n    
create_git_repo(local_path, url, token)\n\n    # Should not configure auth for non-GitHub URLs\n    mock_repo.git.config.assert_not_called()\n"
  },
  {
    "path": "tests/test_gitignore_feature.py",
    "content": "\"\"\"Tests for the gitignore functionality in Gitingest.\"\"\"\n\nfrom pathlib import Path\n\nimport pytest\n\nfrom gitingest.entrypoint import ingest_async\nfrom gitingest.utils.ignore_patterns import load_ignore_patterns\n\n\n@pytest.fixture(name=\"repo_path\")\ndef repo_fixture(tmp_path: Path) -> Path:\n    \"\"\"Create a temporary repository structure.\n\n    The repository structure includes:\n    - A ``.gitignore`` that excludes ``exclude.txt``\n    - ``include.txt`` (should be processed)\n    - ``exclude.txt`` (should be skipped when gitignore rules are respected)\n    \"\"\"\n    # Create a .gitignore file that excludes 'exclude.txt'\n    gitignore_file = tmp_path / \".gitignore\"\n    gitignore_file.write_text(\"exclude.txt\\n\")\n\n    # Create a file that should be included\n    include_file = tmp_path / \"include.txt\"\n    include_file.write_text(\"This file should be included.\")\n\n    # Create a file that should be excluded\n    exclude_file = tmp_path / \"exclude.txt\"\n    exclude_file.write_text(\"This file should be excluded.\")\n\n    return tmp_path\n\n\ndef test_load_gitignore_patterns(tmp_path: Path) -> None:\n    \"\"\"Test that ``load_ignore_patterns()`` correctly loads patterns from a ``.gitignore`` file.\"\"\"\n    gitignore = tmp_path / \".gitignore\"\n    # Write some sample patterns with a comment line included\n    gitignore.write_text(\"exclude.txt\\n*.log\\n# a comment\\n\")\n\n    patterns = load_ignore_patterns(tmp_path, filename=\".gitignore\")\n\n    # Check that the expected patterns are loaded\n    assert \"exclude.txt\" in patterns\n    assert \"*.log\" in patterns\n    # Ensure that comment lines are not added\n    for pattern in patterns:\n        assert not pattern.startswith(\"#\")\n\n\n@pytest.mark.asyncio\nasync def test_ingest_with_gitignore(repo_path: Path) -> None:\n    \"\"\"Integration test for ``ingest_async()`` respecting ``.gitignore`` rules.\n\n    When ``include_gitignored`` is ``False`` 
(default), the content of ``exclude.txt`` should be omitted.\n    When ``include_gitignored`` is ``True``, both files should be present.\n    \"\"\"\n    # Run ingestion with the gitignore functionality enabled.\n    _, _, content_with_ignore = await ingest_async(source=str(repo_path))\n    # 'exclude.txt' should be skipped.\n    assert \"This file should be excluded.\" not in content_with_ignore\n    # 'include.txt' should be processed.\n    assert \"This file should be included.\" in content_with_ignore\n\n    # Run ingestion with the gitignore functionality disabled.\n    _, _, content_without_ignore = await ingest_async(source=str(repo_path), include_gitignored=True)\n    # Now both files should be present.\n    assert \"This file should be excluded.\" in content_without_ignore\n    assert \"This file should be included.\" in content_without_ignore\n"
  },
  {
    "path": "tests/test_ingestion.py",
    "content": "\"\"\"Tests for the ``query_ingestion`` module.\n\nThese tests validate directory scanning, file content extraction, notebook handling, and the overall ingestion logic,\nincluding filtering patterns and subpaths.\n\"\"\"\n\nfrom __future__ import annotations\n\nimport re\nfrom typing import TYPE_CHECKING, TypedDict\n\nimport pytest\n\nfrom gitingest.ingestion import ingest_query\n\nif TYPE_CHECKING:\n    from pathlib import Path\n\n    from gitingest.query_parser import IngestionQuery\n\n\ndef test_run_ingest_query(temp_directory: Path, sample_query: IngestionQuery) -> None:\n    \"\"\"Test ``ingest_query`` to ensure it processes the directory and returns expected results.\n\n    Given a directory with ``.txt`` and ``.py`` files:\n    When ``ingest_query`` is invoked,\n    Then it should produce a summary string listing the files analyzed and a combined content string.\n    \"\"\"\n    sample_query.local_path = temp_directory\n    sample_query.subpath = \"/\"\n    sample_query.type = None\n\n    summary, _, content = ingest_query(sample_query)\n\n    assert \"Repository: test_user/test_repo\" in summary\n    assert \"Files analyzed: 8\" in summary\n\n    # Check presence of key files in the content\n    assert \"src/subfile1.txt\" in content\n    assert \"src/subfile2.py\" in content\n    assert \"src/subdir/file_subdir.txt\" in content\n    assert \"src/subdir/file_subdir.py\" in content\n    assert \"file1.txt\" in content\n    assert \"file2.py\" in content\n    assert \"dir1/file_dir1.txt\" in content\n    assert \"dir2/file_dir2.txt\" in content\n\n\n# TODO: Additional tests:\n# - Multiple include patterns, e.g. 
[\"*.txt\", \"*.py\"] or [\"/src/*\", \"*.txt\"].\n# - Edge cases with weird file names or deep subdirectory structures.\n# TODO : def test_include_nonexistent_extension\n\n\nclass PatternScenario(TypedDict):\n    \"\"\"A scenario for testing the ingestion of a set of patterns.\"\"\"\n\n    include_patterns: set[str]\n    ignore_patterns: set[str]\n    expected_num_files: int\n    expected_content: set[str]\n    expected_structure: set[str]\n    expected_not_structure: set[str]\n\n\n@pytest.mark.parametrize(\n    \"pattern_scenario\",\n    [\n        pytest.param(\n            PatternScenario(\n                {\n                    \"include_patterns\": {\"file2.py\", \"dir2/file_dir2.txt\"},\n                    \"ignore_patterns\": {*()},\n                    \"expected_num_files\": 2,\n                    \"expected_content\": {\"file2.py\", \"dir2/file_dir2.txt\"},\n                    \"expected_structure\": {\"test_repo/\", \"dir2/\"},\n                    \"expected_not_structure\": {\"src/\", \"subdir/\", \"dir1/\"},\n                },\n            ),\n            id=\"include-explicit-files\",\n        ),\n        pytest.param(\n            PatternScenario(\n                {\n                    \"include_patterns\": {\n                        \"file1.txt\",\n                        \"file2.py\",\n                        \"file_dir1.txt\",\n                        \"*/file_dir2.txt\",\n                    },\n                    \"ignore_patterns\": {*()},\n                    \"expected_num_files\": 4,\n                    \"expected_content\": {\"file1.txt\", \"file2.py\", \"dir1/file_dir1.txt\", \"dir2/file_dir2.txt\"},\n                    \"expected_structure\": {\"test_repo/\", \"dir1/\", \"dir2/\"},\n                    \"expected_not_structure\": {\"src/\", \"subdir/\"},\n                },\n            ),\n            id=\"include-wildcard-directory\",\n        ),\n        pytest.param(\n            PatternScenario(\n                {\n         
           \"include_patterns\": {\"*.py\"},\n                    \"ignore_patterns\": {*()},\n                    \"expected_num_files\": 3,\n                    \"expected_content\": {\n                        \"file2.py\",\n                        \"src/subfile2.py\",\n                        \"src/subdir/file_subdir.py\",\n                    },\n                    \"expected_structure\": {\"test_repo/\", \"src/\", \"subdir/\"},\n                    \"expected_not_structure\": {\"dir1/\", \"dir2/\"},\n                },\n            ),\n            id=\"include-wildcard-files\",\n        ),\n        pytest.param(\n            PatternScenario(\n                {\n                    \"include_patterns\": {\"**/file_dir2.txt\", \"src/**/*.py\"},\n                    \"ignore_patterns\": {*()},\n                    \"expected_num_files\": 3,\n                    \"expected_content\": {\n                        \"dir2/file_dir2.txt\",\n                        \"src/subfile2.py\",\n                        \"src/subdir/file_subdir.py\",\n                    },\n                    \"expected_structure\": {\"test_repo/\", \"dir2/\", \"src/\", \"subdir/\"},\n                    \"expected_not_structure\": {\"dir1/\"},\n                },\n            ),\n            id=\"include-recursive-wildcard\",\n        ),\n        pytest.param(\n            PatternScenario(\n                {\n                    \"include_patterns\": {*()},\n                    \"ignore_patterns\": {\"file2.py\", \"dir2/file_dir2.txt\"},\n                    \"expected_num_files\": 6,\n                    \"expected_content\": {\n                        \"file1.txt\",\n                        \"src/subfile1.txt\",\n                        \"src/subfile2.py\",\n                        \"src/subdir/file_subdir.txt\",\n                        \"src/subdir/file_subdir.py\",\n                        \"dir1/file_dir1.txt\",\n                    },\n                    \"expected_structure\": 
{\"test_repo/\", \"src/\", \"subdir/\", \"dir1/\"},\n                    \"expected_not_structure\": {\"dir2/\"},\n                },\n            ),\n            id=\"exclude-explicit-files\",\n        ),\n        pytest.param(\n            PatternScenario(\n                {\n                    \"include_patterns\": {*()},\n                    \"ignore_patterns\": {\"file1.txt\", \"file2.py\", \"*/file_dir1.txt\"},\n                    \"expected_num_files\": 5,\n                    \"expected_content\": {\n                        \"src/subfile1.txt\",\n                        \"src/subfile2.py\",\n                        \"src/subdir/file_subdir.txt\",\n                        \"src/subdir/file_subdir.py\",\n                        \"dir2/file_dir2.txt\",\n                    },\n                    \"expected_structure\": {\"test_repo/\", \"src/\", \"subdir/\", \"dir2/\"},\n                    \"expected_not_structure\": {\"dir1/\"},\n                },\n            ),\n            id=\"exclude-wildcard-directory\",\n        ),\n        pytest.param(\n            PatternScenario(\n                {\n                    \"include_patterns\": {*()},\n                    \"ignore_patterns\": {\"src/**/*.py\"},\n                    \"expected_num_files\": 6,\n                    \"expected_content\": {\n                        \"file1.txt\",\n                        \"file2.py\",\n                        \"src/subfile1.txt\",\n                        \"src/subdir/file_subdir.txt\",\n                        \"dir1/file_dir1.txt\",\n                        \"dir2/file_dir2.txt\",\n                    },\n                    \"expected_structure\": {\n                        \"test_repo/\",\n                        \"dir1/\",\n                        \"dir2/\",\n                        \"src/\",\n                        \"subdir/\",\n                    },\n                    \"expected_not_structure\": {*()},\n                },\n            ),\n            
id=\"exclude-recursive-wildcard\",\n        ),\n    ],\n)\ndef test_include_ignore_patterns(\n    temp_directory: Path,\n    sample_query: IngestionQuery,\n    pattern_scenario: PatternScenario,\n) -> None:\n    \"\"\"Test ``ingest_query`` to ensure included and ignored paths are included and ignored respectively.\n\n    Given a directory with ``.txt`` and ``.py`` files, and a set of include patterns or a set of ignore patterns:\n    When ``ingest_query`` is invoked,\n    Then it should produce a summary string listing the files analyzed and a combined content string.\n    \"\"\"\n    sample_query.local_path = temp_directory\n    sample_query.subpath = \"/\"\n    sample_query.type = None\n    sample_query.include_patterns = pattern_scenario[\"include_patterns\"]\n    sample_query.ignore_patterns = pattern_scenario[\"ignore_patterns\"]\n\n    summary, structure, content = ingest_query(sample_query)\n\n    assert \"Repository: test_user/test_repo\" in summary\n    num_files_regex = re.compile(r\"^Files analyzed: (\\d+)$\", re.MULTILINE)\n    assert (num_files_match := num_files_regex.search(summary)) is not None\n    assert int(num_files_match.group(1)) == pattern_scenario[\"expected_num_files\"]\n\n    # Check presence of key files in the content\n    for expected_content_item in pattern_scenario[\"expected_content\"]:\n        assert expected_content_item in content\n\n    # check presence of included directories in structure\n    for expected_structure_item in pattern_scenario[\"expected_structure\"]:\n        assert expected_structure_item in structure\n\n    # check non-presence of non-included directories in structure\n    for expected_not_structure_item in pattern_scenario[\"expected_not_structure\"]:\n        assert expected_not_structure_item not in structure\n"
  },
  {
    "path": "tests/test_notebook_utils.py",
    "content": "\"\"\"Tests for the ``notebook`` utils module.\n\nThese tests validate how notebooks are processed into Python-like output, ensuring that markdown/raw cells are\nconverted to triple-quoted blocks, code cells remain executable code, and various edge cases (multiple worksheets,\nempty cells, outputs, etc.) are handled appropriately.\n\"\"\"\n\nimport pytest\n\nfrom gitingest.utils.notebook import process_notebook\nfrom tests.conftest import WriteNotebookFunc\n\n\ndef test_process_notebook_all_cells(write_notebook: WriteNotebookFunc) -> None:\n    \"\"\"Test processing a notebook containing markdown, code, and raw cells.\n\n    Given a notebook with:\n      - One markdown cell\n      - One code cell\n      - One raw cell\n    When ``process_notebook`` is invoked,\n    Then markdown and raw cells should appear in triple-quoted blocks, and code cells remain as normal code.\n    \"\"\"\n    expected_count = 4\n    notebook_content = {\n        \"cells\": [\n            {\"cell_type\": \"markdown\", \"source\": [\"# Markdown cell\"]},\n            {\"cell_type\": \"code\", \"source\": ['print(\"Hello Code\")']},\n            {\"cell_type\": \"raw\", \"source\": [\"<raw content>\"]},\n        ],\n    }\n    nb_path = write_notebook(\"all_cells.ipynb\", notebook_content)\n    result = process_notebook(nb_path)\n\n    assert result.count('\"\"\"') == expected_count, (\n        \"Two non-code cells => 2 triple-quoted blocks => 4 total triple quotes.\"\n    )\n\n    # Ensure markdown and raw cells are in triple quotes\n    assert \"# Markdown cell\" in result\n    assert \"<raw content>\" in result\n\n    # Ensure code cell is not in triple quotes\n    assert 'print(\"Hello Code\")' in result\n    assert '\"\"\"\\nprint(\"Hello Code\")\\n\"\"\"' not in result\n\n\ndef test_process_notebook_with_worksheets(write_notebook: WriteNotebookFunc) -> None:\n    \"\"\"Test a notebook containing the (as of IPEP-17 deprecated) ``worksheets`` key.\n\n    Given a notebook 
that uses the ``worksheets`` key with a single worksheet,\n    When ``process_notebook`` is called,\n    Then a ``DeprecationWarning`` should be raised, and the content should match an equivalent notebook\n    that has top-level ``cells``.\n    \"\"\"\n    with_worksheets = {\n        \"worksheets\": [\n            {\n                \"cells\": [\n                    {\"cell_type\": \"markdown\", \"source\": [\"# Markdown cell\"]},\n                    {\"cell_type\": \"code\", \"source\": ['print(\"Hello Code\")']},\n                    {\"cell_type\": \"raw\", \"source\": [\"<raw content>\"]},\n                ],\n            },\n        ],\n    }\n    without_worksheets = with_worksheets[\"worksheets\"][0]  # same, but no 'worksheets' key\n\n    nb_with = write_notebook(\"with_worksheets.ipynb\", with_worksheets)\n    nb_without = write_notebook(\"without_worksheets.ipynb\", without_worksheets)\n\n    result_with = process_notebook(nb_with)\n\n    # Should not raise a warning\n    result_without = process_notebook(nb_without)\n\n    assert result_with == result_without, \"Content from the single worksheet should match the top-level equivalent.\"\n\n\ndef test_process_notebook_multiple_worksheets(write_notebook: WriteNotebookFunc) -> None:\n    \"\"\"Test a notebook containing multiple ``worksheets``.\n\n    Given a notebook with two worksheets:\n      - First with a markdown cell\n      - Second with a code cell\n    When ``process_notebook`` is called,\n    Then a warning about multiple worksheets should be raised, and the second worksheet's content should appear\n    in the final output.\n    \"\"\"\n    multi_worksheets = {\n        \"worksheets\": [\n            {\"cells\": [{\"cell_type\": \"markdown\", \"source\": [\"# First Worksheet\"]}]},\n            {\"cells\": [{\"cell_type\": \"code\", \"source\": [\"# Second Worksheet\"]}]},\n        ],\n    }\n\n    single_worksheet = {\n        \"worksheets\": [\n            {\"cells\": [{\"cell_type\": 
\"markdown\", \"source\": [\"# First Worksheet\"]}]},\n        ],\n    }\n\n    nb_multi = write_notebook(\"multiple_worksheets.ipynb\", multi_worksheets)\n    nb_single = write_notebook(\"single_worksheet.ipynb\", single_worksheet)\n\n    result_multi = process_notebook(nb_multi)\n\n    result_single = process_notebook(nb_single)\n\n    assert result_multi != result_single, \"Two worksheets should produce more content than one.\"\n    assert len(result_multi) > len(result_single), \"The multi-worksheet notebook should have extra code content.\"\n    assert \"# First Worksheet\" in result_single\n    assert \"# Second Worksheet\" not in result_single\n    assert \"# First Worksheet\" in result_multi\n    assert \"# Second Worksheet\" in result_multi\n\n\ndef test_process_notebook_code_only(write_notebook: WriteNotebookFunc) -> None:\n    \"\"\"Test a notebook containing only code cells.\n\n    Given a notebook with code cells only:\n    When ``process_notebook`` is called,\n    Then no triple quotes should appear in the output.\n    \"\"\"\n    notebook_content = {\n        \"cells\": [\n            {\"cell_type\": \"code\", \"source\": [\"print('Code Cell 1')\"]},\n            {\"cell_type\": \"code\", \"source\": [\"x = 42\"]},\n        ],\n    }\n    nb_path = write_notebook(\"code_only.ipynb\", notebook_content)\n    result = process_notebook(nb_path)\n\n    assert '\"\"\"' not in result, \"No triple quotes expected when there are only code cells.\"\n    assert \"print('Code Cell 1')\" in result\n    assert \"x = 42\" in result\n\n\ndef test_process_notebook_markdown_only(write_notebook: WriteNotebookFunc) -> None:\n    \"\"\"Test a notebook with only markdown cells.\n\n    Given a notebook with two markdown cells:\n    When ``process_notebook`` is called,\n    Then each markdown cell should become a triple-quoted block (2 blocks => 4 triple quotes total).\n    \"\"\"\n    expected_count = 4\n    notebook_content = {\n        \"cells\": [\n            
{\"cell_type\": \"markdown\", \"source\": [\"# Markdown Header\"]},\n            {\"cell_type\": \"markdown\", \"source\": [\"Some more markdown.\"]},\n        ],\n    }\n    nb_path = write_notebook(\"markdown_only.ipynb\", notebook_content)\n    result = process_notebook(nb_path)\n\n    assert result.count('\"\"\"') == expected_count, \"Two markdown cells => 2 blocks => 4 triple quotes total.\"\n    assert \"# Markdown Header\" in result\n    assert \"Some more markdown.\" in result\n\n\ndef test_process_notebook_raw_only(write_notebook: WriteNotebookFunc) -> None:\n    \"\"\"Test a notebook with only raw cells.\n\n    Given two raw cells:\n    When ``process_notebook`` is called,\n    Then each raw cell should become a triple-quoted block (2 blocks => 4 triple quotes total).\n    \"\"\"\n    expected_count = 4\n    notebook_content = {\n        \"cells\": [\n            {\"cell_type\": \"raw\", \"source\": [\"Raw content line 1\"]},\n            {\"cell_type\": \"raw\", \"source\": [\"Raw content line 2\"]},\n        ],\n    }\n    nb_path = write_notebook(\"raw_only.ipynb\", notebook_content)\n    result = process_notebook(nb_path)\n\n    assert result.count('\"\"\"') == expected_count, \"Two raw cells => 2 blocks => 4 triple quotes.\"\n    assert \"Raw content line 1\" in result\n    assert \"Raw content line 2\" in result\n\n\ndef test_process_notebook_empty_cells(write_notebook: WriteNotebookFunc) -> None:\n    \"\"\"Test that cells with an empty ``source`` are skipped.\n\n    Given a notebook with 4 cells, 3 of which have empty ``source``:\n    When ``process_notebook`` is called,\n    Then only the non-empty cell should appear in the output (1 block => 2 triple quotes).\n    \"\"\"\n    expected_count = 2\n    notebook_content = {\n        \"cells\": [\n            {\"cell_type\": \"markdown\", \"source\": []},\n            {\"cell_type\": \"code\", \"source\": []},\n            {\"cell_type\": \"raw\", \"source\": []},\n            {\"cell_type\": 
\"markdown\", \"source\": [\"# Non-empty markdown\"]},\n        ],\n    }\n    nb_path = write_notebook(\"empty_cells.ipynb\", notebook_content)\n    result = process_notebook(nb_path)\n\n    assert result.count('\"\"\"') == expected_count, \"Only one non-empty cell => 1 block => 2 triple quotes\"\n    assert \"# Non-empty markdown\" in result\n\n\ndef test_process_notebook_invalid_cell_type(write_notebook: WriteNotebookFunc) -> None:\n    \"\"\"Test a notebook with an unknown cell type.\n\n    Given a notebook cell whose ``cell_type`` is unrecognized:\n    When ``process_notebook`` is called,\n    Then a ValueError should be raised.\n    \"\"\"\n    notebook_content = {\n        \"cells\": [\n            {\"cell_type\": \"markdown\", \"source\": [\"# Valid markdown\"]},\n            {\"cell_type\": \"unknown\", \"source\": [\"Unrecognized cell type\"]},\n        ],\n    }\n    nb_path = write_notebook(\"invalid_cell_type.ipynb\", notebook_content)\n\n    with pytest.raises(ValueError, match=\"Unknown cell type: unknown\"):\n        process_notebook(nb_path)\n\n\ndef test_process_notebook_with_output(write_notebook: WriteNotebookFunc) -> None:\n    \"\"\"Test a notebook that has code cells with outputs.\n\n    Given a code cell and multiple output objects:\n    When ``process_notebook`` is called with ``include_output=True``,\n    Then the outputs should be appended as commented lines under the code.\n    \"\"\"\n    notebook_content = {\n        \"cells\": [\n            {\n                \"cell_type\": \"code\",\n                \"source\": [\n                    \"import matplotlib.pyplot as plt\\n\",\n                    \"print('my_data')\\n\",\n                    \"my_data = [1, 2, 3, 4, 5]\\n\",\n                    \"plt.plot(my_data)\\n\",\n                    \"my_data\",\n                ],\n                \"outputs\": [\n                    {\"output_type\": \"stream\", \"text\": [\"my_data\"]},\n                    {\"output_type\": 
\"execute_result\", \"data\": {\"text/plain\": [\"[1, 2, 3, 4, 5]\"]}},\n                    {\"output_type\": \"display_data\", \"data\": {\"text/plain\": [\"<Figure size 640x480 with 1 Axes>\"]}},\n                ],\n            },\n        ],\n    }\n\n    nb_path = write_notebook(\"with_output.ipynb\", notebook_content)\n    with_output = process_notebook(nb_path, include_output=True)\n    without_output = process_notebook(nb_path, include_output=False)\n\n    expected_source = (\n        \"# Jupyter notebook converted to Python script.\\n\\n\"\n        \"import matplotlib.pyplot as plt\\n\"\n        \"print('my_data')\\n\"\n        \"my_data = [1, 2, 3, 4, 5]\\n\"\n        \"plt.plot(my_data)\\n\"\n        \"my_data\\n\"\n    )\n\n    expected_output = \"# Output:\\n#   my_data\\n#   [1, 2, 3, 4, 5]\\n#   <Figure size 640x480 with 1 Axes>\\n\"\n\n    expected_combined = expected_source + expected_output\n\n    assert with_output == expected_combined, \"Should include source code and comment-ified output.\"\n    assert without_output == expected_source, \"Should include only the source code without output.\"\n"
  },
  {
    "path": "tests/test_pattern_utils.py",
    "content": "\"\"\"Test pattern utilities.\"\"\"\n\nfrom gitingest.utils.ignore_patterns import DEFAULT_IGNORE_PATTERNS\nfrom gitingest.utils.pattern_utils import _parse_patterns, process_patterns\n\n\ndef test_process_patterns_empty_patterns() -> None:\n    \"\"\"Test ``process_patterns`` with empty patterns.\n\n    Given empty ``include_patterns`` and ``exclude_patterns``:\n    When ``process_patterns`` is called,\n    Then ``include_patterns`` becomes ``None`` and ``DEFAULT_IGNORE_PATTERNS`` apply.\n    \"\"\"\n    exclude_patterns, include_patterns = process_patterns(exclude_patterns=\"\", include_patterns=\"\")\n\n    assert include_patterns is None\n    assert exclude_patterns == DEFAULT_IGNORE_PATTERNS\n\n\ndef test_parse_patterns_valid() -> None:\n    \"\"\"Test ``_parse_patterns`` with valid comma-separated patterns.\n\n    Given patterns like \"*.py, *.md, docs/*\":\n    When ``_parse_patterns`` is called,\n    Then it should return a set of parsed strings.\n    \"\"\"\n    patterns = \"*.py, *.md, docs/*\"\n    parsed_patterns = _parse_patterns(patterns)\n\n    assert parsed_patterns == {\"*.py\", \"*.md\", \"docs/*\"}\n\n\ndef test_process_patterns_include_and_ignore_overlap() -> None:\n    \"\"\"Test ``process_patterns`` with overlapping patterns.\n\n    Given include=\"*.py\" and ignore={\"*.py\", \"*.txt\"}:\n    When ``process_patterns`` is called,\n    Then \"*.py\" should be removed from ignore patterns.\n    \"\"\"\n    exclude_patterns, include_patterns = process_patterns(exclude_patterns={\"*.py\", \"*.txt\"}, include_patterns=\"*.py\")\n\n    assert include_patterns == {\"*.py\"}\n    assert exclude_patterns is not None\n    assert \"*.py\" not in exclude_patterns\n    assert \"*.txt\" in exclude_patterns\n"
  },
  {
    "path": "tests/test_summary.py",
    "content": "\"\"\"Test that ``gitingest.ingest()`` emits a concise, 6-or-7-line summary.\"\"\"\n\nimport re\nfrom pathlib import Path\n\nimport pytest\n\nfrom gitingest import ingest\n\nREPO = \"pallets/flask\"\n\nPATH_CASES = [\n    (\"tree\", \"/examples/celery\"),\n    (\"blob\", \"/examples/celery/make_celery.py\"),\n    (\"blob\", \"/.gitignore\"),\n]\n\nREF_CASES = [\n    (\"Branch\", \"main\"),\n    (\"Branch\", \"stable\"),\n    (\"Tag\", \"3.0.3\"),\n    (\"Commit\", \"e9741288637e0d9abe95311247b4842a017f7d5c\"),\n]\n\n\n@pytest.mark.parametrize((\"path_type\", \"path\"), PATH_CASES)\n@pytest.mark.parametrize((\"ref_type\", \"ref\"), REF_CASES)\ndef test_ingest_summary(path_type: str, path: str, ref_type: str, ref: str) -> None:\n    \"\"\"Assert that ``gitingest.ingest()`` emits a concise, 6-or-7-line summary.\n\n    - Non-'main' branch or tag → 6 key/value pairs + blank line (7 total).\n    - 'main' branch   → 'Branch' line omitted (6 total).\n    - Commit refs     → no 'Branch'/'Tag' line (6 total).\n    - Required keys:\n        - Repository\n        - ``ref_type`` (absent on 'main')\n        - File│Subpath (chosen by ``path_type``)\n        - Lines│Files analyzed (chosen by ``path_type``)\n        - Estimated tokens (positive integer)\n\n    Any missing key, wrong value, or incorrect line count should fail.\n\n    Parameters\n    ----------\n    path_type : {\"tree\", \"blob\"}\n        GitHub object type under test.\n    path : str\n        The repository sub-path or file path to feed into the URL.\n    ref_type : {\"Branch\", \"Tag\", \"Commit\"}\n        Label expected on line 2 of the summary (absent if `ref` is \"main\").\n    ref : str\n        Actual branch name, tag, or commit hash.\n\n    \"\"\"\n    is_main_branch = ref == \"main\"\n    is_blob = path_type == \"blob\"\n    expected_lines = _calculate_expected_lines(ref_type, is_main_branch=is_main_branch)\n    expected_non_empty_lines = expected_lines - 1\n\n    summary, _, _ = ingest(f\"https://github.com/{REPO}/{path_type}/{ref}{path}\")\n    lines = summary.splitlines()\n    parsed_lines = dict(line.split(\": \", 1) for line in lines if \": \" in line)\n\n    assert parsed_lines[\"Repository\"] == REPO\n\n    if is_main_branch:\n        # We omit the 'Branch' line for 'main' branches.\n        assert ref_type not in parsed_lines\n    else:\n        assert parsed_lines[ref_type] == ref\n\n    if is_blob:\n        assert parsed_lines[\"File\"] == Path(path).name\n        assert \"Lines\" in parsed_lines\n    else:  # 'tree'\n        assert parsed_lines[\"Subpath\"] == path\n        assert \"Files analyzed\" in parsed_lines\n\n    token_match = re.search(r\"\\d+\", parsed_lines[\"Estimated tokens\"])\n    assert token_match, \"'Estimated tokens' should contain a number\"\n    assert int(token_match.group()) > 0\n\n    assert len(lines) == expected_lines\n    assert len(parsed_lines) == expected_non_empty_lines\n\n\ndef _calculate_expected_lines(ref_type: str, *, is_main_branch: bool) -> int:\n    \"\"\"Calculate the expected number of lines in the summary.\n\n    The total number of lines depends on the following:\n    - Commit type does not include the 'Branch'/'Tag' line, reducing the count by 1.\n    - The \"main\" branch omits the 'Branch' line, reducing the count by 1.\n\n    Parameters\n    ----------\n    ref_type : str\n        The type of reference, e.g., \"Branch\", \"Tag\", or \"Commit\".\n    is_main_branch : bool\n        True if the reference is the \"main\" branch, False otherwise.\n\n    Returns\n    -------\n    int\n        The expected number of lines in the summary.\n\n    \"\"\"\n    base_lines = 7\n    if is_main_branch:\n        base_lines -= 1\n    if ref_type == \"Commit\":\n        base_lines -= 1\n    return base_lines\n"
  }
]