[
  {
    "path": ".docker/router.yml",
    "content": "http:\n  services:\n    ollama:\n      loadBalancer:\n        healthCheck:\n          interval: 5s\n          path: /\n        servers:\n          - url: http://ollama-cpu:11434\n          - url: http://ollama-cuda:11434\n          - url: http://host.docker.internal:11434\n\n  routers:\n    ollama-router:\n      rule: \"PathPrefix(`/`)\"\n      service: ollama"
  },
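The `.docker/router.yml` entry above is a Traefik dynamic-configuration file that load-balances one logical `ollama` service across a CPU container, a CUDA container, and an Ollama instance on the Docker host. As a minimal sketch of how such a file could be consumed, the compose fragment below mounts it into a Traefik container; the service names, image tags, ports, and mount path are assumptions for illustration, since the project's actual docker-compose.yaml is not part of this section.

```yaml
# Hypothetical compose sketch wiring Traefik to the dynamic config above.
# Only one of the three backends referenced in router.yml is shown.
services:
  traefik:
    image: traefik:v2.10
    command:
      - "--providers.file.filename=/etc/router.yml"   # load .docker/router.yml as dynamic config
      - "--entrypoints.web.address=:8080"             # illustrative entrypoint port
    ports:
      - "8080:8080"
    volumes:
      - ./.docker/router.yml:/etc/router.yml:ro
    extra_hosts:
      - "host.docker.internal:host-gateway"           # lets the host Ollama URL resolve on Linux
  ollama-cpu:
    image: ollama/ollama:latest                       # one backend named in router.yml
```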
  {
    "path": ".dockerignore",
    "content": ".venv\nmodels\n.github\n.vscode\n.DS_Store\n.mypy_cache\n.ruff_cache\nlocal_data\nterraform\ntests\nDockerfile\nDockerfile.*"
  },
  {
    "path": ".github/ISSUE_TEMPLATE/bug.yml",
    "content": "name: Bug Report\ndescription: Report a bug or issue with the project.\ntitle: \"[BUG] \"\nlabels: [\"bug\"]\n\nbody:\n  - type: markdown\n    attributes:\n      value: |\n        **Please describe the bug you encountered.**\n\n  - type: checkboxes\n    id: pre-check\n    attributes:\n      label: Pre-check\n      description: Please confirm that you have searched for duplicate issues before creating this one.\n      options:\n        - label: I have searched the existing issues and none cover this bug.\n          required: true\n\n  - type: textarea\n    id: description\n    attributes:\n      label: Description\n      description: Provide a detailed description of the bug.\n      placeholder: \"Detailed description of the bug\"\n    validations:\n      required: true\n\n  - type: textarea\n    id: steps\n    attributes:\n      label: Steps to Reproduce\n      description: Provide the steps to reproduce the bug.\n      placeholder: \"1. Step one\\n2. Step two\\n3. Step three\"\n    validations:\n      required: true\n\n  - type: input\n    id: expected\n    attributes:\n      label: Expected Behavior\n      description: Describe what you expected to happen.\n      placeholder: \"Expected behavior\"\n    validations:\n      required: true\n\n  - type: input\n    id: actual\n    attributes:\n      label: Actual Behavior\n      description: Describe what actually happened.\n      placeholder: \"Actual behavior\"\n    validations:\n      required: true\n\n  - type: input\n    id: environment\n    attributes:\n      label: Environment\n      description: Provide details about your environment (e.g., OS, GPU, profile, etc.).\n      placeholder: \"Environment details\"\n    validations:\n      required: true\n\n  - type: input\n    id: additional\n    attributes:\n      label: Additional Information\n      description: Provide any additional information that may be relevant (e.g., logs, screenshots).\n      placeholder: \"Any additional information that may be relevant\"\n\n  - type: input\n    id: version\n    attributes:\n      label: Version\n      description: Provide the version of the project where you encountered the bug.\n      placeholder: \"Version number\"\n\n  - type: markdown\n    attributes:\n      value: |\n        **Please ensure the following setup checklist has been reviewed before submitting the bug report.**\n\n  - type: checkboxes\n    id: general-setup-checklist\n    attributes:\n      label: Setup Checklist\n      description: Verify the following general aspects of your setup.\n      options:\n        - label: Confirm that you have followed the installation instructions in the project’s documentation.\n        - label: Check that you are using the latest version of the project.\n        - label: Verify disk space availability for model storage and data processing.\n        - label: Ensure that you have the necessary permissions to run the project.\n\n  - type: checkboxes\n    id: nvidia-setup-checklist\n    attributes:\n      label: NVIDIA GPU Setup Checklist\n      description: Verify the following aspects of your NVIDIA GPU setup.\n      options:\n        - label: Check that the all CUDA dependencies are installed and are compatible with your GPU (refer to [CUDA's documentation](https://docs.nvidia.com/deploy/cuda-compatibility/#frequently-asked-questions))\n        - label: Ensure an NVIDIA GPU is installed and recognized by the system (run `nvidia-smi` to verify).\n        - label: Ensure proper permissions are set for accessing GPU resources.\n        
- label: Docker users - Verify that the NVIDIA Container Toolkit is configured correctly (e.g. run `sudo docker run --rm --gpus all nvidia/cuda:11.0.3-base-ubuntu20.04 nvidia-smi`)\n"
  },
  {
    "path": ".github/ISSUE_TEMPLATE/config.yml",
    "content": "blank_issues_enabled: false\ncontact_links:\n  - name: Documentation\n    url: https://docs.privategpt.dev\n    about: Please refer to our documentation for more details and guidance.\n  - name: Discord\n    url: https://discord.gg/bK6mRVpErU\n    about: Join our Discord community to ask questions and get help.\n"
  },
  {
    "path": ".github/ISSUE_TEMPLATE/docs.yml",
    "content": "name: Documentation\ndescription: Suggest a change or addition to the documentation.\ntitle: \"[DOCS] \"\nlabels: [\"documentation\"]\n\nbody:\n  - type: markdown\n    attributes:\n      value: |\n        **Please describe the documentation change or addition you would like to suggest.**\n\n  - type: textarea\n    id: description\n    attributes:\n      label: Description\n      description: Provide a detailed description of the documentation change.\n      placeholder: \"Detailed description of the documentation change\"\n    validations:\n      required: true\n"
  },
  {
    "path": ".github/ISSUE_TEMPLATE/feature.yml",
    "content": "name: Enhancement\ndescription: Suggest an enhancement or improvement to the project.\ntitle: \"[FEATURE] \"\nlabels: [\"enhancement\"]\n\nbody:\n  - type: markdown\n    attributes:\n      value: |\n        **Please describe the enhancement or improvement you would like to suggest.**\n\n  - type: textarea\n    id: feature_description\n    attributes:\n      label: Feature Description\n      description: Provide a detailed description of the enhancement.\n      placeholder: \"Detailed description of the enhancement\"\n    validations:\n      required: true\n\n  - type: textarea\n    id: reason\n    attributes:\n      label: Reason\n      description: Explain the reason for this enhancement.\n      placeholder: \"Reason for the enhancement\"\n    validations:\n      required: true\n\n  - type: textarea\n    id: value\n    attributes:\n      label: Value of Feature\n      description: Describe the value or benefits this feature will bring.\n      placeholder: \"Value or benefits of the feature\"\n    validations:\n      required: true\n"
  },
  {
    "path": ".github/ISSUE_TEMPLATE/question.yml",
    "content": "name: Question\ndescription: Ask a question about the project.\ntitle: \"[QUESTION] \"\nlabels: [\"question\"]\n\nbody:\n  - type: markdown\n    attributes:\n      value: |\n        **Please describe your question in detail.**\n\n  - type: textarea\n    id: question\n    attributes:\n      label: Question\n      description: Provide a detailed description of your question.\n      placeholder: \"Detailed description of the question\"\n    validations:\n      required: true\n"
  },
  {
    "path": ".github/pull_request_template.md",
    "content": "# Description\n\nPlease include a summary of the change and which issue is fixed. Please also include relevant motivation and context. List any dependencies that are required for this change.\n\n## Type of Change\n\nPlease delete options that are not relevant.\n\n- [ ] Bug fix (non-breaking change which fixes an issue)\n- [ ] New feature (non-breaking change which adds functionality)\n- [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected)\n- [ ] This change requires a documentation update\n\n## How Has This Been Tested?\n\nPlease describe the tests that you ran to verify your changes. Provide instructions so we can reproduce. Please also list any relevant details for your test configuration\n\n- [ ] Added new unit/integration tests\n- [ ] I stared at the code and made sure it makes sense\n\n**Test Configuration**:\n* Firmware version:\n* Hardware:\n* Toolchain:\n* SDK:\n\n## Checklist:\n\n- [ ] My code follows the style guidelines of this project\n- [ ] I have performed a self-review of my code\n- [ ] I have commented my code, particularly in hard-to-understand areas\n- [ ] I have made corresponding changes to the documentation\n- [ ] My changes generate no new warnings\n- [ ] I have added tests that prove my fix is effective or that my feature works\n- [ ] New and existing unit tests pass locally with my changes\n- [ ] Any dependent changes have been merged and published in downstream modules\n- [ ] I ran `make check; make test` to ensure mypy and tests pass"
  },
  {
    "path": ".github/release_please/.release-please-config.json",
    "content": "{\n    \"$schema\": \"https://raw.githubusercontent.com/googleapis/release-please/main/schemas/config.json\",\n    \"release-type\": \"simple\",\n    \"version-file\": \"version.txt\",\n    \"extra-files\": [\n      {\n        \"type\": \"toml\",\n        \"path\": \"pyproject.toml\",\n        \"jsonpath\": \"$.tool.poetry.version\"\n      },\n      {\n        \"type\": \"generic\",\n        \"path\": \"docker-compose.yaml\"\n      }\n    ],\n    \"packages\": {\n      \".\": {}\n    }\n  }"
  },
  {
    "path": ".github/release_please/.release-please-manifest.json",
    "content": "{\n  \".\": \"0.6.2\"\n}"
  },
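The release-please config above registers `docker-compose.yaml` as a "generic" extra file, which means release-please only bumps version strings on lines carrying its inline marker comment; the manifest pins the current version at 0.6.2. A sketch of such an annotated line is shown below; the service name and image reference are illustrative, since the repository's actual docker-compose.yaml is not included in this section.

```yaml
# Sketch of the inline annotation the release-please "generic" updater scans for.
services:
  private-gpt:
    image: zylonai/private-gpt:0.6.2 # x-release-please-version
```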
  {
    "path": ".github/workflows/actions/install_dependencies/action.yml",
    "content": "name: \"Install Dependencies\"\ndescription: \"Action to build the project dependencies from the main versions\"\ninputs:\n  python_version:\n    required: true\n    type: string\n    default: \"3.11.4\"\n  poetry_version:\n    required: true\n    type: string\n    default: \"1.8.3\"\n\nruns:\n  using: composite\n  steps:\n    - name: Install Poetry\n      uses: snok/install-poetry@v1\n      with:\n        version: ${{ inputs.poetry_version }}\n        virtualenvs-create: true\n        virtualenvs-in-project: false\n        installer-parallel: true\n    - uses: actions/setup-python@v4\n      with:\n        python-version: ${{ inputs.python_version }}\n        cache: \"poetry\"\n    - name: Install Dependencies\n      run: poetry install --extras \"ui vector-stores-qdrant\" --no-root\n      shell: bash\n\n"
  },
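The composite action above exposes `python_version` and `poetry_version` inputs, both with defaults; the tests workflow later in this section invokes it without any inputs. For illustration, a workflow step that overrides them explicitly could look like the sketch below (the values shown are simply the action's own defaults, and the checkout step is required because the action is resolved from the repository).

```yaml
# Example job steps invoking the local composite action with explicit inputs.
steps:
  - uses: actions/checkout@v4
  - uses: ./.github/workflows/actions/install_dependencies
    with:
      python_version: "3.11.4"
      poetry_version: "1.8.3"
```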
  {
    "path": ".github/workflows/fern-check.yml",
    "content": "name: fern check\n\non:\n  pull_request:\n    branches:\n      - main\n    paths:\n      - \"fern/**\"\n      \njobs:\n  fern-check:\n    runs-on: ubuntu-latest\n    steps:\n      - name: Checkout repo\n        uses: actions/checkout@v4\n\n      - name: Install Fern\n        run:  npm install -g fern-api\n\n      - name: Check Fern API is valid\n        run: fern check"
  },
  {
    "path": ".github/workflows/generate-release.yml",
    "content": "name: generate-release\n\non:\n  release:\n    types: [ published ]\n  workflow_dispatch:\n\nenv:\n  REGISTRY: docker.io\n  IMAGE_NAME: zylonai/private-gpt\n  platforms: linux/amd64,linux/arm64\n  DEFAULT_TYPE: \"ollama\"\n\njobs:\n  build-and-push-image:\n    runs-on: ubuntu-latest\n\n    strategy:\n      matrix:\n        type: [ llamacpp-cpu, ollama ]\n\n    permissions:\n      contents: read\n      packages: write\n\n    outputs:\n      version: ${{ steps.version.outputs.version }}\n\n    steps:\n      - name: Free Disk Space (Ubuntu)\n        uses: jlumbroso/free-disk-space@main\n        with:\n          tool-cache: false\n          android: true\n          dotnet: true\n          haskell: true\n          large-packages: true\n          docker-images: false\n          swap-storage: true\n\n      - name: Checkout repository\n        uses: actions/checkout@v4\n\n      - name: Set up QEMU\n        uses: docker/setup-qemu-action@v3\n\n      - name: Set up Docker Buildx\n        uses: docker/setup-buildx-action@v3\n\n      - name: Log in to Docker Hub\n        uses: docker/login-action@v3\n        with:\n          username: ${{ secrets.DOCKER_USERNAME }}\n          password: ${{ secrets.DOCKER_PASSWORD }}\n\n      - name: Extract metadata (tags, labels) for Docker\n        id: meta\n        uses: docker/metadata-action@v5\n        with:\n          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}\n          tags: |\n            type=semver,pattern={{version}},enable=${{ matrix.type == env.DEFAULT_TYPE }}\n            type=semver,pattern={{version}}-${{ matrix.type }}\n            type=semver,pattern={{major}}.{{minor}},enable=${{ matrix.type == env.DEFAULT_TYPE }}\n            type=semver,pattern={{major}}.{{minor}}-${{ matrix.type }}\n            type=raw,value=latest,enable=${{ matrix.type == env.DEFAULT_TYPE }}\n            type=sha\n          flavor: |\n            latest=false\n\n      - name: Build and push Docker image\n        uses: docker/build-push-action@v6\n        with:\n          context: .\n          file: Dockerfile.${{ matrix.type }}\n          platforms: ${{ env.platforms }}\n          push: true\n          tags: ${{ steps.meta.outputs.tags }}\n          labels: ${{ steps.meta.outputs.labels }}\n\n      - name: Version output\n        id: version\n        run: echo \"version=${{ steps.meta.outputs.version }}\" >> \"$GITHUB_OUTPUT\""
  },
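In the generate-release workflow above, the metadata-action patterns emit bare tags only for the default type (`ollama`) and suffixed tags for every matrix entry, with `latest` suppressed except for the default. As a worked reading of those patterns for a hypothetical published release `v0.6.2` (the short SHA is a placeholder), the resulting tags would be roughly:

```yaml
# Tags implied by the metadata-action configuration above for release v0.6.2.
#
# matrix.type == "ollama" (DEFAULT_TYPE):
#   - docker.io/zylonai/private-gpt:0.6.2
#   - docker.io/zylonai/private-gpt:0.6.2-ollama
#   - docker.io/zylonai/private-gpt:0.6
#   - docker.io/zylonai/private-gpt:0.6-ollama
#   - docker.io/zylonai/private-gpt:latest
#   - docker.io/zylonai/private-gpt:sha-<short-sha>
#
# matrix.type == "llamacpp-cpu":
#   - docker.io/zylonai/private-gpt:0.6.2-llamacpp-cpu
#   - docker.io/zylonai/private-gpt:0.6-llamacpp-cpu
#   - docker.io/zylonai/private-gpt:sha-<short-sha>
```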
  {
    "path": ".github/workflows/preview-docs.yml",
    "content": "name: deploy preview docs\n\non:\n  pull_request_target:\n    branches:\n      - main\n    paths:\n      - \"fern/**\"\n\njobs:\n  preview-docs:\n    runs-on: ubuntu-latest\n\n    permissions:\n      contents: read\n      pull-requests: write\n\n    steps:\n      - name: Checkout repository\n        uses: actions/checkout@v4\n        with:\n          ref: refs/pull/${{ github.event.pull_request.number }}/merge\n\n      - name: Setup Node.js\n        uses: actions/setup-node@v4\n        with:\n          node-version: \"18\"\n\n      - name: Install Fern\n        run: npm install -g fern-api\n\n      - name: Generate Documentation Preview with Fern\n        id: generate_docs\n        env:\n          FERN_TOKEN: ${{ secrets.FERN_TOKEN }}\n        run: |\n          output=$(fern generate --docs --preview --log-level debug)\n          echo \"$output\"\n          # Extract the URL\n          preview_url=$(echo \"$output\" | grep -oP '(?<=Published docs to )https://[^\\s]*')\n          # Set the output for the step\n          echo \"::set-output name=preview_url::$preview_url\"\n      - name: Comment PR with URL using github-actions bot\n        uses: actions/github-script@v7\n        if: ${{ steps.generate_docs.outputs.preview_url }}\n        with:\n          script: |\n            const preview_url = '${{ steps.generate_docs.outputs.preview_url }}';\n            github.rest.issues.createComment({\n              issue_number: context.issue.number,\n              owner: context.repo.owner,\n              repo: context.repo.repo,\n              body: `Published docs preview URL: ${preview_url}`\n            })\n"
  },
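The preview step above still uses the deprecated `::set-output` workflow command. A sketch of the same step using the `$GITHUB_OUTPUT` file, the mechanism the generate-release workflow earlier in this section already uses, is shown below as a suggested equivalent rather than the current file contents.

```yaml
# Equivalent preview step writing to GITHUB_OUTPUT instead of ::set-output.
- name: Generate Documentation Preview with Fern
  id: generate_docs
  env:
    FERN_TOKEN: ${{ secrets.FERN_TOKEN }}
  run: |
    output=$(fern generate --docs --preview --log-level debug)
    echo "$output"
    # Extract the URL (same grep as above)
    preview_url=$(echo "$output" | grep -oP '(?<=Published docs to )https://[^\s]*')
    echo "preview_url=$preview_url" >> "$GITHUB_OUTPUT"
```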
  {
    "path": ".github/workflows/publish-docs.yml",
    "content": "name: publish docs\n\non: \n  push: \n    branches: \n      - main\n    paths:\n      - \"fern/**\"\n\njobs:\n  publish-docs:\n    runs-on: ubuntu-latest\n    steps:\n      - name: Checkout repo\n        uses: actions/checkout@v4\n\n      - name: Setup node\n        uses: actions/setup-node@v3\n\n      - name: Download Fern\n        run: npm install -g fern-api\n\n      - name: Generate and Publish Docs\n        env:\n          FERN_TOKEN: ${{ secrets.FERN_TOKEN }}\n        run: fern generate --docs --log-level debug\n"
  },
  {
    "path": ".github/workflows/release-please.yml",
    "content": "name: release-please\n\non:\n  push:\n    branches:\n      - main\n\npermissions:\n  contents: write\n  pull-requests: write\n\njobs:\n  release-please:\n    runs-on: ubuntu-latest\n    steps:\n      - uses: google-github-actions/release-please-action@v4\n        id: release\n        with:\n          config-file: .github/release_please/.release-please-config.json\n          manifest-file: .github/release_please/.release-please-manifest.json\n"
  },
  {
    "path": ".github/workflows/stale.yml",
    "content": "# This workflow warns and then closes issues and PRs that have had no activity for a specified amount of time.\n#\n# You can adjust the behavior by modifying this file.\n# For more information, see:\n# https://github.com/actions/stale\nname: Mark stale issues and pull requests\n\non:\n  schedule:\n  - cron: '42 5 * * *'\n\njobs:\n  stale:\n\n    runs-on: ubuntu-latest\n    permissions:\n      issues: write\n      pull-requests: write\n\n    steps:\n    - uses: actions/stale@v8\n      with:\n        repo-token: ${{ secrets.GITHUB_TOKEN }}\n        days-before-stale: 15\n        stale-issue-message: 'Stale issue'\n        stale-pr-message: 'Stale pull request'\n        stale-issue-label: 'stale'\n        stale-pr-label: 'stale'\n        exempt-issue-labels: 'autorelease: pending'\n        exempt-pr-labels: 'autorelease: pending'\n"
  },
  {
    "path": ".github/workflows/tests.yml",
    "content": "name: tests\n\non:\n  push:\n    branches:\n      - main\n  pull_request:\n\nconcurrency:\n  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.head_ref || github.ref }}\n  cancel-in-progress: ${{ github.event_name == 'pull_request' }}\n\njobs:\n  setup:\n    runs-on: ubuntu-latest\n    steps:\n      - uses: actions/checkout@v4\n      - uses: ./.github/workflows/actions/install_dependencies\n\n  checks:\n    needs: setup\n    runs-on: ubuntu-latest\n    name: ${{ matrix.quality-command }}\n    strategy:\n      matrix:\n        quality-command:\n          - black\n          - ruff\n          - mypy\n    steps:\n      - uses: actions/checkout@v4\n      - uses: ./.github/workflows/actions/install_dependencies\n      - name: run ${{ matrix.quality-command }}\n        run: make ${{ matrix.quality-command }}\n\n  test:\n    needs: setup\n    runs-on: ubuntu-latest\n    name: test\n    steps:\n      - uses: actions/checkout@v4\n      - uses: ./.github/workflows/actions/install_dependencies\n      - name: run test\n        run: make test-coverage\n      # Run even if make test fails for coverage reports\n      # TODO: select a better xml results displayer\n      - name: Archive test results coverage results\n        uses: actions/upload-artifact@v3\n        if: always()\n        with:\n          name: test_results\n          path: tests-results.xml\n      - name: Archive code coverage results\n        uses: actions/upload-artifact@v3\n        if: always()\n        with:\n          name: code-coverage-report\n          path: htmlcov/\n\n  all_checks_passed:\n    # Used to easily force requirements checks in GitHub\n    needs:\n      - checks\n      - test\n    runs-on: ubuntu-latest\n    steps:\n      - run: echo \"All checks passed\"\n"
  },
  {
    "path": ".gitignore",
    "content": ".venv\n.env\nvenv\n\nsettings-me.yaml\n\n.ruff_cache\n.pytest_cache\n.mypy_cache\n\n# byte-compiled / optimized / DLL files\n__pycache__/\n*.py[cod]\n\n# unit tests / coverage reports\n/tests-results.xml\n/.coverage\n/coverage.xml\n/htmlcov/\n\n# pyenv\n/.python-version\n\n# IDE\n.idea/\n.vscode/\n/.run/\n.fleet/\n\n# macOS\n.DS_Store\n"
  },
  {
    "path": ".pre-commit-config.yaml",
    "content": "default_install_hook_types:\n# Mandatory to install both pre-commit and pre-push hooks (see https://pre-commit.com/#top_level-default_install_hook_types)\n# Add new hook types here to ensure automatic installation when running `pre-commit install`\n- pre-commit\n- pre-push\nrepos:\n- repo: https://github.com/pre-commit/pre-commit-hooks\n  rev: v4.3.0\n  hooks:\n  - id: trailing-whitespace\n  - id: end-of-file-fixer\n  - id: check-yaml\n  - id: check-json\n  - id: check-added-large-files\n\n- repo: local\n  hooks:\n  - id: black\n    name: Formatting (black)\n    entry: black\n    language: system\n    types: [python]\n    stages: [commit]\n  - id: ruff\n    name: Linter (ruff)\n    entry: ruff\n    language: system\n    types: [python]\n    stages: [commit]\n  - id: mypy\n    name: Type checking (mypy)\n    entry: make mypy\n    pass_filenames: false\n    language: system\n    types: [python]\n    stages: [commit]\n  - id: test\n    name: Unit tests (pytest)\n    entry: make test\n    pass_filenames: false\n    language: system\n    types: [python]\n    stages: [push]"
  },
  {
    "path": "CHANGELOG.md",
    "content": "# Changelog\n\n## [0.6.2](https://github.com/zylon-ai/private-gpt/compare/v0.6.1...v0.6.2) (2024-08-08)\n\n\n### Bug Fixes\n\n* add numpy issue to troubleshooting ([#2048](https://github.com/zylon-ai/private-gpt/issues/2048)) ([4ca6d0c](https://github.com/zylon-ai/private-gpt/commit/4ca6d0cb556be7a598f7d3e3b00d2a29214ee1e8))\n* auto-update version ([#2052](https://github.com/zylon-ai/private-gpt/issues/2052)) ([7fefe40](https://github.com/zylon-ai/private-gpt/commit/7fefe408b4267684c6e3c1a43c5dc2b73ec61fe4))\n* publish image name ([#2043](https://github.com/zylon-ai/private-gpt/issues/2043)) ([b1acf9d](https://github.com/zylon-ai/private-gpt/commit/b1acf9dc2cbca2047cd0087f13254ff5cda6e570))\n* update matplotlib to 3.9.1-post1 to fix win install ([b16abbe](https://github.com/zylon-ai/private-gpt/commit/b16abbefe49527ac038d235659854b98345d5387))\n\n## [0.6.1](https://github.com/zylon-ai/private-gpt/compare/v0.6.0...v0.6.1) (2024-08-05)\n\n\n### Bug Fixes\n\n* add built image from DockerHub ([#2042](https://github.com/zylon-ai/private-gpt/issues/2042)) ([f09f6dd](https://github.com/zylon-ai/private-gpt/commit/f09f6dd2553077d4566dbe6b48a450e05c2f049e))\n* Adding azopenai to model list ([#2035](https://github.com/zylon-ai/private-gpt/issues/2035)) ([1c665f7](https://github.com/zylon-ai/private-gpt/commit/1c665f7900658144f62814b51f6e3434a6d7377f))\n* **deploy:** generate docker release when new version is released ([#2038](https://github.com/zylon-ai/private-gpt/issues/2038)) ([1d4c14d](https://github.com/zylon-ai/private-gpt/commit/1d4c14d7a3c383c874b323d934be01afbaca899e))\n* **deploy:** improve Docker-Compose and quickstart on Docker ([#2037](https://github.com/zylon-ai/private-gpt/issues/2037)) ([dae0727](https://github.com/zylon-ai/private-gpt/commit/dae0727a1b4abd35d2b0851fe30e0a4ed67e0fbb))\n\n## [0.6.0](https://github.com/zylon-ai/private-gpt/compare/v0.5.0...v0.6.0) (2024-08-02)\n\n\n### Features\n\n* bump dependencies ([#1987](https://github.com/zylon-ai/private-gpt/issues/1987)) ([b687dc8](https://github.com/zylon-ai/private-gpt/commit/b687dc852413404c52d26dcb94536351a63b169d))\n* **docs:** add privategpt-ts sdk ([#1924](https://github.com/zylon-ai/private-gpt/issues/1924)) ([d13029a](https://github.com/zylon-ai/private-gpt/commit/d13029a046f6e19e8ee65bef3acd96365c738df2))\n* **docs:** Fix setup docu ([#1926](https://github.com/zylon-ai/private-gpt/issues/1926)) ([067a5f1](https://github.com/zylon-ai/private-gpt/commit/067a5f144ca6e605c99d7dbe9ca7d8207ac8808d))\n* **docs:** update doc for ipex-llm ([#1968](https://github.com/zylon-ai/private-gpt/issues/1968)) ([19a7c06](https://github.com/zylon-ai/private-gpt/commit/19a7c065ef7f42b37f289dd28ac945f7afc0e73a))\n* **docs:** update documentation and fix preview-docs ([#2000](https://github.com/zylon-ai/private-gpt/issues/2000)) ([4523a30](https://github.com/zylon-ai/private-gpt/commit/4523a30c8f004aac7a7ae224671e2c45ec0cb973))\n* **llm:** add progress bar when ollama is pulling models ([#2031](https://github.com/zylon-ai/private-gpt/issues/2031)) ([cf61bf7](https://github.com/zylon-ai/private-gpt/commit/cf61bf780f8d122e4057d002abf03563bb45614a))\n* **llm:** autopull ollama models ([#2019](https://github.com/zylon-ai/private-gpt/issues/2019)) ([20bad17](https://github.com/zylon-ai/private-gpt/commit/20bad17c9857809158e689e9671402136c1e3d84))\n* **llm:** Support for Google Gemini LLMs and Embeddings ([#1965](https://github.com/zylon-ai/private-gpt/issues/1965)) 
([fc13368](https://github.com/zylon-ai/private-gpt/commit/fc13368bc72d1f4c27644677431420ed77731c03))\n* make llama3.1 as default ([#2022](https://github.com/zylon-ai/private-gpt/issues/2022)) ([9027d69](https://github.com/zylon-ai/private-gpt/commit/9027d695c11fbb01e62424b855665de71d513417))\n* prompt_style applied to all LLMs + extra LLM params. ([#1835](https://github.com/zylon-ai/private-gpt/issues/1835)) ([e21bf20](https://github.com/zylon-ai/private-gpt/commit/e21bf20c10938b24711d9f2c765997f44d7e02a9))\n* **recipe:** add our first recipe  `Summarize` ([#2028](https://github.com/zylon-ai/private-gpt/issues/2028)) ([8119842](https://github.com/zylon-ai/private-gpt/commit/8119842ae6f1f5ecfaf42b06fa0d1ffec675def4))\n* **vectordb:** Milvus vector db Integration ([#1996](https://github.com/zylon-ai/private-gpt/issues/1996)) ([43cc31f](https://github.com/zylon-ai/private-gpt/commit/43cc31f74015f8d8fcbf7a8ea7d7d9ecc66cf8c9))\n* **vectorstore:** Add clickhouse support as vectore store ([#1883](https://github.com/zylon-ai/private-gpt/issues/1883)) ([2612928](https://github.com/zylon-ai/private-gpt/commit/26129288394c7483e6fc0496a11dc35679528cc1))\n\n\n### Bug Fixes\n\n* \"no such group\" error in Dockerfile, added docx2txt and cryptography deps ([#1841](https://github.com/zylon-ai/private-gpt/issues/1841)) ([947e737](https://github.com/zylon-ai/private-gpt/commit/947e737f300adf621d2261d527192f36f3387f8e))\n* **config:** make tokenizer optional and include a troubleshooting doc ([#1998](https://github.com/zylon-ai/private-gpt/issues/1998)) ([01b7ccd](https://github.com/zylon-ai/private-gpt/commit/01b7ccd0648be032846647c9a184925d3682f612))\n* **docs:** Fix concepts.mdx referencing to installation page ([#1779](https://github.com/zylon-ai/private-gpt/issues/1779)) ([dde0224](https://github.com/zylon-ai/private-gpt/commit/dde02245bcd51a7ede7b6789c82ae217cac53d92))\n* **docs:** Update installation.mdx ([#1866](https://github.com/zylon-ai/private-gpt/issues/1866)) ([c1802e7](https://github.com/zylon-ai/private-gpt/commit/c1802e7cf0e56a2603213ec3b6a4af8fadb8a17a))\n* ffmpy dependency ([#2020](https://github.com/zylon-ai/private-gpt/issues/2020)) ([dabf556](https://github.com/zylon-ai/private-gpt/commit/dabf556dae9cb00fe0262270e5138d982585682e))\n* light mode ([#2025](https://github.com/zylon-ai/private-gpt/issues/2025)) ([1020cd5](https://github.com/zylon-ai/private-gpt/commit/1020cd53288af71a17882781f392512568f1b846))\n* **LLM:** mistral ignoring assistant messages ([#1954](https://github.com/zylon-ai/private-gpt/issues/1954)) ([c7212ac](https://github.com/zylon-ai/private-gpt/commit/c7212ac7cc891f9e3c713cc206ae9807c5dfdeb6))\n* **llm:** special tokens and leading space ([#1831](https://github.com/zylon-ai/private-gpt/issues/1831)) ([347be64](https://github.com/zylon-ai/private-gpt/commit/347be643f7929c56382a77c3f45f0867605e0e0a))\n* make embedding_api_base match api_base when on docker ([#1859](https://github.com/zylon-ai/private-gpt/issues/1859)) ([2a432bf](https://github.com/zylon-ai/private-gpt/commit/2a432bf9c5582a94eb4052b1e80cabdb118d298e))\n* nomic embeddings ([#2030](https://github.com/zylon-ai/private-gpt/issues/2030)) ([5465958](https://github.com/zylon-ai/private-gpt/commit/54659588b5b109a3dd17cca835e275240464d275))\n* prevent to ingest local files (by default) ([#2010](https://github.com/zylon-ai/private-gpt/issues/2010)) ([e54a8fe](https://github.com/zylon-ai/private-gpt/commit/e54a8fe0433252808d0a60f6a08a43c9f5a42f3b))\n* Replacing unsafe `eval()` with `json.loads()` 
([#1890](https://github.com/zylon-ai/private-gpt/issues/1890)) ([9d0d614](https://github.com/zylon-ai/private-gpt/commit/9d0d614706581a8bfa57db45f62f84ab23d26f15))\n* **settings:** enable cors by default so it will work when using ts sdk (spa) ([#1925](https://github.com/zylon-ai/private-gpt/issues/1925)) ([966af47](https://github.com/zylon-ai/private-gpt/commit/966af4771dbe5cf3fdf554b5fdf8f732407859c4))\n* **ui:** gradio bug fixes ([#2021](https://github.com/zylon-ai/private-gpt/issues/2021)) ([d4375d0](https://github.com/zylon-ai/private-gpt/commit/d4375d078f18ba53562fd71651159f997fff865f))\n* unify embedding models ([#2027](https://github.com/zylon-ai/private-gpt/issues/2027)) ([40638a1](https://github.com/zylon-ai/private-gpt/commit/40638a18a5713d60fec8fe52796dcce66d88258c))\n\n## [0.5.0](https://github.com/zylon-ai/private-gpt/compare/v0.4.0...v0.5.0) (2024-04-02)\n\n\n### Features\n\n* **code:** improve concat of strings in ui ([#1785](https://github.com/zylon-ai/private-gpt/issues/1785)) ([bac818a](https://github.com/zylon-ai/private-gpt/commit/bac818add51b104cda925b8f1f7b51448e935ca1))\n* **docker:** set default Docker to use Ollama ([#1812](https://github.com/zylon-ai/private-gpt/issues/1812)) ([f83abff](https://github.com/zylon-ai/private-gpt/commit/f83abff8bc955a6952c92cc7bcb8985fcec93afa))\n* **docs:** Add guide Llama-CPP Linux AMD GPU support ([#1782](https://github.com/zylon-ai/private-gpt/issues/1782)) ([8a836e4](https://github.com/zylon-ai/private-gpt/commit/8a836e4651543f099c59e2bf497ab8c55a7cd2e5))\n* **docs:** Feature/upgrade docs ([#1741](https://github.com/zylon-ai/private-gpt/issues/1741)) ([5725181](https://github.com/zylon-ai/private-gpt/commit/572518143ac46532382db70bed6f73b5082302c1))\n* **docs:** upgrade fern ([#1596](https://github.com/zylon-ai/private-gpt/issues/1596)) ([84ad16a](https://github.com/zylon-ai/private-gpt/commit/84ad16af80191597a953248ce66e963180e8ddec))\n* **ingest:** Created a faster ingestion mode - pipeline ([#1750](https://github.com/zylon-ai/private-gpt/issues/1750)) ([134fc54](https://github.com/zylon-ai/private-gpt/commit/134fc54d7d636be91680dc531f5cbe2c5892ac56))\n* **llm - embed:** Add support for Azure OpenAI ([#1698](https://github.com/zylon-ai/private-gpt/issues/1698)) ([1efac6a](https://github.com/zylon-ai/private-gpt/commit/1efac6a3fe19e4d62325e2c2915cd84ea277f04f))\n* **llm:** adds serveral settings for llamacpp and ollama ([#1703](https://github.com/zylon-ai/private-gpt/issues/1703)) ([02dc83e](https://github.com/zylon-ai/private-gpt/commit/02dc83e8e9f7ada181ff813f25051bbdff7b7c6b))\n* **llm:** Ollama LLM-Embeddings decouple + longer keep_alive settings ([#1800](https://github.com/zylon-ai/private-gpt/issues/1800)) ([b3b0140](https://github.com/zylon-ai/private-gpt/commit/b3b0140e244e7a313bfaf4ef10eb0f7e4192710e))\n* **llm:** Ollama timeout setting ([#1773](https://github.com/zylon-ai/private-gpt/issues/1773)) ([6f6c785](https://github.com/zylon-ai/private-gpt/commit/6f6c785dac2bbad37d0b67fda215784298514d39))\n* **local:** tiktoken cache within repo for offline ([#1467](https://github.com/zylon-ai/private-gpt/issues/1467)) ([821bca3](https://github.com/zylon-ai/private-gpt/commit/821bca32e9ee7c909fd6488445ff6a04463bf91b))\n* **nodestore:** add Postgres for the doc and index store ([#1706](https://github.com/zylon-ai/private-gpt/issues/1706)) ([68b3a34](https://github.com/zylon-ai/private-gpt/commit/68b3a34b032a08ca073a687d2058f926032495b3))\n* **rag:** expose similarity_top_k and similarity_score to settings 
([#1771](https://github.com/zylon-ai/private-gpt/issues/1771)) ([087cb0b](https://github.com/zylon-ai/private-gpt/commit/087cb0b7b74c3eb80f4f60b47b3a021c81272ae1))\n* **RAG:** Introduce SentenceTransformer Reranker ([#1810](https://github.com/zylon-ai/private-gpt/issues/1810)) ([83adc12](https://github.com/zylon-ai/private-gpt/commit/83adc12a8ef0fa0c13a0dec084fa596445fc9075))\n* **scripts:** Wipe qdrant and obtain db Stats command ([#1783](https://github.com/zylon-ai/private-gpt/issues/1783)) ([ea153fb](https://github.com/zylon-ai/private-gpt/commit/ea153fb92f1f61f64c0d04fff0048d4d00b6f8d0))\n* **ui:** Add Model Information to ChatInterface label ([f0b174c](https://github.com/zylon-ai/private-gpt/commit/f0b174c097c2d5e52deae8ef88de30a0d9013a38))\n* **ui:** add sources check to not repeat identical sources ([#1705](https://github.com/zylon-ai/private-gpt/issues/1705)) ([290b9fb](https://github.com/zylon-ai/private-gpt/commit/290b9fb084632216300e89bdadbfeb0380724b12))\n* **UI:** Faster startup and document listing ([#1763](https://github.com/zylon-ai/private-gpt/issues/1763)) ([348df78](https://github.com/zylon-ai/private-gpt/commit/348df781b51606b2f9810bcd46f850e54192fd16))\n* **ui:** maintain score order when curating sources ([#1643](https://github.com/zylon-ai/private-gpt/issues/1643)) ([410bf7a](https://github.com/zylon-ai/private-gpt/commit/410bf7a71f17e77c4aec723ab80c233b53765964))\n* unify settings for vector and nodestore connections to PostgreSQL ([#1730](https://github.com/zylon-ai/private-gpt/issues/1730)) ([63de7e4](https://github.com/zylon-ai/private-gpt/commit/63de7e4930ac90dd87620225112a22ffcbbb31ee))\n* wipe per storage type ([#1772](https://github.com/zylon-ai/private-gpt/issues/1772)) ([c2d6948](https://github.com/zylon-ai/private-gpt/commit/c2d694852b4696834962a42fde047b728722ad74))\n\n\n### Bug Fixes\n\n* **docs:** Minor documentation amendment ([#1739](https://github.com/zylon-ai/private-gpt/issues/1739)) ([258d02d](https://github.com/zylon-ai/private-gpt/commit/258d02d87c5cb81d6c3a6f06aa69339b670dffa9))\n* Fixed docker-compose ([#1758](https://github.com/zylon-ai/private-gpt/issues/1758)) ([774e256](https://github.com/zylon-ai/private-gpt/commit/774e2560520dc31146561d09a2eb464c68593871))\n* **ingest:** update script label ([#1770](https://github.com/zylon-ai/private-gpt/issues/1770)) ([7d2de5c](https://github.com/zylon-ai/private-gpt/commit/7d2de5c96fd42e339b26269b3155791311ef1d08))\n* **settings:** set default tokenizer to avoid running make setup fail ([#1709](https://github.com/zylon-ai/private-gpt/issues/1709)) ([d17c34e](https://github.com/zylon-ai/private-gpt/commit/d17c34e81a84518086b93605b15032e2482377f7))\n\n## [0.4.0](https://github.com/imartinez/privateGPT/compare/v0.3.0...v0.4.0) (2024-03-06)\n\n\n### Features\n\n* Upgrade to LlamaIndex to 0.10 ([#1663](https://github.com/imartinez/privateGPT/issues/1663)) ([45f0571](https://github.com/imartinez/privateGPT/commit/45f05711eb71ffccdedb26f37e680ced55795d44))\n* **Vector:** support pgvector ([#1624](https://github.com/imartinez/privateGPT/issues/1624)) ([cd40e39](https://github.com/imartinez/privateGPT/commit/cd40e3982b780b548b9eea6438c759f1c22743a8))\n\n## [0.3.0](https://github.com/imartinez/privateGPT/compare/v0.2.0...v0.3.0) (2024-02-16)\n\n\n### Features\n\n* add mistral + chatml prompts ([#1426](https://github.com/imartinez/privateGPT/issues/1426)) ([e326126](https://github.com/imartinez/privateGPT/commit/e326126d0d4cd7e46a79f080c442c86f6dd4d24b))\n* Add stream information to generate SDKs 
([#1569](https://github.com/imartinez/privateGPT/issues/1569)) ([24fae66](https://github.com/imartinez/privateGPT/commit/24fae660e6913aac6b52745fb2c2fe128ba2eb79))\n* **API:** Ingest plain text ([#1417](https://github.com/imartinez/privateGPT/issues/1417)) ([6eeb95e](https://github.com/imartinez/privateGPT/commit/6eeb95ec7f17a618aaa47f5034ee5bccae02b667))\n* **bulk-ingest:** Add --ignored Flag to Exclude Specific Files and Directories During Ingestion ([#1432](https://github.com/imartinez/privateGPT/issues/1432)) ([b178b51](https://github.com/imartinez/privateGPT/commit/b178b514519550e355baf0f4f3f6beb73dca7df2))\n* **llm:** Add openailike llm mode ([#1447](https://github.com/imartinez/privateGPT/issues/1447)) ([2d27a9f](https://github.com/imartinez/privateGPT/commit/2d27a9f956d672cb1fe715cf0acdd35c37f378a5)), closes [#1424](https://github.com/imartinez/privateGPT/issues/1424)\n* **llm:** Add support for Ollama LLM ([#1526](https://github.com/imartinez/privateGPT/issues/1526)) ([6bbec79](https://github.com/imartinez/privateGPT/commit/6bbec79583b7f28d9bea4b39c099ebef149db843))\n* **settings:** Configurable context_window and tokenizer ([#1437](https://github.com/imartinez/privateGPT/issues/1437)) ([4780540](https://github.com/imartinez/privateGPT/commit/47805408703c23f0fd5cab52338142c1886b450b))\n* **settings:** Update default model to TheBloke/Mistral-7B-Instruct-v0.2-GGUF ([#1415](https://github.com/imartinez/privateGPT/issues/1415)) ([8ec7cf4](https://github.com/imartinez/privateGPT/commit/8ec7cf49f40701a4f2156c48eb2fad9fe6220629))\n* **ui:** make chat area stretch to fill the screen ([#1397](https://github.com/imartinez/privateGPT/issues/1397)) ([c71ae7c](https://github.com/imartinez/privateGPT/commit/c71ae7cee92463bbc5ea9c434eab9f99166e1363))\n* **UI:** Select file to Query or Delete + Delete ALL ([#1612](https://github.com/imartinez/privateGPT/issues/1612)) ([aa13afd](https://github.com/imartinez/privateGPT/commit/aa13afde07122f2ddda3942f630e5cadc7e4e1ee))\n\n\n### Bug Fixes\n\n* Adding an LLM param to fix broken generator from llamacpp ([#1519](https://github.com/imartinez/privateGPT/issues/1519)) ([869233f](https://github.com/imartinez/privateGPT/commit/869233f0e4f03dc23e5fae43cf7cb55350afdee9))\n* **deploy:** fix local and external dockerfiles ([fde2b94](https://github.com/imartinez/privateGPT/commit/fde2b942bc03688701ed563be6d7d597c75e4e4e))\n* **docker:** docker broken copy ([#1419](https://github.com/imartinez/privateGPT/issues/1419)) ([059f358](https://github.com/imartinez/privateGPT/commit/059f35840adbc3fb93d847d6decf6da32d08670c))\n* **docs:** Update quickstart doc and set version in pyproject.toml to 0.2.0 ([0a89d76](https://github.com/imartinez/privateGPT/commit/0a89d76cc5ed4371ffe8068858f23dfbb5e8cc37))\n* minor bug in chat stream output - python error being serialized ([#1449](https://github.com/imartinez/privateGPT/issues/1449)) ([6191bcd](https://github.com/imartinez/privateGPT/commit/6191bcdbd6e92b6f4d5995967dc196c9348c5954))\n* **settings:** correct yaml multiline string ([#1403](https://github.com/imartinez/privateGPT/issues/1403)) ([2564f8d](https://github.com/imartinez/privateGPT/commit/2564f8d2bb8c4332a6a0ab6d722a2ac15006b85f))\n* **tests:** load the test settings only when running tests ([d3acd85](https://github.com/imartinez/privateGPT/commit/d3acd85fe34030f8cfd7daf50b30c534087bdf2b))\n* **UI:** Updated ui.py. Frees up the CPU to not be bottlenecked. 
([24fb80c](https://github.com/imartinez/privateGPT/commit/24fb80ca38f21910fe4fd81505d14960e9ed4faa))\n\n## [0.2.0](https://github.com/imartinez/privateGPT/compare/v0.1.0...v0.2.0) (2023-12-10)\n\n\n### Features\n\n* **llm:** drop default_system_prompt ([#1385](https://github.com/imartinez/privateGPT/issues/1385)) ([a3ed14c](https://github.com/imartinez/privateGPT/commit/a3ed14c58f77351dbd5f8f2d7868d1642a44f017))\n* **ui:** Allows User to Set System Prompt via \"Additional Options\" in Chat Interface ([#1353](https://github.com/imartinez/privateGPT/issues/1353)) ([145f3ec](https://github.com/imartinez/privateGPT/commit/145f3ec9f41c4def5abf4065a06fb0786e2d992a))\n\n## [0.1.0](https://github.com/imartinez/privateGPT/compare/v0.0.2...v0.1.0) (2023-11-30)\n\n\n### Features\n\n* Disable Gradio Analytics ([#1165](https://github.com/imartinez/privateGPT/issues/1165)) ([6583dc8](https://github.com/imartinez/privateGPT/commit/6583dc84c082773443fc3973b1cdf8095fa3fec3))\n* Drop loguru and use builtin `logging` ([#1133](https://github.com/imartinez/privateGPT/issues/1133)) ([64c5ae2](https://github.com/imartinez/privateGPT/commit/64c5ae214a9520151c9c2d52ece535867d799367))\n* enable resume download for hf_hub_download ([#1249](https://github.com/imartinez/privateGPT/issues/1249)) ([4197ada](https://github.com/imartinez/privateGPT/commit/4197ada6267c822f32c1d7ba2be6e7ce145a3404))\n* move torch and transformers to local group ([#1172](https://github.com/imartinez/privateGPT/issues/1172)) ([0d677e1](https://github.com/imartinez/privateGPT/commit/0d677e10b970aec222ec04837d0f08f1631b6d4a))\n* Qdrant support ([#1228](https://github.com/imartinez/privateGPT/issues/1228)) ([03d1ae6](https://github.com/imartinez/privateGPT/commit/03d1ae6d70dffdd2411f0d4e92f65080fff5a6e2))\n\n\n### Bug Fixes\n\n* Docker and sagemaker setup ([#1118](https://github.com/imartinez/privateGPT/issues/1118)) ([895588b](https://github.com/imartinez/privateGPT/commit/895588b82a06c2bc71a9e22fb840c7f6442a3b5b))\n* fix pytorch version to avoid wheel bug ([#1123](https://github.com/imartinez/privateGPT/issues/1123)) ([24cfddd](https://github.com/imartinez/privateGPT/commit/24cfddd60f74aadd2dade4c63f6012a2489938a1))\n* Remove global state ([#1216](https://github.com/imartinez/privateGPT/issues/1216)) ([022bd71](https://github.com/imartinez/privateGPT/commit/022bd718e3dfc197027b1e24fb97e5525b186db4))\n* sagemaker config and chat methods ([#1142](https://github.com/imartinez/privateGPT/issues/1142)) ([a517a58](https://github.com/imartinez/privateGPT/commit/a517a588c4927aa5c5c2a93e4f82a58f0599d251))\n* typo in README.md ([#1091](https://github.com/imartinez/privateGPT/issues/1091)) ([ba23443](https://github.com/imartinez/privateGPT/commit/ba23443a70d323cd4f9a242b33fd9dce1bacd2db))\n* Windows 11 failing to auto-delete tmp file ([#1260](https://github.com/imartinez/privateGPT/issues/1260)) ([0d52002](https://github.com/imartinez/privateGPT/commit/0d520026a3d5b08a9b8487be992d3095b21e710c))\n* Windows permission error on ingest service tmp files ([#1280](https://github.com/imartinez/privateGPT/issues/1280)) ([f1cbff0](https://github.com/imartinez/privateGPT/commit/f1cbff0fb7059432d9e71473cbdd039032dab60d))\n\n## [0.0.2](https://github.com/imartinez/privateGPT/compare/v0.0.1...v0.0.2) (2023-10-20)\n\n\n### Bug Fixes\n\n* chromadb max batch size ([#1087](https://github.com/imartinez/privateGPT/issues/1087)) ([f5a9bf4](https://github.com/imartinez/privateGPT/commit/f5a9bf4e374b2d4c76438cf8a97cccf222ec8e6f))\n\n## 0.0.1 (2023-10-20)\n\n### Miscellaneous 
Chores\n\n* Initial version ([490d93f](https://github.com/imartinez/privateGPT/commit/490d93fdc1977443c92f6c42e57a1c585aa59430))\n"
  },
  {
    "path": "CITATION.cff",
    "content": "# This CITATION.cff file was generated with cffinit.\n# Visit https://bit.ly/cffinit to generate yours today!\n\ncff-version: 1.2.0\ntitle: PrivateGPT\nmessage: >-\n  If you use this software, please cite it using the\n  metadata from this file.\ntype: software\nauthors:\n  - name: Zylon by PrivateGPT\n    address: hello@zylon.ai\n    website: 'https://www.zylon.ai/'\nrepository-code: 'https://github.com/zylon-ai/private-gpt'\nlicense: Apache-2.0\ndate-released: '2023-05-02'\n"
  },
  {
    "path": "Dockerfile.llamacpp-cpu",
    "content": "### IMPORTANT, THIS IMAGE CAN ONLY BE RUN IN LINUX DOCKER\n### You will run into a segfault in mac\nFROM python:3.11.6-slim-bookworm AS base\n\n# Install poetry\nRUN pip install pipx\nRUN python3 -m pipx ensurepath\nRUN pipx install poetry==1.8.3\nENV PATH=\"/root/.local/bin:$PATH\"\nENV PATH=\".venv/bin/:$PATH\"\n\n# Dependencies to build llama-cpp\nRUN apt update && apt install -y \\\n  libopenblas-dev\\\n  ninja-build\\\n  build-essential\\\n  pkg-config\\\n  wget\n\n# https://python-poetry.org/docs/configuration/#virtualenvsin-project\nENV POETRY_VIRTUALENVS_IN_PROJECT=true\n\nFROM base AS dependencies\nWORKDIR /home/worker/app\nCOPY pyproject.toml poetry.lock ./\n\nARG POETRY_EXTRAS=\"ui embeddings-huggingface llms-llama-cpp vector-stores-qdrant\"\nRUN poetry install --no-root --extras \"${POETRY_EXTRAS}\"\n\nFROM base AS app\n\nENV PYTHONUNBUFFERED=1\nENV PORT=8080\nENV APP_ENV=prod\nENV PYTHONPATH=\"$PYTHONPATH:/home/worker/app/private_gpt/\"\nEXPOSE 8080\n\n# Prepare a non-root user\n# More info about how to configure UIDs and GIDs in Docker:\n# https://github.com/systemd/systemd/blob/main/docs/UIDS-GIDS.md\n\n# Define the User ID (UID) for the non-root user\n# UID 100 is chosen to avoid conflicts with existing system users\nARG UID=100\n\n# Define the Group ID (GID) for the non-root user\n# GID 65534 is often used for the 'nogroup' or 'nobody' group\nARG GID=65534\n\nRUN adduser --system --gid ${GID} --uid ${UID} --home /home/worker worker\nWORKDIR /home/worker/app\n\nRUN chown worker /home/worker/app\nRUN mkdir local_data && chown worker local_data\nRUN mkdir models && chown worker models\nCOPY --chown=worker --from=dependencies /home/worker/app/.venv/ .venv\nCOPY --chown=worker private_gpt/ private_gpt\nCOPY --chown=worker *.yaml ./\nCOPY --chown=worker scripts/ scripts\n\nUSER worker\nENTRYPOINT python -m private_gpt"
  },
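The dependencies stage of `Dockerfile.llamacpp-cpu` above accepts a `POETRY_EXTRAS` build argument. As a minimal sketch of how that argument could be overridden at build time, a compose service built from this Dockerfile might look like the fragment below; the service name, port mapping, and mounts are assumptions for illustration, since the project's real docker-compose.yaml is not part of this section.

```yaml
# Hypothetical compose service building Dockerfile.llamacpp-cpu with custom extras.
services:
  private-gpt-llamacpp-cpu:
    build:
      context: .
      dockerfile: Dockerfile.llamacpp-cpu
      args:
        POETRY_EXTRAS: "ui embeddings-huggingface llms-llama-cpp vector-stores-qdrant"
    ports:
      - "8001:8080"            # the container listens on 8080 (EXPOSE above)
    volumes:
      - ./local_data:/home/worker/app/local_data
      - ./models:/home/worker/app/models
```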
  {
    "path": "Dockerfile.ollama",
    "content": "FROM python:3.11.6-slim-bookworm AS base\n\n# Install poetry\nRUN pip install pipx\nRUN python3 -m pipx ensurepath\nRUN pipx install poetry==1.8.3\nENV PATH=\"/root/.local/bin:$PATH\"\nENV PATH=\".venv/bin/:$PATH\"\n\n# https://python-poetry.org/docs/configuration/#virtualenvsin-project\nENV POETRY_VIRTUALENVS_IN_PROJECT=true\n\nFROM base AS dependencies\nWORKDIR /home/worker/app\nCOPY pyproject.toml poetry.lock ./\n\nARG POETRY_EXTRAS=\"ui vector-stores-qdrant llms-ollama embeddings-ollama\"\nRUN poetry install --no-root --extras \"${POETRY_EXTRAS}\"\n\nFROM base AS app\nENV PYTHONUNBUFFERED=1\nENV PORT=8080\nENV APP_ENV=prod\nENV PYTHONPATH=\"$PYTHONPATH:/home/worker/app/private_gpt/\"\nEXPOSE 8080\n\n# Prepare a non-root user\n# More info about how to configure UIDs and GIDs in Docker:\n# https://github.com/systemd/systemd/blob/main/docs/UIDS-GIDS.md\n\n# Define the User ID (UID) for the non-root user\n# UID 100 is chosen to avoid conflicts with existing system users\nARG UID=100\n\n# Define the Group ID (GID) for the non-root user\n# GID 65534 is often used for the 'nogroup' or 'nobody' group\nARG GID=65534\n\nRUN adduser --system --gid ${GID} --uid ${UID} --home /home/worker worker\nWORKDIR /home/worker/app\n\nRUN chown worker /home/worker/app\nRUN mkdir local_data && chown worker local_data\nRUN mkdir models && chown worker models\nCOPY --chown=worker --from=dependencies /home/worker/app/.venv/ .venv\nCOPY --chown=worker private_gpt/ private_gpt\nCOPY --chown=worker *.yaml .\nCOPY --chown=worker scripts/ scripts\n\nUSER worker\nENTRYPOINT python -m private_gpt\n"
  },
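`Dockerfile.ollama` above is the variant published by the generate-release workflow with an `-ollama` tag suffix. The fragment below is a hedged sketch of running that published image next to an Ollama container: the image tag follows the workflow's naming for version 0.6.2, `PGPT_PROFILES` appears in the Makefile later in this section, but the specific profile value and the service wiring are assumptions for illustration only.

```yaml
# Hypothetical compose sketch for the published ollama-flavoured image.
services:
  private-gpt-ollama:
    image: zylonai/private-gpt:0.6.2-ollama
    ports:
      - "8001:8080"
    environment:
      PGPT_PROFILES: docker    # profile name is an assumption, not taken from this section
    volumes:
      - ./local_data:/home/worker/app/local_data
    depends_on:
      - ollama
  ollama:
    image: ollama/ollama:latest
    volumes:
      - ./models:/root/.ollama
```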
  {
    "path": "LICENSE",
    "content": "                                 Apache License\n                           Version 2.0, January 2004\n                        http://www.apache.org/licenses/\n\n   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION\n\n   1. Definitions.\n\n      \"License\" shall mean the terms and conditions for use, reproduction,\n      and distribution as defined by Sections 1 through 9 of this document.\n\n      \"Licensor\" shall mean the copyright owner or entity authorized by\n      the copyright owner that is granting the License.\n\n      \"Legal Entity\" shall mean the union of the acting entity and all\n      other entities that control, are controlled by, or are under common\n      control with that entity. For the purposes of this definition,\n      \"control\" means (i) the power, direct or indirect, to cause the\n      direction or management of such entity, whether by contract or\n      otherwise, or (ii) ownership of fifty percent (50%) or more of the\n      outstanding shares, or (iii) beneficial ownership of such entity.\n\n      \"You\" (or \"Your\") shall mean an individual or Legal Entity\n      exercising permissions granted by this License.\n\n      \"Source\" form shall mean the preferred form for making modifications,\n      including but not limited to software source code, documentation\n      source, and configuration files.\n\n      \"Object\" form shall mean any form resulting from mechanical\n      transformation or translation of a Source form, including but\n      not limited to compiled object code, generated documentation,\n      and conversions to other media types.\n\n      \"Work\" shall mean the work of authorship, whether in Source or\n      Object form, made available under the License, as indicated by a\n      copyright notice that is included in or attached to the work\n      (an example is provided in the Appendix below).\n\n      \"Derivative Works\" shall mean any work, whether in Source or Object\n      form, that is based on (or derived from) the Work and for which the\n      editorial revisions, annotations, elaborations, or other modifications\n      represent, as a whole, an original work of authorship. For the purposes\n      of this License, Derivative Works shall not include works that remain\n      separable from, or merely link (or bind by name) to the interfaces of,\n      the Work and Derivative Works thereof.\n\n      \"Contribution\" shall mean any work of authorship, including\n      the original version of the Work and any modifications or additions\n      to that Work or Derivative Works thereof, that is intentionally\n      submitted to Licensor for inclusion in the Work by the copyright owner\n      or by an individual or Legal Entity authorized to submit on behalf of\n      the copyright owner. 
For the purposes of this definition, \"submitted\"\n      means any form of electronic, verbal, or written communication sent\n      to the Licensor or its representatives, including but not limited to\n      communication on electronic mailing lists, source code control systems,\n      and issue tracking systems that are managed by, or on behalf of, the\n      Licensor for the purpose of discussing and improving the Work, but\n      excluding communication that is conspicuously marked or otherwise\n      designated in writing by the copyright owner as \"Not a Contribution.\"\n\n      \"Contributor\" shall mean Licensor and any individual or Legal Entity\n      on behalf of whom a Contribution has been received by Licensor and\n      subsequently incorporated within the Work.\n\n   2. Grant of Copyright License. Subject to the terms and conditions of\n      this License, each Contributor hereby grants to You a perpetual,\n      worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n      copyright license to reproduce, prepare Derivative Works of,\n      publicly display, publicly perform, sublicense, and distribute the\n      Work and such Derivative Works in Source or Object form.\n\n   3. Grant of Patent License. Subject to the terms and conditions of\n      this License, each Contributor hereby grants to You a perpetual,\n      worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n      (except as stated in this section) patent license to make, have made,\n      use, offer to sell, sell, import, and otherwise transfer the Work,\n      where such license applies only to those patent claims licensable\n      by such Contributor that are necessarily infringed by their\n      Contribution(s) alone or by combination of their Contribution(s)\n      with the Work to which such Contribution(s) was submitted. If You\n      institute patent litigation against any entity (including a\n      cross-claim or counterclaim in a lawsuit) alleging that the Work\n      or a Contribution incorporated within the Work constitutes direct\n      or contributory patent infringement, then any patent licenses\n      granted to You under this License for that Work shall terminate\n      as of the date such litigation is filed.\n\n   4. Redistribution. 
You may reproduce and distribute copies of the\n      Work or Derivative Works thereof in any medium, with or without\n      modifications, and in Source or Object form, provided that You\n      meet the following conditions:\n\n      (a) You must give any other recipients of the Work or\n          Derivative Works a copy of this License; and\n\n      (b) You must cause any modified files to carry prominent notices\n          stating that You changed the files; and\n\n      (c) You must retain, in the Source form of any Derivative Works\n          that You distribute, all copyright, patent, trademark, and\n          attribution notices from the Source form of the Work,\n          excluding those notices that do not pertain to any part of\n          the Derivative Works; and\n\n      (d) If the Work includes a \"NOTICE\" text file as part of its\n          distribution, then any Derivative Works that You distribute must\n          include a readable copy of the attribution notices contained\n          within such NOTICE file, excluding those notices that do not\n          pertain to any part of the Derivative Works, in at least one\n          of the following places: within a NOTICE text file distributed\n          as part of the Derivative Works; within the Source form or\n          documentation, if provided along with the Derivative Works; or,\n          within a display generated by the Derivative Works, if and\n          wherever such third-party notices normally appear. The contents\n          of the NOTICE file are for informational purposes only and\n          do not modify the License. You may add Your own attribution\n          notices within Derivative Works that You distribute, alongside\n          or as an addendum to the NOTICE text from the Work, provided\n          that such additional attribution notices cannot be construed\n          as modifying the License.\n\n      You may add Your own copyright statement to Your modifications and\n      may provide additional or different license terms and conditions\n      for use, reproduction, or distribution of Your modifications, or\n      for any such Derivative Works as a whole, provided Your use,\n      reproduction, and distribution of the Work otherwise complies with\n      the conditions stated in this License.\n\n   5. Submission of Contributions. Unless You explicitly state otherwise,\n      any Contribution intentionally submitted for inclusion in the Work\n      by You to the Licensor shall be under the terms and conditions of\n      this License, without any additional terms or conditions.\n      Notwithstanding the above, nothing herein shall supersede or modify\n      the terms of any separate license agreement you may have executed\n      with Licensor regarding such Contributions.\n\n   6. Trademarks. This License does not grant permission to use the trade\n      names, trademarks, service marks, or product names of the Licensor,\n      except as required for reasonable and customary use in describing the\n      origin of the Work and reproducing the content of the NOTICE file.\n\n   7. Disclaimer of Warranty. 
Unless required by applicable law or\n      agreed to in writing, Licensor provides the Work (and each\n      Contributor provides its Contributions) on an \"AS IS\" BASIS,\n      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or\n      implied, including, without limitation, any warranties or conditions\n      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A\n      PARTICULAR PURPOSE. You are solely responsible for determining the\n      appropriateness of using or redistributing the Work and assume any\n      risks associated with Your exercise of permissions under this License.\n\n   8. Limitation of Liability. In no event and under no legal theory,\n      whether in tort (including negligence), contract, or otherwise,\n      unless required by applicable law (such as deliberate and grossly\n      negligent acts) or agreed to in writing, shall any Contributor be\n      liable to You for damages, including any direct, indirect, special,\n      incidental, or consequential damages of any character arising as a\n      result of this License or out of the use or inability to use the\n      Work (including but not limited to damages for loss of goodwill,\n      work stoppage, computer failure or malfunction, or any and all\n      other commercial damages or losses), even if such Contributor\n      has been advised of the possibility of such damages.\n\n   9. Accepting Warranty or Additional Liability. While redistributing\n      the Work or Derivative Works thereof, You may choose to offer,\n      and charge a fee for, acceptance of support, warranty, indemnity,\n      or other liability obligations and/or rights consistent with this\n      License. However, in accepting such obligations, You may act only\n      on Your own behalf and on Your sole responsibility, not on behalf\n      of any other Contributor, and only if You agree to indemnify,\n      defend, and hold each Contributor harmless for any liability\n      incurred by, or claims asserted against, such Contributor by reason\n      of your accepting any such warranty or additional liability.\n\n   END OF TERMS AND CONDITIONS\n\n   APPENDIX: How to apply the Apache License to your work.\n\n      To apply the Apache License to your work, attach the following\n      boilerplate notice, with the fields enclosed by brackets \"[]\"\n      replaced with your own identifying information. (Don't include\n      the brackets!)  The text should be enclosed in the appropriate\n      comment syntax for the file format. We also recommend that a\n      file or class name and description of purpose be included on the\n      same \"printed page\" as the copyright notice for easier\n      identification within third-party archives.\n\n   Copyright [yyyy] [name of copyright owner]\n\n   Licensed under the Apache License, Version 2.0 (the \"License\");\n   you may not use this file except in compliance with the License.\n   You may obtain a copy of the License at\n\n       http://www.apache.org/licenses/LICENSE-2.0\n\n   Unless required by applicable law or agreed to in writing, software\n   distributed under the License is distributed on an \"AS IS\" BASIS,\n   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n   See the License for the specific language governing permissions and\n   limitations under the License.\n"
  },
  {
    "path": "Makefile",
    "content": "# Any args passed to the make script, use with $(call args, default_value)\nargs = `arg=\"$(filter-out $@,$(MAKECMDGOALS))\" && echo $${arg:-${1}}`\n\n########################################################################################################################\n# Quality checks\n########################################################################################################################\n\ntest:\n\tPYTHONPATH=. poetry run pytest tests\n\ntest-coverage:\n\tPYTHONPATH=. poetry run pytest tests --cov private_gpt --cov-report term --cov-report=html --cov-report xml --junit-xml=tests-results.xml\n\nblack:\n\tpoetry run black . --check\n\nruff:\n\tpoetry run ruff check private_gpt tests\n\nformat:\n\tpoetry run black .\n\tpoetry run ruff check private_gpt tests --fix\n\nmypy:\n\tpoetry run mypy private_gpt\n\ncheck:\n\tmake format\n\tmake mypy\n\n########################################################################################################################\n# Run\n########################################################################################################################\n\nrun:\n\tpoetry run python -m private_gpt\n\ndev-windows:\n\t(set PGPT_PROFILES=local & poetry run python -m uvicorn private_gpt.main:app --reload --port 8001)\n\ndev:\n\tPYTHONUNBUFFERED=1 PGPT_PROFILES=local poetry run python -m uvicorn private_gpt.main:app --reload --port 8001\n\n########################################################################################################################\n# Misc\n########################################################################################################################\n\napi-docs:\n\tPGPT_PROFILES=mock poetry run python scripts/extract_openapi.py private_gpt.main:app --out fern/openapi/openapi.json\n\ningest:\n\t@poetry run python scripts/ingest_folder.py $(call args)\n\nstats:\n\tpoetry run python scripts/utils.py stats\n\nwipe:\n\tpoetry run python scripts/utils.py wipe\n\nsetup:\n\tpoetry run python scripts/setup\n\nlist:\n\t@echo \"Available commands:\"\n\t@echo \"  test            : Run tests using pytest\"\n\t@echo \"  test-coverage   : Run tests with coverage report\"\n\t@echo \"  black           : Check code format with black\"\n\t@echo \"  ruff            : Check code with ruff\"\n\t@echo \"  format          : Format code with black and ruff\"\n\t@echo \"  mypy            : Run mypy for type checking\"\n\t@echo \"  check           : Run format and mypy commands\"\n\t@echo \"  run             : Run the application\"\n\t@echo \"  dev-windows     : Run the application in development mode on Windows\"\n\t@echo \"  dev             : Run the application in development mode\"\n\t@echo \"  api-docs        : Generate API documentation\"\n\t@echo \"  ingest          : Ingest data using specified script\"\n\t@echo \"  wipe            : Wipe data using specified script\"\n\t@echo \"  setup           : Setup the application\"\n"
  },
  {
    "path": "README.md",
    "content": "# PrivateGPT \n\n<a href=\"https://trendshift.io/repositories/2601\" target=\"_blank\"><img src=\"https://trendshift.io/api/badge/repositories/2601\" alt=\"imartinez%2FprivateGPT | Trendshift\" style=\"width: 250px; height: 55px;\" width=\"250\" height=\"55\"/></a>\n\n[![Tests](https://github.com/zylon-ai/private-gpt/actions/workflows/tests.yml/badge.svg)](https://github.com/zylon-ai/private-gpt/actions/workflows/tests.yml?query=branch%3Amain)\n[![Website](https://img.shields.io/website?up_message=check%20it&down_message=down&url=https%3A%2F%2Fdocs.privategpt.dev%2F&label=Documentation)](https://docs.privategpt.dev/)\n[![Discord](https://img.shields.io/discord/1164200432894234644?logo=discord&label=PrivateGPT)](https://discord.gg/bK6mRVpErU)\n[![X (formerly Twitter) Follow](https://img.shields.io/twitter/follow/ZylonPrivateGPT)](https://twitter.com/ZylonPrivateGPT)\n\n![Gradio UI](/fern/docs/assets/ui.png?raw=true)\n\nPrivateGPT -built by Zylon- is a production-ready AI project that allows you to ask questions about your documents using the power\nof Large Language Models (LLMs), even in scenarios without an Internet connection. 100% private, no data leaves your\nexecution environment at any point.\n\n>[!TIP]\n> If you are looking for an **enterprise-ready, fully private AI platform for regulated industries** like financial services (banks, insurance, investment), defense, critical infrastructure services, government and healthcare,\n> check out [Zylon's website](https://zylon.ai)  or [request a demo](https://cal.com/zylon/demo?source=pgpt-readme).\n> **Zylon** is an enterprise AI platform delivering private generative AI and on-premise AI software for regulated industries, enabling secure deployment inside enterprise infrastructure without external cloud dependencies.\n\nThe project provides an API offering all the primitives required to build private, context-aware AI applications.\nIt follows and extends the [OpenAI API standard](https://openai.com/blog/openai-api),\nand supports both normal and streaming responses.\n\nThe API is divided into two logical blocks:\n\n**High-level API**, which abstracts all the complexity of a RAG (Retrieval Augmented Generation)\npipeline implementation:\n- Ingestion of documents: internally managing document parsing,\nsplitting, metadata extraction, embedding generation and storage.\n- Chat & Completions using context from ingested documents:\nabstracting the retrieval of context, the prompt engineering and the response generation.\n\n**Low-level API**, which allows advanced users to implement their own complex pipelines:\n- Embeddings generation: based on a piece of text.\n- Contextual chunks retrieval: given a query, returns the most relevant chunks of text from the ingested documents.\n\nIn addition to this, a working [Gradio UI](https://www.gradio.app/)\nclient is provided to test the API, together with a set of useful tools such as bulk model\ndownload script, ingestion script, documents folder watch, etc.\n\n## 🎞️ Overview\n>[!WARNING]\n>  This README is not updated as frequently as the [documentation](https://docs.privategpt.dev/).\n>  Please check it out for the latest updates!\n\n### Motivation behind PrivateGPT\nGenerative AI is a game changer for our society, but adoption in companies of all sizes and data-sensitive\ndomains like healthcare or legal is limited by a clear concern: **privacy**.\nNot being able to ensure that your data is fully under your control when using third-party AI tools\nis a risk those industries cannot 
take.\n\n### Primordial version\nThe first version of PrivateGPT was launched in May 2023 as a novel approach to address the privacy\nconcerns by using LLMs in a complete offline way.\n\nThat version, which rapidly became a go-to project for privacy-sensitive setups and served as the seed\nfor thousands of local-focused generative AI projects, was the foundation of what PrivateGPT is becoming nowadays;\nthus a simpler and more educational implementation to understand the basic concepts required\nto build a fully local -and therefore, private- chatGPT-like tool.\n\nIf you want to keep experimenting with it, we have saved it in the\n[primordial branch](https://github.com/zylon-ai/private-gpt/tree/primordial) of the project.\n\n> It is strongly recommended to do a clean clone and install of this new version of\nPrivateGPT if you come from the previous, primordial version.\n\n### Present and Future of PrivateGPT\nPrivateGPT is now evolving towards becoming a gateway to generative AI models and primitives, including\ncompletions, document ingestion, RAG pipelines and other low-level building blocks.\nWe want to make it easier for any developer to build AI applications and experiences, as well as provide\na suitable extensive architecture for the community to keep contributing.\n\nStay tuned to our [releases](https://github.com/zylon-ai/private-gpt/releases) to check out all the new features and changes included.\n\n## 📄 Documentation\nFull documentation on installation, dependencies, configuration, running the server, deployment options,\ningesting local documents, API details and UI features can be found here: https://docs.privategpt.dev/\n\n## 🧩 Architecture\nConceptually, PrivateGPT is an API that wraps a RAG pipeline and exposes its\nprimitives.\n* The API is built using [FastAPI](https://fastapi.tiangolo.com/) and follows\n  [OpenAI's API scheme](https://platform.openai.com/docs/api-reference).\n* The RAG pipeline is based on [LlamaIndex](https://www.llamaindex.ai/).\n\nThe design of PrivateGPT allows to easily extend and adapt both the API and the\nRAG implementation. Some key architectural decisions are:\n* Dependency Injection, decoupling the different components and layers.\n* Usage of LlamaIndex abstractions such as `LLM`, `BaseEmbedding` or `VectorStore`,\n  making it immediate to change the actual implementations of those abstractions.\n* Simplicity, adding as few layers and new abstractions as possible.\n* Ready to use, providing a full implementation of the API and RAG\n  pipeline.\n\nMain building blocks:\n* APIs are defined in `private_gpt:server:<api>`. Each package contains an\n  `<api>_router.py` (FastAPI layer) and an `<api>_service.py` (the\n  service implementation). Each *Service* uses LlamaIndex base abstractions instead\n  of specific implementations,\n  decoupling the actual implementation from its usage.\n* Components are placed in\n  `private_gpt:components:<component>`. Each *Component* is in charge of providing\n  actual implementations to the base abstractions used in the Services - for example\n  `LLMComponent` is in charge of providing an actual implementation of an `LLM`\n  (for example `LlamaCPP` or `OpenAI`).\n\n## 💡 Contributing\nContributions are welcomed! To ensure code quality we have enabled several format and\ntyping checks, just run `make check` before committing to make sure your code is ok.\nRemember to test your code! You'll find a tests folder with helpers, and you can run\ntests using `make test` command.\n\nDon't know what to contribute? 
Here is the public \n[Project Board](https://github.com/users/imartinez/projects/3) with several ideas. \n\nHead over to Discord \n#contributors channel and ask for write permissions on that GitHub project.\n\n## 💬 Community\nJoin the conversation around PrivateGPT on our:\n- [Twitter (aka X)](https://twitter.com/PrivateGPT_AI)\n- [Discord](https://discord.gg/bK6mRVpErU)\n\n## 📖 Citation\nIf you use PrivateGPT in a paper, check out the [Citation file](CITATION.cff) for the correct citation.  \nYou can also use the \"Cite this repository\" button in this repo to get the citation in different formats.\n\nHere are a couple of examples:\n\n#### BibTeX\n```bibtex\n@software{Zylon_PrivateGPT_2023,\nauthor = {Zylon by PrivateGPT},\nlicense = {Apache-2.0},\nmonth = may,\ntitle = {{PrivateGPT}},\nurl = {https://github.com/zylon-ai/private-gpt},\nyear = {2023}\n}\n```\n\n#### APA\n```\nZylon by PrivateGPT (2023). PrivateGPT [Computer software]. https://github.com/zylon-ai/private-gpt\n```\n\n## 🤗 Partners & Supporters\nPrivateGPT is actively supported by the teams behind:\n* [Qdrant](https://qdrant.tech/), providing the default vector database\n* [Fern](https://buildwithfern.com/), providing Documentation and SDKs\n* [LlamaIndex](https://www.llamaindex.ai/), providing the base RAG framework and abstractions\n\nThis project has been strongly influenced and supported by other amazing projects like \n[LangChain](https://github.com/hwchase17/langchain),\n[GPT4All](https://github.com/nomic-ai/gpt4all),\n[LlamaCpp](https://github.com/ggerganov/llama.cpp),\n[Chroma](https://www.trychroma.com/)\nand [SentenceTransformers](https://www.sbert.net/).\n"
  },
  {
    "path": "docker-compose.yaml",
    "content": "services:\n\n  #-----------------------------------\n  #---- Private-GPT services ---------\n  #-----------------------------------\n\n  # Private-GPT service for the Ollama CPU and GPU modes\n  # This service builds from an external Dockerfile and runs the Ollama mode.\n  private-gpt-ollama:\n    image: ${PGPT_IMAGE:-zylonai/private-gpt}:${PGPT_TAG:-0.6.2}-ollama  # x-release-please-version\n    user: root\n    build:\n      context: .\n      dockerfile: Dockerfile.ollama\n    volumes:\n      - ./local_data:/home/worker/app/local_data\n    ports:\n      - \"8001:8001\"\n    environment:\n      PORT: 8001\n      PGPT_PROFILES: docker\n      PGPT_MODE: ollama\n      PGPT_EMBED_MODE: ollama\n      PGPT_OLLAMA_API_BASE: http://ollama:11434\n      HF_TOKEN: ${HF_TOKEN:-}\n    profiles:\n      - \"\"\n      - ollama-cpu\n      - ollama-cuda\n      - ollama-api\n    depends_on:\n      ollama:\n        condition: service_healthy\n\n  # Private-GPT service for the local mode\n  # This service builds from a local Dockerfile and runs the application in local mode.\n  private-gpt-llamacpp-cpu:\n    image: ${PGPT_IMAGE:-zylonai/private-gpt}:${PGPT_TAG:-0.6.2}-llamacpp-cpu # x-release-please-version\n    user: root\n    build:\n      context: .\n      dockerfile: Dockerfile.llamacpp-cpu\n    volumes:\n      - ./local_data/:/home/worker/app/local_data\n      - ./models/:/home/worker/app/models\n    entrypoint: sh -c \".venv/bin/python scripts/setup && .venv/bin/python -m private_gpt\"\n    ports:\n      - \"8001:8001\"\n    environment:\n      PORT: 8001\n      PGPT_PROFILES: local\n      HF_TOKEN: ${HF_TOKEN:-}\n    profiles:\n      - llamacpp-cpu\n\n  #-----------------------------------\n  #---- Ollama services --------------\n  #-----------------------------------\n\n  # Traefik reverse proxy for the Ollama service\n  # This will route requests to the Ollama service based on the profile.\n  ollama:\n    image: traefik:v2.10\n    healthcheck:\n      test: [\"CMD\", \"sh\", \"-c\", \"wget -q --spider http://ollama:11434 || exit 1\"]\n      interval: 10s\n      retries: 3\n      start_period: 5s\n      timeout: 5s\n    ports:\n      - \"8080:8080\"\n    command:\n      - \"--providers.file.filename=/etc/router.yml\"\n      - \"--log.level=ERROR\"\n      - \"--api.insecure=true\"\n      - \"--providers.docker=true\"\n      - \"--providers.docker.exposedbydefault=false\"\n      - \"--entrypoints.web.address=:11434\"\n    volumes:\n      - /var/run/docker.sock:/var/run/docker.sock:ro\n      - ./.docker/router.yml:/etc/router.yml:ro\n    extra_hosts:\n      - \"host.docker.internal:host-gateway\"\n    profiles:\n      - \"\"\n      - ollama-cpu\n      - ollama-cuda\n      - ollama-api\n\n  # Ollama service for the CPU mode\n  ollama-cpu:\n    image: ollama/ollama:latest\n    ports:\n      - \"11434:11434\"\n    volumes:\n      - ./models:/root/.ollama\n    profiles:\n      - \"\"\n      - ollama-cpu\n\n  # Ollama service for the CUDA mode\n  ollama-cuda:\n    image: ollama/ollama:latest\n    ports:\n      - \"11434:11434\"\n    volumes:\n      - ./models:/root/.ollama\n    deploy:\n      resources:\n        reservations:\n          devices:\n            - driver: nvidia\n              count: 1\n              capabilities: [gpu]\n    profiles:\n      - ollama-cuda\n"
  },
  {
    "path": "fern/README.md",
    "content": "# Documentation of PrivateGPT\n\nThe documentation of this project is being rendered thanks to [fern](https://github.com/fern-api/fern).\n\nFern is basically transforming your `.md` and `.mdx` files into a static website: your documentation.\n\nThe configuration of your documentation is done in the `./docs.yml` file.\nThere, you can configure the navbar, tabs, sections and pages being rendered.\n\nThe documentation of fern (and the syntax of its configuration `docs.yml`) is \navailable there [docs.buildwithfern.com](https://docs.buildwithfern.com/).\n\n## How to run fern\n\n**You cannot render your documentation locally without fern credentials.**\n\nTo see how your documentation looks like, you **have to** use the CICD of this\nrepository (by opening a PR, CICD job will be executed, and a preview of \nyour PR's documentation will be deployed in vercel automatically, through fern).\n\nThe only thing you can do locally, is to run `fern check`, which check the syntax of\nyour `docs.yml` file.\n\n## How to add a new page\nAdd in the `docs.yml` a new `page`, with the following syntax:\n\n```yml\nnavigation:\n  # ...\n  - tab: my-existing-tab\n    layout:\n      # ...\n      - section: My Existing Section\n        contents:\n          # ...\n          - page: My new page display name\n            # The path of the page, relative to `fern/`\n            path: ./docs/pages/my-existing-tab/new-page-content.mdx\n```"
  },
  {
    "path": "fern/docs/pages/api-reference/api-reference.mdx",
    "content": "# API Reference\n\nThe API is divided in two logical blocks:\n\n1. High-level API, abstracting all the complexity of a RAG (Retrieval Augmented Generation) pipeline implementation:\n    - Ingestion of documents: internally managing document parsing, splitting, metadata extraction,\n      embedding generation and storage.\n    - Chat & Completions using context from ingested documents: abstracting the retrieval of context, the prompt\n      engineering and the response generation.\n\n2. Low-level API, allowing advanced users to implement their own complex pipelines:\n    - Embeddings generation: based on a piece of text.\n    - Contextual chunks retrieval: given a query, returns the most relevant chunks of text from the ingested\n      documents."
  },
  {
    "path": "fern/docs/pages/api-reference/sdks.mdx",
    "content": "We use [Fern](www.buildwithfern.com) to offer API clients for Node.js, Python, Go, and Java.\nWe recommend using these clients to interact with our endpoints.\nThe clients are kept up to date automatically, so we encourage you to use the latest version.\n\n## SDKs\n\n*Coming soon!*\n\n<Cards>\n  <Card\n    title=\"TypeScript\"\n    icon=\"fa-brands fa-node\"\n    href=\"https://github.com/zylon-ai/privategpt-ts\"\n  />\n  <Card\n    title=\"Python\"\n    icon=\"fa-brands fa-python\"\n    href=\"https://github.com/zylon-ai/pgpt-python\"\n  />\n  <br />\n</Cards>\n\n<br />\n\n<Cards>\n  <Card\n    title=\"Java - WIP\"\n    icon=\"fa-brands fa-java\"\n    href=\"https://github.com/zylon-ai/private-gpt-java\"\n  />\n  <Card\n    title=\"Go - WIP\"\n    icon=\"fa-brands fa-golang\"\n    href=\"https://github.com/zylon-ai/private-gpt-go\"\n  />\n</Cards>\n\n<br />\n"
  },
  {
    "path": "fern/docs/pages/installation/concepts.mdx",
    "content": "PrivateGPT is a service that wraps a set of AI RAG primitives in a comprehensive set of APIs providing a private, secure, customizable and easy to use GenAI development framework.\n\nIt uses FastAPI and LLamaIndex as its core frameworks. Those can be customized by changing the codebase itself.\n\nIt supports a variety of LLM providers, embeddings providers, and vector stores, both local and remote. Those can be easily changed without changing the codebase.\n\n# Different Setups support\n\n## Setup configurations available\nYou get to decide the setup for these 3 main components:\n- **LLM**: the large language model provider used for inference. It can be local, or remote, or even OpenAI.\n- **Embeddings**: the embeddings provider used to encode the input, the documents and the users' queries. Same as the LLM, it can be local, or remote, or even OpenAI.\n- **Vector store**: the store used to index and retrieve the documents.\n\nThere is an extra component that can be enabled or disabled: the UI. It is a Gradio UI that allows to interact with the API in a more user-friendly way.\n\n<Callout intent = \"warning\">\nA working **Gradio UI client** is provided to test the API, together with a set of useful tools such as bulk\nmodel download script, ingestion script, documents folder watch, etc. Please refer to the [UI alternatives](/manual/user-interface/alternatives) page for more UI alternatives.\n</Callout>\n\n### Setups and Dependencies\nYour setup will be the combination of the different options available. You'll find recommended setups in the [installation](./installation) section.\nPrivateGPT uses poetry to manage its dependencies. You can install the dependencies for the different setups by running `poetry install --extras \"<extra1> <extra2>...\"`.\nExtras are the different options available for each component. For example, to install the dependencies for a a local setup with UI and qdrant as vector database, Ollama as LLM and local embeddings, you would run:\n\n```bash\npoetry install --extras \"ui vector-stores-qdrant llms-ollama embeddings-ollama\"\n```\n\nRefer to the [installation](./installation) section for more details.\n\n### Setups and Configuration\nPrivateGPT uses yaml to define its configuration in files named `settings-<profile>.yaml`.\nDifferent configuration files can be created in the root directory of the project.\nPrivateGPT will load the configuration at startup from the profile specified in the `PGPT_PROFILES` environment variable.\nFor example, running:\n```bash\nPGPT_PROFILES=ollama make run\n```\nwill load the configuration from `settings.yaml` and `settings-ollama.yaml`.\n- `settings.yaml` is always loaded and contains the default configuration.\n- `settings-ollama.yaml` is loaded if the `ollama` profile is specified in the `PGPT_PROFILES` environment variable. It can override configuration from the default `settings.yaml`\n\n## About Fully Local Setups\nIn order to run PrivateGPT in a fully local setup, you will need to run the LLM, Embeddings and Vector Store locally.\n\n### LLM\nFor local LLM there are two options:\n* (Recommended) You can use the 'ollama' option in PrivateGPT, which will connect to your local Ollama instance. Ollama simplifies a lot the installation of local LLMs.\n* You can use the 'llms-llama-cpp' option in PrivateGPT, which will use LlamaCPP. It works great on Mac with Metal most of the times (leverages Metal GPU), but it can be tricky in certain Linux and Windows distributions, depending on the GPU. 
In order for the LlamaCPP-powered LLM to work (the second option), you need to download the LLM model to the `models` folder. You can do so by running the `setup` script:\n```bash\npoetry run python scripts/setup\n```\n### Embeddings\nFor local Embeddings there are two options:\n* (Recommended) You can use the 'ollama' option in PrivateGPT, which will connect to your local Ollama instance. Ollama greatly simplifies the installation of local models.\n* You can use the 'embeddings-huggingface' option in PrivateGPT, which will use HuggingFace.\n\nIn order for HuggingFace embeddings to work (the second option), you need to download the embeddings model to the `models` folder. You can do so by running the `setup` script:\n```bash\npoetry run python scripts/setup\n```\n### Vector stores\nThe vector stores supported (Qdrant, Milvus, ChromaDB and Postgres) run locally by default."
  },
  {
    "path": "fern/docs/pages/installation/installation.mdx",
    "content": "It is important that you review the [Main Concepts](../concepts) section to understand the different components of PrivateGPT and how they interact with each other.\n\n## Base requirements to run PrivateGPT\n\n### 1. Clone the PrivateGPT Repository\nClone the repository and navigate to it:\n```bash\ngit clone https://github.com/zylon-ai/private-gpt\ncd private-gpt\n```\n\n### 2. Install Python 3.11\nIf you do not have Python 3.11 installed, install it using a Python version manager like `pyenv`. Earlier Python versions are not supported.\n#### macOS/Linux\nInstall and set Python 3.11 using [pyenv](https://github.com/pyenv/pyenv):\n```bash\npyenv install 3.11\npyenv local 3.11\n```\n#### Windows\nInstall and set Python 3.11 using [pyenv-win](https://github.com/pyenv-win/pyenv-win):\n```bash\npyenv install 3.11\npyenv local 3.11\n```\n\n### 3. Install `Poetry`\nInstall [Poetry](https://python-poetry.org/docs/#installing-with-the-official-installer) for dependency management:\nFollow the instructions on the official Poetry website to install it.\n\n<Callout intent=\"warning\">\nA bug exists in Poetry versions 1.7.0 and earlier. We strongly recommend upgrading to a tested version.\nTo upgrade Poetry to latest tested version, run `poetry self update 1.8.3` after installing it.\n</Callout>\n\n### 4. Optional: Install `make`\nTo run various scripts, you need to install `make`. Follow the instructions for your operating system:\n#### macOS\n(Using Homebrew):\n```bash\nbrew install make\n```\n#### Windows\n(Using Chocolatey):\n```bash\nchoco install make\n```\n\n## Install and Run Your Desired Setup\n\nPrivateGPT allows customization of the setup, from fully local to cloud-based, by deciding the modules to use. To install only the required dependencies, PrivateGPT offers different `extras` that can be combined during the installation process:\n\n```bash\npoetry install --extras \"<extra1> <extra2>...\"\n```\nWhere `<extra>` can be any of the following options described below.\n\n### Available Modules\n\nYou need to choose one option per category (LLM, Embeddings, Vector Stores, UI). 
Below are the tables listing the available options for each category.\n\n#### LLM\n\n| **Option**   | **Description**                                                        | **Extra**           |\n|--------------|------------------------------------------------------------------------|---------------------|\n| **ollama**   | Adds support for Ollama LLM, requires Ollama running locally           | llms-ollama         |\n| llama-cpp    | Adds support for local LLM using LlamaCPP                              | llms-llama-cpp      |\n| sagemaker    | Adds support for Amazon Sagemaker LLM, requires Sagemaker endpoints    | llms-sagemaker      |\n| openai       | Adds support for OpenAI LLM, requires OpenAI API key                   | llms-openai         |\n| openailike   | Adds support for 3rd party LLM providers compatible with OpenAI's API  | llms-openai-like    |\n| azopenai     | Adds support for Azure OpenAI LLM, requires Azure endpoints            | llms-azopenai       |\n| gemini       | Adds support for Gemini LLM, requires Gemini API key                   | llms-gemini         |\n\n#### Embeddings\n\n| **Option**       | **Description**                                                                | **Extra**               |\n|------------------|--------------------------------------------------------------------------------|-------------------------|\n| **ollama**       | Adds support for Ollama Embeddings, requires Ollama running locally            | embeddings-ollama       |\n| huggingface      | Adds support for local Embeddings using HuggingFace                            | embeddings-huggingface  |\n| openai           | Adds support for OpenAI Embeddings, requires OpenAI API key                    | embeddings-openai       |\n| sagemaker        | Adds support for Amazon Sagemaker Embeddings, requires Sagemaker endpoints     | embeddings-sagemaker    |\n| azopenai         | Adds support for Azure OpenAI Embeddings, requires Azure endpoints             | embeddings-azopenai     |\n| gemini           | Adds support for Gemini Embeddings, requires Gemini API key                    | embeddings-gemini       |\n\n#### Vector Stores\n\n| **Option**       | **Description**                         | **Extra**               |\n|------------------|-----------------------------------------|-------------------------|\n| **qdrant**       | Adds support for Qdrant vector store    | vector-stores-qdrant    |\n| milvus           | Adds support for Milvus vector store    | vector-stores-milvus    |\n| chroma           | Adds support for Chroma DB vector store | vector-stores-chroma    |\n| postgres         | Adds support for Postgres vector store  | vector-stores-postgres  |\n| clickhouse       | Adds support for Clickhouse vector store| vector-stores-clickhouse|\n\n#### UI\n\n| **Option**   | **Description**                          | **Extra** |\n|--------------|------------------------------------------|-----------|\n| Gradio       | Adds support for UI using Gradio         | ui        |\n\n<Callout intent = \"warning\">\nA working **Gradio UI client** is provided to test the API, together with a set of useful tools such as bulk\nmodel download script, ingestion script, documents folder watch, etc. Please refer to the [UI alternatives](/manual/user-interface/alternatives) page for more UI alternatives.\n</Callout>\n\n## Recommended Setups\n\nThere are just some examples of recommended setups. 
You can mix and match the different options to fit your needs.\nYou'll find more information in the Manual section of the documentation.\n\n> **Important for Windows**: In the examples below showing how to run PrivateGPT with `make run`, the `PGPT_PROFILES` env var is set inline following Unix command line syntax (works on macOS and Linux).\nIf you are using Windows, you'll need to set the env var in a different way, for example:\n\n```powershell\n# Powershell\n$env:PGPT_PROFILES=\"ollama\"\nmake run\n```\n\nor\n\n```cmd\n# CMD\nset PGPT_PROFILES=ollama\nmake run\n```\n\nRefer to the [troubleshooting](./troubleshooting) section for specific issues you might encounter.\n\n### Local, Ollama-powered setup - RECOMMENDED\n\n**The easiest way to run PrivateGPT fully locally** is to depend on Ollama for the LLM. Ollama makes local LLMs and Embeddings super easy to install and use, abstracting the complexity of GPU support. It's the recommended setup for local development.\n\nGo to [ollama.ai](https://ollama.ai/) and follow the instructions to install Ollama on your machine.\n\nAfter the installation, make sure the Ollama desktop app is closed.\n\nNow, start the Ollama service (it will start a local inference server, serving both the LLM and the Embeddings):\n```bash\nollama serve\n```\n\nInstall the models to be used; the default settings-ollama.yaml is configured to use the llama3.1 8b LLM (~4GB) and nomic-embed-text Embeddings (~275MB).\n\nBy default, PGPT will automatically pull models as needed. This behavior can be changed by modifying the `ollama.autopull_models` property.\n\nIn any case, if you want to manually pull models, run the following commands:\n\n```bash\nollama pull llama3.1\nollama pull nomic-embed-text\n```\n\nOnce done, on a different terminal, you can install PrivateGPT with the following command:\n```bash\npoetry install --extras \"ui llms-ollama embeddings-ollama vector-stores-qdrant\"\n```\n\nOnce installed, you can run PrivateGPT. Make sure you have a working Ollama running locally before running the following command.\n\n```bash\nPGPT_PROFILES=ollama make run\n```\n\nPrivateGPT will use the already existing `settings-ollama.yaml` settings file, which is already configured to use Ollama LLM and Embeddings, and Qdrant. Review it and adapt it to your needs (different models, different Ollama port, etc.).\n\nThe UI will be available at http://localhost:8001\n\n
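If you want to double-check the setup from the command line before using the UI, the following quick checks should work, assuming Ollama's default port 11434 and the default PrivateGPT port 8001 used above:\n\n```bash\n# Ollama should be up and serving on its default port\ncurl http://localhost:11434\n\n# the PrivateGPT API (and its Swagger UI) should be available on port 8001\ncurl http://localhost:8001/docs\n```\n\n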
### Private, Sagemaker-powered setup\n\nIf you need more performance, you can run a version of PrivateGPT that relies on powerful AWS Sagemaker machines to serve the LLM and Embeddings.\n\nYou need to have access to Sagemaker inference endpoints for the LLM and / or the embeddings, and have AWS credentials properly configured.\n\nEdit the `settings-sagemaker.yaml` file to include the correct Sagemaker endpoints.\n\nThen, install PrivateGPT with the following command:\n```bash\npoetry install --extras \"ui llms-sagemaker embeddings-sagemaker vector-stores-qdrant\"\n```\n\nOnce installed, you can run PrivateGPT. Make sure your Sagemaker endpoints are reachable and your AWS credentials are properly configured before running the following command.\n\n```bash\nPGPT_PROFILES=sagemaker make run\n```\n\nPrivateGPT will use the already existing `settings-sagemaker.yaml` settings file, which is already configured to use Sagemaker LLM and Embeddings endpoints, and Qdrant.\n\nThe UI will be available at http://localhost:8001\n\n### Non-Private, OpenAI-powered test setup\n\nIf you want to test PrivateGPT with OpenAI's LLM and Embeddings -taking into account your data is going to OpenAI!- you can follow the steps below.\n\nYou need an OpenAI API key to run this setup.\n\nEdit the `settings-openai.yaml` file to include the correct API key. Never commit it! It's a secret! As an alternative to editing `settings-openai.yaml`, you can just set the env var OPENAI_API_KEY.\n\nThen, install PrivateGPT with the following command:\n```bash\npoetry install --extras \"ui llms-openai embeddings-openai vector-stores-qdrant\"\n```\n\nOnce installed, you can run PrivateGPT.\n\n```bash\nPGPT_PROFILES=openai make run\n```\n\nPrivateGPT will use the already existing `settings-openai.yaml` settings file, which is already configured to use OpenAI LLM and Embeddings endpoints, and Qdrant.\n\nThe UI will be available at http://localhost:8001\n\n### Non-Private, Azure OpenAI-powered test setup\n\nIf you want to test PrivateGPT with Azure OpenAI's LLM and Embeddings -taking into account your data is going to Azure OpenAI!- you can follow the steps below.\n\nYou need to have access to Azure OpenAI inference endpoints for the LLM and / or the embeddings, and have Azure OpenAI credentials properly configured.\n\nEdit the `settings-azopenai.yaml` file to include the correct Azure OpenAI endpoints.\n\nThen, install PrivateGPT with the following command:\n```bash\npoetry install --extras \"ui llms-azopenai embeddings-azopenai vector-stores-qdrant\"\n```\n\nOnce installed, you can run PrivateGPT.\n\n```bash\nPGPT_PROFILES=azopenai make run\n```\n\nPrivateGPT will use the already existing `settings-azopenai.yaml` settings file, which is already configured to use Azure OpenAI LLM and Embeddings endpoints, and Qdrant.\n\nThe UI will be available at http://localhost:8001\n\n### Local, Llama-CPP powered setup\n\nIf you want to run PrivateGPT fully locally without relying on Ollama, you can run the following command:\n\n```bash\npoetry install --extras \"ui llms-llama-cpp embeddings-huggingface vector-stores-qdrant\"\n```\n\nIn order for local LLM and embeddings to work, you need to download the models to the `models` folder. You can do so by running the `setup` script:\n```bash\npoetry run python scripts/setup\n```\n\nOnce installed, you can run PrivateGPT with the following command:\n\n```bash\nPGPT_PROFILES=local make run\n```\n\nPrivateGPT will load the already existing `settings-local.yaml` file, which is already configured to use LlamaCPP LLM, HuggingFace embeddings and Qdrant.\n\nThe UI will be available at http://localhost:8001\n\n#### Llama-CPP support\n\nFor PrivateGPT to run fully locally without Ollama, Llama.cpp is required and in\nparticular [llama-cpp-python](https://github.com/abetlen/llama-cpp-python)\nis used.\n\nYou'll need to have a valid C++ compiler like gcc installed. 
See [Troubleshooting: C++ Compiler](#troubleshooting-c-compiler) for more details.\n\n> It's highly encouraged that you fully read llama-cpp and llama-cpp-python documentation relevant to your platform.\n> Running into installation issues is very likely, and you'll need to troubleshoot them yourself.\n\n##### Llama-CPP OSX GPU support\n\nYou will need to build [llama.cpp](https://github.com/ggerganov/llama.cpp) with metal support.\n\nTo do that, you need to install `llama.cpp` python's binding `llama-cpp-python` through pip, with the compilation flag\nthat activate `METAL`: you have to pass `-DLLAMA_METAL=on` to the CMake command tha `pip` runs for you (see below).\n\nIn other words, one should simply run:\n```bash\nCMAKE_ARGS=\"-DLLAMA_METAL=on\" pip install --force-reinstall --no-cache-dir llama-cpp-python\n```\n\nThe above command will force the re-installation of `llama-cpp-python` with `METAL` support by compiling\n`llama.cpp` locally with your `METAL` libraries (shipped by default with your macOS).\n\nMore information is available in the documentation of the libraries themselves:\n* [llama-cpp-python](https://github.com/abetlen/llama-cpp-python#installation-with-hardware-acceleration)\n* [llama-cpp-python's documentation](https://llama-cpp-python.readthedocs.io/en/latest/#installation-with-hardware-acceleration)\n* [llama.cpp](https://github.com/ggerganov/llama.cpp#build)\n\n##### Llama-CPP Windows NVIDIA GPU support\n\nWindows GPU support is done through CUDA.\nFollow the instructions on the original [llama.cpp](https://github.com/ggerganov/llama.cpp) repo to install the required\ndependencies.\n\nSome tips to get it working with an NVIDIA card and CUDA (Tested on Windows 10 with CUDA 11.5 RTX 3070):\n\n* Install latest VS2022 (and build tools) https://visualstudio.microsoft.com/vs/community/\n* Install CUDA toolkit https://developer.nvidia.com/cuda-downloads\n* Verify your installation is correct by running `nvcc --version` and `nvidia-smi`, ensure your CUDA version is up to\n  date and your GPU is detected.\n* [Optional] Install CMake to troubleshoot building issues by compiling llama.cpp directly https://cmake.org/download/\n\nIf you have all required dependencies properly configured running the\nfollowing powershell command should succeed.\n\n```powershell\n$env:CMAKE_ARGS='-DLLAMA_CUBLAS=on'; poetry run pip install --force-reinstall --no-cache-dir llama-cpp-python numpy==1.26.0\n```\n\nIf your installation was correct, you should see a message similar to the following next\ntime you start the server `BLAS = 1`. If there is some issue, please refer to the\n[troubleshooting](/installation/getting-started/troubleshooting#building-llama-cpp-with-nvidia-gpu-support) section.\n\n```console\nllama_new_context_with_model: total VRAM used: 4857.93 MB (model: 4095.05 MB, context: 762.87 MB)\nAVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 1 | SSSE3 = 0 | VSX = 0 |\n```\n\nNote that llama.cpp offloads matrix calculations to the GPU but the performance is\nstill hit heavily due to latency between CPU and GPU communication. 
You might need to tweak\nbatch sizes and other parameters to get the best performance for your particular system.\n\n##### Llama-CPP Linux NVIDIA GPU support and Windows-WSL\n\nLinux GPU support is done through CUDA.\nFollow the instructions on the original [llama.cpp](https://github.com/ggerganov/llama.cpp) repo to install the required\nexternal\ndependencies.\n\nSome tips:\n\n* Make sure you have an up-to-date C++ compiler\n* Install CUDA toolkit https://developer.nvidia.com/cuda-downloads\n* Verify your installation is correct by running `nvcc --version` and `nvidia-smi`, ensure your CUDA version is up to\n  date and your GPU is detected.\n\nAfter that running the following command in the repository will install llama.cpp with GPU support:\n\n```bash\nCMAKE_ARGS='-DLLAMA_CUBLAS=on' poetry run pip install --force-reinstall --no-cache-dir llama-cpp-python numpy==1.26.0\n```\n\nIf your installation was correct, you should see a message similar to the following next\ntime you start the server `BLAS = 1`. If there is some issue, please refer to the\n[troubleshooting](/installation/getting-started/troubleshooting#building-llama-cpp-with-nvidia-gpu-support) section.\n\n```\nllama_new_context_with_model: total VRAM used: 4857.93 MB (model: 4095.05 MB, context: 762.87 MB)\nAVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 1 | SSSE3 = 0 | VSX = 0 |\n```\n\n##### Llama-CPP Linux AMD GPU support\n\nLinux GPU support is done through ROCm.\nSome tips:\n* Install ROCm from [quick-start install guide](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/tutorial/quick-start.html)\n* [Install PyTorch for ROCm](https://rocm.docs.amd.com/projects/radeon/en/latest/docs/install/install-pytorch.html)\n```bash\nwget https://repo.radeon.com/rocm/manylinux/rocm-rel-6.0/torch-2.1.1%2Brocm6.0-cp311-cp311-linux_x86_64.whl\npoetry run pip install --force-reinstall --no-cache-dir torch-2.1.1+rocm6.0-cp311-cp311-linux_x86_64.whl\n```\n* Install bitsandbytes for ROCm\n```bash\nPYTORCH_ROCM_ARCH=gfx900,gfx906,gfx908,gfx90a,gfx1030,gfx1100,gfx1101,gfx940,gfx941,gfx942\nBITSANDBYTES_VERSION=62353b0200b8557026c176e74ac48b84b953a854\ngit clone https://github.com/arlo-phoenix/bitsandbytes-rocm-5.6\ncd bitsandbytes-rocm-5.6\ngit checkout ${BITSANDBYTES_VERSION}\nmake hip ROCM_TARGET=${PYTORCH_ROCM_ARCH} ROCM_HOME=/opt/rocm/\npip install . 
--extra-index-url https://download.pytorch.org/whl/nightly\n```\n\nAfter that running the following command in the repository will install llama.cpp with GPU support:\n```bash\nLLAMA_CPP_PYTHON_VERSION=0.2.56\nDAMDGPU_TARGETS=gfx900;gfx906;gfx908;gfx90a;gfx1030;gfx1100;gfx1101;gfx940;gfx941;gfx942\nCMAKE_ARGS=\"-DLLAMA_HIPBLAS=ON -DCMAKE_C_COMPILER=/opt/rocm/llvm/bin/clang -DCMAKE_CXX_COMPILER=/opt/rocm/llvm/bin/clang++ -DAMDGPU_TARGETS=${DAMDGPU_TARGETS}\" poetry run pip install --force-reinstall --no-cache-dir llama-cpp-python==${LLAMA_CPP_PYTHON_VERSION}\n```\n\nIf your installation was correct, you should see a message similar to the following next time you start the server `BLAS = 1`.\n\n```\nAVX = 1 | AVX_VNNI = 0 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 1 | SSSE3 = 1 | VSX = 0 | MATMUL_INT8 = 0 |\n```\n\n##### Llama-CPP Known issues and Troubleshooting\n\nExecution of LLMs locally still has a lot of sharp edges, specially when running on non Linux platforms.\nYou might encounter several issues:\n\n* Performance: RAM or VRAM usage is very high, your computer might experience slowdowns or even crashes.\n* GPU Virtualization on Windows and OSX: Simply not possible with docker desktop, you have to run the server directly on\n  the host.\n* Building errors: Some of PrivateGPT dependencies need to build native code, and they might fail on some platforms.\n  Most likely you are missing some dev tools in your machine (updated C++ compiler, CUDA is not on PATH, etc.).\n  If you encounter any of these issues, please open an issue and we'll try to help.\n\nOne of the first reflex to adopt is: get more information.\nIf, during your installation, something does not go as planned, retry in *verbose* mode, and see what goes wrong.\n\nFor example, when installing packages with `pip install`, you can add the option `-vvv` to show the details of the installation.\n\n##### Llama-CPP Troubleshooting: C++ Compiler\n\nIf you encounter an error while building a wheel during the `pip install` process, you may need to install a C++\ncompiler on your computer.\n\n**For Windows 10/11**\n\nTo install a C++ compiler on Windows 10/11, follow these steps:\n\n1. Install Visual Studio 2022.\n2. Make sure the following components are selected:\n    * Universal Windows Platform development\n    * C++ CMake tools for Windows\n3. Download the MinGW installer from the [MinGW website](https://sourceforge.net/projects/mingw/).\n4. Run the installer and select the `gcc` component.\n\n**For OSX**\n\n1. Check if you have a C++ compiler installed, `Xcode` should have done it for you. To install Xcode, go to the App\n   Store and search for Xcode and install it. **Or** you can install the command line tools by running `xcode-select --install`.\n2. If not, you can install clang or gcc with homebrew `brew install gcc`\n\n##### Llama-CPP Troubleshooting: Mac Running Intel\n\nWhen running a Mac with Intel hardware (not M1), you may run into _clang: error: the clang compiler does not support '\n-march=native'_ during pip install.\n\nIf so set your archflags during pip install. eg: _ARCHFLAGS=\"-arch x86_64\" pip3 install -r requirements.txt_\n"
  },
  {
    "path": "fern/docs/pages/installation/troubleshooting.mdx",
    "content": "# Downloading Gated and Private Models\nMany models are gated or private, requiring special access to use them. Follow these steps to gain access and set up your environment for using these models.\n## Accessing Gated Models\n1. **Request Access:**\n   Follow the instructions provided [here](https://huggingface.co/docs/hub/en/models-gated) to request access to the gated model.\n2. **Generate a Token:**\n   Once you have access, generate a token by following the instructions [here](https://huggingface.co/docs/hub/en/security-tokens).\n3. **Set the Token:**\n   Add the generated token to your `settings.yaml` file:\n   ```yaml\n   huggingface:\n     access_token: <your-token>\n   ```\n   Alternatively, set the `HF_TOKEN` environment variable:\n   ```bash\n   export HF_TOKEN=<your-token>\n   ```\n\n# Tokenizer Setup\nPrivateGPT uses the `AutoTokenizer` library to tokenize input text accurately. It connects to HuggingFace's API to download the appropriate tokenizer for the specified model.\n\n## Configuring the Tokenizer\n1. **Specify the Model:**\n   In your `settings.yaml` file, specify the model you want to use:\n   ```yaml\n   llm:\n     tokenizer: meta-llama/Meta-Llama-3.1-8B-Instruct\n   ```\n2. **Set Access Token for Gated Models:**\n   If you are using a gated model, ensure the `access_token` is set as mentioned in the previous section.\nThis configuration ensures that PrivateGPT can download and use the correct tokenizer for the model you are working with.\n\n# Embedding dimensions mismatch\nIf you encounter an error message like `Embedding dimensions mismatch`, it is likely due to the embedding model and\ncurrent vector dimension mismatch. To resolve this issue, ensure that the model and the input data have the same vector dimensions.\n\nBy default, PrivateGPT uses `nomic-embed-text` embeddings, which have a vector dimension of 768.\nIf you are using a different embedding model, ensure that the vector dimensions match the model's output.\n\n<Callout intent = \"warning\">\nIn versions below to 0.6.0, the default embedding model was `BAAI/bge-small-en-v1.5` in `huggingface` setup.\nIf you plan to reuse the old generated embeddings, you need to update the `settings.yaml` file to use the correct embedding model:\n```yaml\nhuggingface:\n  embedding_hf_model_name: BAAI/bge-small-en-v1.5\nembedding:\n  embed_dim: 384\n```\n</Callout>\n\n# Building Llama-cpp with NVIDIA GPU support\n\n## Out-of-memory error\n\nIf you encounter an out-of-memory error while running `llama-cpp` with CUDA, you can try the following steps to resolve the issue:\n1. **Set the next environment:**\n    ```bash\n    TOKENIZERS_PARALLELISM=true\n    ```\n2. **Run PrivateGPT:**\n    ```bash\n    poetry run python -m privategpt\n    ```\nGive thanks to [MarioRossiGithub](https://github.com/MarioRossiGithub) for providing the following solution."
  },
  {
    "path": "fern/docs/pages/manual/ingestion-reset.mdx",
    "content": "# Reset Local documents database\n\nWhen running in a local setup, you can remove all ingested documents by simply\ndeleting all contents of `local_data` folder (except .gitignore).\n\nTo simplify this process, you can use the command:\n```bash\nmake wipe\n```\n\n# Advanced usage\n\nYou can actually delete your documents from your storage by using the\nAPI endpoint `DELETE` in the Ingestion API."
  },
  {
    "path": "fern/docs/pages/manual/ingestion.mdx",
    "content": "# Ingesting & Managing Documents\n\nThe ingestion of documents can be done in different ways:\n\n* Using the `/ingest` API\n* Using the Gradio UI\n* Using the Bulk Local Ingestion functionality (check next section)\n\n## Bulk Local Ingestion\n\nYou will need to activate `data.local_ingestion.enabled` in your setting file to use this feature. Additionally,\nit is probably a good idea to set `data.local_ingestion.allow_ingest_from` to specify which folders are allowed to be ingested.\n\n<Callout intent = \"warning\">\nBe careful enabling this feature in a production environment, as it can be a security risk, as it allows users to\ningest any local file with permissions.\n</Callout>\n\nWhen you are running PrivateGPT in a fully local setup, you can ingest a complete folder for convenience (containing\npdf, text files, etc.)\nand optionally watch changes on it with the command:\n\n```bash\nmake ingest /path/to/folder -- --watch\n```\n\nTo log the processed and failed files to an additional file, use:\n\n```bash\nmake ingest /path/to/folder -- --watch --log-file /path/to/log/file.log\n```\n\n**Note for Windows Users:** Depending on your Windows version and whether you are using PowerShell to execute\nPrivateGPT API calls, you may need to include the parameter name before passing the folder path for consumption:\n\n```bash\nmake ingest arg=/path/to/folder -- --watch --log-file /path/to/log/file.log\n```\n\nAfter ingestion is complete, you should be able to chat with your documents\nby navigating to http://localhost:8001 and using the option `Query documents`,\nor using the completions / chat API.\n\n## Ingestion troubleshooting\n\n### Running out of memory\n\nTo do not run out of memory, you should ingest your documents without the LLM loaded in your (video) memory.\nTo do so, you should change your configuration to set `llm.mode: mock`.\n\nYou can also use the existing `PGPT_PROFILES=mock` that will set the following configuration for you:\n\n```yaml\nllm:\n  mode: mock\nembedding:\n  mode: local\n```\n\nThis configuration allows you to use hardware acceleration for creating embeddings while avoiding loading the full LLM into (video) memory.\n\nOnce your documents are ingested, you can set the `llm.mode` value back to `local` (or your previous custom value).\n\n### Ingestion speed\n\nThe ingestion speed depends on the number of documents you are ingesting, and the size of each document.\nTo speed up the ingestion, you can change the ingestion mode in configuration.\n\nThe following ingestion mode exist:\n* `simple`: historic behavior, ingest one document at a time, sequentially\n* `batch`: read, parse, and embed multiple documents using batches (batch read, and then batch parse, and then batch embed)\n* `parallel`: read, parse, and embed multiple documents in parallel. This is the fastest ingestion mode for local setup.\n* `pipeline`: Alternative to parallel.\nTo change the ingestion mode, you can use the `embedding.ingest_mode` configuration value. The default value is `simple`.\n\nTo configure the number of workers used for parallel or batched ingestion, you can use\nthe `embedding.count_workers` configuration value. If you set this value too high, you might run out of\nmemory, so be mindful when setting this value. The default value is `2`.\nFor `batch` mode, you can easily set this value to your number of threads available on your CPU without\nrunning out of memory. 
For `parallel` mode, you should be more careful and set this value lower.\n\nThe configuration below should be enough for users who want to stress their hardware more:\n```yaml\nembedding:\n  ingest_mode: parallel\n  count_workers: 4\n```\n\nIf your hardware is powerful enough and you are loading heavy documents, you can increase the number of workers.\nIt is recommended to do your own tests to find the optimal value for your hardware.\n\nIf you have a `bash` shell, you can use this set of commands to do your own benchmark:\n\n```bash\n# Wipe your local data, to put yourself in a clean state\n# This will delete all your ingested documents\nmake wipe\n\ntime PGPT_PROFILES=mock python ./scripts/ingest_folder.py ~/my-dir/to-ingest/\n```\n\n## Supported file formats\n\nPrivateGPT by default supports all the file formats that contain clear text (for example, `.txt` files, `.html`, etc.).\nHowever, these text-based file formats are only considered as plain text files, and are not pre-processed in any other way.\n\nIt also supports the following file formats:\n* `.hwp`\n* `.pdf`\n* `.docx`\n* `.pptx`\n* `.ppt`\n* `.pptm`\n* `.jpg`\n* `.png`\n* `.jpeg`\n* `.mp3`\n* `.mp4`\n* `.csv`\n* `.epub`\n* `.md`\n* `.mbox`\n* `.ipynb`\n* `.json`\n\n<Callout intent = \"info\">\nWhile `PrivateGPT` supports these file formats, it **might** require additional\ndependencies to be installed in your Python virtual environment.\nFor example, if you try to ingest `.epub` files, `PrivateGPT` might fail to do so, and will instead display an\nexplanatory error asking you to download the necessary dependencies to install this file format.\n</Callout>\n\n<Callout intent = \"info\">\n**Other file formats might work**, but they will be considered as plain text\nfiles (in other words, they will be ingested as `.txt` files).\n</Callout>\n\n"
  },
  {
    "path": "fern/docs/pages/manual/llms.mdx",
    "content": "## Running the Server\n\nPrivateGPT supports running with different LLMs & setups.\n\n### Local models\n\nBoth the LLM and the Embeddings model will run locally.\n\nMake sure you have followed the *Local LLM requirements* section before moving on.\n\nThis command will start PrivateGPT using the `settings.yaml` (default profile) together with the `settings-local.yaml`\nconfiguration files. By default, it will enable both the API and the Gradio UI. Run:\n\n```bash\nPGPT_PROFILES=local make run\n```\n\nor\n\n```bash\nPGPT_PROFILES=local poetry run python -m private_gpt\n```\n\nWhen the server is started it will print a log *Application startup complete*.\nNavigate to http://localhost:8001 to use the Gradio UI or to http://localhost:8001/docs (API section) to try the API\nusing Swagger UI.\n\n#### Customizing low level parameters\n\nCurrently, not all the parameters of `llama.cpp` and `llama-cpp-python` are available at PrivateGPT's `settings.yaml` file.\nIn case you need to customize parameters such as the number of layers loaded into the GPU, you might change\nthese at the `llm_component.py` file under the `private_gpt/components/llm/llm_component.py`.\n\n##### Available LLM config options\n\nThe `llm` section of the settings allows for the following configurations:\n\n- `mode`: how to run your llm\n- `max_new_tokens`: this lets you configure the number of new tokens the LLM will generate and add to the context window (by default Llama.cpp uses `256`)\n\nExample:\n\n```yaml\nllm:\n  mode: local\n  max_new_tokens: 256\n```\n\nIf you are getting an out of memory error, you might also try a smaller model or stick to the proposed\nrecommended models, instead of custom tuning the parameters.\n\n### Using OpenAI\n\nIf you cannot run a local model (because you don't have a GPU, for example) or for testing purposes, you may\ndecide to run PrivateGPT using OpenAI as the LLM and Embeddings model.\n\nIn order to do so, create a profile `settings-openai.yaml` with the following contents:\n\n```yaml\nllm:\n  mode: openai\n\nopenai:\n  api_base: <openai-api-base-url> # Defaults to https://api.openai.com/v1\n  api_key: <your_openai_api_key>  # You could skip this configuration and use the OPENAI_API_KEY env var instead\n  model: <openai_model_to_use> # Optional model to use. Default is \"gpt-3.5-turbo\"\n                               # Note: Open AI Models are listed here: https://platform.openai.com/docs/models\n```\n\nAnd run PrivateGPT loading that profile you just created:\n\n`PGPT_PROFILES=openai make run`\n\nor\n\n`PGPT_PROFILES=openai poetry run python -m private_gpt`\n\nWhen the server is started it will print a log *Application startup complete*.\nNavigate to http://localhost:8001 to use the Gradio UI or to http://localhost:8001/docs (API section) to try the API.\nYou'll notice the speed and quality of response is higher, given you are using OpenAI's servers for the heavy\ncomputations.\n\n### Using OpenAI compatible API\n\nMany tools, including [LocalAI](https://localai.io/) and [vLLM](https://docs.vllm.ai/en/latest/),\nsupport serving local models with an OpenAI compatible API. Even when overriding the `api_base`,\nusing the `openai` mode doesn't allow you to use custom models. 
Instead, you should use the `openailike` mode:\n\n```yaml\nllm:\n  mode: openailike\n```\n\nThis mode uses the same settings as the `openai` mode.\n\nAs an example, you can follow the [vLLM quickstart guide](https://docs.vllm.ai/en/latest/getting_started/quickstart.html#openai-compatible-server)\nto run an OpenAI compatible server. Then, you can run PrivateGPT using the `settings-vllm.yaml` profile:\n\n`PGPT_PROFILES=vllm make run`\n\n### Using Azure OpenAI\n\nIf you cannot run a local model (because you don't have a GPU, for example) or for testing purposes, you may\ndecide to run PrivateGPT using Azure OpenAI as the LLM and Embeddings model.\n\nIn order to do so, create a profile `settings-azopenai.yaml` with the following contents:\n\n```yaml\nllm:\n  mode: azopenai\n\nembedding:\n  mode: azopenai\n\nazopenai:\n  api_key: <your_azopenai_api_key>  # You could skip this configuration and use the AZ_OPENAI_API_KEY env var instead\n  azure_endpoint: <your_azopenai_endpoint> # You could skip this configuration and use the AZ_OPENAI_ENDPOINT env var instead\n  api_version: <api_version> # The API version to use. Default is \"2023_05_15\"\n  embedding_deployment_name: <your_embedding_deployment_name> # You could skip this configuration and use the AZ_OPENAI_EMBEDDING_DEPLOYMENT_NAME env var instead\n  embedding_model: <openai_embeddings_to_use> # Optional model to use. Default is \"text-embedding-ada-002\" \n  llm_deployment_name: <your_model_deployment_name> # You could skip this configuration and use the AZ_OPENAI_LLM_DEPLOYMENT_NAME env var instead\n  llm_model: <openai_model_to_use> # Optional model to use. Default is \"gpt-35-turbo\"\n```\n\nAnd run PrivateGPT loading that profile you just created:\n\n`PGPT_PROFILES=azopenai make run`\n\nor\n\n`PGPT_PROFILES=azopenai poetry run python -m private_gpt`\n\nWhen the server is started it will print a log *Application startup complete*.\nNavigate to http://localhost:8001 to use the Gradio UI or to http://localhost:8001/docs (API section) to try the API.\nYou'll notice the speed and quality of response is higher, given you are using Azure OpenAI's servers for the heavy\ncomputations.\n\n### Using AWS Sagemaker\n\nFor a fully private & performant setup, you can choose to have both your LLM and Embeddings model deployed using Sagemaker.\n\nNote: how to deploy models on Sagemaker is out of the scope of this documentation.\n\nIn order to do so, create a profile `settings-sagemaker.yaml` with the following contents (remember to\nupdate the values of the llm_endpoint_name and embedding_endpoint_name to yours):\n\n```yaml\nllm:\n  mode: sagemaker\n\nsagemaker:\n  llm_endpoint_name: huggingface-pytorch-tgi-inference-2023-09-25-19-53-32-140\n  embedding_endpoint_name: huggingface-pytorch-inference-2023-11-03-07-41-36-479\n```\n\nAnd run PrivateGPT loading that profile you just created:\n\n`PGPT_PROFILES=sagemaker make run`\n\nor\n\n`PGPT_PROFILES=sagemaker poetry run python -m private_gpt`\n\nWhen the server is started it will print a log *Application startup complete*.\nNavigate to http://localhost:8001 to use the Gradio UI or to http://localhost:8001/docs (API section) to try the API.\n\n### Using Ollama\n\nAnother option for a fully private setup is using [Ollama](https://ollama.ai/).\n\nNote: how to deploy Ollama and pull models onto it is out of the scope of this documentation.\n\nIn order to do so, create a profile `settings-ollama.yaml` with the following contents:\n\n```yaml\nllm:\n  mode: ollama\n\nollama:\n  model: <ollama_model_to_use> # 
Required Model to use.\n                               # Note: Ollama Models are listed here: https://ollama.ai/library\n                               #       Be sure to pull the model to your Ollama server\n  api_base: <ollama-api-base-url> # Defaults to http://localhost:11434\n```\n\nAnd run PrivateGPT loading that profile you just created:\n\n`PGPT_PROFILES=ollama make run`\n\nor\n\n`PGPT_PROFILES=ollama poetry run python -m private_gpt`\n\nWhen the server is started it will print a log *Application startup complete*.\nNavigate to http://localhost:8001 to use the Gradio UI or to http://localhost:8001/docs (API section) to try the API.\n\n### Using IPEX-LLM\n\nFor a fully private setup on Intel GPUs (such as a local PC with an iGPU, or discrete GPUs like Arc, Flex, and Max), you can use [IPEX-LLM](https://github.com/intel-analytics/ipex-llm).\n\nTo deploy Ollama and pull models using IPEX-LLM, please refer to [this guide](https://ipex-llm.readthedocs.io/en/latest/doc/LLM/Quickstart/ollama_quickstart.html). Then, follow the same steps outlined in the [Using Ollama](#using-ollama) section to create a `settings-ollama.yaml` profile and run the private-GPT server.\n\n### Using Gemini\n\nIf you cannot run a local model (because you don't have a GPU, for example) or for testing purposes, you may\ndecide to run PrivateGPT using Gemini as the LLM and Embeddings model. In addition, you will benefit from\nmultimodal inputs, such as text and images, in a very large contextual window.\n\nIn order to do so, create a profile `settings-gemini.yaml` with the following contents:\n\n```yaml\nllm:\n  mode: gemini\n\nembedding:\n  mode: gemini\n\ngemini:\n  api_key: <your_gemini_api_key>                # You could skip this configuration and use the GEMINI_API_KEY env var instead\n  model: <gemini_model_to_use>                  # Optional model to use. Default is models/gemini-pro\"\n  embedding_model: <gemini_embeddings_to_use>   # Optional model to use. Default is \"models/embedding-001\"\n```\n\nAnd run PrivateGPT loading that profile you just created:\n\n`PGPT_PROFILES=gemini make run`\n\nor\n\n`PGPT_PROFILES=gemini poetry run python -m private_gpt`\n\nWhen the server is started it will print a log *Application startup complete*.\nNavigate to http://localhost:8001 to use the Gradio UI or to http://localhost:8001/docs (API section) to try the API.\n\n"
  },
  {
    "path": "fern/docs/pages/manual/nodestore.mdx",
    "content": "## NodeStores\nPrivateGPT supports **Simple** and [Postgres](https://www.postgresql.org/) providers. Simple being the default.\n\nIn order to select one or the other, set the `nodestore.database` property in the `settings.yaml` file to `simple` or `postgres`.\n\n```yaml\nnodestore:\n  database: simple\n```\n\n### Simple Document Store\n\nSetting up simple document store: Persist data with in-memory and disk storage.\n\nEnabling the simple document store is an excellent choice for small projects or proofs of concept where you need to persist data while maintaining minimal setup complexity. To get started, set the nodestore.database property in your settings.yaml file as follows:\n\n```yaml\nnodestore:\n  database: simple\n```\nThe beauty of the simple document store is its flexibility and ease of implementation. It provides a solid foundation for managing and retrieving data without the need for complex setup or configuration. The combination of in-memory processing and disk persistence ensures that you can efficiently handle small to medium-sized datasets while maintaining data consistency across runs.\n\n### Postgres Document Store\n\nTo enable Postgres, set the `nodestore.database` property in the `settings.yaml` file to `postgres` and install the `storage-nodestore-postgres` extra.  Note: Vector Embeddings Storage in Postgres is configured separately\n\n```bash\npoetry install --extras storage-nodestore-postgres\n```\n\nThe available configuration options are:\n| Field         | Description                                               |\n|---------------|-----------------------------------------------------------|\n| **host**      | The server hosting the Postgres database. Default is `localhost` |\n| **port**      | The port on which the Postgres database is accessible. Default is `5432` |\n| **database**  | The specific database to connect to. Default is `postgres` |\n| **user**      | The username for database access. Default is `postgres` |\n| **password**  | The password for database access. (Required)            |\n| **schema_name** | The database schema to use. Default is `private_gpt`       |\n\nFor example:\n```yaml\nnodestore:\n  database: postgres\n\npostgres:\n  host: localhost\n  port: 5432\n  database: postgres\n  user: postgres\n  password: <PASSWORD>\n  schema_name: private_gpt\n```\n\nGiven the above configuration, Two PostgreSQL tables will be created upon successful connection: one for storing metadata related to the index and another for document data itself.\n\n```\npostgres=# \\dt private_gpt.*\n                  List of relations\n   Schema    |      Name       | Type  |    Owner     \n-------------+-----------------+-------+--------------\n private_gpt | data_docstore   | table | postgres\n private_gpt | data_indexstore | table | postgres\n\npostgres=# \n```\n"
  },
  {
    "path": "fern/docs/pages/manual/reranker.mdx",
    "content": "## Enhancing Response Quality with Reranking\n\nPrivateGPT offers a reranking feature aimed at optimizing response generation by filtering out irrelevant documents, potentially leading to faster response times and enhanced relevance of answers generated by the LLM.\n\n### Enabling Reranking\n\nDocument reranking can significantly improve the efficiency and quality of the responses by pre-selecting the most relevant documents before generating an answer. To leverage this feature, ensure that it is enabled in the RAG settings and consider adjusting the parameters to best fit your use case.\n\n#### Additional Requirements\n\nBefore enabling reranking, you must install additional dependencies:\n\n```bash\npoetry install --extras rerank-sentence-transformers\n```\n\nThis command installs dependencies for the cross-encoder reranker from sentence-transformers, which is currently the only supported method by PrivateGPT for document reranking.\n\n#### Configuration\n\nTo enable and configure reranking, adjust the `rag` section within the `settings.yaml` file. Here are the key settings to consider:\n\n- `similarity_top_k`: Determines the number of documents to initially retrieve and consider for reranking. This value should be larger than `top_n`.\n- `rerank`:\n  - `enabled`: Set to `true` to activate the reranking feature.\n  - `top_n`: Specifies the number of documents to use in the final answer generation process, chosen from the top-ranked documents provided by `similarity_top_k`.\n\nExample configuration snippet:\n\n```yaml\nrag:\n  similarity_top_k: 10  # Number of documents to retrieve and consider for reranking\n  rerank:\n    enabled: true\n    top_n: 3  # Number of top-ranked documents to use for generating the answer\n```"
  },
  {
    "path": "fern/docs/pages/manual/settings.mdx",
    "content": "# Settings and profiles for your private GPT\n\nThe configuration of your private GPT server is done thanks to `settings` files (more precisely `settings.yaml`).\nThese text files are written using the [YAML](https://en.wikipedia.org/wiki/YAML) syntax.\n\nWhile PrivateGPT is distributing safe and universal configuration files, you might want to quickly customize your\nPrivateGPT, and this can be done using the `settings` files.\n\nThis project is defining the concept of **profiles** (or configuration profiles).\nThis mechanism, using your environment variables, is giving you the ability to easily switch between\nconfiguration you've made.\n\nA typical use case of profile is to easily switch between LLM and embeddings.\nTo be a bit more precise, you can change the language (to French, Spanish, Italian, English, etc) by simply changing\nthe profile you've selected; no code changes required!\n\nPrivateGPT is configured through *profiles* that are defined using yaml files, and selected through env variables.\nThe full list of properties configurable can be found in `settings.yaml`.\n\n## How to know which profiles exist\nGiven that a profile `foo_bar` points to the file `settings-foo_bar.yaml` and vice-versa, you simply have to look\nat the files starting with `settings` and ending in `.yaml`.\n\n## How to use an existing profiles\n**Please note that the syntax to set the value of an environment variables depends on your OS**.\nYou have to set environment variable `PGPT_PROFILES` to the name of the profile you want to use.\n\nFor example, on **linux and macOS**, this gives:\n```bash\nexport PGPT_PROFILES=my_profile_name_here\n```\n\nWindows Command Prompt (cmd) has a different syntax:\n```shell\nset PGPT_PROFILES=my_profile_name_here\n```\n\nWindows Powershell has a different syntax:\n```shell\n$env:PGPT_PROFILES=\"my_profile_name_here\"\n```\nIf the above is not working, you might want to try other ways to set an env variable in your window's terminal.\n\n---\n\nOnce you've set this environment variable to the desired profile, you can simply launch your PrivateGPT,\nand it will run using your profile on top of the default configuration.\n\n## Reference\nAdditional details on the profiles are described in this section\n\n### Environment variable `PGPT_SETTINGS_FOLDER`\n\nThe location of the settings folder. 
Defaults to the root of the project.\nShould contain the default `settings.yaml` and any other `settings-{profile}.yaml`.\n\n### Environment variable `PGPT_PROFILES`\n\nBy default, the profile definition in `settings.yaml` is loaded.\nUsing this env var you can load additional profiles; the format is a comma-separated list of profile names.\nThis will merge `settings-{profile}.yaml` on top of the base settings file.\n\nFor example:\n`PGPT_PROFILES=local,cuda` will load `settings-local.yaml`\nand `settings-cuda.yaml` on top of the base `settings.yaml`; their contents will be merged, with\nproperties from later profiles overriding the values of earlier ones.\nA combined example is shown at the end of this page.\n\nDuring testing, the `test` profile will be active along with the default, therefore a `settings-test.yaml`\nfile is required.\n\n### Environment variables expansion\n\nConfiguration files can contain environment variables;\nthey will be expanded at runtime.\n\nExpansion must follow the pattern `${VARIABLE_NAME:default_value}`.\n\nFor example, the following configuration will use the value of the `PORT`\nenvironment variable or `8001` if it's not set.\nMissing variables with no default will produce an error.\n\n```yaml\nserver:\n  port: ${PORT:8001}\n```
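\n\n### Example: profiles and variable expansion combined\n\nAs an illustrative sketch (the values below are placeholders; adapt them to your own setup), a base `settings.yaml` could resolve the server port through variable expansion, while a `settings-openai.yaml` profile overrides only the LLM mode. Running with `PGPT_PROFILES=openai` merges both files, with the profile's `llm.mode` taking precedence over the base value:\n\n```yaml\n# settings.yaml (base configuration)\nserver:\n  port: ${PORT:8001}\nllm:\n  mode: local\n\n# settings-openai.yaml (merged on top when PGPT_PROFILES=openai)\nllm:\n  mode: openai\n```"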
  },
  {
    "path": "fern/docs/pages/manual/vectordb.mdx",
    "content": "## Vectorstores\nPrivateGPT supports [Qdrant](https://qdrant.tech/), [Milvus](https://milvus.io/), [Chroma](https://www.trychroma.com/), [PGVector](https://github.com/pgvector/pgvector) and [ClickHouse](https://github.com/ClickHouse/ClickHouse) as vectorstore providers. Qdrant being the default.\n\nIn order to select one or the other, set the `vectorstore.database` property in the `settings.yaml` file to `qdrant`, `milvus`, `chroma`, `postgres` and `clickhouse`.\n\n```yaml\nvectorstore:\n  database: qdrant\n```\n\n### Qdrant configuration\n\nTo enable Qdrant, set the `vectorstore.database` property in the `settings.yaml` file to `qdrant`.\n\nQdrant settings can be configured by setting values to the `qdrant` property in the `settings.yaml` file.\n\nThe available configuration options are:\n| Field        | Description |\n|--------------|-------------|\n| location     | If `:memory:` - use in-memory Qdrant instance. If `str` - use it as a `url` parameter.|\n| url          | Either host or str of 'Optional[scheme], host, Optional[port], Optional[prefix]'. Eg. `http://localhost:6333` |\n| port         | Port of the REST API interface. Default: `6333` |\n| grpc_port    | Port of the gRPC interface. Default: `6334` |\n| prefer_grpc  | If `true` - use gRPC interface whenever possible in custom methods. |\n| https        | If `true` - use HTTPS(SSL) protocol.|\n| api_key      | API key for authentication in Qdrant Cloud.|\n| prefix       | If set, add `prefix` to the REST URL path. Example: `service/v1` will result in `http://localhost:6333/service/v1/{qdrant-endpoint}` for REST API.|\n| timeout      | Timeout for REST and gRPC API requests. Default: 5.0 seconds for REST and unlimited for gRPC |\n| host         | Host name of Qdrant service. If url and host are not set, defaults to 'localhost'.|\n| path         | Persistence path for QdrantLocal. Eg. `local_data/private_gpt/qdrant`|\n| force_disable_check_same_thread         | Force disable check_same_thread for QdrantLocal sqlite connection, defaults to True.|\n\nBy default Qdrant tries to connect to an instance of Qdrant server at `http://localhost:3000`.\n\nTo obtain a local setup (disk-based database) without running a Qdrant server, configure the `qdrant.path` value in settings.yaml:\n\n```yaml\nqdrant:\n  path: local_data/private_gpt/qdrant\n```\n\n### Milvus configuration\n\nTo enable Milvus, set the `vectorstore.database` property in the `settings.yaml` file to `milvus` and install the `milvus` extra.\n\n```bash\npoetry install --extras vector-stores-milvus\n```\n\nThe available configuration options are:\n| Field        | Description |\n|--------------|-------------|\n| uri     | Default is set to \"local_data/private_gpt/milvus/milvus_local.db\" as a local file; you can also set up a more performant Milvus server on docker or k8s e.g.http://localhost:19530, as your uri; To use Zilliz Cloud, adjust the uri and token to Endpoint and Api key in Zilliz Cloud.|\n| token          | Pair with Milvus server on docker or k8s or zilliz cloud api key.|\n| collection_name         | The name of the collection, set to default \"milvus_db\".|\n| overwrite    | Overwrite the data in collection if it existed, set to default as True. 
|\n\nTo obtain a local setup (disk-based database) without running a Milvus server, configure the uri value in settings.yaml, to store in local_data/private_gpt/milvus/milvus_local.db.\n\n### Chroma configuration\n\nTo enable Chroma, set the `vectorstore.database` property in the `settings.yaml` file to `chroma` and install the `chroma` extra.\n\n```bash\npoetry install --extras chroma\n```\n\nBy default `chroma` will use a disk-based database stored in local_data_path / \"chroma_db\" (being local_data_path defined in settings.yaml)\n\n### PGVector\nTo use the PGVector store a [postgreSQL](https://www.postgresql.org/) database with the PGVector extension must be used.\n\nTo enable PGVector, set the `vectorstore.database` property in the `settings.yaml` file to `postgres` and install the `vector-stores-postgres` extra.\n\n```bash\npoetry install --extras vector-stores-postgres\n```\n\nPGVector settings can be configured by setting values to the `postgres` property in the `settings.yaml` file.\n\nThe available configuration options are:\n| Field         | Description                                               |\n|---------------|-----------------------------------------------------------|\n| **host**      | The server hosting the Postgres database. Default is `localhost` |\n| **port**      | The port on which the Postgres database is accessible. Default is `5432` |\n| **database**  | The specific database to connect to. Default is `postgres` |\n| **user**      | The username for database access. Default is `postgres` |\n| **password**  | The password for database access. (Required)            |\n| **schema_name** | The database schema to use. Default is `private_gpt`       |\n\nFor example:\n```yaml\nvectorstore:\n  database: postgres\n\npostgres:\n  host: localhost\n  port: 5432\n  database: postgres\n  user: postgres\n  password: <PASSWORD>\n  schema_name: private_gpt\n```\n\nThe following table will be created in the database\n```\npostgres=# \\d private_gpt.data_embeddings\n                                      Table \"private_gpt.data_embeddings\"\n  Column   |       Type        | Collation | Nullable |                         Default\n-----------+-------------------+-----------+----------+---------------------------------------------------------\n id        | bigint            |           | not null | nextval('private_gpt.data_embeddings_id_seq'::regclass)\n text      | character varying |           | not null |\n metadata_ | json              |           |          |\n node_id   | character varying |           |          |\n embedding | vector(768)       |           |          |\nIndexes:\n    \"data_embeddings_pkey\" PRIMARY KEY, btree (id)\n\npostgres=# \n```\nThe dimensions of the embeddings columns will be set based on the `embedding.embed_dim` value.  
If the embedding model changes this table may need to be dropped and recreated to avoid a dimension mismatch.\n\n### ClickHouse\n\nTo utilize ClickHouse as the vector store, a [ClickHouse](https://github.com/ClickHouse/ClickHouse) database must be employed.\n\nTo enable ClickHouse, set the `vectorstore.database` property in the `settings.yaml` file to `clickhouse` and install the `vector-stores-clickhouse` extra.\n\n```bash\npoetry install --extras vector-stores-clickhouse\n```\n\nClickHouse settings can be configured by setting values to the `clickhouse` property in the `settings.yaml` file.\n\nThe available configuration options are:\n| Field                | Description                                                    |\n|----------------------|----------------------------------------------------------------|\n| **host**             | The server hosting the ClickHouse database. Default is `localhost` |\n| **port**             | The port on which the ClickHouse database is accessible. Default is `8123` |\n| **username**         | The username for database access. Default is `default` |\n| **password**         | The password for database access. (Optional) |\n| **database**         | The specific database to connect to. Default is `__default__` |\n| **secure**           | Use https/TLS for secure connection to the server. Default is `false` |\n| **interface**        | The protocol used for the connection, either 'http' or 'https'. (Optional) |\n| **settings**         | Specific ClickHouse server settings to be used with the session. (Optional) |\n| **connect_timeout**  | Timeout in seconds for establishing a connection. (Optional) |\n| **send_receive_timeout** | Read timeout in seconds for http connection. (Optional) |\n| **verify**           | Verify the server certificate in secure/https mode. (Optional) |\n| **ca_cert**          | Path to Certificate Authority root certificate (.pem format). (Optional) |\n| **client_cert**      | Path to TLS Client certificate (.pem format). (Optional) |\n| **client_cert_key**  | Path to the private key for the TLS Client certificate. (Optional) |\n| **http_proxy**       | HTTP proxy address. (Optional) |\n| **https_proxy**      | HTTPS proxy address. (Optional) |\n| **server_host_name** | Server host name to be checked against the TLS certificate. 
(Optional) |\n\nFor example:\n```yaml\nvectorstore:\n  database: clickhouse\n\nclickhouse:\n  host: localhost\n  port: 8443\n  username: admin\n  password: <PASSWORD>\n  database: embeddings\n  secure: false\n```\n\nThe following table will be created in the database:\n```\nclickhouse-client\n:) \\d embeddings.llama_index\n                                   Table \"llama_index\"\n  № |  name     | type                                         | default_type | default_expression | comment | codec_expression | ttl_expression\n----|-----------|----------------------------------------------|--------------|--------------------|---------|------------------|---------------\n  1 | id        | String                                       |              |                    |         |                  |\n  2 | doc_id    | String                                       |              |                    |         |                  |\n  3 | text      | String                                       |              |                    |         |                  |\n  4 | vector    | Array(Float32)                               |              |                    |         |                  |\n  5 | node_info | Tuple(start Nullable(UInt64), end Nullable(UInt64)) |       |                    |         |                  |\n  6 | metadata  | String                                       |              |                    |         |                  |\n\nclickhouse-client\n```\n\nThe dimensions of the embeddings columns will be set based on the `embedding.embed_dim` value. If the embedding model changes, this table may need to be dropped and recreated to avoid a dimension mismatch.\n"
  },
  {
    "path": "fern/docs/pages/overview/welcome.mdx",
    "content": "PrivateGPT provides an **API** containing all the building blocks required to\nbuild **private, context-aware AI applications**.\n\n<Callout intent = \"tip\">\nIf you are looking for an **enterprise-ready, fully private AI workspace**\ncheck out [Zylon's website](https://zylon.ai)  or [request a demo](https://cal.com/zylon/demo?source=pgpt-docs).\nCrafted by the team behind PrivateGPT, Zylon is a best-in-class AI collaborative\nworkspace that can be easily deployed on-premise (data center, bare metal...) or in your private cloud (AWS, GCP, Azure...).\n</Callout>\n\nThe API follows and extends OpenAI API standard, and supports both normal and streaming responses.\nThat means that, if you can use OpenAI API in one of your tools, you can use your own PrivateGPT API instead,\nwith no code changes, **and for free** if you are running PrivateGPT in a `local` setup.\n\nGet started by understanding the [Main Concepts and Installation](/installation) and then dive into the [API Reference](/api-reference).\n\n## Frequently Visited Resources\n\n<Cards>\n  <Card\n    title=\"Main Concepts\"\n    icon=\"fa-solid fa-lines-leaning\"\n    href=\"/installation\"\n  />\n  <Card\n    title=\"API Reference\"\n    icon=\"fa-solid fa-code\"\n    href=\"/api-reference\"\n  />\n  <Card\n    title=\"Twitter\"\n    icon=\"fa-brands fa-twitter\"\n    href=\"https://twitter.com/PrivateGPT_AI\"\n  />\n  <Card\n    title=\"Discord Server\"\n    icon=\"fa-brands fa-discord\"\n    href=\"https://discord.gg/bK6mRVpErU\"\n  />\n</Cards>\n\n<br />"
  },
  {
    "path": "fern/docs/pages/quickstart/quickstart.mdx",
    "content": "This guide provides a quick start for running different profiles of PrivateGPT using Docker Compose.\nThe profiles cater to various environments, including Ollama setups (CPU, CUDA, MacOS), and a fully local setup.\n\nBy default, Docker Compose will download pre-built images from a remote registry when starting the services. However, you have the option to build the images locally if needed. Details on building Docker image locally are provided at the end of this guide.\n\nIf you want to run PrivateGPT locally without Docker, refer to the [Local Installation Guide](/installation).\n\n## Prerequisites\n- **Docker and Docker Compose:** Ensure both are installed on your system.\n  [Installation Guide for Docker](https://docs.docker.com/get-docker/), [Installation Guide for Docker Compose](https://docs.docker.com/compose/install/).\n- **Clone PrivateGPT Repository:** Clone the PrivateGPT repository to your machine and navigate to the directory:\n  ```sh\n  git clone https://github.com/zylon-ai/private-gpt.git\n  cd private-gpt\n  ```\n\n## Setups\n\n### Ollama Setups (Recommended)\n\n#### 1. Default/Ollama CPU\n\n**Description:**\nThis profile runs the Ollama service using CPU resources. It is the standard configuration for running Ollama-based Private-GPT services without GPU acceleration.\n\n**Run:**\nTo start the services using pre-built images, run:\n```sh\ndocker-compose up\n```\nor with a specific profile:\n```sh\ndocker-compose --profile ollama-cpu up\n```\n\n#### 2. Ollama Nvidia CUDA\n\n**Description:**\nThis profile leverages GPU acceleration with CUDA support, suitable for computationally intensive tasks that benefit from GPU resources.\n\n**Requirements:**\nEnsure that your system has compatible GPU hardware and the necessary NVIDIA drivers installed. The installation process is detailed [here](https://docs.nvidia.com/cuda/cuda-installation-guide-microsoft-windows/index.html).\n\n**Run:**\nTo start the services with CUDA support using pre-built images, run:\n```sh\ndocker-compose --profile ollama-cuda up\n```\n\n#### 3. Ollama External API\n\n**Description:**\nThis profile is designed for running PrivateGPT using Ollama installed on the host machine. This setup is particularly useful for MacOS users, as Docker does not yet support Metal GPU.\n\n**Requirements:**\nInstall Ollama on your machine by following the instructions at [ollama.ai](https://ollama.ai/).\n\n**Run:**\nTo start the Ollama service, use:\n```sh\nOLLAMA_HOST=0.0.0.0 ollama serve\n```\nTo start the services with the host configuration using pre-built images, run:\n```sh\ndocker-compose --profile ollama-api up\n```\n\n### Fully Local Setups\n\n#### 1. LlamaCPP CPU\n\n**Description:**\nThis profile runs the Private-GPT services locally using `llama-cpp` and Hugging Face models.\n\n**Requirements:**\nA **Hugging Face Token (HF_TOKEN)** is required for accessing Hugging Face models. 
Obtain your token following [this guide](/installation/getting-started/troubleshooting#downloading-gated-and-private-models).\n\n**Run:**\nStart the services with your Hugging Face token using pre-built images:\n```sh\nHF_TOKEN=<your_hf_token> docker-compose --profile llamacpp-cpu up\n```\nReplace `<your_hf_token>` with your actual Hugging Face token.\n\n## Building Locally\n\nIf you prefer to build the Docker images locally, which is useful when making changes to the codebase or the Dockerfiles, follow these steps:\n\n### Building the Images\nTo build the Docker images locally, navigate to the cloned repository directory and run:\n```sh\ndocker-compose build\n```\nThis command builds the necessary Docker images based on the current codebase and Dockerfile configurations.\n\n### Forcing a Rebuild with --build\nIf you have made changes and need to ensure these changes are reflected in the Docker images, you can force a rebuild before starting the services:\n```sh\ndocker-compose up --build\n```\nor with a specific profile:\n```sh\ndocker-compose --profile <profile_name> up --build\n```\nReplace `<profile_name>` with the desired profile.
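\n\nFor instance, to rebuild and start the CUDA-enabled Ollama profile described above (assuming your system meets its GPU requirements), you could run:\n```sh\ndocker-compose --profile ollama-cuda up --build\n```"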
  },
  {
    "path": "fern/docs/pages/recipes/quickstart.mdx",
    "content": "# Recipes\n\nRecipes are predefined use cases that help users solve very specific tasks using PrivateGPT.\nThey provide a streamlined approach to achieve common goals with the platform, offering both a starting point and inspiration for further exploration.\nThe main goal of Recipes is to empower the community to create and share solutions, expanding the capabilities of PrivateGPT.\n\n## How to Create a New Recipe\n\n1. **Identify the Task**: Define a specific task or problem that the Recipe will address.\n2. **Develop the Solution**: Create a clear and concise guide, including any necessary code snippets or configurations.\n3. **Submit a PR**: Fork the PrivateGPT repository, add your Recipe to the appropriate section, and submit a PR for review.\n\nWe encourage you to be creative and think outside the box! Your contributions help shape the future of PrivateGPT.\n\n## Available Recipes\n\n<Cards>\n  <Card\n    title=\"Summarize\"\n    icon=\"fa-solid fa-file-alt\"\n    href=\"/recipes/general-use-cases/summarize\"\n  />\n</Cards>\n"
  },
  {
    "path": "fern/docs/pages/recipes/summarize.mdx",
    "content": "The Summarize Recipe provides a method to extract concise summaries from ingested documents or texts using PrivateGPT.\nThis tool is particularly useful for quickly understanding large volumes of information by distilling key points and main ideas.\n\n## Use Case\n\nThe primary use case for the `Summarize` tool is to automate the summarization of lengthy documents,\nmaking it easier for users to grasp the essential information without reading through entire texts.\nThis can be applied in various scenarios, such as summarizing research papers, news articles, or business reports.\n\n## Key Features\n\n1. **Ingestion-compatible**: The user provides the text to be summarized. The text can be directly inputted or retrieved from ingested documents within the system.\n2. **Customization**: The summary generation can be influenced by providing specific `instructions` or a `prompt`. These inputs guide the model on how to frame the summary, allowing for customization according to user needs.\n3. **Streaming Support**: The tool supports streaming, allowing for real-time summary generation, which can be particularly useful for handling large texts or providing immediate feedback.\n\n## Contributing\n\nIf you have ideas for improving the Summarize or want to add new features, feel free to contribute!\nYou can submit your enhancements via a pull request on our [GitHub repository](https://github.com/zylon-ai/private-gpt).\n\n"
  },
  {
    "path": "fern/docs/pages/ui/alternatives.mdx",
    "content": "\nThis page aims to present different user interface (UI) alternatives for integrating and using PrivateGPT. These alternatives range from demo applications to fully customizable UI setups that can be adapted to your specific needs.\n\n**Do you have any working demo project using PrivateGPT?**\n\nPlease open a PR to add it to the list, and come on our Discord to tell us about it!\n\n<Callout intent = \"note\">\nWIP: This page provides an overview of one of the UI alternatives available for PrivateGPT. More alternatives will be added to this page as they become available.\n</Callout>\n\n## [PrivateGPT SDK Demo App](https://github.com/frgarciames/privategpt-react)\n\nThe PrivateGPT SDK demo app is a robust starting point for developers looking to integrate and customize PrivateGPT in their applications. Leveraging modern technologies like Tailwind, shadcn/ui, and Biomejs, it provides a smooth development experience and a highly customizable user interface. Refer to the [repository](https://github.com/frgarciames/privategpt-react) for more details and to get started.\n\n**Tech Stack:**\n\n- **Tailwind:** A utility-first CSS framework for rapid UI development.\n- **shadcn/ui:** A set of high-quality, customizable UI components.\n- **PrivateGPT Web SDK:** The core SDK for interacting with PrivateGPT.\n- **Biomejs formatter/linter:** A tool for maintaining code quality and consistency.\n"
  },
  {
    "path": "fern/docs/pages/ui/gradio.mdx",
    "content": "## Gradio UI user manual\n\nGradio UI is a ready to use way of testing most of PrivateGPT API functionalities.\n\n![Gradio PrivateGPT](https://github.com/zylon-ai/private-gpt/raw/main/fern/docs/assets/ui.png?raw=true)\n\n<Callout intent = \"warning\">\nA working **Gradio UI client** is provided to test the API, together with a set of useful tools such as bulk\nmodel download script, ingestion script, documents folder watch, etc. Please refer to the [UI alternatives](/manual/user-interface/alternatives) page for more UI alternatives.\n</Callout>\n\n### Execution Modes\n\nIt has 3 modes of execution (you can select in the top-left):\n\n* Query Docs: uses the context from the\n  ingested documents to answer the questions posted in the chat. It also takes\n  into account previous chat messages as context.\n    * Makes use of `/chat/completions` API with `use_context=true` and no\n      `context_filter`.\n* Search in Docs: fast search that returns the 4 most related text\n  chunks, together with their source document and page.\n    * Makes use of `/chunks` API with no `context_filter`, `limit=4` and\n      `prev_next_chunks=0`.\n* LLM Chat: simple, non-contextual chat with the LLM. The ingested documents won't\n  be taken into account, only the previous messages.\n    * Makes use of `/chat/completions` API with `use_context=false`.\n\n### Document Ingestion\n\nIngest documents by using the `Upload a File` button. You can check the progress of\nthe ingestion in the console logs of the server.\n\nThe list of ingested files is shown below the button.\n\nIf you want to delete the ingested documents, refer to *Reset Local documents\ndatabase* section in the documentation.\n\n### Chat\n\nNormal chat interface, self-explanatory ;)\n\n#### System Prompt\nYou can view and change the system prompt being passed to the LLM by clicking \"Additional Inputs\"\nin the chat interface. The system prompt is also logged on the server.\n\nBy default, the `Query Docs` mode uses the setting value `ui.default_query_system_prompt`.\n\nThe `LLM Chat` mode attempts to use the optional settings value `ui.default_chat_system_prompt`.\n\nIf no system prompt is entered, the UI will display the default system prompt being used\nfor the active mode.\n\n##### System Prompt Examples:\n\nThe system prompt can effectively provide your chat bot specialized roles, and results tailored to the prompt\nyou have given the model. Examples of system prompts can be be found\n[here](https://www.w3schools.com/gen_ai/chatgpt-3-5/chatgpt-3-5_roles.php).\n\nSome interesting examples to try include:\n\n* You are -X-. You have all the knowledge and personality of -X-. Answer as if you were -X- using\ntheir manner of speaking and vocabulary.\n    * Example: You are Shakespeare. You have all the knowledge and personality of Shakespeare.\n    Answer as if you were Shakespeare using their manner of speaking and vocabulary.\n* You are an expert (at) -role-. Answer all questions using your expertise on -specific domain topic-.\n    * Example: You are an expert software engineer. Answer all questions using your expertise on Python.\n* You are a -role- bot, respond with -response criteria needed-. If no -response criteria- is needed,\nrespond with -alternate response-.\n    * Example: You are a grammar checking bot, respond with any grammatical corrections needed. If no corrections\n    are needed, respond with \"verified\"."
  },
  {
    "path": "fern/docs.yml",
    "content": "# Main Fern configuration file\ninstances:\n  - url: privategpt.docs.buildwithfern.com\n    custom-domain: docs.privategpt.dev\n\ntitle: PrivateGPT | Docs\n\n# The tabs definition, in the top left corner\ntabs:\n  overview:\n    display-name: Overview\n    icon: \"fa-solid fa-home\"\n  quickstart:\n    display-name: Quickstart\n    icon: \"fa-solid fa-rocket\"\n  installation:\n    display-name: Installation\n    icon: \"fa-solid fa-download\"\n  manual:\n    display-name: Manual\n    icon: \"fa-solid fa-book\"\n  recipes:\n    display-name: Recipes\n    icon: \"fa-solid fa-flask\"\n  api-reference:\n    display-name: API Reference\n    icon: \"fa-solid fa-file-contract\"\n\n# Definition of tabs contents, will be displayed on the left side of the page, below all tabs\nnavigation:\n  # The default tab\n  - tab: overview\n    layout:\n      - section: Welcome\n        contents:\n          - page: Introduction\n            path: ./docs/pages/overview/welcome.mdx\n  - tab: quickstart\n    layout:\n      - section: Getting started\n        contents:\n          - page: Quickstart\n            path: ./docs/pages/quickstart/quickstart.mdx\n  # How to install PrivateGPT, with FAQ and troubleshooting\n  - tab: installation\n    layout:\n      - section: Getting started\n        contents:\n          - page: Main Concepts\n            path: ./docs/pages/installation/concepts.mdx\n          - page: Installation\n            path: ./docs/pages/installation/installation.mdx\n          - page: Troubleshooting\n            path: ./docs/pages/installation/troubleshooting.mdx\n  # Manual of PrivateGPT: how to use it and configure it\n  - tab: manual\n    layout:\n      - section: General configuration\n        contents:\n          - page: Configuration\n            path: ./docs/pages/manual/settings.mdx\n      - section: Document management\n        contents:\n          - page: Ingestion\n            path: ./docs/pages/manual/ingestion.mdx\n          - page: Deletion\n            path: ./docs/pages/manual/ingestion-reset.mdx\n      - section: Storage\n        contents:\n          - page: Vector Stores\n            path: ./docs/pages/manual/vectordb.mdx\n          - page: Node Stores\n            path: ./docs/pages/manual/nodestore.mdx\n      - section: Advanced Setup\n        contents:\n          - page: LLM Backends\n            path: ./docs/pages/manual/llms.mdx\n          - page: Reranking\n            path: ./docs/pages/manual/reranker.mdx\n      - section: User Interface\n        contents:\n          - page: Gradio Manual\n            path: ./docs/pages/ui/gradio.mdx\n          - page: Alternatives\n            path: ./docs/pages/ui/alternatives.mdx\n  - tab: recipes\n    layout:\n      - section: Getting started\n        contents:\n          - page: Quickstart\n            path: ./docs/pages/recipes/quickstart.mdx\n      - section: General use cases\n        contents:\n          - page: Summarize\n            path: ./docs/pages/recipes/summarize.mdx\n  # More advanced usage of PrivateGPT, by API\n  - tab: api-reference\n    layout:\n      - section: Overview\n        contents:\n          - page : API Reference overview\n            path: ./docs/pages/api-reference/api-reference.mdx\n          - page: SDKs\n            path: ./docs/pages/api-reference/sdks.mdx\n      - api: API Reference\n\n# Definition of the navbar, will be displayed in the top right corner.\n# `type:primary` is always displayed at the most right side of the navbar\nnavbar-links:\n  - type: secondary\n    text: Contact 
us\n    url: \"mailto:hello@zylon.ai\"\n  - type: github\n    value: \"https://github.com/zylon-ai/private-gpt\"\n  - type: primary\n    text: Join the Discord\n    url: https://discord.com/invite/bK6mRVpErU\n\ncolors:\n  accentPrimary:\n    dark: \"#C6BBFF\"\n    light: \"#756E98\"\n\nlogo:\n  dark: ./docs/assets/logo_light.png\n  light: ./docs/assets/logo_dark.png\n  height: 50\n\nfavicon: ./docs/assets/favicon.ico\n"
  },
  {
    "path": "fern/fern.config.json",
    "content": "{\n  \"organization\": \"privategpt\",\n  \"version\": \"0.31.17\"\n}"
  },
  {
    "path": "fern/generators.yml",
    "content": "groups:\n  public:\n    generators:\n      - name: fernapi/fern-python-sdk\n        version: 0.6.2\n        output:\n          location: local-file-system\n          path: ../../pgpt-sdk/python\n"
  },
  {
    "path": "fern/openapi/openapi.json",
    "content": "{\n  \"openapi\": \"3.1.0\",\n  \"info\": {\n    \"title\": \"FastAPI\",\n    \"version\": \"0.1.0\"\n  },\n  \"paths\": {\n    \"/v1/completions\": {\n      \"post\": {\n        \"tags\": [\n          \"Contextual Completions\"\n        ],\n        \"summary\": \"Completion\",\n        \"description\": \"We recommend most users use our Chat completions API.\\n\\nGiven a prompt, the model will return one predicted completion.\\n\\nOptionally include a `system_prompt` to influence the way the LLM answers.\\n\\nIf `use_context`\\nis set to `true`, the model will use context coming from the ingested documents\\nto create the response. The documents being used can be filtered using the\\n`context_filter` and passing the document IDs to be used. Ingested documents IDs\\ncan be found using `/ingest/list` endpoint. If you want all ingested documents to\\nbe used, remove `context_filter` altogether.\\n\\nWhen using `'include_sources': true`, the API will return the source Chunks used\\nto create the response, which come from the context provided.\\n\\nWhen using `'stream': true`, the API will return data chunks following [OpenAI's\\nstreaming model](https://platform.openai.com/docs/api-reference/chat/streaming):\\n```\\n{\\\"id\\\":\\\"12345\\\",\\\"object\\\":\\\"completion.chunk\\\",\\\"created\\\":1694268190,\\n\\\"model\\\":\\\"private-gpt\\\",\\\"choices\\\":[{\\\"index\\\":0,\\\"delta\\\":{\\\"content\\\":\\\"Hello\\\"},\\n\\\"finish_reason\\\":null}]}\\n```\",\n        \"operationId\": \"prompt_completion_v1_completions_post\",\n        \"requestBody\": {\n          \"content\": {\n            \"application/json\": {\n              \"schema\": {\n                \"$ref\": \"#/components/schemas/CompletionsBody\"\n              }\n            }\n          },\n          \"required\": true\n        },\n        \"responses\": {\n          \"200\": {\n            \"description\": \"Successful Response\",\n            \"content\": {\n              \"application/json\": {\n                \"schema\": {\n                  \"$ref\": \"#/components/schemas/OpenAICompletion\"\n                }\n              }\n            }\n          },\n          \"422\": {\n            \"description\": \"Validation Error\",\n            \"content\": {\n              \"application/json\": {\n                \"schema\": {\n                  \"$ref\": \"#/components/schemas/HTTPValidationError\"\n                }\n              }\n            }\n          }\n        },\n        \"x-fern-streaming\": {\n          \"stream-condition\": \"stream\",\n          \"response\": {\n            \"$ref\": \"#/components/schemas/OpenAICompletion\"\n          },\n          \"response-stream\": {\n            \"$ref\": \"#/components/schemas/OpenAICompletion\"\n          }\n        }\n      }\n    },\n    \"/v1/chat/completions\": {\n      \"post\": {\n        \"tags\": [\n          \"Contextual Completions\"\n        ],\n        \"summary\": \"Chat Completion\",\n        \"description\": \"Given a list of messages comprising a conversation, return a response.\\n\\nOptionally include an initial `role: system` message to influence the way\\nthe LLM answers.\\n\\nIf `use_context` is set to `true`, the model will use context coming\\nfrom the ingested documents to create the response. The documents being used can\\nbe filtered using the `context_filter` and passing the document IDs to be used.\\nIngested documents IDs can be found using `/ingest/list` endpoint. 
If you want\\nall ingested documents to be used, remove `context_filter` altogether.\\n\\nWhen using `'include_sources': true`, the API will return the source Chunks used\\nto create the response, which come from the context provided.\\n\\nWhen using `'stream': true`, the API will return data chunks following [OpenAI's\\nstreaming model](https://platform.openai.com/docs/api-reference/chat/streaming):\\n```\\n{\\\"id\\\":\\\"12345\\\",\\\"object\\\":\\\"completion.chunk\\\",\\\"created\\\":1694268190,\\n\\\"model\\\":\\\"private-gpt\\\",\\\"choices\\\":[{\\\"index\\\":0,\\\"delta\\\":{\\\"content\\\":\\\"Hello\\\"},\\n\\\"finish_reason\\\":null}]}\\n```\",\n        \"operationId\": \"chat_completion_v1_chat_completions_post\",\n        \"requestBody\": {\n          \"content\": {\n            \"application/json\": {\n              \"schema\": {\n                \"$ref\": \"#/components/schemas/ChatBody\"\n              }\n            }\n          },\n          \"required\": true\n        },\n        \"responses\": {\n          \"200\": {\n            \"description\": \"Successful Response\",\n            \"content\": {\n              \"application/json\": {\n                \"schema\": {\n                  \"$ref\": \"#/components/schemas/OpenAICompletion\"\n                }\n              }\n            }\n          },\n          \"422\": {\n            \"description\": \"Validation Error\",\n            \"content\": {\n              \"application/json\": {\n                \"schema\": {\n                  \"$ref\": \"#/components/schemas/HTTPValidationError\"\n                }\n              }\n            }\n          }\n        },\n        \"x-fern-streaming\": {\n          \"stream-condition\": \"stream\",\n          \"response\": {\n            \"$ref\": \"#/components/schemas/OpenAICompletion\"\n          },\n          \"response-stream\": {\n            \"$ref\": \"#/components/schemas/OpenAICompletion\"\n          }\n        }\n      }\n    },\n    \"/v1/chunks\": {\n      \"post\": {\n        \"tags\": [\n          \"Context Chunks\"\n        ],\n        \"summary\": \"Chunks Retrieval\",\n        \"description\": \"Given a `text`, returns the most relevant chunks from the ingested documents.\\n\\nThe returned information can be used to generate prompts that can be\\npassed to `/completions` or `/chat/completions` APIs. Note: it is usually a very\\nfast API, because only the Embeddings model is involved, not the LLM. The\\nreturned information contains the relevant chunk `text` together with the source\\n`document` it is coming from. It also contains a score that can be used to\\ncompare different results.\\n\\nThe max number of chunks to be returned is set using the `limit` param.\\n\\nPrevious and next chunks (pieces of text that appear right before or after in the\\ndocument) can be fetched by using the `prev_next_chunks` field.\\n\\nThe documents being used can be filtered using the `context_filter` and passing\\nthe document IDs to be used. Ingested documents IDs can be found using\\n`/ingest/list` endpoint. 
If you want all ingested documents to be used,\\nremove `context_filter` altogether.\",\n        \"operationId\": \"chunks_retrieval_v1_chunks_post\",\n        \"requestBody\": {\n          \"content\": {\n            \"application/json\": {\n              \"schema\": {\n                \"$ref\": \"#/components/schemas/ChunksBody\"\n              }\n            }\n          },\n          \"required\": true\n        },\n        \"responses\": {\n          \"200\": {\n            \"description\": \"Successful Response\",\n            \"content\": {\n              \"application/json\": {\n                \"schema\": {\n                  \"$ref\": \"#/components/schemas/ChunksResponse\"\n                }\n              }\n            }\n          },\n          \"422\": {\n            \"description\": \"Validation Error\",\n            \"content\": {\n              \"application/json\": {\n                \"schema\": {\n                  \"$ref\": \"#/components/schemas/HTTPValidationError\"\n                }\n              }\n            }\n          }\n        }\n      }\n    },\n    \"/v1/ingest\": {\n      \"post\": {\n        \"tags\": [\n          \"Ingestion\"\n        ],\n        \"summary\": \"Ingest\",\n        \"description\": \"Ingests and processes a file.\\n\\nDeprecated. Use ingest/file instead.\",\n        \"operationId\": \"ingest_v1_ingest_post\",\n        \"requestBody\": {\n          \"content\": {\n            \"multipart/form-data\": {\n              \"schema\": {\n                \"$ref\": \"#/components/schemas/Body_ingest_v1_ingest_post\"\n              }\n            }\n          },\n          \"required\": true\n        },\n        \"responses\": {\n          \"200\": {\n            \"description\": \"Successful Response\",\n            \"content\": {\n              \"application/json\": {\n                \"schema\": {\n                  \"$ref\": \"#/components/schemas/IngestResponse\"\n                }\n              }\n            }\n          },\n          \"422\": {\n            \"description\": \"Validation Error\",\n            \"content\": {\n              \"application/json\": {\n                \"schema\": {\n                  \"$ref\": \"#/components/schemas/HTTPValidationError\"\n                }\n              }\n            }\n          }\n        },\n        \"deprecated\": true\n      }\n    },\n    \"/v1/ingest/file\": {\n      \"post\": {\n        \"tags\": [\n          \"Ingestion\"\n        ],\n        \"summary\": \"Ingest File\",\n        \"description\": \"Ingests and processes a file, storing its chunks to be used as context.\\n\\nThe context obtained from files is later used in\\n`/chat/completions`, `/completions`, and `/chunks` APIs.\\n\\nMost common document\\nformats are supported, but you may be prompted to install an extra dependency to\\nmanage a specific file type.\\n\\nA file can generate different Documents (for example a PDF generates one Document\\nper page). All Documents IDs are returned in the response, together with the\\nextracted Metadata (which is later used to improve context retrieval). 
Those IDs\\ncan be used to filter the context used to create responses in\\n`/chat/completions`, `/completions`, and `/chunks` APIs.\",\n        \"operationId\": \"ingest_file_v1_ingest_file_post\",\n        \"requestBody\": {\n          \"content\": {\n            \"multipart/form-data\": {\n              \"schema\": {\n                \"$ref\": \"#/components/schemas/Body_ingest_file_v1_ingest_file_post\"\n              }\n            }\n          },\n          \"required\": true\n        },\n        \"responses\": {\n          \"200\": {\n            \"description\": \"Successful Response\",\n            \"content\": {\n              \"application/json\": {\n                \"schema\": {\n                  \"$ref\": \"#/components/schemas/IngestResponse\"\n                }\n              }\n            }\n          },\n          \"422\": {\n            \"description\": \"Validation Error\",\n            \"content\": {\n              \"application/json\": {\n                \"schema\": {\n                  \"$ref\": \"#/components/schemas/HTTPValidationError\"\n                }\n              }\n            }\n          }\n        }\n      }\n    },\n    \"/v1/ingest/text\": {\n      \"post\": {\n        \"tags\": [\n          \"Ingestion\"\n        ],\n        \"summary\": \"Ingest Text\",\n        \"description\": \"Ingests and processes a text, storing its chunks to be used as context.\\n\\nThe context obtained from files is later used in\\n`/chat/completions`, `/completions`, and `/chunks` APIs.\\n\\nA Document will be generated with the given text. The Document\\nID is returned in the response, together with the\\nextracted Metadata (which is later used to improve context retrieval). That ID\\ncan be used to filter the context used to create responses in\\n`/chat/completions`, `/completions`, and `/chunks` APIs.\",\n        \"operationId\": \"ingest_text_v1_ingest_text_post\",\n        \"requestBody\": {\n          \"content\": {\n            \"application/json\": {\n              \"schema\": {\n                \"$ref\": \"#/components/schemas/IngestTextBody\"\n              }\n            }\n          },\n          \"required\": true\n        },\n        \"responses\": {\n          \"200\": {\n            \"description\": \"Successful Response\",\n            \"content\": {\n              \"application/json\": {\n                \"schema\": {\n                  \"$ref\": \"#/components/schemas/IngestResponse\"\n                }\n              }\n            }\n          },\n          \"422\": {\n            \"description\": \"Validation Error\",\n            \"content\": {\n              \"application/json\": {\n                \"schema\": {\n                  \"$ref\": \"#/components/schemas/HTTPValidationError\"\n                }\n              }\n            }\n          }\n        }\n      }\n    },\n    \"/v1/ingest/list\": {\n      \"get\": {\n        \"tags\": [\n          \"Ingestion\"\n        ],\n        \"summary\": \"List Ingested\",\n        \"description\": \"Lists already ingested Documents including their Document ID and metadata.\\n\\nThose IDs can be used to filter the context used to create responses\\nin `/chat/completions`, `/completions`, and `/chunks` APIs.\",\n        \"operationId\": \"list_ingested_v1_ingest_list_get\",\n        \"responses\": {\n          \"200\": {\n            \"description\": \"Successful Response\",\n            \"content\": {\n              \"application/json\": {\n                \"schema\": {\n                  \"$ref\": 
\"#/components/schemas/IngestResponse\"\n                }\n              }\n            }\n          }\n        }\n      }\n    },\n    \"/v1/ingest/{doc_id}\": {\n      \"delete\": {\n        \"tags\": [\n          \"Ingestion\"\n        ],\n        \"summary\": \"Delete Ingested\",\n        \"description\": \"Delete the specified ingested Document.\\n\\nThe `doc_id` can be obtained from the `GET /ingest/list` endpoint.\\nThe document will be effectively deleted from your storage context.\",\n        \"operationId\": \"delete_ingested_v1_ingest__doc_id__delete\",\n        \"parameters\": [\n          {\n            \"name\": \"doc_id\",\n            \"in\": \"path\",\n            \"required\": true,\n            \"schema\": {\n              \"type\": \"string\",\n              \"title\": \"Doc Id\"\n            }\n          }\n        ],\n        \"responses\": {\n          \"200\": {\n            \"description\": \"Successful Response\",\n            \"content\": {\n              \"application/json\": {\n                \"schema\": {}\n              }\n            }\n          },\n          \"422\": {\n            \"description\": \"Validation Error\",\n            \"content\": {\n              \"application/json\": {\n                \"schema\": {\n                  \"$ref\": \"#/components/schemas/HTTPValidationError\"\n                }\n              }\n            }\n          }\n        }\n      }\n    },\n    \"/v1/summarize\": {\n      \"post\": {\n        \"tags\": [\n          \"Recipes\"\n        ],\n        \"summary\": \"Summarize\",\n        \"description\": \"Given a text, the model will return a summary.\\n\\nOptionally include `instructions` to influence the way the summary is generated.\\n\\nIf `use_context`\\nis set to `true`, the model will also use the content coming from the ingested\\ndocuments in the summary. 
The documents being used can\\nbe filtered by their metadata using the `context_filter`.\\nIngested documents metadata can be found using `/ingest/list` endpoint.\\nIf you want all ingested documents to be used, remove `context_filter` altogether.\\n\\nIf `prompt` is set, it will be used as the prompt for the summarization,\\notherwise the default prompt will be used.\\n\\nWhen using `'stream': true`, the API will return data chunks following [OpenAI's\\nstreaming model](https://platform.openai.com/docs/api-reference/chat/streaming):\\n```\\n{\\\"id\\\":\\\"12345\\\",\\\"object\\\":\\\"completion.chunk\\\",\\\"created\\\":1694268190,\\n\\\"model\\\":\\\"private-gpt\\\",\\\"choices\\\":[{\\\"index\\\":0,\\\"delta\\\":{\\\"content\\\":\\\"Hello\\\"},\\n\\\"finish_reason\\\":null}]}\\n```\",\n        \"operationId\": \"summarize_v1_summarize_post\",\n        \"requestBody\": {\n          \"content\": {\n            \"application/json\": {\n              \"schema\": {\n                \"$ref\": \"#/components/schemas/SummarizeBody\"\n              }\n            }\n          },\n          \"required\": true\n        },\n        \"responses\": {\n          \"200\": {\n            \"description\": \"Successful Response\",\n            \"content\": {\n              \"application/json\": {\n                \"schema\": {\n                  \"$ref\": \"#/components/schemas/SummarizeResponse\"\n                }\n              }\n            }\n          },\n          \"422\": {\n            \"description\": \"Validation Error\",\n            \"content\": {\n              \"application/json\": {\n                \"schema\": {\n                  \"$ref\": \"#/components/schemas/HTTPValidationError\"\n                }\n              }\n            }\n          }\n        }\n      }\n    },\n    \"/v1/embeddings\": {\n      \"post\": {\n        \"tags\": [\n          \"Embeddings\"\n        ],\n        \"summary\": \"Embeddings Generation\",\n        \"description\": \"Get a vector representation of a given input.\\n\\nThat vector representation can be easily consumed\\nby machine learning models and algorithms.\",\n        \"operationId\": \"embeddings_generation_v1_embeddings_post\",\n        \"requestBody\": {\n          \"content\": {\n            \"application/json\": {\n              \"schema\": {\n                \"$ref\": \"#/components/schemas/EmbeddingsBody\"\n              }\n            }\n          },\n          \"required\": true\n        },\n        \"responses\": {\n          \"200\": {\n            \"description\": \"Successful Response\",\n            \"content\": {\n              \"application/json\": {\n                \"schema\": {\n                  \"$ref\": \"#/components/schemas/EmbeddingsResponse\"\n                }\n              }\n            }\n          },\n          \"422\": {\n            \"description\": \"Validation Error\",\n            \"content\": {\n              \"application/json\": {\n                \"schema\": {\n                  \"$ref\": \"#/components/schemas/HTTPValidationError\"\n                }\n              }\n            }\n          }\n        }\n      }\n    },\n    \"/health\": {\n      \"get\": {\n        \"tags\": [\n          \"Health\"\n        ],\n        \"summary\": \"Health\",\n        \"description\": \"Return ok if the system is up.\",\n        \"operationId\": \"health_health_get\",\n        \"responses\": {\n          \"200\": {\n            \"description\": \"Successful Response\",\n            \"content\": {\n              
\"application/json\": {\n                \"schema\": {\n                  \"$ref\": \"#/components/schemas/HealthResponse\"\n                }\n              }\n            }\n          }\n        }\n      }\n    }\n  },\n  \"components\": {\n    \"schemas\": {\n      \"Body_ingest_file_v1_ingest_file_post\": {\n        \"properties\": {\n          \"file\": {\n            \"type\": \"string\",\n            \"format\": \"binary\",\n            \"title\": \"File\"\n          }\n        },\n        \"type\": \"object\",\n        \"required\": [\n          \"file\"\n        ],\n        \"title\": \"Body_ingest_file_v1_ingest_file_post\"\n      },\n      \"Body_ingest_v1_ingest_post\": {\n        \"properties\": {\n          \"file\": {\n            \"type\": \"string\",\n            \"format\": \"binary\",\n            \"title\": \"File\"\n          }\n        },\n        \"type\": \"object\",\n        \"required\": [\n          \"file\"\n        ],\n        \"title\": \"Body_ingest_v1_ingest_post\"\n      },\n      \"ChatBody\": {\n        \"properties\": {\n          \"messages\": {\n            \"items\": {\n              \"$ref\": \"#/components/schemas/OpenAIMessage\"\n            },\n            \"type\": \"array\",\n            \"title\": \"Messages\"\n          },\n          \"use_context\": {\n            \"type\": \"boolean\",\n            \"title\": \"Use Context\",\n            \"default\": false\n          },\n          \"context_filter\": {\n            \"anyOf\": [\n              {\n                \"$ref\": \"#/components/schemas/ContextFilter\"\n              },\n              {\n                \"type\": \"null\"\n              }\n            ]\n          },\n          \"include_sources\": {\n            \"type\": \"boolean\",\n            \"title\": \"Include Sources\",\n            \"default\": true\n          },\n          \"stream\": {\n            \"type\": \"boolean\",\n            \"title\": \"Stream\",\n            \"default\": false\n          }\n        },\n        \"type\": \"object\",\n        \"required\": [\n          \"messages\"\n        ],\n        \"title\": \"ChatBody\",\n        \"examples\": [\n          {\n            \"context_filter\": {\n              \"docs_ids\": [\n                \"c202d5e6-7b69-4869-81cc-dd574ee8ee11\"\n              ]\n            },\n            \"include_sources\": true,\n            \"messages\": [\n              {\n                \"content\": \"You are a rapper. 
Always answer with a rap.\",\n                \"role\": \"system\"\n              },\n              {\n                \"content\": \"How do you fry an egg?\",\n                \"role\": \"user\"\n              }\n            ],\n            \"stream\": false,\n            \"use_context\": true\n          }\n        ]\n      },\n      \"Chunk\": {\n        \"properties\": {\n          \"object\": {\n            \"type\": \"string\",\n            \"enum\": [\n              \"context.chunk\"\n            ],\n            \"const\": \"context.chunk\",\n            \"title\": \"Object\"\n          },\n          \"score\": {\n            \"type\": \"number\",\n            \"title\": \"Score\",\n            \"examples\": [\n              0.023\n            ]\n          },\n          \"document\": {\n            \"$ref\": \"#/components/schemas/IngestedDoc\"\n          },\n          \"text\": {\n            \"type\": \"string\",\n            \"title\": \"Text\",\n            \"examples\": [\n              \"Outbound sales increased 20%, driven by new leads.\"\n            ]\n          },\n          \"previous_texts\": {\n            \"anyOf\": [\n              {\n                \"items\": {\n                  \"type\": \"string\"\n                },\n                \"type\": \"array\"\n              },\n              {\n                \"type\": \"null\"\n              }\n            ],\n            \"title\": \"Previous Texts\",\n            \"examples\": [\n              [\n                \"SALES REPORT 2023\",\n                \"Inbound didn't show major changes.\"\n              ]\n            ]\n          },\n          \"next_texts\": {\n            \"anyOf\": [\n              {\n                \"items\": {\n                  \"type\": \"string\"\n                },\n                \"type\": \"array\"\n              },\n              {\n                \"type\": \"null\"\n              }\n            ],\n            \"title\": \"Next Texts\",\n            \"examples\": [\n              [\n                \"New leads came from Google Ads campaign.\",\n                \"The campaign was run by the Marketing Department\"\n              ]\n            ]\n          }\n        },\n        \"type\": \"object\",\n        \"required\": [\n          \"object\",\n          \"score\",\n          \"document\",\n          \"text\"\n        ],\n        \"title\": \"Chunk\"\n      },\n      \"ChunksBody\": {\n        \"properties\": {\n          \"text\": {\n            \"type\": \"string\",\n            \"title\": \"Text\",\n            \"examples\": [\n              \"Q3 2023 sales\"\n            ]\n          },\n          \"context_filter\": {\n            \"anyOf\": [\n              {\n                \"$ref\": \"#/components/schemas/ContextFilter\"\n              },\n              {\n                \"type\": \"null\"\n              }\n            ]\n          },\n          \"limit\": {\n            \"type\": \"integer\",\n            \"title\": \"Limit\",\n            \"default\": 10\n          },\n          \"prev_next_chunks\": {\n            \"type\": \"integer\",\n            \"title\": \"Prev Next Chunks\",\n            \"default\": 0,\n            \"examples\": [\n              2\n            ]\n          }\n        },\n        \"type\": \"object\",\n        \"required\": [\n          \"text\"\n        ],\n        \"title\": \"ChunksBody\"\n      },\n      \"ChunksResponse\": {\n        \"properties\": {\n          \"object\": {\n            \"type\": \"string\",\n            \"enum\": [\n   
           \"list\"\n            ],\n            \"const\": \"list\",\n            \"title\": \"Object\"\n          },\n          \"model\": {\n            \"type\": \"string\",\n            \"enum\": [\n              \"private-gpt\"\n            ],\n            \"const\": \"private-gpt\",\n            \"title\": \"Model\"\n          },\n          \"data\": {\n            \"items\": {\n              \"$ref\": \"#/components/schemas/Chunk\"\n            },\n            \"type\": \"array\",\n            \"title\": \"Data\"\n          }\n        },\n        \"type\": \"object\",\n        \"required\": [\n          \"object\",\n          \"model\",\n          \"data\"\n        ],\n        \"title\": \"ChunksResponse\"\n      },\n      \"CompletionsBody\": {\n        \"properties\": {\n          \"prompt\": {\n            \"type\": \"string\",\n            \"title\": \"Prompt\"\n          },\n          \"system_prompt\": {\n            \"anyOf\": [\n              {\n                \"type\": \"string\"\n              },\n              {\n                \"type\": \"null\"\n              }\n            ],\n            \"title\": \"System Prompt\"\n          },\n          \"use_context\": {\n            \"type\": \"boolean\",\n            \"title\": \"Use Context\",\n            \"default\": false\n          },\n          \"context_filter\": {\n            \"anyOf\": [\n              {\n                \"$ref\": \"#/components/schemas/ContextFilter\"\n              },\n              {\n                \"type\": \"null\"\n              }\n            ]\n          },\n          \"include_sources\": {\n            \"type\": \"boolean\",\n            \"title\": \"Include Sources\",\n            \"default\": true\n          },\n          \"stream\": {\n            \"type\": \"boolean\",\n            \"title\": \"Stream\",\n            \"default\": false\n          }\n        },\n        \"type\": \"object\",\n        \"required\": [\n          \"prompt\"\n        ],\n        \"title\": \"CompletionsBody\",\n        \"examples\": [\n          {\n            \"include_sources\": false,\n            \"prompt\": \"How do you fry an egg?\",\n            \"stream\": false,\n            \"system_prompt\": \"You are a rapper. 
Always answer with a rap.\",\n            \"use_context\": false\n          }\n        ]\n      },\n      \"ContextFilter\": {\n        \"properties\": {\n          \"docs_ids\": {\n            \"anyOf\": [\n              {\n                \"items\": {\n                  \"type\": \"string\"\n                },\n                \"type\": \"array\"\n              },\n              {\n                \"type\": \"null\"\n              }\n            ],\n            \"title\": \"Docs Ids\",\n            \"examples\": [\n              [\n                \"c202d5e6-7b69-4869-81cc-dd574ee8ee11\"\n              ]\n            ]\n          }\n        },\n        \"type\": \"object\",\n        \"required\": [\n          \"docs_ids\"\n        ],\n        \"title\": \"ContextFilter\"\n      },\n      \"Embedding\": {\n        \"properties\": {\n          \"index\": {\n            \"type\": \"integer\",\n            \"title\": \"Index\"\n          },\n          \"object\": {\n            \"type\": \"string\",\n            \"enum\": [\n              \"embedding\"\n            ],\n            \"const\": \"embedding\",\n            \"title\": \"Object\"\n          },\n          \"embedding\": {\n            \"items\": {\n              \"type\": \"number\"\n            },\n            \"type\": \"array\",\n            \"title\": \"Embedding\",\n            \"examples\": [\n              [\n                0.0023064255,\n                -0.009327292\n              ]\n            ]\n          }\n        },\n        \"type\": \"object\",\n        \"required\": [\n          \"index\",\n          \"object\",\n          \"embedding\"\n        ],\n        \"title\": \"Embedding\"\n      },\n      \"EmbeddingsBody\": {\n        \"properties\": {\n          \"input\": {\n            \"anyOf\": [\n              {\n                \"type\": \"string\"\n              },\n              {\n                \"items\": {\n                  \"type\": \"string\"\n                },\n                \"type\": \"array\"\n              }\n            ],\n            \"title\": \"Input\"\n          }\n        },\n        \"type\": \"object\",\n        \"required\": [\n          \"input\"\n        ],\n        \"title\": \"EmbeddingsBody\"\n      },\n      \"EmbeddingsResponse\": {\n        \"properties\": {\n          \"object\": {\n            \"type\": \"string\",\n            \"enum\": [\n              \"list\"\n            ],\n            \"const\": \"list\",\n            \"title\": \"Object\"\n          },\n          \"model\": {\n            \"type\": \"string\",\n            \"enum\": [\n              \"private-gpt\"\n            ],\n            \"const\": \"private-gpt\",\n            \"title\": \"Model\"\n          },\n          \"data\": {\n            \"items\": {\n              \"$ref\": \"#/components/schemas/Embedding\"\n            },\n            \"type\": \"array\",\n            \"title\": \"Data\"\n          }\n        },\n        \"type\": \"object\",\n        \"required\": [\n          \"object\",\n          \"model\",\n          \"data\"\n        ],\n        \"title\": \"EmbeddingsResponse\"\n      },\n      \"HTTPValidationError\": {\n        \"properties\": {\n          \"detail\": {\n            \"items\": {\n              \"$ref\": \"#/components/schemas/ValidationError\"\n            },\n            \"type\": \"array\",\n            \"title\": \"Detail\"\n          }\n        },\n        \"type\": \"object\",\n        \"title\": \"HTTPValidationError\"\n      },\n      \"HealthResponse\": {\n        
\"properties\": {\n          \"status\": {\n            \"type\": \"string\",\n            \"enum\": [\n              \"ok\"\n            ],\n            \"const\": \"ok\",\n            \"title\": \"Status\",\n            \"default\": \"ok\"\n          }\n        },\n        \"type\": \"object\",\n        \"title\": \"HealthResponse\"\n      },\n      \"IngestResponse\": {\n        \"properties\": {\n          \"object\": {\n            \"type\": \"string\",\n            \"enum\": [\n              \"list\"\n            ],\n            \"const\": \"list\",\n            \"title\": \"Object\"\n          },\n          \"model\": {\n            \"type\": \"string\",\n            \"enum\": [\n              \"private-gpt\"\n            ],\n            \"const\": \"private-gpt\",\n            \"title\": \"Model\"\n          },\n          \"data\": {\n            \"items\": {\n              \"$ref\": \"#/components/schemas/IngestedDoc\"\n            },\n            \"type\": \"array\",\n            \"title\": \"Data\"\n          }\n        },\n        \"type\": \"object\",\n        \"required\": [\n          \"object\",\n          \"model\",\n          \"data\"\n        ],\n        \"title\": \"IngestResponse\"\n      },\n      \"IngestTextBody\": {\n        \"properties\": {\n          \"file_name\": {\n            \"type\": \"string\",\n            \"title\": \"File Name\",\n            \"examples\": [\n              \"Avatar: The Last Airbender\"\n            ]\n          },\n          \"text\": {\n            \"type\": \"string\",\n            \"title\": \"Text\",\n            \"examples\": [\n              \"Avatar is set in an Asian and Arctic-inspired world in which some people can telekinetically manipulate one of the four elements\\u2014water, earth, fire or air\\u2014through practices known as 'bending', inspired by Chinese martial arts.\"\n            ]\n          }\n        },\n        \"type\": \"object\",\n        \"required\": [\n          \"file_name\",\n          \"text\"\n        ],\n        \"title\": \"IngestTextBody\"\n      },\n      \"IngestedDoc\": {\n        \"properties\": {\n          \"object\": {\n            \"type\": \"string\",\n            \"enum\": [\n              \"ingest.document\"\n            ],\n            \"const\": \"ingest.document\",\n            \"title\": \"Object\"\n          },\n          \"doc_id\": {\n            \"type\": \"string\",\n            \"title\": \"Doc Id\",\n            \"examples\": [\n              \"c202d5e6-7b69-4869-81cc-dd574ee8ee11\"\n            ]\n          },\n          \"doc_metadata\": {\n            \"anyOf\": [\n              {\n                \"type\": \"object\"\n              },\n              {\n                \"type\": \"null\"\n              }\n            ],\n            \"title\": \"Doc Metadata\",\n            \"examples\": [\n              {\n                \"file_name\": \"Sales Report Q3 2023.pdf\",\n                \"page_label\": \"2\"\n              }\n            ]\n          }\n        },\n        \"type\": \"object\",\n        \"required\": [\n          \"object\",\n          \"doc_id\",\n          \"doc_metadata\"\n        ],\n        \"title\": \"IngestedDoc\"\n      },\n      \"OpenAIChoice\": {\n        \"properties\": {\n          \"finish_reason\": {\n            \"anyOf\": [\n              {\n                \"type\": \"string\"\n              },\n              {\n                \"type\": \"null\"\n              }\n            ],\n            \"title\": \"Finish Reason\",\n            
\"examples\": [\n              \"stop\"\n            ]\n          },\n          \"delta\": {\n            \"anyOf\": [\n              {\n                \"$ref\": \"#/components/schemas/OpenAIDelta\"\n              },\n              {\n                \"type\": \"null\"\n              }\n            ]\n          },\n          \"message\": {\n            \"anyOf\": [\n              {\n                \"$ref\": \"#/components/schemas/OpenAIMessage\"\n              },\n              {\n                \"type\": \"null\"\n              }\n            ]\n          },\n          \"sources\": {\n            \"anyOf\": [\n              {\n                \"items\": {\n                  \"$ref\": \"#/components/schemas/Chunk\"\n                },\n                \"type\": \"array\"\n              },\n              {\n                \"type\": \"null\"\n              }\n            ],\n            \"title\": \"Sources\"\n          },\n          \"index\": {\n            \"type\": \"integer\",\n            \"title\": \"Index\",\n            \"default\": 0\n          }\n        },\n        \"type\": \"object\",\n        \"required\": [\n          \"finish_reason\"\n        ],\n        \"title\": \"OpenAIChoice\",\n        \"description\": \"Response from AI.\\n\\nEither the delta or the message will be present, but never both.\\nSources used will be returned in case context retrieval was enabled.\"\n      },\n      \"OpenAICompletion\": {\n        \"properties\": {\n          \"id\": {\n            \"type\": \"string\",\n            \"title\": \"Id\"\n          },\n          \"object\": {\n            \"type\": \"string\",\n            \"enum\": [\n              \"completion\",\n              \"completion.chunk\"\n            ],\n            \"title\": \"Object\",\n            \"default\": \"completion\"\n          },\n          \"created\": {\n            \"type\": \"integer\",\n            \"title\": \"Created\",\n            \"examples\": [\n              1623340000\n            ]\n          },\n          \"model\": {\n            \"type\": \"string\",\n            \"enum\": [\n              \"private-gpt\"\n            ],\n            \"const\": \"private-gpt\",\n            \"title\": \"Model\"\n          },\n          \"choices\": {\n            \"items\": {\n              \"$ref\": \"#/components/schemas/OpenAIChoice\"\n            },\n            \"type\": \"array\",\n            \"title\": \"Choices\"\n          }\n        },\n        \"type\": \"object\",\n        \"required\": [\n          \"id\",\n          \"created\",\n          \"model\",\n          \"choices\"\n        ],\n        \"title\": \"OpenAICompletion\",\n        \"description\": \"Clone of OpenAI Completion model.\\n\\nFor more information see: https://platform.openai.com/docs/api-reference/chat/object\"\n      },\n      \"OpenAIDelta\": {\n        \"properties\": {\n          \"content\": {\n            \"anyOf\": [\n              {\n                \"type\": \"string\"\n              },\n              {\n                \"type\": \"null\"\n              }\n            ],\n            \"title\": \"Content\"\n          }\n        },\n        \"type\": \"object\",\n        \"required\": [\n          \"content\"\n        ],\n        \"title\": \"OpenAIDelta\",\n        \"description\": \"A piece of completion that needs to be concatenated to get the full message.\"\n      },\n      \"OpenAIMessage\": {\n        \"properties\": {\n          \"role\": {\n            \"type\": \"string\",\n            \"enum\": [\n              
\"assistant\",\n              \"system\",\n              \"user\"\n            ],\n            \"title\": \"Role\",\n            \"default\": \"user\"\n          },\n          \"content\": {\n            \"anyOf\": [\n              {\n                \"type\": \"string\"\n              },\n              {\n                \"type\": \"null\"\n              }\n            ],\n            \"title\": \"Content\"\n          }\n        },\n        \"type\": \"object\",\n        \"required\": [\n          \"content\"\n        ],\n        \"title\": \"OpenAIMessage\",\n        \"description\": \"Inference result, with the source of the message.\\n\\nRole could be the assistant or system\\n(providing a default response, not AI generated).\"\n      },\n      \"SummarizeBody\": {\n        \"properties\": {\n          \"text\": {\n            \"anyOf\": [\n              {\n                \"type\": \"string\"\n              },\n              {\n                \"type\": \"null\"\n              }\n            ],\n            \"title\": \"Text\"\n          },\n          \"use_context\": {\n            \"type\": \"boolean\",\n            \"title\": \"Use Context\",\n            \"default\": false\n          },\n          \"context_filter\": {\n            \"anyOf\": [\n              {\n                \"$ref\": \"#/components/schemas/ContextFilter\"\n              },\n              {\n                \"type\": \"null\"\n              }\n            ]\n          },\n          \"prompt\": {\n            \"anyOf\": [\n              {\n                \"type\": \"string\"\n              },\n              {\n                \"type\": \"null\"\n              }\n            ],\n            \"title\": \"Prompt\"\n          },\n          \"instructions\": {\n            \"anyOf\": [\n              {\n                \"type\": \"string\"\n              },\n              {\n                \"type\": \"null\"\n              }\n            ],\n            \"title\": \"Instructions\"\n          },\n          \"stream\": {\n            \"type\": \"boolean\",\n            \"title\": \"Stream\",\n            \"default\": false\n          }\n        },\n        \"type\": \"object\",\n        \"title\": \"SummarizeBody\"\n      },\n      \"SummarizeResponse\": {\n        \"properties\": {\n          \"summary\": {\n            \"type\": \"string\",\n            \"title\": \"Summary\"\n          }\n        },\n        \"type\": \"object\",\n        \"required\": [\n          \"summary\"\n        ],\n        \"title\": \"SummarizeResponse\"\n      },\n      \"ValidationError\": {\n        \"properties\": {\n          \"loc\": {\n            \"items\": {\n              \"anyOf\": [\n                {\n                  \"type\": \"string\"\n                },\n                {\n                  \"type\": \"integer\"\n                }\n              ]\n            },\n            \"type\": \"array\",\n            \"title\": \"Location\"\n          },\n          \"msg\": {\n            \"type\": \"string\",\n            \"title\": \"Message\"\n          },\n          \"type\": {\n            \"type\": \"string\",\n            \"title\": \"Error Type\"\n          }\n        },\n        \"type\": \"object\",\n        \"required\": [\n          \"loc\",\n          \"msg\",\n          \"type\"\n        ],\n        \"title\": \"ValidationError\"\n      }\n    }\n  }\n}"
  },
  {
    "path": "local_data/.gitignore",
    "content": "*\n!.gitignore"
  },
  {
    "path": "models/.gitignore",
    "content": "*\n!.gitignore"
  },
  {
    "path": "private_gpt/__init__.py",
    "content": "\"\"\"private-gpt.\"\"\"\n\nimport logging\nimport os\n\n# Set to 'DEBUG' to have extensive logging turned on, even for libraries\nROOT_LOG_LEVEL = \"INFO\"\n\nPRETTY_LOG_FORMAT = (\n    \"%(asctime)s.%(msecs)03d [%(levelname)-8s] %(name)+25s - %(message)s\"\n)\nlogging.basicConfig(level=ROOT_LOG_LEVEL, format=PRETTY_LOG_FORMAT, datefmt=\"%H:%M:%S\")\nlogging.captureWarnings(True)\n\n# Disable gradio analytics\n# This is done this way because gradio does not solely rely on what values are\n# passed to gr.Blocks(enable_analytics=...) but also on the environment\n# variable GRADIO_ANALYTICS_ENABLED. `gradio.strings` actually reads this env\n# directly, so to fully disable gradio analytics we need to set this env var.\nos.environ[\"GRADIO_ANALYTICS_ENABLED\"] = \"False\"\n\n# Disable chromaDB telemetry\n# It is already disabled, see PR#1144\n# os.environ[\"ANONYMIZED_TELEMETRY\"] = \"False\"\n\n# adding tiktoken cache path within repo to be able to run in offline environment.\nos.environ[\"TIKTOKEN_CACHE_DIR\"] = \"tiktoken_cache\"\n"
  },
  {
    "path": "private_gpt/__main__.py",
    "content": "# start a fastapi server with uvicorn\n\nimport uvicorn\n\nfrom private_gpt.main import app\nfrom private_gpt.settings.settings import settings\n\n# Set log_config=None to do not use the uvicorn logging configuration, and\n# use ours instead. For reference, see below:\n# https://github.com/tiangolo/fastapi/discussions/7457#discussioncomment-5141108\nuvicorn.run(app, host=\"0.0.0.0\", port=settings().server.port, log_config=None)\n"
  },
  {
    "path": "private_gpt/components/__init__.py",
    "content": ""
  },
  {
    "path": "private_gpt/components/embedding/__init__.py",
    "content": ""
  },
  {
    "path": "private_gpt/components/embedding/custom/__init__.py",
    "content": ""
  },
  {
    "path": "private_gpt/components/embedding/custom/sagemaker.py",
    "content": "# mypy: ignore-errors\nimport json\nfrom typing import Any\n\nimport boto3\nfrom llama_index.core.base.embeddings.base import BaseEmbedding\nfrom pydantic import Field, PrivateAttr\n\n\nclass SagemakerEmbedding(BaseEmbedding):\n    \"\"\"Sagemaker Embedding Endpoint.\n\n    To use, you must supply the endpoint name from your deployed\n    Sagemaker embedding model & the region where it is deployed.\n\n    To authenticate, the AWS client uses the following methods to\n    automatically load credentials:\n    https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html\n\n    If a specific credential profile should be used, you must pass\n    the name of the profile from the ~/.aws/credentials file that is to be used.\n\n    Make sure the credentials / roles used have the required policies to\n    access the Sagemaker endpoint.\n    See: https://docs.aws.amazon.com/IAM/latest/UserGuide/access_policies.html\n    \"\"\"\n\n    endpoint_name: str = Field(description=\"\")\n\n    _boto_client: Any = boto3.client(\n        \"sagemaker-runtime\",\n    )  # TODO make it an optional field\n\n    _async_not_implemented_warned: bool = PrivateAttr(default=False)\n\n    @classmethod\n    def class_name(cls) -> str:\n        return \"SagemakerEmbedding\"\n\n    def _async_not_implemented_warn_once(self) -> None:\n        if not self._async_not_implemented_warned:\n            print(\"Async embedding not available, falling back to sync method.\")\n            self._async_not_implemented_warned = True\n\n    def _embed(self, sentences: list[str]) -> list[list[float]]:\n        request_params = {\n            \"inputs\": sentences,\n        }\n\n        resp = self._boto_client.invoke_endpoint(\n            EndpointName=self.endpoint_name,\n            Body=json.dumps(request_params),\n            ContentType=\"application/json\",\n        )\n\n        response_body = resp[\"Body\"]\n        response_str = response_body.read().decode(\"utf-8\")\n        response_json = json.loads(response_str)\n\n        return response_json[\"vectors\"]\n\n    def _get_query_embedding(self, query: str) -> list[float]:\n        \"\"\"Get query embedding.\"\"\"\n        return self._embed([query])[0]\n\n    async def _aget_query_embedding(self, query: str) -> list[float]:\n        # Warn the user that sync is being used\n        self._async_not_implemented_warn_once()\n        return self._get_query_embedding(query)\n\n    async def _aget_text_embedding(self, text: str) -> list[float]:\n        # Warn the user that sync is being used\n        self._async_not_implemented_warn_once()\n        return self._get_text_embedding(text)\n\n    def _get_text_embedding(self, text: str) -> list[float]:\n        \"\"\"Get text embedding.\"\"\"\n        return self._embed([text])[0]\n\n    def _get_text_embeddings(self, texts: list[str]) -> list[list[float]]:\n        \"\"\"Get text embeddings.\"\"\"\n        return self._embed(texts)\n"
  },
  {
    "path": "private_gpt/components/embedding/embedding_component.py",
    "content": "import logging\n\nfrom injector import inject, singleton\nfrom llama_index.core.embeddings import BaseEmbedding, MockEmbedding\n\nfrom private_gpt.paths import models_cache_path\nfrom private_gpt.settings.settings import Settings\n\nlogger = logging.getLogger(__name__)\n\n\n@singleton\nclass EmbeddingComponent:\n    embedding_model: BaseEmbedding\n\n    @inject\n    def __init__(self, settings: Settings) -> None:\n        embedding_mode = settings.embedding.mode\n        logger.info(\"Initializing the embedding model in mode=%s\", embedding_mode)\n        match embedding_mode:\n            case \"huggingface\":\n                try:\n                    from llama_index.embeddings.huggingface import (  # type: ignore\n                        HuggingFaceEmbedding,\n                    )\n                except ImportError as e:\n                    raise ImportError(\n                        \"Local dependencies not found, install with `poetry install --extras embeddings-huggingface`\"\n                    ) from e\n\n                self.embedding_model = HuggingFaceEmbedding(\n                    model_name=settings.huggingface.embedding_hf_model_name,\n                    cache_folder=str(models_cache_path),\n                    trust_remote_code=settings.huggingface.trust_remote_code,\n                )\n            case \"sagemaker\":\n                try:\n                    from private_gpt.components.embedding.custom.sagemaker import (\n                        SagemakerEmbedding,\n                    )\n                except ImportError as e:\n                    raise ImportError(\n                        \"Sagemaker dependencies not found, install with `poetry install --extras embeddings-sagemaker`\"\n                    ) from e\n\n                self.embedding_model = SagemakerEmbedding(\n                    endpoint_name=settings.sagemaker.embedding_endpoint_name,\n                )\n            case \"openai\":\n                try:\n                    from llama_index.embeddings.openai import (  # type: ignore\n                        OpenAIEmbedding,\n                    )\n                except ImportError as e:\n                    raise ImportError(\n                        \"OpenAI dependencies not found, install with `poetry install --extras embeddings-openai`\"\n                    ) from e\n\n                api_base = (\n                    settings.openai.embedding_api_base or settings.openai.api_base\n                )\n                api_key = settings.openai.embedding_api_key or settings.openai.api_key\n                model = settings.openai.embedding_model\n\n                self.embedding_model = OpenAIEmbedding(\n                    api_base=api_base,\n                    api_key=api_key,\n                    model=model,\n                )\n            case \"ollama\":\n                try:\n                    from llama_index.embeddings.ollama import (  # type: ignore\n                        OllamaEmbedding,\n                    )\n                    from ollama import Client  # type: ignore\n                except ImportError as e:\n                    raise ImportError(\n                        \"Local dependencies not found, install with `poetry install --extras embeddings-ollama`\"\n                    ) from e\n\n                ollama_settings = settings.ollama\n\n                # Calculate embedding model. 
If the model name has no explicit tag, default to ':latest'.\n                model_name = (\n                    ollama_settings.embedding_model + \":latest\"\n                    if \":\" not in ollama_settings.embedding_model\n                    else ollama_settings.embedding_model\n                )\n\n                self.embedding_model = OllamaEmbedding(\n                    model_name=model_name,\n                    base_url=ollama_settings.embedding_api_base,\n                )\n\n                if ollama_settings.autopull_models:\n                    from private_gpt.utils.ollama import (\n                        check_connection,\n                        pull_model,\n                    )\n\n                    # TODO: Reuse llama-index client when llama-index is updated\n                    client = Client(\n                        host=ollama_settings.embedding_api_base,\n                        timeout=ollama_settings.request_timeout,\n                    )\n\n                    if not check_connection(client):\n                        raise ValueError(\n                            f\"Failed to connect to Ollama, \"\n                            f\"check if Ollama server is running on {ollama_settings.embedding_api_base}\"\n                        )\n                    pull_model(client, model_name)\n\n            case \"azopenai\":\n                try:\n                    from llama_index.embeddings.azure_openai import (  # type: ignore\n                        AzureOpenAIEmbedding,\n                    )\n                except ImportError as e:\n                    raise ImportError(\n                        \"Azure OpenAI dependencies not found, install with `poetry install --extras embeddings-azopenai`\"\n                    ) from e\n\n                azopenai_settings = settings.azopenai\n                self.embedding_model = AzureOpenAIEmbedding(\n                    model=azopenai_settings.embedding_model,\n                    deployment_name=azopenai_settings.embedding_deployment_name,\n                    api_key=azopenai_settings.api_key,\n                    azure_endpoint=azopenai_settings.azure_endpoint,\n                    api_version=azopenai_settings.api_version,\n                )\n            case \"gemini\":\n                try:\n                    from llama_index.embeddings.gemini import (  # type: ignore\n                        GeminiEmbedding,\n                    )\n                except ImportError as e:\n                    raise ImportError(\n                        \"Gemini dependencies not found, install with `poetry install --extras embeddings-gemini`\"\n                    ) from e\n\n                self.embedding_model = GeminiEmbedding(\n                    api_key=settings.gemini.api_key,\n                    model_name=settings.gemini.embedding_model,\n                )\n            case \"mistralai\":\n                try:\n                    from llama_index.embeddings.mistralai import (  # type: ignore\n                        MistralAIEmbedding,\n                    )\n                except ImportError as e:\n                    raise ImportError(\n                        \"Mistral dependencies not found, install with `poetry install --extras embeddings-mistral`\"\n                    ) from e\n\n                api_key = settings.openai.api_key\n                model = settings.openai.embedding_model\n
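\n                # Note: the MistralAI embedding reuses the api_key and embedding_model\n                # values from the `openai` settings block above.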
\n                self.embedding_model = MistralAIEmbedding(\n                    api_key=api_key,\n                    model=model,\n                )\n            case \"mock\":\n                # Not a random number: 384 is the dimensionality used by\n                # the default embedding model\n                self.embedding_model = MockEmbedding(384)\n"
  },
  {
    "path": "private_gpt/components/ingest/__init__.py",
    "content": ""
  },
  {
    "path": "private_gpt/components/ingest/ingest_component.py",
    "content": "import abc\nimport itertools\nimport logging\nimport multiprocessing\nimport multiprocessing.pool\nimport os\nimport threading\nfrom pathlib import Path\nfrom queue import Queue\nfrom typing import Any\n\nfrom llama_index.core.data_structs import IndexDict\nfrom llama_index.core.embeddings.utils import EmbedType\nfrom llama_index.core.indices import VectorStoreIndex, load_index_from_storage\nfrom llama_index.core.indices.base import BaseIndex\nfrom llama_index.core.ingestion import run_transformations\nfrom llama_index.core.schema import BaseNode, Document, TransformComponent\nfrom llama_index.core.storage import StorageContext\n\nfrom private_gpt.components.ingest.ingest_helper import IngestionHelper\nfrom private_gpt.paths import local_data_path\nfrom private_gpt.settings.settings import Settings\nfrom private_gpt.utils.eta import eta\n\nlogger = logging.getLogger(__name__)\n\n\nclass BaseIngestComponent(abc.ABC):\n    def __init__(\n        self,\n        storage_context: StorageContext,\n        embed_model: EmbedType,\n        transformations: list[TransformComponent],\n        *args: Any,\n        **kwargs: Any,\n    ) -> None:\n        logger.debug(\"Initializing base ingest component type=%s\", type(self).__name__)\n        self.storage_context = storage_context\n        self.embed_model = embed_model\n        self.transformations = transformations\n\n    @abc.abstractmethod\n    def ingest(self, file_name: str, file_data: Path) -> list[Document]:\n        pass\n\n    @abc.abstractmethod\n    def bulk_ingest(self, files: list[tuple[str, Path]]) -> list[Document]:\n        pass\n\n    @abc.abstractmethod\n    def delete(self, doc_id: str) -> None:\n        pass\n\n\nclass BaseIngestComponentWithIndex(BaseIngestComponent, abc.ABC):\n    def __init__(\n        self,\n        storage_context: StorageContext,\n        embed_model: EmbedType,\n        transformations: list[TransformComponent],\n        *args: Any,\n        **kwargs: Any,\n    ) -> None:\n        super().__init__(storage_context, embed_model, transformations, *args, **kwargs)\n\n        self.show_progress = True\n        self._index_thread_lock = (\n            threading.Lock()\n        )  # Thread lock! 
Not Multiprocessing lock\n        self._index = self._initialize_index()\n\n    def _initialize_index(self) -> BaseIndex[IndexDict]:\n        \"\"\"Initialize the index from the storage context.\"\"\"\n        try:\n            # Load the index with store_nodes_override=True to be able to delete them\n            index = load_index_from_storage(\n                storage_context=self.storage_context,\n                store_nodes_override=True,  # Force store nodes in index and document stores\n                show_progress=self.show_progress,\n                embed_model=self.embed_model,\n                transformations=self.transformations,\n            )\n        except ValueError:\n            # There are no index in the storage context, creating a new one\n            logger.info(\"Creating a new vector store index\")\n            index = VectorStoreIndex.from_documents(\n                [],\n                storage_context=self.storage_context,\n                store_nodes_override=True,  # Force store nodes in index and document stores\n                show_progress=self.show_progress,\n                embed_model=self.embed_model,\n                transformations=self.transformations,\n            )\n            index.storage_context.persist(persist_dir=local_data_path)\n        return index\n\n    def _save_index(self) -> None:\n        self._index.storage_context.persist(persist_dir=local_data_path)\n\n    def delete(self, doc_id: str) -> None:\n        with self._index_thread_lock:\n            # Delete the document from the index\n            self._index.delete_ref_doc(doc_id, delete_from_docstore=True)\n\n            # Save the index\n            self._save_index()\n\n\nclass SimpleIngestComponent(BaseIngestComponentWithIndex):\n    def __init__(\n        self,\n        storage_context: StorageContext,\n        embed_model: EmbedType,\n        transformations: list[TransformComponent],\n        *args: Any,\n        **kwargs: Any,\n    ) -> None:\n        super().__init__(storage_context, embed_model, transformations, *args, **kwargs)\n\n    def ingest(self, file_name: str, file_data: Path) -> list[Document]:\n        logger.info(\"Ingesting file_name=%s\", file_name)\n        documents = IngestionHelper.transform_file_into_documents(file_name, file_data)\n        logger.info(\n            \"Transformed file=%s into count=%s documents\", file_name, len(documents)\n        )\n        logger.debug(\"Saving the documents in the index and doc store\")\n        return self._save_docs(documents)\n\n    def bulk_ingest(self, files: list[tuple[str, Path]]) -> list[Document]:\n        saved_documents = []\n        for file_name, file_data in files:\n            documents = IngestionHelper.transform_file_into_documents(\n                file_name, file_data\n            )\n            saved_documents.extend(self._save_docs(documents))\n        return saved_documents\n\n    def _save_docs(self, documents: list[Document]) -> list[Document]:\n        logger.debug(\"Transforming count=%s documents into nodes\", len(documents))\n        with self._index_thread_lock:\n            for document in documents:\n                self._index.insert(document, show_progress=True)\n            logger.debug(\"Persisting the index and nodes\")\n            # persist the index and nodes\n            self._save_index()\n            logger.debug(\"Persisted the index and nodes\")\n        return documents\n\n\nclass BatchIngestComponent(BaseIngestComponentWithIndex):\n    \"\"\"Parallelize the file reading 
and parsing on multiple CPU cores.\n\n    This also allows the embeddings to be computed in batches (on GPU or CPU).\n    \"\"\"\n\n    def __init__(\n        self,\n        storage_context: StorageContext,\n        embed_model: EmbedType,\n        transformations: list[TransformComponent],\n        count_workers: int,\n        *args: Any,\n        **kwargs: Any,\n    ) -> None:\n        super().__init__(storage_context, embed_model, transformations, *args, **kwargs)\n        # To make efficient use of the CPU and GPU, the embeddings\n        # must be in the transformations\n        assert (\n            len(self.transformations) >= 2\n        ), \"Embeddings must be in the transformations\"\n        assert count_workers > 0, \"count_workers must be > 0\"\n        self.count_workers = count_workers\n\n        self._file_to_documents_work_pool = multiprocessing.Pool(\n            processes=self.count_workers\n        )\n\n    def ingest(self, file_name: str, file_data: Path) -> list[Document]:\n        logger.info(\"Ingesting file_name=%s\", file_name)\n        documents = IngestionHelper.transform_file_into_documents(file_name, file_data)\n        logger.info(\n            \"Transformed file=%s into count=%s documents\", file_name, len(documents)\n        )\n        logger.debug(\"Saving the documents in the index and doc store\")\n        return self._save_docs(documents)\n\n    def bulk_ingest(self, files: list[tuple[str, Path]]) -> list[Document]:\n        documents = list(\n            itertools.chain.from_iterable(\n                self._file_to_documents_work_pool.starmap(\n                    IngestionHelper.transform_file_into_documents, files\n                )\n            )\n        )\n        logger.info(\n            \"Transformed count=%s files into count=%s documents\",\n            len(files),\n            len(documents),\n        )\n        return self._save_docs(documents)\n\n    def _save_docs(self, documents: list[Document]) -> list[Document]:\n        logger.debug(\"Transforming count=%s documents into nodes\", len(documents))\n        nodes = run_transformations(\n            documents,  # type: ignore[arg-type]\n            self.transformations,\n            show_progress=self.show_progress,\n        )\n        # Locking the index to avoid concurrent writes\n        with self._index_thread_lock:\n            logger.info(\"Inserting count=%s nodes in the index\", len(nodes))\n            self._index.insert_nodes(nodes, show_progress=True)\n            for document in documents:\n                self._index.docstore.set_document_hash(\n                    document.get_doc_id(), document.hash\n                )\n            logger.debug(\"Persisting the index and nodes\")\n            # persist the index and nodes\n            self._save_index()\n            logger.debug(\"Persisted the index and nodes\")\n        return documents\n\n\nclass ParallelizedIngestComponent(BaseIngestComponentWithIndex):\n    \"\"\"Parallelize the file ingestion (file reading, embeddings, and index insertion).\n\n    This uses the CPU and GPU in parallel (both running at the same time), and\n    reduces the memory pressure by not loading all the files in memory at the same time.\n    \"\"\"\n\n    def __init__(\n        self,\n        storage_context: StorageContext,\n        embed_model: EmbedType,\n        transformations: list[TransformComponent],\n        count_workers: int,\n        *args: Any,\n        **kwargs: Any,\n    ) -> None:\n        super().__init__(storage_context, embed_model, 
transformations, *args, **kwargs)\n        # To make efficient use of the CPU and GPU, the embeddings\n        # must be in the transformations (to be computed in batches)\n        assert (\n            len(self.transformations) >= 2\n        ), \"Embeddings must be in the transformations\"\n        assert count_workers > 0, \"count_workers must be > 0\"\n        self.count_workers = count_workers\n        # We are doing our own multiprocessing.\n        # To avoid colliding with huggingface's own multiprocessing, we disable it\n        os.environ[\"TOKENIZERS_PARALLELISM\"] = \"false\"\n\n        self._ingest_work_pool = multiprocessing.pool.ThreadPool(\n            processes=self.count_workers\n        )\n\n        self._file_to_documents_work_pool = multiprocessing.Pool(\n            processes=self.count_workers\n        )\n\n    def ingest(self, file_name: str, file_data: Path) -> list[Document]:\n        logger.info(\"Ingesting file_name=%s\", file_name)\n        # Running in a single (1) process to release the current\n        # thread, and take a dedicated CPU core for computation\n        documents = self._file_to_documents_work_pool.apply(\n            IngestionHelper.transform_file_into_documents, (file_name, file_data)\n        )\n        logger.info(\n            \"Transformed file=%s into count=%s documents\", file_name, len(documents)\n        )\n        logger.debug(\"Saving the documents in the index and doc store\")\n        return self._save_docs(documents)\n\n    def bulk_ingest(self, files: list[tuple[str, Path]]) -> list[Document]:\n        # Lightweight threads, used to parallelize the\n        # underlying IO calls made in the ingestion\n\n        documents = list(\n            itertools.chain.from_iterable(\n                self._ingest_work_pool.starmap(self.ingest, files)\n            )\n        )\n        return documents\n\n    def _save_docs(self, documents: list[Document]) -> list[Document]:\n        logger.debug(\"Transforming count=%s documents into nodes\", len(documents))\n        nodes = run_transformations(\n            documents,  # type: ignore[arg-type]\n            self.transformations,\n            show_progress=self.show_progress,\n        )\n        # Locking the index to avoid concurrent writes\n        with self._index_thread_lock:\n            logger.info(\"Inserting count=%s nodes in the index\", len(nodes))\n            self._index.insert_nodes(nodes, show_progress=True)\n            for document in documents:\n                self._index.docstore.set_document_hash(\n                    document.get_doc_id(), document.hash\n                )\n            logger.debug(\"Persisting the index and nodes\")\n            # persist the index and nodes\n            self._save_index()\n            logger.debug(\"Persisted the index and nodes\")\n        return documents\n\n    def __del__(self) -> None:\n        # We need to do the appropriate cleanup of the multiprocessing pools\n        # when the object is deleted. 
Using the root logger to avoid\n        # the module logger being deleted before the pool\n        logging.debug(\"Closing the ingest work pool\")\n        self._ingest_work_pool.close()\n        self._ingest_work_pool.join()\n        self._ingest_work_pool.terminate()\n        logging.debug(\"Closing the file to documents work pool\")\n        self._file_to_documents_work_pool.close()\n        self._file_to_documents_work_pool.join()\n        self._file_to_documents_work_pool.terminate()\n\n\nclass PipelineIngestComponent(BaseIngestComponentWithIndex):\n    \"\"\"Pipeline ingestion - keeping the embedding worker pool as busy as possible.\n\n    This class implements a threaded ingestion pipeline, which comprises two threads\n    and two queues. The primary thread is responsible for reading and parsing files\n    into documents. These documents are then placed into a queue, which is\n    distributed to a pool of worker processes for embedding computation. After\n    embedding, the documents are transferred to another queue where they are\n    accumulated until a threshold is reached. Upon reaching this threshold, the\n    accumulated documents are flushed to the document store, index, and vector\n    store.\n\n    Exception handling ensures robustness against erroneous files. However, in the\n    pipelined design, one error can lead to the discarding of multiple files. Any\n    discarded files will be reported.\n    \"\"\"\n\n    NODE_FLUSH_COUNT = 5000  # Save the index every NODE_FLUSH_COUNT nodes.\n\n    def __init__(\n        self,\n        storage_context: StorageContext,\n        embed_model: EmbedType,\n        transformations: list[TransformComponent],\n        count_workers: int,\n        *args: Any,\n        **kwargs: Any,\n    ) -> None:\n        super().__init__(storage_context, embed_model, transformations, *args, **kwargs)\n        assert (\n            len(self.transformations) >= 2\n        ), \"Embeddings must be in the transformations\"\n        assert count_workers > 0, \"count_workers must be > 0\"\n        self.count_workers = count_workers\n        # We are doing our own multiprocessing.\n        # To avoid colliding with huggingface's own multiprocessing, we disable it\n        os.environ[\"TOKENIZERS_PARALLELISM\"] = \"false\"\n\n        # doc_q stores parsed files as Document chunks.\n        # Using a shallow queue causes the filesystem parser to block\n        # when it reaches capacity. This ensures it doesn't outpace the\n        # computationally intensive embeddings phase, avoiding unnecessary\n        # memory consumption.  
The semaphore is used to bound the async worker\n        # embedding computations to cause the doc Q to fill and block.\n        self.doc_semaphore = multiprocessing.Semaphore(\n            self.count_workers\n        )  # limit the doc queue to # items.\n        self.doc_q: Queue[tuple[str, str | None, list[Document] | None]] = Queue(20)\n        # node_q stores documents parsed into nodes (embeddings).\n        # Larger queue size so we don't block the embedding workers during a slow\n        # index update.\n        self.node_q: Queue[\n            tuple[str, str | None, list[Document] | None, list[BaseNode] | None]\n        ] = Queue(40)\n        threading.Thread(target=self._doc_to_node, daemon=True).start()\n        threading.Thread(target=self._write_nodes, daemon=True).start()\n\n    def _doc_to_node(self) -> None:\n        # Parse documents into nodes\n        with multiprocessing.pool.ThreadPool(processes=self.count_workers) as pool:\n            while True:\n                try:\n                    cmd, file_name, documents = self.doc_q.get(\n                        block=True\n                    )  # Documents for a file\n                    if cmd == \"process\":\n                        # Push CPU/GPU embedding work to the worker pool\n                        # Acquire semaphore to control access to worker pool\n                        self.doc_semaphore.acquire()\n                        pool.apply_async(\n                            self._doc_to_node_worker, (file_name, documents)\n                        )\n                    elif cmd == \"quit\":\n                        break\n                finally:\n                    if cmd != \"process\":\n                        self.doc_q.task_done()  # unblock Q joins\n\n    def _doc_to_node_worker(self, file_name: str, documents: list[Document]) -> None:\n        # CPU/GPU intensive work in its own process\n        try:\n            nodes = run_transformations(\n                documents,  # type: ignore[arg-type]\n                self.transformations,\n                show_progress=self.show_progress,\n            )\n            self.node_q.put((\"process\", file_name, documents, list(nodes)))\n        finally:\n            self.doc_semaphore.release()\n            self.doc_q.task_done()  # unblock Q joins\n\n    def _save_docs(\n        self, files: list[str], documents: list[Document], nodes: list[BaseNode]\n    ) -> None:\n        try:\n            logger.info(\n                f\"Saving {len(files)} files ({len(documents)} documents / {len(nodes)} nodes)\"\n            )\n            self._index.insert_nodes(nodes)\n            for document in documents:\n                self._index.docstore.set_document_hash(\n                    document.get_doc_id(), document.hash\n                )\n            self._save_index()\n        except Exception:\n            # Tell the user so they can investigate these files\n            logger.exception(f\"Processing files {files}\")\n        finally:\n            # Clearing work, even on exception, maintains a clean state.\n            nodes.clear()\n            documents.clear()\n            files.clear()\n\n    def _write_nodes(self) -> None:\n        # Save nodes to index.  
I/O intensive.\n        node_stack: list[BaseNode] = []\n        doc_stack: list[Document] = []\n        file_stack: list[str] = []\n        while True:\n            try:\n                cmd, file_name, documents, nodes = self.node_q.get(block=True)\n                if cmd in (\"flush\", \"quit\"):\n                    if file_stack:\n                        self._save_docs(file_stack, doc_stack, node_stack)\n                    if cmd == \"quit\":\n                        break\n                elif cmd == \"process\":\n                    node_stack.extend(nodes)  # type: ignore[arg-type]\n                    doc_stack.extend(documents)  # type: ignore[arg-type]\n                    file_stack.append(file_name)  # type: ignore[arg-type]\n                    # Constant saving is heavy on I/O - accumulate to a threshold\n                    if len(node_stack) >= self.NODE_FLUSH_COUNT:\n                        self._save_docs(file_stack, doc_stack, node_stack)\n            finally:\n                self.node_q.task_done()\n\n    def _flush(self) -> None:\n        self.doc_q.put((\"flush\", None, None))\n        self.doc_q.join()\n        self.node_q.put((\"flush\", None, None, None))\n        self.node_q.join()\n\n    def ingest(self, file_name: str, file_data: Path) -> list[Document]:\n        documents = IngestionHelper.transform_file_into_documents(file_name, file_data)\n        self.doc_q.put((\"process\", file_name, documents))\n        self._flush()\n        return documents\n\n    def bulk_ingest(self, files: list[tuple[str, Path]]) -> list[Document]:\n        docs = []\n        for file_name, file_data in eta(files):\n            try:\n                documents = IngestionHelper.transform_file_into_documents(\n                    file_name, file_data\n                )\n                self.doc_q.put((\"process\", file_name, documents))\n                docs.extend(documents)\n            except Exception:\n                logger.exception(f\"Skipping {file_data.name}\")\n        self._flush()\n        return docs\n\n\ndef get_ingestion_component(\n    storage_context: StorageContext,\n    embed_model: EmbedType,\n    transformations: list[TransformComponent],\n    settings: Settings,\n) -> BaseIngestComponent:\n    \"\"\"Get the ingestion component for the given configuration.\"\"\"\n    ingest_mode = settings.embedding.ingest_mode\n    if ingest_mode == \"batch\":\n        return BatchIngestComponent(\n            storage_context=storage_context,\n            embed_model=embed_model,\n            transformations=transformations,\n            count_workers=settings.embedding.count_workers,\n        )\n    elif ingest_mode == \"parallel\":\n        return ParallelizedIngestComponent(\n            storage_context=storage_context,\n            embed_model=embed_model,\n            transformations=transformations,\n            count_workers=settings.embedding.count_workers,\n        )\n    elif ingest_mode == \"pipeline\":\n        return PipelineIngestComponent(\n            storage_context=storage_context,\n            embed_model=embed_model,\n            transformations=transformations,\n            count_workers=settings.embedding.count_workers,\n        )\n    else:\n        return SimpleIngestComponent(\n            storage_context=storage_context,\n            embed_model=embed_model,\n            transformations=transformations,\n        )\n"
  },
  {
    "path": "private_gpt/components/ingest/ingest_helper.py",
    "content": "import logging\nfrom pathlib import Path\n\nfrom llama_index.core.readers import StringIterableReader\nfrom llama_index.core.readers.base import BaseReader\nfrom llama_index.core.readers.json import JSONReader\nfrom llama_index.core.schema import Document\n\nlogger = logging.getLogger(__name__)\n\n\n# Inspired by the `llama_index.core.readers.file.base` module\ndef _try_loading_included_file_formats() -> dict[str, type[BaseReader]]:\n    try:\n        from llama_index.readers.file.docs import (  # type: ignore\n            DocxReader,\n            HWPReader,\n            PDFReader,\n        )\n        from llama_index.readers.file.epub import EpubReader  # type: ignore\n        from llama_index.readers.file.image import ImageReader  # type: ignore\n        from llama_index.readers.file.ipynb import IPYNBReader  # type: ignore\n        from llama_index.readers.file.markdown import MarkdownReader  # type: ignore\n        from llama_index.readers.file.mbox import MboxReader  # type: ignore\n        from llama_index.readers.file.slides import PptxReader  # type: ignore\n        from llama_index.readers.file.tabular import PandasCSVReader  # type: ignore\n        from llama_index.readers.file.video_audio import (  # type: ignore\n            VideoAudioReader,\n        )\n    except ImportError as e:\n        raise ImportError(\"`llama-index-readers-file` package not found\") from e\n\n    default_file_reader_cls: dict[str, type[BaseReader]] = {\n        \".hwp\": HWPReader,\n        \".pdf\": PDFReader,\n        \".docx\": DocxReader,\n        \".pptx\": PptxReader,\n        \".ppt\": PptxReader,\n        \".pptm\": PptxReader,\n        \".jpg\": ImageReader,\n        \".png\": ImageReader,\n        \".jpeg\": ImageReader,\n        \".mp3\": VideoAudioReader,\n        \".mp4\": VideoAudioReader,\n        \".csv\": PandasCSVReader,\n        \".epub\": EpubReader,\n        \".md\": MarkdownReader,\n        \".mbox\": MboxReader,\n        \".ipynb\": IPYNBReader,\n    }\n    return default_file_reader_cls\n\n\n# Patching the default file reader to support other file types\nFILE_READER_CLS = _try_loading_included_file_formats()\nFILE_READER_CLS.update(\n    {\n        \".json\": JSONReader,\n    }\n)\n\n\nclass IngestionHelper:\n    \"\"\"Helper class to transform a file into a list of documents.\n\n    This class should be used to transform a file into a list of documents.\n    These methods are thread-safe (and multiprocessing-safe).\n    \"\"\"\n\n    @staticmethod\n    def transform_file_into_documents(\n        file_name: str, file_data: Path\n    ) -> list[Document]:\n        documents = IngestionHelper._load_file_to_documents(file_name, file_data)\n        for document in documents:\n            document.metadata[\"file_name\"] = file_name\n        IngestionHelper._exclude_metadata(documents)\n        return documents\n\n    @staticmethod\n    def _load_file_to_documents(file_name: str, file_data: Path) -> list[Document]:\n        logger.debug(\"Transforming file_name=%s into documents\", file_name)\n        extension = Path(file_name).suffix\n        reader_cls = FILE_READER_CLS.get(extension)\n        if reader_cls is None:\n            logger.debug(\n                \"No reader found for extension=%s, using default string reader\",\n                extension,\n            )\n            # Read as a plain text\n            string_reader = StringIterableReader()\n            return string_reader.load_data([file_data.read_text()])\n\n        logger.debug(\"Specific reader 
found for extension=%s\", extension)\n        documents = reader_cls().load_data(file_data)\n\n        # Sanitize NUL bytes in text which can't be stored in Postgres\n        for i in range(len(documents)):\n            documents[i].text = documents[i].text.replace(\"\\u0000\", \"\")\n\n        return documents\n\n    @staticmethod\n    def _exclude_metadata(documents: list[Document]) -> None:\n        logger.debug(\"Excluding metadata from count=%s documents\", len(documents))\n        for document in documents:\n            document.metadata[\"doc_id\"] = document.doc_id\n            # We don't want the Embeddings search to receive this metadata\n            document.excluded_embed_metadata_keys = [\"doc_id\"]\n            # We don't want the LLM to receive these metadata in the context\n            document.excluded_llm_metadata_keys = [\"file_name\", \"doc_id\", \"page_label\"]\n"
  },
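  {
    "path": "examples/ingest_helper_example.py",
    "content": "\"\"\"Illustrative sketch, not part of the upstream PrivateGPT sources.\n\nShows how IngestionHelper.transform_file_into_documents turns a plain-text\nfile into LlamaIndex documents. It assumes PrivateGPT and its default\ndependencies (including llama-index-readers-file) are installed; the file\nname used here is invented for the demo.\n\"\"\"\nfrom pathlib import Path\nfrom tempfile import TemporaryDirectory\n\nfrom private_gpt.components.ingest.ingest_helper import IngestionHelper\n\nwith TemporaryDirectory() as tmp_dir:\n    # .txt has no dedicated reader, so the helper falls back to the string reader.\n    sample = Path(tmp_dir) / \"notes.txt\"\n    sample.write_text(\"PrivateGPT ingestion demo.\")\n\n    documents = IngestionHelper.transform_file_into_documents(\"notes.txt\", sample)\n    for document in documents:\n        # file_name is attached as metadata; doc_id is excluded from embeddings and LLM context.\n        print(document.doc_id, document.metadata[\"file_name\"], document.text)\n"
  },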
  {
    "path": "private_gpt/components/llm/__init__.py",
    "content": "\"\"\"LLM implementations.\"\"\"\n"
  },
  {
    "path": "private_gpt/components/llm/custom/__init__.py",
    "content": ""
  },
  {
    "path": "private_gpt/components/llm/custom/sagemaker.py",
    "content": "# mypy: ignore-errors\nfrom __future__ import annotations\n\nimport io\nimport json\nimport logging\nfrom typing import TYPE_CHECKING, Any\n\nimport boto3  # type: ignore\nfrom llama_index.core.base.llms.generic_utils import (\n    completion_response_to_chat_response,\n    stream_completion_response_to_chat_response,\n)\nfrom llama_index.core.bridge.pydantic import Field\nfrom llama_index.core.llms import (\n    CompletionResponse,\n    CustomLLM,\n    LLMMetadata,\n)\nfrom llama_index.core.llms.callbacks import (\n    llm_chat_callback,\n    llm_completion_callback,\n)\n\nif TYPE_CHECKING:\n    from collections.abc import Sequence\n\n    from llama_index.callbacks import CallbackManager\n    from llama_index.llms import (\n        ChatMessage,\n        ChatResponse,\n        ChatResponseGen,\n        CompletionResponseGen,\n    )\n\nlogger = logging.getLogger(__name__)\n\n\nclass LineIterator:\n    r\"\"\"A helper class for parsing the byte stream input from TGI container.\n\n    The output of the model will be in the following format:\n    ```\n    b'data:{\"token\": {\"text\": \" a\"}}\\n\\n'\n    b'data:{\"token\": {\"text\": \" challenging\"}}\\n\\n'\n    b'data:{\"token\": {\"text\": \" problem\"\n    b'}}'\n    ...\n    ```\n\n    While usually each PayloadPart event from the event stream will contain a byte array\n    with a full json, this is not guaranteed and some of the json objects may be split\n    across PayloadPart events. For example:\n    ```\n    {'PayloadPart': {'Bytes': b'{\"outputs\": '}}\n    {'PayloadPart': {'Bytes': b'[\" problem\"]}\\n'}}\n    ```\n\n\n    This class accounts for this by concatenating bytes written via the 'write' function\n    and then exposing a method which will return lines (ending with a '\\n' character)\n    within the buffer via the 'scan_lines' function. It maintains the position of the\n    last read position to ensure that previous bytes are not exposed again. 
It will\n    also save any pending lines that do not end with a '\\n' to make sure truncations\n    are concatenated\n    \"\"\"\n\n    def __init__(self, stream: Any) -> None:\n        \"\"\"Line iterator initializer.\"\"\"\n        self.byte_iterator = iter(stream)\n        self.buffer = io.BytesIO()\n        self.read_pos = 0\n\n    def __iter__(self) -> Any:\n        \"\"\"Self iterator.\"\"\"\n        return self\n\n    def __next__(self) -> Any:\n        \"\"\"Next element from iterator.\"\"\"\n        while True:\n            self.buffer.seek(self.read_pos)\n            line = self.buffer.readline()\n            if line and line[-1] == ord(\"\\n\"):\n                self.read_pos += len(line)\n                return line[:-1]\n            try:\n                chunk = next(self.byte_iterator)\n            except StopIteration:\n                if self.read_pos < self.buffer.getbuffer().nbytes:\n                    continue\n                raise\n            if \"PayloadPart\" not in chunk:\n                logger.warning(\"Unknown event type=%s\", chunk)\n                continue\n            self.buffer.seek(0, io.SEEK_END)\n            self.buffer.write(chunk[\"PayloadPart\"][\"Bytes\"])\n\n\nclass SagemakerLLM(CustomLLM):\n    \"\"\"Sagemaker Inference Endpoint models.\n\n    To use, you must supply the endpoint name from your deployed\n    Sagemaker model & the region where it is deployed.\n\n    To authenticate, the AWS client uses the following methods to\n    automatically load credentials:\n    https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html\n\n    If a specific credential profile should be used, you must pass\n    the name of the profile from the ~/.aws/credentials file that is to be used.\n\n    Make sure the credentials / roles used have the required policies to\n    access the Sagemaker endpoint.\n    See: https://docs.aws.amazon.com/IAM/latest/UserGuide/access_policies.html\n    \"\"\"\n\n    endpoint_name: str = Field(description=\"\")\n    temperature: float = Field(description=\"The temperature to use for sampling.\")\n    max_new_tokens: int = Field(description=\"The maximum number of tokens to generate.\")\n    context_window: int = Field(\n        description=\"The maximum number of context tokens for the model.\"\n    )\n    messages_to_prompt: Any = Field(\n        description=\"The function to convert messages to a prompt.\", exclude=True\n    )\n    completion_to_prompt: Any = Field(\n        description=\"The function to convert a completion to a prompt.\", exclude=True\n    )\n    generate_kwargs: dict[str, Any] = Field(\n        default_factory=dict, description=\"Kwargs used for generation.\"\n    )\n    model_kwargs: dict[str, Any] = Field(\n        default_factory=dict, description=\"Kwargs used for model initialization.\"\n    )\n    verbose: bool = Field(description=\"Whether to print verbose output.\")\n\n    _boto_client: Any = boto3.client(\n        \"sagemaker-runtime\",\n    )  # TODO make it an optional field\n\n    def __init__(\n        self,\n        endpoint_name: str | None = \"\",\n        temperature: float = 0.1,\n        max_new_tokens: int = 512,  # to review defaults\n        context_window: int = 2048,  # to review defaults\n        messages_to_prompt: Any = None,\n        completion_to_prompt: Any = None,\n        callback_manager: CallbackManager | None = None,\n        generate_kwargs: dict[str, Any] | None = None,\n        model_kwargs: dict[str, Any] | None = None,\n        verbose: bool = True,\n 
   ) -> None:\n        \"\"\"SagemakerLLM initializer.\"\"\"\n        model_kwargs = model_kwargs or {}\n        model_kwargs.update({\"n_ctx\": context_window, \"verbose\": verbose})\n\n        messages_to_prompt = messages_to_prompt or {}\n        completion_to_prompt = completion_to_prompt or {}\n\n        generate_kwargs = generate_kwargs or {}\n        generate_kwargs.update(\n            {\"temperature\": temperature, \"max_tokens\": max_new_tokens}\n        )\n\n        super().__init__(\n            endpoint_name=endpoint_name,\n            temperature=temperature,\n            context_window=context_window,\n            max_new_tokens=max_new_tokens,\n            messages_to_prompt=messages_to_prompt,\n            completion_to_prompt=completion_to_prompt,\n            callback_manager=callback_manager,\n            generate_kwargs=generate_kwargs,\n            model_kwargs=model_kwargs,\n            verbose=verbose,\n        )\n\n    @property\n    def inference_params(self):\n        # TODO expose the rest of params\n        return {\n            \"do_sample\": True,\n            \"top_p\": 0.7,\n            \"temperature\": self.temperature,\n            \"top_k\": 50,\n            \"max_new_tokens\": self.max_new_tokens,\n        }\n\n    @property\n    def metadata(self) -> LLMMetadata:\n        \"\"\"Get LLM metadata.\"\"\"\n        return LLMMetadata(\n            context_window=self.context_window,\n            num_output=self.max_new_tokens,\n            model_name=\"Sagemaker LLama 2\",\n        )\n\n    @llm_completion_callback()\n    def complete(self, prompt: str, **kwargs: Any) -> CompletionResponse:\n        self.generate_kwargs.update({\"stream\": False})\n\n        is_formatted = kwargs.pop(\"formatted\", False)\n        if not is_formatted:\n            prompt = self.completion_to_prompt(prompt)\n\n        request_params = {\n            \"inputs\": prompt,\n            \"stream\": False,\n            \"parameters\": self.inference_params,\n        }\n\n        resp = self._boto_client.invoke_endpoint(\n            EndpointName=self.endpoint_name,\n            Body=json.dumps(request_params),\n            ContentType=\"application/json\",\n        )\n\n        response_body = resp[\"Body\"]\n        response_str = response_body.read().decode(\"utf-8\")\n        response_dict = json.loads(response_str)\n\n        return CompletionResponse(\n            text=response_dict[0][\"generated_text\"][len(prompt) :], raw=resp\n        )\n\n    @llm_completion_callback()\n    def stream_complete(self, prompt: str, **kwargs: Any) -> CompletionResponseGen:\n        def get_stream():\n            text = \"\"\n\n            request_params = {\n                \"inputs\": prompt,\n                \"stream\": True,\n                \"parameters\": self.inference_params,\n            }\n            resp = self._boto_client.invoke_endpoint_with_response_stream(\n                EndpointName=self.endpoint_name,\n                Body=json.dumps(request_params),\n                ContentType=\"application/json\",\n            )\n\n            event_stream = resp[\"Body\"]\n            start_json = b\"{\"\n            stop_token = \"<|endoftext|>\"\n            first_token = True\n\n            for line in LineIterator(event_stream):\n                if line != b\"\" and start_json in line:\n                    data = json.loads(line[line.find(start_json) :].decode(\"utf-8\"))\n                    special = data[\"token\"][\"special\"]\n                    stop = 
data[\"token\"][\"text\"] == stop_token\n                    if not special and not stop:\n                        delta = data[\"token\"][\"text\"]\n                        # trim the leading space for the first token if present\n                        if first_token:\n                            delta = delta.lstrip()\n                            first_token = False\n                        text += delta\n                        yield CompletionResponse(delta=delta, text=text, raw=data)\n\n        return get_stream()\n\n    @llm_chat_callback()\n    def chat(self, messages: Sequence[ChatMessage], **kwargs: Any) -> ChatResponse:\n        prompt = self.messages_to_prompt(messages)\n        completion_response = self.complete(prompt, formatted=True, **kwargs)\n        return completion_response_to_chat_response(completion_response)\n\n    @llm_chat_callback()\n    def stream_chat(\n        self, messages: Sequence[ChatMessage], **kwargs: Any\n    ) -> ChatResponseGen:\n        prompt = self.messages_to_prompt(messages)\n        completion_response = self.stream_complete(prompt, formatted=True, **kwargs)\n        return stream_completion_response_to_chat_response(completion_response)\n"
  },
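  {
    "path": "examples/sagemaker_line_iterator_example.py",
    "content": "\"\"\"Illustrative sketch, not part of the upstream PrivateGPT sources.\n\nFeeds LineIterator a hand-crafted sequence of SageMaker-style PayloadPart\nevents to show how JSON lines split across events are reassembled. Importing\nthe module needs the llms-sagemaker extra (boto3) and, since a\nsagemaker-runtime client is created at import time, a default AWS region.\nThe payload bytes below are invented for the demo.\n\"\"\"\nfrom private_gpt.components.llm.custom.sagemaker import LineIterator\n\n# One TGI \"data:\" line deliberately split across two events, plus a complete one.\nevents = [\n    {\"PayloadPart\": {\"Bytes\": b'data:{\"token\": {\"text\": \" Hel'}},\n    {\"PayloadPart\": {\"Bytes\": b'lo\"}}\\n\\ndata:{\"token\": '}},\n    {\"PayloadPart\": {\"Bytes\": b'{\"text\": \" world\"}}\\n\\n'}},\n]\n\nfor line in LineIterator(events):\n    if line:  # skip the empty separator lines\n        print(line.decode(\"utf-8\"))\n# Expected output:\n# data:{\"token\": {\"text\": \" Hello\"}}\n# data:{\"token\": {\"text\": \" world\"}}\n"
  },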
  {
    "path": "private_gpt/components/llm/llm_component.py",
    "content": "import logging\nfrom collections.abc import Callable\nfrom typing import Any\n\nfrom injector import inject, singleton\nfrom llama_index.core.llms import LLM, MockLLM\nfrom llama_index.core.settings import Settings as LlamaIndexSettings\nfrom llama_index.core.utils import set_global_tokenizer\nfrom transformers import AutoTokenizer  # type: ignore\n\nfrom private_gpt.components.llm.prompt_helper import get_prompt_style\nfrom private_gpt.paths import models_cache_path, models_path\nfrom private_gpt.settings.settings import Settings\n\nlogger = logging.getLogger(__name__)\n\n\n@singleton\nclass LLMComponent:\n    llm: LLM\n\n    @inject\n    def __init__(self, settings: Settings) -> None:\n        llm_mode = settings.llm.mode\n        if settings.llm.tokenizer and settings.llm.mode != \"mock\":\n            # Try to download the tokenizer. If it fails, the LLM will still work\n            # using the default one, which is less accurate.\n            try:\n                set_global_tokenizer(\n                    AutoTokenizer.from_pretrained(\n                        pretrained_model_name_or_path=settings.llm.tokenizer,\n                        cache_dir=str(models_cache_path),\n                        token=settings.huggingface.access_token,\n                    )\n                )\n            except Exception as e:\n                logger.warning(\n                    f\"Failed to download tokenizer {settings.llm.tokenizer}: {e!s}\"\n                    f\"Please follow the instructions in the documentation to download it if needed: \"\n                    f\"https://docs.privategpt.dev/installation/getting-started/troubleshooting#tokenizer-setup.\"\n                    f\"Falling back to default tokenizer.\"\n                )\n\n        logger.info(\"Initializing the LLM in mode=%s\", llm_mode)\n        match settings.llm.mode:\n            case \"llamacpp\":\n                try:\n                    from llama_index.llms.llama_cpp import LlamaCPP  # type: ignore\n                except ImportError as e:\n                    raise ImportError(\n                        \"Local dependencies not found, install with `poetry install --extras llms-llama-cpp`\"\n                    ) from e\n\n                prompt_style = get_prompt_style(settings.llm.prompt_style)\n                settings_kwargs = {\n                    \"tfs_z\": settings.llamacpp.tfs_z,  # ollama and llama-cpp\n                    \"top_k\": settings.llamacpp.top_k,  # ollama and llama-cpp\n                    \"top_p\": settings.llamacpp.top_p,  # ollama and llama-cpp\n                    \"repeat_penalty\": settings.llamacpp.repeat_penalty,  # ollama llama-cpp\n                    \"n_gpu_layers\": -1,\n                    \"offload_kqv\": True,\n                }\n                self.llm = LlamaCPP(\n                    model_path=str(models_path / settings.llamacpp.llm_hf_model_file),\n                    temperature=settings.llm.temperature,\n                    max_new_tokens=settings.llm.max_new_tokens,\n                    context_window=settings.llm.context_window,\n                    generate_kwargs={},\n                    callback_manager=LlamaIndexSettings.callback_manager,\n                    # All to GPU\n                    model_kwargs=settings_kwargs,\n                    # transform inputs into Llama2 format\n                    messages_to_prompt=prompt_style.messages_to_prompt,\n                    completion_to_prompt=prompt_style.completion_to_prompt,\n                    
verbose=True,\n                )\n\n            case \"sagemaker\":\n                try:\n                    from private_gpt.components.llm.custom.sagemaker import SagemakerLLM\n                except ImportError as e:\n                    raise ImportError(\n                        \"Sagemaker dependencies not found, install with `poetry install --extras llms-sagemaker`\"\n                    ) from e\n\n                self.llm = SagemakerLLM(\n                    endpoint_name=settings.sagemaker.llm_endpoint_name,\n                    max_new_tokens=settings.llm.max_new_tokens,\n                    context_window=settings.llm.context_window,\n                )\n            case \"openai\":\n                try:\n                    from llama_index.llms.openai import OpenAI  # type: ignore\n                except ImportError as e:\n                    raise ImportError(\n                        \"OpenAI dependencies not found, install with `poetry install --extras llms-openai`\"\n                    ) from e\n\n                openai_settings = settings.openai\n                self.llm = OpenAI(\n                    api_base=openai_settings.api_base,\n                    api_key=openai_settings.api_key,\n                    model=openai_settings.model,\n                )\n            case \"openailike\":\n                try:\n                    from llama_index.llms.openai_like import OpenAILike  # type: ignore\n                except ImportError as e:\n                    raise ImportError(\n                        \"OpenAILike dependencies not found, install with `poetry install --extras llms-openai-like`\"\n                    ) from e\n                prompt_style = get_prompt_style(settings.llm.prompt_style)\n                openai_settings = settings.openai\n                self.llm = OpenAILike(\n                    api_base=openai_settings.api_base,\n                    api_key=openai_settings.api_key,\n                    model=openai_settings.model,\n                    is_chat_model=True,\n                    max_tokens=settings.llm.max_new_tokens,\n                    api_version=\"\",\n                    temperature=settings.llm.temperature,\n                    context_window=settings.llm.context_window,\n                    messages_to_prompt=prompt_style.messages_to_prompt,\n                    completion_to_prompt=prompt_style.completion_to_prompt,\n                    tokenizer=settings.llm.tokenizer,\n                    timeout=openai_settings.request_timeout,\n                    reuse_client=False,\n                )\n            case \"ollama\":\n                try:\n                    from llama_index.llms.ollama import Ollama  # type: ignore\n                except ImportError as e:\n                    raise ImportError(\n                        \"Ollama dependencies not found, install with `poetry install --extras llms-ollama`\"\n                    ) from e\n\n                ollama_settings = settings.ollama\n\n                settings_kwargs = {\n                    \"tfs_z\": ollama_settings.tfs_z,  # ollama and llama-cpp\n                    \"num_predict\": ollama_settings.num_predict,  # ollama only\n                    \"top_k\": ollama_settings.top_k,  # ollama and llama-cpp\n                    \"top_p\": ollama_settings.top_p,  # ollama and llama-cpp\n                    \"repeat_last_n\": ollama_settings.repeat_last_n,  # ollama\n                    \"repeat_penalty\": ollama_settings.repeat_penalty,  # ollama llama-cpp\n                
}\n\n                # Resolve the model name; if no tag is provided, default to :latest\n                model_name = (\n                    ollama_settings.llm_model + \":latest\"\n                    if \":\" not in ollama_settings.llm_model\n                    else ollama_settings.llm_model\n                )\n\n                llm = Ollama(\n                    model=model_name,\n                    base_url=ollama_settings.api_base,\n                    temperature=settings.llm.temperature,\n                    context_window=settings.llm.context_window,\n                    additional_kwargs=settings_kwargs,\n                    request_timeout=ollama_settings.request_timeout,\n                )\n\n                if ollama_settings.autopull_models:\n                    from private_gpt.utils.ollama import check_connection, pull_model\n\n                    if not check_connection(llm.client):\n                        raise ValueError(\n                            f\"Failed to connect to Ollama, \"\n                            f\"check if Ollama server is running on {ollama_settings.api_base}\"\n                        )\n                    pull_model(llm.client, model_name)\n\n                if (\n                    ollama_settings.keep_alive\n                    != ollama_settings.model_fields[\"keep_alive\"].default\n                ):\n                    # Modify Ollama methods to use the \"keep_alive\" field.\n                    def add_keep_alive(func: Callable[..., Any]) -> Callable[..., Any]:\n                        def wrapper(*args: Any, **kwargs: Any) -> Any:\n                            kwargs[\"keep_alive\"] = ollama_settings.keep_alive\n                            return func(*args, **kwargs)\n\n                        return wrapper\n\n                    Ollama.chat = add_keep_alive(Ollama.chat)  # type: ignore\n                    Ollama.stream_chat = add_keep_alive(Ollama.stream_chat)  # type: ignore\n                    Ollama.complete = add_keep_alive(Ollama.complete)  # type: ignore\n                    Ollama.stream_complete = add_keep_alive(Ollama.stream_complete)  # type: ignore\n\n                self.llm = llm\n\n            case \"azopenai\":\n                try:\n                    from llama_index.llms.azure_openai import (  # type: ignore\n                        AzureOpenAI,\n                    )\n                except ImportError as e:\n                    raise ImportError(\n                        \"Azure OpenAI dependencies not found, install with `poetry install --extras llms-azopenai`\"\n                    ) from e\n\n                azopenai_settings = settings.azopenai\n                self.llm = AzureOpenAI(\n                    model=azopenai_settings.llm_model,\n                    deployment_name=azopenai_settings.llm_deployment_name,\n                    api_key=azopenai_settings.api_key,\n                    azure_endpoint=azopenai_settings.azure_endpoint,\n                    api_version=azopenai_settings.api_version,\n                )\n            case \"gemini\":\n                try:\n                    from llama_index.llms.gemini import (  # type: ignore\n                        Gemini,\n                    )\n                except ImportError as e:\n                    raise ImportError(\n                        \"Google Gemini dependencies not found, install with `poetry install --extras llms-gemini`\"\n                    ) from e\n                gemini_settings = settings.gemini\n                self.llm = 
Gemini(\n                    model_name=gemini_settings.model, api_key=gemini_settings.api_key\n                )\n            case \"mock\":\n                self.llm = MockLLM()\n"
  },
  {
    "path": "private_gpt/components/llm/prompt_helper.py",
    "content": "import abc\nimport logging\nfrom collections.abc import Sequence\nfrom typing import Any, Literal\n\nfrom llama_index.core.llms import ChatMessage, MessageRole\n\nlogger = logging.getLogger(__name__)\n\n\nclass AbstractPromptStyle(abc.ABC):\n    \"\"\"Abstract class for prompt styles.\n\n    This class is used to format a series of messages into a prompt that can be\n    understood by the models. A series of messages represents the interaction(s)\n    between a user and an assistant. This series of messages can be considered as a\n    session between a user X and an assistant Y.This session holds, through the\n    messages, the state of the conversation. This session, to be understood by the\n    model, needs to be formatted into a prompt (i.e. a string that the models\n    can understand). Prompts can be formatted in different ways,\n    depending on the model.\n\n    The implementations of this class represent the different ways to format a\n    series of messages into a prompt.\n    \"\"\"\n\n    def __init__(self, *args: Any, **kwargs: Any) -> None:\n        logger.debug(\"Initializing prompt_style=%s\", self.__class__.__name__)\n\n    @abc.abstractmethod\n    def _messages_to_prompt(self, messages: Sequence[ChatMessage]) -> str:\n        pass\n\n    @abc.abstractmethod\n    def _completion_to_prompt(self, completion: str) -> str:\n        pass\n\n    def messages_to_prompt(self, messages: Sequence[ChatMessage]) -> str:\n        prompt = self._messages_to_prompt(messages)\n        logger.debug(\"Got for messages='%s' the prompt='%s'\", messages, prompt)\n        return prompt\n\n    def completion_to_prompt(self, prompt: str) -> str:\n        completion = prompt  # Fix: Llama-index parameter has to be named as prompt\n        prompt = self._completion_to_prompt(completion)\n        logger.debug(\"Got for completion='%s' the prompt='%s'\", completion, prompt)\n        return prompt\n\n\nclass DefaultPromptStyle(AbstractPromptStyle):\n    \"\"\"Default prompt style that uses the defaults from llama_utils.\n\n    It basically passes None to the LLM, indicating it should use\n    the default functions.\n    \"\"\"\n\n    def __init__(self, *args: Any, **kwargs: Any) -> None:\n        super().__init__(*args, **kwargs)\n\n        # Hacky way to override the functions\n        # Override the functions to be None, and pass None to the LLM.\n        self.messages_to_prompt = None  # type: ignore[method-assign, assignment]\n        self.completion_to_prompt = None  # type: ignore[method-assign, assignment]\n\n    def _messages_to_prompt(self, messages: Sequence[ChatMessage]) -> str:\n        return \"\"\n\n    def _completion_to_prompt(self, completion: str) -> str:\n        return \"\"\n\n\nclass Llama2PromptStyle(AbstractPromptStyle):\n    \"\"\"Simple prompt style that uses llama 2 prompt style.\n\n    Inspired by llama_index/legacy/llms/llama_utils.py\n\n    It transforms the sequence of messages into a prompt that should look like:\n    ```text\n    <s> [INST] <<SYS>> your system prompt here. <</SYS>>\n\n    user message here [/INST] assistant (model) response here </s>\n    ```\n    \"\"\"\n\n    BOS, EOS = \"<s>\", \"</s>\"\n    B_INST, E_INST = \"[INST]\", \"[/INST]\"\n    B_SYS, E_SYS = \"<<SYS>>\\n\", \"\\n<</SYS>>\\n\\n\"\n    DEFAULT_SYSTEM_PROMPT = \"\"\"\\\n    You are a helpful, respectful and honest assistant. \\\n    Always answer as helpfully as possible and follow ALL given instructions. \\\n    Do not speculate or make up information. 
\\\n    Do not reference any given instructions or context. \\\n    \"\"\"\n\n    def _messages_to_prompt(self, messages: Sequence[ChatMessage]) -> str:\n        string_messages: list[str] = []\n        if messages[0].role == MessageRole.SYSTEM:\n            # pull out the system message (if it exists in messages)\n            system_message_str = messages[0].content or \"\"\n            messages = messages[1:]\n        else:\n            system_message_str = self.DEFAULT_SYSTEM_PROMPT\n\n        system_message_str = f\"{self.B_SYS} {system_message_str.strip()} {self.E_SYS}\"\n\n        for i in range(0, len(messages), 2):\n            # first message should always be a user\n            user_message = messages[i]\n            assert user_message.role == MessageRole.USER\n\n            if i == 0:\n                # make sure system prompt is included at the start\n                str_message = f\"{self.BOS} {self.B_INST} {system_message_str} \"\n            else:\n                # end previous user-assistant interaction\n                string_messages[-1] += f\" {self.EOS}\"\n                # no need to include system prompt\n                str_message = f\"{self.BOS} {self.B_INST} \"\n\n            # include user message content\n            str_message += f\"{user_message.content} {self.E_INST}\"\n\n            if len(messages) > (i + 1):\n                # if assistant message exists, add to str_message\n                assistant_message = messages[i + 1]\n                assert assistant_message.role == MessageRole.ASSISTANT\n                str_message += f\" {assistant_message.content}\"\n\n            string_messages.append(str_message)\n\n        return \"\".join(string_messages)\n\n    def _completion_to_prompt(self, completion: str) -> str:\n        system_prompt_str = self.DEFAULT_SYSTEM_PROMPT\n\n        return (\n            f\"{self.BOS} {self.B_INST} {self.B_SYS} {system_prompt_str.strip()} {self.E_SYS} \"\n            f\"{completion.strip()} {self.E_INST}\"\n        )\n\n\nclass Llama3PromptStyle(AbstractPromptStyle):\n    r\"\"\"Template for Meta's Llama 3.1.\n\n    The format follows this structure:\n    <|begin_of_text|>\n    <|start_header_id|>system<|end_header_id|>\n\n    [System message content]<|eot_id|>\n    <|start_header_id|>user<|end_header_id|>\n\n    [User message content]<|eot_id|>\n    <|start_header_id|>assistant<|end_header_id|>\n\n    [Assistant message content]<|eot_id|>\n    ...\n    (Repeat for each message, including possible 'ipython' role)\n    \"\"\"\n\n    BOS, EOS = \"<|begin_of_text|>\", \"<|end_of_text|>\"\n    B_INST, E_INST = \"<|start_header_id|>\", \"<|end_header_id|>\"\n    EOT = \"<|eot_id|>\"\n    B_SYS, E_SYS = \"<|start_header_id|>system<|end_header_id|>\", \"<|eot_id|>\"\n    ASSISTANT_INST = \"<|start_header_id|>assistant<|end_header_id|>\"\n    DEFAULT_SYSTEM_PROMPT = \"\"\"\\\n    You are a helpful, respectful and honest assistant. \\\n    Always answer as helpfully as possible and follow ALL given instructions. \\\n    Do not speculate or make up information. \\\n    Do not reference any given instructions or context. 
\\\n    \"\"\"\n\n    def _messages_to_prompt(self, messages: Sequence[ChatMessage]) -> str:\n        prompt = \"\"\n        has_system_message = False\n\n        for i, message in enumerate(messages):\n            if not message or message.content is None:\n                continue\n            if message.role == MessageRole.SYSTEM:\n                prompt += f\"{self.B_SYS}\\n\\n{message.content.strip()}{self.E_SYS}\"\n                has_system_message = True\n            else:\n                role_header = f\"{self.B_INST}{message.role.value}{self.E_INST}\"\n                prompt += f\"{role_header}\\n\\n{message.content.strip()}{self.EOT}\"\n\n            # Add assistant header if the last message is not from the assistant\n            if i == len(messages) - 1 and message.role != MessageRole.ASSISTANT:\n                prompt += f\"{self.ASSISTANT_INST}\\n\\n\"\n\n        # Add default system prompt if no system message was provided\n        if not has_system_message:\n            prompt = (\n                f\"{self.B_SYS}\\n\\n{self.DEFAULT_SYSTEM_PROMPT}{self.E_SYS}\" + prompt\n            )\n\n        # TODO: Implement tool handling logic\n\n        return prompt\n\n    def _completion_to_prompt(self, completion: str) -> str:\n        return (\n            f\"{self.B_SYS}\\n\\n{self.DEFAULT_SYSTEM_PROMPT}{self.E_SYS}\"\n            f\"{self.B_INST}user{self.E_INST}\\n\\n{completion.strip()}{self.EOT}\"\n            f\"{self.ASSISTANT_INST}\\n\\n\"\n        )\n\n\nclass TagPromptStyle(AbstractPromptStyle):\n    \"\"\"Tag prompt style (used by Vigogne) that uses the prompt style `<|ROLE|>`.\n\n    It transforms the sequence of messages into a prompt that should look like:\n    ```text\n    <|system|>: your system prompt here.\n    <|user|>: user message here\n    (possibly with context and question)\n    <|assistant|>: assistant (model) response here.\n    ```\n\n    FIXME: should we add surrounding `<s>` and `</s>` tags, like in llama2?\n    \"\"\"\n\n    def _messages_to_prompt(self, messages: Sequence[ChatMessage]) -> str:\n        \"\"\"Format message to prompt with `<|ROLE|>: MSG` style.\"\"\"\n        prompt = \"\"\n        for message in messages:\n            role = message.role\n            content = message.content or \"\"\n            message_from_user = f\"<|{role.lower()}|>: {content.strip()}\"\n            message_from_user += \"\\n\"\n            prompt += message_from_user\n        # we are missing the last <|assistant|> tag that will trigger a completion\n        prompt += \"<|assistant|>: \"\n        return prompt\n\n    def _completion_to_prompt(self, completion: str) -> str:\n        return self._messages_to_prompt(\n            [ChatMessage(content=completion, role=MessageRole.USER)]\n        )\n\n\nclass MistralPromptStyle(AbstractPromptStyle):\n    def _messages_to_prompt(self, messages: Sequence[ChatMessage]) -> str:\n        inst_buffer = []\n        text = \"\"\n        for message in messages:\n            if message.role == MessageRole.SYSTEM or message.role == MessageRole.USER:\n                inst_buffer.append(str(message.content).strip())\n            elif message.role == MessageRole.ASSISTANT:\n                text += \"<s>[INST] \" + \"\\n\".join(inst_buffer) + \" [/INST]\"\n                text += \" \" + str(message.content).strip() + \"</s>\"\n                inst_buffer.clear()\n            else:\n                raise ValueError(f\"Unknown message role {message.role}\")\n\n        if len(inst_buffer) > 0:\n            text += \"<s>[INST] \" 
+ \"\\n\".join(inst_buffer) + \" [/INST]\"\n\n        return text\n\n    def _completion_to_prompt(self, completion: str) -> str:\n        return self._messages_to_prompt(\n            [ChatMessage(content=completion, role=MessageRole.USER)]\n        )\n\n\nclass ChatMLPromptStyle(AbstractPromptStyle):\n    def _messages_to_prompt(self, messages: Sequence[ChatMessage]) -> str:\n        prompt = \"<|im_start|>system\\n\"\n        for message in messages:\n            role = message.role\n            content = message.content or \"\"\n            if role.lower() == \"system\":\n                message_from_user = f\"{content.strip()}\"\n                prompt += message_from_user\n            elif role.lower() == \"user\":\n                prompt += \"<|im_end|>\\n<|im_start|>user\\n\"\n                message_from_user = f\"{content.strip()}<|im_end|>\\n\"\n                prompt += message_from_user\n        prompt += \"<|im_start|>assistant\\n\"\n        return prompt\n\n    def _completion_to_prompt(self, completion: str) -> str:\n        return self._messages_to_prompt(\n            [ChatMessage(content=completion, role=MessageRole.USER)]\n        )\n\n\ndef get_prompt_style(\n    prompt_style: (\n        Literal[\"default\", \"llama2\", \"llama3\", \"tag\", \"mistral\", \"chatml\"] | None\n    )\n) -> AbstractPromptStyle:\n    \"\"\"Get the prompt style to use from the given string.\n\n    :param prompt_style: The prompt style to use.\n    :return: The prompt style to use.\n    \"\"\"\n    if prompt_style is None or prompt_style == \"default\":\n        return DefaultPromptStyle()\n    elif prompt_style == \"llama2\":\n        return Llama2PromptStyle()\n    elif prompt_style == \"llama3\":\n        return Llama3PromptStyle()\n    elif prompt_style == \"tag\":\n        return TagPromptStyle()\n    elif prompt_style == \"mistral\":\n        return MistralPromptStyle()\n    elif prompt_style == \"chatml\":\n        return ChatMLPromptStyle()\n    raise ValueError(f\"Unknown prompt_style='{prompt_style}'\")\n"
  },
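  {
    "path": "examples/prompt_style_example.py",
    "content": "\"\"\"Illustrative sketch, not part of the upstream PrivateGPT sources.\n\nShows how get_prompt_style formats a short conversation with the llama2 and\nchatml styles from prompt_helper.py. Only llama-index-core is required; the\nmessages are invented for the demo.\n\"\"\"\nfrom llama_index.core.llms import ChatMessage, MessageRole\n\nfrom private_gpt.components.llm.prompt_helper import get_prompt_style\n\nmessages = [\n    ChatMessage(role=MessageRole.SYSTEM, content=\"You are a terse assistant.\"),\n    ChatMessage(role=MessageRole.USER, content=\"Name one prime number.\"),\n]\n\nfor style_name in (\"llama2\", \"chatml\"):\n    # The default style is skipped here because it delegates formatting to the LLM.\n    style = get_prompt_style(style_name)\n    print(f\"--- {style_name} ---\")\n    print(style.messages_to_prompt(messages))\n"
  },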
  {
    "path": "private_gpt/components/node_store/__init__.py",
    "content": ""
  },
  {
    "path": "private_gpt/components/node_store/node_store_component.py",
    "content": "import logging\n\nfrom injector import inject, singleton\nfrom llama_index.core.storage.docstore import BaseDocumentStore, SimpleDocumentStore\nfrom llama_index.core.storage.index_store import SimpleIndexStore\nfrom llama_index.core.storage.index_store.types import BaseIndexStore\n\nfrom private_gpt.paths import local_data_path\nfrom private_gpt.settings.settings import Settings\n\nlogger = logging.getLogger(__name__)\n\n\n@singleton\nclass NodeStoreComponent:\n    index_store: BaseIndexStore\n    doc_store: BaseDocumentStore\n\n    @inject\n    def __init__(self, settings: Settings) -> None:\n        match settings.nodestore.database:\n            case \"simple\":\n                try:\n                    self.index_store = SimpleIndexStore.from_persist_dir(\n                        persist_dir=str(local_data_path)\n                    )\n                except FileNotFoundError:\n                    logger.debug(\"Local index store not found, creating a new one\")\n                    self.index_store = SimpleIndexStore()\n\n                try:\n                    self.doc_store = SimpleDocumentStore.from_persist_dir(\n                        persist_dir=str(local_data_path)\n                    )\n                except FileNotFoundError:\n                    logger.debug(\"Local document store not found, creating a new one\")\n                    self.doc_store = SimpleDocumentStore()\n\n            case \"postgres\":\n                try:\n                    from llama_index.storage.docstore.postgres import (  # type: ignore\n                        PostgresDocumentStore,\n                    )\n                    from llama_index.storage.index_store.postgres import (  # type: ignore\n                        PostgresIndexStore,\n                    )\n                except ImportError:\n                    raise ImportError(\n                        \"Postgres dependencies not found, install with `poetry install --extras storage-nodestore-postgres`\"\n                    ) from None\n\n                if settings.postgres is None:\n                    raise ValueError(\"Postgres index/doc store settings not found.\")\n\n                self.index_store = PostgresIndexStore.from_params(\n                    **settings.postgres.model_dump(exclude_none=True)\n                )\n\n                self.doc_store = PostgresDocumentStore.from_params(\n                    **settings.postgres.model_dump(exclude_none=True)\n                )\n\n            case _:\n                # Should be unreachable\n                # The settings validator should have caught this\n                raise ValueError(\n                    f\"Database {settings.nodestore.database} not supported\"\n                )\n"
  },
  {
    "path": "private_gpt/components/vector_store/__init__.py",
    "content": ""
  },
  {
    "path": "private_gpt/components/vector_store/batched_chroma.py",
    "content": "from collections.abc import Generator, Sequence\nfrom typing import TYPE_CHECKING, Any\n\nfrom llama_index.core.schema import BaseNode, MetadataMode\nfrom llama_index.core.vector_stores.utils import node_to_metadata_dict\nfrom llama_index.vector_stores.chroma import ChromaVectorStore  # type: ignore\n\nif TYPE_CHECKING:\n    from collections.abc import Mapping\n\n\ndef chunk_list(\n    lst: Sequence[BaseNode], max_chunk_size: int\n) -> Generator[Sequence[BaseNode], None, None]:\n    \"\"\"Yield successive max_chunk_size-sized chunks from lst.\n\n    Args:\n        lst (List[BaseNode]): list of nodes with embeddings\n        max_chunk_size (int): max chunk size\n\n    Yields:\n        Generator[List[BaseNode], None, None]: list of nodes with embeddings\n    \"\"\"\n    for i in range(0, len(lst), max_chunk_size):\n        yield lst[i : i + max_chunk_size]\n\n\nclass BatchedChromaVectorStore(ChromaVectorStore):  # type: ignore\n    \"\"\"Chroma vector store, batching additions to avoid reaching the max batch limit.\n\n    In this vector store, embeddings are stored within a ChromaDB collection.\n\n    During query time, the index uses ChromaDB to query for the top\n    k most similar nodes.\n\n    Args:\n        chroma_client (from chromadb.api.API):\n            API instance\n        chroma_collection (chromadb.api.models.Collection.Collection):\n            ChromaDB collection instance\n\n    \"\"\"\n\n    chroma_client: Any | None\n\n    def __init__(\n        self,\n        chroma_client: Any,\n        chroma_collection: Any,\n        host: str | None = None,\n        port: str | None = None,\n        ssl: bool = False,\n        headers: dict[str, str] | None = None,\n        collection_kwargs: dict[Any, Any] | None = None,\n    ) -> None:\n        super().__init__(\n            chroma_collection=chroma_collection,\n            host=host,\n            port=port,\n            ssl=ssl,\n            headers=headers,\n            collection_kwargs=collection_kwargs or {},\n        )\n        self.chroma_client = chroma_client\n\n    def add(self, nodes: Sequence[BaseNode], **add_kwargs: Any) -> list[str]:\n        \"\"\"Add nodes to index, batching the insertion to avoid issues.\n\n        Args:\n            nodes: List[BaseNode]: list of nodes with embeddings\n            add_kwargs: _\n        \"\"\"\n        if not self.chroma_client:\n            raise ValueError(\"Client not initialized\")\n\n        if not self._collection:\n            raise ValueError(\"Collection not initialized\")\n\n        max_chunk_size = self.chroma_client.max_batch_size\n        node_chunks = chunk_list(nodes, max_chunk_size)\n\n        all_ids = []\n        for node_chunk in node_chunks:\n            embeddings: list[Sequence[float]] = []\n            metadatas: list[Mapping[str, Any]] = []\n            ids = []\n            documents = []\n            for node in node_chunk:\n                embeddings.append(node.get_embedding())\n                metadatas.append(\n                    node_to_metadata_dict(\n                        node, remove_text=True, flat_metadata=self.flat_metadata\n                    )\n                )\n                ids.append(node.node_id)\n                documents.append(node.get_content(metadata_mode=MetadataMode.NONE))\n\n            self._collection.add(\n                embeddings=embeddings,\n                ids=ids,\n                metadatas=metadatas,\n                documents=documents,\n            )\n            all_ids.extend(ids)\n\n        
return all_ids\n"
  },
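  {
    "path": "examples/batched_chroma_chunking_example.py",
    "content": "\"\"\"Illustrative sketch, not part of the upstream PrivateGPT sources.\n\nDemonstrates the chunk_list helper that BatchedChromaVectorStore uses to keep\ninsertions below Chroma's max batch size. Importing batched_chroma requires\nthe vector-stores-chroma extra; the nodes and chunk size are invented.\n\"\"\"\nfrom llama_index.core.schema import TextNode\n\nfrom private_gpt.components.vector_store.batched_chroma import chunk_list\n\nnodes = [TextNode(text=f\"node {i}\") for i in range(5)]\n\n# With max_chunk_size=2, the five nodes are yielded as batches of 2, 2 and 1.\nfor batch in chunk_list(nodes, max_chunk_size=2):\n    print([node.text for node in batch])\n"
  },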
  {
    "path": "private_gpt/components/vector_store/vector_store_component.py",
    "content": "import logging\nimport typing\n\nfrom injector import inject, singleton\nfrom llama_index.core.indices.vector_store import VectorIndexRetriever, VectorStoreIndex\nfrom llama_index.core.vector_stores.types import (\n    BasePydanticVectorStore,\n    FilterCondition,\n    MetadataFilter,\n    MetadataFilters,\n)\n\nfrom private_gpt.open_ai.extensions.context_filter import ContextFilter\nfrom private_gpt.paths import local_data_path\nfrom private_gpt.settings.settings import Settings\n\nlogger = logging.getLogger(__name__)\n\n\ndef _doc_id_metadata_filter(\n    context_filter: ContextFilter | None,\n) -> MetadataFilters:\n    filters = MetadataFilters(filters=[], condition=FilterCondition.OR)\n\n    if context_filter is not None and context_filter.docs_ids is not None:\n        for doc_id in context_filter.docs_ids:\n            filters.filters.append(MetadataFilter(key=\"doc_id\", value=doc_id))\n\n    return filters\n\n\n@singleton\nclass VectorStoreComponent:\n    settings: Settings\n    vector_store: BasePydanticVectorStore\n\n    @inject\n    def __init__(self, settings: Settings) -> None:\n        self.settings = settings\n        match settings.vectorstore.database:\n            case \"postgres\":\n                try:\n                    from llama_index.vector_stores.postgres import (  # type: ignore\n                        PGVectorStore,\n                    )\n                except ImportError as e:\n                    raise ImportError(\n                        \"Postgres dependencies not found, install with `poetry install --extras vector-stores-postgres`\"\n                    ) from e\n\n                if settings.postgres is None:\n                    raise ValueError(\n                        \"Postgres settings not found. 
Please provide settings.\"\n                    )\n\n                self.vector_store = typing.cast(\n                    BasePydanticVectorStore,\n                    PGVectorStore.from_params(\n                        **settings.postgres.model_dump(exclude_none=True),\n                        table_name=\"embeddings\",\n                        embed_dim=settings.embedding.embed_dim,\n                    ),\n                )\n\n            case \"chroma\":\n                try:\n                    import chromadb  # type: ignore\n                    from chromadb.config import (  # type: ignore\n                        Settings as ChromaSettings,\n                    )\n\n                    from private_gpt.components.vector_store.batched_chroma import (\n                        BatchedChromaVectorStore,\n                    )\n                except ImportError as e:\n                    raise ImportError(\n                        \"ChromaDB dependencies not found, install with `poetry install --extras vector-stores-chroma`\"\n                    ) from e\n\n                chroma_settings = ChromaSettings(anonymized_telemetry=False)\n                chroma_client = chromadb.PersistentClient(\n                    path=str((local_data_path / \"chroma_db\").absolute()),\n                    settings=chroma_settings,\n                )\n                chroma_collection = chroma_client.get_or_create_collection(\n                    \"make_this_parameterizable_per_api_call\"\n                )  # TODO\n\n                self.vector_store = typing.cast(\n                    BasePydanticVectorStore,\n                    BatchedChromaVectorStore(\n                        chroma_client=chroma_client, chroma_collection=chroma_collection\n                    ),\n                )\n\n            case \"qdrant\":\n                try:\n                    from llama_index.vector_stores.qdrant import (  # type: ignore\n                        QdrantVectorStore,\n                    )\n                    from qdrant_client import QdrantClient  # type: ignore\n                except ImportError as e:\n                    raise ImportError(\n                        \"Qdrant dependencies not found, install with `poetry install --extras vector-stores-qdrant`\"\n                    ) from e\n\n                if settings.qdrant is None:\n                    logger.info(\n                        \"Qdrant config not found. 
Using default settings.\"\n                        \"Trying to connect to Qdrant at localhost:6333.\"\n                    )\n                    client = QdrantClient()\n                else:\n                    client = QdrantClient(\n                        **settings.qdrant.model_dump(exclude_none=True)\n                    )\n                self.vector_store = typing.cast(\n                    BasePydanticVectorStore,\n                    QdrantVectorStore(\n                        client=client,\n                        collection_name=\"make_this_parameterizable_per_api_call\",\n                    ),  # TODO\n                )\n\n            case \"milvus\":\n                try:\n                    from llama_index.vector_stores.milvus import (  # type: ignore\n                        MilvusVectorStore,\n                    )\n                except ImportError as e:\n                    raise ImportError(\n                        \"Milvus dependencies not found, install with `poetry install --extras vector-stores-milvus`\"\n                    ) from e\n\n                if settings.milvus is None:\n                    logger.info(\n                        \"Milvus config not found. Using default settings.\\n\"\n                        \"Trying to connect to Milvus at local_data/private_gpt/milvus/milvus_local.db \"\n                        \"with collection 'make_this_parameterizable_per_api_call'.\"\n                    )\n\n                    self.vector_store = typing.cast(\n                        BasePydanticVectorStore,\n                        MilvusVectorStore(\n                            dim=settings.embedding.embed_dim,\n                            collection_name=\"make_this_parameterizable_per_api_call\",\n                            overwrite=True,\n                        ),\n                    )\n\n                else:\n                    self.vector_store = typing.cast(\n                        BasePydanticVectorStore,\n                        MilvusVectorStore(\n                            dim=settings.embedding.embed_dim,\n                            uri=settings.milvus.uri,\n                            token=settings.milvus.token,\n                            collection_name=settings.milvus.collection_name,\n                            overwrite=settings.milvus.overwrite,\n                        ),\n                    )\n\n            case \"clickhouse\":\n                try:\n                    from clickhouse_connect import (  # type: ignore\n                        get_client,\n                    )\n                    from llama_index.vector_stores.clickhouse import (  # type: ignore\n                        ClickHouseVectorStore,\n                    )\n                except ImportError as e:\n                    raise ImportError(\n                        \"ClickHouse dependencies not found, install with `poetry install --extras vector-stores-clickhouse`\"\n                    ) from e\n\n                if settings.clickhouse is None:\n                    raise ValueError(\n                        \"ClickHouse settings not found. 
Please provide settings.\"\n                    )\n\n                clickhouse_client = get_client(\n                    host=settings.clickhouse.host,\n                    port=settings.clickhouse.port,\n                    username=settings.clickhouse.username,\n                    password=settings.clickhouse.password,\n                )\n                self.vector_store = ClickHouseVectorStore(\n                    clickhouse_client=clickhouse_client\n                )\n            case _:\n                # Should be unreachable\n                # The settings validator should have caught this\n                raise ValueError(\n                    f\"Vectorstore database {settings.vectorstore.database} not supported\"\n                )\n\n    def get_retriever(\n        self,\n        index: VectorStoreIndex,\n        context_filter: ContextFilter | None = None,\n        similarity_top_k: int = 2,\n    ) -> VectorIndexRetriever:\n        # This way we support qdrant (using doc_ids) and the rest (using filters)\n        return VectorIndexRetriever(\n            index=index,\n            similarity_top_k=similarity_top_k,\n            doc_ids=context_filter.docs_ids if context_filter else None,\n            filters=(\n                _doc_id_metadata_filter(context_filter)\n                if self.settings.vectorstore.database != \"qdrant\"\n                else None\n            ),\n        )\n\n    def close(self) -> None:\n        if hasattr(self.vector_store.client, \"close\"):\n            self.vector_store.client.close()\n"
  },
  {
    "path": "private_gpt/constants.py",
    "content": "from pathlib import Path\n\nPROJECT_ROOT_PATH: Path = Path(__file__).parents[1]\n"
  },
  {
    "path": "private_gpt/di.py",
    "content": "from injector import Injector\n\nfrom private_gpt.settings.settings import Settings, unsafe_typed_settings\n\n\ndef create_application_injector() -> Injector:\n    _injector = Injector(auto_bind=True)\n    _injector.binder.bind(Settings, to=unsafe_typed_settings)\n    return _injector\n\n\n\"\"\"\nGlobal injector for the application.\n\nAvoid using this reference, it will make your code harder to test.\n\nInstead, use the `request.state.injector` reference, which is bound to every request\n\"\"\"\nglobal_injector: Injector = create_application_injector()\n"
  },
  {
    "path": "private_gpt/launcher.py",
    "content": "\"\"\"FastAPI app creation, logger configuration and main API routes.\"\"\"\n\nimport logging\n\nfrom fastapi import Depends, FastAPI, Request\nfrom fastapi.middleware.cors import CORSMiddleware\nfrom injector import Injector\nfrom llama_index.core.callbacks import CallbackManager\nfrom llama_index.core.callbacks.global_handlers import create_global_handler\nfrom llama_index.core.settings import Settings as LlamaIndexSettings\n\nfrom private_gpt.server.chat.chat_router import chat_router\nfrom private_gpt.server.chunks.chunks_router import chunks_router\nfrom private_gpt.server.completions.completions_router import completions_router\nfrom private_gpt.server.embeddings.embeddings_router import embeddings_router\nfrom private_gpt.server.health.health_router import health_router\nfrom private_gpt.server.ingest.ingest_router import ingest_router\nfrom private_gpt.server.recipes.summarize.summarize_router import summarize_router\nfrom private_gpt.settings.settings import Settings\n\nlogger = logging.getLogger(__name__)\n\n\ndef create_app(root_injector: Injector) -> FastAPI:\n\n    # Start the API\n    async def bind_injector_to_request(request: Request) -> None:\n        request.state.injector = root_injector\n\n    app = FastAPI(dependencies=[Depends(bind_injector_to_request)])\n\n    app.include_router(completions_router)\n    app.include_router(chat_router)\n    app.include_router(chunks_router)\n    app.include_router(ingest_router)\n    app.include_router(summarize_router)\n    app.include_router(embeddings_router)\n    app.include_router(health_router)\n\n    # Add LlamaIndex simple observability\n    global_handler = create_global_handler(\"simple\")\n    if global_handler:\n        LlamaIndexSettings.callback_manager = CallbackManager([global_handler])\n\n    settings = root_injector.get(Settings)\n    if settings.server.cors.enabled:\n        logger.debug(\"Setting up CORS middleware\")\n        app.add_middleware(\n            CORSMiddleware,\n            allow_credentials=settings.server.cors.allow_credentials,\n            allow_origins=settings.server.cors.allow_origins,\n            allow_origin_regex=settings.server.cors.allow_origin_regex,\n            allow_methods=settings.server.cors.allow_methods,\n            allow_headers=settings.server.cors.allow_headers,\n        )\n\n    if settings.ui.enabled:\n        logger.debug(\"Importing the UI module\")\n        try:\n            from private_gpt.ui.ui import PrivateGptUi\n        except ImportError as e:\n            raise ImportError(\n                \"UI dependencies not found, install with `poetry install --extras ui`\"\n            ) from e\n\n        ui = root_injector.get(PrivateGptUi)\n        ui.mount_in_app(app, settings.ui.path)\n\n    return app\n"
  },
  {
    "path": "private_gpt/main.py",
    "content": "\"\"\"FastAPI app creation, logger configuration and main API routes.\"\"\"\n\nfrom private_gpt.di import global_injector\nfrom private_gpt.launcher import create_app\n\napp = create_app(global_injector)\n"
  },
  {
    "path": "private_gpt/open_ai/__init__.py",
    "content": "\"\"\"OpenAI compatibility utilities.\"\"\"\n"
  },
  {
    "path": "private_gpt/open_ai/extensions/__init__.py",
    "content": "\"\"\"OpenAI API extensions.\"\"\"\n"
  },
  {
    "path": "private_gpt/open_ai/extensions/context_filter.py",
    "content": "from pydantic import BaseModel, Field\n\n\nclass ContextFilter(BaseModel):\n    docs_ids: list[str] | None = Field(\n        examples=[[\"c202d5e6-7b69-4869-81cc-dd574ee8ee11\"]]\n    )\n"
  },
  {
    "path": "private_gpt/open_ai/openai_models.py",
    "content": "import time\nimport uuid\nfrom collections.abc import Iterator\nfrom typing import Literal\n\nfrom llama_index.core.llms import ChatResponse, CompletionResponse\nfrom pydantic import BaseModel, Field\n\nfrom private_gpt.server.chunks.chunks_service import Chunk\n\n\nclass OpenAIDelta(BaseModel):\n    \"\"\"A piece of completion that needs to be concatenated to get the full message.\"\"\"\n\n    content: str | None\n\n\nclass OpenAIMessage(BaseModel):\n    \"\"\"Inference result, with the source of the message.\n\n    Role could be the assistant or system\n    (providing a default response, not AI generated).\n    \"\"\"\n\n    role: Literal[\"assistant\", \"system\", \"user\"] = Field(default=\"user\")\n    content: str | None\n\n\nclass OpenAIChoice(BaseModel):\n    \"\"\"Response from AI.\n\n    Either the delta or the message will be present, but never both.\n    Sources used will be returned in case context retrieval was enabled.\n    \"\"\"\n\n    finish_reason: str | None = Field(examples=[\"stop\"])\n    delta: OpenAIDelta | None = None\n    message: OpenAIMessage | None = None\n    sources: list[Chunk] | None = None\n    index: int = 0\n\n\nclass OpenAICompletion(BaseModel):\n    \"\"\"Clone of OpenAI Completion model.\n\n    For more information see: https://platform.openai.com/docs/api-reference/chat/object\n    \"\"\"\n\n    id: str\n    object: Literal[\"completion\", \"completion.chunk\"] = Field(default=\"completion\")\n    created: int = Field(..., examples=[1623340000])\n    model: Literal[\"private-gpt\"]\n    choices: list[OpenAIChoice]\n\n    @classmethod\n    def from_text(\n        cls,\n        text: str | None,\n        finish_reason: str | None = None,\n        sources: list[Chunk] | None = None,\n    ) -> \"OpenAICompletion\":\n        return OpenAICompletion(\n            id=str(uuid.uuid4()),\n            object=\"completion\",\n            created=int(time.time()),\n            model=\"private-gpt\",\n            choices=[\n                OpenAIChoice(\n                    message=OpenAIMessage(role=\"assistant\", content=text),\n                    finish_reason=finish_reason,\n                    sources=sources,\n                )\n            ],\n        )\n\n    @classmethod\n    def json_from_delta(\n        cls,\n        *,\n        text: str | None,\n        finish_reason: str | None = None,\n        sources: list[Chunk] | None = None,\n    ) -> str:\n        chunk = OpenAICompletion(\n            id=str(uuid.uuid4()),\n            object=\"completion.chunk\",\n            created=int(time.time()),\n            model=\"private-gpt\",\n            choices=[\n                OpenAIChoice(\n                    delta=OpenAIDelta(content=text),\n                    finish_reason=finish_reason,\n                    sources=sources,\n                )\n            ],\n        )\n\n        return chunk.model_dump_json()\n\n\ndef to_openai_response(\n    response: str | ChatResponse, sources: list[Chunk] | None = None\n) -> OpenAICompletion:\n    if isinstance(response, ChatResponse):\n        return OpenAICompletion.from_text(response.delta, finish_reason=\"stop\")\n    else:\n        return OpenAICompletion.from_text(\n            response, finish_reason=\"stop\", sources=sources\n        )\n\n\ndef to_openai_sse_stream(\n    response_generator: Iterator[str | CompletionResponse | ChatResponse],\n    sources: list[Chunk] | None = None,\n) -> Iterator[str]:\n    for response in response_generator:\n        if isinstance(response, 
CompletionResponse | ChatResponse):\n            yield f\"data: {OpenAICompletion.json_from_delta(text=response.delta)}\\n\\n\"\n        else:\n            yield f\"data: {OpenAICompletion.json_from_delta(text=response, sources=sources)}\\n\\n\"\n    yield f\"data: {OpenAICompletion.json_from_delta(text='', finish_reason='stop')}\\n\\n\"\n    yield \"data: [DONE]\\n\\n\"\n"
  },
  {
    "path": "private_gpt/paths.py",
    "content": "from pathlib import Path\n\nfrom private_gpt.constants import PROJECT_ROOT_PATH\nfrom private_gpt.settings.settings import settings\n\n\ndef _absolute_or_from_project_root(path: str) -> Path:\n    if path.startswith(\"/\"):\n        return Path(path)\n    return PROJECT_ROOT_PATH / path\n\n\nmodels_path: Path = PROJECT_ROOT_PATH / \"models\"\nmodels_cache_path: Path = models_path / \"cache\"\ndocs_path: Path = PROJECT_ROOT_PATH / \"docs\"\nlocal_data_path: Path = _absolute_or_from_project_root(\n    settings().data.local_data_folder\n)\n"
  },
  {
    "path": "private_gpt/server/__init__.py",
    "content": "\"\"\"private-gpt server.\"\"\"\n"
  },
  {
    "path": "private_gpt/server/chat/__init__.py",
    "content": ""
  },
  {
    "path": "private_gpt/server/chat/chat_router.py",
    "content": "from fastapi import APIRouter, Depends, Request\nfrom llama_index.core.llms import ChatMessage, MessageRole\nfrom pydantic import BaseModel\nfrom starlette.responses import StreamingResponse\n\nfrom private_gpt.open_ai.extensions.context_filter import ContextFilter\nfrom private_gpt.open_ai.openai_models import (\n    OpenAICompletion,\n    OpenAIMessage,\n    to_openai_response,\n    to_openai_sse_stream,\n)\nfrom private_gpt.server.chat.chat_service import ChatService\nfrom private_gpt.server.utils.auth import authenticated\n\nchat_router = APIRouter(prefix=\"/v1\", dependencies=[Depends(authenticated)])\n\n\nclass ChatBody(BaseModel):\n    messages: list[OpenAIMessage]\n    use_context: bool = False\n    context_filter: ContextFilter | None = None\n    include_sources: bool = True\n    stream: bool = False\n\n    model_config = {\n        \"json_schema_extra\": {\n            \"examples\": [\n                {\n                    \"messages\": [\n                        {\n                            \"role\": \"system\",\n                            \"content\": \"You are a rapper. Always answer with a rap.\",\n                        },\n                        {\n                            \"role\": \"user\",\n                            \"content\": \"How do you fry an egg?\",\n                        },\n                    ],\n                    \"stream\": False,\n                    \"use_context\": True,\n                    \"include_sources\": True,\n                    \"context_filter\": {\n                        \"docs_ids\": [\"c202d5e6-7b69-4869-81cc-dd574ee8ee11\"]\n                    },\n                }\n            ]\n        }\n    }\n\n\n@chat_router.post(\n    \"/chat/completions\",\n    response_model=None,\n    responses={200: {\"model\": OpenAICompletion}},\n    tags=[\"Contextual Completions\"],\n    openapi_extra={\n        \"x-fern-streaming\": {\n            \"stream-condition\": \"stream\",\n            \"response\": {\"$ref\": \"#/components/schemas/OpenAICompletion\"},\n            \"response-stream\": {\"$ref\": \"#/components/schemas/OpenAICompletion\"},\n        }\n    },\n)\ndef chat_completion(\n    request: Request, body: ChatBody\n) -> OpenAICompletion | StreamingResponse:\n    \"\"\"Given a list of messages comprising a conversation, return a response.\n\n    Optionally include an initial `role: system` message to influence the way\n    the LLM answers.\n\n    If `use_context` is set to `true`, the model will use context coming\n    from the ingested documents to create the response. The documents being used can\n    be filtered using the `context_filter` and passing the document IDs to be used.\n    Ingested documents IDs can be found using `/ingest/list` endpoint. 
If you want\n    all ingested documents to be used, remove `context_filter` altogether.\n\n    When using `'include_sources': true`, the API will return the source Chunks used\n    to create the response, which come from the context provided.\n\n    When using `'stream': true`, the API will return data chunks following [OpenAI's\n    streaming model](https://platform.openai.com/docs/api-reference/chat/streaming):\n    ```\n    {\"id\":\"12345\",\"object\":\"completion.chunk\",\"created\":1694268190,\n    \"model\":\"private-gpt\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"Hello\"},\n    \"finish_reason\":null}]}\n    ```\n    \"\"\"\n    service = request.state.injector.get(ChatService)\n    all_messages = [\n        ChatMessage(content=m.content, role=MessageRole(m.role)) for m in body.messages\n    ]\n    if body.stream:\n        completion_gen = service.stream_chat(\n            messages=all_messages,\n            use_context=body.use_context,\n            context_filter=body.context_filter,\n        )\n        return StreamingResponse(\n            to_openai_sse_stream(\n                completion_gen.response,\n                completion_gen.sources if body.include_sources else None,\n            ),\n            media_type=\"text/event-stream\",\n        )\n    else:\n        completion = service.chat(\n            messages=all_messages,\n            use_context=body.use_context,\n            context_filter=body.context_filter,\n        )\n        return to_openai_response(\n            completion.response, completion.sources if body.include_sources else None\n        )\n"
  },
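  {
    "path": "examples/chat_completions_sse_client.py",
    "content": "\"\"\"Hypothetical usage sketch, not a file of the original project.\n\nMinimal client for the streaming `/v1/chat/completions` endpoint defined in\n`chat_router.py`. It assumes a PrivateGPT server listening on localhost:8001 and the\n`requests` package; adjust the URL (and any Authorization header) to your setup.\n\"\"\"\nimport json\n\nimport requests\n\nAPI_URL = \"http://localhost:8001/v1/chat/completions\"  # assumed local server\n\n\ndef stream_chat(prompt: str) -> None:\n    body = {\n        \"messages\": [{\"role\": \"user\", \"content\": prompt}],\n        \"use_context\": False,\n        \"stream\": True,\n    }\n    with requests.post(API_URL, json=body, stream=True, timeout=120) as response:\n        response.raise_for_status()\n        # The server emits Server-Sent Events: \"data: {json}\" lines and a final \"data: [DONE]\".\n        for line in response.iter_lines(decode_unicode=True):\n            if not line or not line.startswith(\"data: \"):\n                continue\n            payload = line.removeprefix(\"data: \")\n            if payload == \"[DONE]\":\n                break\n            chunk = json.loads(payload)\n            delta = chunk[\"choices\"][0][\"delta\"] or {}\n            print(delta.get(\"content\") or \"\", end=\"\", flush=True)\n    print()\n\n\nif __name__ == \"__main__\":\n    stream_chat(\"How do you fry an egg?\")\n"
  },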
  {
    "path": "private_gpt/server/chat/chat_service.py",
    "content": "from dataclasses import dataclass\nfrom typing import TYPE_CHECKING\n\nfrom injector import inject, singleton\nfrom llama_index.core.chat_engine import ContextChatEngine, SimpleChatEngine\nfrom llama_index.core.chat_engine.types import (\n    BaseChatEngine,\n)\nfrom llama_index.core.indices import VectorStoreIndex\nfrom llama_index.core.indices.postprocessor import MetadataReplacementPostProcessor\nfrom llama_index.core.llms import ChatMessage, MessageRole\nfrom llama_index.core.postprocessor import (\n    SentenceTransformerRerank,\n    SimilarityPostprocessor,\n)\nfrom llama_index.core.storage import StorageContext\nfrom llama_index.core.types import TokenGen\nfrom pydantic import BaseModel\n\nfrom private_gpt.components.embedding.embedding_component import EmbeddingComponent\nfrom private_gpt.components.llm.llm_component import LLMComponent\nfrom private_gpt.components.node_store.node_store_component import NodeStoreComponent\nfrom private_gpt.components.vector_store.vector_store_component import (\n    VectorStoreComponent,\n)\nfrom private_gpt.open_ai.extensions.context_filter import ContextFilter\nfrom private_gpt.server.chunks.chunks_service import Chunk\nfrom private_gpt.settings.settings import Settings\n\nif TYPE_CHECKING:\n    from llama_index.core.postprocessor.types import BaseNodePostprocessor\n\n\nclass Completion(BaseModel):\n    response: str\n    sources: list[Chunk] | None = None\n\n\nclass CompletionGen(BaseModel):\n    response: TokenGen\n    sources: list[Chunk] | None = None\n\n\n@dataclass\nclass ChatEngineInput:\n    system_message: ChatMessage | None = None\n    last_message: ChatMessage | None = None\n    chat_history: list[ChatMessage] | None = None\n\n    @classmethod\n    def from_messages(cls, messages: list[ChatMessage]) -> \"ChatEngineInput\":\n        # Detect if there is a system message, extract the last message and chat history\n        system_message = (\n            messages[0]\n            if len(messages) > 0 and messages[0].role == MessageRole.SYSTEM\n            else None\n        )\n        last_message = (\n            messages[-1]\n            if len(messages) > 0 and messages[-1].role == MessageRole.USER\n            else None\n        )\n        # Remove from messages list the system message and last message,\n        # if they exist. 
The rest is the chat history.\n        if system_message:\n            messages.pop(0)\n        if last_message:\n            messages.pop(-1)\n        chat_history = messages if len(messages) > 0 else None\n\n        return cls(\n            system_message=system_message,\n            last_message=last_message,\n            chat_history=chat_history,\n        )\n\n\n@singleton\nclass ChatService:\n    settings: Settings\n\n    @inject\n    def __init__(\n        self,\n        settings: Settings,\n        llm_component: LLMComponent,\n        vector_store_component: VectorStoreComponent,\n        embedding_component: EmbeddingComponent,\n        node_store_component: NodeStoreComponent,\n    ) -> None:\n        self.settings = settings\n        self.llm_component = llm_component\n        self.embedding_component = embedding_component\n        self.vector_store_component = vector_store_component\n        self.storage_context = StorageContext.from_defaults(\n            vector_store=vector_store_component.vector_store,\n            docstore=node_store_component.doc_store,\n            index_store=node_store_component.index_store,\n        )\n        self.index = VectorStoreIndex.from_vector_store(\n            vector_store_component.vector_store,\n            storage_context=self.storage_context,\n            llm=llm_component.llm,\n            embed_model=embedding_component.embedding_model,\n            show_progress=True,\n        )\n\n    def _chat_engine(\n        self,\n        system_prompt: str | None = None,\n        use_context: bool = False,\n        context_filter: ContextFilter | None = None,\n    ) -> BaseChatEngine:\n        settings = self.settings\n        if use_context:\n            vector_index_retriever = self.vector_store_component.get_retriever(\n                index=self.index,\n                context_filter=context_filter,\n                similarity_top_k=self.settings.rag.similarity_top_k,\n            )\n            node_postprocessors: list[BaseNodePostprocessor] = [\n                MetadataReplacementPostProcessor(target_metadata_key=\"window\"),\n            ]\n            if settings.rag.similarity_value:\n                node_postprocessors.append(\n                    SimilarityPostprocessor(\n                        similarity_cutoff=settings.rag.similarity_value\n                    )\n                )\n\n            if settings.rag.rerank.enabled:\n                rerank_postprocessor = SentenceTransformerRerank(\n                    model=settings.rag.rerank.model, top_n=settings.rag.rerank.top_n\n                )\n                node_postprocessors.append(rerank_postprocessor)\n\n            return ContextChatEngine.from_defaults(\n                system_prompt=system_prompt,\n                retriever=vector_index_retriever,\n                llm=self.llm_component.llm,  # Takes no effect at the moment\n                node_postprocessors=node_postprocessors,\n            )\n        else:\n            return SimpleChatEngine.from_defaults(\n                system_prompt=system_prompt,\n                llm=self.llm_component.llm,\n            )\n\n    def stream_chat(\n        self,\n        messages: list[ChatMessage],\n        use_context: bool = False,\n        context_filter: ContextFilter | None = None,\n    ) -> CompletionGen:\n        chat_engine_input = ChatEngineInput.from_messages(messages)\n        last_message = (\n            chat_engine_input.last_message.content\n            if chat_engine_input.last_message\n            else None\n 
       )\n        system_prompt = (\n            chat_engine_input.system_message.content\n            if chat_engine_input.system_message\n            else None\n        )\n        chat_history = (\n            chat_engine_input.chat_history if chat_engine_input.chat_history else None\n        )\n\n        chat_engine = self._chat_engine(\n            system_prompt=system_prompt,\n            use_context=use_context,\n            context_filter=context_filter,\n        )\n        streaming_response = chat_engine.stream_chat(\n            message=last_message if last_message is not None else \"\",\n            chat_history=chat_history,\n        )\n        sources = [Chunk.from_node(node) for node in streaming_response.source_nodes]\n        completion_gen = CompletionGen(\n            response=streaming_response.response_gen, sources=sources\n        )\n        return completion_gen\n\n    def chat(\n        self,\n        messages: list[ChatMessage],\n        use_context: bool = False,\n        context_filter: ContextFilter | None = None,\n    ) -> Completion:\n        chat_engine_input = ChatEngineInput.from_messages(messages)\n        last_message = (\n            chat_engine_input.last_message.content\n            if chat_engine_input.last_message\n            else None\n        )\n        system_prompt = (\n            chat_engine_input.system_message.content\n            if chat_engine_input.system_message\n            else None\n        )\n        chat_history = (\n            chat_engine_input.chat_history if chat_engine_input.chat_history else None\n        )\n\n        chat_engine = self._chat_engine(\n            system_prompt=system_prompt,\n            use_context=use_context,\n            context_filter=context_filter,\n        )\n        wrapped_response = chat_engine.chat(\n            message=last_message if last_message is not None else \"\",\n            chat_history=chat_history,\n        )\n        sources = [Chunk.from_node(node) for node in wrapped_response.source_nodes]\n        completion = Completion(response=wrapped_response.response, sources=sources)\n        return completion\n"
  },
  {
    "path": "private_gpt/server/chunks/__init__.py",
    "content": ""
  },
  {
    "path": "private_gpt/server/chunks/chunks_router.py",
    "content": "from typing import Literal\n\nfrom fastapi import APIRouter, Depends, Request\nfrom pydantic import BaseModel, Field\n\nfrom private_gpt.open_ai.extensions.context_filter import ContextFilter\nfrom private_gpt.server.chunks.chunks_service import Chunk, ChunksService\nfrom private_gpt.server.utils.auth import authenticated\n\nchunks_router = APIRouter(prefix=\"/v1\", dependencies=[Depends(authenticated)])\n\n\nclass ChunksBody(BaseModel):\n    text: str = Field(examples=[\"Q3 2023 sales\"])\n    context_filter: ContextFilter | None = None\n    limit: int = 10\n    prev_next_chunks: int = Field(default=0, examples=[2])\n\n\nclass ChunksResponse(BaseModel):\n    object: Literal[\"list\"]\n    model: Literal[\"private-gpt\"]\n    data: list[Chunk]\n\n\n@chunks_router.post(\"/chunks\", tags=[\"Context Chunks\"])\ndef chunks_retrieval(request: Request, body: ChunksBody) -> ChunksResponse:\n    \"\"\"Given a `text`, returns the most relevant chunks from the ingested documents.\n\n    The returned information can be used to generate prompts that can be\n    passed to `/completions` or `/chat/completions` APIs. Note: it is usually a very\n    fast API, because only the Embeddings model is involved, not the LLM. The\n    returned information contains the relevant chunk `text` together with the source\n    `document` it is coming from. It also contains a score that can be used to\n    compare different results.\n\n    The max number of chunks to be returned is set using the `limit` param.\n\n    Previous and next chunks (pieces of text that appear right before or after in the\n    document) can be fetched by using the `prev_next_chunks` field.\n\n    The documents being used can be filtered using the `context_filter` and passing\n    the document IDs to be used. Ingested documents IDs can be found using\n    `/ingest/list` endpoint. If you want all ingested documents to be used,\n    remove `context_filter` altogether.\n    \"\"\"\n    service = request.state.injector.get(ChunksService)\n    results = service.retrieve_relevant(\n        body.text, body.context_filter, body.limit, body.prev_next_chunks\n    )\n    return ChunksResponse(\n        object=\"list\",\n        model=\"private-gpt\",\n        data=results,\n    )\n"
  },
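  {
    "path": "examples/chunks_retrieval_example.py",
    "content": "\"\"\"Hypothetical usage sketch, not a file of the original project.\n\nQueries the `/v1/chunks` endpoint defined in `chunks_router.py` to inspect the context\nthat retrieval would return for a query. Assumes a PrivateGPT server on localhost:8001\nand the `requests` package; the document ID in `context_filter` is a placeholder taken\nfrom the API examples.\n\"\"\"\nimport requests\n\nbody = {\n    \"text\": \"Q3 2023 sales\",\n    \"limit\": 5,\n    \"prev_next_chunks\": 1,\n    # Restrict retrieval to one ingested document (IDs come from GET /v1/ingest/list).\n    \"context_filter\": {\"docs_ids\": [\"c202d5e6-7b69-4869-81cc-dd574ee8ee11\"]},\n}\nresponse = requests.post(\"http://localhost:8001/v1/chunks\", json=body, timeout=60)\nresponse.raise_for_status()\nfor chunk in response.json()[\"data\"]:\n    print(f\"{chunk['score']:.3f}  {chunk['text'][:80]}\")\n"
  },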
  {
    "path": "private_gpt/server/chunks/chunks_service.py",
    "content": "from typing import TYPE_CHECKING, Literal\n\nfrom injector import inject, singleton\nfrom llama_index.core.indices import VectorStoreIndex\nfrom llama_index.core.schema import NodeWithScore\nfrom llama_index.core.storage import StorageContext\nfrom pydantic import BaseModel, Field\n\nfrom private_gpt.components.embedding.embedding_component import EmbeddingComponent\nfrom private_gpt.components.llm.llm_component import LLMComponent\nfrom private_gpt.components.node_store.node_store_component import NodeStoreComponent\nfrom private_gpt.components.vector_store.vector_store_component import (\n    VectorStoreComponent,\n)\nfrom private_gpt.open_ai.extensions.context_filter import ContextFilter\nfrom private_gpt.server.ingest.model import IngestedDoc\n\nif TYPE_CHECKING:\n    from llama_index.core.schema import RelatedNodeInfo\n\n\nclass Chunk(BaseModel):\n    object: Literal[\"context.chunk\"]\n    score: float = Field(examples=[0.023])\n    document: IngestedDoc\n    text: str = Field(examples=[\"Outbound sales increased 20%, driven by new leads.\"])\n    previous_texts: list[str] | None = Field(\n        default=None,\n        examples=[[\"SALES REPORT 2023\", \"Inbound didn't show major changes.\"]],\n    )\n    next_texts: list[str] | None = Field(\n        default=None,\n        examples=[\n            [\n                \"New leads came from Google Ads campaign.\",\n                \"The campaign was run by the Marketing Department\",\n            ]\n        ],\n    )\n\n    @classmethod\n    def from_node(cls: type[\"Chunk\"], node: NodeWithScore) -> \"Chunk\":\n        doc_id = node.node.ref_doc_id if node.node.ref_doc_id is not None else \"-\"\n        return cls(\n            object=\"context.chunk\",\n            score=node.score or 0.0,\n            document=IngestedDoc(\n                object=\"ingest.document\",\n                doc_id=doc_id,\n                doc_metadata=node.metadata,\n            ),\n            text=node.get_content(),\n        )\n\n\n@singleton\nclass ChunksService:\n    @inject\n    def __init__(\n        self,\n        llm_component: LLMComponent,\n        vector_store_component: VectorStoreComponent,\n        embedding_component: EmbeddingComponent,\n        node_store_component: NodeStoreComponent,\n    ) -> None:\n        self.vector_store_component = vector_store_component\n        self.llm_component = llm_component\n        self.embedding_component = embedding_component\n        self.storage_context = StorageContext.from_defaults(\n            vector_store=vector_store_component.vector_store,\n            docstore=node_store_component.doc_store,\n            index_store=node_store_component.index_store,\n        )\n\n    def _get_sibling_nodes_text(\n        self, node_with_score: NodeWithScore, related_number: int, forward: bool = True\n    ) -> list[str]:\n        explored_nodes_texts = []\n        current_node = node_with_score.node\n        for _ in range(related_number):\n            explored_node_info: RelatedNodeInfo | None = (\n                current_node.next_node if forward else current_node.prev_node\n            )\n            if explored_node_info is None:\n                break\n\n            explored_node = self.storage_context.docstore.get_node(\n                explored_node_info.node_id\n            )\n\n            explored_nodes_texts.append(explored_node.get_content())\n            current_node = explored_node\n\n        return explored_nodes_texts\n\n    def retrieve_relevant(\n        self,\n        text: 
str,\n        context_filter: ContextFilter | None = None,\n        limit: int = 10,\n        prev_next_chunks: int = 0,\n    ) -> list[Chunk]:\n        index = VectorStoreIndex.from_vector_store(\n            self.vector_store_component.vector_store,\n            storage_context=self.storage_context,\n            llm=self.llm_component.llm,\n            embed_model=self.embedding_component.embedding_model,\n            show_progress=True,\n        )\n        vector_index_retriever = self.vector_store_component.get_retriever(\n            index=index, context_filter=context_filter, similarity_top_k=limit\n        )\n        nodes = vector_index_retriever.retrieve(text)\n        nodes.sort(key=lambda n: n.score or 0.0, reverse=True)\n\n        retrieved_nodes = []\n        for node in nodes:\n            chunk = Chunk.from_node(node)\n            chunk.previous_texts = self._get_sibling_nodes_text(\n                node, prev_next_chunks, False\n            )\n            chunk.next_texts = self._get_sibling_nodes_text(node, prev_next_chunks)\n            retrieved_nodes.append(chunk)\n\n        return retrieved_nodes\n"
  },
  {
    "path": "private_gpt/server/completions/__init__.py",
    "content": "\"\"\"Deprecated Openai compatibility endpoint.\"\"\"\n"
  },
  {
    "path": "private_gpt/server/completions/completions_router.py",
    "content": "from fastapi import APIRouter, Depends, Request\nfrom pydantic import BaseModel\nfrom starlette.responses import StreamingResponse\n\nfrom private_gpt.open_ai.extensions.context_filter import ContextFilter\nfrom private_gpt.open_ai.openai_models import (\n    OpenAICompletion,\n    OpenAIMessage,\n)\nfrom private_gpt.server.chat.chat_router import ChatBody, chat_completion\nfrom private_gpt.server.utils.auth import authenticated\n\ncompletions_router = APIRouter(prefix=\"/v1\", dependencies=[Depends(authenticated)])\n\n\nclass CompletionsBody(BaseModel):\n    prompt: str\n    system_prompt: str | None = None\n    use_context: bool = False\n    context_filter: ContextFilter | None = None\n    include_sources: bool = True\n    stream: bool = False\n\n    model_config = {\n        \"json_schema_extra\": {\n            \"examples\": [\n                {\n                    \"prompt\": \"How do you fry an egg?\",\n                    \"system_prompt\": \"You are a rapper. Always answer with a rap.\",\n                    \"stream\": False,\n                    \"use_context\": False,\n                    \"include_sources\": False,\n                }\n            ]\n        }\n    }\n\n\n@completions_router.post(\n    \"/completions\",\n    response_model=None,\n    summary=\"Completion\",\n    responses={200: {\"model\": OpenAICompletion}},\n    tags=[\"Contextual Completions\"],\n    openapi_extra={\n        \"x-fern-streaming\": {\n            \"stream-condition\": \"stream\",\n            \"response\": {\"$ref\": \"#/components/schemas/OpenAICompletion\"},\n            \"response-stream\": {\"$ref\": \"#/components/schemas/OpenAICompletion\"},\n        }\n    },\n)\ndef prompt_completion(\n    request: Request, body: CompletionsBody\n) -> OpenAICompletion | StreamingResponse:\n    \"\"\"We recommend most users use our Chat completions API.\n\n    Given a prompt, the model will return one predicted completion.\n\n    Optionally include a `system_prompt` to influence the way the LLM answers.\n\n    If `use_context`\n    is set to `true`, the model will use context coming from the ingested documents\n    to create the response. The documents being used can be filtered using the\n    `context_filter` and passing the document IDs to be used. Ingested documents IDs\n    can be found using `/ingest/list` endpoint. If you want all ingested documents to\n    be used, remove `context_filter` altogether.\n\n    When using `'include_sources': true`, the API will return the source Chunks used\n    to create the response, which come from the context provided.\n\n    When using `'stream': true`, the API will return data chunks following [OpenAI's\n    streaming model](https://platform.openai.com/docs/api-reference/chat/streaming):\n    ```\n    {\"id\":\"12345\",\"object\":\"completion.chunk\",\"created\":1694268190,\n    \"model\":\"private-gpt\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"Hello\"},\n    \"finish_reason\":null}]}\n    ```\n    \"\"\"\n    messages = [OpenAIMessage(content=body.prompt, role=\"user\")]\n    # If system prompt is passed, create a fake message with the system prompt.\n    if body.system_prompt:\n        messages.insert(0, OpenAIMessage(content=body.system_prompt, role=\"system\"))\n\n    chat_body = ChatBody(\n        messages=messages,\n        use_context=body.use_context,\n        stream=body.stream,\n        include_sources=body.include_sources,\n        context_filter=body.context_filter,\n    )\n    return chat_completion(request, chat_body)\n"
  },
  {
    "path": "private_gpt/server/embeddings/__init__.py",
    "content": ""
  },
  {
    "path": "private_gpt/server/embeddings/embeddings_router.py",
    "content": "from typing import Literal\n\nfrom fastapi import APIRouter, Depends, Request\nfrom pydantic import BaseModel\n\nfrom private_gpt.server.embeddings.embeddings_service import (\n    Embedding,\n    EmbeddingsService,\n)\nfrom private_gpt.server.utils.auth import authenticated\n\nembeddings_router = APIRouter(prefix=\"/v1\", dependencies=[Depends(authenticated)])\n\n\nclass EmbeddingsBody(BaseModel):\n    input: str | list[str]\n\n\nclass EmbeddingsResponse(BaseModel):\n    object: Literal[\"list\"]\n    model: Literal[\"private-gpt\"]\n    data: list[Embedding]\n\n\n@embeddings_router.post(\"/embeddings\", tags=[\"Embeddings\"])\ndef embeddings_generation(request: Request, body: EmbeddingsBody) -> EmbeddingsResponse:\n    \"\"\"Get a vector representation of a given input.\n\n    That vector representation can be easily consumed\n    by machine learning models and algorithms.\n    \"\"\"\n    service = request.state.injector.get(EmbeddingsService)\n    input_texts = body.input if isinstance(body.input, list) else [body.input]\n    embeddings = service.texts_embeddings(input_texts)\n    return EmbeddingsResponse(object=\"list\", model=\"private-gpt\", data=embeddings)\n"
  },
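  {
    "path": "examples/embeddings_example.py",
    "content": "\"\"\"Hypothetical usage sketch, not a file of the original project.\n\nRequests vectors from the `/v1/embeddings` endpoint defined in `embeddings_router.py`.\nAssumes a PrivateGPT server on localhost:8001 and the `requests` package.\n\"\"\"\nimport requests\n\nresponse = requests.post(\n    \"http://localhost:8001/v1/embeddings\",  # assumed local server\n    json={\"input\": [\"Q3 2023 sales\", \"Outbound sales increased 20%\"]},\n    timeout=60,\n)\nresponse.raise_for_status()\nfor item in response.json()[\"data\"]:\n    # Each item carries its position in the input batch and the embedding vector.\n    print(item[\"index\"], len(item[\"embedding\"]))\n"
  },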
  {
    "path": "private_gpt/server/embeddings/embeddings_service.py",
    "content": "from typing import Literal\n\nfrom injector import inject, singleton\nfrom pydantic import BaseModel, Field\n\nfrom private_gpt.components.embedding.embedding_component import EmbeddingComponent\n\n\nclass Embedding(BaseModel):\n    index: int\n    object: Literal[\"embedding\"]\n    embedding: list[float] = Field(examples=[[0.0023064255, -0.009327292]])\n\n\n@singleton\nclass EmbeddingsService:\n    @inject\n    def __init__(self, embedding_component: EmbeddingComponent) -> None:\n        self.embedding_model = embedding_component.embedding_model\n\n    def texts_embeddings(self, texts: list[str]) -> list[Embedding]:\n        texts_embeddings = self.embedding_model.get_text_embedding_batch(texts)\n        return [\n            Embedding(\n                index=texts_embeddings.index(embedding),\n                object=\"embedding\",\n                embedding=embedding,\n            )\n            for embedding in texts_embeddings\n        ]\n"
  },
  {
    "path": "private_gpt/server/health/__init__.py",
    "content": ""
  },
  {
    "path": "private_gpt/server/health/health_router.py",
    "content": "from typing import Literal\n\nfrom fastapi import APIRouter\nfrom pydantic import BaseModel, Field\n\n# Not authentication or authorization required to get the health status.\nhealth_router = APIRouter()\n\n\nclass HealthResponse(BaseModel):\n    status: Literal[\"ok\"] = Field(default=\"ok\")\n\n\n@health_router.get(\"/health\", tags=[\"Health\"])\ndef health() -> HealthResponse:\n    \"\"\"Return ok if the system is up.\"\"\"\n    return HealthResponse(status=\"ok\")\n"
  },
  {
    "path": "private_gpt/server/ingest/__init__.py",
    "content": ""
  },
  {
    "path": "private_gpt/server/ingest/ingest_router.py",
    "content": "from typing import Literal\n\nfrom fastapi import APIRouter, Depends, HTTPException, Request, UploadFile\nfrom pydantic import BaseModel, Field\n\nfrom private_gpt.server.ingest.ingest_service import IngestService\nfrom private_gpt.server.ingest.model import IngestedDoc\nfrom private_gpt.server.utils.auth import authenticated\n\ningest_router = APIRouter(prefix=\"/v1\", dependencies=[Depends(authenticated)])\n\n\nclass IngestTextBody(BaseModel):\n    file_name: str = Field(examples=[\"Avatar: The Last Airbender\"])\n    text: str = Field(\n        examples=[\n            \"Avatar is set in an Asian and Arctic-inspired world in which some \"\n            \"people can telekinetically manipulate one of the four elements—water, \"\n            \"earth, fire or air—through practices known as 'bending', inspired by \"\n            \"Chinese martial arts.\"\n        ]\n    )\n\n\nclass IngestResponse(BaseModel):\n    object: Literal[\"list\"]\n    model: Literal[\"private-gpt\"]\n    data: list[IngestedDoc]\n\n\n@ingest_router.post(\"/ingest\", tags=[\"Ingestion\"], deprecated=True)\ndef ingest(request: Request, file: UploadFile) -> IngestResponse:\n    \"\"\"Ingests and processes a file.\n\n    Deprecated. Use ingest/file instead.\n    \"\"\"\n    return ingest_file(request, file)\n\n\n@ingest_router.post(\"/ingest/file\", tags=[\"Ingestion\"])\ndef ingest_file(request: Request, file: UploadFile) -> IngestResponse:\n    \"\"\"Ingests and processes a file, storing its chunks to be used as context.\n\n    The context obtained from files is later used in\n    `/chat/completions`, `/completions`, and `/chunks` APIs.\n\n    Most common document\n    formats are supported, but you may be prompted to install an extra dependency to\n    manage a specific file type.\n\n    A file can generate different Documents (for example a PDF generates one Document\n    per page). All Documents IDs are returned in the response, together with the\n    extracted Metadata (which is later used to improve context retrieval). Those IDs\n    can be used to filter the context used to create responses in\n    `/chat/completions`, `/completions`, and `/chunks` APIs.\n    \"\"\"\n    service = request.state.injector.get(IngestService)\n    if file.filename is None:\n        raise HTTPException(400, \"No file name provided\")\n    ingested_documents = service.ingest_bin_data(file.filename, file.file)\n    return IngestResponse(object=\"list\", model=\"private-gpt\", data=ingested_documents)\n\n\n@ingest_router.post(\"/ingest/text\", tags=[\"Ingestion\"])\ndef ingest_text(request: Request, body: IngestTextBody) -> IngestResponse:\n    \"\"\"Ingests and processes a text, storing its chunks to be used as context.\n\n    The context obtained from files is later used in\n    `/chat/completions`, `/completions`, and `/chunks` APIs.\n\n    A Document will be generated with the given text. The Document\n    ID is returned in the response, together with the\n    extracted Metadata (which is later used to improve context retrieval). 
That ID\n    can be used to filter the context used to create responses in\n    `/chat/completions`, `/completions`, and `/chunks` APIs.\n    \"\"\"\n    service = request.state.injector.get(IngestService)\n    if len(body.file_name) == 0:\n        raise HTTPException(400, \"No file name provided\")\n    ingested_documents = service.ingest_text(body.file_name, body.text)\n    return IngestResponse(object=\"list\", model=\"private-gpt\", data=ingested_documents)\n\n\n@ingest_router.get(\"/ingest/list\", tags=[\"Ingestion\"])\ndef list_ingested(request: Request) -> IngestResponse:\n    \"\"\"Lists already ingested Documents including their Document ID and metadata.\n\n    Those IDs can be used to filter the context used to create responses\n    in `/chat/completions`, `/completions`, and `/chunks` APIs.\n    \"\"\"\n    service = request.state.injector.get(IngestService)\n    ingested_documents = service.list_ingested()\n    return IngestResponse(object=\"list\", model=\"private-gpt\", data=ingested_documents)\n\n\n@ingest_router.delete(\"/ingest/{doc_id}\", tags=[\"Ingestion\"])\ndef delete_ingested(request: Request, doc_id: str) -> None:\n    \"\"\"Delete the specified ingested Document.\n\n    The `doc_id` can be obtained from the `GET /ingest/list` endpoint.\n    The document will be effectively deleted from your storage context.\n    \"\"\"\n    service = request.state.injector.get(IngestService)\n    service.delete(doc_id)\n"
  },
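  {
    "path": "examples/ingest_file_example.py",
    "content": "\"\"\"Hypothetical usage sketch, not a file of the original project.\n\nUploads a document to the `/v1/ingest/file` endpoint defined in `ingest_router.py`.\nAssumes a PrivateGPT server on localhost:8001 and the `requests` package; the file name\nis a placeholder.\n\"\"\"\nfrom pathlib import Path\n\nimport requests\n\nAPI_URL = \"http://localhost:8001/v1/ingest/file\"  # assumed local server\n\n\ndef ingest_file(path: Path) -> list[str]:\n    # The endpoint expects a multipart/form-data upload under the `file` field.\n    with path.open(\"rb\") as fh:\n        response = requests.post(API_URL, files={\"file\": (path.name, fh)}, timeout=300)\n    response.raise_for_status()\n    # The returned Document IDs can later be used in a `context_filter`.\n    return [doc[\"doc_id\"] for doc in response.json()[\"data\"]]\n\n\nif __name__ == \"__main__\":\n    print(ingest_file(Path(\"Sales Report Q3 2023.pdf\")))\n"
  },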
  {
    "path": "private_gpt/server/ingest/ingest_service.py",
    "content": "import logging\nimport tempfile\nfrom pathlib import Path\nfrom typing import TYPE_CHECKING, AnyStr, BinaryIO\n\nfrom injector import inject, singleton\nfrom llama_index.core.node_parser import SentenceWindowNodeParser\nfrom llama_index.core.storage import StorageContext\n\nfrom private_gpt.components.embedding.embedding_component import EmbeddingComponent\nfrom private_gpt.components.ingest.ingest_component import get_ingestion_component\nfrom private_gpt.components.llm.llm_component import LLMComponent\nfrom private_gpt.components.node_store.node_store_component import NodeStoreComponent\nfrom private_gpt.components.vector_store.vector_store_component import (\n    VectorStoreComponent,\n)\nfrom private_gpt.server.ingest.model import IngestedDoc\nfrom private_gpt.settings.settings import settings\n\nif TYPE_CHECKING:\n    from llama_index.core.storage.docstore.types import RefDocInfo\n\nlogger = logging.getLogger(__name__)\n\n\n@singleton\nclass IngestService:\n    @inject\n    def __init__(\n        self,\n        llm_component: LLMComponent,\n        vector_store_component: VectorStoreComponent,\n        embedding_component: EmbeddingComponent,\n        node_store_component: NodeStoreComponent,\n    ) -> None:\n        self.llm_service = llm_component\n        self.storage_context = StorageContext.from_defaults(\n            vector_store=vector_store_component.vector_store,\n            docstore=node_store_component.doc_store,\n            index_store=node_store_component.index_store,\n        )\n        node_parser = SentenceWindowNodeParser.from_defaults()\n\n        self.ingest_component = get_ingestion_component(\n            self.storage_context,\n            embed_model=embedding_component.embedding_model,\n            transformations=[node_parser, embedding_component.embedding_model],\n            settings=settings(),\n        )\n\n    def _ingest_data(self, file_name: str, file_data: AnyStr) -> list[IngestedDoc]:\n        logger.debug(\"Got file data of size=%s to ingest\", len(file_data))\n        # llama-index mainly supports reading from files, so\n        # we have to create a tmp file to read for it to work\n        # delete=False to avoid a Windows 11 permission error.\n        with tempfile.NamedTemporaryFile(delete=False) as tmp:\n            try:\n                path_to_tmp = Path(tmp.name)\n                if isinstance(file_data, bytes):\n                    path_to_tmp.write_bytes(file_data)\n                else:\n                    path_to_tmp.write_text(str(file_data))\n                return self.ingest_file(file_name, path_to_tmp)\n            finally:\n                tmp.close()\n                path_to_tmp.unlink()\n\n    def ingest_file(self, file_name: str, file_data: Path) -> list[IngestedDoc]:\n        logger.info(\"Ingesting file_name=%s\", file_name)\n        documents = self.ingest_component.ingest(file_name, file_data)\n        logger.info(\"Finished ingestion file_name=%s\", file_name)\n        return [IngestedDoc.from_document(document) for document in documents]\n\n    def ingest_text(self, file_name: str, text: str) -> list[IngestedDoc]:\n        logger.debug(\"Ingesting text data with file_name=%s\", file_name)\n        return self._ingest_data(file_name, text)\n\n    def ingest_bin_data(\n        self, file_name: str, raw_file_data: BinaryIO\n    ) -> list[IngestedDoc]:\n        logger.debug(\"Ingesting binary data with file_name=%s\", file_name)\n        file_data = raw_file_data.read()\n        return 
self._ingest_data(file_name, file_data)\n\n    def bulk_ingest(self, files: list[tuple[str, Path]]) -> list[IngestedDoc]:\n        logger.info(\"Ingesting file_names=%s\", [f[0] for f in files])\n        documents = self.ingest_component.bulk_ingest(files)\n        logger.info(\"Finished ingestion file_name=%s\", [f[0] for f in files])\n        return [IngestedDoc.from_document(document) for document in documents]\n\n    def list_ingested(self) -> list[IngestedDoc]:\n        ingested_docs: list[IngestedDoc] = []\n        try:\n            docstore = self.storage_context.docstore\n            ref_docs: dict[str, RefDocInfo] | None = docstore.get_all_ref_doc_info()\n\n            if not ref_docs:\n                return ingested_docs\n\n            for doc_id, ref_doc_info in ref_docs.items():\n                doc_metadata = None\n                if ref_doc_info is not None and ref_doc_info.metadata is not None:\n                    doc_metadata = IngestedDoc.curate_metadata(ref_doc_info.metadata)\n                ingested_docs.append(\n                    IngestedDoc(\n                        object=\"ingest.document\",\n                        doc_id=doc_id,\n                        doc_metadata=doc_metadata,\n                    )\n                )\n        except ValueError:\n            logger.warning(\"Got an exception when getting list of docs\", exc_info=True)\n            pass\n        logger.debug(\"Found count=%s ingested documents\", len(ingested_docs))\n        return ingested_docs\n\n    def delete(self, doc_id: str) -> None:\n        \"\"\"Delete an ingested document.\n\n        :raises ValueError: if the document does not exist\n        \"\"\"\n        logger.info(\n            \"Deleting the ingested document=%s in the doc and index store\", doc_id\n        )\n        self.ingest_component.delete(doc_id)\n"
  },
  {
    "path": "private_gpt/server/ingest/ingest_watcher.py",
    "content": "from collections.abc import Callable\nfrom pathlib import Path\nfrom typing import Any\n\nfrom watchdog.events import (\n    FileCreatedEvent,\n    FileModifiedEvent,\n    FileSystemEvent,\n    FileSystemEventHandler,\n)\nfrom watchdog.observers import Observer\n\n\nclass IngestWatcher:\n    def __init__(\n        self, watch_path: Path, on_file_changed: Callable[[Path], None]\n    ) -> None:\n        self.watch_path = watch_path\n        self.on_file_changed = on_file_changed\n\n        class Handler(FileSystemEventHandler):\n            def on_modified(self, event: FileSystemEvent) -> None:\n                if isinstance(event, FileModifiedEvent):\n                    on_file_changed(Path(event.src_path))\n\n            def on_created(self, event: FileSystemEvent) -> None:\n                if isinstance(event, FileCreatedEvent):\n                    on_file_changed(Path(event.src_path))\n\n        event_handler = Handler()\n        observer: Any = Observer()\n        self._observer = observer\n        self._observer.schedule(event_handler, str(watch_path), recursive=True)\n\n    def start(self) -> None:\n        self._observer.start()\n        while self._observer.is_alive():\n            try:\n                self._observer.join(1)\n            except KeyboardInterrupt:\n                break\n\n    def stop(self) -> None:\n        self._observer.stop()\n        self._observer.join()\n"
  },
  {
    "path": "private_gpt/server/ingest/model.py",
    "content": "from typing import Any, Literal\n\nfrom llama_index.core.schema import Document\nfrom pydantic import BaseModel, Field\n\n\nclass IngestedDoc(BaseModel):\n    object: Literal[\"ingest.document\"]\n    doc_id: str = Field(examples=[\"c202d5e6-7b69-4869-81cc-dd574ee8ee11\"])\n    doc_metadata: dict[str, Any] | None = Field(\n        examples=[\n            {\n                \"page_label\": \"2\",\n                \"file_name\": \"Sales Report Q3 2023.pdf\",\n            }\n        ]\n    )\n\n    @staticmethod\n    def curate_metadata(metadata: dict[str, Any]) -> dict[str, Any]:\n        \"\"\"Remove unwanted metadata keys.\"\"\"\n        for key in [\"doc_id\", \"window\", \"original_text\"]:\n            metadata.pop(key, None)\n        return metadata\n\n    @staticmethod\n    def from_document(document: Document) -> \"IngestedDoc\":\n        return IngestedDoc(\n            object=\"ingest.document\",\n            doc_id=document.doc_id,\n            doc_metadata=IngestedDoc.curate_metadata(document.metadata),\n        )\n"
  },
  {
    "path": "private_gpt/server/recipes/summarize/__init__.py",
    "content": ""
  },
  {
    "path": "private_gpt/server/recipes/summarize/summarize_router.py",
    "content": "from fastapi import APIRouter, Depends, Request\nfrom pydantic import BaseModel\nfrom starlette.responses import StreamingResponse\n\nfrom private_gpt.open_ai.extensions.context_filter import ContextFilter\nfrom private_gpt.open_ai.openai_models import (\n    to_openai_sse_stream,\n)\nfrom private_gpt.server.recipes.summarize.summarize_service import SummarizeService\nfrom private_gpt.server.utils.auth import authenticated\n\nsummarize_router = APIRouter(prefix=\"/v1\", dependencies=[Depends(authenticated)])\n\n\nclass SummarizeBody(BaseModel):\n    text: str | None = None\n    use_context: bool = False\n    context_filter: ContextFilter | None = None\n    prompt: str | None = None\n    instructions: str | None = None\n    stream: bool = False\n\n\nclass SummarizeResponse(BaseModel):\n    summary: str\n\n\n@summarize_router.post(\n    \"/summarize\",\n    response_model=None,\n    summary=\"Summarize\",\n    responses={200: {\"model\": SummarizeResponse}},\n    tags=[\"Recipes\"],\n)\ndef summarize(\n    request: Request, body: SummarizeBody\n) -> SummarizeResponse | StreamingResponse:\n    \"\"\"Given a text, the model will return a summary.\n\n    Optionally include `instructions` to influence the way the summary is generated.\n\n    If `use_context`\n    is set to `true`, the model will also use the content coming from the ingested\n    documents in the summary. The documents being used can\n    be filtered by their metadata using the `context_filter`.\n    Ingested documents metadata can be found using `/ingest/list` endpoint.\n    If you want all ingested documents to be used, remove `context_filter` altogether.\n\n    If `prompt` is set, it will be used as the prompt for the summarization,\n    otherwise the default prompt will be used.\n\n    When using `'stream': true`, the API will return data chunks following [OpenAI's\n    streaming model](https://platform.openai.com/docs/api-reference/chat/streaming):\n    ```\n    {\"id\":\"12345\",\"object\":\"completion.chunk\",\"created\":1694268190,\n    \"model\":\"private-gpt\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"Hello\"},\n    \"finish_reason\":null}]}\n    ```\n    \"\"\"\n    service: SummarizeService = request.state.injector.get(SummarizeService)\n\n    if body.stream:\n        completion_gen = service.stream_summarize(\n            text=body.text,\n            instructions=body.instructions,\n            use_context=body.use_context,\n            context_filter=body.context_filter,\n            prompt=body.prompt,\n        )\n        return StreamingResponse(\n            to_openai_sse_stream(\n                response_generator=completion_gen,\n            ),\n            media_type=\"text/event-stream\",\n        )\n    else:\n        completion = service.summarize(\n            text=body.text,\n            instructions=body.instructions,\n            use_context=body.use_context,\n            context_filter=body.context_filter,\n            prompt=body.prompt,\n        )\n        return SummarizeResponse(\n            summary=completion,\n        )\n"
  },
  {
    "path": "private_gpt/server/recipes/summarize/summarize_service.py",
    "content": "from itertools import chain\n\nfrom injector import inject, singleton\nfrom llama_index.core import (\n    Document,\n    StorageContext,\n    SummaryIndex,\n)\nfrom llama_index.core.base.response.schema import Response, StreamingResponse\nfrom llama_index.core.node_parser import SentenceSplitter\nfrom llama_index.core.response_synthesizers import ResponseMode\nfrom llama_index.core.storage.docstore.types import RefDocInfo\nfrom llama_index.core.types import TokenGen\n\nfrom private_gpt.components.embedding.embedding_component import EmbeddingComponent\nfrom private_gpt.components.llm.llm_component import LLMComponent\nfrom private_gpt.components.node_store.node_store_component import NodeStoreComponent\nfrom private_gpt.components.vector_store.vector_store_component import (\n    VectorStoreComponent,\n)\nfrom private_gpt.open_ai.extensions.context_filter import ContextFilter\nfrom private_gpt.settings.settings import Settings\n\nDEFAULT_SUMMARIZE_PROMPT = (\n    \"Provide a comprehensive summary of the provided context information. \"\n    \"The summary should cover all the key points and main ideas presented in \"\n    \"the original text, while also condensing the information into a concise \"\n    \"and easy-to-understand format. Please ensure that the summary includes \"\n    \"relevant details and examples that support the main ideas, while avoiding \"\n    \"any unnecessary information or repetition.\"\n)\n\n\n@singleton\nclass SummarizeService:\n    @inject\n    def __init__(\n        self,\n        settings: Settings,\n        llm_component: LLMComponent,\n        node_store_component: NodeStoreComponent,\n        vector_store_component: VectorStoreComponent,\n        embedding_component: EmbeddingComponent,\n    ) -> None:\n        self.settings = settings\n        self.llm_component = llm_component\n        self.node_store_component = node_store_component\n        self.vector_store_component = vector_store_component\n        self.embedding_component = embedding_component\n        self.storage_context = StorageContext.from_defaults(\n            vector_store=vector_store_component.vector_store,\n            docstore=node_store_component.doc_store,\n            index_store=node_store_component.index_store,\n        )\n\n    @staticmethod\n    def _filter_ref_docs(\n        ref_docs: dict[str, RefDocInfo], context_filter: ContextFilter | None\n    ) -> list[RefDocInfo]:\n        if context_filter is None or not context_filter.docs_ids:\n            return list(ref_docs.values())\n\n        return [\n            ref_doc\n            for doc_id, ref_doc in ref_docs.items()\n            if doc_id in context_filter.docs_ids\n        ]\n\n    def _summarize(\n        self,\n        use_context: bool = False,\n        stream: bool = False,\n        text: str | None = None,\n        instructions: str | None = None,\n        context_filter: ContextFilter | None = None,\n        prompt: str | None = None,\n    ) -> str | TokenGen:\n\n        nodes_to_summarize = []\n\n        # Add text to summarize\n        if text:\n            text_documents = [Document(text=text)]\n            nodes_to_summarize += (\n                SentenceSplitter.from_defaults().get_nodes_from_documents(\n                    text_documents\n                )\n            )\n\n        # Add context documents to summarize\n        if use_context:\n            # 1. 
Recover all ref docs\n            ref_docs: dict[str, RefDocInfo] | None = (\n                self.storage_context.docstore.get_all_ref_doc_info()\n            )\n            if ref_docs is None:\n                raise ValueError(\"No documents have been ingested yet.\")\n\n            # 2. Filter documents based on context_filter (if provided)\n            filtered_ref_docs = self._filter_ref_docs(ref_docs, context_filter)\n\n            # 3. Get all nodes from the filtered documents\n            filtered_node_ids = chain.from_iterable(\n                [ref_doc.node_ids for ref_doc in filtered_ref_docs]\n            )\n            filtered_nodes = self.storage_context.docstore.get_nodes(\n                node_ids=list(filtered_node_ids),\n            )\n\n            nodes_to_summarize += filtered_nodes\n\n        # Create a SummaryIndex to summarize the nodes\n        summary_index = SummaryIndex(\n            nodes=nodes_to_summarize,\n            storage_context=StorageContext.from_defaults(),  # In memory SummaryIndex\n            show_progress=True,\n        )\n\n        # Make a tree summarization query\n        # above the set of all candidate nodes\n        query_engine = summary_index.as_query_engine(\n            llm=self.llm_component.llm,\n            response_mode=ResponseMode.TREE_SUMMARIZE,\n            streaming=stream,\n            use_async=self.settings.summarize.use_async,\n        )\n\n        prompt = prompt or DEFAULT_SUMMARIZE_PROMPT\n\n        summarize_query = prompt + \"\\n\" + (instructions or \"\")\n\n        response = query_engine.query(summarize_query)\n        if isinstance(response, Response):\n            return response.response or \"\"\n        elif isinstance(response, StreamingResponse):\n            return response.response_gen\n        else:\n            raise TypeError(f\"The result is not of a supported type: {type(response)}\")\n\n    def summarize(\n        self,\n        use_context: bool = False,\n        text: str | None = None,\n        instructions: str | None = None,\n        context_filter: ContextFilter | None = None,\n        prompt: str | None = None,\n    ) -> str:\n        return self._summarize(\n            use_context=use_context,\n            stream=False,\n            text=text,\n            instructions=instructions,\n            context_filter=context_filter,\n            prompt=prompt,\n        )  # type: ignore\n\n    def stream_summarize(\n        self,\n        use_context: bool = False,\n        text: str | None = None,\n        instructions: str | None = None,\n        context_filter: ContextFilter | None = None,\n        prompt: str | None = None,\n    ) -> TokenGen:\n        return self._summarize(\n            use_context=use_context,\n            stream=True,\n            text=text,\n            instructions=instructions,\n            context_filter=context_filter,\n            prompt=prompt,\n        )  # type: ignore\n"
  },
  {
    "path": "private_gpt/server/utils/__init__.py",
    "content": ""
  },
  {
    "path": "private_gpt/server/utils/auth.py",
    "content": "\"\"\"Authentication mechanism for the API.\n\nDefine a simple mechanism to authenticate requests.\nMore complex authentication mechanisms can be defined here, and be placed in the\n`authenticated` method (being a 'bean' injected in fastapi routers).\n\nAuthorization can also be made after the authentication, and depends on\nthe authentication. Authorization should not be implemented in this file.\n\nAuthorization can be done by following fastapi's guides:\n* https://fastapi.tiangolo.com/advanced/security/oauth2-scopes/\n* https://fastapi.tiangolo.com/tutorial/security/\n* https://fastapi.tiangolo.com/tutorial/dependencies/dependencies-in-path-operation-decorators/\n\"\"\"\n\n# mypy: ignore-errors\n# Disabled mypy error: All conditional function variants must have identical signatures\n# We are changing the implementation of the authenticated method, based on\n# the config. If the auth is not enabled, we are not defining the complex method\n# with its dependencies.\nimport logging\nimport secrets\nfrom typing import Annotated\n\nfrom fastapi import Depends, Header, HTTPException\n\nfrom private_gpt.settings.settings import settings\n\n# 401 signify that the request requires authentication.\n# 403 signify that the authenticated user is not authorized to perform the operation.\nNOT_AUTHENTICATED = HTTPException(\n    status_code=401,\n    detail=\"Not authenticated\",\n    headers={\"WWW-Authenticate\": 'Basic realm=\"All the API\", charset=\"UTF-8\"'},\n)\n\nlogger = logging.getLogger(__name__)\n\n\ndef _simple_authentication(authorization: Annotated[str, Header()] = \"\") -> bool:\n    \"\"\"Check if the request is authenticated.\"\"\"\n    if not secrets.compare_digest(authorization, settings().server.auth.secret):\n        # If the \"Authorization\" header is not the expected one, raise an exception.\n        raise NOT_AUTHENTICATED\n    return True\n\n\nif not settings().server.auth.enabled:\n    logger.debug(\n        \"Defining a dummy authentication mechanism for fastapi, always authenticating requests\"\n    )\n\n    # Define a dummy authentication method that always returns True.\n    def authenticated() -> bool:\n        \"\"\"Check if the request is authenticated.\"\"\"\n        return True\n\nelse:\n    logger.info(\"Defining the given authentication mechanism for the API\")\n\n    # Method to be used as a dependency to check if the request is authenticated.\n    def authenticated(\n        _simple_authentication: Annotated[bool, Depends(_simple_authentication)]\n    ) -> bool:\n        \"\"\"Check if the request is authenticated.\"\"\"\n        assert settings().server.auth.enabled\n        if not _simple_authentication:\n            raise NOT_AUTHENTICATED\n        return True\n"
  },
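  {
    "path": "examples/authenticated_request_example.py",
    "content": "\"\"\"Hypothetical usage sketch, not a file of the original project.\n\nCalls an authenticated endpoint when `server.auth.enabled` is true (see `auth.py`).\nThe request is rejected with 401 unless the `Authorization` header matches the\nconfigured `server.auth.secret` verbatim; the URL and secret below are placeholders.\n\"\"\"\nimport requests\n\nSECRET = \"Basic c2VjcmV0OmtleQ==\"  # placeholder; must equal the configured server.auth.secret\n\nresponse = requests.get(\n    \"http://localhost:8001/v1/ingest/list\",  # assumed local server\n    headers={\"Authorization\": SECRET},\n    timeout=30,\n)\nresponse.raise_for_status()\nprint(response.json())\n"
  },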
  {
    "path": "private_gpt/settings/__init__.py",
    "content": "\"\"\"Settings.\"\"\"\n"
  },
  {
    "path": "private_gpt/settings/settings.py",
    "content": "from typing import Any, Literal\n\nfrom pydantic import BaseModel, Field\n\nfrom private_gpt.settings.settings_loader import load_active_settings\n\n\nclass CorsSettings(BaseModel):\n    \"\"\"CORS configuration.\n\n    For more details on the CORS configuration, see:\n    # * https://fastapi.tiangolo.com/tutorial/cors/\n    # * https://developer.mozilla.org/en-US/docs/Web/HTTP/CORS\n    \"\"\"\n\n    enabled: bool = Field(\n        description=\"Flag indicating if CORS headers are set or not.\"\n        \"If set to True, the CORS headers will be set to allow all origins, methods and headers.\",\n        default=False,\n    )\n    allow_credentials: bool = Field(\n        description=\"Indicate that cookies should be supported for cross-origin requests\",\n        default=False,\n    )\n    allow_origins: list[str] = Field(\n        description=\"A list of origins that should be permitted to make cross-origin requests.\",\n        default=[],\n    )\n    allow_origin_regex: list[str] = Field(\n        description=\"A regex string to match against origins that should be permitted to make cross-origin requests.\",\n        default=None,\n    )\n    allow_methods: list[str] = Field(\n        description=\"A list of HTTP methods that should be allowed for cross-origin requests.\",\n        default=[\n            \"GET\",\n        ],\n    )\n    allow_headers: list[str] = Field(\n        description=\"A list of HTTP request headers that should be supported for cross-origin requests.\",\n        default=[],\n    )\n\n\nclass AuthSettings(BaseModel):\n    \"\"\"Authentication configuration.\n\n    The implementation of the authentication strategy must\n    \"\"\"\n\n    enabled: bool = Field(\n        description=\"Flag indicating if authentication is enabled or not.\",\n        default=False,\n    )\n    secret: str = Field(\n        description=\"The secret to be used for authentication. \"\n        \"It can be any non-blank string. For HTTP basic authentication, \"\n        \"this value should be the whole 'Authorization' header that is expected\"\n    )\n\n\nclass IngestionSettings(BaseModel):\n    \"\"\"Ingestion configuration.\n\n    This configuration is used to control the ingestion of data into the system\n    using non-server methods. 
This is useful for local development and testing;\n    or to ingest in bulk from a folder.\n\n    Please note that this configuration is not secure and should be used in\n    a controlled environment only (setting right permissions, etc.).\n    \"\"\"\n\n    enabled: bool = Field(\n        description=\"Flag indicating if local ingestion is enabled or not.\",\n        default=False,\n    )\n    allow_ingest_from: list[str] = Field(\n        description=\"A list of folders that should be permitted to make ingest requests.\",\n        default=[],\n    )\n\n\nclass ServerSettings(BaseModel):\n    env_name: str = Field(\n        description=\"Name of the environment (prod, staging, local...)\"\n    )\n    port: int = Field(description=\"Port of PrivateGPT FastAPI server, defaults to 8001\")\n    cors: CorsSettings = Field(\n        description=\"CORS configuration\", default=CorsSettings(enabled=False)\n    )\n    auth: AuthSettings = Field(\n        description=\"Authentication configuration\",\n        default_factory=lambda: AuthSettings(enabled=False, secret=\"secret-key\"),\n    )\n\n\nclass DataSettings(BaseModel):\n    local_ingestion: IngestionSettings = Field(\n        description=\"Ingestion configuration\",\n        default_factory=lambda: IngestionSettings(allow_ingest_from=[\"*\"]),\n    )\n    local_data_folder: str = Field(\n        description=\"Path to local storage.\"\n        \"It will be treated as an absolute path if it starts with /\"\n    )\n\n\nclass LLMSettings(BaseModel):\n    mode: Literal[\n        \"llamacpp\",\n        \"openai\",\n        \"openailike\",\n        \"azopenai\",\n        \"sagemaker\",\n        \"mock\",\n        \"ollama\",\n        \"gemini\",\n    ]\n    max_new_tokens: int = Field(\n        256,\n        description=\"The maximum number of token that the LLM is authorized to generate in one completion.\",\n    )\n    context_window: int = Field(\n        3900,\n        description=\"The maximum number of context tokens for the model.\",\n    )\n    tokenizer: str = Field(\n        None,\n        description=\"The model id of a predefined tokenizer hosted inside a model repo on \"\n        \"huggingface.co. Valid model ids can be located at the root-level, like \"\n        \"`bert-base-uncased`, or namespaced under a user or organization name, \"\n        \"like `HuggingFaceH4/zephyr-7b-beta`. If not set, will load a tokenizer matching \"\n        \"gpt-3.5-turbo LLM.\",\n    )\n    temperature: float = Field(\n        0.1,\n        description=\"The temperature of the model. Increasing the temperature will make the model answer more creatively. A value of 0.1 would be more factual.\",\n    )\n    prompt_style: Literal[\"default\", \"llama2\", \"llama3\", \"tag\", \"mistral\", \"chatml\"] = (\n        Field(\n            \"llama2\",\n            description=(\n                \"The prompt style to use for the chat engine. \"\n                \"If `default` - use the default prompt style from the llama_index. It should look like `role: message`.\\n\"\n                \"If `llama2` - use the llama2 prompt style from the llama_index. Based on `<s>`, `[INST]` and `<<SYS>>`.\\n\"\n                \"If `llama3` - use the llama3 prompt style from the llama_index.\"\n                \"If `tag` - use the `tag` prompt style. It should look like `<|role|>: message`. \\n\"\n                \"If `mistral` - use the `mistral prompt style. 
It should look like <s>[INST] {System Prompt} [/INST]</s>[INST] { UserInstructions } [/INST]\"\n                \"`llama2` is the historic behaviour. `default` might work better with your custom models.\"\n            ),\n        )\n    )\n\n\nclass VectorstoreSettings(BaseModel):\n    database: Literal[\"chroma\", \"qdrant\", \"postgres\", \"clickhouse\", \"milvus\"]\n\n\nclass NodeStoreSettings(BaseModel):\n    database: Literal[\"simple\", \"postgres\"]\n\n\nclass LlamaCPPSettings(BaseModel):\n    llm_hf_repo_id: str\n    llm_hf_model_file: str\n    tfs_z: float = Field(\n        1.0,\n        description=\"Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting.\",\n    )\n    top_k: int = Field(\n        40,\n        description=\"Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40)\",\n    )\n    top_p: float = Field(\n        0.9,\n        description=\"Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9)\",\n    )\n    repeat_penalty: float = Field(\n        1.1,\n        description=\"Sets how strongly to penalize repetitions. A higher value (e.g., 1.5) will penalize repetitions more strongly, while a lower value (e.g., 0.9) will be more lenient. (Default: 1.1)\",\n    )\n\n\nclass HuggingFaceSettings(BaseModel):\n    embedding_hf_model_name: str = Field(\n        description=\"Name of the HuggingFace model to use for embeddings\"\n    )\n    access_token: str = Field(\n        None,\n        description=\"Huggingface access token, required to download some models\",\n    )\n    trust_remote_code: bool = Field(\n        False,\n        description=\"If set to True, the code from the remote model will be trusted and executed.\",\n    )\n\n\nclass EmbeddingSettings(BaseModel):\n    mode: Literal[\n        \"huggingface\",\n        \"openai\",\n        \"azopenai\",\n        \"sagemaker\",\n        \"ollama\",\n        \"mock\",\n        \"gemini\",\n        \"mistralai\",\n    ]\n    ingest_mode: Literal[\"simple\", \"batch\", \"parallel\", \"pipeline\"] = Field(\n        \"simple\",\n        description=(\n            \"The ingest mode to use for the embedding engine:\\n\"\n            \"If `simple` - ingest files sequentially and one by one. It is the historic behaviour.\\n\"\n            \"If `batch` - if multiple files, parse all the files in parallel, \"\n            \"and send them in batch to the embedding model.\\n\"\n            \"If `pipeline` - the embedding engine is kept as busy as possible.\\n\"\n            \"If `parallel` - parse the files in parallel using multiple cores, and embed them in parallel.\\n\"\n            \"`parallel` is the fastest mode for local setup, as it parallelizes IO RW in the index.\\n\"\n            \"For modes that leverage parallelization, you can specify the number of \"\n            \"workers to use with `count_workers`.\\n\"\n        ),\n    )\n    count_workers: int = Field(\n        2,\n        description=(\n            \"The number of workers to use for file ingestion.\\n\"\n            \"In `batch` mode, this is the number of workers used to parse the files.\\n\"\n            \"In `parallel` mode, this is the number of workers used to parse the files and embed them.\\n\"\n            \"In `pipeline` mode, this is the number of workers that can perform embeddings.\\n\"\n            \"This is only used if `ingest_mode` is not `simple`.\\n\"\n            \"Do not go too high with this number, as it might cause memory issues. (especially in `parallel` mode)\\n\"\n            \"Do not set it higher than the number of threads of your CPU.\"\n        ),\n    )\n    embed_dim: int = Field(\n        384,\n        description=\"The dimension of the embeddings stored in the Postgres database\",\n    )\n\n\nclass SagemakerSettings(BaseModel):\n    llm_endpoint_name: str\n    embedding_endpoint_name: str\n\n\nclass OpenAISettings(BaseModel):\n    api_base: str = Field(\n        None,\n        description=\"Base URL of OpenAI API. Example: 'https://api.openai.com/v1'.\",\n    )\n    api_key: str\n    model: str = Field(\n        \"gpt-3.5-turbo\",\n        description=\"OpenAI Model to use. Example: 'gpt-4'.\",\n    )\n    request_timeout: float = Field(\n        120.0,\n        description=\"Time elapsed until openailike server times out the request. Default is 120s. Format is float. \",\n    )\n    embedding_api_base: str = Field(\n        None,\n        description=\"Base URL of OpenAI API. Example: 'https://api.openai.com/v1'.\",\n    )\n    embedding_api_key: str\n    embedding_model: str = Field(\n        \"text-embedding-ada-002\",\n        description=\"OpenAI embedding Model to use. Example: 'text-embedding-3-large'.\",\n    )\n\n\nclass GeminiSettings(BaseModel):\n    api_key: str\n    model: str = Field(\n        \"models/gemini-pro\",\n        description=\"Google Model to use. Example: 'models/gemini-pro'.\",\n    )\n    embedding_model: str = Field(\n        \"models/embedding-001\",\n        description=\"Google Embedding Model to use. Example: 'models/embedding-001'.\",\n    )\n\n\nclass OllamaSettings(BaseModel):\n    api_base: str = Field(\n        \"http://localhost:11434\",\n        description=\"Base URL of Ollama API. Example: 'https://localhost:11434'.\",\n    )\n    embedding_api_base: str = Field(\n        \"http://localhost:11434\",\n        description=\"Base URL of Ollama embedding API. Example: 'https://localhost:11434'.\",\n    )\n    llm_model: str = Field(\n        None,\n        description=\"Model to use. Example: 'llama2-uncensored'.\",\n    )\n    embedding_model: str = Field(\n        None,\n        description=\"Model to use. Example: 'nomic-embed-text'.\",\n    )\n    keep_alive: str = Field(\n        \"5m\",\n        description=\"Time the model will stay loaded in memory after a request. Examples: 5m, 5h, '-1'.\",\n    )\n    tfs_z: float = Field(\n        1.0,\n        description=\"Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting.\",\n    )\n    num_predict: int = Field(\n        None,\n        description=\"Maximum number of tokens to predict when generating text. (Default: 128, -1 = infinite generation, -2 = fill context)\",\n    )\n    top_k: int = Field(\n        40,\n        description=\"Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40)\",\n    )\n    top_p: float = Field(\n        0.9,\n        description=\"Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9)\",\n    )\n    repeat_last_n: int = Field(\n        64,\n        description=\"Sets how far back the model looks to prevent repetition. (Default: 64, 0 = disabled, -1 = num_ctx)\",\n    )\n    repeat_penalty: float = Field(\n        1.1,\n        description=\"Sets how strongly to penalize repetitions. A higher value (e.g., 1.5) will penalize repetitions more strongly, while a lower value (e.g., 0.9) will be more lenient. (Default: 1.1)\",\n    )\n    request_timeout: float = Field(\n        120.0,\n        description=\"Time elapsed until ollama times out the request. Default is 120s. Format is float. \",\n    )\n    autopull_models: bool = Field(\n        False,\n        description=\"If set to True, Ollama will automatically pull the models from the API base.\",\n    )\n\n\nclass AzureOpenAISettings(BaseModel):\n    api_key: str\n    azure_endpoint: str\n    api_version: str = Field(\n        \"2023_05_15\",\n        description=\"The API version to use for this operation. This follows the YYYY-MM-DD format.\",\n    )\n    embedding_deployment_name: str\n    embedding_model: str = Field(\n        \"text-embedding-ada-002\",\n        description=\"OpenAI Model to use. Example: 'text-embedding-ada-002'.\",\n    )\n    llm_deployment_name: str\n    llm_model: str = Field(\n        \"gpt-35-turbo\",\n        description=\"OpenAI Model to use. 
Example: 'gpt-4'.\",\n    )\n\n\nclass UISettings(BaseModel):\n    enabled: bool\n    path: str\n    default_mode: Literal[\"RAG\", \"Search\", \"Basic\", \"Summarize\"] = Field(\n        \"RAG\",\n        description=\"The default mode.\",\n    )\n    default_chat_system_prompt: str = Field(\n        None,\n        description=\"The default system prompt to use for the chat mode.\",\n    )\n    default_query_system_prompt: str = Field(\n        None, description=\"The default system prompt to use for the query mode.\"\n    )\n    default_summarization_system_prompt: str = Field(\n        None,\n        description=\"The default system prompt to use for the summarization mode.\",\n    )\n    delete_file_button_enabled: bool = Field(\n        True, description=\"If the button to delete a file is enabled or not.\"\n    )\n    delete_all_files_button_enabled: bool = Field(\n        False, description=\"If the button to delete all files is enabled or not.\"\n    )\n\n\nclass RerankSettings(BaseModel):\n    enabled: bool = Field(\n        False,\n        description=\"This value controls whether a reranker should be included in the RAG pipeline.\",\n    )\n    model: str = Field(\n        \"cross-encoder/ms-marco-MiniLM-L-2-v2\",\n        description=\"Rerank model to use. Limited to SentenceTransformer cross-encoder models.\",\n    )\n    top_n: int = Field(\n        2,\n        description=\"This value controls the number of documents returned by the RAG pipeline.\",\n    )\n\n\nclass RagSettings(BaseModel):\n    similarity_top_k: int = Field(\n        2,\n        description=\"This value controls the number of documents returned by the RAG pipeline or considered for reranking if enabled.\",\n    )\n    similarity_value: float = Field(\n        None,\n        description=\"If set, any documents retrieved from the RAG must meet a certain match score. Acceptable values are between 0 and 1.\",\n    )\n    rerank: RerankSettings\n\n\nclass SummarizeSettings(BaseModel):\n    use_async: bool = Field(\n        True,\n        description=\"If set to True, the summarization will be done asynchronously.\",\n    )\n\n\nclass ClickHouseSettings(BaseModel):\n    host: str = Field(\n        \"localhost\",\n        description=\"The server hosting the ClickHouse database\",\n    )\n    port: int = Field(\n        8443,\n        description=\"The port on which the ClickHouse database is accessible\",\n    )\n    username: str = Field(\n        \"default\",\n        description=\"The username to use to connect to the ClickHouse database\",\n    )\n    password: str = Field(\n        \"\",\n        description=\"The password to use to connect to the ClickHouse database\",\n    )\n    database: str = Field(\n        \"__default__\",\n        description=\"The default database to use for connections\",\n    )\n    secure: bool | str = Field(\n        False,\n        description=\"Use https/TLS for secure connection to the server\",\n    )\n    interface: str | None = Field(\n        None,\n        description=\"Must be either 'http' or 'https'. 
Determines the protocol to use for the connection\",\n    )\n    settings: dict[str, Any] | None = Field(\n        None,\n        description=\"Specific ClickHouse server settings to be used with the session\",\n    )\n    connect_timeout: int | None = Field(\n        None,\n        description=\"Timeout in seconds for establishing a connection\",\n    )\n    send_receive_timeout: int | None = Field(\n        None,\n        description=\"Read timeout in seconds for http connection\",\n    )\n    verify: bool | None = Field(\n        None,\n        description=\"Verify the server certificate in secure/https mode\",\n    )\n    ca_cert: str | None = Field(\n        None,\n        description=\"Path to Certificate Authority root certificate (.pem format)\",\n    )\n    client_cert: str | None = Field(\n        None,\n        description=\"Path to TLS Client certificate (.pem format)\",\n    )\n    client_cert_key: str | None = Field(\n        None,\n        description=\"Path to the private key for the TLS Client certificate\",\n    )\n    http_proxy: str | None = Field(\n        None,\n        description=\"HTTP proxy address\",\n    )\n    https_proxy: str | None = Field(\n        None,\n        description=\"HTTPS proxy address\",\n    )\n    server_host_name: str | None = Field(\n        None,\n        description=\"Server host name to be checked against the TLS certificate\",\n    )\n\n\nclass PostgresSettings(BaseModel):\n    host: str = Field(\n        \"localhost\",\n        description=\"The server hosting the Postgres database\",\n    )\n    port: int = Field(\n        5432,\n        description=\"The port on which the Postgres database is accessible\",\n    )\n    user: str = Field(\n        \"postgres\",\n        description=\"The user to use to connect to the Postgres database\",\n    )\n    password: str = Field(\n        \"postgres\",\n        description=\"The password to use to connect to the Postgres database\",\n    )\n    database: str = Field(\n        \"postgres\",\n        description=\"The database to use to connect to the Postgres database\",\n    )\n    schema_name: str = Field(\n        \"public\",\n        description=\"The name of the schema in the Postgres database to use\",\n    )\n\n\nclass QdrantSettings(BaseModel):\n    location: str | None = Field(\n        None,\n        description=(\n            \"If `:memory:` - use in-memory Qdrant instance.\\n\"\n            \"If `str` - use it as a `url` parameter.\\n\"\n        ),\n    )\n    url: str | None = Field(\n        None,\n        description=(\n            \"Either host or str of 'Optional[scheme], host, Optional[port], Optional[prefix]'.\"\n        ),\n    )\n    port: int | None = Field(6333, description=\"Port of the REST API interface.\")\n    grpc_port: int | None = Field(6334, description=\"Port of the gRPC interface.\")\n    prefer_grpc: bool | None = Field(\n        False,\n        description=\"If `true` - use gRPC interface whenever possible in custom methods.\",\n    )\n    https: bool | None = Field(\n        None,\n        description=\"If `true` - use HTTPS(SSL) protocol.\",\n    )\n    api_key: str | None = Field(\n        None,\n        description=\"API key for authentication in Qdrant Cloud.\",\n    )\n    prefix: str | None = Field(\n        None,\n        description=(\n            \"Prefix to add to the REST URL path.\"\n            \"Example: `service/v1` will result in \"\n            \"'http://localhost:6333/service/v1/{qdrant-endpoint}' for REST API.\"\n        ),\n    )\n    
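# NOTE: the connection options below appear to mirror qdrant_client.QdrantClient\n    # constructor arguments and are assumed to be forwarded to that client as-is.\n    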
timeout: float | None = Field(\n        None,\n        description=\"Timeout for REST and gRPC API requests.\",\n    )\n    host: str | None = Field(\n        None,\n        description=\"Host name of Qdrant service. If url and host are None, set to 'localhost'.\",\n    )\n    path: str | None = Field(None, description=\"Persistence path for QdrantLocal.\")\n    force_disable_check_same_thread: bool | None = Field(\n        True,\n        description=(\n            \"For QdrantLocal, force disable check_same_thread. Default: `True`\"\n            \"Only use this if you can guarantee that you can resolve the thread safety outside QdrantClient.\"\n        ),\n    )\n\n\nclass MilvusSettings(BaseModel):\n    uri: str = Field(\n        \"local_data/private_gpt/milvus/milvus_local.db\",\n        description=\"The URI of the Milvus instance. For example: 'local_data/private_gpt/milvus/milvus_local.db' for Milvus Lite.\",\n    )\n    token: str = Field(\n        \"\",\n        description=(\n            \"A valid access token to access the specified Milvus instance. \"\n            \"This can be used as a recommended alternative to setting user and password separately. \"\n        ),\n    )\n    collection_name: str = Field(\n        \"make_this_parameterizable_per_api_call\",\n        description=\"The name of the collection in Milvus. Default is 'make_this_parameterizable_per_api_call'.\",\n    )\n    overwrite: bool = Field(\n        True, description=\"Overwrite the previous collection schema if it exists.\"\n    )\n\n\nclass Settings(BaseModel):\n    server: ServerSettings\n    data: DataSettings\n    ui: UISettings\n    llm: LLMSettings\n    embedding: EmbeddingSettings\n    llamacpp: LlamaCPPSettings\n    huggingface: HuggingFaceSettings\n    sagemaker: SagemakerSettings\n    openai: OpenAISettings\n    gemini: GeminiSettings\n    ollama: OllamaSettings\n    azopenai: AzureOpenAISettings\n    vectorstore: VectorstoreSettings\n    nodestore: NodeStoreSettings\n    rag: RagSettings\n    summarize: SummarizeSettings\n    qdrant: QdrantSettings | None = None\n    postgres: PostgresSettings | None = None\n    clickhouse: ClickHouseSettings | None = None\n    milvus: MilvusSettings | None = None\n\n\n\"\"\"\nThis is visible just for DI or testing purposes.\n\nUse dependency injection or `settings()` method instead.\n\"\"\"\nunsafe_settings = load_active_settings()\n\n\"\"\"\nThis is visible just for DI or testing purposes.\n\nUse dependency injection or `settings()` method instead.\n\"\"\"\nunsafe_typed_settings = Settings(**unsafe_settings)\n\n\ndef settings() -> Settings:\n    \"\"\"Get the current loaded settings from the DI container.\n\n    This method exists to keep compatibility with the existing code,\n    that require global access to the settings.\n\n    For regular components use dependency injection instead.\n    \"\"\"\n    from private_gpt.di import global_injector\n\n    return global_injector.get(Settings)\n"
  },
  {
    "path": "private_gpt/settings/settings_loader.py",
    "content": "import functools\nimport logging\nimport os\nimport sys\nfrom collections.abc import Iterable\nfrom pathlib import Path\nfrom typing import Any\n\nfrom pydantic.v1.utils import deep_update, unique_list\n\nfrom private_gpt.constants import PROJECT_ROOT_PATH\nfrom private_gpt.settings.yaml import load_yaml_with_envvars\n\nlogger = logging.getLogger(__name__)\n\n_settings_folder = os.environ.get(\"PGPT_SETTINGS_FOLDER\", PROJECT_ROOT_PATH)\n\n# if running in unittest, use the test profile\n_test_profile = [\"test\"] if \"tests.fixtures\" in sys.modules else []\n\nactive_profiles: list[str] = unique_list(\n    [\"default\"]\n    + [\n        item.strip()\n        for item in os.environ.get(\"PGPT_PROFILES\", \"\").split(\",\")\n        if item.strip()\n    ]\n    + _test_profile\n)\n\n\ndef merge_settings(settings: Iterable[dict[str, Any]]) -> dict[str, Any]:\n    return functools.reduce(deep_update, settings, {})\n\n\ndef load_settings_from_profile(profile: str) -> dict[str, Any]:\n    if profile == \"default\":\n        profile_file_name = \"settings.yaml\"\n    else:\n        profile_file_name = f\"settings-{profile}.yaml\"\n\n    path = Path(_settings_folder) / profile_file_name\n    with Path(path).open(\"r\") as f:\n        config = load_yaml_with_envvars(f)\n    if not isinstance(config, dict):\n        raise TypeError(f\"Config file has no top-level mapping: {path}\")\n    return config\n\n\ndef load_active_settings() -> dict[str, Any]:\n    \"\"\"Load active profiles and merge them.\"\"\"\n    logger.info(\"Starting application with profiles=%s\", active_profiles)\n    loaded_profiles = [\n        load_settings_from_profile(profile) for profile in active_profiles\n    ]\n    merged: dict[str, Any] = merge_settings(loaded_profiles)\n    return merged\n"
  },
  {
    "path": "private_gpt/settings/yaml.py",
    "content": "import os\nimport re\nimport typing\nfrom typing import Any, TextIO\n\nfrom yaml import SafeLoader\n\n_env_replace_matcher = re.compile(r\"\\$\\{(\\w|_)+:?.*}\")\n\n\n@typing.no_type_check  # pyaml does not have good hints, everything is Any\ndef load_yaml_with_envvars(\n    stream: TextIO, environ: dict[str, Any] = os.environ\n) -> dict[str, Any]:\n    \"\"\"Load yaml file with environment variable expansion.\n\n    The pattern ${VAR} or ${VAR:default} will be replaced with\n    the value of the environment variable.\n    \"\"\"\n    loader = SafeLoader(stream)\n\n    def load_env_var(_, node) -> str:\n        \"\"\"Extract the matched value, expand env variable, and replace the match.\"\"\"\n        value = str(node.value).removeprefix(\"${\").removesuffix(\"}\")\n        split = value.split(\":\", 1)\n        env_var = split[0]\n        value = environ.get(env_var)\n        default = None if len(split) == 1 else split[1]\n        if value is None and default is None:\n            raise ValueError(\n                f\"Environment variable {env_var} is not set and not default was provided\"\n            )\n        return value or default\n\n    loader.add_implicit_resolver(\"env_var_replacer\", _env_replace_matcher, None)\n    loader.add_constructor(\"env_var_replacer\", load_env_var)\n\n    try:\n        return loader.get_single_data()\n    finally:\n        loader.dispose()\n"
  },
  {
    "path": "private_gpt/ui/__init__.py",
    "content": "\"\"\"Gradio based UI.\"\"\"\n"
  },
  {
    "path": "private_gpt/ui/images.py",
    "content": "logo_svg = \"data:image/svg+xml;base64,PHN2ZyB3aWR0aD0iODYxIiBoZWlnaHQ9Ijk4IiB2aWV3Qm94PSIwIDAgODYxIDk4IiBmaWxsPSJub25lIiB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciPgo8cGF0aCBkPSJNNDguMTM0NSAwLjE1NzkxMUMzNi44Mjk5IDEuMDM2NTQgMjYuMTIwNSA1LjU1MzI4IDE3LjYyNTYgMTMuMDI1QzkuMTMwNDYgMjAuNDk2NyAzLjMxMTcgMzAuNTE2OSAxLjA0OTUyIDQxLjU3MDVDLTEuMjEyNzMgNTIuNjIzOCAwLjIwNDQxOSA2NC4xMDk0IDUuMDg2MiA3NC4yOTA1QzkuOTY4NjggODQuNDcxNiAxOC4wNTAzIDkyLjc5NDMgMjguMTA5OCA5OEwzMy43MDI2IDgyLjU5MDdMMzUuNDU0MiA3Ny43NjU2QzI5LjgzODcgNzQuMTY5MiAyNS41NDQ0IDY4Ljg2MDcgMjMuMjE0IDYyLjYzNDRDMjAuODgyMiA1Ni40MDg2IDIwLjYzOSA0OS41OTkxIDIyLjUyMDQgNDMuMjI0M0MyNC40MDI5IDM2Ljg0OTUgMjguMzA5NiAzMS4yNTI1IDMzLjY1NjEgMjcuMjcwNkMzOS4wMDIgMjMuMjg4MyA0NS41MDAzIDIxLjEzNSA1Mi4xNzg5IDIxLjEzM0M1OC44NTczIDIxLjEzMDMgNjUuMzU3MSAyMy4yNzgzIDcwLjcwNjUgMjcuMjU1OEM3Ni4wNTU0IDMxLjIzNCA3OS45NjY0IDM2LjgyNzcgODEuODU0MyA0My4yMDA2QzgzLjc0MjkgNDkuNTczNiA4My41MDYyIDU2LjM4MzYgODEuMTgwMSA2Mi42MTE3Qzc4Ljg1NDUgNjguODM5NiA3NC41NjUgNzQuMTUxNCA2OC45NTI5IDc3Ljc1MjhMNzAuNzA3NCA4Mi41OTA3TDc2LjMwMDIgOTcuOTk3MUM4Ni45Nzg4IDkyLjQ3MDUgOTUuNDA4OCA4My40NDE5IDEwMC4xNjMgNzIuNDQwNEMxMDQuOTE3IDYxLjQzOTQgMTA1LjcwNCA0OS4xNDE3IDEwMi4zODkgMzcuNjNDOTkuMDc0NiAyNi4xMTc5IDkxLjg2MjcgMTYuMDk5MyA4MS45NzQzIDkuMjcwNzlDNzIuMDg2MSAyLjQ0MTkxIDYwLjEyOTEgLTAuNzc3MDg2IDQ4LjEyODYgMC4xNTg5MzRMNDguMTM0NSAwLjE1NzkxMVoiIGZpbGw9IiMxRjFGMjkiLz4KPGcgY2xpcC1wYXRoPSJ1cmwoI2NsaXAwXzVfMTkpIj4KPHBhdGggZD0iTTIyMC43NzIgMTIuNzUyNEgyNTIuNjM5QzI2Ny4yNjMgMTIuNzUyNCAyNzcuNzM5IDIxLjk2NzUgMjc3LjczOSAzNS40MDUyQzI3Ny43MzkgNDYuNzg3IDI2OS44ODEgNTUuMzUwOCAyNTguMzE0IDU3LjQxMDdMMjc4LjgzIDg1LjM3OTRIMjYxLjM3TDI0Mi4wNTQgNTcuOTUzM0gyMzUuNTA2Vjg1LjM3OTRIMjIwLjc3NEwyMjAuNzcyIDEyLjc1MjRaTTIzNS41MDQgMjYuMzAyOFY0NC40MDdIMjUyLjYzMkMyNTguOTYyIDQ0LjQwNyAyNjIuOTk5IDQwLjgyOTggMjYyLjk5OSAzNS40MTAyQzI2Mi45OTkgMjkuODgwOSAyNTguOTYyIDI2LjMwMjggMjUyLjYzMiAyNi4zMDI4SDIzNS41MDRaIiBmaWxsPSIjMUYxRjI5Ii8+CjxwYXRoIGQ9Ik0yOTUuMTc2IDg1LjM4NDRWMTIuNzUyNEgzMDkuOTA5Vjg1LjM4NDRIMjk1LjE3NloiIGZpbGw9IiMxRjFGMjkiLz4KPHBhdGggZD0iTTM2My43OTUgNjUuNzYzTDM4NS42MiAxMi43NTI0SDQwMS40NDRMMzcxLjIxNSA4NS4zODQ0SDM1Ni40ODNMMzI2LjI1NCAxMi43NTI0SDM0Mi4wNzhMMzYzLjc5NSA2NS43NjNaIiBmaWxsPSIjMUYxRjI5Ii8+CjxwYXRoIGQ9Ik00NDguMzI3IDcyLjA1MDRINDE1LjY5OEw0MTAuMjQxIDg1LjM4NDRIMzk0LjQxOEw0MjQuNjQ3IDEyLjc1MjRINDM5LjM3OUw0NjkuNjA4IDg1LjM4NDRINDUzLjc4M0w0NDguMzI3IDcyLjA1MDRaTTQ0Mi43NjEgNTguNUw0MzIuMDY2IDMyLjM3NDhMNDIxLjI2MiA1OC41SDQ0Mi43NjFaIiBmaWxsPSIjMUYxRjI5Ii8+CjxwYXRoIGQ9Ik00NjUuMjIxIDEyLjc1MjRINTMwLjU5MlYyNi4zMDI4SDUwNS4yNzVWODUuMzg0NEg0OTAuNTM5VjI2LjMwMjhINDY1LjIyMVYxMi43NTI0WiIgZmlsbD0iIzFGMUYyOSIvPgo8cGF0aCBkPSJNNTk1LjE5MyAxMi43NTI0VjI2LjMwMjhINTYyLjEyOFY0MS4xNTUxSDU5NS4xOTNWNTQuNzA2NUg1NjIuMTI4VjcxLjgzNEg1OTUuMTkzVjg1LjM4NDRINTQ3LjM5NVYxMi43NTI0SDU5NS4xOTNaIiBmaWxsPSIjMUYxRjI5Ii8+CjxwYXRoIGQ9Ik0xNjcuMjAxIDU3LjQxNThIMTg2LjUzNkMxOTAuODg2IDU3LjQ2NjIgMTk1LjE2OCA1Ni4zMzQ4IDE5OC45MTggNTQuMTQzN0MyMDIuMTc5IDUyLjIxOTkgMjA0Ljg2OSA0OS40NzM2IDIwNi43MTYgNDYuMTgzNUMyMDguNTYyIDQyLjg5MzQgMjA5LjUgMzkuMTc2NiAyMDkuNDMzIDM1LjQxMDJDMjA5LjQzMyAyMS45Njc1IDE5OC45NTggMTIuNzU3NCAxODQuMzM0IDEyLjc1NzRIMTUyLjQ2OFY4NS4zODk0SDE2Ny4yMDFWNTcuNDIwN1Y1Ny40MTU4Wk0xNjcuMjAxIDI2LjMwNThIMTg0LjMyOUMxOTAuNjU4IDI2LjMwNTggMTk0LjY5NiAyOS44ODQgMTk0LjY5NiAzNS40MTMzQzE5NC42OTYgNDAuODMyOSAxOTAuNjU4IDQ0LjQwOTkgMTg0LjMyOSA0NC40MDk5SDE2Ny4yMDFWMjYuMzA1OFoiIGZpbGw9IiMxRjFGMjkiLz4KPHBhdGggZD0iTTc5NC44MzUgMTIuNzUyNEg4NjAuMjA2VjI2LjMwMjhIODM0Ljg4OVY4NS4zODQ0SDgyMC4xNTZWMjYuMzAyOEg3OTQuODM1VjEyLjc1MjRaIiBmaWxsPSIjMUYxRjI5Ii8+CjxwYXRoIGQ9Ik03NDEuOTA3IDU3LjQxNThINzYxLjI0MUM3NjUuNTkyIDU3LjQ2NjEgNzY5Ljg3NCA1Ni4zMzQ3IDc3My42MjQg
NTQuMTQzN0M3NzYuODg0IDUyLjIxOTkgNzc5LjU3NSA0OS40NzM2IDc4MS40MjEgNDYuMTgzNUM3ODMuMjY4IDQyLjg5MzQgNzg0LjIwNiAzOS4xNzY2IDc4NC4xMzkgMzUuNDEwMkM3ODQuMTM5IDIxLjk2NzUgNzczLjY2NCAxMi43NTc0IDc1OS4wMzkgMTIuNzU3NEg3MjcuMTc1Vjg1LjM4OTRINzQxLjkwN1Y1Ny40MjA3VjU3LjQxNThaTTc0MS45MDcgMjYuMzA1OEg3NTkuMDM1Qzc2NS4zNjUgMjYuMzA1OCA3NjkuNDAzIDI5Ljg4NCA3NjkuNDAzIDM1LjQxMzNDNzY5LjQwMyA0MC44MzI5IDc2NS4zNjUgNDQuNDA5OSA3NTkuMDM1IDQ0LjQwOTlINzQxLjkwN1YyNi4zMDU4WiIgZmlsbD0iIzFGMUYyOSIvPgo8cGF0aCBkPSJNNjgxLjA2OSA0Ny4wMTE1VjU5LjAxMjVINjk1LjM3OVY3MS42NzE5QzY5Mi41MjYgNzMuNDM2OCA2ODguNTI0IDc0LjMzMTkgNjgzLjQ3NyA3NC4zMzE5QzY2Ni4wMDMgNzQuMzMxOSA2NTguMDQ1IDYxLjgxMjQgNjU4LjA0NSA1MC4xOEM2NTguMDQ1IDMzLjk2MDUgNjcxLjAwOCAyNS40NzMyIDY4My44MTIgMjUuNDczMkM2OTAuNDI1IDI1LjQ2MjggNjk2LjkwOSAyNy4yODA0IDcwMi41NDEgMzAuNzIyNkw3MDMuMTU3IDMxLjEyNTRMNzA1Ljk1OCAxOC4xODZMNzA1LjY2MyAxNy45OTc3QzcwMC4wNDYgMTQuNDAwNCA2OTEuMjkxIDEyLjI1OSA2ODIuMjUxIDEyLjI1OUM2NjMuMTk3IDEyLjI1OSA2NDIuOTQ5IDI1LjM5NjcgNjQyLjk0OSA0OS43NDVDNjQyLjk0OSA2MS4wODQ1IDY0Ny4yOTMgNzAuNzE3NCA2NTUuNTExIDc3LjYwMjlDNjYzLjIyNCA4My44MjQ1IDY3Mi44NzQgODcuMTg5IDY4Mi44MDkgODcuMTIwMUM2OTQuMzYzIDg3LjEyMDEgNzAzLjA2MSA4NC42NDk1IDcwOS40MDIgNzkuNTY5Mkw3MDkuNTg5IDc5LjQxODFWNDcuMDExNUg2ODEuMDY5WiIgZmlsbD0iIzFGMUYyOSIvPgo8L2c+CjxkZWZzPgo8Y2xpcFBhdGggaWQ9ImNsaXAwXzVfMTkiPgo8cmVjdCB3aWR0aD0iNzA3Ljc3OCIgaGVpZ2h0PSI3NC44NjExIiBmaWxsPSJ3aGl0ZSIgdHJhbnNmb3JtPSJ0cmFuc2xhdGUoMTUyLjQ0NCAxMi4yNSkiLz4KPC9jbGlwUGF0aD4KPC9kZWZzPgo8L3N2Zz4K\"\n"
  },
  {
    "path": "private_gpt/ui/ui.py",
    "content": "\"\"\"This file should be imported if and only if you want to run the UI locally.\"\"\"\n\nimport base64\nimport logging\nimport time\nfrom collections.abc import Iterable\nfrom enum import Enum\nfrom pathlib import Path\nfrom typing import Any\n\nimport gradio as gr  # type: ignore\nfrom fastapi import FastAPI\nfrom gradio.themes.utils.colors import slate  # type: ignore\nfrom injector import inject, singleton\nfrom llama_index.core.llms import ChatMessage, ChatResponse, MessageRole\nfrom llama_index.core.types import TokenGen\nfrom pydantic import BaseModel\n\nfrom private_gpt.constants import PROJECT_ROOT_PATH\nfrom private_gpt.di import global_injector\nfrom private_gpt.open_ai.extensions.context_filter import ContextFilter\nfrom private_gpt.server.chat.chat_service import ChatService, CompletionGen\nfrom private_gpt.server.chunks.chunks_service import Chunk, ChunksService\nfrom private_gpt.server.ingest.ingest_service import IngestService\nfrom private_gpt.server.recipes.summarize.summarize_service import SummarizeService\nfrom private_gpt.settings.settings import settings\nfrom private_gpt.ui.images import logo_svg\n\nlogger = logging.getLogger(__name__)\n\nTHIS_DIRECTORY_RELATIVE = Path(__file__).parent.relative_to(PROJECT_ROOT_PATH)\n# Should be \"private_gpt/ui/avatar-bot.ico\"\nAVATAR_BOT = THIS_DIRECTORY_RELATIVE / \"avatar-bot.ico\"\n\nUI_TAB_TITLE = \"My Private GPT\"\n\nSOURCES_SEPARATOR = \"<hr>Sources: \\n\"\n\n\nclass Modes(str, Enum):\n    RAG_MODE = \"RAG\"\n    SEARCH_MODE = \"Search\"\n    BASIC_CHAT_MODE = \"Basic\"\n    SUMMARIZE_MODE = \"Summarize\"\n\n\nMODES: list[Modes] = [\n    Modes.RAG_MODE,\n    Modes.SEARCH_MODE,\n    Modes.BASIC_CHAT_MODE,\n    Modes.SUMMARIZE_MODE,\n]\n\n\nclass Source(BaseModel):\n    file: str\n    page: str\n    text: str\n\n    class Config:\n        frozen = True\n\n    @staticmethod\n    def curate_sources(sources: list[Chunk]) -> list[\"Source\"]:\n        curated_sources = []\n\n        for chunk in sources:\n            doc_metadata = chunk.document.doc_metadata\n\n            file_name = doc_metadata.get(\"file_name\", \"-\") if doc_metadata else \"-\"\n            page_label = doc_metadata.get(\"page_label\", \"-\") if doc_metadata else \"-\"\n\n            source = Source(file=file_name, page=page_label, text=chunk.text)\n            curated_sources.append(source)\n            curated_sources = list(\n                dict.fromkeys(curated_sources).keys()\n            )  # Unique sources only\n\n        return curated_sources\n\n\n@singleton\nclass PrivateGptUi:\n    @inject\n    def __init__(\n        self,\n        ingest_service: IngestService,\n        chat_service: ChatService,\n        chunks_service: ChunksService,\n        summarizeService: SummarizeService,\n    ) -> None:\n        self._ingest_service = ingest_service\n        self._chat_service = chat_service\n        self._chunks_service = chunks_service\n        self._summarize_service = summarizeService\n\n        # Cache the UI blocks\n        self._ui_block = None\n\n        self._selected_filename = None\n\n        # Initialize system prompt based on default mode\n        default_mode_map = {mode.value: mode for mode in Modes}\n        self._default_mode = default_mode_map.get(\n            settings().ui.default_mode, Modes.RAG_MODE\n        )\n        self._system_prompt = self._get_default_system_prompt(self._default_mode)\n\n    def _chat(\n        self, message: str, history: list[list[str]], mode: Modes, *_: Any\n    ) -> Any:\n        def 
yield_deltas(completion_gen: CompletionGen) -> Iterable[str]:\n            full_response: str = \"\"\n            stream = completion_gen.response\n            for delta in stream:\n                if isinstance(delta, str):\n                    full_response += str(delta)\n                elif isinstance(delta, ChatResponse):\n                    full_response += delta.delta or \"\"\n                yield full_response\n                time.sleep(0.02)\n\n            if completion_gen.sources:\n                full_response += SOURCES_SEPARATOR\n                cur_sources = Source.curate_sources(completion_gen.sources)\n                sources_text = \"\\n\\n\\n\"\n                used_files = set()\n                for index, source in enumerate(cur_sources, start=1):\n                    if f\"{source.file}-{source.page}\" not in used_files:\n                        sources_text = (\n                            sources_text\n                            + f\"{index}. {source.file} (page {source.page}) \\n\\n\"\n                        )\n                        used_files.add(f\"{source.file}-{source.page}\")\n                sources_text += \"<hr>\\n\\n\"\n                full_response += sources_text\n            yield full_response\n\n        def yield_tokens(token_gen: TokenGen) -> Iterable[str]:\n            full_response: str = \"\"\n            for token in token_gen:\n                full_response += str(token)\n                yield full_response\n\n        def build_history() -> list[ChatMessage]:\n            history_messages: list[ChatMessage] = []\n\n            for interaction in history:\n                history_messages.append(\n                    ChatMessage(content=interaction[0], role=MessageRole.USER)\n                )\n                if len(interaction) > 1 and interaction[1] is not None:\n                    history_messages.append(\n                        ChatMessage(\n                            # Remove from history content the Sources information\n                            content=interaction[1].split(SOURCES_SEPARATOR)[0],\n                            role=MessageRole.ASSISTANT,\n                        )\n                    )\n\n            # max 20 messages to try to avoid context overflow\n            return history_messages[:20]\n\n        new_message = ChatMessage(content=message, role=MessageRole.USER)\n        all_messages = [*build_history(), new_message]\n        # If a system prompt is set, add it as a system message\n        if self._system_prompt:\n            all_messages.insert(\n                0,\n                ChatMessage(\n                    content=self._system_prompt,\n                    role=MessageRole.SYSTEM,\n                ),\n            )\n        match mode:\n            case Modes.RAG_MODE:\n                # Use only the selected file for the query\n                context_filter = None\n                if self._selected_filename is not None:\n                    docs_ids = []\n                    for ingested_document in self._ingest_service.list_ingested():\n                        if (\n                            ingested_document.doc_metadata[\"file_name\"]\n                            == self._selected_filename\n                        ):\n                            docs_ids.append(ingested_document.doc_id)\n                    context_filter = ContextFilter(docs_ids=docs_ids)\n\n                query_stream = self._chat_service.stream_chat(\n                    messages=all_messages,\n                    
use_context=True,\n                    context_filter=context_filter,\n                )\n                yield from yield_deltas(query_stream)\n            case Modes.BASIC_CHAT_MODE:\n                llm_stream = self._chat_service.stream_chat(\n                    messages=all_messages,\n                    use_context=False,\n                )\n                yield from yield_deltas(llm_stream)\n\n            case Modes.SEARCH_MODE:\n                response = self._chunks_service.retrieve_relevant(\n                    text=message, limit=4, prev_next_chunks=0\n                )\n\n                sources = Source.curate_sources(response)\n\n                yield \"\\n\\n\\n\".join(\n                    f\"{index}. **{source.file} \"\n                    f\"(page {source.page})**\\n \"\n                    f\"{source.text}\"\n                    for index, source in enumerate(sources, start=1)\n                )\n            case Modes.SUMMARIZE_MODE:\n                # Summarize the given message, optionally using selected files\n                context_filter = None\n                if self._selected_filename:\n                    docs_ids = []\n                    for ingested_document in self._ingest_service.list_ingested():\n                        if (\n                            ingested_document.doc_metadata[\"file_name\"]\n                            == self._selected_filename\n                        ):\n                            docs_ids.append(ingested_document.doc_id)\n                    context_filter = ContextFilter(docs_ids=docs_ids)\n\n                summary_stream = self._summarize_service.stream_summarize(\n                    use_context=True,\n                    context_filter=context_filter,\n                    instructions=message,\n                )\n                yield from yield_tokens(summary_stream)\n\n    # On initialization and on mode change, this function set the system prompt\n    # to the default prompt based on the mode (and user settings).\n    @staticmethod\n    def _get_default_system_prompt(mode: Modes) -> str:\n        p = \"\"\n        match mode:\n            # For query chat mode, obtain default system prompt from settings\n            case Modes.RAG_MODE:\n                p = settings().ui.default_query_system_prompt\n            # For chat mode, obtain default system prompt from settings\n            case Modes.BASIC_CHAT_MODE:\n                p = settings().ui.default_chat_system_prompt\n            # For summarization mode, obtain default system prompt from settings\n            case Modes.SUMMARIZE_MODE:\n                p = settings().ui.default_summarization_system_prompt\n            # For any other mode, clear the system prompt\n            case _:\n                p = \"\"\n        return p\n\n    @staticmethod\n    def _get_default_mode_explanation(mode: Modes) -> str:\n        match mode:\n            case Modes.RAG_MODE:\n                return \"Get contextualized answers from selected files.\"\n            case Modes.SEARCH_MODE:\n                return \"Find relevant chunks of text in selected files.\"\n            case Modes.BASIC_CHAT_MODE:\n                return \"Chat with the LLM using its training data. Files are ignored.\"\n            case Modes.SUMMARIZE_MODE:\n                return \"Generate a summary of the selected files. 
Prompt to customize the result.\"\n            case _:\n                return \"\"\n\n    def _set_system_prompt(self, system_prompt_input: str) -> None:\n        logger.info(f\"Setting system prompt to: {system_prompt_input}\")\n        self._system_prompt = system_prompt_input\n\n    def _set_explanatation_mode(self, explanation_mode: str) -> None:\n        self._explanation_mode = explanation_mode\n\n    def _set_current_mode(self, mode: Modes) -> Any:\n        self.mode = mode\n        self._set_system_prompt(self._get_default_system_prompt(mode))\n        self._set_explanatation_mode(self._get_default_mode_explanation(mode))\n        interactive = self._system_prompt is not None\n        return [\n            gr.update(placeholder=self._system_prompt, interactive=interactive),\n            gr.update(value=self._explanation_mode),\n        ]\n\n    def _list_ingested_files(self) -> list[list[str]]:\n        files = set()\n        for ingested_document in self._ingest_service.list_ingested():\n            if ingested_document.doc_metadata is None:\n                # Skipping documents without metadata\n                continue\n            file_name = ingested_document.doc_metadata.get(\n                \"file_name\", \"[FILE NAME MISSING]\"\n            )\n            files.add(file_name)\n        return [[row] for row in files]\n\n    def _upload_file(self, files: list[str]) -> None:\n        logger.debug(\"Loading count=%s files\", len(files))\n        paths = [Path(file) for file in files]\n\n        # remove all existing Documents with name identical to a new file upload:\n        file_names = [path.name for path in paths]\n        doc_ids_to_delete = []\n        for ingested_document in self._ingest_service.list_ingested():\n            if (\n                ingested_document.doc_metadata\n                and ingested_document.doc_metadata[\"file_name\"] in file_names\n            ):\n                doc_ids_to_delete.append(ingested_document.doc_id)\n        if len(doc_ids_to_delete) > 0:\n            logger.info(\n                \"Uploading file(s) which were already ingested: %s document(s) will be replaced.\",\n                len(doc_ids_to_delete),\n            )\n            for doc_id in doc_ids_to_delete:\n                self._ingest_service.delete(doc_id)\n\n        self._ingest_service.bulk_ingest([(str(path.name), path) for path in paths])\n\n    def _delete_all_files(self) -> Any:\n        ingested_files = self._ingest_service.list_ingested()\n        logger.debug(\"Deleting count=%s files\", len(ingested_files))\n        for ingested_document in ingested_files:\n            self._ingest_service.delete(ingested_document.doc_id)\n        return [\n            gr.List(self._list_ingested_files()),\n            gr.components.Button(interactive=False),\n            gr.components.Button(interactive=False),\n            gr.components.Textbox(\"All files\"),\n        ]\n\n    def _delete_selected_file(self) -> Any:\n        logger.debug(\"Deleting selected %s\", self._selected_filename)\n        # Note: keep looping for pdf's (each page became a Document)\n        for ingested_document in self._ingest_service.list_ingested():\n            if (\n                ingested_document.doc_metadata\n                and ingested_document.doc_metadata[\"file_name\"]\n                == self._selected_filename\n            ):\n                self._ingest_service.delete(ingested_document.doc_id)\n        return [\n            gr.List(self._list_ingested_files()),\n            
gr.components.Button(interactive=False),\n            gr.components.Button(interactive=False),\n            gr.components.Textbox(\"All files\"),\n        ]\n\n    def _deselect_selected_file(self) -> Any:\n        self._selected_filename = None\n        return [\n            gr.components.Button(interactive=False),\n            gr.components.Button(interactive=False),\n            gr.components.Textbox(\"All files\"),\n        ]\n\n    def _selected_a_file(self, select_data: gr.SelectData) -> Any:\n        self._selected_filename = select_data.value\n        return [\n            gr.components.Button(interactive=True),\n            gr.components.Button(interactive=True),\n            gr.components.Textbox(self._selected_filename),\n        ]\n\n    def _build_ui_blocks(self) -> gr.Blocks:\n        logger.debug(\"Creating the UI blocks\")\n        with gr.Blocks(\n            title=UI_TAB_TITLE,\n            theme=gr.themes.Soft(primary_hue=slate),\n            css=\".logo { \"\n            \"display:flex;\"\n            \"background-color: #C7BAFF;\"\n            \"height: 80px;\"\n            \"border-radius: 8px;\"\n            \"align-content: center;\"\n            \"justify-content: center;\"\n            \"align-items: center;\"\n            \"}\"\n            \".logo img { height: 25% }\"\n            \".contain { display: flex !important; flex-direction: column !important; }\"\n            \"#component-0, #component-3, #component-10, #component-8  { height: 100% !important; }\"\n            \"#chatbot { flex-grow: 1 !important; overflow: auto !important;}\"\n            \"#col { height: calc(100vh - 112px - 16px) !important; }\"\n            \"hr { margin-top: 1em; margin-bottom: 1em; border: 0; border-top: 1px solid #FFF; }\"\n            \".avatar-image { background-color: antiquewhite; border-radius: 2px; }\"\n            \".footer { text-align: center; margin-top: 20px; font-size: 14px; display: flex; align-items: center; justify-content: center; }\"\n            \".footer-zylon-link { display:flex; margin-left: 5px; text-decoration: auto; color: var(--body-text-color); }\"\n            \".footer-zylon-link:hover { color: #C7BAFF; }\"\n            \".footer-zylon-ico { height: 20px; margin-left: 5px; background-color: antiquewhite; border-radius: 2px; }\",\n        ) as blocks:\n            with gr.Row():\n                gr.HTML(f\"<div class='logo'/><img src={logo_svg} alt=PrivateGPT></div\")\n\n            with gr.Row(equal_height=False):\n                with gr.Column(scale=3):\n                    default_mode = self._default_mode\n                    mode = gr.Radio(\n                        [mode.value for mode in MODES],\n                        label=\"Mode\",\n                        value=default_mode,\n                    )\n                    explanation_mode = gr.Textbox(\n                        placeholder=self._get_default_mode_explanation(default_mode),\n                        show_label=False,\n                        max_lines=3,\n                        interactive=False,\n                    )\n                    upload_button = gr.components.UploadButton(\n                        \"Upload File(s)\",\n                        type=\"filepath\",\n                        file_count=\"multiple\",\n                        size=\"sm\",\n                    )\n                    ingested_dataset = gr.List(\n                        self._list_ingested_files,\n                        headers=[\"File name\"],\n                        label=\"Ingested 
Files\",\n                        height=235,\n                        interactive=False,\n                        render=False,  # Rendered under the button\n                    )\n                    upload_button.upload(\n                        self._upload_file,\n                        inputs=upload_button,\n                        outputs=ingested_dataset,\n                    )\n                    ingested_dataset.change(\n                        self._list_ingested_files,\n                        outputs=ingested_dataset,\n                    )\n                    ingested_dataset.render()\n                    deselect_file_button = gr.components.Button(\n                        \"De-select selected file\", size=\"sm\", interactive=False\n                    )\n                    selected_text = gr.components.Textbox(\n                        \"All files\", label=\"Selected for Query or Deletion\", max_lines=1\n                    )\n                    delete_file_button = gr.components.Button(\n                        \"🗑️ Delete selected file\",\n                        size=\"sm\",\n                        visible=settings().ui.delete_file_button_enabled,\n                        interactive=False,\n                    )\n                    delete_files_button = gr.components.Button(\n                        \"⚠️ Delete ALL files\",\n                        size=\"sm\",\n                        visible=settings().ui.delete_all_files_button_enabled,\n                    )\n                    deselect_file_button.click(\n                        self._deselect_selected_file,\n                        outputs=[\n                            delete_file_button,\n                            deselect_file_button,\n                            selected_text,\n                        ],\n                    )\n                    ingested_dataset.select(\n                        fn=self._selected_a_file,\n                        outputs=[\n                            delete_file_button,\n                            deselect_file_button,\n                            selected_text,\n                        ],\n                    )\n                    delete_file_button.click(\n                        self._delete_selected_file,\n                        outputs=[\n                            ingested_dataset,\n                            delete_file_button,\n                            deselect_file_button,\n                            selected_text,\n                        ],\n                    )\n                    delete_files_button.click(\n                        self._delete_all_files,\n                        outputs=[\n                            ingested_dataset,\n                            delete_file_button,\n                            deselect_file_button,\n                            selected_text,\n                        ],\n                    )\n                    system_prompt_input = gr.Textbox(\n                        placeholder=self._system_prompt,\n                        label=\"System Prompt\",\n                        lines=2,\n                        interactive=True,\n                        render=False,\n                    )\n                    # When mode changes, set default system prompt, and other stuffs\n                    mode.change(\n                        self._set_current_mode,\n                        inputs=mode,\n                        outputs=[system_prompt_input, explanation_mode],\n                    )\n                    # 
On blur, set system prompt to use in queries\n                    system_prompt_input.blur(\n                        self._set_system_prompt,\n                        inputs=system_prompt_input,\n                    )\n\n                    def get_model_label() -> str | None:\n                        \"\"\"Get model label from llm mode setting YAML.\n\n                        Raises:\n                            ValueError: If an invalid 'llm_mode' is encountered.\n\n                        Returns:\n                            str: The corresponding model label.\n                        \"\"\"\n                        # Get model label from llm mode setting YAML\n                        # Labels: local, openai, openailike, sagemaker, mock, ollama\n                        config_settings = settings()\n                        if config_settings is None:\n                            raise ValueError(\"Settings are not configured.\")\n\n                        # Get llm_mode from settings\n                        llm_mode = config_settings.llm.mode\n\n                        # Mapping of 'llm_mode' to corresponding model labels\n                        model_mapping = {\n                            \"llamacpp\": config_settings.llamacpp.llm_hf_model_file,\n                            \"openai\": config_settings.openai.model,\n                            \"openailike\": config_settings.openai.model,\n                            \"azopenai\": config_settings.azopenai.llm_model,\n                            \"sagemaker\": config_settings.sagemaker.llm_endpoint_name,\n                            \"mock\": llm_mode,\n                            \"ollama\": config_settings.ollama.llm_model,\n                            \"gemini\": config_settings.gemini.model,\n                        }\n\n                        if llm_mode not in model_mapping:\n                            print(f\"Invalid 'llm mode': {llm_mode}\")\n                            return None\n\n                        return model_mapping[llm_mode]\n\n                with gr.Column(scale=7, elem_id=\"col\"):\n                    # Determine the model label based on the value of PGPT_PROFILES\n                    model_label = get_model_label()\n                    if model_label is not None:\n                        label_text = (\n                            f\"LLM: {settings().llm.mode} | Model: {model_label}\"\n                        )\n                    else:\n                        label_text = f\"LLM: {settings().llm.mode}\"\n\n                    _ = gr.ChatInterface(\n                        self._chat,\n                        chatbot=gr.Chatbot(\n                            label=label_text,\n                            show_copy_button=True,\n                            elem_id=\"chatbot\",\n                            render=False,\n                            avatar_images=(\n                                None,\n                                AVATAR_BOT,\n                            ),\n                        ),\n                        additional_inputs=[mode, upload_button, system_prompt_input],\n                    )\n\n            with gr.Row():\n                avatar_byte = AVATAR_BOT.read_bytes()\n                f_base64 = f\"data:image/png;base64,{base64.b64encode(avatar_byte).decode('utf-8')}\"\n                gr.HTML(\n                    f\"<div class='footer'><a class='footer-zylon-link' href='https://zylon.ai/'>Maintained by Zylon <img class='footer-zylon-ico' src='{f_base64}' 
alt=Zylon></a></div>\"\n                )\n\n        return blocks\n\n    def get_ui_blocks(self) -> gr.Blocks:\n        if self._ui_block is None:\n            self._ui_block = self._build_ui_blocks()\n        return self._ui_block\n\n    def mount_in_app(self, app: FastAPI, path: str) -> None:\n        blocks = self.get_ui_blocks()\n        blocks.queue()\n        logger.info(\"Mounting the gradio UI, at path=%s\", path)\n        gr.mount_gradio_app(app, blocks, path=path, favicon_path=AVATAR_BOT)\n\n\nif __name__ == \"__main__\":\n    ui = global_injector.get(PrivateGptUi)\n    _blocks = ui.get_ui_blocks()\n    _blocks.queue()\n    _blocks.launch(debug=False, show_api=False)\n"
  },
  {
    "path": "private_gpt/utils/__init__.py",
    "content": "\"\"\"general utils.\"\"\"\n"
  },
  {
    "path": "private_gpt/utils/eta.py",
    "content": "import datetime\nimport logging\nimport math\nimport time\nfrom collections import deque\nfrom typing import Any\n\nlogger = logging.getLogger(__name__)\n\n\ndef human_time(*args: Any, **kwargs: Any) -> str:\n    def timedelta_total_seconds(timedelta: datetime.timedelta) -> float:\n        return (\n            timedelta.microseconds\n            + 0.0\n            + (timedelta.seconds + timedelta.days * 24 * 3600) * 10**6\n        ) / 10**6\n\n    secs = float(timedelta_total_seconds(datetime.timedelta(*args, **kwargs)))\n    # We want (ms) precision below 2 seconds\n    if secs < 2:\n        return f\"{secs * 1000}ms\"\n    units = [(\"y\", 86400 * 365), (\"d\", 86400), (\"h\", 3600), (\"m\", 60), (\"s\", 1)]\n    parts = []\n    for unit, mul in units:\n        if secs / mul >= 1 or mul == 1:\n            if mul > 1:\n                n = int(math.floor(secs / mul))\n                secs -= n * mul\n            else:\n                # >2s we drop the (ms) component.\n                n = int(secs)\n            if n:\n                parts.append(f\"{n}{unit}\")\n    return \" \".join(parts)\n\n\ndef eta(iterator: list[Any]) -> Any:\n    \"\"\"Report an ETA after 30s and every 60s thereafter.\"\"\"\n    total = len(iterator)\n    _eta = ETA(total)\n    _eta.needReport(30)\n    for processed, data in enumerate(iterator, start=1):\n        yield data\n        _eta.update(processed)\n        if _eta.needReport(60):\n            logger.info(f\"{processed}/{total} - ETA {_eta.human_time()}\")\n\n\nclass ETA:\n    \"\"\"Predict how long something will take to complete.\"\"\"\n\n    def __init__(self, total: int):\n        self.total: int = total  # Total expected records.\n        self.rate: float = 0.0  # per second\n        self._timing_data: deque[tuple[float, int]] = deque(maxlen=100)\n        self.secondsLeft: float = 0.0\n        self.nexttime: float = 0.0\n\n    def human_time(self) -> str:\n        if self._calc():\n            return f\"{human_time(seconds=self.secondsLeft)} @ {int(self.rate * 60)}/min\"\n        return \"(computing)\"\n\n    def update(self, count: int) -> None:\n        # count should be in the range 0 to self.total\n        assert count > 0\n        assert count <= self.total\n        self._timing_data.append((time.time(), count))  # (X,Y) for pearson\n\n    def needReport(self, whenSecs: int) -> bool:\n        now = time.time()\n        if now > self.nexttime:\n            self.nexttime = now + whenSecs\n            return True\n        return False\n\n    def _calc(self) -> bool:\n        # A sample before a prediction.   
Need two points to compute slope!\n        if len(self._timing_data) < 3:\n            return False\n\n        # http://en.wikipedia.org/wiki/Pearson_product-moment_correlation_coefficient\n        # Calculate means and standard deviations.\n        samples = len(self._timing_data)\n        # column wise sum of the timing tuples to compute their mean.\n        mean_x, mean_y = (\n            sum(i) / samples for i in zip(*self._timing_data, strict=False)\n        )\n        std_x = math.sqrt(\n            sum(pow(i[0] - mean_x, 2) for i in self._timing_data) / (samples - 1)\n        )\n        std_y = math.sqrt(\n            sum(pow(i[1] - mean_y, 2) for i in self._timing_data) / (samples - 1)\n        )\n\n        # Calculate coefficient.\n        sum_xy, sum_sq_v_x, sum_sq_v_y = 0.0, 0.0, 0\n        for x, y in self._timing_data:\n            x -= mean_x\n            y -= mean_y\n            sum_xy += x * y\n            sum_sq_v_x += pow(x, 2)\n            sum_sq_v_y += pow(y, 2)\n        pearson_r = sum_xy / math.sqrt(sum_sq_v_x * sum_sq_v_y)\n\n        # Calculate regression line.\n        # y = mx + b where m is the slope and b is the y-intercept.\n        m = self.rate = pearson_r * (std_y / std_x)\n        y = self.total\n        b = mean_y - m * mean_x\n        x = (y - b) / m\n\n        # Calculate fitted line (transformed/shifted regression line horizontally).\n        fitted_b = self._timing_data[-1][1] - (m * self._timing_data[-1][0])\n        fitted_x = (y - fitted_b) / m\n        _, count = self._timing_data[-1]  # adjust last data point progress count\n        adjusted_x = ((fitted_x - x) * (count / self.total)) + x\n        eta_epoch = adjusted_x\n\n        self.secondsLeft = max([eta_epoch - time.time(), 0])\n        return True\n"
  },
  {
    "path": "private_gpt/utils/ollama.py",
    "content": "import logging\nfrom collections import deque\nfrom collections.abc import Iterator, Mapping\nfrom typing import Any\n\nfrom httpx import ConnectError\nfrom tqdm import tqdm  # type: ignore\n\nfrom private_gpt.utils.retry import retry\n\ntry:\n    from ollama import Client, ResponseError  # type: ignore\nexcept ImportError as e:\n    raise ImportError(\n        \"Ollama dependencies not found, install with `poetry install --extras llms-ollama or embeddings-ollama`\"\n    ) from e\n\nlogger = logging.getLogger(__name__)\n\n_MAX_RETRIES = 5\n_JITTER = (3.0, 10.0)\n\n\n@retry(\n    is_async=False,\n    exceptions=(ConnectError, ResponseError),\n    tries=_MAX_RETRIES,\n    jitter=_JITTER,\n    logger=logger,\n)\ndef check_connection(client: Client) -> bool:\n    try:\n        client.list()\n        return True\n    except (ConnectError, ResponseError) as e:\n        raise e\n    except Exception as e:\n        logger.error(f\"Failed to connect to Ollama: {type(e).__name__}: {e!s}\")\n        return False\n\n\ndef process_streaming(generator: Iterator[Mapping[str, Any]]) -> None:\n    progress_bars = {}\n    queue = deque()  # type: ignore\n\n    def create_progress_bar(dgt: str, total: int) -> Any:\n        return tqdm(\n            total=total, desc=f\"Pulling model {dgt[7:17]}...\", unit=\"B\", unit_scale=True\n        )\n\n    current_digest = None\n\n    for chunk in generator:\n        digest = chunk.get(\"digest\")\n        completed_size = chunk.get(\"completed\", 0)\n        total_size = chunk.get(\"total\")\n\n        if digest and total_size is not None:\n            if digest not in progress_bars and completed_size > 0:\n                progress_bars[digest] = create_progress_bar(digest, total=total_size)\n                if current_digest is None:\n                    current_digest = digest\n                else:\n                    queue.append(digest)\n\n            if digest in progress_bars:\n                progress_bar = progress_bars[digest]\n                progress = completed_size - progress_bar.n\n                if completed_size > 0 and total_size >= progress != progress_bar.n:\n                    if digest == current_digest:\n                        progress_bar.update(progress)\n                        if progress_bar.n >= total_size:\n                            progress_bar.close()\n                            current_digest = queue.popleft() if queue else None\n                    else:\n                        # Store progress for later update\n                        progress_bars[digest].total = total_size\n                        progress_bars[digest].n = completed_size\n\n    # Close any remaining progress bars at the end\n    for progress_bar in progress_bars.values():\n        progress_bar.close()\n\n\ndef pull_model(client: Client, model_name: str, raise_error: bool = True) -> None:\n    try:\n        installed_models = [model[\"name\"] for model in client.list().get(\"models\", {})]\n        if model_name not in installed_models:\n            logger.info(f\"Pulling model {model_name}. Please wait...\")\n            process_streaming(client.pull(model_name, stream=True))\n            logger.info(f\"Model {model_name} pulled successfully\")\n    except Exception as e:\n        logger.error(f\"Failed to pull model {model_name}: {e!s}\")\n        if raise_error:\n            raise e\n"
  },
  {
    "path": "private_gpt/utils/retry.py",
    "content": "import logging\nfrom collections.abc import Callable\nfrom typing import Any\n\nfrom retry_async import retry as retry_untyped  # type: ignore\n\nretry_logger = logging.getLogger(__name__)\n\n\ndef retry(\n    exceptions: Any = Exception,\n    *,\n    is_async: bool = False,\n    tries: int = -1,\n    delay: float = 0,\n    max_delay: float | None = None,\n    backoff: float = 1,\n    jitter: float | tuple[float, float] = 0,\n    logger: logging.Logger = retry_logger,\n) -> Callable[..., Any]:\n    wrapped = retry_untyped(\n        exceptions=exceptions,\n        is_async=is_async,\n        tries=tries,\n        delay=delay,\n        max_delay=max_delay,\n        backoff=backoff,\n        jitter=jitter,\n        logger=logger,\n    )\n    return wrapped  # type: ignore\n"
  },
  {
    "path": "private_gpt/utils/typing.py",
    "content": "from typing import TypeVar\n\nT = TypeVar(\"T\")\nK = TypeVar(\"K\")\nV = TypeVar(\"V\")\n"
  },
  {
    "path": "pyproject.toml",
    "content": "[tool.poetry]\nname = \"private-gpt\"\nversion = \"0.6.2\"\ndescription = \"Private GPT\"\nauthors = [\"Zylon <hi@zylon.ai>\"]\n\n[tool.poetry.dependencies]\npython = \">=3.11,<3.12\"\n# PrivateGPT\nfastapi = { extras = [\"all\"], version = \"^0.115.0\" }\npython-multipart = \"^0.0.10\"\ninjector = \"^0.22.0\"\npyyaml = \"^6.0.2\"\nwatchdog = \"^4.0.1\"\ntransformers = \"^4.44.2\"\ndocx2txt = \"^0.8\"\ncryptography = \"^3.1\"\n# LlamaIndex core libs\nllama-index-core = \">=0.11.2,<0.12.0\"\nllama-index-readers-file = \"*\"\n# Optional LlamaIndex integration libs\nllama-index-llms-llama-cpp = {version = \"*\", optional = true}\nllama-index-llms-openai = {version =\"*\", optional = true}\nllama-index-llms-openai-like = {version =\"*\", optional = true}\nllama-index-llms-ollama = {version =\"*\", optional = true}\nllama-index-llms-azure-openai = {version =\"*\", optional = true}\nllama-index-llms-gemini = {version =\"*\", optional = true}\nllama-index-embeddings-ollama = {version =\"*\", optional = true}\nllama-index-embeddings-huggingface = {version =\"*\", optional = true}\nllama-index-embeddings-openai = {version =\"*\", optional = true}\nllama-index-embeddings-azure-openai = {version =\"*\", optional = true}\nllama-index-embeddings-gemini = {version =\"*\", optional = true}\nllama-index-embeddings-mistralai = {version =\"*\", optional = true}\nllama-index-vector-stores-qdrant = {version =\"*\", optional = true}\nllama-index-vector-stores-milvus = {version =\"*\", optional = true}\nllama-index-vector-stores-chroma = {version =\"*\", optional = true}\nllama-index-vector-stores-postgres = {version =\"*\", optional = true}\nllama-index-vector-stores-clickhouse = {version =\"*\", optional = true}\nllama-index-storage-docstore-postgres = {version =\"*\", optional = true}\nllama-index-storage-index-store-postgres = {version =\"*\", optional = true}\n# Postgres\npsycopg2-binary = {version =\"^2.9.9\", optional = true}\nasyncpg = {version=\"^0.29.0\", optional = true}\n\n# ClickHouse\nclickhouse-connect = {version = \"^0.7.19\", optional = true}\n\n# Optional Sagemaker dependency\nboto3 = {version =\"^1.35.26\", optional = true}\n\n# Optional Reranker dependencies\ntorch = {version =\"^2.4.1\", optional = true}\nsentence-transformers = {version =\"^3.1.1\", optional = true}\n\n# Optional UI\ngradio = {version =\"^4.44.0\", optional = true}\nffmpy = {version =\"^0.4.0\", optional = true}\n\n# Optional HF Transformers\neinops = {version = \"^0.8.0\", optional = true}\nretry-async = \"^0.1.4\"\n\n[tool.poetry.extras]\nui = [\"gradio\", \"ffmpy\"]\nllms-llama-cpp = [\"llama-index-llms-llama-cpp\"]\nllms-openai = [\"llama-index-llms-openai\"]\nllms-openai-like = [\"llama-index-llms-openai-like\"]\nllms-ollama = [\"llama-index-llms-ollama\"]\nllms-sagemaker = [\"boto3\"]\nllms-azopenai = [\"llama-index-llms-azure-openai\"]\nllms-gemini = [\"llama-index-llms-gemini\"]\nembeddings-ollama = [\"llama-index-embeddings-ollama\"]\nembeddings-huggingface = [\"llama-index-embeddings-huggingface\", \"einops\"]\nembeddings-openai = [\"llama-index-embeddings-openai\"]\nembeddings-sagemaker = [\"boto3\"]\nembeddings-azopenai = [\"llama-index-embeddings-azure-openai\"]\nembeddings-gemini = [\"llama-index-embeddings-gemini\"]\nembeddings-mistral = [\"llama-index-embeddings-mistralai\"]\nvector-stores-qdrant = [\"llama-index-vector-stores-qdrant\"]\nvector-stores-clickhouse = [\"llama-index-vector-stores-clickhouse\", \"clickhouse_connect\"]\nvector-stores-chroma = 
[\"llama-index-vector-stores-chroma\"]\nvector-stores-postgres = [\"llama-index-vector-stores-postgres\"]\nvector-stores-milvus = [\"llama-index-vector-stores-milvus\"]\nstorage-nodestore-postgres = [\"llama-index-storage-docstore-postgres\",\"llama-index-storage-index-store-postgres\",\"psycopg2-binary\",\"asyncpg\"]\nrerank-sentence-transformers = [\"torch\", \"sentence-transformers\"]\n\n[tool.poetry.group.dev.dependencies]\nblack = \"^24\"\nmypy = \"^1.11\"\npre-commit = \"^3\"\npytest = \"^8\"\npytest-cov = \"^5\"\nruff = \"^0\"\npytest-asyncio = \"^0.24.0\"\ntypes-pyyaml = \"^6.0.12.20240917\"\n\n[build-system]\nrequires = [\"poetry-core>=1.0.0\"]\nbuild-backend = \"poetry.core.masonry.api\"\n\n# Packages configs\n\n## coverage\n\n[tool.coverage.run]\nbranch = true\n\n[tool.coverage.report]\nskip_empty = true\nprecision = 2\n\n## black\n\n[tool.black]\ntarget-version = ['py311']\n\n## ruff\n# Recommended ruff config for now, to be updated as we go along.\n[tool.ruff]\ntarget-version = 'py311'\n\n# See all rules at https://beta.ruff.rs/docs/rules/\nlint.select = [\n    \"E\", # pycodestyle\n    \"W\", # pycodestyle\n    \"F\", # Pyflakes\n    \"B\", # flake8-bugbear\n    \"C4\", # flake8-comprehensions\n    \"D\", # pydocstyle\n    \"I\", # isort\n    \"SIM\", # flake8-simplify\n    \"TCH\", # flake8-type-checking\n    \"TID\", # flake8-tidy-imports\n    \"Q\", # flake8-quotes\n    \"UP\", # pyupgrade\n    \"PT\", # flake8-pytest-style\n    \"RUF\", # Ruff-specific rules\n]\n\nlint.ignore = [\n    \"E501\", # \"Line too long\"\n    # -> line length already regulated by black\n    \"PT011\", # \"pytest.raises() should specify expected exception\"\n    # -> would imply to update tests every time you update exception message\n    \"SIM102\", # \"Use a single `if` statement instead of nested `if` statements\"\n    # -> too restrictive,\n    \"D100\",\n    \"D101\",\n    \"D102\",\n    \"D103\",\n    \"D104\",\n    \"D105\",\n    \"D106\",\n    \"D107\"\n    # -> \"Missing docstring in public function too restrictive\"\n]\n\n[tool.ruff.lint.pydocstyle]\n# Automatically disable rules that are incompatible with Google docstring convention\nconvention = \"google\"\n\n[tool.ruff.lint.pycodestyle]\nmax-doc-length = 88\n\n[tool.ruff.lint.flake8-tidy-imports]\nban-relative-imports = \"all\"\n\n[tool.ruff.lint.flake8-type-checking]\nstrict = true\nruntime-evaluated-base-classes = [\"pydantic.BaseModel\"]\n# Pydantic needs to be able to evaluate types at runtime\n# see https://pypi.org/project/flake8-type-checking/ for flake8-type-checking documentation\n# see https://beta.ruff.rs/docs/settings/#flake8-type-checking-runtime-evaluated-base-classes for ruff documentation\n\n[tool.ruff.lint.per-file-ignores]\n# Allow missing docstrings for tests\n\"tests/**/*.py\" = [\"D1\"]\n\n## mypy\n\n[tool.mypy]\npython_version = \"3.11\"\nstrict = true\ncheck_untyped_defs = false\nexplicit_package_bases = true\nwarn_unused_ignores = false\nexclude = [\"tests\"]\n\n[tool.mypy-llama-index]\nignore_missing_imports = true\n\n[tool.pytest.ini_options]\nasyncio_mode = \"auto\"\ntestpaths = [\"tests\"]\naddopts = [\n    \"--import-mode=importlib\",\n]\n"
  },
  {
    "path": "scripts/__init__.py",
    "content": "\"\"\"PrivateGPT scripts.\"\"\"\n"
  },
  {
    "path": "scripts/extract_openapi.py",
    "content": "import argparse\nimport json\nimport sys\n\nimport yaml\nfrom uvicorn.importer import import_from_string\n\nparser = argparse.ArgumentParser(prog=\"extract_openapi.py\")\nparser.add_argument(\"app\", help='App import string. Eg. \"main:app\"', default=\"main:app\")\nparser.add_argument(\"--app-dir\", help=\"Directory containing the app\", default=None)\nparser.add_argument(\n    \"--out\", help=\"Output file ending in .json or .yaml\", default=\"openapi.yaml\"\n)\n\nif __name__ == \"__main__\":\n    args = parser.parse_args()\n\n    if args.app_dir is not None:\n        print(f\"adding {args.app_dir} to sys.path\")\n        sys.path.insert(0, args.app_dir)\n\n    print(f\"importing app from {args.app}\")\n    app = import_from_string(args.app)\n    openapi = app.openapi()\n    version = openapi.get(\"openapi\", \"unknown version\")\n\n    print(f\"writing openapi spec v{version}\")\n    with open(args.out, \"w\") as f:\n        if args.out.endswith(\".json\"):\n            json.dump(openapi, f, indent=2)\n        else:\n            yaml.dump(openapi, f, sort_keys=False)\n\n    print(f\"spec written to {args.out}\")\n"
  },
  {
    "path": "scripts/ingest_folder.py",
    "content": "#!/usr/bin/env python3\n\nimport argparse\nimport logging\nfrom pathlib import Path\n\nfrom private_gpt.di import global_injector\nfrom private_gpt.server.ingest.ingest_service import IngestService\nfrom private_gpt.server.ingest.ingest_watcher import IngestWatcher\nfrom private_gpt.settings.settings import Settings\n\nlogger = logging.getLogger(__name__)\n\n\nclass LocalIngestWorker:\n    def __init__(self, ingest_service: IngestService, setting: Settings) -> None:\n        self.ingest_service = ingest_service\n\n        self.total_documents = 0\n        self.current_document_count = 0\n\n        self._files_under_root_folder: list[Path] = []\n\n        self.is_local_ingestion_enabled = setting.data.local_ingestion.enabled\n        self.allowed_local_folders = setting.data.local_ingestion.allow_ingest_from\n\n    def _validate_folder(self, folder_path: Path) -> None:\n        if not self.is_local_ingestion_enabled:\n            raise ValueError(\n                \"Local ingestion is disabled.\"\n                \"You can enable it in settings `ingestion.enabled`\"\n            )\n\n        # Allow all folders if wildcard is present\n        if \"*\" in self.allowed_local_folders:\n            return\n\n        for allowed_folder in self.allowed_local_folders:\n            if not folder_path.is_relative_to(allowed_folder):\n                raise ValueError(f\"Folder {folder_path} is not allowed for ingestion\")\n\n    def _find_all_files_in_folder(self, root_path: Path, ignored: list[str]) -> None:\n        \"\"\"Search all files under the root folder recursively.\n\n        Count them at the same time\n        \"\"\"\n        for file_path in root_path.iterdir():\n            if file_path.is_file() and file_path.name not in ignored:\n                self.total_documents += 1\n                self._validate_folder(file_path)\n                self._files_under_root_folder.append(file_path)\n            elif file_path.is_dir() and file_path.name not in ignored:\n                self._find_all_files_in_folder(file_path, ignored)\n\n    def ingest_folder(self, folder_path: Path, ignored: list[str]) -> None:\n        # Count total documents before ingestion\n        self._find_all_files_in_folder(folder_path, ignored)\n        self._ingest_all(self._files_under_root_folder)\n\n    def _ingest_all(self, files_to_ingest: list[Path]) -> None:\n        logger.info(\"Ingesting files=%s\", [f.name for f in files_to_ingest])\n        self.ingest_service.bulk_ingest([(str(p.name), p) for p in files_to_ingest])\n\n    def ingest_on_watch(self, changed_path: Path) -> None:\n        logger.info(\"Detected change in at path=%s, ingesting\", changed_path)\n        self._do_ingest_one(changed_path)\n\n    def _do_ingest_one(self, changed_path: Path) -> None:\n        try:\n            if changed_path.exists():\n                logger.info(f\"Started ingesting file={changed_path}\")\n                self.ingest_service.ingest_file(changed_path.name, changed_path)\n                logger.info(f\"Completed ingesting file={changed_path}\")\n        except Exception:\n            logger.exception(\n                f\"Failed to ingest document: {changed_path}, find the exception attached\"\n            )\n\n\nparser = argparse.ArgumentParser(prog=\"ingest_folder.py\")\nparser.add_argument(\"folder\", help=\"Folder to ingest\")\nparser.add_argument(\n    \"--watch\",\n    help=\"Watch for changes\",\n    action=argparse.BooleanOptionalAction,\n    default=False,\n)\nparser.add_argument(\n    
\"--ignored\",\n    nargs=\"*\",\n    help=\"List of files/directories to ignore\",\n    default=[],\n)\nparser.add_argument(\n    \"--log-file\",\n    help=\"Optional path to a log file. If provided, logs will be written to this file.\",\n    type=str,\n    default=None,\n)\n\nargs = parser.parse_args()\n\n# Set up logging to a file if a path is provided\nif args.log_file:\n    file_handler = logging.FileHandler(args.log_file, mode=\"a\")\n    file_handler.setFormatter(\n        logging.Formatter(\n            \"[%(asctime)s.%(msecs)03d] [%(levelname)s] %(message)s\",\n            datefmt=\"%Y-%m-%d %H:%M:%S\",\n        )\n    )\n    logger.addHandler(file_handler)\n\nif __name__ == \"__main__\":\n    root_path = Path(args.folder)\n    if not root_path.exists():\n        raise ValueError(f\"Path {args.folder} does not exist\")\n\n    ingest_service = global_injector.get(IngestService)\n    settings = global_injector.get(Settings)\n    worker = LocalIngestWorker(ingest_service, settings)\n    worker.ingest_folder(root_path, args.ignored)\n\n    if args.ignored:\n        logger.info(f\"Skipping following files and directories: {args.ignored}\")\n\n    if args.watch:\n        logger.info(f\"Watching {args.folder} for changes, press Ctrl+C to stop...\")\n        directories_to_watch = [\n            dir\n            for dir in root_path.iterdir()\n            if dir.is_dir() and dir.name not in args.ignored\n        ]\n        watcher = IngestWatcher(args.folder, worker.ingest_on_watch)\n        watcher.start()\n"
  },
  {
    "path": "scripts/setup",
    "content": "#!/usr/bin/env python3\nimport os\nimport argparse\n\nfrom huggingface_hub import hf_hub_download, snapshot_download\nfrom transformers import AutoTokenizer\n\nfrom private_gpt.paths import models_path, models_cache_path\nfrom private_gpt.settings.settings import settings\n\nresume_download = True\nif __name__ == '__main__':\n    parser = argparse.ArgumentParser(prog='Setup: Download models from Hugging Face')\n    parser.add_argument('--resume', default=True, action=argparse.BooleanOptionalAction, help='Enable/Disable resume_download options to restart the download progress interrupted')\n    args = parser.parse_args()\n    resume_download = args.resume\n\nos.makedirs(models_path, exist_ok=True)\n\n# Download Embedding model\nembedding_path = models_path / \"embedding\"\nprint(f\"Downloading embedding {settings().huggingface.embedding_hf_model_name}\")\nsnapshot_download(\n    repo_id=settings().huggingface.embedding_hf_model_name,\n    cache_dir=models_cache_path,\n    local_dir=embedding_path,\n    token=settings().huggingface.access_token,\n)\nprint(\"Embedding model downloaded!\")\n\n# Download LLM and create a symlink to the model file\nprint(f\"Downloading LLM {settings().llamacpp.llm_hf_model_file}\")\nhf_hub_download(\n    repo_id=settings().llamacpp.llm_hf_repo_id,\n    filename=settings().llamacpp.llm_hf_model_file,\n    cache_dir=models_cache_path,\n    local_dir=models_path,\n    resume_download=resume_download,\n    token=settings().huggingface.access_token,\n)\nprint(\"LLM model downloaded!\")\n\n# Download Tokenizer\nif settings().llm.tokenizer:\n    print(f\"Downloading tokenizer {settings().llm.tokenizer}\")\n    AutoTokenizer.from_pretrained(\n        pretrained_model_name_or_path=settings().llm.tokenizer,\n        cache_dir=models_cache_path,\n        token=settings().huggingface.access_token,\n    )\n    print(\"Tokenizer downloaded!\")\n\nprint(\"Setup done\")\n"
  },
  {
    "path": "scripts/utils.py",
    "content": "import argparse\nimport os\nimport shutil\nfrom typing import Any, ClassVar\n\nfrom private_gpt.paths import local_data_path\nfrom private_gpt.settings.settings import settings\n\n\ndef wipe_file(file: str) -> None:\n    if os.path.isfile(file):\n        os.remove(file)\n        print(f\" - Deleted {file}\")\n\n\ndef wipe_tree(path: str) -> None:\n    if not os.path.exists(path):\n        print(f\"Warning: Path not found {path}\")\n        return\n    print(f\"Wiping {path}...\")\n    all_files = os.listdir(path)\n\n    files_to_remove = [file for file in all_files if file != \".gitignore\"]\n    for file_name in files_to_remove:\n        file_path = os.path.join(path, file_name)\n        try:\n            if os.path.isfile(file_path):\n                os.remove(file_path)\n            elif os.path.isdir(file_path):\n                shutil.rmtree(file_path)\n            print(f\" - Deleted {file_path}\")\n        except PermissionError:\n            print(\n                f\"PermissionError: Unable to remove {file_path}. It is in use by another process.\"\n            )\n            continue\n\n\nclass Postgres:\n    tables: ClassVar[dict[str, list[str]]] = {\n        \"nodestore\": [\"data_docstore\", \"data_indexstore\"],\n        \"vectorstore\": [\"data_embeddings\"],\n    }\n\n    def __init__(self) -> None:\n        try:\n            import psycopg2\n        except ModuleNotFoundError:\n            raise ModuleNotFoundError(\"Postgres dependencies not found\") from None\n\n        connection = settings().postgres.model_dump(exclude_none=True)\n        self.schema = connection.pop(\"schema_name\")\n        self.conn = psycopg2.connect(**connection)\n\n    def wipe(self, storetype: str) -> None:\n        cur = self.conn.cursor()\n        try:\n            for table in self.tables[storetype]:\n                sql = f\"DROP TABLE IF EXISTS {self.schema}.{table}\"\n                cur.execute(sql)\n                print(f\"Table {self.schema}.{table} dropped.\")\n            self.conn.commit()\n        finally:\n            cur.close()\n\n    def stats(self, store_type: str) -> None:\n        template = \"SELECT '{table}', COUNT(*), pg_size_pretty(pg_total_relation_size('{table}')) FROM {table}\"\n        sql = \" UNION ALL \".join(\n            template.format(table=tbl) for tbl in self.tables[store_type]\n        )\n\n        cur = self.conn.cursor()\n        try:\n            print(f\"Storage for Postgres {store_type}.\")\n            print(\"{:<15} | {:>15} | {:>9}\".format(\"Table\", \"Rows\", \"Size\"))\n            print(\"-\" * 45)  # Print a line separator\n\n            cur.execute(sql)\n            for row in cur.fetchall():\n                formatted_row_count = f\"{row[1]:,}\"\n                print(f\"{row[0]:<15} | {formatted_row_count:>15} | {row[2]:>9}\")\n\n            print()\n        finally:\n            cur.close()\n\n    def __del__(self):\n        if hasattr(self, \"conn\") and self.conn:\n            self.conn.close()\n\n\nclass Simple:\n    def wipe(self, store_type: str) -> None:\n        assert store_type == \"nodestore\"\n        from llama_index.core.storage.docstore.types import (\n            DEFAULT_PERSIST_FNAME as DOCSTORE,\n        )\n        from llama_index.core.storage.index_store.types import (\n            DEFAULT_PERSIST_FNAME as INDEXSTORE,\n        )\n\n        for store in (DOCSTORE, INDEXSTORE):\n            wipe_file(str((local_data_path / store).absolute()))\n\n\nclass Chroma:\n    def wipe(self, store_type: str) -> None:\n  
      assert store_type == \"vectorstore\"\n        wipe_tree(str((local_data_path / \"chroma_db\").absolute()))\n\n\nclass Qdrant:\n    COLLECTION = (\n        \"make_this_parameterizable_per_api_call\"  # ?! see vector_store_component.py\n    )\n\n    def __init__(self) -> None:\n        try:\n            from qdrant_client import QdrantClient  # type: ignore\n        except ImportError:\n            raise ImportError(\"Qdrant dependencies not found\") from None\n        self.client = QdrantClient(**settings().qdrant.model_dump(exclude_none=True))\n\n    def wipe(self, store_type: str) -> None:\n        assert store_type == \"vectorstore\"\n        try:\n            self.client.delete_collection(self.COLLECTION)\n            print(\"Collection dropped successfully.\")\n        except Exception as e:\n            print(\"Error dropping collection:\", e)\n\n    def stats(self, store_type: str) -> None:\n        print(f\"Storage for Qdrant {store_type}.\")\n        try:\n            collection_data = self.client.get_collection(self.COLLECTION)\n            if collection_data:\n                # Collection Info\n                # https://qdrant.tech/documentation/concepts/collections/\n                print(f\"\\tPoints:        {collection_data.points_count:,}\")\n                print(f\"\\tVectors:       {collection_data.vectors_count:,}\")\n                print(f\"\\tIndex Vectors: {collection_data.indexed_vectors_count:,}\")\n                return\n        except ValueError:\n            pass\n        print(\"\\t- Qdrant collection not found or empty\")\n\n\nclass Command:\n    DB_HANDLERS: ClassVar[dict[str, Any]] = {\n        \"simple\": Simple,  # node store\n        \"chroma\": Chroma,  # vector store\n        \"postgres\": Postgres,  # node, index and vector store\n        \"qdrant\": Qdrant,  # vector store\n    }\n\n    def for_each_store(self, cmd: str):\n        for store_type in (\"nodestore\", \"vectorstore\"):\n            database = getattr(settings(), store_type).database\n            handler_class = self.DB_HANDLERS.get(database)\n            if handler_class is None:\n                print(f\"No handler found for database '{database}'\")\n                continue\n            handler_instance = handler_class()  # Instantiate the class\n            # If the DB can handle this cmd dispatch it.\n            if hasattr(handler_instance, cmd) and callable(\n                func := getattr(handler_instance, cmd)\n            ):\n                func(store_type)\n            else:\n                print(\n                    f\"Unable to execute command '{cmd}' on '{store_type}' in database '{database}'\"\n                )\n\n    def execute(self, cmd: str) -> None:\n        if cmd in (\"wipe\", \"stats\"):\n            self.for_each_store(cmd)\n\n\nif __name__ == \"__main__\":\n    parser = argparse.ArgumentParser()\n    parser.add_argument(\"mode\", help=\"select a mode to run\", choices=[\"wipe\", \"stats\"])\n    args = parser.parse_args()\n\n    Command().execute(args.mode.lower())\n"
  },
  {
    "path": "settings-azopenai.yaml",
    "content": "server:\n  env_name: ${APP_ENV:azopenai}\n\nllm:\n  mode: azopenai\n\nembedding:\n  mode: azopenai\n\nazopenai:\n  api_key: ${AZ_OPENAI_API_KEY:}\n  azure_endpoint: ${AZ_OPENAI_ENDPOINT:}\n  embedding_deployment_name: ${AZ_OPENAI_EMBEDDING_DEPLOYMENT_NAME:}\n  llm_deployment_name: ${AZ_OPENAI_LLM_DEPLOYMENT_NAME:}\n  api_version: \"2023-05-15\"\n  embedding_model: text-embedding-ada-002\n  llm_model: gpt-35-turbo"
  },
  {
    "path": "settings-docker.yaml",
    "content": "server:\n  env_name: ${APP_ENV:prod}\n  port: ${PORT:8080}\n\nllm:\n  mode: ${PGPT_MODE:mock}\n\nembedding:\n  mode: ${PGPT_EMBED_MODE:mock}\n\nllamacpp:\n  llm_hf_repo_id: ${PGPT_HF_REPO_ID:lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF}\n  llm_hf_model_file: ${PGPT_HF_MODEL_FILE:Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf}\n\nhuggingface:\n  embedding_hf_model_name: ${PGPT_EMBEDDING_HF_MODEL_NAME:nomic-ai/nomic-embed-text-v1.5}\n\nsagemaker:\n  llm_endpoint_name: ${PGPT_SAGEMAKER_LLM_ENDPOINT_NAME:}\n  embedding_endpoint_name: ${PGPT_SAGEMAKER_EMBEDDING_ENDPOINT_NAME:}\n\nollama:\n  llm_model: ${PGPT_OLLAMA_LLM_MODEL:llama3.1}\n  embedding_model: ${PGPT_OLLAMA_EMBEDDING_MODEL:nomic-embed-text}\n  api_base: ${PGPT_OLLAMA_API_BASE:http://ollama:11434}\n  embedding_api_base: ${PGPT_OLLAMA_EMBEDDING_API_BASE:http://ollama:11434}\n  tfs_z: ${PGPT_OLLAMA_TFS_Z:1.0}\n  top_k: ${PGPT_OLLAMA_TOP_K:40}\n  top_p: ${PGPT_OLLAMA_TOP_P:0.9}\n  repeat_last_n: ${PGPT_OLLAMA_REPEAT_LAST_N:64}\n  repeat_penalty: ${PGPT_OLLAMA_REPEAT_PENALTY:1.2}\n  request_timeout: ${PGPT_OLLAMA_REQUEST_TIMEOUT:600.0}\n  autopull_models: ${PGPT_OLLAMA_AUTOPULL_MODELS:true}\n\nui:\n  enabled: true\n  path: /\n"
  },
  {
    "path": "settings-gemini.yaml",
    "content": "llm:\n  mode: gemini\n\nembedding:\n  mode: gemini\n\ngemini:\n  api_key: ${GOOGLE_API_KEY:}\n  model: models/gemini-pro\n  embedding_model: models/embedding-001\n"
  },
  {
    "path": "settings-local.yaml",
    "content": "# poetry install --extras \"ui llms-llama-cpp vector-stores-qdrant embeddings-huggingface\"\nserver:\n  env_name: ${APP_ENV:local}\n\nllm:\n  mode: llamacpp\n  # Should be matching the selected model\n  max_new_tokens: 512\n  context_window: 3900\n  tokenizer: meta-llama/Meta-Llama-3.1-8B-Instruct\n  prompt_style: \"llama3\"\n\nllamacpp:\n  llm_hf_repo_id: lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF\n  llm_hf_model_file: Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf\n\nembedding:\n  mode: huggingface\n\nhuggingface:\n  embedding_hf_model_name: nomic-ai/nomic-embed-text-v1.5\n\nvectorstore:\n  database: qdrant\n\nqdrant:\n  path: local_data/private_gpt/qdrant\n"
  },
  {
    "path": "settings-mock.yaml",
    "content": "server:\n  env_name: ${APP_ENV:mock}\n\n# This configuration allows you to use GPU for creating embeddings while avoiding loading LLM into vRAM\nllm:\n  mode: mock\n\nembedding:\n  mode: huggingface\n"
  },
  {
    "path": "settings-ollama-pg.yaml",
    "content": "# Using ollama and postgres for the vector, doc and index store. Ollama is also used for embeddings.\n# To use install these extras:\n# poetry install --extras \"llms-ollama ui vector-stores-postgres embeddings-ollama storage-nodestore-postgres\"\nserver:\n  env_name: ${APP_ENV:ollama}\n\nllm:\n  mode: ollama\n  max_new_tokens: 512\n  context_window: 3900\n\nembedding:\n  mode: ollama\n  embed_dim: 768\n\nollama:\n  llm_model: llama3.1\n  embedding_model: nomic-embed-text\n  api_base: http://localhost:11434\n\nnodestore:\n  database: postgres\n\nvectorstore:\n  database: postgres\n\npostgres:\n  host: localhost\n  port: 5432\n  database: postgres\n  user: postgres\n  password: admin\n  schema_name: private_gpt\n\n"
  },
  {
    "path": "settings-ollama.yaml",
    "content": "server:\n  env_name: ${APP_ENV:ollama}\n\nllm:\n  mode: ollama\n  max_new_tokens: 512\n  context_window: 3900\n  temperature: 0.1     #The temperature of the model. Increasing the temperature will make the model answer more creatively. A value of 0.1 would be more factual. (Default: 0.1)\n\nembedding:\n  mode: ollama\n\nollama:\n  llm_model: llama3.1\n  embedding_model: nomic-embed-text\n  api_base: http://localhost:11434\n  embedding_api_base: http://localhost:11434  # change if your embedding model runs on another ollama\n  keep_alive: 5m\n  tfs_z: 1.0              # Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting.\n  top_k: 40               # Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40)\n  top_p: 0.9              # Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9)\n  repeat_last_n: 64       # Sets how far back for the model to look back to prevent repetition. (Default: 64, 0 = disabled, -1 = num_ctx)\n  repeat_penalty: 1.2     # Sets how strongly to penalize repetitions. A higher value (e.g., 1.5) will penalize repetitions more strongly, while a lower value (e.g., 0.9) will be more lenient. (Default: 1.1)\n  request_timeout: 120.0  # Time elapsed until ollama times out the request. Default is 120s. Format is float.\n\nvectorstore:\n  database: qdrant\n\nqdrant:\n  path: local_data/private_gpt/qdrant\n"
  },
  {
    "path": "settings-openai.yaml",
    "content": "server:\n  env_name: ${APP_ENV:openai}\n\nllm:\n  mode: openai\n\nembedding:\n  mode: openai\n\nopenai:\n  api_key: ${OPENAI_API_KEY:}\n  model: gpt-3.5-turbo\n"
  },
  {
    "path": "settings-sagemaker.yaml",
    "content": "server:\n  env_name: ${APP_ENV:sagemaker}\n  port: ${PORT:8001}\n\nui:\n  enabled: true\n  path: /\n\nllm:\n  mode: sagemaker\n\nembedding:\n  mode: sagemaker\n\nsagemaker:\n  llm_endpoint_name: llm\n  embedding_endpoint_name: embedding"
  },
  {
    "path": "settings-test.yaml",
    "content": "server:\n  env_name: test\n  auth:\n    enabled: false\n    # Dummy secrets used for tests\n    secret: \"foo bar; dummy secret\"\n\ndata:\n  local_data_folder: local_data/tests\n\nqdrant:\n  path: local_data/tests\n\nllm:\n  mode: mock\n\nembedding:\n  mode: mock\n\nui:\n  enabled: false"
  },
  {
    "path": "settings-vllm.yaml",
    "content": "server:\n  env_name: ${APP_ENV:vllm}\n\nllm:\n  mode: openailike\n  max_new_tokens: 512\n  tokenizer: meta-llama/Meta-Llama-3.1-8B-Instruct\n  temperature: 0.1\n\nembedding:\n  mode: huggingface\n  ingest_mode: simple\n\nhuggingface:\n  embedding_hf_model_name: nomic-ai/nomic-embed-text-v1.5\n\nopenai:\n  api_base: http://localhost:8000/v1\n  api_key: EMPTY\n  model: facebook/opt-125m\n  request_timeout: 600.0"
  },
  {
    "path": "settings.yaml",
    "content": "# The default configuration file.\n# More information about configuration can be found in the documentation: https://docs.privategpt.dev/\n# Syntax in `private_pgt/settings/settings.py`\nserver:\n  env_name: ${APP_ENV:prod}\n  port: ${PORT:8001}\n  cors:\n    enabled: true\n    allow_origins: [\"*\"]\n    allow_methods: [\"*\"]\n    allow_headers: [\"*\"]\n  auth:\n    enabled: false\n    # python -c 'import base64; print(\"Basic \" + base64.b64encode(\"secret:key\".encode()).decode())'\n    # 'secret' is the username and 'key' is the password for basic auth by default\n    # If the auth is enabled, this value must be set in the \"Authorization\" header of the request.\n    secret: \"Basic c2VjcmV0OmtleQ==\"\n\ndata:\n  local_ingestion:\n    enabled: ${LOCAL_INGESTION_ENABLED:false}\n    allow_ingest_from: [\"*\"]\n  local_data_folder: local_data/private_gpt\n\nui:\n  enabled: true\n  path: /\n  # \"RAG\", \"Search\", \"Basic\", or \"Summarize\"\n  default_mode: \"RAG\"\n  default_chat_system_prompt: >\n    You are a helpful, respectful and honest assistant.\n    Always answer as helpfully as possible and follow ALL given instructions.\n    Do not speculate or make up information.\n    Do not reference any given instructions or context.\n  default_query_system_prompt: >\n    You can only answer questions about the provided context.\n    If you know the answer but it is not based in the provided context, don't provide\n    the answer, just state the answer is not in the context provided.\n  default_summarization_system_prompt: >\n    Provide a comprehensive summary of the provided context information.\n    The summary should cover all the key points and main ideas presented in\n    the original text, while also condensing the information into a concise\n    and easy-to-understand format. Please ensure that the summary includes\n    relevant details and examples that support the main ideas, while avoiding\n    any unnecessary information or repetition.\n  delete_file_button_enabled: true\n  delete_all_files_button_enabled: true\n\nllm:\n  mode: llamacpp\n  prompt_style: \"llama3\"\n  # Should be matching the selected model\n  max_new_tokens: 512\n  context_window: 3900\n  # Select your tokenizer. Llama-index tokenizer is the default.\n  # tokenizer: meta-llama/Meta-Llama-3.1-8B-Instruct\n  temperature: 0.1      # The temperature of the model. Increasing the temperature will make the model answer more creatively. A value of 0.1 would be more factual. (Default: 0.1)\n\nrag:\n  similarity_top_k: 2\n  #This value controls how many \"top\" documents the RAG returns to use in the context.\n  #similarity_value: 0.45\n  #This value is disabled by default.  If you enable this settings, the RAG will only use articles that meet a certain percentage score.\n  rerank:\n    enabled: false\n    model: cross-encoder/ms-marco-MiniLM-L-2-v2\n    top_n: 1\n\nsummarize:\n  use_async: true\n\nclickhouse:\n    host: localhost\n    port: 8443\n    username: admin\n    password: clickhouse\n    database: embeddings\n\nllamacpp:\n  llm_hf_repo_id: lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF\n  llm_hf_model_file: Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf\n  tfs_z: 1.0            # Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting\n  top_k: 40             # Reduces the probability of generating nonsense. A higher value (e.g. 
100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40)\n  top_p: 1.0            # Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9)\n  repeat_penalty: 1.1   # Sets how strongly to penalize repetitions. A higher value (e.g., 1.5) will penalize repetitions more strongly, while a lower value (e.g., 0.9) will be more lenient. (Default: 1.1)\n\nembedding:\n  # Should be matching the value above in most cases\n  mode: huggingface\n  ingest_mode: simple\n  embed_dim: 768 # 768 is for nomic-ai/nomic-embed-text-v1.5\n\nhuggingface:\n  embedding_hf_model_name: nomic-ai/nomic-embed-text-v1.5\n  access_token: ${HF_TOKEN:}\n  # Warning: Enabling this option will allow the model to download and execute code from the internet.\n  # Nomic AI requires this option to be enabled to use the model, be aware if you are using a different model.\n  trust_remote_code: true\n\nvectorstore:\n  database: qdrant\n\nnodestore:\n  database: simple\n\nmilvus:\n  uri: local_data/private_gpt/milvus/milvus_local.db\n  collection_name: milvus_db\n  overwrite: false\n\nqdrant:\n  path: local_data/private_gpt/qdrant\n\npostgres:\n  host: localhost\n  port: 5432\n  database: postgres\n  user: postgres\n  password: postgres\n  schema_name: private_gpt\n\nsagemaker:\n  llm_endpoint_name: huggingface-pytorch-tgi-inference-2023-09-25-19-53-32-140\n  embedding_endpoint_name: huggingface-pytorch-inference-2023-11-03-07-41-36-479\n\nopenai:\n  api_key: ${OPENAI_API_KEY:}\n  model: gpt-3.5-turbo\n  embedding_api_key: ${OPENAI_API_KEY:}\n\nollama:\n  llm_model: llama3.1\n  embedding_model: nomic-embed-text\n  api_base: http://localhost:11434\n  embedding_api_base: http://localhost:11434  # change if your embedding model runs on another ollama\n  keep_alive: 5m\n  request_timeout: 120.0\n  autopull_models: true\n\nazopenai:\n  api_key: ${AZ_OPENAI_API_KEY:}\n  azure_endpoint: ${AZ_OPENAI_ENDPOINT:}\n  embedding_deployment_name: ${AZ_OPENAI_EMBEDDING_DEPLOYMENT_NAME:}\n  llm_deployment_name: ${AZ_OPENAI_LLM_DEPLOYMENT_NAME:}\n  api_version: \"2023-05-15\"\n  embedding_model: text-embedding-ada-002\n  llm_model: gpt-35-turbo\n\ngemini:\n  api_key: ${GOOGLE_API_KEY:}\n  model: models/gemini-pro\n  embedding_model: models/embedding-001\n"
  },
  {
    "path": "tests/__init__.py",
    "content": "\"\"\"Tests.\"\"\"\n"
  },
  {
    "path": "tests/conftest.py",
    "content": "import os\nimport pathlib\nfrom glob import glob\n\nroot_path = pathlib.Path(__file__).parents[1]\n# This is to prevent a bug in intellij that uses the wrong working directory\nos.chdir(root_path)\n\n\ndef _as_module(fixture_path: str) -> str:\n    return fixture_path.replace(\"/\", \".\").replace(\"\\\\\", \".\").replace(\".py\", \"\")\n\n\npytest_plugins = [_as_module(fixture) for fixture in glob(\"tests/fixtures/[!_]*.py\")]\n"
  },
  {
    "path": "tests/fixtures/__init__.py",
    "content": "\"\"\"Global fixtures.\"\"\"\n"
  },
  {
    "path": "tests/fixtures/auto_close_qdrant.py",
    "content": "import pytest\n\nfrom private_gpt.components.vector_store.vector_store_component import (\n    VectorStoreComponent,\n)\nfrom tests.fixtures.mock_injector import MockInjector\n\n\n@pytest.fixture(autouse=True)\ndef _auto_close_vector_store_client(injector: MockInjector) -> None:\n    \"\"\"Auto close VectorStore client after each test.\n\n    VectorStore client (qdrant/chromadb) opens a connection the\n    Database that causes issues when running tests too fast,\n    so close explicitly after each test.\n    \"\"\"\n    yield\n    injector.get(VectorStoreComponent).close()\n"
  },
  {
    "path": "tests/fixtures/fast_api_test_client.py",
    "content": "import pytest\nfrom fastapi.testclient import TestClient\n\nfrom private_gpt.launcher import create_app\nfrom tests.fixtures.mock_injector import MockInjector\n\n\n@pytest.fixture\ndef test_client(request: pytest.FixtureRequest, injector: MockInjector) -> TestClient:\n    if request is not None and hasattr(request, \"param\"):\n        injector.bind_settings(request.param or {})\n\n    app_under_test = create_app(injector.test_injector)\n    return TestClient(app_under_test)\n"
  },
  {
    "path": "tests/fixtures/ingest_helper.py",
    "content": "from pathlib import Path\n\nimport pytest\nfrom fastapi.testclient import TestClient\n\nfrom private_gpt.server.ingest.ingest_router import IngestResponse\n\n\nclass IngestHelper:\n    def __init__(self, test_client: TestClient):\n        self.test_client = test_client\n\n    def ingest_file(self, path: Path) -> IngestResponse:\n        files = {\"file\": (path.name, path.open(\"rb\"))}\n\n        response = self.test_client.post(\"/v1/ingest/file\", files=files)\n        assert response.status_code == 200\n        ingest_result = IngestResponse.model_validate(response.json())\n        return ingest_result\n\n\n@pytest.fixture\ndef ingest_helper(test_client: TestClient) -> IngestHelper:\n    return IngestHelper(test_client)\n"
  },
  {
    "path": "tests/fixtures/mock_injector.py",
    "content": "from collections.abc import Callable\nfrom typing import Any\nfrom unittest.mock import MagicMock\n\nimport pytest\nfrom injector import Provider, ScopeDecorator, singleton\n\nfrom private_gpt.di import create_application_injector\nfrom private_gpt.settings.settings import Settings, unsafe_settings\nfrom private_gpt.settings.settings_loader import merge_settings\nfrom private_gpt.utils.typing import T\n\n\nclass MockInjector:\n    def __init__(self) -> None:\n        self.test_injector = create_application_injector()\n\n    def bind_mock(\n        self,\n        interface: type[T],\n        mock: (T | (Callable[..., T] | Provider[T])) | None = None,\n        *,\n        scope: ScopeDecorator = singleton,\n    ) -> T:\n        if mock is None:\n            mock = MagicMock()\n        self.test_injector.binder.bind(interface, to=mock, scope=scope)\n        return mock  # type: ignore\n\n    def bind_settings(self, settings: dict[str, Any]) -> Settings:\n        merged = merge_settings([unsafe_settings, settings])\n        new_settings = Settings(**merged)\n        self.test_injector.binder.bind(Settings, new_settings)\n        return new_settings\n\n    def get(self, interface: type[T]) -> T:\n        return self.test_injector.get(interface)\n\n\n@pytest.fixture\ndef injector() -> MockInjector:\n    return MockInjector()\n"
  },
  {
    "path": "tests/server/chat/test_chat_routes.py",
    "content": "from fastapi.testclient import TestClient\n\nfrom private_gpt.open_ai.openai_models import OpenAICompletion, OpenAIMessage\nfrom private_gpt.server.chat.chat_router import ChatBody\n\n\ndef test_chat_route_produces_a_stream(test_client: TestClient) -> None:\n    body = ChatBody(\n        messages=[OpenAIMessage(content=\"test\", role=\"user\")],\n        use_context=False,\n        stream=True,\n    )\n    response = test_client.post(\"/v1/chat/completions\", json=body.model_dump())\n\n    raw_events = response.text.split(\"\\n\\n\")\n    events = [\n        item.removeprefix(\"data: \") for item in raw_events if item.startswith(\"data: \")\n    ]\n    assert response.status_code == 200\n    assert \"text/event-stream\" in response.headers[\"content-type\"]\n    assert len(events) > 0\n    assert events[-1] == \"[DONE]\"\n\n\ndef test_chat_route_produces_a_single_value(test_client: TestClient) -> None:\n    body = ChatBody(\n        messages=[OpenAIMessage(content=\"test\", role=\"user\")],\n        use_context=False,\n        stream=False,\n    )\n    response = test_client.post(\"/v1/chat/completions\", json=body.model_dump())\n\n    # No asserts, if it validates it's good\n    OpenAICompletion.model_validate(response.json())\n    assert response.status_code == 200\n"
  },
  {
    "path": "tests/server/chunks/chunk_test.txt",
    "content": "e88c1005-637d-4cb4-ae79-9b8eb58cab97\n\nb483dd15-78c4-4d67-b546-21a0d690bf43\n\na8080238-b294-4598-ac9c-7abf4c8e0552\n\n14208dac-c600-4a18-872b-5e45354cfff2"
  },
  {
    "path": "tests/server/chunks/test_chunk_routes.py",
    "content": "from pathlib import Path\n\nfrom fastapi.testclient import TestClient\n\nfrom private_gpt.server.chunks.chunks_router import ChunksBody, ChunksResponse\nfrom tests.fixtures.ingest_helper import IngestHelper\n\n\ndef test_chunks_retrieval(test_client: TestClient, ingest_helper: IngestHelper) -> None:\n    # Make sure there is at least some chunk to query in the database\n    path = Path(__file__).parents[0] / \"chunk_test.txt\"\n    ingest_helper.ingest_file(path)\n\n    body = ChunksBody(text=\"b483dd15-78c4-4d67-b546-21a0d690bf43\")\n    response = test_client.post(\"/v1/chunks\", json=body.model_dump())\n    assert response.status_code == 200\n    chunk_response = ChunksResponse.model_validate(response.json())\n    assert len(chunk_response.data) > 0\n"
  },
  {
    "path": "tests/server/embeddings/test_embedding_routes.py",
    "content": "from fastapi.testclient import TestClient\n\nfrom private_gpt.server.embeddings.embeddings_router import (\n    EmbeddingsBody,\n    EmbeddingsResponse,\n)\n\n\ndef test_embeddings_generation(test_client: TestClient) -> None:\n    body = EmbeddingsBody(input=\"Embed me\")\n    response = test_client.post(\"/v1/embeddings\", json=body.model_dump())\n\n    assert response.status_code == 200\n    embedding_response = EmbeddingsResponse.model_validate(response.json())\n    assert len(embedding_response.data) > 0\n    assert len(embedding_response.data[0].embedding) > 0\n"
  },
  {
    "path": "tests/server/ingest/test.txt",
    "content": "Once upon a time, in a magical forest called Enchantia, lived a young and cheerful deer named Zumi. Zumi was no ordinary deer; she was bright-eyed, intelligent, and had a heart full of curiosity. One sunny morning, as the forest came alive with the sweet melodies of chirping birds and rustling leaves, Zumi eagerly pranced through the woods on her way to school.\n\nEnchantia Forest School was a unique place, where all the woodland creatures gathered to learn and grow together. The school was nestled in a clearing surrounded by tall, ancient trees. Zumi loved the feeling of anticipation as she approached the school, her hooves barely touching the ground in excitement.\n\nAs she arrived at the school, her dear friend and classmate, Oliver the wise old owl, greeted her with a friendly hoot. \"Good morning, Zumi! Are you ready for another day of adventure and learning?\"\n\nZumi's eyes sparkled with enthusiasm as she nodded, \"Absolutely, Oliver! I can't wait to see what we'll discover today.\"\n\nIn their classroom, Teacher Willow, a gentle and nurturing willow tree, welcomed the students. The classroom was adorned with vibrant leaves and twinkling fireflies, creating a magical and cozy atmosphere. Today's lesson was about the history of the forest and the importance of living harmoniously with nature.\n\nThe students listened attentively as Teacher Willow recounted stories of ancient times when the forest thrived in unity and peace. Zumi was particularly enthralled by the tales of forest guardians and how they protected the magical balance of Enchantia.\n\nAfter the lesson, it was time for recess. Zumi joined her friends in a lively game of tag, where they darted and danced playfully among the trees. Zumi's speed and agility made her an excellent tagger, and laughter filled the air as they played.\n\nLater, they gathered for an art class, where they expressed themselves through painting and sculpting with clay. Zumi chose to paint a mural of the forest, portraying the beauty and magic they were surrounded by every day.\n\nAs the day came to an end, the students sat in a circle to share stories and reflections. Zumi shared her excitement for the day and how she learned to appreciate the interconnectedness of all creatures in the forest.\n\nAs the sun set, casting a golden glow across the forest, Zumi made her way back home, her heart brimming with happiness and newfound knowledge. Each day at Enchantia Forest School was an adventure, and Zumi couldn't wait to learn more and grow with her friends, for the magic of learning was as boundless as the forest itself. And so, under the canopy of stars and the watchful eyes of the forest, Zumi drifted into dreams filled with wonder and anticipation for the adventures that awaited her on the morrow."
  },
  {
    "path": "tests/server/ingest/test_ingest_routes.py",
    "content": "import tempfile\nfrom pathlib import Path\n\nfrom fastapi.testclient import TestClient\n\nfrom private_gpt.server.ingest.ingest_router import IngestResponse\nfrom tests.fixtures.ingest_helper import IngestHelper\n\n\ndef test_ingest_accepts_txt_files(ingest_helper: IngestHelper) -> None:\n    path = Path(__file__).parents[0] / \"test.txt\"\n    ingest_result = ingest_helper.ingest_file(path)\n    assert len(ingest_result.data) == 1\n\n\ndef test_ingest_accepts_pdf_files(ingest_helper: IngestHelper) -> None:\n    path = Path(__file__).parents[0] / \"test.pdf\"\n    ingest_result = ingest_helper.ingest_file(path)\n    assert len(ingest_result.data) == 1\n\n\ndef test_ingest_list_returns_something_after_ingestion(\n    test_client: TestClient, ingest_helper: IngestHelper\n) -> None:\n    response_before = test_client.get(\"/v1/ingest/list\")\n    count_ingest_before = len(response_before.json()[\"data\"])\n    with tempfile.NamedTemporaryFile(\"w\", suffix=\".txt\") as test_file:\n        test_file.write(\"Foo bar; hello there!\")\n        test_file.flush()\n        test_file.seek(0)\n        ingest_result = ingest_helper.ingest_file(Path(test_file.name))\n    assert len(ingest_result.data) == 1, \"The temp doc should have been ingested\"\n    response_after = test_client.get(\"/v1/ingest/list\")\n    count_ingest_after = len(response_after.json()[\"data\"])\n    assert (\n        count_ingest_after == count_ingest_before + 1\n    ), \"The temp doc should be returned\"\n\n\ndef test_ingest_plain_text(test_client: TestClient) -> None:\n    response = test_client.post(\n        \"/v1/ingest/text\", json={\"file_name\": \"file_name\", \"text\": \"text\"}\n    )\n    assert response.status_code == 200\n    ingest_result = IngestResponse.model_validate(response.json())\n    assert len(ingest_result.data) == 1\n"
  },
  {
    "path": "tests/server/ingest/test_local_ingest.py",
    "content": "import os\nimport subprocess\nfrom pathlib import Path\n\nimport pytest\nfrom fastapi.testclient import TestClient\n\n\n@pytest.fixture\ndef file_path() -> str:\n    return \"test.txt\"\n\n\ndef create_test_file(file_path: str) -> None:\n    with open(file_path, \"w\") as f:\n        f.write(\"test\")\n\n\ndef clear_log_file(log_file_path: str) -> None:\n    if Path(log_file_path).exists():\n        os.remove(log_file_path)\n\n\ndef read_log_file(log_file_path: str) -> str:\n    with open(log_file_path) as f:\n        return f.read()\n\n\ndef init_structure(folder: str, file_path: str) -> None:\n    clear_log_file(file_path)\n    os.makedirs(folder, exist_ok=True)\n    create_test_file(f\"{folder}/${file_path}\")\n\n\ndef test_ingest_one_file_in_allowed_folder(\n    file_path: str, test_client: TestClient\n) -> None:\n    allowed_folder = \"local_data/tests/allowed_folder\"\n    init_structure(allowed_folder, file_path)\n\n    test_env = os.environ.copy()\n    test_env[\"PGPT_PROFILES\"] = \"test\"\n    test_env[\"LOCAL_INGESTION_ENABLED\"] = \"True\"\n\n    result = subprocess.run(\n        [\"python\", \"scripts/ingest_folder.py\", allowed_folder],\n        capture_output=True,\n        text=True,\n        env=test_env,\n    )\n\n    assert result.returncode == 0, f\"Script failed with error: {result.stderr}\"\n    response_after = test_client.get(\"/v1/ingest/list\")\n\n    count_ingest_after = len(response_after.json()[\"data\"])\n    assert count_ingest_after > 0, \"No documents were ingested\"\n\n\ndef test_ingest_disabled(file_path: str) -> None:\n    allowed_folder = \"local_data/tests/allowed_folder\"\n    init_structure(allowed_folder, file_path)\n\n    test_env = os.environ.copy()\n    test_env[\"PGPT_PROFILES\"] = \"test\"\n    test_env[\"LOCAL_INGESTION_ENABLED\"] = \"False\"\n\n    result = subprocess.run(\n        [\"python\", \"scripts/ingest_folder.py\", allowed_folder],\n        capture_output=True,\n        text=True,\n        env=test_env,\n    )\n\n    assert result.returncode != 0, f\"Script failed with error: {result.stderr}\"\n"
  },
  {
    "path": "tests/server/recipes/test_summarize_router.py",
    "content": "from fastapi.testclient import TestClient\n\nfrom private_gpt.server.recipes.summarize.summarize_router import (\n    SummarizeBody,\n    SummarizeResponse,\n)\n\n\ndef test_summarize_route_produces_a_stream(test_client: TestClient) -> None:\n    body = SummarizeBody(\n        text=\"Test\",\n        stream=True,\n    )\n    response = test_client.post(\"/v1/summarize\", json=body.model_dump())\n\n    raw_events = response.text.split(\"\\n\\n\")\n    events = [\n        item.removeprefix(\"data: \") for item in raw_events if item.startswith(\"data: \")\n    ]\n    assert response.status_code == 200\n    assert \"text/event-stream\" in response.headers[\"content-type\"]\n    assert len(events) > 0\n    assert events[-1] == \"[DONE]\"\n\n\ndef test_summarize_route_produces_a_single_value(test_client: TestClient) -> None:\n    body = SummarizeBody(\n        text=\"test\",\n        stream=False,\n    )\n    response = test_client.post(\"/v1/summarize\", json=body.model_dump())\n\n    # No asserts, if it validates it's good\n    SummarizeResponse.model_validate(response.json())\n    assert response.status_code == 200\n\n\ndef test_summarize_with_document_context(test_client: TestClient) -> None:\n    # Ingest an document\n    ingest_response = test_client.post(\n        \"/v1/ingest/text\",\n        json={\n            \"file_name\": \"file_name\",\n            \"text\": \"Lorem ipsum dolor sit amet\",\n        },\n    )\n    assert ingest_response.status_code == 200\n    ingested_docs = ingest_response.json()[\"data\"]\n    assert len(ingested_docs) == 1\n\n    body = SummarizeBody(\n        use_context=True,\n        context_filter={\"docs_ids\": [doc[\"doc_id\"] for doc in ingested_docs]},\n        stream=False,\n    )\n    response = test_client.post(\"/v1/summarize\", json=body.model_dump())\n\n    completion: SummarizeResponse = SummarizeResponse.model_validate(response.json())\n    assert response.status_code == 200\n    # We can check the content of the completion, because mock LLM used in tests\n    # always echoes the prompt. In the case of summary, the input context is passed.\n    assert completion.summary.find(\"Lorem ipsum dolor sit amet\") != -1\n\n\ndef test_summarize_with_non_existent_document_context_not_fails(\n    test_client: TestClient,\n) -> None:\n    body = SummarizeBody(\n        use_context=True,\n        context_filter={\n            \"docs_ids\": [\"non-existent-doc-id\"],\n        },\n        stream=False,\n    )\n\n    response = test_client.post(\"/v1/summarize\", json=body.model_dump())\n\n    completion: SummarizeResponse = SummarizeResponse.model_validate(response.json())\n    assert response.status_code == 200\n    # We can check the content of the completion, because mock LLM used in tests\n    # always echoes the prompt. 
In the case of summary, the input context is passed.\n    assert completion.summary.find(\"Empty Response\") != -1\n\n\ndef test_summarize_with_metadata_and_document_context(test_client: TestClient) -> None:\n    docs = []\n\n    # Ingest a first document\n    document_1_content = \"Content of document 1\"\n    ingest_response = test_client.post(\n        \"/v1/ingest/text\",\n        json={\n            \"file_name\": \"file_name_1\",\n            \"text\": document_1_content,\n        },\n    )\n    assert ingest_response.status_code == 200\n    ingested_docs = ingest_response.json()[\"data\"]\n    assert len(ingested_docs) == 1\n    docs += ingested_docs\n\n    # Ingest a second document\n    document_2_content = \"Text of document 2\"\n    ingest_response = test_client.post(\n        \"/v1/ingest/text\",\n        json={\n            \"file_name\": \"file_name_2\",\n            \"text\": document_2_content,\n        },\n    )\n    assert ingest_response.status_code == 200\n    ingested_docs = ingest_response.json()[\"data\"]\n    assert len(ingested_docs) == 1\n    docs += ingested_docs\n\n    # Completions with the first document's id and the second document's metadata\n    body = SummarizeBody(\n        use_context=True,\n        context_filter={\"docs_ids\": [doc[\"doc_id\"] for doc in docs]},\n        stream=False,\n    )\n    response = test_client.post(\"/v1/summarize\", json=body.model_dump())\n\n    completion: SummarizeResponse = SummarizeResponse.model_validate(response.json())\n    assert response.status_code == 200\n    # Assert both documents are part of the used sources\n    # We can check the content of the completion, because mock LLM used in tests\n    # always echoes the prompt. In the case of summary, the input context is passed.\n    assert completion.summary.find(document_1_content) != -1\n    assert completion.summary.find(document_2_content) != -1\n\n\ndef test_summarize_with_prompt(test_client: TestClient) -> None:\n    ingest_response = test_client.post(\n        \"/v1/ingest/text\",\n        json={\n            \"file_name\": \"file_name\",\n            \"text\": \"Lorem ipsum dolor sit amet\",\n        },\n    )\n    assert ingest_response.status_code == 200\n    ingested_docs = ingest_response.json()[\"data\"]\n    assert len(ingested_docs) == 1\n\n    body = SummarizeBody(\n        use_context=True,\n        context_filter={\n            \"docs_ids\": [doc[\"doc_id\"] for doc in ingested_docs],\n        },\n        prompt=\"This is a custom summary prompt, 54321\",\n        stream=False,\n    )\n    response = test_client.post(\"/v1/summarize\", json=body.model_dump())\n\n    completion: SummarizeResponse = SummarizeResponse.model_validate(response.json())\n    assert response.status_code == 200\n    # We can check the content of the completion, because mock LLM used in tests\n    # always echoes the prompt. In the case of summary, the input context is passed.\n    assert completion.summary.find(\"This is a custom summary prompt, 54321\") != -1\n"
  },
  {
    "path": "tests/server/utils/test_auth.py",
    "content": "from fastapi.testclient import TestClient\n\n\ndef test_default_does_not_require_auth(test_client: TestClient) -> None:\n    response_before = test_client.get(\"/v1/ingest/list\")\n    assert response_before.status_code == 200\n"
  },
  {
    "path": "tests/server/utils/test_simple_auth.py",
    "content": "\"\"\"Tests to validate that the simple authentication mechanism is working.\n\nNOTE: We are not testing the switch based on the config in\n      `private_gpt.server.utils.auth`. This is not done because of the way the code\n      is currently architecture (it is hard to patch the `settings` and the app while\n      the tests are directly importing them).\n\"\"\"\n\nfrom typing import Annotated\n\nimport pytest\nfrom fastapi import Depends\nfrom fastapi.testclient import TestClient\n\nfrom private_gpt.server.utils.auth import (\n    NOT_AUTHENTICATED,\n    _simple_authentication,\n    authenticated,\n)\nfrom private_gpt.settings.settings import settings\n\n\ndef _copy_simple_authenticated(\n    _simple_authentication: Annotated[bool, Depends(_simple_authentication)]\n) -> bool:\n    \"\"\"Check if the request is authenticated.\"\"\"\n    if not _simple_authentication:\n        raise NOT_AUTHENTICATED\n    return True\n\n\n@pytest.fixture(autouse=True)\ndef _patch_authenticated_dependency(test_client: TestClient):\n    # Patch the server to use simple authentication\n\n    test_client.app.dependency_overrides[authenticated] = _copy_simple_authenticated\n\n    # Call the actual test\n    yield\n\n    # Remove the patch for other tests\n    test_client.app.dependency_overrides = {}\n\n\ndef test_default_auth_working_when_enabled_401(test_client: TestClient) -> None:\n    response = test_client.get(\"/v1/ingest/list\")\n    assert response.status_code == 401\n\n\ndef test_default_auth_working_when_enabled_200(test_client: TestClient) -> None:\n    response_fail = test_client.get(\"/v1/ingest/list\")\n    assert response_fail.status_code == 401\n\n    response_success = test_client.get(\n        \"/v1/ingest/list\", headers={\"Authorization\": settings().server.auth.secret}\n    )\n    assert response_success.status_code == 200\n"
  },
  {
    "path": "tests/settings/test_settings.py",
    "content": "from private_gpt.settings.settings import Settings, settings\nfrom tests.fixtures.mock_injector import MockInjector\n\n\ndef test_settings_are_loaded_and_merged() -> None:\n    assert settings().server.env_name == \"test\"\n\n\ndef test_settings_can_be_overriden(injector: MockInjector) -> None:\n    injector.bind_settings({\"server\": {\"env_name\": \"overriden\"}})\n    mocked_settings = injector.get(Settings)\n    assert mocked_settings.server.env_name == \"overriden\"\n"
  },
  {
    "path": "tests/settings/test_settings_loader.py",
    "content": "import io\nimport os\n\nimport pytest\n\nfrom private_gpt.settings.yaml import load_yaml_with_envvars\n\n\ndef test_environment_variables_are_loaded() -> None:\n    sample_yaml = \"\"\"\n    replaced: ${TEST_REPLACE_ME}\n    \"\"\"\n    env = {\"TEST_REPLACE_ME\": \"replaced\"}\n    loaded = load_yaml_with_envvars(io.StringIO(sample_yaml), env)\n    os.environ.copy()\n    assert loaded[\"replaced\"] == \"replaced\"\n\n\ndef test_environment_defaults_variables_are_loaded() -> None:\n    sample_yaml = \"\"\"\n    replaced: ${PGPT_EMBEDDING_HF_MODEL_NAME:BAAI/bge-small-en-v1.5}\n    \"\"\"\n    loaded = load_yaml_with_envvars(io.StringIO(sample_yaml), {})\n    assert loaded[\"replaced\"] == \"BAAI/bge-small-en-v1.5\"\n\n\ndef test_environment_defaults_variables_are_loaded_with_duplicated_delimiters() -> None:\n    sample_yaml = \"\"\"\n    replaced: ${PGPT_EMBEDDING_HF_MODEL_NAME::duped::}\n    \"\"\"\n    loaded = load_yaml_with_envvars(io.StringIO(sample_yaml), {})\n    assert loaded[\"replaced\"] == \":duped::\"\n\n\ndef test_environment_without_defaults_fails() -> None:\n    sample_yaml = \"\"\"\n    replaced: ${TEST_REPLACE_ME}\n    \"\"\"\n    with pytest.raises(ValueError) as error:\n        load_yaml_with_envvars(io.StringIO(sample_yaml), {})\n    assert error is not None\n"
  },
  {
    "path": "tests/test_prompt_helper.py",
    "content": "import pytest\nfrom llama_index.core.llms import ChatMessage, MessageRole\n\nfrom private_gpt.components.llm.prompt_helper import (\n    ChatMLPromptStyle,\n    DefaultPromptStyle,\n    Llama2PromptStyle,\n    Llama3PromptStyle,\n    MistralPromptStyle,\n    TagPromptStyle,\n    get_prompt_style,\n)\n\n\n@pytest.mark.parametrize(\n    (\"prompt_style\", \"expected_prompt_style\"),\n    [\n        (\"default\", DefaultPromptStyle),\n        (\"llama2\", Llama2PromptStyle),\n        (\"tag\", TagPromptStyle),\n        (\"mistral\", MistralPromptStyle),\n        (\"chatml\", ChatMLPromptStyle),\n    ],\n)\ndef test_get_prompt_style_success(prompt_style, expected_prompt_style):\n    assert isinstance(get_prompt_style(prompt_style), expected_prompt_style)\n\n\ndef test_get_prompt_style_failure():\n    prompt_style = \"unknown\"\n    with pytest.raises(ValueError) as exc_info:\n        get_prompt_style(prompt_style)\n    assert str(exc_info.value) == f\"Unknown prompt_style='{prompt_style}'\"\n\n\ndef test_tag_prompt_style_format():\n    prompt_style = TagPromptStyle()\n    messages = [\n        ChatMessage(content=\"You are an AI assistant.\", role=MessageRole.SYSTEM),\n        ChatMessage(content=\"Hello, how are you doing?\", role=MessageRole.USER),\n    ]\n\n    expected_prompt = (\n        \"<|system|>: You are an AI assistant.\\n\"\n        \"<|user|>: Hello, how are you doing?\\n\"\n        \"<|assistant|>: \"\n    )\n\n    assert prompt_style.messages_to_prompt(messages) == expected_prompt\n\n\ndef test_tag_prompt_style_format_with_system_prompt():\n    prompt_style = TagPromptStyle()\n    messages = [\n        ChatMessage(\n            content=\"FOO BAR Custom sys prompt from messages.\", role=MessageRole.SYSTEM\n        ),\n        ChatMessage(content=\"Hello, how are you doing?\", role=MessageRole.USER),\n    ]\n\n    expected_prompt = (\n        \"<|system|>: FOO BAR Custom sys prompt from messages.\\n\"\n        \"<|user|>: Hello, how are you doing?\\n\"\n        \"<|assistant|>: \"\n    )\n\n    assert prompt_style.messages_to_prompt(messages) == expected_prompt\n\n\ndef test_mistral_prompt_style_format():\n    prompt_style = MistralPromptStyle()\n    messages = [\n        ChatMessage(content=\"A\", role=MessageRole.SYSTEM),\n        ChatMessage(content=\"B\", role=MessageRole.USER),\n    ]\n    expected_prompt = \"<s>[INST] A\\nB [/INST]\"\n    assert prompt_style.messages_to_prompt(messages) == expected_prompt\n\n    messages2 = [\n        ChatMessage(content=\"A\", role=MessageRole.SYSTEM),\n        ChatMessage(content=\"B\", role=MessageRole.USER),\n        ChatMessage(content=\"C\", role=MessageRole.ASSISTANT),\n        ChatMessage(content=\"D\", role=MessageRole.USER),\n    ]\n    expected_prompt2 = \"<s>[INST] A\\nB [/INST] C</s><s>[INST] D [/INST]\"\n    assert prompt_style.messages_to_prompt(messages2) == expected_prompt2\n\n\ndef test_chatml_prompt_style_format():\n    prompt_style = ChatMLPromptStyle()\n    messages = [\n        ChatMessage(content=\"You are an AI assistant.\", role=MessageRole.SYSTEM),\n        ChatMessage(content=\"Hello, how are you doing?\", role=MessageRole.USER),\n    ]\n\n    expected_prompt = (\n        \"<|im_start|>system\\n\"\n        \"You are an AI assistant.<|im_end|>\\n\"\n        \"<|im_start|>user\\n\"\n        \"Hello, how are you doing?<|im_end|>\\n\"\n        \"<|im_start|>assistant\\n\"\n    )\n\n    assert prompt_style.messages_to_prompt(messages) == expected_prompt\n\n\ndef test_llama2_prompt_style_format():\n    
prompt_style = Llama2PromptStyle()\n    messages = [\n        ChatMessage(content=\"You are an AI assistant.\", role=MessageRole.SYSTEM),\n        ChatMessage(content=\"Hello, how are you doing?\", role=MessageRole.USER),\n    ]\n\n    expected_prompt = (\n        \"<s> [INST] <<SYS>>\\n\"\n        \" You are an AI assistant. \\n\"\n        \"<</SYS>>\\n\"\n        \"\\n\"\n        \" Hello, how are you doing? [/INST]\"\n    )\n\n    assert prompt_style.messages_to_prompt(messages) == expected_prompt\n\n\ndef test_llama2_prompt_style_with_system_prompt():\n    prompt_style = Llama2PromptStyle()\n    messages = [\n        ChatMessage(\n            content=\"FOO BAR Custom sys prompt from messages.\", role=MessageRole.SYSTEM\n        ),\n        ChatMessage(content=\"Hello, how are you doing?\", role=MessageRole.USER),\n    ]\n\n    expected_prompt = (\n        \"<s> [INST] <<SYS>>\\n\"\n        \" FOO BAR Custom sys prompt from messages. \\n\"\n        \"<</SYS>>\\n\"\n        \"\\n\"\n        \" Hello, how are you doing? [/INST]\"\n    )\n\n    assert prompt_style.messages_to_prompt(messages) == expected_prompt\n\n\ndef test_llama3_prompt_style_format():\n    prompt_style = Llama3PromptStyle()\n    messages = [\n        ChatMessage(content=\"You are a helpful assistant\", role=MessageRole.SYSTEM),\n        ChatMessage(content=\"Hello, how are you doing?\", role=MessageRole.USER),\n    ]\n\n    expected_prompt = (\n        \"<|start_header_id|>system<|end_header_id|>\\n\\n\"\n        \"You are a helpful assistant<|eot_id|>\"\n        \"<|start_header_id|>user<|end_header_id|>\\n\\n\"\n        \"Hello, how are you doing?<|eot_id|>\"\n        \"<|start_header_id|>assistant<|end_header_id|>\\n\\n\"\n    )\n\n    assert prompt_style.messages_to_prompt(messages) == expected_prompt\n\n\ndef test_llama3_prompt_style_with_default_system():\n    prompt_style = Llama3PromptStyle()\n    messages = [\n        ChatMessage(content=\"Hello!\", role=MessageRole.USER),\n    ]\n    expected = (\n        \"<|start_header_id|>system<|end_header_id|>\\n\\n\"\n        f\"{prompt_style.DEFAULT_SYSTEM_PROMPT}<|eot_id|>\"\n        \"<|start_header_id|>user<|end_header_id|>\\n\\nHello!<|eot_id|>\"\n        \"<|start_header_id|>assistant<|end_header_id|>\\n\\n\"\n    )\n    assert prompt_style._messages_to_prompt(messages) == expected\n\n\ndef test_llama3_prompt_style_with_assistant_response():\n    prompt_style = Llama3PromptStyle()\n    messages = [\n        ChatMessage(content=\"You are a helpful assistant\", role=MessageRole.SYSTEM),\n        ChatMessage(content=\"What is the capital of France?\", role=MessageRole.USER),\n        ChatMessage(\n            content=\"The capital of France is Paris.\", role=MessageRole.ASSISTANT\n        ),\n    ]\n\n    expected_prompt = (\n        \"<|start_header_id|>system<|end_header_id|>\\n\\n\"\n        \"You are a helpful assistant<|eot_id|>\"\n        \"<|start_header_id|>user<|end_header_id|>\\n\\n\"\n        \"What is the capital of France?<|eot_id|>\"\n        \"<|start_header_id|>assistant<|end_header_id|>\\n\\n\"\n        \"The capital of France is Paris.<|eot_id|>\"\n    )\n\n    assert prompt_style.messages_to_prompt(messages) == expected_prompt\n"
  },
  {
    "path": "tests/ui/test_ui.py",
    "content": "import pytest\nfrom fastapi.testclient import TestClient\n\n\n@pytest.mark.parametrize(\n    \"test_client\", [{\"ui\": {\"enabled\": True, \"path\": \"/ui\"}}], indirect=True\n)\ndef test_ui_starts_in_the_given_endpoint(test_client: TestClient) -> None:\n    response = test_client.get(\"/ui\")\n    assert response.status_code == 200\n"
  },
  {
    "path": "tiktoken_cache/.gitignore",
    "content": "*\n!.gitignore"
  },
  {
    "path": "version.txt",
    "content": "0.6.2\n"
  }
]