[
  {
    "path": ".github/ISSUE_TEMPLATE/bug_report_template.yml",
    "content": "name: \"Bug report\"\ndescription: Report a bug\nlabels: [ \"bug\" ]\nbody:\n  - type: markdown\n    attributes:\n      value: |\n        Thanks for taking the time to fill out this bug report!\n  - type: textarea\n    id: bug-description\n    attributes:\n      label: Describe the bug\n      description: A clear and concise description of what the bug is.\n      placeholder: Bug description\n    validations:\n      required: true\n  - type: checkboxes\n    attributes:\n      label: Is there an existing issue for this?\n      description: Please search to see if an issue already exists for the issue you encountered.\n      options:\n        - label: I have searched the existing issues\n          required: true\n  - type: textarea\n    id: reproduction\n    attributes:\n      label: Reproduction\n      description: Please provide the steps necessary to reproduce your issue.\n      placeholder: Reproduction\n    validations:\n      required: true\n  - type: textarea\n    id: screenshot\n    attributes:\n      label: Screenshot\n      description: \"If possible, please include screenshot(s) so that we can understand what the issue is.\"\n  - type: textarea\n    id: logs\n    attributes:\n      label: Logs\n      description: \"Please include the full stacktrace of the errors you get in the command-line (if any).\"\n      render: shell\n    validations:\n      required: true\n  - type: textarea\n    id: system-info\n    attributes:\n      label: System Info\n      description: \"Please share your operating system and GPU type (NVIDIA/AMD/Intel/Apple). If you are using a Google Colab notebook, mention that instead.\"\n      render: shell\n      placeholder: \n    validations:\n      required: true\n"
  },
  {
    "path": ".github/ISSUE_TEMPLATE/feature_request.md",
    "content": "---\nname: Feature request\nabout: Suggest an improvement or new feature for the web UI\ntitle: ''\nlabels: 'enhancement'\nassignees: ''\n\n---\n\n**Description**\n\nA clear and concise description of what you want to be implemented.\n\n**Additional Context**\n\nIf applicable, please provide any extra information, external links, or screenshots that could be useful.\n"
  },
  {
    "path": ".github/dependabot.yml",
    "content": "# To get started with Dependabot version updates, you'll need to specify which\n# package ecosystems to update and where the package manifests are located.\n# Please see the documentation for all configuration options:\n# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates\n\nversion: 2\nupdates:\n  - package-ecosystem: \"pip\"\n    directories:\n      - \"/requirements/full/\"\n      - \"/requirements/portable/\"\n    target-branch: \"dev\"\n    schedule:\n      interval: \"weekly\"\n"
  },
  {
    "path": ".github/pull_request_template.md",
    "content": "## Checklist:\n\n- [ ] I have read the [Contributing guidelines](https://github.com/oobabooga/text-generation-webui/wiki/Contributing-guidelines).\n"
  },
  {
    "path": ".github/workflows/build-everything-tgw.yml",
    "content": "name: Build Everything TGW\n\non:\n  workflow_dispatch:\n    inputs:\n      version:\n        description: 'Version tag of text-generation-webui to build: v3.0'\n        default: 'v3.0'\n        required: true\n        type: string\n\npermissions:\n  contents: write\n\njobs:\n  build_release_cuda_windows:\n    name: CUDA Windows\n    uses: ./.github/workflows/build-portable-release-cuda.yml\n    with:\n      version: ${{ inputs.version }}\n      config: 'os:windows-2022'\n\n  build_release_cuda_linux:\n    name: CUDA Linux\n    uses: ./.github/workflows/build-portable-release-cuda.yml\n    with:\n      version: ${{ inputs.version }}\n      config: 'os:ubuntu-22.04'\n\n  build_release_vulkan_windows:\n    name: Vulkan Windows\n    uses: ./.github/workflows/build-portable-release-vulkan.yml\n    with:\n      version: ${{ inputs.version }}\n      config: 'os:windows-2022'\n\n  build_release_vulkan_linux:\n    name: Vulkan Linux\n    uses: ./.github/workflows/build-portable-release-vulkan.yml\n    with:\n      version: ${{ inputs.version }}\n      config: 'os:ubuntu-22.04'\n\n  build_release_rocm_linux:\n    name: ROCm Linux\n    uses: ./.github/workflows/build-portable-release-rocm.yml\n    with:\n      version: ${{ inputs.version }}\n      config: 'os:ubuntu-22.04'\n\n  build_release_cpu_windows:\n    name: CPU Windows\n    uses: ./.github/workflows/build-portable-release.yml\n    with:\n      version: ${{ inputs.version }}\n      config: 'os:windows-2022'\n\n  build_release_cpu_linux:\n    name: CPU Linux\n    uses: ./.github/workflows/build-portable-release.yml\n    with:\n      version: ${{ inputs.version }}\n      config: 'os:ubuntu-22.04'\n\n  build_release_macos:\n    name: macOS\n    uses: ./.github/workflows/build-portable-release.yml\n    with:\n      version: ${{ inputs.version }}\n      config: 'os:macos-15-intel,macos-14'\n"
  },
  {
    "path": ".github/workflows/build-portable-release-cuda.yml",
    "content": "name: Build CUDA\n\non:\n  workflow_dispatch:\n    inputs:\n      version:\n        description: 'Version tag of text-generation-webui to build: v3.0'\n        default: 'v3.0'\n        required: true\n        type: string\n      config:\n        description: 'Override configurations to build: key1:item1-1,item1-2;key2:item2-1,item2-2'\n        default: 'Default'\n        required: false\n        type: string\n      exclude:\n        description: 'Exclude build configurations: key1-1:item1-1,key1-2:item1-2;key2-1:item2-1,key2-2:item2-2'\n        default: 'None'\n        required: false\n        type: string\n  workflow_call:\n    inputs:\n      version:\n        description: 'Version tag of text-generation-webui to build: v3.0'\n        default: 'v3.0'\n        required: true\n        type: string\n      config:\n        description: 'Configurations to build: key1:item1-1,item1-2;key2:item2-1,item2-2'\n        default: 'Default'\n        required: false\n        type: string\n      exclude:\n        description: 'Exclude build configurations: key1-1:item1-1,key1-2:item1-2;key2-1:item2-1,key2-2:item2-2'\n        default: 'None'\n        required: false\n        type: string\n\npermissions:\n  contents: write\n\njobs:\n  define_matrix:\n    name: Define Build Matrix\n    runs-on: ubuntu-latest\n    outputs:\n      matrix: ${{ steps.set-matrix.outputs.matrix }}\n    defaults:\n      run:\n        shell: pwsh\n    env:\n      CONFIGIN: ${{ inputs.config }}\n      EXCLUDEIN: ${{ inputs.exclude }}\n\n    steps:\n      - name: Define Job Output\n        id: set-matrix\n        run: |\n          $matrix = @{\n              'os' = @('ubuntu-22.04', 'windows-2022')\n              'pyver' = @(\"3.13\")\n              'cuda' = @(\"12.4\", \"13.1\")\n          }\n\n          if ($env:CONFIGIN -ne 'Default') {$env:CONFIGIN.split(';').foreach({$matrix[$_.split(':')[0]] = $_.split(':')[1].split(',')})}\n\n          if ($env:EXCLUDEIN -ne 'None') {\n              
$exclusions = @()\n              $exclusions += $env:EXCLUDEIN.split(';').replace(':','=').replace(',',\"`n\") | ConvertFrom-StringData\n              $matrix['exclude'] = $exclusions\n          }\n\n          $matrixOut = ConvertTo-Json $matrix -Compress\n          Write-Output ('matrix=' + $matrixOut) >> $env:GITHUB_OUTPUT\n\n  build_wheels:\n    name: ${{ matrix.os }} ${{ matrix.pyver }} CUDA ${{ matrix.cuda }}\n    needs: define_matrix\n    runs-on: ${{ matrix.os }}\n    strategy:\n      matrix: ${{ fromJSON(needs.define_matrix.outputs.matrix) }}\n    defaults:\n      run:\n        shell: pwsh\n    env:\n      PCKGVER: ${{ inputs.version }}\n\n    steps:\n      - uses: actions/checkout@v6\n        with:\n          repository: 'oobabooga/text-generation-webui'\n          ref: ${{ inputs.version }}\n          submodules: 'recursive'\n\n      - uses: actions/setup-python@v6\n        with:\n          python-version: ${{ matrix.pyver }}\n\n      - name: Build Package\n        shell: bash\n        run: |\n            VERSION_CLEAN=\"${{ inputs.version }}\"\n            VERSION_CLEAN=\"${VERSION_CLEAN#v}\"\n            cd ..\n            cp -r text-generation-webui \"text-generation-webui-${VERSION_CLEAN}\"\n            cd \"text-generation-webui-${VERSION_CLEAN}\"\n\n            # Remove extensions that need additional requirements\n            allowed=(\"character_bias\" \"gallery\" \"openai\" \"sd_api_pictures\")\n            find extensions/ -mindepth 1 -maxdepth 1 -type d | grep -v -E \"$(printf '%s|' \"${allowed[@]}\" | sed 's/|$//')\" | xargs rm -rf\n\n            # Define common variables\n            CUDA_VERSION=\"${{ matrix.cuda }}\"\n            VERSION=\"${{ inputs.version }}\"\n\n            # 1. 
Set platform-specific variables\n            if [[ \"$RUNNER_OS\" == \"Windows\" ]]; then\n                PLATFORM=\"windows\"\n                PYTHON_URL=\"https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.13.12+20260303-x86_64-pc-windows-msvc-install_only.tar.gz\"\n                PIP_PATH=\"portable_env/python.exe -m pip\"\n                PACKAGES_PATH=\"portable_env/Lib/site-packages\"\n                rm start_linux.sh start_macos.sh\n            else\n                PLATFORM=\"linux\"\n                PYTHON_URL=\"https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.13.12+20260303-x86_64-unknown-linux-gnu-install_only.tar.gz\"\n                PIP_PATH=\"portable_env/bin/python -m pip\"\n                PACKAGES_PATH=\"portable_env/lib/python3.13/site-packages\"\n                rm start_macos.sh start_windows.bat\n            fi\n\n            # 2. Download and extract Python\n            cd ..\n            echo \"Downloading Python for $PLATFORM...\"\n            curl -L -o python-build.tar.gz \"$PYTHON_URL\"\n            tar -xzf python-build.tar.gz\n            mv python \"text-generation-webui-${VERSION_CLEAN}/portable_env\"\n\n            # 3. Prepare requirements file based on CUDA version\n            cd \"text-generation-webui-${VERSION_CLEAN}\"\n            if [[ \"$CUDA_VERSION\" == \"13.1\" ]]; then\n                REQ_FILE=\"requirements/portable/requirements_cuda131.txt\"\n            else\n                REQ_FILE=\"requirements/portable/requirements.txt\"\n            fi\n\n            # 4. Install packages\n            echo \"Installing Python packages from $REQ_FILE...\"\n            $PIP_PATH install --target=\"./$PACKAGES_PATH\" -r \"$REQ_FILE\"\n\n            # 5. Clean up\n            rm -rf .git cmd* update_wizard* Colab-TextGen-GPU.ipynb docker setup.cfg .github .gitignore requirements/ one_click.py\n\n            # 6. 
Create archive\n            cd ..\n            if [[ \"$RUNNER_OS\" == \"Windows\" ]]; then\n                ARCHIVE_NAME=\"textgen-portable-${VERSION_CLEAN}-${PLATFORM}-cuda${CUDA_VERSION}.zip\"\n                echo \"Creating archive: $ARCHIVE_NAME\"\n                powershell -Command \"Compress-Archive -Path text-generation-webui-${VERSION_CLEAN} -DestinationPath $ARCHIVE_NAME\"\n            else\n                ARCHIVE_NAME=\"textgen-portable-${VERSION_CLEAN}-${PLATFORM}-cuda${CUDA_VERSION}.tar.gz\"\n                echo \"Creating archive: $ARCHIVE_NAME\"\n                tar czf \"$ARCHIVE_NAME\" \"text-generation-webui-${VERSION_CLEAN}\"\n            fi\n\n      - name: Upload files to a GitHub release\n        id: upload-release\n        uses: svenstaro/upload-release-action@2.7.0\n        continue-on-error: true\n        with:\n          repo_token: ${{ secrets.GITHUB_TOKEN }}\n          file: ../textgen-portable-*\n          tag: ${{ inputs.version }}\n          file_glob: true\n          make_latest: false\n          overwrite: true\n"
  },
  {
    "path": ".github/workflows/build-portable-release-rocm.yml",
    "content": "name: Build ROCm\n\non:\n  workflow_dispatch:\n    inputs:\n      version:\n        description: 'Version tag of text-generation-webui to build: v3.0'\n        default: 'v3.0'\n        required: true\n        type: string\n      config:\n        description: 'Override configurations to build: key1:item1-1,item1-2;key2:item2-1,item2-2'\n        default: 'Default'\n        required: false\n        type: string\n      exclude:\n        description: 'Exclude build configurations: key1-1:item1-1,key1-2:item1-2;key2-1:item2-1,key2-2:item2-2'\n        default: 'None'\n        required: false\n        type: string\n  workflow_call:\n    inputs:\n      version:\n        description: 'Version tag of text-generation-webui to build: v3.0'\n        default: 'v3.0'\n        required: true\n        type: string\n      config:\n        description: 'Configurations to build: key1:item1-1,item1-2;key2:item2-1,item2-2'\n        default: 'Default'\n        required: false\n        type: string\n      exclude:\n        description: 'Exclude build configurations: key1-1:item1-1,key1-2:item1-2;key2-1:item2-1,key2-2:item2-2'\n        default: 'None'\n        required: false\n        type: string\n\npermissions:\n  contents: write\n\njobs:\n  define_matrix:\n    name: Define Build Matrix\n    runs-on: ubuntu-latest\n    outputs:\n      matrix: ${{ steps.set-matrix.outputs.matrix }}\n    defaults:\n      run:\n        shell: pwsh\n    env:\n      CONFIGIN: ${{ inputs.config }}\n      EXCLUDEIN: ${{ inputs.exclude }}\n\n    steps:\n      - name: Define Job Output\n        id: set-matrix\n        run: |\n          $matrix = @{\n              'os' = @('ubuntu-22.04', 'windows-2022')\n              'pyver' = @(\"3.13\")\n          }\n\n          if ($env:CONFIGIN -ne 'Default') {$env:CONFIGIN.split(';').foreach({$matrix[$_.split(':')[0]] = $_.split(':')[1].split(',')})}\n\n          if ($env:EXCLUDEIN -ne 'None') {\n              $exclusions = @()\n              $exclusions += 
$env:EXCLUDEIN.split(';').replace(':','=').replace(',',\"`n\") | ConvertFrom-StringData\n              $matrix['exclude'] = $exclusions\n          }\n\n          $matrixOut = ConvertTo-Json $matrix -Compress\n          Write-Output ('matrix=' + $matrixOut) >> $env:GITHUB_OUTPUT\n\n  build_wheels:\n    name: ${{ matrix.os }} ${{ matrix.pyver }}\n    needs: define_matrix\n    runs-on: ${{ matrix.os }}\n    strategy:\n      matrix: ${{ fromJSON(needs.define_matrix.outputs.matrix) }}\n    defaults:\n      run:\n        shell: pwsh\n    env:\n      PCKGVER: ${{ inputs.version }}\n\n    steps:\n      - uses: actions/checkout@v6\n        with:\n          repository: 'oobabooga/text-generation-webui'\n          ref: ${{ inputs.version }}\n          submodules: 'recursive'\n\n      - uses: actions/setup-python@v6\n        with:\n          python-version: ${{ matrix.pyver }}\n\n      - name: Build Package\n        shell: bash\n        run: |\n            VERSION_CLEAN=\"${{ inputs.version }}\"\n            VERSION_CLEAN=\"${VERSION_CLEAN#v}\"\n            cd ..\n            cp -r text-generation-webui \"text-generation-webui-${VERSION_CLEAN}\"\n            cd \"text-generation-webui-${VERSION_CLEAN}\"\n\n            # Remove extensions that need additional requirements\n            allowed=(\"character_bias\" \"gallery\" \"openai\" \"sd_api_pictures\")\n            find extensions/ -mindepth 1 -maxdepth 1 -type d | grep -v -E \"$(printf '%s|' \"${allowed[@]}\" | sed 's/|$//')\" | xargs rm -rf\n\n            # Define common variables\n            VERSION=\"${{ inputs.version }}\"\n\n            # 1. 
Set platform-specific variables\n            if [[ \"$RUNNER_OS\" == \"Windows\" ]]; then\n                PLATFORM=\"windows\"\n                PYTHON_URL=\"https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.13.12+20260303-x86_64-pc-windows-msvc-install_only.tar.gz\"\n                PIP_PATH=\"portable_env/python.exe -m pip\"\n                PACKAGES_PATH=\"portable_env/Lib/site-packages\"\n                rm start_linux.sh start_macos.sh\n            else\n                PLATFORM=\"linux\"\n                PYTHON_URL=\"https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.13.12+20260303-x86_64-unknown-linux-gnu-install_only.tar.gz\"\n                PIP_PATH=\"portable_env/bin/python -m pip\"\n                PACKAGES_PATH=\"portable_env/lib/python3.13/site-packages\"\n                rm start_macos.sh start_windows.bat\n            fi\n\n            # 2. Download and extract Python\n            cd ..\n            echo \"Downloading Python for $PLATFORM...\"\n            curl -L -o python-build.tar.gz \"$PYTHON_URL\"\n            tar -xzf python-build.tar.gz\n            mv python \"text-generation-webui-${VERSION_CLEAN}/portable_env\"\n\n            # 3. Prepare requirements file\n            REQ_FILE=\"requirements/portable/requirements_amd.txt\"\n\n            cd \"text-generation-webui-${VERSION_CLEAN}\"\n\n            # 4. Install packages\n            echo \"Installing Python packages from $REQ_FILE...\"\n            $PIP_PATH install --target=\"./$PACKAGES_PATH\" -r \"$REQ_FILE\"\n\n            # 5. Clean up\n            rm -rf .git cmd* update_wizard* Colab-TextGen-GPU.ipynb docker setup.cfg .github .gitignore requirements/ one_click.py\n\n            # 6. 
Create archive\n            cd ..\n            if [[ \"$RUNNER_OS\" == \"Windows\" ]]; then\n                ARCHIVE_NAME=\"textgen-portable-${VERSION_CLEAN}-${PLATFORM}-rocm7.2.zip\"\n                echo \"Creating archive: $ARCHIVE_NAME\"\n                powershell -Command \"Compress-Archive -Path text-generation-webui-${VERSION_CLEAN} -DestinationPath $ARCHIVE_NAME\"\n            else\n                ARCHIVE_NAME=\"textgen-portable-${VERSION_CLEAN}-${PLATFORM}-rocm7.2.tar.gz\"\n                echo \"Creating archive: $ARCHIVE_NAME\"\n                tar czf \"$ARCHIVE_NAME\" \"text-generation-webui-${VERSION_CLEAN}\"\n            fi\n\n      - name: Upload files to a GitHub release\n        id: upload-release\n        uses: svenstaro/upload-release-action@2.7.0\n        continue-on-error: true\n        with:\n          repo_token: ${{ secrets.GITHUB_TOKEN }}\n          file: ../textgen-portable-*\n          tag: ${{ inputs.version }}\n          file_glob: true\n          make_latest: false\n          overwrite: true\n"
  },
  {
    "path": ".github/workflows/build-portable-release-vulkan.yml",
    "content": "name: Build Vulkan\n\non:\n  workflow_dispatch:\n    inputs:\n      version:\n        description: 'Version tag of text-generation-webui to build: v3.0'\n        default: 'v3.0'\n        required: true\n        type: string\n      config:\n        description: 'Override configurations to build: key1:item1-1,item1-2;key2:item2-1,item2-2'\n        default: 'Default'\n        required: false\n        type: string\n      exclude:\n        description: 'Exclude build configurations: key1-1:item1-1,key1-2:item1-2;key2-1:item2-1,key2-2:item2-2'\n        default: 'None'\n        required: false\n        type: string\n  workflow_call:\n    inputs:\n      version:\n        description: 'Version tag of text-generation-webui to build: v3.0'\n        default: 'v3.0'\n        required: true\n        type: string\n      config:\n        description: 'Configurations to build: key1:item1-1,item1-2;key2:item2-1,item2-2'\n        default: 'Default'\n        required: false\n        type: string\n      exclude:\n        description: 'Exclude build configurations: key1-1:item1-1,key1-2:item1-2;key2-1:item2-1,key2-2:item2-2'\n        default: 'None'\n        required: false\n        type: string\n\npermissions:\n  contents: write\n\njobs:\n  define_matrix:\n    name: Define Build Matrix\n    runs-on: ubuntu-latest\n    outputs:\n      matrix: ${{ steps.set-matrix.outputs.matrix }}\n    defaults:\n      run:\n        shell: pwsh\n    env:\n      CONFIGIN: ${{ inputs.config }}\n      EXCLUDEIN: ${{ inputs.exclude }}\n\n    steps:\n      - name: Define Job Output\n        id: set-matrix\n        run: |\n          $matrix = @{\n              'os' = @('ubuntu-22.04', 'windows-2022')\n              'pyver' = @(\"3.13\")\n          }\n\n          if ($env:CONFIGIN -ne 'Default') {$env:CONFIGIN.split(';').foreach({$matrix[$_.split(':')[0]] = $_.split(':')[1].split(',')})}\n\n          if ($env:EXCLUDEIN -ne 'None') {\n              $exclusions = @()\n              $exclusions += 
$env:EXCLUDEIN.split(';').replace(':','=').replace(',',\"`n\") | ConvertFrom-StringData\n              $matrix['exclude'] = $exclusions\n          }\n\n          $matrixOut = ConvertTo-Json $matrix -Compress\n          Write-Output ('matrix=' + $matrixOut) >> $env:GITHUB_OUTPUT\n\n  build_wheels:\n    name: ${{ matrix.os }} ${{ matrix.pyver }}\n    needs: define_matrix\n    runs-on: ${{ matrix.os }}\n    strategy:\n      matrix: ${{ fromJSON(needs.define_matrix.outputs.matrix) }}\n    defaults:\n      run:\n        shell: pwsh\n    env:\n      PCKGVER: ${{ inputs.version }}\n\n    steps:\n      - uses: actions/checkout@v6\n        with:\n          repository: 'oobabooga/text-generation-webui'\n          ref: ${{ inputs.version }}\n          submodules: 'recursive'\n\n      - uses: actions/setup-python@v6\n        with:\n          python-version: ${{ matrix.pyver }}\n\n      - name: Build Package\n        shell: bash\n        run: |\n            VERSION_CLEAN=\"${{ inputs.version }}\"\n            VERSION_CLEAN=\"${VERSION_CLEAN#v}\"\n            cd ..\n            cp -r text-generation-webui \"text-generation-webui-${VERSION_CLEAN}\"\n            cd \"text-generation-webui-${VERSION_CLEAN}\"\n\n            # Remove extensions that need additional requirements\n            allowed=(\"character_bias\" \"gallery\" \"openai\" \"sd_api_pictures\")\n            find extensions/ -mindepth 1 -maxdepth 1 -type d | grep -v -E \"$(printf '%s|' \"${allowed[@]}\" | sed 's/|$//')\" | xargs rm -rf\n\n            # Define common variables\n            VERSION=\"${{ inputs.version }}\"\n\n            # 1. 
Set platform-specific variables\n            if [[ \"$RUNNER_OS\" == \"Windows\" ]]; then\n                PLATFORM=\"windows\"\n                PYTHON_URL=\"https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.13.12+20260303-x86_64-pc-windows-msvc-install_only.tar.gz\"\n                PIP_PATH=\"portable_env/python.exe -m pip\"\n                PACKAGES_PATH=\"portable_env/Lib/site-packages\"\n                rm start_linux.sh start_macos.sh\n            else\n                PLATFORM=\"linux\"\n                PYTHON_URL=\"https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.13.12+20260303-x86_64-unknown-linux-gnu-install_only.tar.gz\"\n                PIP_PATH=\"portable_env/bin/python -m pip\"\n                PACKAGES_PATH=\"portable_env/lib/python3.13/site-packages\"\n                rm start_macos.sh start_windows.bat\n            fi\n\n            # 2. Download and extract Python\n            cd ..\n            echo \"Downloading Python for $PLATFORM...\"\n            curl -L -o python-build.tar.gz \"$PYTHON_URL\"\n            tar -xzf python-build.tar.gz\n            mv python \"text-generation-webui-${VERSION_CLEAN}/portable_env\"\n\n            # 3. Prepare requirements file\n            REQ_FILE=\"requirements/portable/requirements_vulkan.txt\"\n\n            cd \"text-generation-webui-${VERSION_CLEAN}\"\n\n            # 4. Install packages\n            echo \"Installing Python packages from $REQ_FILE...\"\n            $PIP_PATH install --target=\"./$PACKAGES_PATH\" -r \"$REQ_FILE\"\n\n            # 5. Clean up\n            rm -rf .git cmd* update_wizard* Colab-TextGen-GPU.ipynb docker setup.cfg .github .gitignore requirements/ one_click.py\n\n            # 6. 
Create archive\n            cd ..\n            if [[ \"$RUNNER_OS\" == \"Windows\" ]]; then\n                ARCHIVE_NAME=\"textgen-portable-${VERSION_CLEAN}-${PLATFORM}-vulkan.zip\"\n                echo \"Creating archive: $ARCHIVE_NAME\"\n                powershell -Command \"Compress-Archive -Path text-generation-webui-${VERSION_CLEAN} -DestinationPath $ARCHIVE_NAME\"\n            else\n                ARCHIVE_NAME=\"textgen-portable-${VERSION_CLEAN}-${PLATFORM}-vulkan.tar.gz\"\n                echo \"Creating archive: $ARCHIVE_NAME\"\n                tar czf \"$ARCHIVE_NAME\" \"text-generation-webui-${VERSION_CLEAN}\"\n            fi\n\n      - name: Upload files to a GitHub release\n        id: upload-release\n        uses: svenstaro/upload-release-action@2.7.0\n        continue-on-error: true\n        with:\n          repo_token: ${{ secrets.GITHUB_TOKEN }}\n          file: ../textgen-portable-*\n          tag: ${{ inputs.version }}\n          file_glob: true\n          make_latest: false\n          overwrite: true\n"
  },
  {
    "path": ".github/workflows/build-portable-release.yml",
    "content": "name: Build CPU and macOS\n\non:\n  workflow_dispatch:\n    inputs:\n      version:\n        description: 'Version tag of text-generation-webui to build: v3.0'\n        default: 'v3.0'\n        required: true\n        type: string\n      config:\n        description: 'Override configurations to build: key1:item1-1,item1-2;key2:item2-1,item2-2'\n        default: 'Default'\n        required: false\n        type: string\n      exclude:\n        description: 'Exclude build configurations: key1-1:item1-1,key1-2:item1-2;key2-1:item2-1,key2-2:item2-2'\n        default: 'None'\n        required: false\n        type: string\n  workflow_call:\n    inputs:\n      version:\n        description: 'Version tag of text-generation-webui to build: v3.0'\n        default: 'v3.0'\n        required: true\n        type: string\n      config:\n        description: 'Configurations to build: key1:item1-1,item1-2;key2:item2-1,item2-2'\n        default: 'Default'\n        required: false\n        type: string\n      exclude:\n        description: 'Exclude build configurations: key1-1:item1-1,key1-2:item1-2;key2-1:item2-1,key2-2:item2-2'\n        default: 'None'\n        required: false\n        type: string\n\npermissions:\n  contents: write\n\njobs:\n  define_matrix:\n    name: Define Build Matrix\n    runs-on: ubuntu-latest\n    outputs:\n      matrix: ${{ steps.set-matrix.outputs.matrix }}\n    defaults:\n      run:\n        shell: pwsh\n    env:\n      CONFIGIN: ${{ inputs.config }}\n      EXCLUDEIN: ${{ inputs.exclude }}\n\n    steps:\n      - name: Define Job Output\n        id: set-matrix\n        run: |\n          $matrix = @{\n              'os' = @('ubuntu-22.04', 'windows-2022', 'macos-14')\n              'pyver' = @(\"3.13\")\n          }\n\n          if ($env:CONFIGIN -ne 'Default') {$env:CONFIGIN.split(';').foreach({$matrix[$_.split(':')[0]] = $_.split(':')[1].split(',')})}\n\n          if ($env:EXCLUDEIN -ne 'None') {\n              $exclusions = @()\n          
    $exclusions += $env:EXCLUDEIN.split(';').replace(':','=').replace(',',\"`n\") | ConvertFrom-StringData\n              $matrix['exclude'] = $exclusions\n          }\n\n          $matrixOut = ConvertTo-Json $matrix -Compress\n          Write-Output ('matrix=' + $matrixOut) >> $env:GITHUB_OUTPUT\n\n  build_wheels:\n    name: ${{ matrix.os }} ${{ matrix.pyver }}\n    needs: define_matrix\n    runs-on: ${{ matrix.os }}\n    strategy:\n      matrix: ${{ fromJSON(needs.define_matrix.outputs.matrix) }}\n    defaults:\n      run:\n        shell: pwsh\n    env:\n      PCKGVER: ${{ inputs.version }}\n\n    steps:\n      - uses: actions/checkout@v6\n        with:\n          repository: 'oobabooga/text-generation-webui'\n          ref: ${{ inputs.version }}\n          submodules: 'recursive'\n\n      - uses: actions/setup-python@v6\n        with:\n          python-version: ${{ matrix.pyver }}\n\n      - name: Build Package\n        shell: bash\n        run: |\n            VERSION_CLEAN=\"${{ inputs.version }}\"\n            VERSION_CLEAN=\"${VERSION_CLEAN#v}\"\n            cd ..\n            cp -r text-generation-webui \"text-generation-webui-${VERSION_CLEAN}\"\n            cd \"text-generation-webui-${VERSION_CLEAN}\"\n\n            # Remove extensions that need additional requirements\n            allowed=(\"character_bias\" \"gallery\" \"openai\" \"sd_api_pictures\")\n            find extensions/ -mindepth 1 -maxdepth 1 -type d | grep -v -E \"$(printf '%s|' \"${allowed[@]}\" | sed 's/|$//')\" | xargs rm -rf\n\n            # Define common variables\n            VERSION=\"${{ inputs.version }}\"\n            OS_TYPE=\"${{ matrix.os }}\"\n\n            # 1. 
Set platform-specific variables\n            if [[ \"$RUNNER_OS\" == \"Windows\" ]]; then\n                PLATFORM=\"windows-cpu\"\n                PYTHON_URL=\"https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.13.12+20260303-x86_64-pc-windows-msvc-install_only.tar.gz\"\n                PIP_PATH=\"portable_env/python.exe -m pip\"\n                PACKAGES_PATH=\"portable_env/Lib/site-packages\"\n                rm start_linux.sh start_macos.sh\n            elif [[ \"$RUNNER_OS\" == \"macOS\" ]]; then\n                if [[ \"$OS_TYPE\" == \"macos-15-intel\" ]]; then\n                    PLATFORM=\"macos-x86_64\"\n                    PYTHON_URL=\"https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.13.12+20260303-x86_64-apple-darwin-install_only.tar.gz\"\n                    REQ_TYPE=\"apple_intel\"\n                else\n                    PLATFORM=\"macos-arm64\"\n                    PYTHON_URL=\"https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.13.12+20260303-aarch64-apple-darwin-install_only.tar.gz\"\n                    REQ_TYPE=\"apple_silicon\"\n                fi\n                PIP_PATH=\"portable_env/bin/python -m pip\"\n                PACKAGES_PATH=\"portable_env/lib/python3.13/site-packages\"\n                rm start_linux.sh start_windows.bat\n            else\n                # Linux case\n                PLATFORM=\"linux-cpu\"\n                PYTHON_URL=\"https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.13.12+20260303-x86_64-unknown-linux-gnu-install_only.tar.gz\"\n                PIP_PATH=\"portable_env/bin/python -m pip\"\n                PACKAGES_PATH=\"portable_env/lib/python3.13/site-packages\"\n                rm start_macos.sh start_windows.bat\n            fi\n\n            # 2. 
Download and extract Python\n            echo \"Downloading Python for $PLATFORM...\"\n            cd ..\n            curl -L -o python-build.tar.gz \"$PYTHON_URL\"\n            tar -xzf python-build.tar.gz\n            mv python \"text-generation-webui-${VERSION_CLEAN}/portable_env\"\n\n            # 3. Prepare requirements file based on platform\n            cd \"text-generation-webui-${VERSION_CLEAN}\"\n\n            # Select requirements file based on platform\n            if [[ \"$RUNNER_OS\" == \"macOS\" ]]; then\n                if [[ \"$OS_TYPE\" == \"macos-15-intel\" ]]; then\n                    REQ_FILE=\"requirements/portable/requirements_apple_intel.txt\"\n                else\n                    REQ_FILE=\"requirements/portable/requirements_apple_silicon.txt\"\n                fi\n            else\n                REQ_FILE=\"requirements/portable/requirements_cpu_only.txt\"\n            fi\n\n            echo \"Using requirements file: $REQ_FILE\"\n\n            # 4. Install packages\n            echo \"Installing Python packages from $REQ_FILE...\"\n            $PIP_PATH install --target=\"./$PACKAGES_PATH\" -r \"$REQ_FILE\"\n\n            # 5. Clean up\n            rm -rf .git cmd* update_wizard* Colab-TextGen-GPU.ipynb docker setup.cfg .github .gitignore requirements/ one_click.py\n\n            # 6. 
Create archive\n            cd ..\n            if [[ \"$RUNNER_OS\" == \"Windows\" ]]; then\n                ARCHIVE_NAME=\"textgen-portable-${VERSION_CLEAN}-${PLATFORM}.zip\"\n                echo \"Creating archive: $ARCHIVE_NAME\"\n                powershell -Command \"Compress-Archive -Path text-generation-webui-${VERSION_CLEAN} -DestinationPath $ARCHIVE_NAME\"\n            else\n                ARCHIVE_NAME=\"textgen-portable-${VERSION_CLEAN}-${PLATFORM}.tar.gz\"\n                echo \"Creating archive: $ARCHIVE_NAME\"\n                tar czf \"$ARCHIVE_NAME\" \"text-generation-webui-${VERSION_CLEAN}\"\n            fi\n\n      - name: Upload files to a GitHub release\n        id: upload-release\n        uses: svenstaro/upload-release-action@2.7.0\n        continue-on-error: true\n        with:\n          repo_token: ${{ secrets.GITHUB_TOKEN }}\n          file: ../textgen-portable-*\n          tag: ${{ inputs.version }}\n          file_glob: true\n          make_latest: false\n          overwrite: true\n"
  },
  {
    "path": ".gitignore",
    "content": "/css\n/extensions\n/installer_files\n/repositories\n/user_data\n\n.chroma\n.DS_Store\n.eslintrc.js\n.idea\n.installer_state.json\n.venv\nvenv\n.envrc\n.direnv\n.vs\n.vscode\n*.bak\n*.ipynb\n*.log\n*pycache*\ncert.pem\nkey.pem\npackage.json\npackage-lock.json\nThumbs.db\nwandb\n\n# ignore user docker config and top level links to docker files\n/docker-compose.yaml\n/docker-compose.yml\n/Dockerfile\n.env\n"
  },
  {
    "path": "LICENSE",
    "content": "                    GNU AFFERO GENERAL PUBLIC LICENSE\n                       Version 3, 19 November 2007\n\n Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>\n Everyone is permitted to copy and distribute verbatim copies\n of this license document, but changing it is not allowed.\n\n                            Preamble\n\n  The GNU Affero General Public License is a free, copyleft license for\nsoftware and other kinds of works, specifically designed to ensure\ncooperation with the community in the case of network server software.\n\n  The licenses for most software and other practical works are designed\nto take away your freedom to share and change the works.  By contrast,\nour General Public Licenses are intended to guarantee your freedom to\nshare and change all versions of a program--to make sure it remains free\nsoftware for all its users.\n\n  When we speak of free software, we are referring to freedom, not\nprice.  Our General Public Licenses are designed to make sure that you\nhave the freedom to distribute copies of free software (and charge for\nthem if you wish), that you receive source code or can get it if you\nwant it, that you can change the software or use pieces of it in new\nfree programs, and that you know you can do these things.\n\n  Developers that use our General Public Licenses protect your rights\nwith two steps: (1) assert copyright on the software, and (2) offer\nyou this License which gives you legal permission to copy, distribute\nand/or modify the software.\n\n  A secondary benefit of defending all users' freedom is that\nimprovements made in alternate versions of the program, if they\nreceive widespread use, become available for other developers to\nincorporate.  Many developers of free software are heartened and\nencouraged by the resulting cooperation.  
However, in the case of\nsoftware used on network servers, this result may fail to come about.\nThe GNU General Public License permits making a modified version and\nletting the public access it on a server without ever releasing its\nsource code to the public.\n\n  The GNU Affero General Public License is designed specifically to\nensure that, in such cases, the modified source code becomes available\nto the community.  It requires the operator of a network server to\nprovide the source code of the modified version running there to the\nusers of that server.  Therefore, public use of a modified version, on\na publicly accessible server, gives the public access to the source\ncode of the modified version.\n\n  An older license, called the Affero General Public License and\npublished by Affero, was designed to accomplish similar goals.  This is\na different license, not a version of the Affero GPL, but Affero has\nreleased a new version of the Affero GPL which permits relicensing under\nthis license.\n\n  The precise terms and conditions for copying, distribution and\nmodification follow.\n\n                       TERMS AND CONDITIONS\n\n  0. Definitions.\n\n  \"This License\" refers to version 3 of the GNU Affero General Public License.\n\n  \"Copyright\" also means copyright-like laws that apply to other kinds of\nworks, such as semiconductor masks.\n\n  \"The Program\" refers to any copyrightable work licensed under this\nLicense.  Each licensee is addressed as \"you\".  \"Licensees\" and\n\"recipients\" may be individuals or organizations.\n\n  To \"modify\" a work means to copy from or adapt all or part of the work\nin a fashion requiring copyright permission, other than the making of an\nexact copy.  
The resulting work is called a \"modified version\" of the\nearlier work or a work \"based on\" the earlier work.\n\n  A \"covered work\" means either the unmodified Program or a work based\non the Program.\n\n  To \"propagate\" a work means to do anything with it that, without\npermission, would make you directly or secondarily liable for\ninfringement under applicable copyright law, except executing it on a\ncomputer or modifying a private copy.  Propagation includes copying,\ndistribution (with or without modification), making available to the\npublic, and in some countries other activities as well.\n\n  To \"convey\" a work means any kind of propagation that enables other\nparties to make or receive copies.  Mere interaction with a user through\na computer network, with no transfer of a copy, is not conveying.\n\n  An interactive user interface displays \"Appropriate Legal Notices\"\nto the extent that it includes a convenient and prominently visible\nfeature that (1) displays an appropriate copyright notice, and (2)\ntells the user that there is no warranty for the work (except to the\nextent that warranties are provided), that licensees may convey the\nwork under this License, and how to view a copy of this License.  If\nthe interface presents a list of user commands or options, such as a\nmenu, a prominent item in the list meets this criterion.\n\n  1. Source Code.\n\n  The \"source code\" for a work means the preferred form of the work\nfor making modifications to it.  
\"Object code\" means any non-source\nform of a work.\n\n  A \"Standard Interface\" means an interface that either is an official\nstandard defined by a recognized standards body, or, in the case of\ninterfaces specified for a particular programming language, one that\nis widely used among developers working in that language.\n\n  The \"System Libraries\" of an executable work include anything, other\nthan the work as a whole, that (a) is included in the normal form of\npackaging a Major Component, but which is not part of that Major\nComponent, and (b) serves only to enable use of the work with that\nMajor Component, or to implement a Standard Interface for which an\nimplementation is available to the public in source code form.  A\n\"Major Component\", in this context, means a major essential component\n(kernel, window system, and so on) of the specific operating system\n(if any) on which the executable work runs, or a compiler used to\nproduce the work, or an object code interpreter used to run it.\n\n  The \"Corresponding Source\" for a work in object code form means all\nthe source code needed to generate, install, and (for an executable\nwork) run the object code and to modify the work, including scripts to\ncontrol those activities.  However, it does not include the work's\nSystem Libraries, or general-purpose tools or generally available free\nprograms which are used unmodified in performing those activities but\nwhich are not part of the work.  
For example, Corresponding Source\nincludes interface definition files associated with source files for\nthe work, and the source code for shared libraries and dynamically\nlinked subprograms that the work is specifically designed to require,\nsuch as by intimate data communication or control flow between those\nsubprograms and other parts of the work.\n\n  The Corresponding Source need not include anything that users\ncan regenerate automatically from other parts of the Corresponding\nSource.\n\n  The Corresponding Source for a work in source code form is that\nsame work.\n\n  2. Basic Permissions.\n\n  All rights granted under this License are granted for the term of\ncopyright on the Program, and are irrevocable provided the stated\nconditions are met.  This License explicitly affirms your unlimited\npermission to run the unmodified Program.  The output from running a\ncovered work is covered by this License only if the output, given its\ncontent, constitutes a covered work.  This License acknowledges your\nrights of fair use or other equivalent, as provided by copyright law.\n\n  You may make, run and propagate covered works that you do not\nconvey, without conditions so long as your license otherwise remains\nin force.  You may convey covered works to others for the sole purpose\nof having them make modifications exclusively for you, or provide you\nwith facilities for running those works, provided that you comply with\nthe terms of this License in conveying all material for which you do\nnot control copyright.  Those thus making or running the covered works\nfor you must do so exclusively on your behalf, under your direction\nand control, on terms that prohibit them from making any copies of\nyour copyrighted material outside their relationship with you.\n\n  Conveying under any other circumstances is permitted solely under\nthe conditions stated below.  Sublicensing is not allowed; section 10\nmakes it unnecessary.\n\n  3. 
Protecting Users' Legal Rights From Anti-Circumvention Law.\n\n  No covered work shall be deemed part of an effective technological\nmeasure under any applicable law fulfilling obligations under article\n11 of the WIPO copyright treaty adopted on 20 December 1996, or\nsimilar laws prohibiting or restricting circumvention of such\nmeasures.\n\n  When you convey a covered work, you waive any legal power to forbid\ncircumvention of technological measures to the extent such circumvention\nis effected by exercising rights under this License with respect to\nthe covered work, and you disclaim any intention to limit operation or\nmodification of the work as a means of enforcing, against the work's\nusers, your or third parties' legal rights to forbid circumvention of\ntechnological measures.\n\n  4. Conveying Verbatim Copies.\n\n  You may convey verbatim copies of the Program's source code as you\nreceive it, in any medium, provided that you conspicuously and\nappropriately publish on each copy an appropriate copyright notice;\nkeep intact all notices stating that this License and any\nnon-permissive terms added in accord with section 7 apply to the code;\nkeep intact all notices of the absence of any warranty; and give all\nrecipients a copy of this License along with the Program.\n\n  You may charge any price or no price for each copy that you convey,\nand you may offer support or warranty protection for a fee.\n\n  5. Conveying Modified Source Versions.\n\n  You may convey a work based on the Program, or the modifications to\nproduce it from the Program, in the form of source code under the\nterms of section 4, provided that you also meet all of these conditions:\n\n    a) The work must carry prominent notices stating that you modified\n    it, and giving a relevant date.\n\n    b) The work must carry prominent notices stating that it is\n    released under this License and any conditions added under section\n    7.  
This requirement modifies the requirement in section 4 to\n    \"keep intact all notices\".\n\n    c) You must license the entire work, as a whole, under this\n    License to anyone who comes into possession of a copy.  This\n    License will therefore apply, along with any applicable section 7\n    additional terms, to the whole of the work, and all its parts,\n    regardless of how they are packaged.  This License gives no\n    permission to license the work in any other way, but it does not\n    invalidate such permission if you have separately received it.\n\n    d) If the work has interactive user interfaces, each must display\n    Appropriate Legal Notices; however, if the Program has interactive\n    interfaces that do not display Appropriate Legal Notices, your\n    work need not make them do so.\n\n  A compilation of a covered work with other separate and independent\nworks, which are not by their nature extensions of the covered work,\nand which are not combined with it such as to form a larger program,\nin or on a volume of a storage or distribution medium, is called an\n\"aggregate\" if the compilation and its resulting copyright are not\nused to limit the access or legal rights of the compilation's users\nbeyond what the individual works permit.  Inclusion of a covered work\nin an aggregate does not cause this License to apply to the other\nparts of the aggregate.\n\n  6. 
Conveying Non-Source Forms.\n\n  You may convey a covered work in object code form under the terms\nof sections 4 and 5, provided that you also convey the\nmachine-readable Corresponding Source under the terms of this License,\nin one of these ways:\n\n    a) Convey the object code in, or embodied in, a physical product\n    (including a physical distribution medium), accompanied by the\n    Corresponding Source fixed on a durable physical medium\n    customarily used for software interchange.\n\n    b) Convey the object code in, or embodied in, a physical product\n    (including a physical distribution medium), accompanied by a\n    written offer, valid for at least three years and valid for as\n    long as you offer spare parts or customer support for that product\n    model, to give anyone who possesses the object code either (1) a\n    copy of the Corresponding Source for all the software in the\n    product that is covered by this License, on a durable physical\n    medium customarily used for software interchange, for a price no\n    more than your reasonable cost of physically performing this\n    conveying of source, or (2) access to copy the\n    Corresponding Source from a network server at no charge.\n\n    c) Convey individual copies of the object code with a copy of the\n    written offer to provide the Corresponding Source.  This\n    alternative is allowed only occasionally and noncommercially, and\n    only if you received the object code with such an offer, in accord\n    with subsection 6b.\n\n    d) Convey the object code by offering access from a designated\n    place (gratis or for a charge), and offer equivalent access to the\n    Corresponding Source in the same way through the same place at no\n    further charge.  You need not require recipients to copy the\n    Corresponding Source along with the object code.  
If the place to\n    copy the object code is a network server, the Corresponding Source\n    may be on a different server (operated by you or a third party)\n    that supports equivalent copying facilities, provided you maintain\n    clear directions next to the object code saying where to find the\n    Corresponding Source.  Regardless of what server hosts the\n    Corresponding Source, you remain obligated to ensure that it is\n    available for as long as needed to satisfy these requirements.\n\n    e) Convey the object code using peer-to-peer transmission, provided\n    you inform other peers where the object code and Corresponding\n    Source of the work are being offered to the general public at no\n    charge under subsection 6d.\n\n  A separable portion of the object code, whose source code is excluded\nfrom the Corresponding Source as a System Library, need not be\nincluded in conveying the object code work.\n\n  A \"User Product\" is either (1) a \"consumer product\", which means any\ntangible personal property which is normally used for personal, family,\nor household purposes, or (2) anything designed or sold for incorporation\ninto a dwelling.  In determining whether a product is a consumer product,\ndoubtful cases shall be resolved in favor of coverage.  For a particular\nproduct received by a particular user, \"normally used\" refers to a\ntypical or common use of that class of product, regardless of the status\nof the particular user or of the way in which the particular user\nactually uses, or expects or is expected to use, the product.  
A product\nis a consumer product regardless of whether the product has substantial\ncommercial, industrial or non-consumer uses, unless such uses represent\nthe only significant mode of use of the product.\n\n  \"Installation Information\" for a User Product means any methods,\nprocedures, authorization keys, or other information required to install\nand execute modified versions of a covered work in that User Product from\na modified version of its Corresponding Source.  The information must\nsuffice to ensure that the continued functioning of the modified object\ncode is in no case prevented or interfered with solely because\nmodification has been made.\n\n  If you convey an object code work under this section in, or with, or\nspecifically for use in, a User Product, and the conveying occurs as\npart of a transaction in which the right of possession and use of the\nUser Product is transferred to the recipient in perpetuity or for a\nfixed term (regardless of how the transaction is characterized), the\nCorresponding Source conveyed under this section must be accompanied\nby the Installation Information.  But this requirement does not apply\nif neither you nor any third party retains the ability to install\nmodified object code on the User Product (for example, the work has\nbeen installed in ROM).\n\n  The requirement to provide Installation Information does not include a\nrequirement to continue to provide support service, warranty, or updates\nfor a work that has been modified or installed by the recipient, or for\nthe User Product in which it has been modified or installed.  
Access to a\nnetwork may be denied when the modification itself materially and\nadversely affects the operation of the network or violates the rules and\nprotocols for communication across the network.\n\n  Corresponding Source conveyed, and Installation Information provided,\nin accord with this section must be in a format that is publicly\ndocumented (and with an implementation available to the public in\nsource code form), and must require no special password or key for\nunpacking, reading or copying.\n\n  7. Additional Terms.\n\n  \"Additional permissions\" are terms that supplement the terms of this\nLicense by making exceptions from one or more of its conditions.\nAdditional permissions that are applicable to the entire Program shall\nbe treated as though they were included in this License, to the extent\nthat they are valid under applicable law.  If additional permissions\napply only to part of the Program, that part may be used separately\nunder those permissions, but the entire Program remains governed by\nthis License without regard to the additional permissions.\n\n  When you convey a copy of a covered work, you may at your option\nremove any additional permissions from that copy, or from any part of\nit.  (Additional permissions may be written to require their own\nremoval in certain cases when you modify the work.)  
You may place\nadditional permissions on material, added by you to a covered work,\nfor which you have or can give appropriate copyright permission.\n\n  Notwithstanding any other provision of this License, for material you\nadd to a covered work, you may (if authorized by the copyright holders of\nthat material) supplement the terms of this License with terms:\n\n    a) Disclaiming warranty or limiting liability differently from the\n    terms of sections 15 and 16 of this License; or\n\n    b) Requiring preservation of specified reasonable legal notices or\n    author attributions in that material or in the Appropriate Legal\n    Notices displayed by works containing it; or\n\n    c) Prohibiting misrepresentation of the origin of that material, or\n    requiring that modified versions of such material be marked in\n    reasonable ways as different from the original version; or\n\n    d) Limiting the use for publicity purposes of names of licensors or\n    authors of the material; or\n\n    e) Declining to grant rights under trademark law for use of some\n    trade names, trademarks, or service marks; or\n\n    f) Requiring indemnification of licensors and authors of that\n    material by anyone who conveys the material (or modified versions of\n    it) with contractual assumptions of liability to the recipient, for\n    any liability that these contractual assumptions directly impose on\n    those licensors and authors.\n\n  All other non-permissive additional terms are considered \"further\nrestrictions\" within the meaning of section 10.  If the Program as you\nreceived it, or any part of it, contains a notice stating that it is\ngoverned by this License along with a term that is a further\nrestriction, you may remove that term.  
If a license document contains\na further restriction but permits relicensing or conveying under this\nLicense, you may add to a covered work material governed by the terms\nof that license document, provided that the further restriction does\nnot survive such relicensing or conveying.\n\n  If you add terms to a covered work in accord with this section, you\nmust place, in the relevant source files, a statement of the\nadditional terms that apply to those files, or a notice indicating\nwhere to find the applicable terms.\n\n  Additional terms, permissive or non-permissive, may be stated in the\nform of a separately written license, or stated as exceptions;\nthe above requirements apply either way.\n\n  8. Termination.\n\n  You may not propagate or modify a covered work except as expressly\nprovided under this License.  Any attempt otherwise to propagate or\nmodify it is void, and will automatically terminate your rights under\nthis License (including any patent licenses granted under the third\nparagraph of section 11).\n\n  However, if you cease all violation of this License, then your\nlicense from a particular copyright holder is reinstated (a)\nprovisionally, unless and until the copyright holder explicitly and\nfinally terminates your license, and (b) permanently, if the copyright\nholder fails to notify you of the violation by some reasonable means\nprior to 60 days after the cessation.\n\n  Moreover, your license from a particular copyright holder is\nreinstated permanently if the copyright holder notifies you of the\nviolation by some reasonable means, this is the first time you have\nreceived notice of violation of this License (for any work) from that\ncopyright holder, and you cure the violation prior to 30 days after\nyour receipt of the notice.\n\n  Termination of your rights under this section does not terminate the\nlicenses of parties who have received copies or rights from you under\nthis License.  
If your rights have been terminated and not permanently\nreinstated, you do not qualify to receive new licenses for the same\nmaterial under section 10.\n\n  9. Acceptance Not Required for Having Copies.\n\n  You are not required to accept this License in order to receive or\nrun a copy of the Program.  Ancillary propagation of a covered work\noccurring solely as a consequence of using peer-to-peer transmission\nto receive a copy likewise does not require acceptance.  However,\nnothing other than this License grants you permission to propagate or\nmodify any covered work.  These actions infringe copyright if you do\nnot accept this License.  Therefore, by modifying or propagating a\ncovered work, you indicate your acceptance of this License to do so.\n\n  10. Automatic Licensing of Downstream Recipients.\n\n  Each time you convey a covered work, the recipient automatically\nreceives a license from the original licensors, to run, modify and\npropagate that work, subject to this License.  You are not responsible\nfor enforcing compliance by third parties with this License.\n\n  An \"entity transaction\" is a transaction transferring control of an\norganization, or substantially all assets of one, or subdividing an\norganization, or merging organizations.  If propagation of a covered\nwork results from an entity transaction, each party to that\ntransaction who receives a copy of the work also receives whatever\nlicenses to the work the party's predecessor in interest had or could\ngive under the previous paragraph, plus a right to possession of the\nCorresponding Source of the work from the predecessor in interest, if\nthe predecessor has it or can get it with reasonable efforts.\n\n  You may not impose any further restrictions on the exercise of the\nrights granted or affirmed under this License.  
For example, you may\nnot impose a license fee, royalty, or other charge for exercise of\nrights granted under this License, and you may not initiate litigation\n(including a cross-claim or counterclaim in a lawsuit) alleging that\nany patent claim is infringed by making, using, selling, offering for\nsale, or importing the Program or any portion of it.\n\n  11. Patents.\n\n  A \"contributor\" is a copyright holder who authorizes use under this\nLicense of the Program or a work on which the Program is based.  The\nwork thus licensed is called the contributor's \"contributor version\".\n\n  A contributor's \"essential patent claims\" are all patent claims\nowned or controlled by the contributor, whether already acquired or\nhereafter acquired, that would be infringed by some manner, permitted\nby this License, of making, using, or selling its contributor version,\nbut do not include claims that would be infringed only as a\nconsequence of further modification of the contributor version.  For\npurposes of this definition, \"control\" includes the right to grant\npatent sublicenses in a manner consistent with the requirements of\nthis License.\n\n  Each contributor grants you a non-exclusive, worldwide, royalty-free\npatent license under the contributor's essential patent claims, to\nmake, use, sell, offer for sale, import and otherwise run, modify and\npropagate the contents of its contributor version.\n\n  In the following three paragraphs, a \"patent license\" is any express\nagreement or commitment, however denominated, not to enforce a patent\n(such as an express permission to practice a patent or covenant not to\nsue for patent infringement).  
To \"grant\" such a patent license to a\nparty means to make such an agreement or commitment not to enforce a\npatent against the party.\n\n  If you convey a covered work, knowingly relying on a patent license,\nand the Corresponding Source of the work is not available for anyone\nto copy, free of charge and under the terms of this License, through a\npublicly available network server or other readily accessible means,\nthen you must either (1) cause the Corresponding Source to be so\navailable, or (2) arrange to deprive yourself of the benefit of the\npatent license for this particular work, or (3) arrange, in a manner\nconsistent with the requirements of this License, to extend the patent\nlicense to downstream recipients.  \"Knowingly relying\" means you have\nactual knowledge that, but for the patent license, your conveying the\ncovered work in a country, or your recipient's use of the covered work\nin a country, would infringe one or more identifiable patents in that\ncountry that you have reason to believe are valid.\n\n  If, pursuant to or in connection with a single transaction or\narrangement, you convey, or propagate by procuring conveyance of, a\ncovered work, and grant a patent license to some of the parties\nreceiving the covered work authorizing them to use, propagate, modify\nor convey a specific copy of the covered work, then the patent license\nyou grant is automatically extended to all recipients of the covered\nwork and works based on it.\n\n  A patent license is \"discriminatory\" if it does not include within\nthe scope of its coverage, prohibits the exercise of, or is\nconditioned on the non-exercise of one or more of the rights that are\nspecifically granted under this License.  
You may not convey a covered\nwork if you are a party to an arrangement with a third party that is\nin the business of distributing software, under which you make payment\nto the third party based on the extent of your activity of conveying\nthe work, and under which the third party grants, to any of the\nparties who would receive the covered work from you, a discriminatory\npatent license (a) in connection with copies of the covered work\nconveyed by you (or copies made from those copies), or (b) primarily\nfor and in connection with specific products or compilations that\ncontain the covered work, unless you entered into that arrangement,\nor that patent license was granted, prior to 28 March 2007.\n\n  Nothing in this License shall be construed as excluding or limiting\nany implied license or other defenses to infringement that may\notherwise be available to you under applicable patent law.\n\n  12. No Surrender of Others' Freedom.\n\n  If conditions are imposed on you (whether by court order, agreement or\notherwise) that contradict the conditions of this License, they do not\nexcuse you from the conditions of this License.  If you cannot convey a\ncovered work so as to satisfy simultaneously your obligations under this\nLicense and any other pertinent obligations, then as a consequence you may\nnot convey it at all.  For example, if you agree to terms that obligate you\nto collect a royalty for further conveying from those to whom you convey\nthe Program, the only way you could satisfy both those terms and this\nLicense would be to refrain entirely from conveying the Program.\n\n  13. 
Remote Network Interaction; Use with the GNU General Public License.\n\n  Notwithstanding any other provision of this License, if you modify the\nProgram, your modified version must prominently offer all users\ninteracting with it remotely through a computer network (if your version\nsupports such interaction) an opportunity to receive the Corresponding\nSource of your version by providing access to the Corresponding Source\nfrom a network server at no charge, through some standard or customary\nmeans of facilitating copying of software.  This Corresponding Source\nshall include the Corresponding Source for any work covered by version 3\nof the GNU General Public License that is incorporated pursuant to the\nfollowing paragraph.\n\n  Notwithstanding any other provision of this License, you have\npermission to link or combine any covered work with a work licensed\nunder version 3 of the GNU General Public License into a single\ncombined work, and to convey the resulting work.  The terms of this\nLicense will continue to apply to the part which is the covered work,\nbut the work with which it is combined will remain governed by version\n3 of the GNU General Public License.\n\n  14. Revised Versions of this License.\n\n  The Free Software Foundation may publish revised and/or new versions of\nthe GNU Affero General Public License from time to time.  Such new versions\nwill be similar in spirit to the present version, but may differ in detail to\naddress new problems or concerns.\n\n  Each version is given a distinguishing version number.  If the\nProgram specifies that a certain numbered version of the GNU Affero General\nPublic License \"or any later version\" applies to it, you have the\noption of following the terms and conditions either of that numbered\nversion or of any later version published by the Free Software\nFoundation.  
If the Program does not specify a version number of the\nGNU Affero General Public License, you may choose any version ever published\nby the Free Software Foundation.\n\n  If the Program specifies that a proxy can decide which future\nversions of the GNU Affero General Public License can be used, that proxy's\npublic statement of acceptance of a version permanently authorizes you\nto choose that version for the Program.\n\n  Later license versions may give you additional or different\npermissions.  However, no additional obligations are imposed on any\nauthor or copyright holder as a result of your choosing to follow a\nlater version.\n\n  15. Disclaimer of Warranty.\n\n  THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY\nAPPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT\nHOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM \"AS IS\" WITHOUT WARRANTY\nOF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,\nTHE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR\nPURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM\nIS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF\nALL NECESSARY SERVICING, REPAIR OR CORRECTION.\n\n  16. Limitation of Liability.\n\n  IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING\nWILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS\nTHE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY\nGENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE\nUSE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF\nDATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD\nPARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),\nEVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF\nSUCH DAMAGES.\n\n  17. 
Interpretation of Sections 15 and 16.\n\n  If the disclaimer of warranty and limitation of liability provided\nabove cannot be given local legal effect according to their terms,\nreviewing courts shall apply local law that most closely approximates\nan absolute waiver of all civil liability in connection with the\nProgram, unless a warranty or assumption of liability accompanies a\ncopy of the Program in return for a fee.\n\n                     END OF TERMS AND CONDITIONS\n\n            How to Apply These Terms to Your New Programs\n\n  If you develop a new program, and you want it to be of the greatest\npossible use to the public, the best way to achieve this is to make it\nfree software which everyone can redistribute and change under these terms.\n\n  To do so, attach the following notices to the program.  It is safest\nto attach them to the start of each source file to most effectively\nstate the exclusion of warranty; and each file should have at least\nthe \"copyright\" line and a pointer to where the full notice is found.\n\n    <one line to give the program's name and a brief idea of what it does.>\n    Copyright (C) <year>  <name of author>\n\n    This program is free software: you can redistribute it and/or modify\n    it under the terms of the GNU Affero General Public License as published\n    by the Free Software Foundation, either version 3 of the License, or\n    (at your option) any later version.\n\n    This program is distributed in the hope that it will be useful,\n    but WITHOUT ANY WARRANTY; without even the implied warranty of\n    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n    GNU Affero General Public License for more details.\n\n    You should have received a copy of the GNU Affero General Public License\n    along with this program.  
If not, see <https://www.gnu.org/licenses/>.\n\nAlso add information on how to contact you by electronic and paper mail.\n\n  If your software can interact with users remotely through a computer\nnetwork, you should also make sure that it provides a way for users to\nget its source.  For example, if your program is a web application, its\ninterface could display a \"Source\" link that leads users to an archive\nof the code.  There are many ways you could offer source, and different\nsolutions will be better for different programs; see section 13 for the\nspecific requirements.\n\n  You should also get your employer (if you work as a programmer) or school,\nif any, to sign a \"copyright disclaimer\" for the program, if necessary.\nFor more information on this, and how to apply and follow the GNU AGPL, see\n<https://www.gnu.org/licenses/>.\n"
  },
  {
    "path": "README.md",
    "content": "<div align=\"center\" markdown=\"1\">\n   <sup>Special thanks to:</sup>\n   <br>\n   <br>\n   <a href=\"https://go.warp.dev/text-generation-webui\">\n      <img alt=\"Warp sponsorship\" width=\"400\" src=\"https://raw.githubusercontent.com/warpdotdev/brand-assets/refs/heads/main/Github/Sponsor/Warp-Github-LG-02.png\">\n   </a>\n\n### [Warp, built for coding with multiple AI agents](https://go.warp.dev/text-generation-webui)\n[Available for macOS, Linux, & Windows](https://go.warp.dev/text-generation-webui)<br>\n</div>\n<hr>\n\n# Text Generation Web UI\n\nA Gradio web UI for running Large Language Models locally. 100% private and offline. Supports text generation, vision, tool-calling, training, image generation, and more.\n\n[Try the Deep Reason extension](https://oobabooga.gumroad.com/l/deep_reason)\n\n|![Image1](https://github.com/oobabooga/screenshots/raw/main/INSTRUCT-3.5.png) | ![Image2](https://github.com/oobabooga/screenshots/raw/main/CHAT-3.5.png) |\n|:---:|:---:|\n|![Image1](https://github.com/oobabooga/screenshots/raw/main/DEFAULT-3.5.png) | ![Image2](https://github.com/oobabooga/screenshots/raw/main/PARAMETERS-3.5.png) |\n\n## Features\n\n- **Multiple backends**: [llama.cpp](https://github.com/ggerganov/llama.cpp), [Transformers](https://github.com/huggingface/transformers), [ExLlamaV3](https://github.com/turboderp-org/exllamav3), and [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM). Switch between backends and models without restarting.\n- **File attachments**: Upload text files, PDF documents, and .docx documents to talk about their contents.\n- **Vision (multimodal)**: Attach images to messages for visual understanding ([tutorial](https://github.com/oobabooga/text-generation-webui/wiki/Multimodal-Tutorial)).\n- **Tool-calling**: Models can call custom functions during chat — web search, page fetching, math, and more. 
Each tool is a single `.py` file, easy to create and extend ([tutorial](https://github.com/oobabooga/text-generation-webui/wiki/Tool-Calling-Tutorial)).\n- **OpenAI-compatible API**: Chat and Completions endpoints with tool-calling support. Use as a local drop-in replacement for the OpenAI API ([examples](https://github.com/oobabooga/text-generation-webui/wiki/12-%E2%80%90-OpenAI-API#examples)).\n- **Training**: Fine-tune LoRAs on multi-turn chat or raw text datasets. Supports resuming interrupted runs ([tutorial](https://github.com/oobabooga/text-generation-webui/wiki/05-%E2%80%90-Training-Tab)).\n- **Image generation**: A dedicated tab for `diffusers` models like **Z-Image-Turbo**. Features 4-bit/8-bit quantization and a persistent gallery with metadata ([tutorial](https://github.com/oobabooga/text-generation-webui/wiki/Image-Generation-Tutorial)).\n- **Easy setup**: [Portable builds](https://github.com/oobabooga/text-generation-webui/releases) (zero setup, just unzip and run) for GGUF models on Windows/Linux/macOS, or a one-click installer for the full feature set.\n- 100% offline and private, with zero telemetry, external resources, or remote update requests.\n- `instruct` mode for instruction-following (like ChatGPT), and `chat-instruct`/`chat` modes for talking to custom characters. Prompts are automatically formatted with Jinja2 templates.\n- Edit messages, navigate between message versions, and branch conversations at any point.\n- Free-form text generation in the Notebook tab without being limited to chat turns.\n- Multiple sampling parameters and generation options for sophisticated text generation control.\n- Aesthetic UI with dark and light themes.\n- Syntax highlighting for code blocks and LaTeX rendering for mathematical expressions.\n- Extension support, with numerous built-in and user-contributed extensions available. 
See the [wiki](https://github.com/oobabooga/text-generation-webui/wiki/07-%E2%80%90-Extensions) and [extensions directory](https://github.com/oobabooga/text-generation-webui-extensions) for details.\n\n## How to install\n\n#### ✅ Option 1: Portable builds (get started in 1 minute)\n\nNo installation needed – just download, unzip and run. All dependencies included.\n\nDownload from here: **https://github.com/oobabooga/text-generation-webui/releases**\n\n- Builds are provided for Linux, Windows, and macOS, with options for CUDA, Vulkan, ROCm, and CPU-only.\n- Compatible with GGUF (llama.cpp) models.\n\n#### Option 2: Manual portable install with venv\n\nVery fast setup that should work on any Python 3.9+:\n\n```bash\n# Clone repository\ngit clone https://github.com/oobabooga/text-generation-webui\ncd text-generation-webui\n\n# Create virtual environment\npython -m venv venv\n\n# Activate virtual environment\n# On Windows:\nvenv\\Scripts\\activate\n# On macOS/Linux:\nsource venv/bin/activate\n\n# Install dependencies (choose appropriate file under requirements/portable for your hardware)\npip install -r requirements/portable/requirements.txt --upgrade\n\n# Launch server (basic command)\npython server.py --portable --api --auto-launch\n\n# When done working, deactivate\ndeactivate\n```\n\n#### Option 3: One-click installer\n\nFor users who need additional backends (ExLlamaV3, Transformers), training, image generation, or extensions (TTS, voice input, translation, etc). Requires ~10GB disk space and downloads PyTorch.\n\n1. Clone the repository, or [download its source code](https://github.com/oobabooga/text-generation-webui/archive/refs/heads/main.zip) and extract it.\n2. Run the startup script for your OS: `start_windows.bat`, `start_linux.sh`, or `start_macos.sh`.\n3. When prompted, select your GPU vendor.\n4. 
After installation, open `http://127.0.0.1:7860` in your browser.\n\nTo restart the web UI later, run the same `start_` script.\n\nYou can pass command-line flags directly (e.g., `./start_linux.sh --help`), or add them to `user_data/CMD_FLAGS.txt` (e.g., `--api` to enable the API).\n\nTo update, run the update script for your OS: `update_wizard_windows.bat`, `update_wizard_linux.sh`, or `update_wizard_macos.sh`.\n\nTo reinstall with a fresh Python environment, delete the `installer_files` folder and run the `start_` script again.\n\n<details>\n<summary>\nOne-click installer details\n</summary>\n\n### One-click-installer\n\nThe script uses Miniforge to set up a Conda environment in the `installer_files` folder.\n\nIf you ever need to install something manually in the `installer_files` environment, you can launch an interactive shell using the cmd script: `cmd_linux.sh`, `cmd_windows.bat`, or `cmd_macos.sh`.\n\n* There is no need to run any of those scripts (`start_`, `update_wizard_`, or `cmd_`) as admin/root.\n* To install requirements for extensions, it is recommended to use the update wizard script with the \"Install/update extensions requirements\" option. At the end, this script will install the main requirements for the project to make sure that they take precedence in case of version conflicts.\n* For automated installation, you can use the `GPU_CHOICE`, `LAUNCH_AFTER_INSTALL`, and `INSTALL_EXTENSIONS` environment variables. For instance: `GPU_CHOICE=A LAUNCH_AFTER_INSTALL=FALSE INSTALL_EXTENSIONS=TRUE ./start_linux.sh`.\n\n</details>\n\n<details>\n<summary>\nManual full installation with conda or docker\n</summary>\n\n### Full installation with Conda\n\n#### 0. 
Install Conda\n\nhttps://github.com/conda-forge/miniforge\n\nOn Linux or WSL, Miniforge can be automatically installed with these two commands:\n\n```\ncurl -sL \"https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-Linux-x86_64.sh\" > \"Miniforge3.sh\"\nbash Miniforge3.sh\n```\n\nFor other platforms, download from: https://github.com/conda-forge/miniforge/releases/latest\n\n#### 1. Create a new conda environment\n\n```\nconda create -n textgen python=3.13\nconda activate textgen\n```\n\n#### 2. Install PyTorch\n\n| System | GPU | Command |\n|--------|---------|---------|\n| Linux/WSL | NVIDIA | `pip3 install torch==2.9.1 --index-url https://download.pytorch.org/whl/cu128` |\n| Linux/WSL | CPU only | `pip3 install torch==2.9.1 --index-url https://download.pytorch.org/whl/cpu` |\n| Linux | AMD | `pip3 install https://repo.radeon.com/rocm/manylinux/rocm-rel-7.2/torch-2.9.1%2Brocm7.2.0.lw.git7e1940d4-cp313-cp313-linux_x86_64.whl` |\n| macOS + MPS | Any | `pip3 install torch==2.9.1` |\n| Windows | NVIDIA | `pip3 install torch==2.9.1 --index-url https://download.pytorch.org/whl/cu128` |\n| Windows | CPU only | `pip3 install torch==2.9.1` |\n\nThe up-to-date commands can be found here: https://pytorch.org/get-started/locally/.\n\nIf you need `nvcc` to compile some library manually, you will additionally need to install this:\n\n```\nconda install -y -c \"nvidia/label/cuda-12.8.1\" cuda\n```\n\n#### 3. 
Install the web UI\n\n```\ngit clone https://github.com/oobabooga/text-generation-webui\ncd text-generation-webui\npip install -r requirements/full/<requirements file according to table below>\n```\n\nRequirements file to use:\n\n| GPU | requirements file to use |\n|--------|---------|\n| NVIDIA | `requirements.txt` |\n| AMD | `requirements_amd.txt` |\n| CPU only | `requirements_cpu_only.txt` |\n| Apple Intel | `requirements_apple_intel.txt` |\n| Apple Silicon | `requirements_apple_silicon.txt` |\n\n### Start the web UI\n\n```\nconda activate textgen\ncd text-generation-webui\npython server.py\n```\n\nThen browse to\n\n`http://127.0.0.1:7860`\n\n#### Manual install\n\nThe `requirements*.txt` files above contain various wheels precompiled through GitHub Actions. If you wish to compile things manually, or if you need to because no suitable wheels are available for your hardware, you can use `requirements_nowheels.txt` and then install your desired loaders manually.\n\n### Alternative: Docker\n\n```\nFor NVIDIA GPU:\nln -s docker/{nvidia/Dockerfile,nvidia/docker-compose.yml,.dockerignore} .\nFor AMD GPU:\nln -s docker/{amd/Dockerfile,amd/docker-compose.yml,.dockerignore} .\nFor Intel GPU:\nln -s docker/{intel/Dockerfile,intel/docker-compose.yml,.dockerignore} .\nFor CPU only:\nln -s docker/{cpu/Dockerfile,cpu/docker-compose.yml,.dockerignore} .\ncp docker/.env.example .env\n# Create logs/cache dirs:\nmkdir -p user_data/logs user_data/cache\n# Edit .env and set:\n#   TORCH_CUDA_ARCH_LIST based on your GPU model\n#   APP_RUNTIME_GID      your host user's group id (run `id -g` in a terminal)\n#   BUILD_EXTENSIONS     optionally add comma separated list of extensions to build\n# Edit user_data/CMD_FLAGS.txt and add in it the options you want to execute (like --listen --cpu)\n#\ndocker compose up --build\n```\n\n* You need to have Docker Compose v2.17 or higher installed. 
See [this guide](https://github.com/oobabooga/text-generation-webui/wiki/09-%E2%80%90-Docker) for instructions.\n* For additional docker files, check out [this repository](https://github.com/Atinoda/text-generation-webui-docker).\n\n### Updating the requirements\n\nFrom time to time, the `requirements*.txt` change. To update, use these commands:\n\n```\nconda activate textgen\ncd text-generation-webui\npip install -r <requirements file that you have used> --upgrade\n```\n</details>\n\n<details>\n<summary>\nList of command-line flags\n</summary>\n\n```txt\nusage: server.py [-h] [--user-data-dir USER_DATA_DIR] [--multi-user] [--model MODEL] [--lora LORA [LORA ...]] [--model-dir MODEL_DIR] [--lora-dir LORA_DIR] [--model-menu] [--settings SETTINGS]\n                 [--extensions EXTENSIONS [EXTENSIONS ...]] [--verbose] [--idle-timeout IDLE_TIMEOUT] [--image-model IMAGE_MODEL] [--image-model-dir IMAGE_MODEL_DIR] [--image-dtype {bfloat16,float16}]\n                 [--image-attn-backend {flash_attention_2,sdpa}] [--image-cpu-offload] [--image-compile] [--image-quant {none,bnb-8bit,bnb-4bit,torchao-int8wo,torchao-fp4,torchao-float8wo}]\n                 [--loader LOADER] [--ctx-size N] [--cache-type N] [--model-draft MODEL_DRAFT] [--draft-max DRAFT_MAX] [--gpu-layers-draft GPU_LAYERS_DRAFT] [--device-draft DEVICE_DRAFT]\n                 [--ctx-size-draft CTX_SIZE_DRAFT] [--spec-type {none,ngram-mod,ngram-simple,ngram-map-k,ngram-map-k4v,ngram-cache}] [--spec-ngram-size-n SPEC_NGRAM_SIZE_N]\n                 [--spec-ngram-size-m SPEC_NGRAM_SIZE_M] [--spec-ngram-min-hits SPEC_NGRAM_MIN_HITS] [--gpu-layers N] [--cpu-moe] [--mmproj MMPROJ] [--streaming-llm] [--tensor-split TENSOR_SPLIT]\n                 [--row-split] [--no-mmap] [--mlock] [--no-kv-offload] [--batch-size BATCH_SIZE] [--ubatch-size UBATCH_SIZE] [--threads THREADS] [--threads-batch THREADS_BATCH] [--numa]\n                 [--parallel PARALLEL] [--fit-target FIT_TARGET] [--extra-flags EXTRA_FLAGS] [--cpu] 
[--cpu-memory CPU_MEMORY] [--disk] [--disk-cache-dir DISK_CACHE_DIR] [--load-in-8bit] [--bf16]\n                 [--no-cache] [--trust-remote-code] [--force-safetensors] [--no_use_fast] [--attn-implementation IMPLEMENTATION] [--load-in-4bit] [--use_double_quant] [--compute_dtype COMPUTE_DTYPE]\n                 [--quant_type QUANT_TYPE] [--gpu-split GPU_SPLIT] [--enable-tp] [--tp-backend TP_BACKEND] [--cfg-cache] [--listen] [--listen-port LISTEN_PORT] [--listen-host LISTEN_HOST] [--share]\n                 [--auto-launch] [--gradio-auth GRADIO_AUTH] [--gradio-auth-path GRADIO_AUTH_PATH] [--ssl-keyfile SSL_KEYFILE] [--ssl-certfile SSL_CERTFILE] [--subpath SUBPATH] [--old-colors]\n                 [--portable] [--api] [--public-api] [--public-api-id PUBLIC_API_ID] [--api-port API_PORT] [--api-key API_KEY] [--admin-key ADMIN_KEY] [--api-enable-ipv6] [--api-disable-ipv4]\n                 [--nowebui] [--temperature N] [--dynatemp-low N] [--dynatemp-high N] [--dynatemp-exponent N] [--smoothing-factor N] [--smoothing-curve N] [--min-p N] [--top-p N] [--top-k N]\n                 [--typical-p N] [--xtc-threshold N] [--xtc-probability N] [--epsilon-cutoff N] [--eta-cutoff N] [--tfs N] [--top-a N] [--top-n-sigma N] [--adaptive-target N] [--adaptive-decay N]\n                 [--dry-multiplier N] [--dry-allowed-length N] [--dry-base N] [--repetition-penalty N] [--frequency-penalty N] [--presence-penalty N] [--encoder-repetition-penalty N]\n                 [--no-repeat-ngram-size N] [--repetition-penalty-range N] [--penalty-alpha N] [--guidance-scale N] [--mirostat-mode N] [--mirostat-tau N] [--mirostat-eta N]\n                 [--do-sample | --no-do-sample] [--dynamic-temperature | --no-dynamic-temperature] [--temperature-last | --no-temperature-last] [--sampler-priority N] [--dry-sequence-breakers N]\n                 [--enable-thinking | --no-enable-thinking] [--reasoning-effort N] [--chat-template-file CHAT_TEMPLATE_FILE]\n\nText Generation Web UI\n\noptions:\n  -h, 
--help                                           show this help message and exit\n\nBasic settings:\n  --user-data-dir USER_DATA_DIR                        Path to the user data directory. Default: auto-detected.\n  --multi-user                                         Multi-user mode. Chat histories are not saved or automatically loaded. Best suited for small trusted teams.\n  --model MODEL                                        Name of the model to load by default.\n  --lora LORA [LORA ...]                               The list of LoRAs to load. If you want to load more than one LoRA, write the names separated by spaces.\n  --model-dir MODEL_DIR                                Path to directory with all the models.\n  --lora-dir LORA_DIR                                  Path to directory with all the loras.\n  --model-menu                                         Show a model menu in the terminal when the web UI is first launched.\n  --settings SETTINGS                                  Load the default interface settings from this yaml file. See user_data/settings-template.yaml for an example. If you create a file called\n                                                       user_data/settings.yaml, this file will be loaded by default without the need to use the --settings flag.\n  --extensions EXTENSIONS [EXTENSIONS ...]             The list of extensions to load. If you want to load more than one extension, write the names separated by spaces.\n  --verbose                                            Print the prompts to the terminal.\n  --idle-timeout IDLE_TIMEOUT                          Unload model after this many minutes of inactivity. 
It will be automatically reloaded when you try to use it again.\n\nImage model:\n  --image-model IMAGE_MODEL                            Name of the image model to select on startup (overrides saved setting).\n  --image-model-dir IMAGE_MODEL_DIR                    Path to directory with all the image models.\n  --image-dtype {bfloat16,float16}                     Data type for image model.\n  --image-attn-backend {flash_attention_2,sdpa}        Attention backend for image model.\n  --image-cpu-offload                                  Enable CPU offloading for image model.\n  --image-compile                                      Compile the image model for faster inference.\n  --image-quant {none,bnb-8bit,bnb-4bit,torchao-int8wo,torchao-fp4,torchao-float8wo}\n                                                       Quantization method for image model.\n\nModel loader:\n  --loader LOADER                                      Choose the model loader manually, otherwise, it will get autodetected. Valid options: Transformers, llama.cpp, ExLlamav3_HF, ExLlamav3, TensorRT-\n                                                       LLM.\n\nContext and cache:\n  --ctx-size, --n_ctx, --max_seq_len N                 Context size in tokens. 0 = auto for llama.cpp (requires gpu-layers=-1), 8192 for other loaders.\n  --cache-type, --cache_type N                         KV cache type; valid options: llama.cpp - fp16, q8_0, q4_0; ExLlamaV3 - fp16, q2 to q8 (can specify k_bits and v_bits separately, e.g. q4_q8).\n\nSpeculative decoding:\n  --model-draft MODEL_DRAFT                            Path to the draft model for speculative decoding.\n  --draft-max DRAFT_MAX                                Number of tokens to draft for speculative decoding.\n  --gpu-layers-draft GPU_LAYERS_DRAFT                  Number of layers to offload to the GPU for the draft model.\n  --device-draft DEVICE_DRAFT                          Comma-separated list of devices to use for offloading the draft model. 
Example: CUDA0,CUDA1\n  --ctx-size-draft CTX_SIZE_DRAFT                      Size of the prompt context for the draft model. If 0, uses the same as the main model.\n  --spec-type {none,ngram-mod,ngram-simple,ngram-map-k,ngram-map-k4v,ngram-cache}\n                                                       Draftless speculative decoding type. Recommended: ngram-mod.\n  --spec-ngram-size-n SPEC_NGRAM_SIZE_N                N-gram lookup size for ngram speculative decoding.\n  --spec-ngram-size-m SPEC_NGRAM_SIZE_M                Draft n-gram size for ngram speculative decoding.\n  --spec-ngram-min-hits SPEC_NGRAM_MIN_HITS            Minimum n-gram hits for ngram-map speculative decoding.\n\nllama.cpp:\n  --gpu-layers, --n-gpu-layers N                       Number of layers to offload to the GPU. -1 = auto.\n  --cpu-moe                                            Move the experts to the CPU (for MoE models).\n  --mmproj MMPROJ                                      Path to the mmproj file for vision models.\n  --streaming-llm                                      Activate StreamingLLM to avoid re-evaluating the entire prompt when old messages are removed.\n  --tensor-split TENSOR_SPLIT                          Split the model across multiple GPUs. Comma-separated list of proportions. Example: 60,40.\n  --row-split                                          Split the model by rows across GPUs. This may improve multi-gpu performance.\n  --no-mmap                                            Prevent mmap from being used.\n  --mlock                                              Force the system to keep the model in RAM.\n  --no-kv-offload                                      Do not offload the K, Q, V to the GPU. This saves VRAM but reduces the performance.\n  --batch-size BATCH_SIZE                              Maximum number of prompt tokens to batch together when calling llama-server. 
This is the application level batch size.\n  --ubatch-size UBATCH_SIZE                            Maximum number of prompt tokens to batch together when calling llama-server. This is the max physical batch size for computation (device level).\n  --threads THREADS                                    Number of threads to use.\n  --threads-batch THREADS_BATCH                        Number of threads to use for batches/prompt processing.\n  --numa                                               Activate NUMA task allocation for llama.cpp.\n  --parallel PARALLEL                                  Number of parallel request slots. The context size is divided equally among slots. For example, to have 4 slots with 8192 context each, set\n                                                       ctx_size to 32768.\n  --fit-target FIT_TARGET                              Target VRAM margin per device for auto GPU layers, comma-separated list of values in MiB. A single value is broadcast across all devices.\n                                                       Default: 1024.\n  --extra-flags EXTRA_FLAGS                            Extra flags to pass to llama-server. Format: \"flag1=value1,flag2,flag3=value3\". Example: \"override-tensor=exps=CPU\"\n\nTransformers/Accelerate:\n  --cpu                                                Use the CPU to generate text. Warning: Training on CPU is extremely slow.\n  --cpu-memory CPU_MEMORY                              Maximum CPU memory in GiB. Use this for CPU offloading.\n  --disk                                               If the model is too large for your GPU(s) and CPU combined, send the remaining layers to the disk.\n  --disk-cache-dir DISK_CACHE_DIR                      Directory to save the disk cache to.\n  --load-in-8bit                                       Load the model with 8-bit precision (using bitsandbytes).\n  --bf16                                               Load the model with bfloat16 precision. 
Requires NVIDIA Ampere GPU.\n  --no-cache                                           Set use_cache to False while generating text. This reduces VRAM usage slightly, but it comes at a performance cost.\n  --trust-remote-code                                  Set trust_remote_code=True while loading the model. Necessary for some models.\n  --force-safetensors                                  Set use_safetensors=True while loading the model. This prevents arbitrary code execution.\n  --no_use_fast                                        Set use_fast=False while loading the tokenizer (it's True by default). Use this if you have any problems related to use_fast.\n  --attn-implementation IMPLEMENTATION                 Attention implementation. Valid options: sdpa, eager, flash_attention_2.\n\nbitsandbytes 4-bit:\n  --load-in-4bit                                       Load the model with 4-bit precision (using bitsandbytes).\n  --use_double_quant                                   use_double_quant for 4-bit.\n  --compute_dtype COMPUTE_DTYPE                        compute dtype for 4-bit. Valid options: bfloat16, float16, float32.\n  --quant_type QUANT_TYPE                              quant_type for 4-bit. Valid options: nf4, fp4.\n\nExLlamaV3:\n  --gpu-split GPU_SPLIT                                Comma-separated list of VRAM (in GB) to use per GPU device for model layers. Example: 20,7,7.\n  --enable-tp, --enable_tp                             Enable Tensor Parallelism (TP) to split the model across GPUs.\n  --tp-backend TP_BACKEND                              The backend for tensor parallelism. Valid options: native, nccl. Default: native.\n  --cfg-cache                                          Create an additional cache for CFG negative prompts. 
Necessary to use CFG with that loader.\n\nGradio:\n  --listen                                             Make the web UI reachable from your local network.\n  --listen-port LISTEN_PORT                            The listening port that the server will use.\n  --listen-host LISTEN_HOST                            The hostname that the server will use.\n  --share                                              Create a public URL. This is useful for running the web UI on Google Colab or similar.\n  --auto-launch                                        Open the web UI in the default browser upon launch.\n  --gradio-auth GRADIO_AUTH                            Set Gradio authentication password in the format \"username:password\". Multiple credentials can also be supplied with \"u1:p1,u2:p2,u3:p3\".\n  --gradio-auth-path GRADIO_AUTH_PATH                  Set the Gradio authentication file path. The file should contain one or more user:password pairs in the same format as above.\n  --ssl-keyfile SSL_KEYFILE                            The path to the SSL certificate key file.\n  --ssl-certfile SSL_CERTFILE                          The path to the SSL certificate cert file.\n  --subpath SUBPATH                                    Customize the subpath for gradio, use with reverse proxy\n  --old-colors                                         Use the legacy Gradio colors, before the December/2024 update.\n  --portable                                           Hide features not available in portable mode like training.\n\nAPI:\n  --api                                                Enable the API extension.\n  --public-api                                         Create a public URL for the API using Cloudflare.\n  --public-api-id PUBLIC_API_ID                        Tunnel ID for named Cloudflare Tunnel. 
Use together with public-api option.\n  --api-port API_PORT                                  The listening port for the API.\n  --api-key API_KEY                                    API authentication key.\n  --admin-key ADMIN_KEY                                API authentication key for admin tasks like loading and unloading models. If not set, will be the same as --api-key.\n  --api-enable-ipv6                                    Enable IPv6 for the API\n  --api-disable-ipv4                                   Disable IPv4 for the API\n  --nowebui                                            Do not launch the Gradio UI. Useful for launching the API in standalone mode.\n\nAPI generation defaults:\n  --temperature N                                      Temperature\n  --dynatemp-low N                                     Dynamic temperature low\n  --dynatemp-high N                                    Dynamic temperature high\n  --dynatemp-exponent N                                Dynamic temperature exponent\n  --smoothing-factor N                                 Smoothing factor\n  --smoothing-curve N                                  Smoothing curve\n  --min-p N                                            Min P\n  --top-p N                                            Top P\n  --top-k N                                            Top K\n  --typical-p N                                        Typical P\n  --xtc-threshold N                                    XTC threshold\n  --xtc-probability N                                  XTC probability\n  --epsilon-cutoff N                                   Epsilon cutoff\n  --eta-cutoff N                                       Eta cutoff\n  --tfs N                                              TFS\n  --top-a N                                            Top A\n  --top-n-sigma N                                      Top N Sigma\n  --adaptive-target N                                  Adaptive target\n  --adaptive-decay N                         
          Adaptive decay\n  --dry-multiplier N                                   DRY multiplier\n  --dry-allowed-length N                               DRY allowed length\n  --dry-base N                                         DRY base\n  --repetition-penalty N                               Repetition penalty\n  --frequency-penalty N                                Frequency penalty\n  --presence-penalty N                                 Presence penalty\n  --encoder-repetition-penalty N                       Encoder repetition penalty\n  --no-repeat-ngram-size N                             No repeat ngram size\n  --repetition-penalty-range N                         Repetition penalty range\n  --penalty-alpha N                                    Penalty alpha\n  --guidance-scale N                                   Guidance scale\n  --mirostat-mode N                                    Mirostat mode\n  --mirostat-tau N                                     Mirostat tau\n  --mirostat-eta N                                     Mirostat eta\n  --do-sample, --no-do-sample                          Do sample\n  --dynamic-temperature, --no-dynamic-temperature      Dynamic temperature\n  --temperature-last, --no-temperature-last            Temperature last\n  --sampler-priority N                                 Sampler priority\n  --dry-sequence-breakers N                            DRY sequence breakers\n  --enable-thinking, --no-enable-thinking              Enable thinking\n  --reasoning-effort N                                 Reasoning effort\n  --chat-template-file CHAT_TEMPLATE_FILE              Path to a chat template file (.jinja, .jinja2, or .yaml) to use as the default instruction template for API requests. Overrides the model's\n                                                       built-in template.\n```\n\n</details>\n\n## Downloading models\n\n1. 
Download a GGUF model file from [Hugging Face](https://huggingface.co/models?pipeline_tag=text-generation&sort=downloads&search=gguf).\n2. Place it in the `user_data/models` folder.\n\nThat's it. The UI will detect it automatically.\n\nTo check what will fit your GPU, you can use the [VRAM Calculator](https://huggingface.co/spaces/oobabooga/accurate-gguf-vram-calculator).\n\n<details>\n<summary>Other model types (Transformers, EXL3)</summary>\n\nModels that consist of multiple files (like 16-bit Transformers models and EXL3 models) should be placed in a subfolder inside `user_data/models`:\n\n```\ntext-generation-webui\n└── user_data\n    └── models\n        └── Qwen_Qwen3-8B\n            ├── config.json\n            ├── generation_config.json\n            ├── model-00001-of-00004.safetensors\n            ├── ...\n            ├── tokenizer_config.json\n            └── tokenizer.json\n```\n\nThese formats require the one-click installer (not the portable build).\n</details>\n\n## Documentation\n\nhttps://github.com/oobabooga/text-generation-webui/wiki\n\n## Community\n\nhttps://www.reddit.com/r/Oobabooga/\n\n## Acknowledgments\n\n- In August 2023, [Andreessen Horowitz](https://a16z.com/) (a16z) provided a generous grant to encourage and support my independent work on this project. I am **extremely** grateful for their trust and recognition.\n- This project was inspired by [AUTOMATIC1111/stable-diffusion-webui](https://github.com/AUTOMATIC1111/stable-diffusion-webui) and wouldn't exist without it.\n"
  },
  {
    "path": "cmd_linux.sh",
    "content": "#!/usr/bin/env bash\n\ncd \"$(dirname \"${BASH_SOURCE[0]}\")\"\n\nif [[ \"$(pwd)\" =~ \" \" ]]; then echo This script relies on Miniforge which can not be silently installed under a path with spaces. && exit; fi\n\n# deactivate existing conda envs as needed to avoid conflicts\n{ conda deactivate && conda deactivate && conda deactivate; } 2> /dev/null\n\n# config\nCONDA_ROOT_PREFIX=\"$(pwd)/installer_files/conda\"\nINSTALL_ENV_DIR=\"$(pwd)/installer_files/env\"\n\n# environment isolation\nexport PYTHONNOUSERSITE=1\nunset PYTHONPATH\nunset PYTHONHOME\nexport CUDA_PATH=\"$INSTALL_ENV_DIR\"\nexport CUDA_HOME=\"$CUDA_PATH\"\n\n# activate env\nbash --init-file <(echo \"source \\\"$CONDA_ROOT_PREFIX/etc/profile.d/conda.sh\\\" && conda activate \\\"$INSTALL_ENV_DIR\\\"\")\n"
  },
  {
    "path": "cmd_macos.sh",
    "content": "#!/bin/bash\n\ncd \"$(dirname \"${BASH_SOURCE[0]}\")\"\n\nif [[ \"$(pwd)\" =~ \" \" ]]; then echo This script relies on Miniforge which can not be silently installed under a path with spaces. && exit; fi\n\n# deactivate existing conda envs as needed to avoid conflicts\n{ conda deactivate && conda deactivate && conda deactivate; } 2> /dev/null\n\n# config\nCONDA_ROOT_PREFIX=\"$(pwd)/installer_files/conda\"\nINSTALL_ENV_DIR=\"$(pwd)/installer_files/env\"\n\n# environment isolation\nexport PYTHONNOUSERSITE=1\nunset PYTHONPATH\nunset PYTHONHOME\nexport CUDA_PATH=\"$INSTALL_ENV_DIR\"\nexport CUDA_HOME=\"$CUDA_PATH\"\n\n# activate env\nsource $CONDA_ROOT_PREFIX/etc/profile.d/conda.sh\nconda activate $INSTALL_ENV_DIR\nexec bash --norc\n"
  },
  {
    "path": "cmd_windows.bat",
    "content": "@echo off\n\ncd /D \"%~dp0\"\n\nset PATH=%PATH%;%SystemRoot%\\system32\n\necho \"%CD%\"| findstr /C:\" \" >nul && echo This script relies on Miniforge which can not be silently installed under a path with spaces. && goto end\n\n@rem fix failed install when installing to a separate drive\nset TMP=%cd%\\installer_files\nset TEMP=%cd%\\installer_files\n\n@rem deactivate existing conda envs as needed to avoid conflicts\n(call conda deactivate && call conda deactivate && call conda deactivate) 2>nul\n\n@rem config\nset CONDA_ROOT_PREFIX=%cd%\\installer_files\\conda\nset INSTALL_ENV_DIR=%cd%\\installer_files\\env\n\n@rem environment isolation\nset PYTHONNOUSERSITE=1\nset PYTHONPATH=\nset PYTHONHOME=\nset PYTHONUTF8=1\nset \"CUDA_PATH=%INSTALL_ENV_DIR%\"\nset \"CUDA_HOME=%CUDA_PATH%\"\n\n@rem activate installer env\ncall \"%CONDA_ROOT_PREFIX%\\condabin\\conda.bat\" activate \"%INSTALL_ENV_DIR%\" || ( echo. && echo Miniforge hook not found. && goto end )\n\n@rem enter commands\ncmd /k \"%*\"\n\n:end\npause\n"
  },
  {
    "path": "docker/.dockerignore",
    "content": ".env\nDockerfile\n/user_data\n"
  },
  {
    "path": "docker/TensorRT-LLM/Dockerfile",
    "content": "FROM nvidia/cuda:13.0.1-cudnn-runtime-ubuntu24.04\n\n# Install Python 3.12, Git, and OpenMPI\nRUN apt update && apt install -y python3.12 python3-pip git build-essential openmpi-bin libopenmpi-dev\n\n# Set the working directory\nWORKDIR /app\n\n# This is needed to avoid an error about \"Failed to build mpi4py\" in the next command\nENV LD_LIBRARY_PATH=/usr/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH\n\n# Install text-generation-webui\nRUN git clone https://github.com/oobabooga/text-generation-webui\nWORKDIR /app/text-generation-webui\nRUN pip install --break-system-packages -r requirements/full/requirements.txt\n\n# Install TensorRT-LLM\nRUN pip3 install --break-system-packages tensorrt_llm==1.1.0 --extra-index-url https://pypi.nvidia.com\n\n# Expose the necessary port for the Python server\nEXPOSE 7860 5000\n\n# Run the Python server.py script with the specified command\nCMD [\"python3\", \"server.py\", \"--api\", \"--listen\"]\n"
  },
  {
    "path": "docker/amd/Dockerfile",
    "content": "# BUILDER\nFROM ubuntu:22.04\nWORKDIR /builder\nARG BUILD_EXTENSIONS=\"${BUILD_EXTENSIONS:-}\"\nARG APP_UID=\"${APP_UID:-6972}\"\nARG APP_GID=\"${APP_GID:-6972}\"\n\nRUN --mount=type=cache,target=/var/cache/apt,sharing=locked,rw \\\n    apt update && \\\n    apt install --no-install-recommends -y git vim build-essential python3-dev pip bash curl && \\\n    rm -rf /var/lib/apt/lists/*\nWORKDIR /home/app/\nRUN git clone https://github.com/oobabooga/text-generation-webui.git \nWORKDIR /home/app/text-generation-webui\nRUN GPU_CHOICE=B LAUNCH_AFTER_INSTALL=FALSE INSTALL_EXTENSIONS=TRUE ./start_linux.sh --verbose\nEXPOSE ${CONTAINER_PORT:-7860} ${CONTAINER_API_PORT:-5000}\nWORKDIR /home/app/text-generation-webui\n# set umask to ensure group read / write at runtime\nCMD umask 0002 && export HOME=/home/app/text-generation-webui && ./start_linux.sh --listen\n"
  },
  {
    "path": "docker/amd/docker-compose.yml",
    "content": "version: \"3.3\"\nservices:\n  text-generation-webui:\n    build:\n      context: .\n      args:\n        BUILD_EXTENSIONS: ${BUILD_EXTENSIONS:-}\n        APP_GID: ${APP_GID:-6972}\n        APP_UID: ${APP_UID:-6972}\n    env_file: .env\n    user: \"${APP_RUNTIME_UID:-6972}:${APP_RUNTIME_GID:-6972}\"\n    ports:\n      - \"${HOST_PORT:-7860}:${CONTAINER_PORT:-7860}\"\n      - \"${HOST_API_PORT:-5000}:${CONTAINER_API_PORT:-5000}\"\n    stdin_open: true\n    group_add:\n      - video\n    tty: true\n    ipc: host\n    devices:\n      - /dev/kfd\n      - /dev/dri \n    cap_add: \n      - SYS_PTRACE\n    security_opt:\n      - seccomp=unconfined\n    volumes:\n      - ./user_data:/home/app/text-generation-webui/user_data\n"
  },
  {
    "path": "docker/cpu/Dockerfile",
    "content": "# BUILDER\nFROM ubuntu:22.04\nWORKDIR /builder\nARG BUILD_EXTENSIONS=\"${BUILD_EXTENSIONS:-}\"\nARG APP_UID=\"${APP_UID:-6972}\"\nARG APP_GID=\"${APP_GID:-6972}\"\n\nRUN --mount=type=cache,target=/var/cache/apt,sharing=locked,rw \\\n    apt update && \\\n    apt install --no-install-recommends -y git vim build-essential python3-dev pip bash curl && \\\n    rm -rf /var/lib/apt/lists/*\nWORKDIR /home/app/\nRUN git clone https://github.com/oobabooga/text-generation-webui.git \nWORKDIR /home/app/text-generation-webui\nRUN GPU_CHOICE=N LAUNCH_AFTER_INSTALL=FALSE INSTALL_EXTENSIONS=TRUE ./start_linux.sh --verbose\nEXPOSE ${CONTAINER_PORT:-7860} ${CONTAINER_API_PORT:-5000}\n# set umask to ensure group read / write at runtime\nWORKDIR /home/app/text-generation-webui\nCMD umask 0002 && export HOME=/home/app/text-generation-webui && ./start_linux.sh --listen\n"
  },
  {
    "path": "docker/cpu/docker-compose.yml",
    "content": "version: \"3.3\"\nservices:\n  text-generation-webui:\n    build:\n      context: .\n      args:\n        BUILD_EXTENSIONS: ${BUILD_EXTENSIONS:-}\n        APP_GID: ${APP_GID:-6972}\n        APP_UID: ${APP_UID:-6972}\n    env_file: .env\n    user: \"${APP_RUNTIME_UID:-6972}:${APP_RUNTIME_GID:-6972}\"\n    ports:\n      - \"${HOST_PORT:-7860}:${CONTAINER_PORT:-7860}\"\n      - \"${HOST_API_PORT:-5000}:${CONTAINER_API_PORT:-5000}\"\n    stdin_open: true\n    tty: true\n    volumes:\n      - ./user_data:/home/app/text-generation-webui/user_data\n"
  },
  {
    "path": "docker/intel/Dockerfile",
    "content": "# BUILDER\nFROM ubuntu:22.04\nWORKDIR /builder\nARG BUILD_EXTENSIONS=\"${BUILD_EXTENSIONS:-}\"\nARG APP_UID=\"${APP_UID:-6972}\"\nARG APP_GID=\"${APP_GID:-6972}\"\n\nRUN --mount=type=cache,target=/var/cache/apt,sharing=locked,rw \\\n    apt update && \\\n    apt install --no-install-recommends -y git vim build-essential python3-dev pip bash curl && \\\n    rm -rf /var/lib/apt/lists/*\nWORKDIR /home/app/\nRUN git clone https://github.com/oobabooga/text-generation-webui.git \nWORKDIR /home/app/text-generation-webui\nRUN GPU_CHOICE=D LAUNCH_AFTER_INSTALL=FALSE INSTALL_EXTENSIONS=TRUE ./start_linux.sh --verbose\nEXPOSE ${CONTAINER_PORT:-7860} ${CONTAINER_API_PORT:-5000}\n# set umask to ensure group read / write at runtime\nWORKDIR /home/app/text-generation-webui\nCMD umask 0002 && export HOME=/home/app/text-generation-webui && ./start_linux.sh --listen\n"
  },
  {
    "path": "docker/intel/docker-compose.yml",
    "content": "version: \"3.3\"\nservices:\n  text-generation-webui:\n    build:\n      context: .\n      args:\n        BUILD_EXTENSIONS: ${BUILD_EXTENSIONS:-}\n        APP_GID: ${APP_GID:-6972}\n        APP_UID: ${APP_UID:-6972}\n    env_file: .env\n    user: \"${APP_RUNTIME_UID:-6972}:${APP_RUNTIME_GID:-6972}\"\n    ports:\n      - \"${HOST_PORT:-7860}:${CONTAINER_PORT:-7860}\"\n      - \"${HOST_API_PORT:-5000}:${CONTAINER_API_PORT:-5000}\"\n    stdin_open: true\n    group_add:\n      - video\n    tty: true\n    ipc: host\n    devices:\n      - /dev/kfd\n      - /dev/dri \n    cap_add: \n      - SYS_PTRACE\n    security_opt:\n      - seccomp=unconfined\n    volumes:\n      - ./user_data:/home/app/text-generation-webui/user_data\n"
  },
  {
    "path": "docker/nvidia/Dockerfile",
    "content": "# BUILDER\nFROM ubuntu:22.04\nWORKDIR /builder\nARG TORCH_CUDA_ARCH_LIST=\"${TORCH_CUDA_ARCH_LIST:-3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX}\"\nARG BUILD_EXTENSIONS=\"${BUILD_EXTENSIONS:-}\"\nARG APP_UID=\"${APP_UID:-6972}\"\nARG APP_GID=\"${APP_GID:-6972}\"\n\nRUN --mount=type=cache,target=/var/cache/apt,sharing=locked,rw \\\n    apt update && \\\n    apt install --no-install-recommends -y git vim build-essential python3-dev pip bash curl && \\\n    rm -rf /var/lib/apt/lists/*\nWORKDIR /home/app/\nRUN git clone https://github.com/oobabooga/text-generation-webui.git \nWORKDIR /home/app/text-generation-webui\nRUN GPU_CHOICE=A LAUNCH_AFTER_INSTALL=FALSE INSTALL_EXTENSIONS=TRUE ./start_linux.sh --verbose\nEXPOSE ${CONTAINER_PORT:-7860} ${CONTAINER_API_PORT:-5000}\nWORKDIR /home/app/text-generation-webui\n# set umask to ensure group read / write at runtime\nCMD umask 0002 && export HOME=/home/app/text-generation-webui && ./start_linux.sh --listen\n"
  },
  {
    "path": "docker/nvidia/docker-compose.yml",
    "content": "version: \"3.3\"\nservices:\n  text-generation-webui:\n    build:\n      context: .\n      args:\n        # specify which cuda version your card supports: https://developer.nvidia.com/cuda-gpus\n        TORCH_CUDA_ARCH_LIST: ${TORCH_CUDA_ARCH_LIST:-8.6;8.9+PTX}\n        BUILD_EXTENSIONS: ${BUILD_EXTENSIONS:-}\n        APP_GID: ${APP_GID:-6972}\n        APP_UID: ${APP_UID:-6972}\n    env_file: .env\n    user: \"${APP_RUNTIME_UID:-6972}:${APP_RUNTIME_GID:-6972}\"\n    ports:\n      - \"${HOST_PORT:-7860}:${CONTAINER_PORT:-7860}\"\n      - \"${HOST_API_PORT:-5000}:${CONTAINER_API_PORT:-5000}\"\n    stdin_open: true\n    tty: true\n    volumes:\n      - ./user_data:/home/app/text-generation-webui/user_data\n    deploy:\n      resources:\n        reservations:\n          devices:\n            - driver: nvidia\n              count: all\n              capabilities: [gpu]\n"
  },
  {
    "path": "docs/01 - Chat Tab.md",
    "content": "Used to have multi-turn conversations with the model.\n\n## Input area\n\nThe main action buttons are:\n\n* **Send**: sends your message and makes the model start a reply.\n* **Stop**: stops an ongoing generation as soon as the next token is generated (which can take a while for a slow model).\n\nThe hover menu (☰) that appears over the chat area contains:\n\n* **Regenerate**: similar to Send, but your last message is used as input instead of the text in the input field. Note that if the temperature/top_p/top_k parameters are low in the \"Parameters\" tab of the UI, the new reply may end up identical to the previous one.\n* **Continue**: makes the model attempt to continue the existing reply. In some cases, the model may simply end the existing turn immediately without generating anything new, but in other cases, it may generate a longer reply.\n* **Remove last reply**: removes the last input/output pair from the history and sends your last message back into the input field.\n* **Impersonate**: makes the model generate a new message on your behalf in the input field, taking into consideration the existing chat history.\n* **Send dummy message**: adds a new message to the chat history without causing the model to generate a reply.\n* **Send dummy reply**: adds a new reply to the chat history as if the model had generated this reply. Useful in conjunction with \"Send dummy message\".\n* **Send to Notebook**: sends the entire chat prompt up to now to the Notebook tab.\n* **Show controls**: checkbox that toggles the visibility of the sidebar controls (Start reply with, Mode, Chat style, etc.). Shortcut: Ctrl+S.\n\n## Past chats\n\nAllows you to switch between the current and previous conversations with the current character, or between the current and previous instruct conversations (if in \"instruct\" mode). 
The available buttons are:\n\n* **Branch**: creates a branch of the current conversation at a specific message.\n* **Rename**: allows you to give a unique name to the selected conversation.\n* **🗑️**: deletes the selected conversation.\n* **New chat**: starts a new conversation. If you are talking to a character that has a \"Greeting\" message defined, this message will be automatically added to the new history.\n\nA search field is also available to filter conversations by name.\n\n## Sidebar controls\n\nThe sidebar (toggled via \"Show controls\") contains:\n\n* **Start reply with**: whatever you type there will appear at the start of every reply by the bot. This is useful to guide the response in the desired direction.\n* **Reasoning effort**: controls the thinking depth for models that support it. Options: low, medium, high.\n* **Enable thinking**: enables extended thinking mode for models that support it.\n* **Activate web search**: when enabled, the model can search the web for information before replying. You can also set the number of pages to download.\n* **Mode**: see below.\n* **Chat style**: see below.\n* **Command for chat-instruct mode**: the command that is used in chat-instruct mode to query the model to generate a reply on behalf of the character. Can be used creatively to generate specific kinds of responses. Inside this string, `<|character|>` is a placeholder that gets replaced with the bot name, and `<|prompt|>` is a placeholder that gets replaced with the full chat prompt.\n\n## Mode\n\nThe most important input field. It defines how the chat prompt is formatted. There are 3 options: chat, chat-instruct, and instruct. It is worth going into more detail about this because it seems to not be obvious to a lot of people.\n\n### Instruction-following models\n\nThere are two kinds of models: base models, like Llama and GPT-J, and fine-tuned models, like Alpaca and Vicuna. 
Fine-tuned models are trained starting from base models, most often with the goal of getting the model to understand and respond to instructions just like ChatGPT does. Let's call such models *instruction-following models*.\n\nEach instruction-following model was trained on a specific prompt format, and you have to use that exact prompt format if you want the model to follow your instructions as accurately as it can.\n\nAs an example, this is the Alpaca format:\n\n```\nBelow is an instruction that describes a task. Write a response that appropriately completes the request.\n\n### Instruction:\nHi there!\n\n### Response:\nHello! It's nice to meet you. What can I help with?\n\n### Instruction:\nHow are you?\n\n### Response:\nI'm doing well, thank you for asking! Is there something specific you would like to talk about or ask me? I'm here to help answer any questions you may have.\n```\n\nThis format is characterized by a context string at the top, and alternating turns where each user input starts with `### Instruction:` and each bot turn starts with `### Response:`. There are also weirder formats, like the one used by the Llama-2-chat models released by Meta AI:\n\n```\n[INST] <<SYS>>\nAnswer the questions.\n<</SYS>>\nHi there! [/INST] Hello! It's nice to meet you. What can I help with? </s><s>[INST] How are you? [/INST] I'm doing well, thank you for asking! Is there something specific you would like to talk about or ask me? I'm here to help answer any questions you may have.\n```\n\nIn this format, there are special tokens at the end of each bot reply (`</s>`, the end of sequence token, and `<s>`, the beginning of sequence token); no new lines separating the turns; and the context string is written between `<<SYS>>` and `<</SYS>>`. 
Despite the intimidating look of this format, the logic is the same: there are user turns and bot turns, and each one appears in a specific place in the template.\n\nIt is important to emphasize that instruction-following models **have to be used with the exact prompt format that they were trained on**. Using those models with any other prompt format should be considered undefined behavior. The model will still generate replies, but they will be less accurate to your inputs.\n\nNow that an instruction-following model is defined, we can move on to describing the 3 chat modes.\n\n### Chat\n\nUsed for talking to the character defined under the \"Character\" tab using a simple chat prompt in this format:\n\n```\nChiharu Yamada's Persona: Chiharu Yamada is a young, computer engineer-nerd with a knack for problem solving and a passion for technology.\nYou: Hi there!\nChiharu Yamada: Hello! It's nice to meet you. What can I help with?\nYou: How are you?\nChiharu Yamada: I'm doing well, thank you for asking! Is there something specific you would like to talk about or ask me? I'm here to help answer any questions you may have.\n```\n\nThere are 3 adjustable parameters in the \"Character\" tab being used in this prompt:\n\n* The **Context** string appears at the top of the prompt. Most often it describes the bot's personality and adds a few example messages to guide the model towards the desired reply length and format. This string never gets truncated: as the prompt size increases, old messages get removed one at a time until the prompt becomes smaller than the truncation length set under \"Parameters\" > \"Generation\" > \"Truncate the prompt up to this length\".\n* The **Your name** string appears at the beginning of each user reply. 
By default, this string is \"You\".\n* The **Character's name** string appears at the beginning of each bot reply.\n\nAdditionally, the **Greeting** string appears as the bot's opening message whenever the history is cleared.\n\nThe \"Chat\" option should typically be used only for base models or non-instruct fine tunes, and should not be used for instruction-following models.\n\n### Instruct\n\nUsed for talking to an instruction-following model using the prompt format defined under \"Parameters\" > \"Instruction template\". Think of this option as an offline ChatGPT.\n\nThe prompt format is defined by the **Instruction template** parameter in \"Parameters\" > \"Instruction template\", which represents a Jinja2 template.\n\nNote that when you load a model in the \"Model\" tab, the web UI will try to automatically detect its instruction template (if any), and will update the values under \"Parameters\" > \"Instruction template\" accordingly. This is done using a set of regular expressions defined in `user_data/models/config.yaml`. This detection is not guaranteed to be accurate. You should check the model card on Hugging Face to see if you are using the correct prompt format.\n\n### Chat-instruct\n\nAs said above, instruction-following models are meant to be used with their specific prompt templates. The chat-instruct mode allows you to use those templates to generate a chat reply, thus mixing Chat and Instruct modes (hence the name).\n\nIt works by creating a single instruction-following turn where a command is given followed by the regular chat prompt. Here is an example in Alpaca format:\n\n```\nBelow is an instruction that describes a task. Write a response that appropriately completes the request.\n\n### Instruction:\nContinue the chat dialogue below. 
Write a single reply for the character \"Chiharu Yamada\".\nChiharu Yamada's Persona: Chiharu Yamada is a young, computer engineer-nerd with a knack for problem solving and a passion for technology.\nYou: Hi there!\nChiharu Yamada: Hello! It's nice to meet you. What can I help with?\nYou: How are you?\n\n### Response:\nChiharu Yamada:\n```\n\nHere, the command is\n\n> Continue the chat dialogue below. Write a single reply for the character \"Chiharu Yamada\".\n\nBelow this command, the regular chat prompt is added, including its Context string and the chat history, and then the user turn ends. The bot turn starts with the \"Character's name\" string followed by `:`, thus prompting the instruction-following model to write a single reply for the character.\n\nNote that you can get creative: instead of writing something trivial like \"Write a single reply for the character\", you could add more complex instructions like\n\n> This is an adventure game, and your task is to write a reply in name of \"<|character|>\" where 3 options are given for the user to then choose from.\n\nAnd it works:\n\n![chat-instruct](https://github.com/oobabooga/text-generation-webui/assets/112222186/e38e3469-8263-4a10-b1a1-3c955026b8e7)\n\n## Chat style\n\nThis defines the visual style of the chat UI. Each option is a CSS file defined under `text-generation-webui/css/chat_style-name.css`, where \"name\" is how this style is called in the dropdown menu. You can add new styles by simply copying `chat_style-cai-chat.css` to `chat_style-myNewStyle.css` and editing the contents of this new file. If you end up with a style that you like, you are highly encouraged to submit it to the repository.\n\nThe styles are only applied to chat and chat-instruct modes. Instruct mode has its separate style defined in `text-generation-webui/css/html_instruct_style.css`.\n\n## Character gallery\n\nThis menu is a built-in extension defined under `text-generation-webui/extensions/gallery`. 
It displays a gallery with your characters, and if you click on a character, it will be automatically selected in the Character tab.\n"
  },
  {
    "path": "docs/02 - Default and Notebook Tabs.md",
    "content": "Used to generate raw completions starting from your prompt.\n\n## Default tab\n\nThis tab contains two main text boxes: Input, where you enter your prompt, and Output, where the model output will appear.\n\n### Input\n\nThe number on the lower right of the Input box counts the number of tokens in the input. It gets updated whenever you update the input text as long as a model is loaded (otherwise there is no tokenizer to count the tokens).\n\nBelow the Input box, the following buttons can be found:\n\n* **Continue**: starts a new generation taking as input the text in the \"Output\" box.\n* **Generate**: starts a new generation.\n* **Stop**: stops an ongoing generation as soon as the next token is generated (which can take a while for a slow model).\n\nIn the **Prompt** menu, you can select from saved prompts stored in `user_data/logs/notebook`. The **New** button creates a new prompt, the **Rename** button renames the selected prompt, and the 🗑️ button deletes it. The 🔄 button refreshes the list.\n\n### Output\n\nFive tabs can be found:\n\n* **Raw**: where the raw text generated by the model appears.\n* **Markdown**: it contains a \"Render\" button. You can click on it at any time to render the current output as markdown. This is particularly useful for models that generate LaTeX equations like GALACTICA.\n* **HTML**: displays the output in an HTML style that is meant to be easier to read. Its style is defined under `text-generation-webui/css/html_readable_style.css`.\n* **Logits**: when you click on \"Get next token probabilities\", this tab displays the 50 most likely next tokens and their probabilities based on your current input. If \"Use samplers\" is checked, the probabilities will be the ones after the sampling parameters in the \"Parameters\" > \"Generation\" tab are applied. 
Otherwise, they will be the raw probabilities generated by the model.\n* **Tokens**: allows you to tokenize your prompt and see the ID numbers for the individual tokens.\n\n## Notebook tab\n\nPrecisely the same thing as the Default tab, with the difference that the output appears in the same text box as the input.\n\nIt contains the following additional button:\n\n* **Regenerate**: uses your previous input for generation while discarding the last output.\n"
  },
  {
    "path": "docs/03 - Parameters Tab.md",
    "content": "## Generation\n\nContains parameters that control the text generation. \n\n### Quick rundown\n\nLLMs work by generating one token at a time. Given your prompt, the model calculates the probabilities for every possible next token. The actual token generation is done after that. \n\n* In *greedy decoding*, the most likely token is always picked.\n* Most commonly, *sampling* techniques are used to choose from the next-token distribution in a more non-trivial way with the goal of improving the quality of the generated text.\n\n### Preset menu\n\nCan be used to save and load combinations of parameters for reuse.\n\n* **🎲 button**: creates a random yet interpretable preset. Only 1 parameter of each category is included for the categories: removing tail tokens, avoiding repetition, and flattening the distribution. That is, top_p and top_k are not mixed, and neither are repetition_penalty and frequency_penalty. You can use this button to break out of a loop of bad generations after multiple \"Regenerate\" attempts.\n\n#### Built-in presets\n\nThese were obtained after a blind contest called \"Preset Arena\" where hundreds of people voted. The full results can be found [here](https://github.com/oobabooga/oobabooga.github.io/blob/main/arena/results.md).\n\nA key takeaway is that the best presets are:\n\n* **For Instruct**: Divine Intellect, Big O, simple-1.\n* **For Chat**: Midnight Enigma, Yara, Shortwave.\n\nThe other presets are:\n\n* Mirostat: a special decoding technique first implemented in llama.cpp and then adapted into this repository for all loaders. Many people have obtained positive results with it for chat.\n* LLaMA-Precise: a legacy preset that was the default for the web UI before the Preset Arena.\n* Debug-deterministic: disables sampling. 
It is useful for debugging, or if you intentionally want to use greedy decoding.\n\n### Parameters description\n\nFor more information about the parameters, the [transformers documentation](https://huggingface.co/docs/transformers/main_classes/text_generation#transformers.GenerationConfig) is a good reference.\n\n* **max_new_tokens**: Maximum number of tokens to generate. Don't set it higher than necessary: it is used in the truncation calculation through the formula `(prompt_length) = min(truncation_length - max_new_tokens, prompt_length)`, so your prompt will get truncated if you set it too high.\n* **temperature**: Primary factor to control the randomness of outputs. 0 = deterministic (only the most likely token is used). Higher value = more randomness.\n* **top_p**: If not set to 1, select tokens with probabilities adding up to less than this number. Higher value = higher range of possible random results.\n* **min_p**: Tokens with probability smaller than `(min_p) * (probability of the most likely token)` are discarded. This is the same as top_a but without squaring the probability.\n* **top_k**: Similar to top_p, but select instead only the top_k most likely tokens. Higher value = higher range of possible random results.\n* **repetition_penalty**: Penalty factor for repeating prior tokens. 1 means no penalty, higher value = less repetition, lower value = more repetition.\n* **presence_penalty**: Similar to repetition_penalty, but with an additive offset on the raw token scores instead of a multiplicative factor. It may generate better results. 0 means no penalty, higher value = less repetition, lower value = more repetition. Previously called \"additive_repetition_penalty\".\n* **frequency_penalty**: Repetition penalty that scales based on how many times the token has appeared in the context. 
Be careful with this; there's no limit to how much a token can be penalized.\n* **repetition_penalty_range**: The number of most recent tokens to consider for repetition penalty. 0 makes all tokens be used.\n* **dry_multiplier**: Set to greater than 0 to enable DRY (Don't Repeat Yourself) sampling. It penalizes tokens that would extend a sequence that already appeared in the context. Recommended value: 0.8.\n* **dry_allowed_length**: The longest sequence that can be repeated without being penalized by DRY. Shorter values make DRY more aggressive.\n* **dry_base**: Controls how fast the DRY penalty grows with increasing sequence length.\n* **typical_p**: If not set to 1, select only tokens that are at least this much more likely to appear than random tokens, given the prior text.\n* **tfs**: Tries to detect a tail of low-probability tokens in the distribution and removes those tokens. See [this blog post](https://www.trentonbricken.com/Tail-Free-Sampling/) for details. The closer to 0, the more discarded tokens.\n* **top_a**: Tokens with probability smaller than `(top_a) * (probability of the most likely token)^2` are discarded.\n* **top_n_sigma**: Keeps only tokens within N standard deviations of the mean log-probability. Acts as an adaptive cutoff that adjusts to the shape of the distribution. 0 disables it.\n* **xtc_threshold**: eXclusion from Top Choices (XTC) sampling. If 2 or more tokens have probability above this threshold, the top token may be removed. This encourages the model to use less common word choices and can increase creativity.\n* **xtc_probability**: The probability that XTC removal will actually happen when the threshold condition is met. Set to 1 for it to always apply, or lower for occasional application.\n* **epsilon_cutoff**: In units of 1e-4; a reasonable value is 3. This sets a probability floor below which tokens are excluded from being sampled.\n* **eta_cutoff**: In units of 1e-4; a reasonable value is 3. 
The main parameter of the special Eta Sampling technique. See [this paper](https://arxiv.org/pdf/2210.15191.pdf) for a description.\n* **guidance_scale**: The main parameter for Classifier-Free Guidance (CFG). [The paper](https://arxiv.org/pdf/2306.17806.pdf) suggests that 1.5 is a good value. It can be used in conjunction with a negative prompt or not.\n* **Negative prompt**: Only used when `guidance_scale != 1`. It is most useful for instruct models and custom system messages. You place your full prompt in this field with the system message replaced with the default one for the model (like \"You are Llama, a helpful assistant...\") to make the model pay more attention to your custom system message.\n* **penalty_alpha**: Contrastive Search is enabled by setting this to greater than zero and unchecking \"do_sample\". It should be used with a low value of top_k, for instance, top_k = 4.\n* **mirostat_mode**: Activates Mirostat sampling, an adaptive decoding method that dynamically controls output perplexity for higher-quality text generation. 0 is disabled. 1 is the classic Mirostat algorithm described in [the paper](https://arxiv.org/abs/2007.14966), but can be less stable, or “wobbly,” and produce less coherent text. 2 is the improved version that is more stable and has lower perplexity, recommended for most use cases. \n*Note: Use either mirostat or dynamic_temperature, not both at the same time.*\n* **mirostat_tau**: Target perplexity for Mirostat sampling. Controls how “surprising” the text is. Higher values = more diverse, lower = more predictable. Preset Arena suggests 8 as a good value.\n* **mirostat_eta**: Learning rate for Mirostat’s perplexity adjustment. Higher values = adapts faster but less stable, lower values = slower but more stable. Preset Arena suggests 0.1 as a good value.\n* **adaptive_target**: Target probability for adaptive-p sampling. 
This method adjusts the sampling threshold dynamically based on an exponential moving average of recent token probabilities. 0 disables it.\n* **adaptive_decay**: EMA decay rate for adaptive-p sampling. Controls how quickly the running average adjusts. Default: 0.9.\n* **dynamic_temperature**: Activates Dynamic Temperature. This modifies temperature to range between \"dynatemp_low\" (minimum) and \"dynatemp_high\" (maximum), with an entropy-based scaling. The steepness of the curve is controlled by \"dynatemp_exponent\". \n*Note: Use either dynamic_temperature or mirostat, not both at the same time.*\n* **smoothing_factor**: Activates Quadratic Sampling. When `0 < smoothing_factor < 1`, the logits distribution becomes flatter. When `smoothing_factor > 1`, it becomes more peaked.\n* **smoothing_curve**: Adjusts the dropoff curve of Quadratic Sampling. Higher values make the curve steeper. Only takes effect when smoothing_factor is set.\n* **temperature_last**: Makes temperature the last sampler instead of the first. With this, you can remove low probability tokens with a sampler like min_p and then use a high temperature to make the model creative without losing coherency. Note: this parameter takes precedence over \"Sampler priority\". That means that `temperature`/`dynamic_temperature`/`quadratic_sampling` will be removed from wherever they are and moved to the end of the stack.\n* **do_sample**: When unchecked, sampling is entirely disabled, and greedy decoding is used instead (the most likely token is always picked).\n* **Seed**: Set the Pytorch seed to this number. Note that some loaders do not use Pytorch (notably llama.cpp). For these loaders, the seed has no effect.\n* **encoder_repetition_penalty**: Also known as the \"Hallucinations filter\". Used to penalize tokens that are *not* in the prior text. 
Higher value = more likely to stay in context, lower value = more likely to diverge.\n* **no_repeat_ngram_size**: If not set to 0, specifies the length of token sets that are completely blocked from repeating at all. Higher values = blocks larger phrases, lower values = blocks words or letters from repeating. Only 0 or high values are a good idea in most cases.\n\nTo the right (or below if you are on mobile), the following parameters are present:\n\n* **Truncate the prompt up to this length**: Used to prevent the prompt from getting bigger than the model's context length. In the case of the transformers loader, which allocates memory dynamically, this parameter can also be used to set a VRAM ceiling and prevent out-of-memory errors. This parameter is automatically updated with the model's context length (from \"ctx_size\" for loaders that use this parameter, and from the model metadata directly for loaders that do not) when you load a model.\n* **Maximum number of tokens/second**: to make text readable in real-time in case the model is generating too fast. Good if you want to flex and tell everyone how good your GPU is.\n* **Custom system message**: If not empty, will be used instead of the default system message in the instruction template. Useful for customizing the personality of the chatbot. Example: \"You are a duck.\"\n* **Custom stopping strings**: The model stops generating as soon as any of the strings set in this field is generated. Note that when generating text in the Chat tab, some default stopping strings are set regardless of this parameter, like \"\\nYour Name:\" and \"\\nBot name:\" for chat mode. That's why this parameter has a \"Custom\" in its name.\n* **Custom token bans**: Allows you to ban the model from generating certain tokens altogether. 
You need to find the token IDs under \"Default\" > \"Tokens\" or \"Notebook\" > \"Tokens\", or by looking at the `tokenizer.json` for the model directly.\n* **auto_max_new_tokens**: When checked, the max_new_tokens parameter is expanded in the backend to the available context length. The maximum length is given by the \"truncation_length\" parameter. This is useful for getting long replies in the Chat tab without having to click on \"Continue\" many times.\n* **Ban the eos_token**: One of the possible tokens that a model can generate is the EOS (End of Sequence) token. When it is generated, the generation stops prematurely. When this parameter is checked, that token is banned from being generated, and the generation will always generate \"max_new_tokens\" tokens.\n* **Add the bos_token to the beginning of prompts**: By default, the tokenizer will add a BOS (Beginning of Sequence) token to your prompt. During training, BOS tokens are used to separate different documents. If unchecked, no BOS token will be added, and the model will interpret your prompt as being in the middle of a document instead of at the start of one. This significantly changes the output and can make it more creative.\n* **Skip special tokens**: When decoding the generated tokens, skip special tokens from being converted to their text representation. Otherwise, BOS appears as `<s>`, EOS as `</s>`, etc.\n* **prompt_lookup_num_tokens**: Activates Prompt Lookup Decoding, a form of speculative decoding for the Transformers loader. It guesses future tokens by looking for matching patterns in the prompt itself, which can speed up generation for tasks that involve repeating or paraphrasing parts of the input.\n* **Activate text streaming**: When unchecked, the full response is outputted at once, without streaming the words one at a time. 
I recommend unchecking this parameter on high latency networks like running the webui on Google Colab or using `--share`.\n* **Static KV cache**: Use a static cache for improved performance with the Transformers loader. May not be compatible with all models.\n* **Sampler priority**: Allows you to customize the order in which the different samplers are applied. The first sampler on the list gets applied first. With this, custom orders like `top_p -> temperature -> top_k` can be defined.\n* **DRY sequence breakers**: Tokens across which DRY sequence matching is not continued. Typically punctuation and special tokens. Only used when DRY is active (dry_multiplier > 0).\n* **Load grammar from file**: Loads a GBNF grammar from a file under `user_data/grammars`. The output is written to the \"Grammar\" box below. You can also save and delete custom grammars using this menu.\n* **Grammar**: Allows you to constrain the model output to a particular format. For instance, you can make the model generate lists, JSON, specific words, etc. Grammar is extremely powerful and I highly recommend it. The syntax looks a bit daunting at first sight, but it gets very easy once you understand it. See the [GBNF Guide](https://github.com/ggerganov/llama.cpp/blob/master/grammars/README.md) for details.\n\n### Chat tab controls\n\nThe following parameters appear in the Chat tab sidebar rather than the Parameters tab:\n\n* **reasoning_effort**: Controls the thinking depth for models that support it (used by GPT-OSS). Options: low, medium, high.\n* **enable_thinking**: Enables extended thinking mode for models that support it (used by Seed-OSS and pre-2507 Qwen3). 
When enabled, the model can use a thinking step before generating its reply.\n\n## Instruction template\n\nThis sub-tab within the Parameters tab defines the instruction template used in the Chat tab when \"instruct\" or \"chat-instruct\" are selected under \"Mode\".\n\n* **Saved instruction templates**: A dropdown menu where you can select a template. Click **Load** to apply it. The 💾 button saves the current template, and the 🗑️ button deletes the selected one.\n* **Instruction template**: A Jinja2 template that defines the prompt format for the instruction-following conversation.\n* **Send to notebook**: Send the full instruction template in string format to the Notebook tab.\n* **Chat template**: A Jinja2 template that defines the prompt format for regular chat conversations with characters.\n\n## Character tab\n\nThe Character tab is a separate top-level tab that contains the following sub-tabs:\n\n### Character\n\nParameters that define the character used in the Chat tab when \"chat\" or \"chat-instruct\" are selected under \"Mode\".\n\n* **Character**: A dropdown menu where you can select from saved characters, save a new character (💾 button), and delete the selected character (🗑️). The **Restore character** button resets the character to its last saved state.\n* **Character's name**: The bot name as it appears in the prompt.\n* **Context**: A string that is always at the top of the prompt. It never gets truncated. It usually defines the bot's personality and some key elements of the conversation.\n* **Greeting**: An opening message for the bot. When set, it appears whenever you start a new chat.\n* **Character picture**: A profile picture for the bot. To make it apply, you need to save the bot by clicking on 💾.\n* **Your picture**: Your profile picture. 
It will be used in all conversations.\n\nNote: the following replacements take place in the context and greeting fields when the chat prompt is generated:\n\n* `{{char}}` and `<BOT>` get replaced with \"Character's name\".\n* `{{user}}` and `<USER>` get replaced with \"Your name\".\n\nSo you can use those special placeholders in your character definitions. They are commonly found in TavernAI character cards.\n\n### User\n\nAllows you to create and manage user profiles.\n\n* **User**: A dropdown to select, save (💾), or delete (🗑️) user profiles.\n* **Name**: Your name as it appears in the prompt.\n* **Description**: An optional description of yourself that can be referenced in conversations.\n\n### Chat history\n\nIn this tab, you can download the current chat history in JSON format and upload a previously saved chat history.\n\nWhen a history is uploaded, a new chat is created to hold it. That is, you don't lose your current chat in the Chat tab.\n\n### Upload character\n\n#### YAML or JSON\n\nAllows you to upload characters in the YAML or JSON format used by the web UI, optionally including a profile picture.\n\n#### TavernAI PNG\n\nAllows you to upload a TavernAI character card. It will be converted to the internal YAML format of the web UI after upload.\n"
  },
  {
    "path": "docs/04 - Model Tab.md",
    "content": "This is where you load models, apply LoRAs to a loaded model, and download new models.\n\n## Model loaders\n\n### llama.cpp\n\nLoads: GGUF models. Note: GGML models have been deprecated and do not work anymore.\n\nExample: https://huggingface.co/TheBloke/Llama-2-7b-Chat-GGUF\n\n* **gpu_layers**: The number of layers to allocate to the GPU. If set to 0, only the CPU will be used. If you want to offload all layers, you can simply set this to the maximum value.\n* **ctx_size**: Context length of the model. In llama.cpp, the cache is preallocated, so the higher this value, the higher the VRAM. It is automatically set to the maximum sequence length for the model based on the metadata inside the GGUF file, but you may need to lower this value to fit the model into your GPU. Set to 0 for automatic context size based on available memory. After loading the model, the \"Truncate the prompt up to this length\" parameter under \"Parameters\" > \"Generation\" is automatically set to your chosen \"ctx_size\" so that you don't have to set the same thing twice.\n* **cache_type**: KV cache quantization type. Valid options: `fp16`, `q8_0`, `q4_0`. Lower quantization saves VRAM at the cost of some quality.\n* **tensor_split**: For multi-gpu only. Sets the amount of memory to allocate per GPU as proportions. Not to be confused with other loaders where this is set in GB; here you can set something like `30,70` for 30%/70%.\n* **batch_size**: Maximum number of prompt tokens to batch together when calling llama_eval.\n* **ubatch_size**: Physical maximum batch size for prompt processing.\n* **threads**: Number of threads. Recommended value: your number of physical cores.\n* **threads_batch**: Number of threads for batch processing. Recommended value: your total number of cores (physical + virtual).\n* **cpu_moe**: Force MoE expert layers to run on the CPU, keeping the rest on the GPU.\n* **extra_flags**: Extra flags to pass to llama-server. 
Format: `flag1=value1,flag2,flag3=value3`. Example: `override-tensor=exps=CPU`.\n* **mmproj**: Path to the mmproj file for multimodal (vision) models. This enables image understanding capabilities.\n* **streaming_llm**: Experimental feature to avoid re-evaluating the entire prompt when part of it is removed, for instance, when you hit the context length for the model in chat mode and an old message is removed.\n* **cpu**: Force a version of llama.cpp compiled without GPU acceleration to be used. Can usually be ignored. Only set this if you want to use CPU only and llama.cpp doesn't work otherwise.\n* **row_split**: Split the model by rows across GPUs. This may improve multi-gpu performance.\n* **no_kv_offload**: Do not offload the KV cache to the GPU. This saves VRAM but reduces performance.\n* **no_mmap**: Loads the model into memory at once, possibly preventing I/O operations later on at the cost of a longer load time.\n* **mlock**: Force the system to keep the model in RAM rather than swapping or compressing.\n* **numa**: May improve performance on certain multi-cpu systems.\n\n### Transformers\n\nLoads: full precision (16-bit or 32-bit) models, as well as bitsandbytes-quantized models. The repository usually has a clean name without GGUF or EXL3 in its name, and the model files are named `model.safetensors` or split into parts like `model-00001-of-00004.safetensors`.\n\nExample: [https://huggingface.co/lmsys/vicuna-7b-v1.5](https://huggingface.co/lmsys/vicuna-7b-v1.5).\n\nFull precision models use a ton of VRAM, so you will usually want to select the \"load_in_4bit\" and \"use_double_quant\" options to load the model in 4-bit precision using bitsandbytes.\n\nOptions:\n\n* **gpu_split**: When using multiple GPUs, sets the amount of VRAM in GB to allocate per GPU. Example: `20,7,7`.\n* **cpu_memory**: Maximum CPU memory in GiB to use for CPU offloading via the accelerate library. 
Whatever doesn't fit in the GPU or CPU will go to a disk cache if the \"disk\" checkbox is enabled.\n* **compute_dtype**: Used when \"load_in_4bit\" is checked. I recommend leaving the default value.\n* **quant_type**: Used when \"load_in_4bit\" is checked. I recommend leaving the default value.\n* **attn_implementation**: Choose the attention implementation. Valid options: `sdpa`, `eager`, `flash_attention_2`. The default (`sdpa`) works well in most cases; `flash_attention_2` may be useful for training.\n* **cpu**: Loads the model in CPU mode using Pytorch. The model will be loaded in 32-bit precision, so a lot of RAM will be used. CPU inference with transformers is older than llama.cpp and it works, but it's a lot slower. Note: this parameter has a different interpretation in the llama.cpp loader (see above).\n* **load_in_8bit**: Load the model in 8-bit precision using bitsandbytes. The 8-bit kernel in that library has been optimized for training and not inference, so load_in_8bit is slower than load_in_4bit (but more accurate).\n* **bf16**: Use bfloat16 precision instead of float16 (the default). Only applies when quantization is not used.\n* **disk**: Enable disk offloading for layers that don't fit into the GPU and CPU combined.\n* **load_in_4bit**: Load the model in 4-bit precision using bitsandbytes.\n* **use_double_quant**: Use double quantization with 4-bit loading for reduced memory usage.\n* **trust-remote-code**: Some models use custom Python code to load the model or the tokenizer. For such models, this option needs to be set. It doesn't download any remote content: all it does is execute the .py files that get downloaded with the model. Those files can potentially include malicious code; I have never seen it happen, but it is in principle possible.\n* **no_use_fast**: Do not use the \"fast\" version of the tokenizer. 
Can usually be ignored; only check this if you can't load the tokenizer for your model otherwise.\n\n### ExLlamav3_HF\n\nLoads: EXL3 models. These models usually have \"EXL3\" or \"exl3\" in the model name.\n\nUses the ExLlamaV3 backend with Transformers samplers.\n\n* **ctx_size**: Context length of the model. The cache is preallocated, so the higher this value, the higher the VRAM. It is automatically set to the maximum sequence length for the model based on its metadata, but you may need to lower this value to fit the model into your GPU. After loading the model, the \"Truncate the prompt up to this length\" parameter under \"Parameters\" > \"Generation\" is automatically set to your chosen \"ctx_size\" so that you don't have to set the same thing twice.\n* **cache_type**: KV cache quantization type. Valid options: `fp16`, `q2` to `q8`. You can also specify key and value bits separately, e.g. `q4_q8`. Lower quantization saves VRAM at the cost of some quality.\n* **gpu_split**: Comma-separated list of VRAM (in GB) to use per GPU device for model layers. Example: `20,7,7`.\n* **cfg_cache**: Creates a second cache to hold the CFG negative prompts. You need to set this if and only if you intend to use CFG in the \"Parameters\" > \"Generation\" tab. Checking this parameter doubles the cache VRAM usage.\n* **no_use_fast**: Do not use the \"fast\" version of the tokenizer.\n* **enable_tp**: Enable Tensor Parallelism (TP) to split the model across GPUs.\n* **tp_backend**: The backend for tensor parallelism. Valid options: `native`, `nccl`. Default: `native`.\n\n### ExLlamav3\n\nThe same as ExLlamav3_HF but using the internal samplers of ExLlamaV3 instead of the ones in the Transformers library. Supports speculative decoding with a draft model. 
Also supports multimodal (vision) models natively.\n\n* **ctx_size**: Same as ExLlamav3_HF.\n* **cache_type**: Same as ExLlamav3_HF.\n* **gpu_split**: Same as ExLlamav3_HF.\n* **enable_tp**: Enable Tensor Parallelism (TP) to split the model across GPUs.\n* **tp_backend**: The backend for tensor parallelism. Valid options: `native`, `nccl`. Default: `native`.\n\n### TensorRT-LLM\n\nLoads: TensorRT-LLM engine models. These are highly optimized models compiled specifically for NVIDIA GPUs.\n\n* **ctx_size**: Context length of the model.\n* **cpp_runner**: Use the ModelRunnerCpp runner, which is faster than the default ModelRunner but doesn't support streaming yet.\n\n## Model dropdown\n\nHere you can select a model to be loaded, refresh the list of available models, load/unload/reload the selected model, and save the settings for the model. The \"settings\" are the values in the input fields (checkboxes, sliders, dropdowns) below this dropdown.\n\nAfter saving, those settings will get restored whenever you select that model again in the dropdown menu.\n\nIf the **Autoload the model** checkbox is selected, the model will be loaded as soon as it is selected in this menu. Otherwise, you will have to click on the \"Load\" button.\n\n## LoRA dropdown\n\nUsed to apply LoRAs to the model. Note that LoRA support is not implemented for all loaders. Check the [What Works](https://github.com/oobabooga/text-generation-webui/wiki/What-Works) page for details.\n\n## Download model or LoRA\n\nHere you can download a model or LoRA directly from the https://huggingface.co/ website.\n\n* Models will be saved to `user_data/models`.\n* LoRAs will be saved to `user_data/loras`.\n\nIn the input field, you can enter either the Hugging Face username/model path (like `facebook/galactica-125m`) or the full model URL (like `https://huggingface.co/facebook/galactica-125m`). 
To specify a branch, add it at the end after a \":\" character like this: `facebook/galactica-125m:main`.\n\nTo download a single file, as necessary for models in GGUF format, you can click on \"Get file list\" after entering the model path in the input field, and then copy and paste the desired file name in the \"File name\" field before clicking on \"Download\".\n"
  },
  {
    "path": "docs/05 - Training Tab.md",
    "content": "## Training Your Own LoRAs\n\nA LoRA is tied to a specific model architecture — a LoRA trained on Llama 3 8B won't work on Mistral 7B. Train on the exact model you plan to use.\n\n### Quick Start\n\n1. Load your base model with the **Transformers** loader (no LoRAs loaded).\n2. Open the **Training** tab > **Train LoRA**.\n3. Pick a dataset and configure parameters (see [below](#parameters)).\n4. Click **Start LoRA Training** and monitor the [loss](#loss).\n5. When done, load the LoRA from the **Models** tab and test it.\n\n### Resuming Training\n\nTo resume from a checkpoint, use the same LoRA name and uncheck `Override Existing Files`. If checkpoints exist (from `Save every n steps`), training will automatically resume from the latest one with full optimizer and scheduler state preserved. Note that you cannot change the `Rank` of an already created LoRA.\n\nYou should also use `Copy parameters from` to restore the UI settings (learning rate, epochs, etc.) from the previous run, so that training continues with the same configuration.\n\n### Troubleshooting\n\n- **Corrupted outputs**: Start over with a lower Learning Rate.\n- **Not learning enough**: Run more epochs, or increase the Rank.\n- **Unwanted formatting**: Tweak your dataset, or train for fewer steps.\n\n## Instruction Templates\n\nAll instruction/chat training uses `apply_chat_template()` with Jinja2 templates. You have two options in the **Instruction Template** dropdown:\n\n- **Chat Template**: Uses the model's built-in chat template from its tokenizer. Works with instruct/chat models that ship with a chat template (Llama 3, Qwen, Mistral, etc.).\n- **Named template** (e.g. ChatML, Alpaca, Llama-v3, etc.): Loads a Jinja2 template from `user_data/instruction-templates/`. 
This is useful for base models that don't have a built-in template, or when you want to override the model's default template.\n\nBoth options are functionally identical — the only difference is where the Jinja2 template string comes from. In both cases:\n- The dataset is tokenized via `apply_chat_template()`\n- Labels are automatically masked so only assistant responses are trained on\n- Multi-turn conversations are supported natively\n- Special tokens are handled correctly by the template\n\nThe WebUI ships with 50+ templates in `user_data/instruction-templates/`. You can also add your own by creating a `.yaml` file with an `instruction_template` key containing a Jinja2 template string, or a plain `.jinja` file.\n\n**Dataset formats:** Your JSON dataset can use either of these structures:\n\nOpenAI messages format:\n```json\n[\n  {\n    \"messages\": [\n      {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n      {\"role\": \"user\", \"content\": \"What is Python?\"},\n      {\"role\": \"assistant\", \"content\": \"A programming language.\"},\n      {\"role\": \"user\", \"content\": \"What's it used for?\"},\n      {\"role\": \"assistant\", \"content\": \"Web dev, data science, scripting, and more.\"}\n    ]\n  }\n]\n```\n\nShareGPT format (`conversations` key with `from`/`value` fields):\n```json\n[\n  {\n    \"conversations\": [\n      {\"from\": \"system\", \"value\": \"You are a helpful assistant.\"},\n      {\"from\": \"human\", \"value\": \"What is Python?\"},\n      {\"from\": \"gpt\", \"value\": \"A programming language.\"},\n      {\"from\": \"human\", \"value\": \"What's it used for?\"},\n      {\"from\": \"gpt\", \"value\": \"Web dev, data science, scripting, and more.\"}\n    ]\n  }\n]\n```\n\n## Text Dataset\n\nFor pretraining-style training on raw text, use the **Text Dataset** tab. 
Your dataset should be a JSON file with one document per row, each with a `\"text\"` key:\n\n```json\n[\n  {\"text\": \"First document content...\"},\n  {\"text\": \"Second document content...\"}\n]\n```\n\nThis is the standard format used by most pretraining datasets (The Pile, RedPajama, etc.).\n\nEach document is tokenized (with BOS token), concatenated into one long token sequence, and split into chunks of `Cutoff Length` tokens. The final chunk is padded if shorter than the cutoff length. When `Add EOS token` is enabled, an EOS token is appended after each document before concatenation, helping the model learn document boundaries.\n\n- `Stride Length` controls the overlap between consecutive chunks in tokens. Set to 0 for non-overlapping chunks (the standard concatenate-and-split approach). Values like 256 or 512 create overlapping chunks that help the model learn context across chunk boundaries, at the cost of more training samples.\n\n## Target Modules\n\nBy default, **Target all linear layers** is enabled. This uses peft's `all-linear` mode, which applies LoRA to every `nn.Linear` layer in the model except the output head (`lm_head`). It works for any model architecture.\n\nIf you uncheck it, you can manually select individual projection modules (`q_proj`, `k_proj`, `v_proj`, `o_proj`, `gate_proj`, `down_proj`, `up_proj`). Targeting fewer modules reduces VRAM usage and adapter size, but also reduces how much the model can learn. The default selection of `q_proj` + `v_proj` is the minimum for basic style/format training.\n\n## Parameters\n\nEach parameter has a description in the UI. Below is guidance on the most important choices.\n\n### VRAM\n\nVRAM usage during training is roughly similar to inference with ~1000 tokens of context. If you can run the model, you can probably train LoRAs with the default settings. If you run out of VRAM, reduce `Micro Batch Size` or `Cutoff Length`. 
Training 4-bit quantized models uses more VRAM — set `Micro Batch Size` to `1` to compensate.\n\n### Rank\n\nHigher rank = more learning capacity = larger adapter = more VRAM. Use 4–8 for style/format, 128–256 to teach factual knowledge.\n\n### Learning Rate and Epochs\n\nThese control how aggressively the model learns and how many times it sees the data. Higher LR + fewer epochs = fast but rough. Lower LR + more epochs = slower but higher quality. The scheduler (default: cosine) decays the LR over the course of training — see [HuggingFace docs](https://huggingface.co/docs/transformers/main_classes/optimizer_schedules#schedules) for graphs of each option.\n\n## Loss\n\nWhen you're running training, the WebUI's console window will log reports that include, among other things, a numeric value named `Loss`. It will start as a high number, and gradually get lower and lower as it goes.\n\nLoss measures how far the model's predictions are from the training data, with `0` meaning a perfect match. It's calculated as the cross-entropy between the model's output distribution and the expected tokens.\n\nIn practice, a loss of `0` means the model has overfit — it memorized the training data at the expense of its general capabilities.\n\nLoss is a balancing game: you want it low enough that the model learns your data, but not so low that it loses general knowledge. Generally, if it goes below `1.0`, overfitting is likely and you should stop training. In some cases you may want to go as low as `0.5` (if you need very predictable outputs). Different goals have different needs, so experiment and see what works best for you.\n\nNote: if you see Loss start at or suddenly jump to exactly `0`, it is likely something has gone wrong in your training process (e.g., model corruption).\n"
  },
  {
    "path": "docs/06 - Session Tab.md",
    "content": "Here you can restart the UI with new settings.\n\n## Settings\n\n* **Toggle light/dark theme**: switches between light and dark mode.\n* **Show two columns in the Notebook tab**: toggles between the two-column Default layout and the single-column Notebook layout.\n* **Turn long pasted text into attachments in the Chat tab**: when enabled, long pasted text is automatically converted into file attachments.\n* **Include attachments/search results from previous messages in the chat prompt**: when enabled, attachments and web search results from earlier messages are included in subsequent prompts.\n\n## Extensions & flags\n\n* **Available extensions**: shows a list of extensions available under `text-generation-webui/extensions` and `text-generation-webui/user_data/extensions`. Note that some of these extensions may require manually installing Python requirements through the command: `pip install -r extensions/extension_name/requirements.txt`.\n* **Boolean command-line flags**: shows command-line flags of bool (true/false) type.\n\nAfter selecting your desired flags and extensions, you can restart the UI by clicking on **Apply flags/extensions and restart**.\n\n## Install or update an extension\n\nIn this field, you can enter the GitHub URL for an extension and press enter to either install it (i.e. cloning it into `text-generation-webui/extensions`) or update it with `git pull` in case it is already cloned.\n\nNote that some extensions may include additional Python requirements. 
In this case, to install those you have to run the command\n\n```\npip install -r extensions/extension-name/requirements.txt\n```\n\nor\n\n```\npip install -r extensions\\extension-name\\requirements.txt\n```\n\nif you are on Windows.\n\nIf you used the one-click installer, this command should be executed in the terminal window that appears when you run the \"cmd_\" script for your OS.\n\n## Saving UI defaults\n\nThe **Save extensions settings to user_data/settings.yaml** button gathers the visible values in the UI and saves them to `user_data/settings.yaml` so that your settings will persist across multiple restarts of the UI.\n\nNote that preset parameters like temperature are not individually saved, so you need to first save your preset and select it in the preset menu before saving the defaults.\n"
  },
  {
    "path": "docs/07 - Extensions.md",
    "content": "# Extensions\n\nExtensions are defined by files named `script.py` inside subfolders of either:\n- `text-generation-webui/extensions`\n- `text-generation-webui/user_data/extensions`\n\nThey are loaded at startup if the folder name is specified after the `--extensions` flag.\n\nFor instance, `extensions/silero_tts/script.py` or `user_data/extensions/silero_tts/script.py` gets loaded with `python server.py --extensions silero_tts`.\n\n**Note:** Extensions in `user_data/extensions/` take priority over those in `extensions/` when both exist with the same name.\n\n## [text-generation-webui-extensions](https://github.com/oobabooga/text-generation-webui-extensions)\n\nThe repository above contains a directory of user extensions.\n\nIf you create an extension, you are welcome to host it in a GitHub repository and submit a PR adding it to the list.\n\n## Built-in extensions\n\n|Extension|Description|\n|---------|-----------|\n|[openai](https://github.com/oobabooga/text-generation-webui/tree/main/extensions/openai)| Creates an API that mimics the OpenAI API and can be used as a drop-in replacement. |\n|[superboogav2](https://github.com/oobabooga/text-generation-webui/tree/main/extensions/superboogav2)| Enhanced RAG extension with support for PDF, DOCX, and PPTX files. |\n|[send_pictures](https://github.com/oobabooga/text-generation-webui/blob/main/extensions/send_pictures/)| Creates an image upload field that can be used to send images to the bot in chat mode. Captions are automatically generated using BLIP. |\n|[coqui_tts](https://github.com/oobabooga/text-generation-webui/tree/main/extensions/coqui_tts)| Text-to-speech extension using Coqui XTTS v2. |\n|[silero_tts](https://github.com/oobabooga/text-generation-webui/tree/main/extensions/silero_tts)| Text-to-speech extension using [Silero](https://github.com/snakers4/silero-models). When used in chat mode, responses are replaced with an audio widget. 
|\n|[whisper_stt](https://github.com/oobabooga/text-generation-webui/tree/main/extensions/whisper_stt)| Allows you to enter your inputs in chat mode using your microphone. |\n|[perplexity_colors](https://github.com/oobabooga/text-generation-webui/tree/main/extensions/perplexity_colors)| Colors each token in the output text by its associated probability, as derived from the model logits. |\n|[google_translate](https://github.com/oobabooga/text-generation-webui/tree/main/extensions/google_translate)| Automatically translates inputs and outputs using Google Translate.|\n|[gallery](https://github.com/oobabooga/text-generation-webui/blob/main/extensions/gallery/)| Creates a gallery with the chat characters and their pictures. |\n|[sd_api_pictures](https://github.com/oobabooga/text-generation-webui/tree/main/extensions/sd_api_pictures)| Allows you to request pictures from the bot in chat mode, which will be generated using the AUTOMATIC1111 Stable Diffusion API. See examples [here](https://github.com/oobabooga/text-generation-webui/pull/309). |\n|[long_replies](https://github.com/oobabooga/text-generation-webui/tree/main/extensions/long_replies)| Forces longer replies by suppressing early newlines in the model output. |\n|[ngrok](https://github.com/oobabooga/text-generation-webui/tree/main/extensions/ngrok)| Allows you to access the web UI remotely using the ngrok reverse tunnel service (free). It's an alternative to the built-in Gradio `--share` feature. |\n|[superbooga](https://github.com/oobabooga/text-generation-webui/tree/main/extensions/superbooga)| An extension that uses ChromaDB to create an arbitrarily large pseudocontext, taking as input text files, URLs, or pasted text. Based on https://github.com/kaiokendev/superbig. |\n|[character_bias](https://github.com/oobabooga/text-generation-webui/tree/main/extensions/character_bias)| Just a very simple example that adds a hidden string at the beginning of the bot's reply in chat mode. 
|\n\n## How to write an extension\n\nThe extensions framework is based on special functions and variables that you can define in `script.py`. The functions are the following:\n\n| Function        | Description |\n|-------------|-------------|\n| `def setup()` | Is executed when the extension gets imported. |\n| `def ui()` | Creates custom gradio elements when the UI is launched. | \n| `def custom_css()` | Returns custom CSS as a string. It is applied whenever the web UI is loaded. |\n| `def custom_js()` | Same as above but for javascript. |\n| `def input_modifier(string, state, is_chat=False)`  | Modifies the input string before it enters the model. In chat mode, it is applied to the user message. Otherwise, it is applied to the entire prompt. |\n| `def output_modifier(string, state, is_chat=False)`  | Modifies the output string before it is presented in the UI. In chat mode, it is applied to the bot's reply. Otherwise, it is applied to the entire output. |\n| `def chat_input_modifier(text, visible_text, state)` | Modifies both the visible and internal inputs in chat mode. Can be used to hijack the chat input with custom content. |\n| `def bot_prefix_modifier(string, state)`  | Applied in chat mode to the prefix for the bot's reply. |\n| `def state_modifier(state)`  | Modifies the dictionary containing the UI input parameters before it is used by the text generation functions. |\n| `def history_modifier(history)`  | Modifies the chat history before the text generation in chat mode begins. |\n| `def custom_generate_reply(...)` | Overrides the main text generation function. |\n| `def custom_generate_chat_prompt(...)` | Overrides the prompt generator in chat mode. |\n| `def tokenizer_modifier(state, prompt, input_ids, input_embeds)` | Modifies the `input_ids`/`input_embeds` fed to the model. Should return `prompt`, `input_ids`, `input_embeds`. See the `example` extension for a template. 
|\n| `def custom_tokenized_length(prompt)` | Used in conjunction with `tokenizer_modifier`, returns the length in tokens of `prompt`. See the `example` extension for a template. |\n\nAdditionally, you can define a special `params` dictionary. In it, the `display_name` key is used to define the displayed name of the extension in the UI, and the `is_tab` key is used to define whether the extension should appear in a new tab. By default, extensions appear at the bottom of the \"Text generation\" tab.\n\nExample:\n\n```python\nparams = {\n    \"display_name\": \"Google Translate\",\n    \"is_tab\": True,\n}\n```\n\nThe `params` dict may also contain variables that you want to be customizable through a `settings.yaml` file. For instance, assuming the extension is in `extensions/google_translate`, the variable `language string` in\n\n```python\nparams = {\n    \"display_name\": \"Google Translate\",\n    \"is_tab\": True,\n    \"language string\": \"jp\"\n}\n```\n\ncan be customized by adding a key called `google_translate-language string` to `settings.yaml`:\n\n```yaml\ngoogle_translate-language string: 'fr'\n```\n\nThat is, the syntax for the key is `extension_name-variable_name`.\n\n## Using multiple extensions at the same time\n\nYou can activate more than one extension at a time by providing their names separated by spaces after `--extensions`. The input, output, and bot prefix modifiers will be applied in the specified order. \n\nExample:\n\n```\npython server.py --extensions enthusiasm translate # First apply enthusiasm, then translate\npython server.py --extensions translate enthusiasm # First apply translate, then enthusiasm\n```\n\nNote that for:\n- `custom_generate_chat_prompt`\n- `custom_generate_reply`\n- `custom_tokenized_length`\n\nonly the first declaration encountered will be used and the rest will be ignored. 
\n\n## A full example\n\nThe source code below can be found at [extensions/example/script.py](https://github.com/oobabooga/text-generation-webui/tree/main/extensions/example/script.py).\n\n```python\n\"\"\"\nAn example of extension. It does nothing, but you can add transformations\nbefore the return statements to customize the webui behavior.\n\nStarting from history_modifier and ending in output_modifier, the\nfunctions are declared in the same order that they are called at\ngeneration time.\n\"\"\"\n\nimport gradio as gr\nimport torch\nfrom transformers import LogitsProcessor\n\nfrom modules import chat, shared\nfrom modules.text_generation import (\n    decode,\n    encode,\n    generate_reply,\n)\n\nparams = {\n    \"display_name\": \"Example Extension\",\n    \"is_tab\": False,\n}\n\nclass MyLogits(LogitsProcessor):\n    \"\"\"\n    Manipulates the probabilities for the next token before it gets sampled.\n    Used in the logits_processor_modifier function below.\n    \"\"\"\n    def __init__(self):\n        pass\n\n    def __call__(self, input_ids, scores):\n        # probs = torch.softmax(scores, dim=-1, dtype=torch.float)\n        # probs[0] /= probs[0].sum()\n        # scores = torch.log(probs / (1 - probs))\n        return scores\n\ndef history_modifier(history):\n    \"\"\"\n    Modifies the chat history.\n    Only used in chat mode.\n    \"\"\"\n    return history\n\ndef state_modifier(state):\n    \"\"\"\n    Modifies the state variable, which is a dictionary containing the input\n    values in the UI like sliders and checkboxes.\n    \"\"\"\n    return state\n\ndef chat_input_modifier(text, visible_text, state):\n    \"\"\"\n    Modifies the user input string in chat mode (visible_text).\n    You can also modify the internal representation of the user\n    input (text) to change how it will appear in the prompt.\n    \"\"\"\n    return text, visible_text\n\ndef input_modifier(string, state, is_chat=False):\n    \"\"\"\n    In default/notebook modes, 
modifies the whole prompt.\n\n    In chat mode, it is the same as chat_input_modifier but only applied\n    to \"text\", here called \"string\", and not to \"visible_text\".\n    \"\"\"\n    return string\n\ndef bot_prefix_modifier(string, state):\n    \"\"\"\n    Modifies the prefix for the next bot reply in chat mode.\n    By default, the prefix will be something like \"Bot Name:\".\n    \"\"\"\n    return string\n\ndef tokenizer_modifier(state, prompt, input_ids, input_embeds):\n    \"\"\"\n    Modifies the input ids and embeds fed to the model.\n    Only used by loaders that use the transformers library for sampling.\n    \"\"\"\n    return prompt, input_ids, input_embeds\n\ndef logits_processor_modifier(processor_list, input_ids):\n    \"\"\"\n    Adds logits processors to the list, allowing you to access and modify\n    the next token probabilities.\n    Only used by loaders that use the transformers library for sampling.\n    \"\"\"\n    processor_list.append(MyLogits())\n    return processor_list\n\ndef output_modifier(string, state, is_chat=False):\n    \"\"\"\n    Modifies the LLM output before it gets presented.\n\n    In chat mode, the modified version goes into history['visible'],\n    and the original version goes into history['internal'].\n    \"\"\"\n    return string\n\ndef custom_generate_chat_prompt(user_input, state, **kwargs):\n    \"\"\"\n    Replaces the function that generates the prompt from the chat history.\n    Only used in chat mode.\n    \"\"\"\n    result = chat.generate_chat_prompt(user_input, state, **kwargs)\n    return result\n\ndef custom_css():\n    \"\"\"\n    Returns a CSS string that gets appended to the CSS for the webui.\n    \"\"\"\n    return ''\n\ndef custom_js():\n    \"\"\"\n    Returns a javascript string that gets appended to the javascript\n    for the webui.\n    \"\"\"\n    return ''\n\ndef setup():\n    \"\"\"\n    Gets executed only once, when the extension is imported.\n  
  \"\"\"\n    pass\n\ndef ui():\n    \"\"\"\n    Gets executed when the UI is drawn. Custom gradio elements and\n    their corresponding event handlers should be defined here.\n\n    To learn about gradio components, check out the docs:\n    https://gradio.app/docs/\n    \"\"\"\n    pass\n```\n"
  },
  {
    "path": "docs/08 - Additional Tips.md",
    "content": "## Audio notification\n\nIf your computer takes a long time to generate each response for the model that you are using, you can enable an audio notification for when the response is completed. This feature was kindly contributed by HappyWorldGames in [#1277](https://github.com/oobabooga/text-generation-webui/pull/1277).\n\n### Installation\n\nSimply place a file called \"notification.mp3\" in the same folder as `server.py`. Here you can find some examples:\n\n* https://pixabay.com/sound-effects/search/ding/?duration=0-30\n* https://pixabay.com/sound-effects/search/notification/?duration=0-30\n\nSource: https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/1126\n\nThis file will be automatically detected the next time you start the web UI.\n\n## Miscellaneous info\n\n### You can train LoRAs in CPU mode\n\nLoad the web UI with\n\n```\npython server.py --cpu\n```\n\nand start training the LoRA from the training tab as usual.\n\n### You can check the sha256sum of downloaded models with the download script\n\n```\npython download-model.py facebook/galactica-125m --check\n```\n\n### The download script continues interrupted downloads by default\n\nIt doesn't start over.\n\n"
  },
  {
    "path": "docs/09 - Docker.md",
    "content": "Docker Compose is a way of installing and launching the web UI in an isolated Ubuntu image using only a few commands.\n\n## Prerequisites\n\nYou need Docker Compose v2.17 or higher:\n\n```\n~$ docker compose version\nDocker Compose version v2.21.0\n```\n\nInstallation instructions: https://docs.docker.com/engine/install/\n\nFor NVIDIA GPUs, you also need the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html).\n\n## Quick start\n\nThere are four Docker variants available under `docker/`:\n\n| Directory | GPU | Notes |\n|-----------|-----|-------|\n| `docker/nvidia` | NVIDIA | Requires NVIDIA Container Toolkit |\n| `docker/amd` | AMD | Requires ROCm-compatible GPU |\n| `docker/intel` | Intel Arc | Beta support |\n| `docker/cpu` | None | CPU-only inference |\n\nTo launch (using NVIDIA as an example):\n\n```bash\ncd text-generation-webui/docker/nvidia\ncp ../.env.example .env\n# Optionally edit .env to customize ports, TORCH_CUDA_ARCH_LIST, etc.\ndocker compose up --build\n```\n\nThe web UI will be available at `http://localhost:7860`.\n\n## User data\n\nCreate a `user_data/` directory next to the `docker-compose.yml` to persist your models, characters, presets, and settings between container rebuilds:\n\n```bash\nmkdir -p user_data\n```\n\nThis directory is mounted into the container at runtime. You can place a `CMD_FLAGS.txt` inside it to pass persistent flags to the web UI (e.g., `--api`).\n\nModels can be downloaded through the web UI's “Model” tab once it's running, and they will be saved to `user_data/models/`.\n\n## Dedicated docker repository\n\nAn external repository maintains a docker wrapper for this project as well as several pre-configured 'one-click' `docker compose` variants. It can be found at: [Atinoda/text-generation-webui-docker](https://github.com/Atinoda/text-generation-webui-docker).\n"
  },
  {
    "path": "docs/11 - AMD Setup.md",
    "content": "## Using an AMD GPU in Linux\n\nRequires ROCm 6.4 to be installed.\n\n### Option 1: One-click installer\n\nThe one-click installer (`start_linux.sh`) automatically detects AMD GPUs. When prompted, select the AMD option, or set the `GPU_CHOICE` environment variable before running:\n\n```\nGPU_CHOICE=B ./start_linux.sh\n```\n\n### Option 2: Manual conda install\n\nFollow the manual conda installation instructions in the README, using the AMD PyTorch command:\n\n```\npip3 install torch==2.9.1 --index-url https://download.pytorch.org/whl/rocm6.4\n```\n\nThen install the project requirements with the AMD requirements file:\n\n```\npip install -r requirements/full/requirements_amd.txt\n```\n"
  },
  {
    "path": "docs/12 - OpenAI API.md",
    "content": "## OpenAI compatible API\n\nThe main API for this project is meant to be a drop-in replacement for the OpenAI API, including Chat and Completions endpoints.\n\n* It is 100% offline and private.\n* It doesn't create any logs.\n* It doesn't connect to OpenAI.\n* It doesn't use the openai-python library.\n\n### Starting the API\n\nAdd `--api` to your command-line flags.\n\n* To create a public Cloudflare URL, add the `--public-api` flag.\n* To listen on your local network, add the `--listen` flag.\n* To change the port, which is 5000 by default, use `--api-port 1234` (change 1234 to your desired port number).\n* To use SSL, add `--ssl-keyfile key.pem --ssl-certfile cert.pem`. ⚠️ **Note**: this doesn't work with `--public-api` since Cloudflare already uses HTTPS by default.\n* To use an API key for authentication, add `--api-key yourkey`.\n\n### Examples\n\nFor the documentation with all the endpoints, parameters and their types, consult `http://127.0.0.1:5000/docs` or the [typing.py](https://github.com/oobabooga/text-generation-webui/blob/main/extensions/openai/typing.py) file.\n\nThe official examples in the [OpenAI documentation](https://platform.openai.com/docs/api-reference) should also work, and the same parameters apply (although the API here has more optional parameters).\n\n#### Completions\n\n```shell\ncurl http://127.0.0.1:5000/v1/completions \\\n  -H \"Content-Type: application/json\" \\\n  -d '{\n    \"prompt\": \"This is a cake recipe:\\n\\n1.\",\n    \"max_tokens\": 512,\n    \"temperature\": 0.6,\n    \"top_p\": 0.95,\n    \"top_k\": 20\n  }'\n```\n\n#### Chat completions\n\nWorks best with instruction-following models. 
If the \"instruction_template\" variable is not provided, it will be guessed automatically based on the model name using the regex patterns in `user_data/models/config.yaml`.\n\n```shell\ncurl http://127.0.0.1:5000/v1/chat/completions \\\n  -H \"Content-Type: application/json\" \\\n  -d '{\n    \"messages\": [\n      {\n        \"role\": \"user\",\n        \"content\": \"Hello!\"\n      }\n    ],\n    \"temperature\": 0.6,\n    \"top_p\": 0.95,\n    \"top_k\": 20\n  }'\n```\n\n#### Chat completions with characters\n\n```shell\ncurl http://127.0.0.1:5000/v1/chat/completions \\\n  -H \"Content-Type: application/json\" \\\n  -d '{\n    \"messages\": [\n      {\n        \"role\": \"user\",\n        \"content\": \"Hello! Who are you?\"\n      }\n    ],\n    \"mode\": \"chat-instruct\",\n    \"character\": \"Example\",\n    \"temperature\": 0.6,\n    \"top_p\": 0.95,\n    \"top_k\": 20\n  }'\n```\n\n#### Multimodal/vision (llama.cpp and ExLlamaV3)\n\n##### With /v1/chat/completions (recommended!)\n\n```shell\ncurl http://127.0.0.1:5000/v1/chat/completions \\\n  -H \"Content-Type: application/json\" \\\n  -d '{\n    \"messages\": [\n      {\n        \"role\": \"user\",\n        \"content\": [\n          {\"type\": \"text\", \"text\": \"Please describe what you see in this image.\"},\n          {\"type\": \"image_url\", \"image_url\": {\"url\": \"https://github.com/turboderp-org/exllamav3/blob/master/examples/media/cat.png?raw=true\"}}\n        ]\n      }\n    ],\n    \"temperature\": 0.6,\n    \"top_p\": 0.95,\n    \"top_k\": 20\n  }'\n```\n\nFor base64-encoded images, just replace the inner \"url\" value with this format: `data:image/FORMAT;base64,BASE64_STRING` where FORMAT is the file type (png, jpeg, gif, etc.) 
and BASE64_STRING is your base64-encoded image data.\n\n##### With /v1/completions\n\n```shell\ncurl http://127.0.0.1:5000/v1/completions \\\n  -H \"Content-Type: application/json\" \\\n  -d '{\n    \"messages\": [\n      {\n        \"role\": \"user\",\n        \"content\": [\n          {\n            \"type\": \"text\",\n            \"text\": \"About image <__media__> and image <__media__>, what I can say is that the first one\"\n          },\n          {\n            \"type\": \"image_url\",\n            \"image_url\": {\n              \"url\": \"https://github.com/turboderp-org/exllamav3/blob/master/examples/media/cat.png?raw=true\"\n            }\n          },\n          {\n            \"type\": \"image_url\",\n            \"image_url\": {\n              \"url\": \"https://github.com/turboderp-org/exllamav3/blob/master/examples/media/strawberry.png?raw=true\"\n            }\n          }\n        ]\n      }\n    ],\n    \"temperature\": 0.6,\n    \"top_p\": 0.95,\n    \"top_k\": 20\n  }'\n```\n\nFor base64-encoded images, just replace the inner \"url\" values with this format: `data:image/FORMAT;base64,BASE64_STRING` where FORMAT is the file type (png, jpeg, gif, etc.) and BASE64_STRING is your base64-encoded image data.\n\n#### Image generation\n\n```shell\ncurl http://127.0.0.1:5000/v1/images/generations \\\n  -H \"Content-Type: application/json\" \\\n  -d '{\n    \"prompt\": \"an orange tree\",\n    \"steps\": 9,\n    \"cfg_scale\": 0,\n    \"batch_size\": 1,\n    \"batch_count\": 1\n  }'\n```\n\nYou need to load an image model first. You can do this via the UI, or by adding `--image-model your_model_name` when launching the server.\n\nThe output is a JSON object containing a `data` array. 
Each element has a `b64_json` field with the base64-encoded PNG image:\n\n```json\n{\n  \"created\": 1764791227,\n  \"data\": [\n    {\n      \"b64_json\": \"iVBORw0KGgo...\"\n    }\n  ]\n}\n```\n\n#### SSE streaming\n\n```shell\ncurl http://127.0.0.1:5000/v1/chat/completions \\\n  -H \"Content-Type: application/json\" \\\n  -d '{\n    \"messages\": [\n      {\n        \"role\": \"user\",\n        \"content\": \"Hello!\"\n      }\n    ],\n    \"temperature\": 0.6,\n    \"top_p\": 0.95,\n    \"top_k\": 20,\n    \"stream\": true\n  }'\n```\n\n#### Logits\n\n```shell\ncurl -k http://127.0.0.1:5000/v1/internal/logits \\\n  -H \"Content-Type: application/json\" \\\n  -d '{\n    \"prompt\": \"Who is best, Asuka or Rei? Answer:\",\n    \"use_samplers\": false\n  }'\n```\n\n#### Logits after sampling parameters\n\n```shell\ncurl -k http://127.0.0.1:5000/v1/internal/logits \\\n  -H \"Content-Type: application/json\" \\\n  -d '{\n    \"prompt\": \"Who is best, Asuka or Rei? Answer:\",\n    \"use_samplers\": true,\n    \"top_k\": 3\n  }'\n```\n\n#### List models\n\n```shell\ncurl -k http://127.0.0.1:5000/v1/internal/model/list \\\n  -H \"Content-Type: application/json\"\n```\n\n#### Load model\n\n```shell\ncurl -k http://127.0.0.1:5000/v1/internal/model/load \\\n  -H \"Content-Type: application/json\" \\\n  -d '{\n    \"model_name\": \"Qwen_Qwen3-0.6B-Q4_K_M.gguf\",\n    \"args\": {\n      \"ctx_size\": 32768,\n      \"flash_attn\": true,\n      \"cache_type\": \"q8_0\"\n    }\n  }'\n```\n\n#### Python chat example\n\n```python\nimport requests\n\nurl = \"http://127.0.0.1:5000/v1/chat/completions\"\n\nheaders = {\n    \"Content-Type\": \"application/json\"\n}\n\nhistory = []\n\nwhile True:\n    user_message = input(\"> \")\n    history.append({\"role\": \"user\", \"content\": user_message})\n    data = {\n        \"messages\": history,\n        \"temperature\": 0.6,\n        \"top_p\": 0.95,\n        \"top_k\": 20\n    }\n\n    response = requests.post(url, headers=headers, 
json=data, verify=False)\n    assistant_message = response.json()['choices'][0]['message']['content']\n    history.append({\"role\": \"assistant\", \"content\": assistant_message})\n    print(assistant_message)\n```\n\n#### Python chat example with streaming\n\nStart the script with `python -u` to see the output in real time.\n\n```python\nimport requests\nimport sseclient  # pip install sseclient-py\nimport json\n\nurl = \"http://127.0.0.1:5000/v1/chat/completions\"\n\nheaders = {\n    \"Content-Type\": \"application/json\"\n}\n\nhistory = []\n\nwhile True:\n    user_message = input(\"> \")\n    history.append({\"role\": \"user\", \"content\": user_message})\n    data = {\n        \"stream\": True,\n        \"messages\": history,\n        \"temperature\": 0.6,\n        \"top_p\": 0.95,\n        \"top_k\": 20\n    }\n\n    stream_response = requests.post(url, headers=headers, json=data, verify=False, stream=True)\n    client = sseclient.SSEClient(stream_response)\n\n    assistant_message = ''\n    for event in client.events():\n        payload = json.loads(event.data)\n        chunk = payload['choices'][0]['delta']['content']\n        assistant_message += chunk\n        print(chunk, end='')\n\n    print()\n    history.append({\"role\": \"assistant\", \"content\": assistant_message})\n```\n\n#### Python completions example with streaming\n\nStart the script with `python -u` to see the output in real time.\n\n```python\nimport json\nimport requests\nimport sseclient  # pip install sseclient-py\n\nurl = \"http://127.0.0.1:5000/v1/completions\"\n\nheaders = {\n    \"Content-Type\": \"application/json\"\n}\n\ndata = {\n    \"prompt\": \"This is a cake recipe:\\n\\n1.\",\n    \"max_tokens\": 512,\n    \"temperature\": 0.6,\n    \"top_p\": 0.95,\n    \"top_k\": 20,\n    \"stream\": True,\n}\n\nstream_response = requests.post(url, headers=headers, json=data, verify=False, stream=True)\nclient = sseclient.SSEClient(stream_response)\n\nprint(data['prompt'], end='')\nfor 
event in client.events():\n    payload = json.loads(event.data)\n    print(payload['choices'][0]['text'], end='')\n\nprint()\n```\n\n#### Python parallel requests example\n\nThe API supports handling multiple requests in parallel. For ExLlamaV3, this works out of the box. For llama.cpp, you need to pass `--parallel N` to set the number of concurrent slots.\n\n```python\nimport concurrent.futures\nimport requests\n\nurl = \"http://127.0.0.1:5000/v1/chat/completions\"\nprompts = [\n    \"Write a haiku about the ocean.\",\n    \"Explain quantum computing in simple terms.\",\n    \"Tell me a joke about programmers.\",\n]\n\ndef send_request(prompt):\n    response = requests.post(url, json={\n        \"messages\": [{\"role\": \"user\", \"content\": prompt}],\n        \"max_tokens\": 200,\n    })\n    return response.json()[\"choices\"][0][\"message\"][\"content\"]\n\nwith concurrent.futures.ThreadPoolExecutor() as executor:\n    results = list(executor.map(send_request, prompts))\n\nfor prompt, result in zip(prompts, results):\n    print(f\"Q: {prompt}\\nA: {result}\\n\")\n```\n\n#### Python example with API key\n\nReplace\n\n```python\nheaders = {\n    \"Content-Type\": \"application/json\"\n}\n```\n\nwith\n\n```python\nheaders = {\n    \"Content-Type\": \"application/json\",\n    \"Authorization\": \"Bearer yourPassword123\"\n}\n```\n\nin any of the examples above.\n\n#### Tool/Function calling\n\nUse a model with tool calling support (Qwen, Mistral, GPT-OSS, etc). Tools are passed via the `tools` parameter and the prompt is automatically formatted using the model's Jinja2 template.\n\nWhen the model decides to call a tool, the response will have `finish_reason: \"tool_calls\"` and a `tool_calls` array with structured function names and arguments. 
You then execute the tool, send the result back as a `role: \"tool\"` message, and continue until the model responds with `finish_reason: \"stop\"`.\n\nSome models call multiple tools in parallel (Qwen, Mistral), while others call one at a time (GPT-OSS). The loop below handles both styles.\n\n```python\nimport json\nimport requests\n\nurl = \"http://127.0.0.1:5000/v1/chat/completions\"\n\n# Define your tools\ntools = [\n    {\n        \"type\": \"function\",\n        \"function\": {\n            \"name\": \"get_weather\",\n            \"description\": \"Get the current weather for a given location\",\n            \"parameters\": {\n                \"type\": \"object\",\n                \"properties\": {\n                    \"location\": {\"type\": \"string\", \"description\": \"City name\"},\n                },\n                \"required\": [\"location\"]\n            }\n        }\n    },\n    {\n        \"type\": \"function\",\n        \"function\": {\n            \"name\": \"get_time\",\n            \"description\": \"Get the current time in a given timezone\",\n            \"parameters\": {\n                \"type\": \"object\",\n                \"properties\": {\n                    \"timezone\": {\"type\": \"string\", \"description\": \"IANA timezone string\"},\n                },\n                \"required\": [\"timezone\"]\n            }\n        }\n    },\n]\n\n\ndef execute_tool(name, arguments):\n    \"\"\"Replace this with your actual tool implementations.\"\"\"\n    if name == \"get_weather\":\n        return {\"temperature\": 22, \"condition\": \"sunny\", \"humidity\": 45}\n    elif name == \"get_time\":\n        return {\"time\": \"2:30 PM\", \"timezone\": \"JST\"}\n    return {\"error\": f\"Unknown tool: {name}\"}\n\n\nmessages = [{\"role\": \"user\", \"content\": \"What time is it in Tokyo and what's the weather like there?\"}]\n\n# Tool-calling loop: keep going until the model gives a final answer\nfor _ in range(10):\n    response = 
requests.post(url, json={\"messages\": messages, \"tools\": tools}).json()\n    choice = response[\"choices\"][0]\n\n    if choice[\"finish_reason\"] == \"tool_calls\":\n        # Add the assistant's response (with tool_calls) to history\n        messages.append({\n            \"role\": \"assistant\",\n            \"content\": choice[\"message\"][\"content\"],\n            \"tool_calls\": choice[\"message\"][\"tool_calls\"],\n        })\n\n        # Execute each tool and add results to history\n        for tool_call in choice[\"message\"][\"tool_calls\"]:\n            name = tool_call[\"function\"][\"name\"]\n            arguments = json.loads(tool_call[\"function\"][\"arguments\"])\n            result = execute_tool(name, arguments)\n\n            print(f\"Tool call: {name}({arguments}) => {result}\")\n            messages.append({\n                \"role\": \"tool\",\n                \"tool_call_id\": tool_call[\"id\"],\n                \"content\": json.dumps(result),\n            })\n    else:\n        # Final answer\n        print(f\"\\nAssistant: {choice['message']['content']}\")\n        break\n```\n\n### Environment variables\n\nThe following environment variables can be used (they take precedence over everything else):\n\n| Variable Name          | Description                                                                                        | Example Value              |\n|------------------------|------------------------------------|----------------------------|\n| `OPENEDAI_PORT`           | Port number         |             5000               |\n| `OPENEDAI_CERT_PATH`      | SSL certificate file path         |            cert.pem                |\n| `OPENEDAI_KEY_PATH`       | SSL key file path                    |             key.pem               |\n| `OPENEDAI_DEBUG`          | Enable debugging (set to 1)    | 1                          |\n| `OPENEDAI_EMBEDDING_MODEL` | Embedding model (if applicable) |          
sentence-transformers/all-mpnet-base-v2                  |\n| `OPENEDAI_EMBEDDING_DEVICE` | Embedding device (if applicable) |           cuda                 |\n\n#### Persistent settings with `settings.yaml`\n\nYou can also set the following variables in your `settings.yaml` file:\n\n```\nopenai-embedding_device: cuda\nopenai-embedding_model: \"sentence-transformers/all-mpnet-base-v2\"\nopenai-debug: 1\n```\n\n### Third-party application setup\n\nYou can usually force an application that uses the OpenAI API to connect to the local API by using the following environment variables:\n\n```shell\nOPENAI_API_HOST=http://127.0.0.1:5000\n```\n\nor\n\n```shell\nOPENAI_API_KEY=sk-111111111111111111111111111111111111111111111111\nOPENAI_API_BASE=http://127.0.0.1:5000/v1\n```\n\nWith the [official python openai client](https://github.com/openai/openai-python) (v1.x), the address can be set like this:\n\n```python\nfrom openai import OpenAI\n\nclient = OpenAI(\n    api_key=\"sk-111111111111111111111111111111111111111111111111\",\n    base_url=\"http://127.0.0.1:5000/v1\"\n)\n\nresponse = client.chat.completions.create(\n    model=\"x\",\n    messages=[{\"role\": \"user\", \"content\": \"Hello!\"}]\n)\nprint(response.choices[0].message.content)\n```\n\nWith the [official Node.js openai client](https://github.com/openai/openai-node) (v4.x):\n\n```js\nimport OpenAI from \"openai\";\n\nconst client = new OpenAI({\n  apiKey: process.env.OPENAI_API_KEY,\n  baseURL: \"http://127.0.0.1:5000/v1\",\n});\n\nconst response = await client.chat.completions.create({\n  model: \"x\",\n  messages: [{ role: \"user\", content: \"Hello!\" }],\n});\nconsole.log(response.choices[0].message.content);\n```\n\n### Embeddings (alpha)\n\nEmbeddings require `sentence-transformers` to be installed, but chat and completions will function without it. The embeddings endpoint currently uses the Hugging Face model `sentence-transformers/all-mpnet-base-v2`. 
This produces 768 dimensional embeddings. The model is small and fast. This model and embedding size may change in the future.\n\n| model name             | dimensions | input max tokens | speed | size | Avg. performance |\n| ---------------------- | ---------- | ---------------- | ----- | ---- | ---------------- |\n| all-mpnet-base-v2      | 768        | 384              | 2800  | 420M | 63.3             |\n| all-MiniLM-L6-v2       | 384        | 256              | 14200 | 80M  | 58.8             |\n\nIn short, the all-MiniLM-L6-v2 model is 5x faster, 5x smaller ram, 2x smaller storage, and still offers good quality. Stats from (https://www.sbert.net/docs/pretrained_models.html). To change the model from the default you can set the environment variable `OPENEDAI_EMBEDDING_MODEL`, ex. \"OPENEDAI_EMBEDDING_MODEL=all-MiniLM-L6-v2\".\n\nWarning: You cannot mix embeddings from different models even if they have the same dimensions. They are not comparable.\n\n### Compatibility\n\n| API endpoint              | notes                                                                       |\n| ------------------------- | --------------------------------------------------------------------------- |\n| /v1/chat/completions      | Use with instruction-following models. Supports streaming, tool calls.      |\n| /v1/completions           | Text completion endpoint.                                                   |\n| /v1/embeddings            | Using SentenceTransformer embeddings.                                       |\n| /v1/images/generations    | Image generation, response_format='b64_json' only.                         |\n| /v1/moderations           | Basic support via embeddings.                                               |\n| /v1/models                | Lists models. Currently loaded model first.                                 |\n| /v1/models/{id}           | Returns model info.                                                         
|\n| /v1/audio/\\*              | Supported.                                                                  |\n| /v1/images/edits          | Not yet supported.                                                          |\n| /v1/images/variations     | Not yet supported.                                                          |\n\n#### Applications\n\nAlmost everything needs the `OPENAI_API_KEY` and `OPENAI_API_BASE` environment variables set, but there are some exceptions.\n\n| Compatibility | Application/Library  | Website                                                                        | Notes                                                                                     |\n| ------------- | -------------------- | ------------------------------------------------------------------------------ | ----------------------------------------------------------------------------------------- |\n| ✅❌          | openai-python        | https://github.com/openai/openai-python                                        | Use `OpenAI(base_url=\"http://127.0.0.1:5000/v1\")`. Only the endpoints from above work.   |\n| ✅❌          | openai-node          | https://github.com/openai/openai-node                                          | Use `new OpenAI({baseURL: \"http://127.0.0.1:5000/v1\"})`. See example above.              |\n| ✅            | anse                 | https://github.com/anse-app/anse                                               | API Key & URL configurable in UI, Images also work.                                       
|\n| ✅            | shell_gpt            | https://github.com/TheR1D/shell_gpt                                            | OPENAI_API_HOST=http://127.0.0.1:5000                                                    |\n| ✅            | gpt-shell            | https://github.com/jla/gpt-shell                                               | OPENAI_API_BASE=http://127.0.0.1:5000/v1                                                 |\n| ✅            | gpt-discord-bot      | https://github.com/openai/gpt-discord-bot                                      | OPENAI_API_BASE=http://127.0.0.1:5000/v1                                                 |\n| ✅            | OpenAI for Notepad++ | https://github.com/Krazal/nppopenai                                            | api_url=http://127.0.0.1:5000 in the config file, or environment variables.               |\n| ✅            | vscode-openai        | https://marketplace.visualstudio.com/items?itemName=AndrewButson.vscode-openai | OPENAI_API_BASE=http://127.0.0.1:5000/v1                                                 |\n| ✅❌          | langchain            | https://github.com/hwchase17/langchain                                         | Use `base_url=\"http://127.0.0.1:5000/v1\"`. Results depend on model and prompt formatting. |\n"
  },
  {
    "path": "docs/13 - Keyboard Shortcuts.md",
    "content": "# Keyboard Shortcuts\n\n#### General\n\n| Shortcut                | Description                                      |\n|-------------------------|--------------------------------------------------|\n| Esc                     | Stop generation                                  |\n\n#### Chat tab\n\n| Shortcut                | Description                                      |\n|-------------------------|--------------------------------------------------|\n| Ctrl + S                | Show/hide chat controls                          |\n| Ctrl + Enter            | Regenerate                                       |\n| Alt + Enter             | Continue                                         |\n| Ctrl + Shift + Backspace| Remove last                                      |\n| Ctrl + Shift + M        | Impersonate                                      |\n| ← (Left Arrow)          | Navigate to previous version of last assistant message |\n| → (Right Arrow)         | Navigate to next version of last assistant message (or regenerate if at latest version) |\n"
  },
  {
    "path": "docs/Image Generation Tutorial.md",
    "content": "# Image Generation Tutorial\n\nThis feature allows you to generate images using `diffusers` models like [Tongyi-MAI/Z-Image-Turbo](https://huggingface.co/Tongyi-MAI/Z-Image-Turbo) directly within the web UI.\n\n<img alt=\"print\" src=\"https://github.com/user-attachments/assets/5108de50-658b-4e93-b2ae-4656d076bc9d\" />\n\n\n## Installation\n\n1. Clone the repository with\n\n```\ngit clone https://github.com/oobabooga/text-generation-webui\n```\n\nor download it from [here](https://github.com/oobabooga/text-generation-webui/archive/refs/heads/main.zip) and unzip it.\n\n2. Use the one-click installer.\n\n- Windows: Double click on `start_windows.bat`\n- Linux: Run `./start_linux.sh`\n- macOS: Run `./start_macos.sh`\n\nNote: Image generation does not work with the portable builds in `.zip` format in the [Releases page](https://github.com/oobabooga/text-generation-webui/releases). You need the \"full\" version of the web UI.\n\n## Downloading a model\n\n1. Once installation ends, browse to `http://127.0.0.1:7860/`.\n2. Click on \"Image AI\" on the left.\n3. Click on \"Model\" at the top.\n4. In the \"Download model\" field, paste `https://huggingface.co/Tongyi-MAI/Z-Image-Turbo` and click \"Download\".\n5. Wait for the download to finish (it's 31 GB).\n\n## Loading the model\n\nSelect the quantization option in the \"Quantization\" menu and click \"Load\".\n\nThe memory usage for `Z-Image-Turbo` for each option is:\n\n| Quantization Method | VRAM Usage |\n| :--- | :--- |\n| None (FP16/BF16) | 25613 MiB |\n| bnb-8bit | 16301 MiB |\n| bnb-8bit + CPU Offload | 16235 MiB |\n| bnb-4bit | 11533 MiB |\n| bnb-4bit + CPU Offload | 7677 MiB |\n\nThe `torchao` options support `torch.compile` for faster image generation, with `float8wo` specifically providing native hardware acceleration for RTX 40-series and newer GPUs.\n\nNote: The next time you launch the web UI, the model will get automatically loaded with your last settings when you try to generate an image. 
You do not need to go to the Model tab and click \"Load\" each time.\n\n## Generating images:\n\n1. While still in the \"Image AI\" page, go to the \"Generate\" tab.\n2. Type your prompt and click on the Generate button.\n\n### Model-specific settings\n\n- For Z-Image-Turbo, make sure to keep CFG Scale at 0 and Steps at 9. Do not write a Negative Prompt as it will get ignored with this CFG Scale value.\n\n### LLM Prompt Variations\n\nTo use this feature, you need to load an LLM in the main \"Model\" page on the left.\n\nIf you have no idea what to use, do this to get started:\n\n1. Download [Qwen3-4B-Q3_K_M.gguf](https://huggingface.co/unsloth/Qwen3-4B-GGUF/resolve/main/Qwen3-4B-Q3_K_M.gguf) to your `text-generation-webui/user_data/models` folder.\n2. Select the model in the dropdown menu in the \"Model\" page.\n3. Click Load.\n\nThen go back to the \"Image AI\" page and check \"LLM Prompt Variations\".\n\nAfter that, your prompts will be automatically updated by the LLM each time you generate an image. If you use a \"Sequential Count\" value greater than 1, a new prompt will be created for each sequential batch.\n\nThe improvement in creativity is striking (prompt: `Photo of a beautiful woman at night under moonlight`):\n\n<img  alt=\"comparison_collage\" src=\"https://github.com/user-attachments/assets/67884832-2800-41cb-a146-e88e25af89c4\" />\n\n## Generating images over API\n\nIt is possible to generate images using the project's API. Just make sure to start the server with `--api`, either by\n\n1. Passing the `--api` flag to your `start` script, like `./start_linux.sh --api`, or\n2. Writing `--api` to your `user_data/CMD_FLAGS.txt` file and relaunching the web UI.\n\nHere is an API call example:\n\n```\ncurl http://127.0.0.1:5000/v1/images/generations \\\n  -H \"Content-Type: application/json\" \\\n  -d '{\n    \"prompt\": \"an orange tree\",\n    \"steps\": 9,\n    \"cfg_scale\": 0,\n    \"batch_size\": 1,\n    \"batch_count\": 1\n  }'\n```\n"
  },
  {
    "path": "docs/Multimodal Tutorial.md",
    "content": "## Getting started\n\n### 1. Find a multimodal model\n\nGGUF models with vision capabilities are uploaded along with a `mmproj` file to Hugging Face.\n\nFor instance, [unsloth/gemma-3-4b-it-GGUF](https://huggingface.co/unsloth/gemma-3-4b-it-GGUF/tree/main) has this:\n\n<img width=\"414\" height=\"270\" alt=\"print1\" src=\"https://github.com/user-attachments/assets/ac5aeb61-f6a2-491e-a1f0-47d6e27ea286\" />\n\n### 2. Download the model to `user_data/models`\n\nAs an example, download\n\nhttps://huggingface.co/unsloth/gemma-3-4b-it-GGUF/resolve/main/gemma-3-4b-it-Q4_K_S.gguf?download=true\n\nto your `text-generation-webui/user_data/models` folder.\n\n### 3. Download the associated mmproj file to `user_data/mmproj`\n\nThen download\n\nhttps://huggingface.co/unsloth/gemma-3-4b-it-GGUF/resolve/main/mmproj-F16.gguf?download=true\n\nto your `text-generation-webui/user_data/mmproj` folder. Name it `mmproj-gemma-3-4b-it-F16.gguf` to give it a recognizable name.\n\n### 4. Load the model\n\n1. Launch the web UI\n2. Navigate to the Model tab\n3. Select the GGUF model in the Model dropdown:\n\n<img width=\"545\" height=\"92\" alt=\"print2\" src=\"https://github.com/user-attachments/assets/3f920f50-e6c3-4768-91e2-20828dd63a1c\" />\n\n4. Select the mmproj file in the Multimodal (vision) menu:\n\n<img width=\"454\" height=\"172\" alt=\"print3\" src=\"https://github.com/user-attachments/assets/a657e20f-0ceb-4d71-9fe4-2b78571d20a6\" />\n\n5. Click \"Load\"\n\n### 5. Send a message with an image\n\nSelect your image by clicking on the 📎 icon and send your message:\n\n<img width=\"368\" height=\"135\" alt=\"print5\" src=\"https://github.com/user-attachments/assets/6175ec9f-04f4-4dba-9382-4ac80d5b0b1f\" />\n\nThe model will reply with great understanding of the image contents:\n\n<img width=\"809\" height=\"884\" alt=\"print6\" src=\"https://github.com/user-attachments/assets/be4a8f4d-619d-49e6-86f5-012d89f8db8d\" />\n\n## Multimodal with ExLlamaV3\n\nMultimodal also works with the ExLlamaV3 loader (the non-HF one).\n\nNo additional files are necessary; just load a multimodal EXL3 model and send an image.\n\nExamples of models that you can use:\n\n- https://huggingface.co/turboderp/gemma-3-27b-it-exl3\n- https://huggingface.co/turboderp/Mistral-Small-3.1-24B-Instruct-2503-exl3\n\n## Multimodal API examples\n\nIn the page below you can find some ready-to-use examples:\n\n[Multimodal/vision (llama.cpp and ExLlamaV3)](https://github.com/oobabooga/text-generation-webui/wiki/12-%E2%80%90-OpenAI-API#multimodalvision-llamacpp-and-exllamav3)\n"
  },
  {
    "path": "docs/README.md",
    "content": "These files are a mirror of the documentation at:\n\n# https://github.com/oobabooga/text-generation-webui/wiki\n\nIt is recommended to browse it there. Contributions can be sent here and will later be synced with the wiki.\n"
  },
  {
    "path": "docs/Tool Calling Tutorial.md",
    "content": "## Supported models\n\nThe following models are supported:\n\n- Qwen 3.5\n- GPT-OSS\n- Mistral Small / Devstral\n- DeepSeek V3\n- Kimi-K2\n- MiniMax-M2.5\n- GLM-5\n- Llama 4\n\nOther models that output tool calls as JSON (inside XML tags, code blocks, or plain JSON) are also supported through a generic fallback parser.\n\n## Tool calling in the UI\n\n### 1. Load a model with tool-calling support\n\nLoad a model with tool-calling support from the Model tab.\n\n### 2. Select tools\n\nIn the chat sidebar, check the tools you want the model to use:\n\n- **web_search** -- Search the web using DuckDuckGo.\n- **fetch_webpage** -- Fetch the content of a URL.\n- **calculate** -- Evaluate math expressions.\n- **get_datetime** -- Get the current date and time.\n- **roll_dice** -- Roll dice.\n\n### 3. Chat\n\nSend a message as usual. When the model decides it needs a tool, it will call it automatically. You will see each tool call and its result in a collapsible accordion inside the chat message.\n\nThe model may call multiple tools in sequence before giving its final answer.\n\n## Writing custom tools\n\nEach tool is a single `.py` file in `user_data/tools/`. It needs two things:\n\n1. A `tool` dictionary that describes the function (name, description, parameters).\n2. 
An `execute(arguments)` function that runs it and returns the result.\n\nHere is a minimal example (`user_data/tools/get_datetime.py`):\n\n```python\nfrom datetime import datetime\n\ntool = {\n    \"type\": \"function\",\n    \"function\": {\n        \"name\": \"get_datetime\",\n        \"description\": \"Get the current date and time.\",\n        \"parameters\": {\n            \"type\": \"object\",\n            \"properties\": {},\n        }\n    }\n}\n\n\ndef execute(arguments):\n    now = datetime.now()\n    return {\"date\": now.strftime(\"%Y-%m-%d\"), \"time\": now.strftime(\"%I:%M %p\")}\n```\n\nAn example with parameters (`user_data/tools/roll_dice.py`):\n\n```python\nimport random\n\ntool = {\n    \"type\": \"function\",\n    \"function\": {\n        \"name\": \"roll_dice\",\n        \"description\": \"Roll one or more dice with the specified number of sides.\",\n        \"parameters\": {\n            \"type\": \"object\",\n            \"properties\": {\n                \"count\": {\"type\": \"integer\", \"description\": \"Number of dice to roll.\", \"default\": 1},\n                \"sides\": {\"type\": \"integer\", \"description\": \"Number of sides per die.\", \"default\": 20},\n            },\n        }\n    }\n}\n\n\ndef execute(arguments):\n    count = max(1, min(arguments.get(\"count\", 1), 1000))\n    sides = max(2, min(arguments.get(\"sides\", 20), 1000))\n    rolls = [random.randint(1, sides) for _ in range(count)]\n    return {\"rolls\": rolls, \"total\": sum(rolls)}\n```\n\nYou can open the built-in tools in `user_data/tools/` for more examples.\n\n## Tool calling over the API\n\nTool calling over the API follows the [OpenAI API](https://platform.openai.com/docs/guides/function-calling) convention. 
Define your tools, send them with your messages, and handle tool calls in a loop until the model gives a final answer.\n\n```python\nimport json\nimport requests\n\nurl = \"http://127.0.0.1:5000/v1/chat/completions\"\n\ntools = [\n    {\n        \"type\": \"function\",\n        \"function\": {\n            \"name\": \"get_weather\",\n            \"description\": \"Get the current weather for a given location.\",\n            \"parameters\": {\n                \"type\": \"object\",\n                \"properties\": {\n                    \"location\": {\"type\": \"string\", \"description\": \"City name\"},\n                },\n                \"required\": [\"location\"]\n            }\n        }\n    }\n]\n\n\ndef execute_tool(name, arguments):\n    if name == \"get_weather\":\n        return {\"temperature\": \"14°C\", \"condition\": \"partly cloudy\"}\n    return {\"error\": f\"Unknown tool: {name}\"}\n\n\nmessages = [{\"role\": \"user\", \"content\": \"What's the weather like in Paris?\"}]\n\nfor _ in range(10):\n    response = requests.post(url, json={\"messages\": messages, \"tools\": tools}).json()\n    choice = response[\"choices\"][0]\n\n    if choice[\"finish_reason\"] == \"tool_calls\":\n        messages.append({\n            \"role\": \"assistant\",\n            \"content\": choice[\"message\"][\"content\"],\n            \"tool_calls\": choice[\"message\"][\"tool_calls\"],\n        })\n\n        for tool_call in choice[\"message\"][\"tool_calls\"]:\n            name = tool_call[\"function\"][\"name\"]\n            arguments = json.loads(tool_call[\"function\"][\"arguments\"])\n            result = execute_tool(name, arguments)\n            print(f\"Tool call: {name}({arguments}) => {result}\")\n\n            messages.append({\n                \"role\": \"tool\",\n                \"tool_call_id\": tool_call[\"id\"],\n                \"content\": json.dumps(result),\n            })\n    else:\n        print(f\"\\nAssistant: 
{choice['message']['content']}\")\n        break\n```\n"
  },
  {
    "path": "docs/What Works.md",
    "content": "## What Works\n\n| Loader         | Loading LoRAs | Training LoRAs | Multimodal | Perplexity evaluation |\n|----------------|---------------|----------------|------------|-----------------------|\n| llama.cpp      |      ❌       |       ❌       |    ✅\\*    |           ❌          |\n| Transformers   |      ✅       |       ✅       |    ✅\\*\\*  |           ✅          |\n| ExLlamav3_HF   |      ❌       |       ❌       |    ❌      |           ✅          |\n| ExLlamav3      |      ❌       |       ❌       |    ✅      |           ❌          |\n| TensorRT-LLM   |      ❌       |       ❌       |    ❌      |           ❌          |\n\n❌ = not supported\n\n✅ = supported\n\n\\* Via the `mmproj` parameter (multimodal projector file).\n\n\\*\\* Via the `send_pictures` extension.\n"
  },
  {
    "path": "download-model.py",
    "content": "'''\nDownloads models from Hugging Face to user_data/models/username_modelname.\n\nExample:\npython download-model.py facebook/opt-1.3b\n\n'''\n\nimport argparse\nimport base64\nimport datetime\nimport hashlib\nimport json\nimport os\nimport re\nimport sys\nfrom multiprocessing import Array\nfrom pathlib import Path\nfrom time import sleep\n\nimport requests\nimport tqdm\nfrom requests.adapters import HTTPAdapter\nfrom requests.exceptions import ConnectionError, RequestException, Timeout\nfrom tqdm.contrib.concurrent import thread_map\n\nfrom modules.paths import resolve_user_data_dir\n\nbase = os.environ.get(\"HF_ENDPOINT\") or \"https://huggingface.co\"\n\n\nclass ModelDownloader:\n    def __init__(self, max_retries=7):\n        self.max_retries = max_retries\n        self.session = self.get_session()\n        self._progress_bar_slots = None\n        self.progress_queue = None\n\n    def get_session(self):\n        session = requests.Session()\n        if self.max_retries:\n            session.mount('https://cdn-lfs.huggingface.co', HTTPAdapter(max_retries=self.max_retries))\n            session.mount('https://huggingface.co', HTTPAdapter(max_retries=self.max_retries))\n\n        if os.getenv('HF_USER') is not None and os.getenv('HF_PASS') is not None:\n            session.auth = (os.getenv('HF_USER'), os.getenv('HF_PASS'))\n\n        try:\n            from huggingface_hub import get_token\n            token = get_token()\n        except ImportError:\n            token = os.getenv(\"HF_TOKEN\")\n\n        if token is not None:\n            session.headers = {'authorization': f'Bearer {token}'}\n\n        return session\n\n    def sanitize_model_and_branch_names(self, model, branch):\n        if model[-1] == '/':\n            model = model[:-1]\n\n        if model.startswith(base + '/'):\n            model = model[len(base) + 1:]\n\n        model_parts = model.split(\":\")\n        model = model_parts[0] if len(model_parts) > 0 else model\n        
branch = model_parts[1] if len(model_parts) > 1 else branch\n\n        if branch is None:\n            branch = \"main\"\n        else:\n            pattern = re.compile(r\"^[a-zA-Z0-9._-]+$\")\n            if not pattern.match(branch):\n                raise ValueError(\n                    \"Invalid branch name. Only alphanumeric characters, period, underscore and dash are allowed.\")\n\n        return model, branch\n\n    def get_download_links_from_huggingface(self, model, branch, text_only=False, specific_file=None, exclude_pattern=None):\n        session = self.session\n        page = f\"/api/models/{model}/tree/{branch}\"\n        cursor = b\"\"\n\n        links = []\n        sha256 = []\n        file_sizes = []\n        classifications = []\n        has_pytorch = False\n        has_pt = False\n        has_gguf = False\n        has_safetensors = False\n        is_lora = False\n        while True:\n            url = f\"{base}{page}\" + (f\"?cursor={cursor.decode()}\" if cursor else \"\")\n            r = session.get(url, timeout=10)\n            r.raise_for_status()\n            content = r.content\n\n            dict = json.loads(content)\n            if len(dict) == 0:\n                break\n\n            for i in range(len(dict)):\n                fname = dict[i]['path']\n                if specific_file not in [None, ''] and fname != specific_file:\n                    continue\n\n                # Exclude files matching the exclude pattern\n                if exclude_pattern is not None and re.match(exclude_pattern, fname):\n                    continue\n\n                if not is_lora and fname.endswith(('adapter_config.json', 'adapter_model.bin')):\n                    is_lora = True\n\n                is_pytorch = re.match(r\"(pytorch|adapter|gptq)_model.*\\.bin\", fname)\n                is_safetensors = re.match(r\".*\\.safetensors\", fname)\n                is_pt = re.match(r\".*\\.pt\", fname)\n                is_gguf = re.match(r\".*\\.gguf\", 
fname)\n                is_tiktoken = re.match(r\".*\\.tiktoken\", fname)\n                is_tokenizer = re.match(r\"(tokenizer|ice|spiece).*\\.model\", fname) or is_tiktoken\n                is_text = re.match(r\".*\\.(txt|json|py|md)\", fname) or is_tokenizer\n                if any((is_pytorch, is_safetensors, is_pt, is_gguf, is_tokenizer, is_text)):\n                    file_size = 0\n                    if 'lfs' in dict[i]:\n                        sha256.append([fname, dict[i]['lfs']['oid']])\n                        file_size = dict[i]['lfs'].get('size', 0)\n                    elif 'size' in dict[i]:\n                        file_size = dict[i]['size']\n\n                    file_sizes.append(file_size)\n\n                    if is_text:\n                        links.append(f\"{base}/{model}/resolve/{branch}/{fname}\")\n                        classifications.append('text')\n                        continue\n\n                    if not text_only:\n                        links.append(f\"{base}/{model}/resolve/{branch}/{fname}\")\n                        if is_safetensors:\n                            has_safetensors = True\n                            classifications.append('safetensors')\n                        elif is_pytorch:\n                            has_pytorch = True\n                            classifications.append('pytorch')\n                        elif is_pt:\n                            has_pt = True\n                            classifications.append('pt')\n                        elif is_gguf:\n                            has_gguf = True\n                            classifications.append('gguf')\n\n            cursor = base64.b64encode(f'{{\"file_name\":\"{dict[-1][\"path\"]}\"}}'.encode()) + b':50'\n            cursor = base64.b64encode(cursor)\n            cursor = cursor.replace(b'=', b'%3D')\n\n        # If both pytorch and safetensors are available, download safetensors only\n        # Also if GGUF and safetensors are available, 
download only safetensors\n        if (has_pytorch or has_pt or has_gguf) and has_safetensors:\n            has_gguf = False\n            for i in range(len(classifications) - 1, -1, -1):\n                if classifications[i] in ['pytorch', 'pt', 'gguf']:\n                    links.pop(i)\n                    file_sizes.pop(i)\n\n        # For GGUF, try to download only the Q4_K_M if no specific file is specified.\n        if has_gguf and specific_file is None:\n            has_q4km = False\n            for i in range(len(classifications) - 1, -1, -1):\n                if 'q4_k_m' in links[i].lower():\n                    has_q4km = True\n\n            if has_q4km:\n                for i in range(len(classifications) - 1, -1, -1):\n                    if 'q4_k_m' not in links[i].lower():\n                        links.pop(i)\n                        file_sizes.pop(i)\n            else:\n                for i in range(len(classifications) - 1, -1, -1):\n                    if links[i].lower().endswith('.gguf'):\n                        links.pop(i)\n                        file_sizes.pop(i)\n\n        is_llamacpp = has_gguf and specific_file is not None\n        return links, sha256, is_lora, is_llamacpp, file_sizes\n\n    def get_output_folder(self, model, branch, is_lora, is_llamacpp=False, model_dir=None, user_data_dir=None):\n        if model_dir:\n            base_folder = model_dir\n        else:\n            if user_data_dir is None:\n                user_data_dir = resolve_user_data_dir()\n            base_folder = str(user_data_dir / 'models') if not is_lora else str(user_data_dir / 'loras')\n\n        # If the model is of type GGUF, save directly in the base_folder\n        if is_llamacpp:\n            return Path(base_folder)\n\n        output_folder = f\"{'_'.join(model.split('/')[-2:])}\"\n        if branch != 'main':\n            output_folder += f'_{branch}'\n\n        output_folder = Path(base_folder) / output_folder\n        return 
output_folder\n\n    @property\n    def progress_bar_slots(self):\n        if self._progress_bar_slots is None:\n            raise RuntimeError(\"Progress bar slots not initialized. Start download threads first.\")\n\n        return self._progress_bar_slots\n\n    def initialize_progress_bar_slots(self, num_threads):\n        self._progress_bar_slots = Array(\"B\", [0] * num_threads)\n\n    def get_progress_bar_position(self):\n        with self.progress_bar_slots.get_lock():\n            for i in range(len(self.progress_bar_slots)):\n                if self.progress_bar_slots[i] == 0:\n                    self.progress_bar_slots[i] = 1\n                    return i\n\n        return 0  # fallback\n\n    def release_progress_bar_position(self, slot):\n        with self.progress_bar_slots.get_lock():\n            self.progress_bar_slots[slot] = 0\n\n    def get_single_file(self, url, output_folder, start_from_scratch=False):\n        filename = Path(url.rsplit('/', 1)[1])\n        output_path = output_folder / filename\n        progress_bar_position = self.get_progress_bar_position()\n\n        max_retries = self.max_retries\n        attempt = 0\n        file_downloaded_count_for_progress = 0\n\n        try:\n            while attempt < max_retries:\n                attempt += 1\n                session = self.session\n                headers = {}\n                mode = 'wb'\n                current_file_size_on_disk = 0\n\n                try:\n                    if output_path.exists() and not start_from_scratch:\n                        current_file_size_on_disk = output_path.stat().st_size\n\n                        # Make a HEAD request without following redirects to get metadata first\n                        r_head = session.head(url, timeout=20, allow_redirects=True)\n                        r_head.raise_for_status()  # Will raise an error for 4xx or 5xx status codes\n\n                        # Check for the new 'x-linked-size' header from Hugging Face\n  
                      if 'x-linked-size' in r_head.headers:\n                            total_size = int(r_head.headers['x-linked-size'])\n                        # Fallback to the old 'content-length' just in case\n                        elif 'content-length' in r_head.headers:\n                            total_size = int(r_head.headers.get('content-length', 0))\n                        else:\n                            total_size = 0\n\n                        if current_file_size_on_disk >= total_size and total_size > 0:\n                            if self.progress_queue is not None and total_size > 0:\n                                self.progress_queue.put((1.0, str(filename)))\n                            return\n\n                        headers = {'Range': f'bytes={current_file_size_on_disk}-'}\n                        mode = 'ab'\n\n                    with session.get(url, stream=True, headers=headers, timeout=30) as r:\n                        r.raise_for_status()\n                        total_size_from_stream = int(r.headers.get('content-length', 0))\n                        if mode == 'ab':\n                            effective_total_size = current_file_size_on_disk + total_size_from_stream\n                        else:\n                            effective_total_size = total_size_from_stream\n\n                        block_size = 1024 * 1024\n                        filename_str = str(filename)\n\n                        tqdm_kwargs = {\n                            'total': effective_total_size,\n                            'initial': current_file_size_on_disk if mode == 'ab' else 0,\n                            'unit': 'B',\n                            'unit_scale': True,\n                            'unit_divisor': 1024,\n                            'bar_format': '{desc}{percentage:3.0f}%|{bar:50}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}]',\n                            'desc': f\"{filename_str}: \",\n                         
   'position': progress_bar_position,\n                            'leave': False\n                        }\n\n                        if 'COLAB_GPU' in os.environ:\n                            tqdm_kwargs.update({\n                                'position': 0,\n                                'leave': True\n                            })\n\n                        with open(output_path, mode) as f:\n                            if mode == 'ab':\n                                f.seek(current_file_size_on_disk)\n\n                            with tqdm.tqdm(**tqdm_kwargs) as t:\n                                file_downloaded_count_for_progress = current_file_size_on_disk\n                                for data in r.iter_content(block_size):\n                                    f.write(data)\n                                    t.update(len(data))\n                                    if effective_total_size != 0 and self.progress_queue is not None:\n                                        file_downloaded_count_for_progress += len(data)\n                                        progress_fraction = float(file_downloaded_count_for_progress) / float(effective_total_size)\n                                        self.progress_queue.put((progress_fraction, filename_str))\n                        break\n\n                except (RequestException, ConnectionError, Timeout) as e:\n                    print(f\"Error downloading {filename}: {e}.\")\n                    print(f\"That was attempt {attempt}/{max_retries}.\", end=' ')\n                    if attempt < max_retries:\n                        print(f\"Retry begins in {2 ** attempt} seconds.\")\n                        sleep(2 ** attempt)\n                    else:\n                        print(\"Failed to download after the maximum number of attempts.\")\n        finally:\n            self.release_progress_bar_position(progress_bar_position)\n\n    def start_download_threads(self, file_list, output_folder, 
start_from_scratch=False, threads=4):\n        self.initialize_progress_bar_slots(threads)\n        tqdm.tqdm.set_lock(tqdm.tqdm.get_lock())\n        try:\n            thread_map(\n                lambda url: self.get_single_file(url, output_folder, start_from_scratch=start_from_scratch),\n                file_list,\n                max_workers=threads,\n                disable=True\n            )\n        finally:\n            print(f\"\\nDownload of {len(file_list)} files to {output_folder} completed.\")\n\n    def download_model_files(self, model, branch, links, sha256, output_folder, progress_queue=None, start_from_scratch=False, threads=4, specific_file=None, is_llamacpp=False):\n        self.progress_queue = progress_queue\n\n        output_folder.mkdir(parents=True, exist_ok=True)\n\n        if not is_llamacpp:\n            metadata = f'url: https://huggingface.co/{model}\\n' \\\n                       f'branch: {branch}\\n' \\\n                       f'download date: {datetime.datetime.now().strftime(\"%Y-%m-%d %H:%M:%S\")}\\n'\n\n            sha256_str = '\\n'.join([f'    {item[1]} {item[0]}' for item in sha256])\n            if sha256_str:\n                metadata += f'sha256sum:\\n{sha256_str}'\n\n            metadata += '\\n'\n            (output_folder / 'huggingface-metadata.txt').write_text(metadata)\n\n        if specific_file:\n            print(f\"Downloading {specific_file} to {output_folder}\")\n        else:\n            print(f\"Downloading the model to {output_folder}\")\n\n        self.start_download_threads(links, output_folder, start_from_scratch=start_from_scratch, threads=threads)\n\n    def check_model_files(self, model, branch, links, sha256, output_folder):\n        # Validate the checksums\n        validated = True\n        for i in range(len(sha256)):\n            fpath = (output_folder / sha256[i][0])\n\n            if not fpath.exists():\n                print(f\"The following file is missing: {fpath}\")\n                
validated = False\n                continue\n\n            with open(output_folder / sha256[i][0], \"rb\") as f:\n                bytes = f.read()\n                file_hash = hashlib.sha256(bytes).hexdigest()\n                if file_hash != sha256[i][1]:\n                    print(f'Checksum failed: {sha256[i][0]}  {sha256[i][1]}')\n                    validated = False\n                else:\n                    print(f'Checksum validated: {sha256[i][0]}  {sha256[i][1]}')\n\n        if validated:\n            print('[+] Validated checksums of all model files!')\n        else:\n            print('[-] Invalid checksums. Rerun download-model.py with the --clean flag.')\n\n\nif __name__ == '__main__':\n\n    parser = argparse.ArgumentParser()\n    parser.add_argument('MODEL', type=str, default=None, nargs='?')\n    parser.add_argument('--branch', type=str, default='main', help='Name of the Git branch to download from.')\n    parser.add_argument('--threads', type=int, default=4, help='Number of files to download simultaneously.')\n    parser.add_argument('--text-only', action='store_true', help='Only download text files (txt/json).')\n    parser.add_argument('--specific-file', type=str, default=None, help='Name of the specific file to download (if not provided, downloads all).')\n    parser.add_argument('--exclude-pattern', type=str, default=None, help='Regex pattern to exclude files from download.')\n    parser.add_argument('--output', type=str, default=None, help='Save the model files to this folder.')\n    parser.add_argument('--model-dir', type=str, default=None, help='Save the model files to a subfolder of this folder instead of the default one (user_data/models).')\n    parser.add_argument('--user-data-dir', type=str, default=None, help='Path to the user data directory. 
Overrides auto-detection.')\n    parser.add_argument('--clean', action='store_true', help='Does not resume the previous download.')\n    parser.add_argument('--check', action='store_true', help='Validates the checksums of model files.')\n    parser.add_argument('--max-retries', type=int, default=7, help='Max retries count when get error in download time.')\n    args = parser.parse_args()\n\n    branch = args.branch\n    model = args.MODEL\n    specific_file = args.specific_file\n    exclude_pattern = args.exclude_pattern\n\n    if model is None:\n        print(\"Error: Please specify the model you'd like to download (e.g. 'python download-model.py facebook/opt-1.3b').\")\n        sys.exit()\n\n    downloader = ModelDownloader(max_retries=args.max_retries)\n\n    # Handle direct file URLs (e.g. https://huggingface.co/org/repo/resolve/branch/file.gguf)\n    if '/resolve/' in model:\n        url = model if model.startswith('http') else f'{base}/{model}'\n        url = url.split('?')[0]\n        filename = url.split('/')[-1]\n\n        if args.output:\n            output_folder = Path(args.output)\n        elif args.model_dir:\n            output_folder = Path(args.model_dir)\n        else:\n            user_data_dir = Path(args.user_data_dir) if args.user_data_dir else resolve_user_data_dir()\n            output_folder = user_data_dir / 'models'\n\n        output_folder.mkdir(parents=True, exist_ok=True)\n        print(f\"Downloading {filename} to {output_folder}\")\n        downloader.get_single_file(url, output_folder, start_from_scratch=args.clean)\n        sys.exit()\n\n    # Clean up the model/branch names\n    try:\n        model, branch = downloader.sanitize_model_and_branch_names(model, branch)\n    except ValueError as err_branch:\n        print(f\"Error: {err_branch}\")\n        sys.exit()\n\n    # Get the download links from Hugging Face\n    links, sha256, is_lora, is_llamacpp, file_sizes = downloader.get_download_links_from_huggingface(\n        model, 
branch, text_only=args.text_only, specific_file=specific_file, exclude_pattern=exclude_pattern\n    )\n\n    # Get the output folder\n    user_data_dir = Path(args.user_data_dir) if args.user_data_dir else None\n    if args.output:\n        output_folder = Path(args.output)\n    else:\n        output_folder = downloader.get_output_folder(model, branch, is_lora, is_llamacpp=is_llamacpp, model_dir=args.model_dir, user_data_dir=user_data_dir)\n\n    if args.check:\n        # Check previously downloaded files\n        downloader.check_model_files(model, branch, links, sha256, output_folder)\n    else:\n        # Download files\n        downloader.download_model_files(\n            model, branch, links, sha256, output_folder,\n            specific_file=specific_file, threads=args.threads, is_llamacpp=is_llamacpp\n        )\n"
  },
  {
    "path": "js/dark_theme.js",
    "content": "function toggleDarkMode() {\n  document.body.classList.toggle(\"dark\");\n  var currentCSS = document.getElementById(\"highlight-css\");\n  if (currentCSS.getAttribute(\"href\") === \"file/css/highlightjs/github-dark.min.css\") {\n    currentCSS.setAttribute(\"href\", \"file/css/highlightjs/github.min.css\");\n  } else {\n    currentCSS.setAttribute(\"href\", \"file/css/highlightjs/github-dark.min.css\");\n  }\n\n  // Re-highlight all code blocks once stylesheet loads\n  currentCSS.onload = function() {\n    const messageBodies = document.getElementById(\"chat\").querySelectorAll(\".message-body\");\n    messageBodies.forEach((messageBody) => {\n      const codeBlocks = messageBody.querySelectorAll(\"pre code\");\n      codeBlocks.forEach((codeBlock) => {\n        hljs.highlightElement(codeBlock);\n      });\n    });\n  };\n}\n"
  },
  {
    "path": "js/global_scope_js.js",
    "content": "// -------------------------------------------------\n// Event handlers\n// -------------------------------------------------\n\nfunction copyToClipboard(element) {\n  if (!element) return;\n\n  const messageElement = element.closest(\".message, .user-message, .assistant-message\");\n  if (!messageElement) return;\n\n  const rawText = messageElement.getAttribute(\"data-raw\");\n  if (!rawText) return;\n\n  const copyPromise = navigator.clipboard && window.isSecureContext\n    ? navigator.clipboard.writeText(rawText)\n    : fallbackCopyToClipboard(rawText);\n\n  copyPromise.then(function() {\n    const originalSvg = element.innerHTML;\n    element.innerHTML = \"<svg xmlns=\\\"http://www.w3.org/2000/svg\\\" width=\\\"20\\\" height=\\\"20\\\" viewBox=\\\"0 0 24 24\\\" fill=\\\"none\\\" stroke=\\\"currentColor\\\" stroke-width=\\\"2\\\" stroke-linecap=\\\"round\\\" stroke-linejoin=\\\"round\\\" class=\\\"text-green-500 dark:text-green-400\\\"><path d=\\\"M5 12l5 5l10 -10\\\"></path></svg>\";\n    setTimeout(() => {\n      element.innerHTML = originalSvg;\n    }, 1000);\n  }).catch(function(err) {\n    console.error(\"Failed to copy text: \", err);\n  });\n}\n\nfunction fallbackCopyToClipboard(text) {\n  return new Promise((resolve, reject) => {\n    const textArea = document.createElement(\"textarea\");\n    textArea.value = text;\n    textArea.style.position = \"fixed\";\n    textArea.style.left = \"-9999px\";\n    textArea.style.top = \"-9999px\";\n    document.body.appendChild(textArea);\n    textArea.focus();\n    textArea.select();\n    try {\n      const successful = document.execCommand(\"copy\");\n      document.body.removeChild(textArea);\n      successful ? 
resolve() : reject();\n    } catch (err) {\n      document.body.removeChild(textArea);\n      reject(err);\n    }\n  });\n}\n\nfunction branchHere(element) {\n  if (!element) return;\n\n  const messageElement = element.closest(\".message, .user-message, .assistant-message\");\n  if (!messageElement) return;\n\n  const index = messageElement.getAttribute(\"data-index\");\n  if (!index) return;\n\n  const branchIndexInput = document.getElementById(\"Branch-index\").querySelector(\"input\");\n  if (!branchIndexInput) {\n    console.error(\"Element with ID 'Branch-index' not found.\");\n    return;\n  }\n  const branchButton = document.getElementById(\"Branch\");\n\n  if (!branchButton) {\n    console.error(\"Required element 'Branch' not found.\");\n    return;\n  }\n\n  branchIndexInput.value = index;\n\n  // Trigger any 'change' or 'input' events Gradio might be listening for\n  const event = new Event(\"input\", { bubbles: true });\n  branchIndexInput.dispatchEvent(event);\n\n  branchButton.click();\n}\n\n// -------------------------------------------------\n// Message Editing Functions\n// -------------------------------------------------\n\nfunction editHere(buttonElement) {\n  if (!buttonElement) return;\n\n  const messageElement = buttonElement.closest(\".message, .user-message, .assistant-message\");\n  if (!messageElement) return;\n\n  const messageBody = messageElement.querySelector(\".message-body\");\n  if (!messageBody) return;\n\n  // If already editing, focus the textarea\n  const existingTextarea = messageBody.querySelector(\".editing-textarea\");\n  if (existingTextarea) {\n    existingTextarea.focus();\n    return;\n  }\n\n  // Determine role based on message element - handle different chat modes\n  const isUserMessage = messageElement.classList.contains(\"user-message\") ||\n                       messageElement.querySelector(\".text-you\") !== null ||\n                       messageElement.querySelector(\".circle-you\") !== null;\n\n  
startEditing(messageElement, messageBody, isUserMessage);\n}\n\nfunction startEditing(messageElement, messageBody, isUserMessage) {\n  const rawText = messageElement.getAttribute(\"data-raw\") || messageBody.textContent;\n  const originalHTML = messageBody.innerHTML;\n\n  // Create editing interface\n  const editingInterface = createEditingInterface(rawText);\n\n  // Replace message content\n  messageBody.innerHTML = \"\";\n  messageBody.appendChild(editingInterface.textarea);\n  messageBody.appendChild(editingInterface.controls);\n\n  editingInterface.textarea.focus();\n  editingInterface.textarea.setSelectionRange(rawText.length, rawText.length);\n\n  // Temporarily mark as scrolled to prevent auto-scroll\n  const wasScrolled = window.isScrolled;\n  window.isScrolled = true;\n\n  // Scroll the textarea into view\n  editingInterface.textarea.scrollIntoView({\n    behavior: \"smooth\",\n    block: \"center\"\n  });\n\n  // Restore the original scroll state after animation\n  setTimeout(() => {\n    window.isScrolled = wasScrolled;\n  }, 500);\n\n  // Setup event handlers\n  setupEditingHandlers(editingInterface.textarea, messageElement, originalHTML, messageBody, isUserMessage);\n}\n\nfunction createEditingInterface(text) {\n  const textarea = document.createElement(\"textarea\");\n  textarea.value = text;\n  textarea.className = \"editing-textarea\";\n  textarea.rows = Math.max(3, text.split(\"\\n\").length);\n\n  const controls = document.createElement(\"div\");\n  controls.className = \"edit-controls-container\";\n\n  const saveButton = document.createElement(\"button\");\n  saveButton.textContent = \"Save\";\n  saveButton.className = \"edit-control-button\";\n  saveButton.type = \"button\";\n\n  const cancelButton = document.createElement(\"button\");\n  cancelButton.textContent = \"Cancel\";\n  cancelButton.className = \"edit-control-button edit-cancel-button\";\n  cancelButton.type = \"button\";\n\n  controls.appendChild(saveButton);\n  
controls.appendChild(cancelButton);\n\n  return { textarea, controls, saveButton, cancelButton };\n}\n\nfunction setupEditingHandlers(textarea, messageElement, originalHTML, messageBody, isUserMessage) {\n  const saveButton = messageBody.querySelector(\".edit-control-button:not(.edit-cancel-button)\");\n  const cancelButton = messageBody.querySelector(\".edit-cancel-button\");\n\n  const submitEdit = () => {\n    const index = messageElement.getAttribute(\"data-index\");\n    if (!index || !submitMessageEdit(index, textarea.value, isUserMessage)) {\n      cancelEdit();\n    }\n  };\n\n  const cancelEdit = () => {\n    messageBody.innerHTML = originalHTML;\n  };\n\n  // Event handlers\n  saveButton.onclick = submitEdit;\n  cancelButton.onclick = cancelEdit;\n\n  textarea.onkeydown = (e) => {\n    if (e.key === \"Enter\" && !e.shiftKey) {\n      e.preventDefault();\n      submitEdit();\n    } else if (e.key === \"Escape\") {\n      e.preventDefault();\n      cancelEdit();\n    }\n  };\n}\n\nfunction submitMessageEdit(index, newText, isUserMessage) {\n  const editIndexInput = document.getElementById(\"Edit-message-index\")?.querySelector(\"input\");\n  const editTextInput = document.getElementById(\"Edit-message-text\")?.querySelector(\"textarea\");\n  const editRoleInput = document.getElementById(\"Edit-message-role\")?.querySelector(\"textarea\");\n  const editButton = document.getElementById(\"Edit-message\");\n\n  if (!editIndexInput || !editTextInput || !editRoleInput || !editButton) {\n    console.error(\"Edit elements not found\");\n    return false;\n  }\n\n  editIndexInput.value = index;\n  editTextInput.value = newText;\n  editRoleInput.value = isUserMessage ? 
\"user\" : \"assistant\";\n\n  editIndexInput.dispatchEvent(new Event(\"input\", { bubbles: true }));\n  editTextInput.dispatchEvent(new Event(\"input\", { bubbles: true }));\n  editRoleInput.dispatchEvent(new Event(\"input\", { bubbles: true }));\n\n  editButton.click();\n  return true;\n}\n\nfunction navigateVersion(element, direction) {\n  if (!element) return;\n\n  const messageElement = element.closest(\".message, .user-message, .assistant-message\");\n  if (!messageElement) return;\n\n  const index = messageElement.getAttribute(\"data-index\");\n  if (!index) return;\n\n  // Determine role based on message element classes\n  let role = \"assistant\"; // Default role\n  if (messageElement.classList.contains(\"user-message\") ||\n      messageElement.querySelector(\".text-you\") ||\n      messageElement.querySelector(\".circle-you\")) {\n    role = \"user\";\n  }\n\n  const indexInput = document.getElementById(\"Navigate-message-index\")?.querySelector(\"input\");\n  const directionInput = document.getElementById(\"Navigate-direction\")?.querySelector(\"textarea\");\n  const roleInput = document.getElementById(\"Navigate-message-role\")?.querySelector(\"textarea\");\n  const navigateButton = document.getElementById(\"Navigate-version\");\n\n  if (!indexInput || !directionInput || !roleInput || !navigateButton) {\n    console.error(\"Navigation control elements (index, direction, role, or button) not found.\");\n    return;\n  }\n\n  indexInput.value = index;\n  directionInput.value = direction;\n  roleInput.value = role;\n\n  // Trigger 'input' events for Gradio to pick up changes\n  const event = new Event(\"input\", { bubbles: true });\n  indexInput.dispatchEvent(event);\n  directionInput.dispatchEvent(event);\n  roleInput.dispatchEvent(event);\n\n  navigateButton.click();\n}\n\nfunction regenerateClick() {\n  document.getElementById(\"Regenerate\").click();\n}\n\nfunction continueClick() {\n  document.getElementById(\"Continue\").click();\n}\n\nfunction 
removeLastClick() {\n  document.getElementById(\"Remove-last\").click();\n}\n\nfunction autoScrollToBottom() {\n  if (!window.isScrolled) {\n    const chatParent = document.getElementById(\"chat\")?.parentNode?.parentNode?.parentNode;\n    if (chatParent) {\n      const maxScroll = chatParent.scrollHeight - chatParent.clientHeight;\n      if (maxScroll > 0 && chatParent.scrollTop < maxScroll - 1) {\n        chatParent.scrollTop = maxScroll;\n      }\n    }\n  }\n}\n\nfunction updateInstructPadding() {\n  const chatElement = document.getElementById(\"chat\");\n  if (chatElement && chatElement.getAttribute(\"data-mode\") === \"instruct\") {\n    const messagesContainer = chatElement.querySelector(\".messages\");\n    const lastChild = messagesContainer?.lastElementChild;\n    const prevSibling = lastChild?.previousElementSibling;\n    if (lastChild && prevSibling && chatElement.offsetHeight > 0) {\n      let bufferHeight = Math.max(0, Math.max(window.innerHeight - 128 - 84, window.innerHeight - prevSibling.offsetHeight - 84) - lastChild.offsetHeight);\n      if (window.innerWidth <= 924) {\n        bufferHeight = Math.max(0, bufferHeight - 32);\n      }\n      messagesContainer.style.paddingBottom = `${bufferHeight}px`;\n    }\n  }\n}\n\nlet pendingMorphdomData = null;\nlet morphdomRafId = null;\n\nfunction handleMorphdomUpdate(data) {\n  pendingMorphdomData = data;\n  if (!morphdomRafId) {\n    morphdomRafId = requestAnimationFrame(() => {\n      morphdomRafId = null;\n      applyMorphdomUpdate(pendingMorphdomData);\n      pendingMorphdomData = null;\n    });\n  }\n}\n\nfunction applyMorphdomUpdate(data) {\n  // Determine target element and use it as query scope\n  var target_element, target_html;\n  if (data.last_message_only) {\n    const childNodes = document.getElementsByClassName(\"messages\")[0].childNodes;\n    target_element = childNodes[childNodes.length - 1];\n    target_html = data.html;\n  } else {\n    target_element = 
document.getElementById(\"chat\").parentNode;\n    target_html =  \"<div class=\\\"prose svelte-1ybaih5\\\">\" + data.html + \"</div>\";\n  }\n\n  const queryScope = target_element;\n\n  // Track open blocks and store their scroll positions\n  const openBlocks = new Set();\n  const scrollPositions = {};\n  queryScope.querySelectorAll(\".thinking-block\").forEach(block => {\n    const blockId = block.getAttribute(\"data-block-id\");\n    if (blockId && block.hasAttribute(\"open\")) {\n      openBlocks.add(blockId);\n      const content = block.querySelector(\".thinking-content\");\n      if (content) {\n        const isAtBottom = Math.abs((content.scrollHeight - content.scrollTop) - content.clientHeight) < 5;\n        scrollPositions[blockId] = {\n          position: content.scrollTop,\n          isAtBottom: isAtBottom\n        };\n      }\n    }\n  });\n\n  morphdom(\n    target_element,\n    target_html,\n    {\n      onBeforeElUpdated: function(fromEl, toEl) {\n        // Preserve code highlighting\n        if (fromEl.tagName === \"PRE\") {\n          const fromCode = fromEl.querySelector(\"code[data-highlighted]\");\n          const toCode = toEl.querySelector(\"code\");\n\n          if (fromCode && toCode && fromCode.textContent === toCode.textContent) {\n            toEl.className = fromEl.className;\n            toEl.innerHTML = fromEl.innerHTML;\n            return false;\n          }\n        }\n\n        // For thinking blocks, assume closed by default\n        if (fromEl.classList && fromEl.classList.contains(\"thinking-block\") &&\n           toEl.classList && toEl.classList.contains(\"thinking-block\")) {\n          const blockId = toEl.getAttribute(\"data-block-id\");\n          // Remove open attribute by default\n          toEl.removeAttribute(\"open\");\n          // If this block was explicitly opened by user, keep it open\n          if (blockId && openBlocks.has(blockId)) {\n            toEl.setAttribute(\"open\", \"\");\n          }\n        
}\n\n        return !fromEl.isEqualNode(toEl);\n      },\n\n      onElUpdated: function(el) {\n        // Restore scroll positions for open thinking blocks\n        if (el.classList && el.classList.contains(\"thinking-block\") && el.hasAttribute(\"open\")) {\n          const blockId = el.getAttribute(\"data-block-id\");\n          const content = el.querySelector(\".thinking-content\");\n\n          if (content && blockId && scrollPositions[blockId]) {\n            setTimeout(() => {\n              if (scrollPositions[blockId].isAtBottom) {\n                content.scrollTop = content.scrollHeight;\n              } else {\n                content.scrollTop = scrollPositions[blockId].position;\n              }\n            }, 0);\n          }\n        }\n      }\n    }\n  );\n\n  // Syntax highlighting and LaTeX\n  if (window.doSyntaxHighlighting) {\n    window.doSyntaxHighlighting();\n  }\n\n  // Auto-scroll runs both before and after padding update.\n  // Before: so content growth isn't hidden by padding absorption.\n  // After: so padding-added space is also scrolled into view.\n  autoScrollToBottom();\n  updateInstructPadding();\n  autoScrollToBottom();\n\n  // Add toggle listeners for new blocks\n  queryScope.querySelectorAll(\".thinking-block\").forEach(block => {\n    if (!block._hasToggleListener) {\n      block.addEventListener(\"toggle\", function(e) {\n        const wasScrolled = window.isScrolled;\n        if (this.open) {\n          const content = this.querySelector(\".thinking-content\");\n          if (content) {\n            setTimeout(() => {\n              content.scrollTop = content.scrollHeight;\n            }, 0);\n          }\n        }\n        autoScrollToBottom();\n        updateInstructPadding();\n        autoScrollToBottom();\n        // Restore scroll state so the browser's layout adjustment\n        // from the toggle doesn't disable auto-scroll\n        window.isScrolled = wasScrolled;\n      });\n      block._hasToggleListener = 
true;\n    }\n  });\n}\n"
  },
  {
    "path": "js/katex/auto-render.js",
    "content": "! function(e, t) {\n    \"object\" == typeof exports && \"object\" == typeof module ? module.exports = t(require(\"katex\")) : \"function\" == typeof define && define.amd ? define([\"katex\"], t) : \"object\" == typeof exports ? exports.renderMathInElement = t(require(\"katex\")) : e.renderMathInElement = t(e.katex)\n}(\"undefined\" != typeof self ? self : this, (function(e) {\n    return function() {\n        \"use strict\";\n        var t = {\n                771: function(t) {\n                    t.exports = e\n                }\n            },\n            n = {};\n\n        function r(e) {\n            var o = n[e];\n            if (void 0 !== o) return o.exports;\n            var i = n[e] = {\n                exports: {}\n            };\n            return t[e](i, i.exports, r), i.exports\n        }\n        r.n = function(e) {\n            var t = e && e.__esModule ? function() {\n                return e.default\n            } : function() {\n                return e\n            };\n            return r.d(t, {\n                a: t\n            }), t\n        }, r.d = function(e, t) {\n            for (var n in t) r.o(t, n) && !r.o(e, n) && Object.defineProperty(e, n, {\n                enumerable: !0,\n                get: t[n]\n            })\n        }, r.o = function(e, t) {\n            return Object.prototype.hasOwnProperty.call(e, t)\n        };\n        var o = {};\n        return function() {\n            r.d(o, {\n                default: function() {\n                    return d\n                }\n            });\n            var e = r(771),\n                t = r.n(e);\n            const n = function(e, t, n) {\n                    let r = n,\n                        o = 0;\n                    const i = e.length;\n                    for (; r < t.length;) {\n                        const n = t[r];\n                        if (o <= 0 && t.slice(r, r + i) === e) return r;\n                        \"\\\\\" === n ? 
r++ : \"{\" === n ? o++ : \"}\" === n && o--, r++\n                    }\n                    return -1\n                },\n                i = /^\\\\begin{/;\n            var a = function(e, t) {\n                let r;\n                const o = [],\n                    a = new RegExp(\"(\" + t.map((e => e.left.replace(/[-/\\\\^$*+?.()|[\\]{}]/g, \"\\\\$&\"))).join(\"|\") + \")\");\n                for (; r = e.search(a), -1 !== r;) {\n                    const charAfterOpen = e[r + 1];\n                    if (e[r] == \"$\" && charAfterOpen != \"$\") {\n                        const closeDollarIndex = e.indexOf('$', r + 1);\n                        if (closeDollarIndex != -1) {\n                            const charBeforeOpen = r > 0 ? e[r - 1] : '';\n                            const charBeforeClose = r + 1 < closeDollarIndex ? e[closeDollarIndex - 1] : '';\n                            const charBeforeBeforeClose = r + 1 < closeDollarIndex ? e[closeDollarIndex - 2] : '';\n                            const charAfterClose = closeDollarIndex + 1 < e.length ? 
e[closeDollarIndex + 1] : '';\n                            if ((/[A-Za-z0-9_$-]/.test(charBeforeOpen)) || ((' ' == charBeforeClose) ||\n                                                                             /[0-9]/.test(charAfterOpen) &&\n                                                                             (/[A-Za-z0-9]/.test(charAfterClose)\n                                                                              || '-' == charBeforeClose))) {\n                                 o.push({\n                                     type: \"text\",\n                                     data: e.slice(0, r + 1),\n                                 });\n                                e = e.slice(r + 1); // now text starts after delimiter\n                                continue;\n                            }\n                        }\n                    }\n                    r > 0 && (o.push({\n                        type: \"text\",\n                        data: e.slice(0, r)\n                    }), e = e.slice(r));\n                    const a = t.findIndex((t => e.startsWith(t.left)));\n                    if (r = n(t[a].right, e, t[a].left.length), -1 === r) break;\n                    const l = e.slice(0, r + t[a].right.length),\n                        s = i.test(l) ? 
l : e.slice(t[a].left.length, r);\n                    o.push({\n                        type: \"math\",\n                        data: s,\n                        rawData: l,\n                        display: t[a].display\n                    }), e = e.slice(r + t[a].right.length)\n                }\n                return \"\" !== e && o.push({\n                    type: \"text\",\n                    data: e\n                }), o\n            };\n            const l = function(e, n) {\n                    const r = a(e, n.delimiters);\n                    if (1 === r.length && \"text\" === r[0].type) return null;\n                    const o = document.createDocumentFragment();\n                    for (let e = 0; e < r.length; e++)\n                        if (\"text\" === r[e].type) o.appendChild(document.createTextNode(r[e].data));\n                        else {\n                            const i = document.createElement(\"span\");\n                            let a = r[e].data;\n                            n.displayMode = r[e].display;\n                            try {\n                                n.preProcess && (a = n.preProcess(a)), t().render(a, i, n)\n                            } catch (i) {\n                                if (!(i instanceof t().ParseError)) throw i;\n                                n.errorCallback(\"KaTeX auto-render: Failed to parse `\" + r[e].data + \"` with \", i), o.appendChild(document.createTextNode(r[e].rawData));\n                                continue\n                            }\n                            o.appendChild(i)\n                        }\n                    return o\n                },\n                s = function(e, t) {\n                    for (let n = 0; n < e.childNodes.length; n++) {\n                        const r = e.childNodes[n];\n                        if (3 === r.nodeType) {\n                            let o = r.textContent,\n                                i = r.nextSibling,\n     
                           a = 0;\n                            for (; i && i.nodeType === Node.TEXT_NODE;) o += i.textContent, i = i.nextSibling, a++;\n                            const s = l(o, t);\n                            if (s) {\n                                for (let e = 0; e < a; e++) r.nextSibling.remove();\n                                n += s.childNodes.length - 1, e.replaceChild(s, r)\n                            } else n += a\n                        } else if (1 === r.nodeType) {\n                            const e = \" \" + r.className + \" \"; - 1 === t.ignoredTags.indexOf(r.nodeName.toLowerCase()) && t.ignoredClasses.every((t => -1 === e.indexOf(\" \" + t + \" \"))) && s(r, t)\n                        }\n                    }\n                };\n            var d = function(e, t) {\n                if (!e) throw new Error(\"No element provided to render\");\n                const n = {};\n                for (const e in t) t.hasOwnProperty(e) && (n[e] = t[e]);\n                n.delimiters = n.delimiters || [{\n                    left: \"$$\",\n                    right: \"$$\",\n                    display: !0\n                }, {\n                    left: \"\\\\(\",\n                    right: \"\\\\)\",\n                    display: !1\n                }, {\n                    left: \"\\\\begin{equation}\",\n                    right: \"\\\\end{equation}\",\n                    display: !0\n                }, {\n                    left: \"\\\\begin{align}\",\n                    right: \"\\\\end{align}\",\n                    display: !0\n                }, {\n                    left: \"\\\\begin{alignat}\",\n                    right: \"\\\\end{alignat}\",\n                    display: !0\n                }, {\n                    left: \"\\\\begin{gather}\",\n                    right: \"\\\\end{gather}\",\n                    display: !0\n                }, {\n                    left: \"\\\\begin{CD}\",\n                    
right: \"\\\\end{CD}\",\n                    display: !0\n                }, {\n                    left: \"\\\\[\",\n                    right: \"\\\\]\",\n                    display: !0\n                }], n.ignoredTags = n.ignoredTags || [\"script\", \"noscript\", \"style\", \"textarea\", \"pre\", \"code\", \"option\"], n.ignoredClasses = n.ignoredClasses || [], n.errorCallback = n.errorCallback || console.error, n.macros = n.macros || {}, s(e, n)\n            }\n        }(), o = o.default\n    }()\n}));\n"
  },
  {
    "path": "js/main.js",
    "content": "// ------------------------------------------------\n// Main\n// ------------------------------------------------\n\n// Sync highlight.js theme with the actual Gradio theme\nvar defined_hljs_css = document.body.classList.contains(\"dark\") ? \"file/css/highlightjs/github-dark.min.css\" : \"file/css/highlightjs/github.min.css\";\nif (document.getElementById(\"highlight-css\").getAttribute(\"href\") !== defined_hljs_css) {\n  document.getElementById(\"highlight-css\").setAttribute(\"href\", defined_hljs_css);\n}\n\nlet main_parent = document.getElementById(\"chat-tab\").parentNode;\nlet extensions = document.getElementById(\"extensions\");\n\nmain_parent.childNodes[0].classList.add(\"header_bar\");\nmain_parent.style = \"padding: 0; margin: 0\";\nmain_parent.parentNode.style = \"gap: 0\";\nmain_parent.parentNode.parentNode.style = \"padding: 0\";\n\ndocument.querySelector(\".header_bar\").addEventListener(\"click\", function(event) {\n  if (event.target.tagName !== \"BUTTON\") return;\n\n  const buttonText = event.target.textContent.trim();\n  const extensionsVisible = [\"Chat\", \"Default\", \"Notebook\"].includes(buttonText);\n  const chatVisible = buttonText === \"Chat\";\n  const showControlsChecked = document.querySelector(\"#show-controls input\").checked;\n  const extensions = document.querySelector(\"#extensions\");\n\n  if (extensionsVisible) {\n    if (extensions) {\n      extensions.style.display = \"flex\";\n    }\n\n    this.style.marginBottom = chatVisible ? 
\"0px\" : \"19px\";\n\n    if (chatVisible && !showControlsChecked) {\n      document.querySelectorAll(\"#extensions\").forEach(element => {\n        element.style.display = \"none\";\n      });\n    }\n\n  } else {\n    this.style.marginBottom = \"19px\";\n    if (extensions) extensions.style.display = \"none\";\n  }\n});\n\n//------------------------------------------------\n// Keyboard shortcuts\n//------------------------------------------------\n\n// --- Helper functions --- //\nfunction isModifiedKeyboardEvent() {\n  return (event instanceof KeyboardEvent &&\n    event.shiftKey ||\n    event.ctrlKey ||\n    event.altKey ||\n    event.metaKey);\n}\n\nfunction isFocusedOnEditableTextbox() {\n  if (event.target.tagName === \"INPUT\" || event.target.tagName === \"TEXTAREA\") {\n    return !!event.target.value;\n  }\n}\n\nlet previousTabId = \"chat-tab-button\";\ndocument.addEventListener(\"keydown\", function(event) {\n  // Stop generation on Esc pressed\n  if (event.key === \"Escape\") {\n    // Find the element with id 'stop' and click it\n    var stopButton = document.getElementById(\"stop\");\n    if (stopButton) {\n      stopButton.click();\n    }\n    return;\n  }\n\n  if (!document.querySelector(\"#chat-tab\").checkVisibility() ) {\n    return;\n  }\n\n  // Show chat controls on Ctrl + S\n  if (event.ctrlKey && event.key == \"s\") {\n    event.preventDefault();\n\n    var showControlsElement = document.getElementById(\"show-controls\");\n    if (showControlsElement && showControlsElement.childNodes.length >= 4) {\n      showControlsElement.childNodes[3].click();\n\n      var arr = document.getElementById(\"chat-input\").childNodes[2].childNodes;\n      arr[arr.length - 1].focus();\n    }\n  }\n\n  // Regenerate on Ctrl + Enter\n  else if (event.ctrlKey && event.key === \"Enter\") {\n    event.preventDefault();\n    document.getElementById(\"Regenerate\").click();\n  }\n\n  // Continue on Alt + Enter\n  else if (event.altKey && event.key === \"Enter\") {\n  
  event.preventDefault();\n    document.getElementById(\"Continue\").click();\n  }\n\n  // Remove last on Ctrl + Shift + Backspace\n  else if (event.ctrlKey && event.shiftKey && event.key === \"Backspace\") {\n    event.preventDefault();\n    document.getElementById(\"Remove-last\").click();\n  }\n\n  // Impersonate on Ctrl + Shift + M\n  else if (event.ctrlKey && event.shiftKey && event.key === \"M\") {\n    event.preventDefault();\n    document.getElementById(\"Impersonate\").click();\n  }\n\n  // --- Simple version navigation --- //\n  if (!isFocusedOnEditableTextbox()) {\n    // Version navigation on Arrow keys (horizontal)\n    if (!isModifiedKeyboardEvent() && event.key === \"ArrowLeft\") {\n      event.preventDefault();\n      navigateLastAssistantMessage(\"left\");\n    }\n\n    else if (!isModifiedKeyboardEvent() && event.key === \"ArrowRight\") {\n      event.preventDefault();\n      if (!navigateLastAssistantMessage(\"right\")) {\n        // If can't navigate right (last version), regenerate\n        document.getElementById(\"Regenerate\").click();\n      }\n    }\n  }\n\n});\n\n//------------------------------------------------\n// Position the chat typing dots\n//------------------------------------------------\ntyping = document.getElementById(\"typing-container\");\ntypingParent = typing.parentNode;\ntypingSibling = typing.previousElementSibling;\ntypingSibling.insertBefore(typing, typingSibling.childNodes[2]);\n\n//------------------------------------------------\n// Chat scrolling\n//------------------------------------------------\nconst targetElement = document.getElementById(\"chat\").parentNode.parentNode.parentNode;\ntargetElement.classList.add(\"pretty_scrollbar\");\ntargetElement.classList.add(\"chat-parent\");\nwindow.isScrolled = false;\nlet scrollTimeout;\nlet lastScrollTop = 0;\nlet lastScrollHeight = 0;\nlet lastClientHeight = 0;\n\ntargetElement.addEventListener(\"scroll\", function() {\n  let diff = targetElement.scrollHeight - 
targetElement.clientHeight;\n  let isAtBottomNow = Math.abs(targetElement.scrollTop - diff) <= 10 || diff <= 0;\n\n  // Add scrolling class to disable hover effects\n  if (window.isScrolled || !isAtBottomNow) {\n    targetElement.classList.add(\"scrolling\");\n  }\n\n  if(isAtBottomNow) {\n    window.isScrolled = false;\n  } else if (targetElement.scrollTop < lastScrollTop && targetElement.scrollHeight >= lastScrollHeight && targetElement.clientHeight <= lastClientHeight) {\n    window.isScrolled = true;\n  }\n  lastScrollTop = targetElement.scrollTop;\n  lastScrollHeight = targetElement.scrollHeight;\n  lastClientHeight = targetElement.clientHeight;\n\n  // Clear previous timeout and set new one\n  clearTimeout(scrollTimeout);\n  scrollTimeout = setTimeout(() => {\n    targetElement.classList.remove(\"scrolling\");\n    doSyntaxHighlighting(); // Only run after scrolling stops\n  }, 150);\n});\n\n// Create a MutationObserver instance\nconst observer = new MutationObserver(function() {\n  if (targetElement.classList.contains(\"_generating\")) {\n    typing.parentNode.classList.add(\"visible-dots\");\n    document.getElementById(\"stop\").style.display = \"flex\";\n    document.getElementById(\"Generate\").style.display = \"none\";\n    // If the user is near the bottom, ensure auto-scroll is enabled\n    // for the new reply. This catches cases where isScrolled was\n    // incorrectly set to true by layout shifts during page load, etc.\n    const diff = targetElement.scrollHeight - targetElement.clientHeight;\n    if (Math.abs(targetElement.scrollTop - diff) <= 10 || diff <= 0) {\n      window.isScrolled = false;\n    }\n  } else {\n    typing.parentNode.classList.remove(\"visible-dots\");\n    document.getElementById(\"stop\").style.display = \"none\";\n    document.getElementById(\"Generate\").style.display = \"flex\";\n  }\n});\n\n// Only watch for attribute changes on targetElement (e.g. 
_generating class)\nconst config = {\n  attributes: true\n};\n\n// Start observing the target element\nobserver.observe(targetElement, config);\n\n//------------------------------------------------\n// Handle syntax highlighting / LaTeX\n//------------------------------------------------\nfunction isElementVisibleOnScreen(element) {\n  const rect = element.getBoundingClientRect();\n  return (\n    rect.left < window.innerWidth &&\n    rect.right > 0 &&\n    rect.top < window.innerHeight &&\n    rect.bottom > 0\n  );\n}\n\nwindow.doSyntaxHighlighting = function() {\n  const messageBodies = document.getElementById(\"chat\").querySelectorAll(\".message-body\");\n\n  if (messageBodies.length > 0) {\n    let hasSeenVisible = false;\n\n    // Go from last message to first\n    for (let i = messageBodies.length - 1; i >= 0; i--) {\n      const messageBody = messageBodies[i];\n\n      if (isElementVisibleOnScreen(messageBody)) {\n        hasSeenVisible = true;\n\n        // Handle both code and math in a single pass through each message\n        const codeBlocks = messageBody.querySelectorAll(\"pre code:not([data-highlighted])\");\n        codeBlocks.forEach((codeBlock) => {\n          hljs.highlightElement(codeBlock);\n          codeBlock.setAttribute(\"data-highlighted\", \"true\");\n          codeBlock.classList.add(\"pretty_scrollbar\");\n        });\n\n        // Only render math in visible elements\n        const mathContainers = messageBody.querySelectorAll(\"p, span, li, td, th, h1, h2, h3, h4, h5, h6, blockquote, figcaption, caption, dd, dt\");\n        mathContainers.forEach(container => {\n          if (isElementVisibleOnScreen(container)) {\n            renderMathInElement(container, {\n              delimiters: [\n                { left: \"$$\", right: \"$$\", display: true },\n                { left: \"$\", right: \"$\", display: false },\n                { left: \"\\\\(\", right: \"\\\\)\", display: false },\n                { left: \"\\\\[\", right: 
\"\\\\]\", display: true },\n              ],\n            });\n          }\n        });\n      } else if (hasSeenVisible) {\n      // We've seen visible messages but this one is not visible\n      // Since we're going from last to first, we can break\n        break;\n      }\n    }\n  }\n}\nconst doSyntaxHighlighting = window.doSyntaxHighlighting;\n\n//------------------------------------------------\n// Add some scrollbars\n//------------------------------------------------\nconst scrollbarElements = document.querySelectorAll(\".add_scrollbar textarea, .add_scrollbar .drag-drop-list\");\nfor(i = 0; i < scrollbarElements.length; i++) {\n  scrollbarElements[i].classList.remove(\"scroll-hide\");\n  scrollbarElements[i].classList.add(\"pretty_scrollbar\");\n  scrollbarElements[i].style.resize = \"none\";\n}\n\n\n//------------------------------------------------\n// Tools: inject \"Refresh list\" link into the label\n//------------------------------------------------\nconst toolsTitle = document.querySelector(\"#tools-group > [data-testid='block-info']\");\nconst toolsInfo = toolsTitle ? 
toolsTitle.nextElementSibling : null;\nif (toolsInfo) {\n  const refreshLink = document.createElement(\"span\");\n  refreshLink.textContent = \" [Refresh list]\";\n  refreshLink.className = \"tools-refresh-link\";\n  refreshLink.addEventListener(\"click\", function(e) {\n    e.preventDefault();\n    document.querySelector(\"#tools-refresh-btn\").click();\n  });\n  toolsInfo.appendChild(refreshLink);\n}\n\n//------------------------------------------------\n// Remove some backgrounds\n//------------------------------------------------\nconst noBackgroundelements = document.querySelectorAll(\".no-background\");\nfor(i = 0; i < noBackgroundelements.length; i++) {\n  noBackgroundelements[i].parentNode.style.border = \"none\";\n  noBackgroundelements[i].parentNode.parentNode.parentNode.style.alignItems = \"center\";\n}\n\nconst slimDropdownElements = document.querySelectorAll(\".slim-dropdown\");\nfor (i = 0; i < slimDropdownElements.length; i++) {\n  const parentNode = slimDropdownElements[i].parentNode;\n  parentNode.style.background = \"transparent\";\n  parentNode.style.border = \"0\";\n}\n\n//------------------------------------------------\n// Create the hover menu in the chat tab\n// The show/hide events were adapted from:\n// https://github.com/SillyTavern/SillyTavern/blob/6c8bd06308c69d51e2eb174541792a870a83d2d6/public/script.js\n//------------------------------------------------\nvar buttonsInChat = document.querySelectorAll(\"#chat-tab #chat-buttons button, #chat-tab #chat-buttons #show-controls\");\nvar button = document.getElementById(\"hover-element-button\");\nvar menu = document.getElementById(\"hover-menu\");\nvar istouchscreen = (navigator.maxTouchPoints > 0) || \"ontouchstart\" in document.documentElement;\n\nfunction showMenu() {\n  menu.style.display = \"flex\"; // Show the menu\n}\n\nfunction hideMenu() {\n  menu.style.display = \"none\"; // Hide the menu\n  if (!istouchscreen) {\n    document.querySelector(\"#chat-input textarea\").focus(); // 
Focus on the chat input\n  }\n}\n\nif (buttonsInChat.length > 0) {\n  for (let i = buttonsInChat.length - 1; i >= 0; i--) {\n    const thisButton = buttonsInChat[i];\n    menu.appendChild(thisButton);\n\n    // Only apply transformations to button elements\n    if (thisButton.tagName.toLowerCase() === \"button\") {\n      thisButton.addEventListener(\"click\", () => {\n        hideMenu();\n      });\n\n      const buttonText = thisButton.textContent;\n      const matches = buttonText.match(/(\\(.*?\\))/);\n\n      if (matches && matches.length > 1) {\n        // Apply the transparent-substring class to the matched substring\n        const substring = matches[1];\n        const newText = buttonText.replace(substring, `&nbsp;<span class=\"transparent-substring\">${substring.slice(1, -1)}</span>`);\n        thisButton.innerHTML = newText;\n      }\n    }\n  }\n}\n\nfunction isMouseOverButtonOrMenu() {\n  return menu.matches(\":hover\") || button.matches(\":hover\");\n}\n\nbutton.addEventListener(\"mouseenter\", function () {\n  if (!istouchscreen) {\n    showMenu();\n  }\n});\n\nbutton.addEventListener(\"click\", function () {\n  if (menu.style.display === \"flex\") {\n    hideMenu();\n  }\n  else {\n    showMenu();\n  }\n});\n\n// Add event listener for mouseleave on the button\nbutton.addEventListener(\"mouseleave\", function () {\n  // Delay to prevent menu hiding when the mouse leaves the button into the menu\n  setTimeout(function () {\n    if (!isMouseOverButtonOrMenu()) {\n      hideMenu();\n    }\n  }, 100);\n});\n\n// Add event listener for mouseleave on the menu\nmenu.addEventListener(\"mouseleave\", function () {\n  // Delay to prevent menu hide when the mouse leaves the menu into the button\n  setTimeout(function () {\n    if (!isMouseOverButtonOrMenu()) {\n      hideMenu();\n    }\n  }, 100);\n});\n\n// Add event listener for click anywhere in the document\ndocument.addEventListener(\"click\", function (event) {\n  const target = event.target;\n\n  // 
Check if the click is outside the button/menu and the menu is visible\n  if (!isMouseOverButtonOrMenu() && menu.style.display === \"flex\") {\n    hideMenu();\n  }\n\n  if (event.target.classList.contains(\"pfp_character\")) {\n    toggleBigPicture();\n  }\n\n  // Handle sidebar clicks on mobile\n  if (isMobile()) {\n  // Check if the click did NOT originate from any of the specified toggle buttons or elements\n    if (\n      target.closest(\"#navigation-toggle\") !== navigationToggle &&\n    target.closest(\"#past-chats-toggle\") !== pastChatsToggle &&\n    target.closest(\"#chat-controls-toggle\") !== chatControlsToggle &&\n    target.closest(\".header_bar\") !== headerBar &&\n    target.closest(\"#past-chats-row\") !== pastChatsRow &&\n    target.closest(\"#chat-controls\") !== chatControlsRow\n    ) {\n      handleIndividualSidebarClose(event);\n    }\n  }\n});\n\n//------------------------------------------------\n// Position the chat input\n//------------------------------------------------\ndocument.getElementById(\"chat-input-row\").classList.add(\"chat-input-positioned\");\n\n//------------------------------------------------\n// Focus on the chat input\n//------------------------------------------------\nconst chatTextArea = document.getElementById(\"chat-input\").querySelector(\"textarea\");\n\nfunction respondToChatInputVisibility(element, callback) {\n  var options = {\n    root: document.documentElement,\n  };\n\n  var observer = new IntersectionObserver((entries, observer) => {\n    entries.forEach(entry => {\n      callback(entry.intersectionRatio > 0);\n    });\n  }, options);\n\n  observer.observe(element);\n}\n\nfunction handleChatInputVisibilityChange(isVisible) {\n  if (isVisible) {\n    chatTextArea.focus();\n  }\n}\n\nrespondToChatInputVisibility(chatTextArea, handleChatInputVisibilityChange);\n\n//------------------------------------------------\n// Show enlarged character picture when the profile\n// picture is clicked 
on\n//------------------------------------------------\nlet bigPictureVisible = false;\n\nfunction addBigPicture() {\n  var imgElement = document.createElement(\"img\");\n  var timestamp = new Date().getTime();\n  imgElement.src = \"/file/user_data/cache/pfp_character.png?time=\" + timestamp;\n  imgElement.classList.add(\"bigProfilePicture\");\n  imgElement.addEventListener(\"load\", function () {\n    this.style.visibility = \"visible\";\n  });\n  imgElement.addEventListener(\"error\", function () {\n    this.style.visibility = \"hidden\";\n  });\n\n  var imgElementParent = document.getElementById(\"chat\").parentNode.parentNode.parentNode.parentNode.parentNode.parentNode.parentNode;\n  imgElementParent.appendChild(imgElement);\n}\n\nfunction deleteBigPicture() {\n  var bigProfilePictures = document.querySelectorAll(\".bigProfilePicture\");\n  bigProfilePictures.forEach(function (element) {\n    element.parentNode.removeChild(element);\n  });\n}\n\nfunction toggleBigPicture() {\n  if(bigPictureVisible) {\n    deleteBigPicture();\n    bigPictureVisible = false;\n  } else {\n    addBigPicture();\n    bigPictureVisible = true;\n  }\n}\n\n//------------------------------------------------\n// Handle the chat input box growth\n//------------------------------------------------\n\n// Cache DOM elements\nconst chatContainer = document.getElementById(\"chat\").parentNode.parentNode.parentNode;\nconst chatInput = document.querySelector(\"#chat-input textarea\");\n\n// Variables to store current dimensions\nlet currentChatInputHeight = chatInput.clientHeight;\n\n//------------------------------------------------\n// Focus on the rename text area when it becomes visible\n//------------------------------------------------\nconst renameTextArea = document.getElementById(\"rename-row\").querySelector(\"textarea\");\n\nfunction respondToRenameVisibility(element, callback) {\n  var options = {\n    root: document.documentElement,\n  };\n\n  var observer = new 
IntersectionObserver((entries, observer) => {\n    entries.forEach(entry => {\n      callback(entry.intersectionRatio > 0);\n    });\n  }, options);\n\n  observer.observe(element);\n}\n\n\nfunction handleVisibilityChange(isVisible) {\n  if (isVisible) {\n    renameTextArea.focus();\n  }\n}\n\nrespondToRenameVisibility(renameTextArea, handleVisibilityChange);\n\n//------------------------------------------------\n// Adjust the chat tab margin if no extension UI\n// is present at the bottom\n//------------------------------------------------\n\nif (document.getElementById(\"extensions\") === null) {\n  document.getElementById(\"chat-tab\").style.marginBottom = \"-29px\";\n}\n\n//------------------------------------------------\n// Focus on the chat input after starting a new chat\n//------------------------------------------------\n\ndocument.querySelectorAll(\".focus-on-chat-input\").forEach(element => {\n  element.addEventListener(\"click\", function() {\n    document.querySelector(\"#chat-input textarea\").focus();\n  });\n});\n\n//------------------------------------------------\n// \"New chat\" hover menu with incognito option\n//------------------------------------------------\n\n(function() {\n  const newChatBtn = document.getElementById(\"new-chat-btn\");\n\n  const wrapper = document.createElement(\"div\");\n  wrapper.id = \"new-chat-wrapper\";\n  newChatBtn.replaceWith(wrapper);\n  wrapper.appendChild(newChatBtn);\n\n  const arrow = document.createElement(\"span\");\n  arrow.className = \"new-chat-arrow\";\n  arrow.textContent = \"\\u25BE\";\n\n  const menu = document.createElement(\"div\");\n  menu.className = \"new-chat-menu\";\n  const option = document.createElement(\"div\");\n  option.className = \"new-chat-menu-item\";\n  option.textContent = \"Incognito chat\";\n  menu.appendChild(option);\n\n  arrow.appendChild(menu);\n  wrapper.appendChild(arrow);\n\n  option.addEventListener(\"click\", function(e) {\n    e.stopPropagation();\n    
document.querySelector(\"#incognito-chat-btn\").click();\n  });\n})();\n\n//------------------------------------------------\n// Fix a border around the \"past chats\" menu\n//------------------------------------------------\ndocument.getElementById(\"past-chats\").parentNode.style.borderRadius = \"0px\";\n\n//------------------------------------------------\n// Allow the character dropdown to coexist at the\n// Chat tab and the Parameters > Character tab\n//------------------------------------------------\n\nconst headerBar = document.querySelector(\".header_bar\");\nlet originalParent;\nlet originalIndex; // To keep track of the original position\nlet movedElement;\n\nfunction moveToChatTab() {\n  const characterMenu = document.getElementById(\"character-menu\");\n  const grandParent = characterMenu.parentElement.parentElement;\n\n  // Save the initial location for the character dropdown\n  if (!originalParent) {\n    originalParent = grandParent.parentElement;\n    originalIndex = Array.from(originalParent.children).indexOf(grandParent);\n    movedElement = grandParent;\n  }\n\n  // Do not show the Character dropdown in the Chat tab when \"instruct\" mode is selected\n  const instructRadio = document.querySelector(\"#chat-mode input[value=\\\"instruct\\\"]\");\n  if (instructRadio && instructRadio.checked) {\n    grandParent.style.display = \"none\";\n  }\n\n  grandParent.children[0].style.minWidth = \"100%\";\n\n  const chatControlsFirstChild = document.querySelector(\"#chat-controls\").firstElementChild;\n  const newParent = chatControlsFirstChild;\n  let newPosition = newParent.children.length - 3;\n\n  newParent.insertBefore(grandParent, newParent.children[newPosition]);\n  document.getElementById(\"save-character\").style.display = \"none\";\n  document.getElementById(\"restore-character\").style.display = \"none\";\n}\n\nfunction restoreOriginalPosition() {\n  if (originalParent && movedElement) {\n    if (originalIndex >= originalParent.children.length) 
{\n      originalParent.appendChild(movedElement);\n    } else {\n      originalParent.insertBefore(movedElement, originalParent.children[originalIndex]);\n    }\n\n    document.getElementById(\"save-character\").style.display = \"\";\n    document.getElementById(\"restore-character\").style.display = \"\";\n    movedElement.style.display = \"\";\n    movedElement.children[0].style.minWidth = \"\";\n  }\n}\n\nheaderBar.addEventListener(\"click\", (e) => {\n  if (e.target.tagName === \"BUTTON\") {\n    const tabName = e.target.textContent.trim();\n    if (tabName === \"Chat\") {\n      moveToChatTab();\n    } else {\n      restoreOriginalPosition();\n    }\n  }\n});\n\n//------------------------------------------------\n// Add a confirmation dialog when leaving the page\n// Useful to avoid data loss\n//------------------------------------------------\nwindow.addEventListener(\"beforeunload\", function (event) {\n  // Cancel the event\n  event.preventDefault();\n  // Chrome requires returnValue to be set\n  event.returnValue = \"\";\n});\n\nmoveToChatTab();\n\n//------------------------------------------------\n// Buttons to toggle the sidebars\n//------------------------------------------------\n\nconst leftArrowSVG = `\n<svg xmlns=\"http://www.w3.org/2000/svg\" width=\"24\" height=\"24\" viewBox=\"0 0 24 24\" fill=\"none\" stroke=\"currentColor\" stroke-width=\"2\" stroke-linecap=\"round\" stroke-linejoin=\"round\" class=\"tabler-icon tabler-icon-arrow-bar-left\">\n  <path d=\"M4 12l10 0\"></path>\n  <path d=\"M4 12l4 4\"></path>\n  <path d=\"M4 12l4 -4\"></path>\n  <path d=\"M20 4l0 16\"></path>\n</svg>`;\n\nconst rightArrowSVG = `\n<svg xmlns=\"http://www.w3.org/2000/svg\" width=\"24\" height=\"24\" viewBox=\"0 0 24 24\" fill=\"none\" stroke=\"currentColor\" stroke-width=\"2\" stroke-linecap=\"round\" stroke-linejoin=\"round\" class=\"tabler-icon tabler-icon-arrow-bar-right\">\n  <path d=\"M20 12l-10 0\"></path>\n  <path d=\"M20 12l-4 4\"></path>\n  <path d=\"M20 
12l-4 -4\"></path>\n  <path d=\"M4 4l0 16\"></path>\n</svg>`;\n\nconst hamburgerMenuSVG = `\n<svg xmlns=\"http://www.w3.org/2000/svg\" width=\"20\" height=\"20\" viewBox=\"0 0 24 24\" fill=\"none\" stroke=\"currentColor\" stroke-width=\"2\" stroke-linecap=\"round\" stroke-linejoin=\"round\" class=\"icon icon-hamburger-menu\">\n  <line x1=\"3\" y1=\"12\" x2=\"21\" y2=\"12\"></line>\n  <line x1=\"3\" y1=\"6\" x2=\"21\" y2=\"6\"></line>\n  <line x1=\"3\" y1=\"18\" x2=\"21\" y2=\"18\"></line>\n</svg>`;\n\nconst closeMenuSVG = `\n<svg xmlns=\"http://www.w3.org/2000/svg\" width=\"20\" height=\"20\" viewBox=\"0 0 24 24\" fill=\"none\" stroke=\"currentColor\" stroke-width=\"2\" stroke-linecap=\"round\" stroke-linejoin=\"round\" class=\"icon icon-close-menu\">\n  <line x1=\"18\" y1=\"6\" x2=\"6\" y2=\"18\"></line>\n  <line x1=\"6\" y1=\"6\" x2=\"18\" y2=\"18\"></line>\n</svg>`;\n\nconst chatTab = document.getElementById(\"chat-tab\");\nconst pastChatsRow = document.getElementById(\"past-chats-row\");\nconst chatControlsRow = document.getElementById(\"chat-controls\");\n\nif (chatTab) {\n  // Create past-chats-toggle div\n  const pastChatsToggle = document.createElement(\"div\");\n  pastChatsToggle.id = \"past-chats-toggle\";\n  pastChatsToggle.innerHTML = leftArrowSVG; // Set initial icon to left arrow\n  pastChatsToggle.classList.add(\"past-chats-open\"); // Set initial position\n\n  // Create chat-controls-toggle div\n  const chatControlsToggle = document.createElement(\"div\");\n  chatControlsToggle.id = \"chat-controls-toggle\";\n  chatControlsToggle.innerHTML = rightArrowSVG; // Set initial icon to right arrow\n  chatControlsToggle.classList.add(\"chat-controls-open\"); // Set initial position\n\n  // Append both elements to the chat-tab\n  chatTab.appendChild(pastChatsToggle);\n  chatTab.appendChild(chatControlsToggle);\n}\n\n// Create navigation toggle div\nconst navigationToggle = document.createElement(\"div\");\nnavigationToggle.id = 
\"navigation-toggle\";\nnavigationToggle.innerHTML = leftArrowSVG; // Set initial icon to right arrow\nnavigationToggle.classList.add(\"navigation-left\"); // Set initial position\nheaderBar.appendChild(navigationToggle);\n\n// Retrieve the dynamically created toggle buttons\nconst pastChatsToggle = document.getElementById(\"past-chats-toggle\");\nconst chatControlsToggle = document.getElementById(\"chat-controls-toggle\");\n\nfunction handleIndividualSidebarClose(event) {\n  const target = event.target;\n\n  // Close navigation bar if click is outside and it is open\n  if (!headerBar.contains(target) && !headerBar.classList.contains(\"sidebar-hidden\")) {\n    toggleSidebar(headerBar, navigationToggle, true);\n  }\n\n  // Close past chats row if click is outside and it is open\n  if (!pastChatsRow.contains(target) && !pastChatsRow.classList.contains(\"sidebar-hidden\")) {\n    toggleSidebar(pastChatsRow, pastChatsToggle, true);\n  }\n\n  // Close chat controls row if click is outside and it is open\n  if (!chatControlsRow.contains(target) && !chatControlsRow.classList.contains(\"sidebar-hidden\")) {\n    toggleSidebar(chatControlsRow, chatControlsToggle, true);\n  }\n}\n\nfunction toggleSidebar(sidebar, toggle, forceClose = false) {\n  const isCurrentlyHidden = sidebar.classList.contains(\"sidebar-hidden\");\n  const shouldClose = !isCurrentlyHidden;\n\n  // Apply visibility classes\n  sidebar.classList.toggle(\"sidebar-hidden\", shouldClose);\n  sidebar.classList.toggle(\"sidebar-shown\", !shouldClose);\n\n  if (sidebar === headerBar) {\n    // Special handling for header bar\n    document.documentElement.style.setProperty(\"--header-width\", shouldClose ? \"0px\" : \"112px\");\n    pastChatsRow.classList.toggle(\"negative-header\", shouldClose);\n    pastChatsToggle.classList.toggle(\"negative-header\", shouldClose);\n    toggle.innerHTML = shouldClose ? 
hamburgerMenuSVG : closeMenuSVG;\n  } else if (sidebar === pastChatsRow) {\n    // Past chats sidebar\n    toggle.classList.toggle(\"past-chats-closed\", shouldClose);\n    toggle.classList.toggle(\"past-chats-open\", !shouldClose);\n    toggle.innerHTML = shouldClose ? rightArrowSVG : leftArrowSVG;\n  } else if (sidebar === chatControlsRow) {\n    // Chat controls sidebar\n    toggle.classList.toggle(\"chat-controls-closed\", shouldClose);\n    toggle.classList.toggle(\"chat-controls-open\", !shouldClose);\n    toggle.innerHTML = shouldClose ? leftArrowSVG : rightArrowSVG;\n  }\n\n  // Mobile handling\n  if (isMobile()) {\n    sidebar.classList.toggle(\"sidebar-shown\", !shouldClose);\n  }\n}\n\n// Function to check if the device is mobile\nfunction isMobile() {\n  return window.innerWidth <= 924;\n}\n\n// Function to initialize sidebars\nfunction initializeSidebars() {\n  const isOnMobile = isMobile();\n\n  if (isOnMobile) {\n    // Mobile state: Hide sidebars and set closed states\n    [pastChatsRow, chatControlsRow, headerBar].forEach(el => {\n      el.classList.add(\"sidebar-hidden\");\n      el.classList.remove(\"sidebar-shown\");\n    });\n\n    document.documentElement.style.setProperty(\"--header-width\", \"0px\");\n    pastChatsRow.classList.add(\"negative-header\");\n    pastChatsToggle.classList.add(\"negative-header\", \"past-chats-closed\");\n    pastChatsToggle.classList.remove(\"past-chats-open\");\n\n    [chatControlsToggle, navigationToggle].forEach(el => {\n      el.classList.add(\"chat-controls-closed\");\n      el.classList.remove(\"chat-controls-open\");\n    });\n\n    pastChatsToggle.innerHTML = rightArrowSVG;\n    chatControlsToggle.innerHTML = leftArrowSVG;\n    navigationToggle.innerHTML = hamburgerMenuSVG;\n  } else {\n    // Desktop state: Show sidebars and set open states\n    [pastChatsRow, chatControlsRow].forEach(el => {\n      el.classList.remove(\"sidebar-hidden\", \"sidebar-shown\");\n    });\n\n    
pastChatsToggle.classList.add(\"past-chats-open\");\n    pastChatsToggle.classList.remove(\"past-chats-closed\");\n\n    [chatControlsToggle, navigationToggle].forEach(el => {\n      el.classList.add(\"chat-controls-open\");\n      el.classList.remove(\"chat-controls-closed\");\n    });\n\n    pastChatsToggle.innerHTML = leftArrowSVG;\n    chatControlsToggle.innerHTML = rightArrowSVG;\n    navigationToggle.innerHTML = closeMenuSVG;\n  }\n}\n\n// Run the initializer when the page loads\ninitializeSidebars();\n\n// Add click event listeners to toggle buttons\npastChatsToggle.addEventListener(\"click\", () => {\n  const isCurrentlyOpen = !pastChatsRow.classList.contains(\"sidebar-hidden\");\n  toggleSidebar(pastChatsRow, pastChatsToggle);\n\n  // On desktop, open/close both sidebars at the same time\n  if (!isMobile()) {\n    if (isCurrentlyOpen) {\n      // If we just closed the left sidebar, also close the right sidebar\n      if (!chatControlsRow.classList.contains(\"sidebar-hidden\")) {\n        toggleSidebar(chatControlsRow, chatControlsToggle, true);\n      }\n    } else {\n      // If we just opened the left sidebar, also open the right sidebar\n      if (chatControlsRow.classList.contains(\"sidebar-hidden\")) {\n        toggleSidebar(chatControlsRow, chatControlsToggle, false);\n      }\n    }\n  }\n});\n\nchatControlsToggle.addEventListener(\"click\", () => {\n  const isCurrentlyOpen = !chatControlsRow.classList.contains(\"sidebar-hidden\");\n  toggleSidebar(chatControlsRow, chatControlsToggle);\n\n  // On desktop, open/close both sidebars at the same time\n  if (!isMobile()) {\n    if (isCurrentlyOpen) {\n      // If we just closed the right sidebar, also close the left sidebar\n      if (!pastChatsRow.classList.contains(\"sidebar-hidden\")) {\n        toggleSidebar(pastChatsRow, pastChatsToggle, true);\n      }\n    } else {\n      // If we just opened the right sidebar, also open the left sidebar\n      if 
(pastChatsRow.classList.contains(\"sidebar-hidden\")) {\n        toggleSidebar(pastChatsRow, pastChatsToggle, false);\n      }\n    }\n  }\n});\n\nnavigationToggle.addEventListener(\"click\", () => {\n  toggleSidebar(headerBar, navigationToggle);\n});\n\n//------------------------------------------------\n// Fixes #chat-input textarea height issue\n// for devices with width <= 924px\n//------------------------------------------------\n\nif (isMobile()) {\n  // Target the textarea\n  const textarea = document.querySelector(\"#chat-input textarea\");\n\n  if (textarea) {\n    // Simulate adding and removing a newline\n    textarea.value += \"\\n\";\n    textarea.dispatchEvent(new Event(\"input\", { bubbles: true }));\n    textarea.value = textarea.value.slice(0, -1);\n    textarea.dispatchEvent(new Event(\"input\", { bubbles: true }));\n  }\n}\n\n//------------------------------------------------\n// Create a top navigation bar on mobile\n//------------------------------------------------\n\nfunction createMobileTopBar() {\n  const chatTab = document.getElementById(\"chat-tab\");\n\n  // Only create the top bar if it doesn't already exist\n  if (chatTab && !chatTab.querySelector(\".mobile-top-bar\")) {\n    const topBar = document.createElement(\"div\");\n    topBar.classList.add(\"mobile-top-bar\");\n\n    // Append the top bar to chat-tab (it becomes the last child)\n    chatTab.appendChild(topBar);\n  }\n}\n\ncreateMobileTopBar();\n\n//------------------------------------------------\n// Simple Navigation Functions\n//------------------------------------------------\n\nfunction navigateLastAssistantMessage(direction) {\n  const chat = document.querySelector(\"#chat\");\n  if (!chat) return false;\n\n  const messages = chat.querySelectorAll(\"[data-index]\");\n  if (messages.length === 0) return false;\n\n  // Find the last assistant message (starting from the end)\n  let lastAssistantMessage = null;\n  for (let i = messages.length - 1; i >= 0; i--) {\n    const msg = 
messages[i];\n    if (\n      msg.classList.contains(\"assistant-message\") ||\n      msg.querySelector(\".circle-bot\") ||\n      msg.querySelector(\".text-bot\")\n    ) {\n      lastAssistantMessage = msg;\n      break;\n    }\n  }\n\n  if (!lastAssistantMessage) return false;\n\n  const buttons = lastAssistantMessage.querySelectorAll(\".version-nav-button\");\n\n  for (let i = 0; i < buttons.length; i++) {\n    const button = buttons[i];\n    const onclick = button.getAttribute(\"onclick\");\n    const disabled = button.hasAttribute(\"disabled\");\n\n    const isLeft = onclick && onclick.includes(\"'left'\");\n    const isRight = onclick && onclick.includes(\"'right'\");\n\n    if (!disabled) {\n      if (direction === \"left\" && isLeft) {\n        navigateVersion(button, direction);\n        return true;\n      }\n      if (direction === \"right\" && isRight) {\n        navigateVersion(button, direction);\n        return true;\n      }\n    }\n  }\n\n  return false;\n}\n\n//------------------------------------------------\n// Paste Handler for Long Text\n//------------------------------------------------\n\nconst MAX_PLAIN_TEXT_LENGTH = 2500;\n\nfunction setupPasteHandler() {\n  const textbox = document.querySelector(\"#chat-input textarea[data-testid=\\\"textbox\\\"]\");\n  const fileInput = document.querySelector(\"#chat-input input[data-testid=\\\"file-upload\\\"]\");\n\n  if (!textbox || !fileInput) {\n    setTimeout(setupPasteHandler, 500);\n    return;\n  }\n\n  textbox.addEventListener(\"paste\", async (event) => {\n    const text = event.clipboardData?.getData(\"text\");\n\n    if (text && text.length > MAX_PLAIN_TEXT_LENGTH && document.querySelector(\"#paste_to_attachment input[data-testid=\\\"checkbox\\\"]\")?.checked) {\n      event.preventDefault();\n\n      const file = new File([text], \"pasted_text.txt\", {\n        type: \"text/plain\",\n        lastModified: Date.now()\n      });\n\n      const dataTransfer = new DataTransfer();\n      
dataTransfer.items.add(file);\n      fileInput.files = dataTransfer.files;\n      fileInput.dispatchEvent(new Event(\"change\", { bubbles: true }));\n    }\n  });\n}\n\nif (document.readyState === \"loading\") {\n  document.addEventListener(\"DOMContentLoaded\", setupPasteHandler);\n} else {\n  setupPasteHandler();\n}\n\n//------------------------------------------------\n// Tooltips\n//------------------------------------------------\n\n// File upload button\ndocument.querySelector(\"#chat-input .upload-button\").title = \"Upload text files, PDFs, DOCX documents, and images\";\n\n// Activate web search\ndocument.getElementById(\"web-search\").title = \"Search the internet with DuckDuckGo\";\n\n//------------------------------------------------\n// Inline icons for deleting past chats\n//------------------------------------------------\n\nfunction addMiniDeletes() {\n  document.querySelectorAll(\"#past-chats label:not(.has-delete)\").forEach(label => {\n    const container = document.createElement(\"span\");\n    container.className = \"delete-container\";\n\n    label.classList.add(\"chat-label-with-delete\");\n\n    const trashBtn = document.createElement(\"button\");\n    trashBtn.innerHTML = \"🗑️\";\n    trashBtn.className = \"trash-btn\";\n\n    const cancelBtn = document.createElement(\"button\");\n    cancelBtn.innerHTML = \"✕\";\n    cancelBtn.className = \"cancel-btn\";\n\n    const confirmBtn = document.createElement(\"button\");\n    confirmBtn.innerHTML = \"✓\";\n    confirmBtn.className = \"confirm-btn\";\n\n    label.addEventListener(\"mouseenter\", () => {\n      container.style.opacity = \"1\";\n    });\n\n    label.addEventListener(\"mouseleave\", () => {\n      container.style.opacity = \"0\";\n    });\n\n    trashBtn.onclick = (e) => {\n      e.stopPropagation();\n      label.querySelector(\"input\").click();\n      document.querySelector(\"#delete_chat\").click();\n      trashBtn.style.display = \"none\";\n      cancelBtn.style.display = 
\"flex\";\n      confirmBtn.style.display = \"flex\";\n    };\n\n    cancelBtn.onclick = (e) => {\n      e.stopPropagation();\n      document.querySelector(\"#delete_chat-cancel\").click();\n      resetButtons();\n    };\n\n    confirmBtn.onclick = (e) => {\n      e.stopPropagation();\n      document.querySelector(\"#delete_chat-confirm\").click();\n      resetButtons();\n    };\n\n    function resetButtons() {\n      trashBtn.style.display = \"inline\";\n      cancelBtn.style.display = \"none\";\n      confirmBtn.style.display = \"none\";\n    }\n\n    container.append(trashBtn, cancelBtn, confirmBtn);\n    label.appendChild(container);\n    label.classList.add(\"has-delete\");\n  });\n}\n\nnew MutationObserver(() => addMiniDeletes()).observe(\n  document.querySelector(\"#past-chats\"),\n  {childList: true, subtree: true}\n);\naddMiniDeletes();\n\n//------------------------------------------------\n// Fix autoscroll after fonts load\n//------------------------------------------------\ndocument.fonts.addEventListener(\"loadingdone\", (event) => {\n  setTimeout(() => {\n    if (!window.isScrolled) {\n      const maxScroll = targetElement.scrollHeight - targetElement.clientHeight;\n      if (targetElement.scrollTop < maxScroll - 5) {\n        targetElement.scrollTop = maxScroll;\n      }\n    }\n  }, 50);\n});\n\n(function() {\n  const chatParent = document.querySelector(\".chat-parent\");\n  const chatInputRow = document.querySelector(\"#chat-input-row\");\n  const originalMarginBottom = 75;\n  let originalHeight = chatInputRow.offsetHeight;\n\n  function updateMargin() {\n    const currentHeight = chatInputRow.offsetHeight;\n    const heightDifference = currentHeight - originalHeight;\n    chatParent.style.marginBottom = `${originalMarginBottom + heightDifference}px`;\n    if (!window.isScrolled) {\n      chatParent.scrollTop = chatParent.scrollHeight - chatParent.clientHeight;\n    }\n  }\n\n  // Watch for size changes that affect height\n  new 
ResizeObserver(updateMargin).observe(chatInputRow);\n\n  // Also listen for window resize\n  window.addEventListener(\"resize\", updateMargin);\n\n  // Initial call to set the margin based on current state\n  updateMargin();\n})();\n"
  },
  {
    "path": "js/save_files.js",
    "content": "// Functions for downloading JSON files\nfunction getCurrentTimestamp() {\n  const now = new Date();\n  const timezoneOffset = now.getTimezoneOffset() * 60000; // Convert to milliseconds\n  const localTime = new Date(now.getTime() - timezoneOffset);\n  const formattedTimestamp = localTime.toISOString().replace(/[-:]/g, \"\").slice(0, 15);\n  return formattedTimestamp;\n}\n\nfunction saveFile(contents, filename) {\n  const element = document.createElement(\"a\");\n  element.setAttribute(\"href\", \"data:text/plain;charset=utf-8,\" + encodeURIComponent(contents));\n  element.setAttribute(\"download\", filename);\n  element.style.display = \"none\";\n  document.body.appendChild(element);\n  element.click();\n  document.body.removeChild(element);\n}\n\nfunction saveHistory(history, character, mode) {\n  let path = null;\n\n  if ([\"chat\", \"chat-instruct\"].includes(mode) && character && character.trim() !== \"\") {\n    path = `history_${character}_${getCurrentTimestamp()}.json`;\n  } else {\n    try {\n      path = `history_${mode}_${getCurrentTimestamp()}.json`;\n    } catch (error) {\n      path = `history_${getCurrentTimestamp()}.json`;\n    }\n  }\n  saveFile(history, path);\n}\n\nfunction saveSession(session) {\n  let path = null;\n\n  path = `session_${getCurrentTimestamp()}.json`;\n  saveFile(session, path);\n}\n"
  },
  {
    "path": "js/show_controls.js",
    "content": "const chatParent = document.querySelector(\".chat-parent\");\n\nfunction toggle_controls(value) {\n  const extensions = document.querySelector(\"#extensions\");\n\n  if (value) {\n    // SHOW MODE: Click toggles to show hidden sidebars\n    const navToggle = document.getElementById(\"navigation-toggle\");\n    const pastChatsToggle = document.getElementById(\"past-chats-toggle\");\n\n    if (navToggle && document.querySelector(\".header_bar\")?.classList.contains(\"sidebar-hidden\")) {\n      navToggle.click();\n    }\n    if (pastChatsToggle && document.getElementById(\"past-chats-row\")?.classList.contains(\"sidebar-hidden\")) {\n      pastChatsToggle.click();\n    }\n\n    // Show extensions only\n    if (extensions) {\n      extensions.style.display = \"inherit\";\n    }\n\n    let gallery_element = document.getElementById(\"gallery-extension\");\n    if (gallery_element) {\n      gallery_element.style.display = \"block\";\n    }\n\n  } else {\n    // HIDE MODE: Click toggles to hide visible sidebars\n    const navToggle = document.getElementById(\"navigation-toggle\");\n    const pastChatsToggle = document.getElementById(\"past-chats-toggle\");\n\n    if (navToggle && !document.querySelector(\".header_bar\")?.classList.contains(\"sidebar-hidden\")) {\n      navToggle.click();\n    }\n    if (pastChatsToggle && !document.getElementById(\"past-chats-row\")?.classList.contains(\"sidebar-hidden\")) {\n      pastChatsToggle.click();\n    }\n\n    // Hide extensions only\n    if (extensions) {\n      extensions.style.display = \"none\";\n    }\n  }\n}\n"
  },
  {
    "path": "js/switch_tabs.js",
    "content": "function scrollToTop() {\n  window.scrollTo({ top: 0 });\n}\n\nfunction findButtonsByText(buttonText) {\n  const buttons = document.getElementsByTagName(\"button\");\n  const matchingButtons = [];\n\n  for (let i = 0; i < buttons.length; i++) {\n    if (buttons[i].textContent.trim() === buttonText) {\n      matchingButtons.push(buttons[i]);\n    }\n  }\n\n  return matchingButtons;\n}\n\nfunction switch_to_chat() {\n  document.getElementById(\"chat-tab-button\").click();\n  scrollToTop();\n}\n\nfunction switch_to_notebook() {\n  document.getElementById(\"notebook-parent-tab-button\").click();\n  findButtonsByText(\"Raw\")[1].click();\n  scrollToTop();\n}\n\nfunction switch_to_generation_parameters() {\n  document.getElementById(\"parameters-button\").click();\n  findButtonsByText(\"Generation\")[0].click();\n  scrollToTop();\n}\n\nfunction switch_to_character() {\n  document.getElementById(\"character-tab-button\").click();\n  scrollToTop();\n}\n\nfunction switch_to_image_ai_generate() {\n  const container = document.querySelector(\"#image-ai-tab\");\n  const buttons = container.getElementsByTagName(\"button\");\n\n  for (let i = 0; i < buttons.length; i++) {\n    if (buttons[i].textContent.trim() === \"Generate\") {\n      buttons[i].click();\n      break;\n    }\n  }\n\n  scrollToTop();\n}\n"
  },
  {
    "path": "js/update_big_picture.js",
    "content": "function updateBigPicture() {\n  var existingElement = document.querySelector(\".bigProfilePicture\");\n  if (existingElement) {\n    var timestamp = new Date().getTime();\n    existingElement.src = \"/file/user_data/cache/pfp_character.png?time=\" + timestamp;\n  }\n}\n"
  },
  {
    "path": "modules/LoRA.py",
    "content": "from pathlib import Path\n\nimport modules.shared as shared\nfrom modules.logging_colors import logger\n\n\ndef add_lora_to_model(lora_names):\n    add_lora_transformers(lora_names)\n\n\ndef get_lora_path(lora_name):\n    p = Path(lora_name)\n    if p.exists():\n        lora_name = p.parts[-1]\n\n    return Path(f\"{shared.args.lora_dir}/{lora_name}\")\n\n\ndef add_lora_transformers(lora_names):\n    from peft import PeftModel\n\n    from modules.torch_utils import get_device\n\n    prior_set = set(shared.lora_names)\n    added_set = set(lora_names) - prior_set\n    removed_set = prior_set - set(lora_names)\n\n    # If no LoRA needs to be added or removed, exit\n    if len(added_set) == 0 and len(removed_set) == 0:\n        return\n\n    # Add a LoRA when another LoRA is already present\n    if len(removed_set) == 0 and len(prior_set) > 0 and \"__merged\" not in shared.model.peft_config.keys():\n        logger.info(f\"Adding the LoRA(s) named {added_set} to the model\")\n        for lora in added_set:\n            shared.model.load_adapter(get_lora_path(lora), lora)\n\n        if len(lora_names) > 1:\n            merge_loras()\n\n        shared.lora_names = lora_names\n        return\n\n    # If any LoRA needs to be removed, start over\n    if len(removed_set) > 0:\n        shared.model = shared.model.unload()\n\n    if len(lora_names) > 0:\n        params = {}\n        if not shared.args.cpu:\n            if not shared.args.load_in_4bit and not shared.args.load_in_8bit:\n                params['dtype'] = shared.model.dtype\n                if hasattr(shared.model, \"hf_device_map\"):\n                    params['device_map'] = {\"base_model.model.\" + k: v for k, v in shared.model.hf_device_map.items()}\n\n        logger.info(\"Applying the following LoRAs to {}: {}\".format(shared.model_name, ', '.join(lora_names)))\n        shared.model = PeftModel.from_pretrained(shared.model, get_lora_path(lora_names[0]), adapter_name=lora_names[0], **params)\n 
       for lora in lora_names[1:]:\n            shared.model.load_adapter(get_lora_path(lora), lora)\n\n        if len(lora_names) > 1:\n            merge_loras()\n\n        if not shared.args.load_in_8bit and not shared.args.cpu:\n            shared.model.half()\n            if not hasattr(shared.model, \"hf_device_map\"):\n                device = get_device()\n                if device:\n                    shared.model = shared.model.to(device)\n\n    shared.lora_names = lora_names\n\n\ndef merge_loras():\n    if len(list({shared.model.peft_config[adapter].r for adapter in shared.model.peft_config.keys()})) > 1:\n        logger.warning(\"The loaded LoRAs cannot be merged, as they have dissimilar ranks. Only the first one will be active.\")\n        return\n\n    shared.model.add_weighted_adapter(shared.lora_names, [1] * len(shared.lora_names), \"__merged\")\n    shared.model.set_adapter(\"__merged\")\n"
  },
  {
    "path": "modules/callbacks.py",
    "content": "import traceback\nfrom queue import Queue\nfrom threading import Thread\n\nimport modules.shared as shared\n\n\nclass StopNowException(Exception):\n    pass\n\n\nclass Iteratorize:\n\n    \"\"\"\n    Transforms a function that takes a callback\n    into a lazy iterator (generator).\n\n    Adapted from: https://stackoverflow.com/a/9969000\n    \"\"\"\n\n    def __init__(self, func, args=None, kwargs=None, callback=None):\n        self.mfunc = func\n        self.c_callback = callback\n        self.q = Queue()\n        self.sentinel = object()\n        self.args = args or []\n        self.kwargs = kwargs or {}\n        self.stop_now = False\n\n        def _callback(val):\n            if self.stop_now or shared.stop_everything:\n                raise StopNowException\n            self.q.put(val)\n\n        def gentask():\n            try:\n                ret = self.mfunc(callback=_callback, *args, **self.kwargs)\n            except StopNowException:\n                pass\n            except Exception:\n                traceback.print_exc()\n                pass\n\n            self.q.put(self.sentinel)\n            if self.c_callback:\n                self.c_callback(ret)\n\n        self.thread = Thread(target=gentask)\n        self.thread.start()\n\n    def __iter__(self):\n        return self\n\n    def __next__(self):\n        obj = self.q.get(True, None)\n        if obj is self.sentinel:\n            raise StopIteration\n        else:\n            return obj\n\n    def __del__(self):\n        pass\n\n    def __enter__(self):\n        return self\n\n    def __exit__(self, exc_type, exc_val, exc_tb):\n        self.stop_now = True\n"
  },
  {
    "path": "modules/chat.py",
    "content": "import base64\nimport copy\nimport functools\nimport html\nimport json\nimport pprint\nimport re\nimport shutil\nimport threading\nimport time\nfrom datetime import datetime\nfrom functools import partial\nfrom pathlib import Path\n\nimport markupsafe\nimport yaml\nfrom jinja2.ext import loopcontrols\nfrom jinja2.sandbox import ImmutableSandboxedEnvironment\nfrom PIL import Image\n\nimport modules.shared as shared\nfrom modules import utils\nfrom modules.extensions import apply_extensions\nfrom modules.html_generator import (\n    chat_html_wrapper,\n    convert_to_markdown,\n    extract_thinking_block,\n    make_thumbnail\n)\nfrom modules.image_utils import open_image_safely\nfrom modules.logging_colors import logger\nfrom modules.reasoning import THINKING_FORMATS\nfrom modules.text_generation import (\n    generate_reply,\n    get_encoded_length,\n    get_max_prompt_length\n)\nfrom modules.utils import (\n    delete_file,\n    get_available_characters,\n    get_available_users,\n    sanitize_filename,\n    save_file\n)\nfrom modules.web_search import add_web_search_attachments\n\n_history_file_lock = threading.Lock()\n\n\ndef strftime_now(format):\n    return datetime.now().strftime(format)\n\n\ndef get_current_timestamp():\n    \"\"\"Returns the current time in 24-hour format\"\"\"\n    return datetime.now().strftime('%b %d, %Y %H:%M')\n\n\ndef update_message_metadata(metadata_dict, role, index, **fields):\n    \"\"\"\n    Updates or adds metadata fields for a specific message.\n\n    Args:\n        metadata_dict: The metadata dictionary\n        role: The role (user, assistant, etc)\n        index: The message index\n        **fields: Arbitrary metadata fields to update/add\n    \"\"\"\n    key = f\"{role}_{index}\"\n    if key not in metadata_dict:\n        metadata_dict[key] = {}\n\n    # Update with provided fields\n    for field_name, field_value in fields.items():\n        metadata_dict[key][field_name] = field_value\n\n\njinja_env = 
ImmutableSandboxedEnvironment(\n    trim_blocks=True,\n    lstrip_blocks=True,\n    extensions=[loopcontrols]\n)\n\n\ndef custom_tojson(value, indent=None, ensure_ascii=True):\n    return markupsafe.Markup(json.dumps(value, indent=indent, ensure_ascii=ensure_ascii))\n\n\njinja_env.filters[\"tojson\"] = custom_tojson\njinja_env.globals[\"strftime_now\"] = strftime_now\n\n\ndef _raise_exception(message):\n    raise ValueError(message)\n\n\njinja_env.globals[\"raise_exception\"] = _raise_exception\n\n_template_cache = {}\n\n\ndef get_compiled_template(template_str):\n    \"\"\"Cache compiled Jinja2 templates keyed by their source string.\"\"\"\n    compiled = _template_cache.get(template_str)\n    if compiled is None:\n        compiled = jinja_env.from_string(template_str)\n        _template_cache[template_str] = compiled\n\n    return compiled\n\n\ndef str_presenter(dumper, data):\n    \"\"\"\n    Copied from https://github.com/yaml/pyyaml/issues/240\n    Makes pyyaml output prettier multiline strings.\n    \"\"\"\n\n    if data.count('\\n') > 0:\n        return dumper.represent_scalar('tag:yaml.org,2002:str', data, style='|')\n\n    return dumper.represent_scalar('tag:yaml.org,2002:str', data)\n\n\nyaml.add_representer(str, str_presenter)\nyaml.representer.SafeRepresenter.add_representer(str, str_presenter)\n\n\nclass _JsonDict(dict):\n    \"\"\"A dict that serializes as JSON when used in string concatenation.\n\n    Some Jinja2 templates (Qwen, GLM) iterate arguments with .items(),\n    requiring a dict.  Others (DeepSeek) concatenate arguments as a\n    string, requiring JSON.  
This class satisfies both.\n    \"\"\"\n\n    def __str__(self):\n        return json.dumps(self, ensure_ascii=False)\n\n    def __add__(self, other):\n        return str(self) + other\n\n    def __radd__(self, other):\n        return other + str(self)\n\n\ndef _deserialize_tool_call_arguments(tool_calls):\n    \"\"\"Convert tool_call arguments from JSON strings to _JsonDict.\n\n    The OpenAI API spec sends arguments as a JSON string, but Jinja2\n    templates may need a dict (.items()) or a string (concatenation).\n    _JsonDict handles both transparently.\n    \"\"\"\n    result = []\n    for tc in tool_calls:\n        tc = copy.copy(tc)\n        func = tc.get('function', {})\n        if isinstance(func, dict):\n            func = dict(func)\n            args = func.get('arguments')\n            if isinstance(args, str):\n                try:\n                    func['arguments'] = _JsonDict(json.loads(args))\n                except (json.JSONDecodeError, ValueError):\n                    pass\n            elif isinstance(args, dict) and not isinstance(args, _JsonDict):\n                func['arguments'] = _JsonDict(args)\n            tc['function'] = func\n        result.append(tc)\n    return result\n\n\ndef _expand_tool_sequence(tool_seq):\n    \"\"\"Expand a tool_sequence list into API messages.\n\n    Returns a list of dicts (role: assistant with tool_calls, or role: tool).\n    If any tool_call IDs are missing a matching tool result, a synthetic\n    empty result is inserted so the prompt is never malformed.\n    \"\"\"\n    messages = []\n    expected_ids = []\n    seen_ids = set()\n\n    for item in tool_seq:\n        if 'tool_calls' in item:\n            deserialized = _deserialize_tool_call_arguments(item['tool_calls'])\n            messages.append({\n                \"role\": \"assistant\",\n                \"content\": item.get('content', ''),\n                \"tool_calls\": deserialized\n            })\n            for tc in item['tool_calls']:\n  
              tc_id = tc.get('id', '')\n                if tc_id:\n                    expected_ids.append(tc_id)\n        elif item.get('role') == 'tool':\n            messages.append({\n                \"role\": \"tool\",\n                \"content\": item['content'],\n                \"tool_call_id\": item.get('tool_call_id', '')\n            })\n            seen_ids.add(item.get('tool_call_id', ''))\n\n    # Fill in synthetic results for any orphaned tool call IDs\n    for tc_id in expected_ids:\n        if tc_id not in seen_ids:\n            messages.append({\n                \"role\": \"tool\",\n                \"content\": \"\",\n                \"tool_call_id\": tc_id\n            })\n\n    return messages\n\n\ndef generate_chat_prompt(user_input, state, **kwargs):\n    impersonate = kwargs.get('impersonate', False)\n    _continue = kwargs.get('_continue', False)\n    also_return_rows = kwargs.get('also_return_rows', False)\n    history_data = kwargs.get('history', state['history'])\n    history = history_data['internal']\n    metadata = history_data.get('metadata', {})\n\n    # Templates\n    chat_template_str = state['chat_template_str']\n    if state['mode'] != 'instruct':\n        chat_template_str = replace_character_names(chat_template_str, state['name1'], state['name2'])\n\n    instruction_template = get_compiled_template(state['instruction_template_str'])\n    chat_template = get_compiled_template(chat_template_str)\n\n    instruct_renderer = partial(\n        instruction_template.render,\n        builtin_tools=None,\n        tools=state['tools'] if 'tools' in state else None,\n        tools_in_user_message=False,\n        add_generation_prompt=False,\n        enable_thinking=state['enable_thinking'],\n        reasoning_effort=state['reasoning_effort'],\n        thinking_budget=-1 if state.get('enable_thinking', True) else 0,\n        bos_token=shared.bos_token,\n        eos_token=shared.eos_token,\n    )\n\n    chat_renderer = partial(\n        
chat_template.render,\n        add_generation_prompt=False,\n        name1=state['name1'],\n        name2=state['name2'],\n        user_bio=replace_character_names(state['user_bio'], state['name1'], state['name2']),\n        tools=state['tools'] if 'tools' in state else None,\n    )\n\n    messages = []\n\n    if state['mode'] == 'instruct':\n        renderer = instruct_renderer\n        if state['custom_system_message'].strip() != '':\n            messages.append({\"role\": \"system\", \"content\": state['custom_system_message']})\n    else:\n        renderer = chat_renderer\n        if state['context'].strip() != '' or state['user_bio'].strip() != '':\n            context = replace_character_names(state['context'], state['name1'], state['name2'])\n            messages.append({\"role\": \"system\", \"content\": context})\n\n    insert_pos = len(messages)\n    for i, entry in enumerate(reversed(history)):\n        user_msg = entry[0].strip()\n        assistant_msg = entry[1].strip()\n        tool_msg = entry[2].strip() if len(entry) > 2 else ''\n        entry_meta = entry[3] if len(entry) > 3 else {}\n\n        row_idx = len(history) - i - 1\n\n        if tool_msg:\n            tool_message = {\"role\": \"tool\", \"content\": tool_msg}\n            if \"tool_call_id\" in entry_meta:\n                tool_message[\"tool_call_id\"] = entry_meta[\"tool_call_id\"]\n            messages.insert(insert_pos, tool_message)\n\n        if not assistant_msg and entry_meta.get('tool_calls'):\n            # Assistant message with only tool_calls and no text content\n            messages.insert(insert_pos, {\"role\": \"assistant\", \"content\": \"\", \"tool_calls\": _deserialize_tool_call_arguments(entry_meta['tool_calls'])})\n        elif assistant_msg:\n            # Handle GPT-OSS as a special case\n            if '<|channel|>analysis<|message|>' in assistant_msg or '<|channel|>final<|message|>' in assistant_msg:\n                thinking_content = \"\"\n                
final_content = \"\"\n\n                # Extract analysis content if present\n                if '<|channel|>analysis<|message|>' in assistant_msg:\n                    parts = assistant_msg.split('<|channel|>analysis<|message|>', 1)\n                    if len(parts) > 1:\n                        # The content is everything after the tag\n                        potential_content = parts[1]\n\n                        # Now, find the end of this content block\n                        analysis_end_tag = '<|end|>'\n                        if analysis_end_tag in potential_content:\n                            thinking_content = potential_content.split(analysis_end_tag, 1)[0].strip()\n                        else:\n                            # Fallback: if no <|end|> tag, stop at the start of the final channel if it exists\n                            final_channel_tag = '<|channel|>final<|message|>'\n                            if final_channel_tag in potential_content:\n                                thinking_content = potential_content.split(final_channel_tag, 1)[0].strip()\n                            else:\n                                thinking_content = potential_content.strip()\n\n                # Extract final content if present\n                final_tag_to_find = '<|channel|>final<|message|>'\n                if final_tag_to_find in assistant_msg:\n                    parts = assistant_msg.split(final_tag_to_find, 1)\n                    if len(parts) > 1:\n                        # The content is everything after the tag\n                        potential_content = parts[1]\n\n                        # Now, find the end of this content block\n                        final_end_tag = '<|end|>'\n                        if final_end_tag in potential_content:\n                            final_content = potential_content.split(final_end_tag, 1)[0].strip()\n                        else:\n                            final_content = 
potential_content.strip()\n\n                # Insert as structured message\n                msg_dict = {\"role\": \"assistant\", \"content\": final_content}\n                if '<|channel|>analysis<|message|>' in assistant_msg:\n                    msg_dict[\"thinking\"] = thinking_content\n\n                messages.insert(insert_pos, msg_dict)\n\n            # Handle Seed-OSS\n            elif '<seed:think>' in assistant_msg:\n                thinking_content = \"\"\n                final_content = assistant_msg\n\n                # Extract thinking content if present\n                if '<seed:think>' in assistant_msg:\n                    parts = assistant_msg.split('<seed:think>', 1)\n                    if len(parts) > 1:\n                        potential_content = parts[1]\n                        if '</seed:think>' in potential_content:\n                            thinking_content = potential_content.split('</seed:think>', 1)[0].strip()\n                            final_content = parts[0] + potential_content.split('</seed:think>', 1)[1]\n                        else:\n                            thinking_content = potential_content.strip()\n                            final_content = parts[0]\n\n                # Insert as structured message\n                msg_dict = {\"role\": \"assistant\", \"content\": final_content.strip()}\n                if thinking_content:\n                    msg_dict[\"reasoning_content\"] = thinking_content\n\n                messages.insert(insert_pos, msg_dict)\n\n            else:\n                # Default case (used by all other models)\n                messages.insert(insert_pos, {\"role\": \"assistant\", \"content\": assistant_msg})\n\n            # Attach tool_calls metadata to the assistant message if present\n            if entry_meta.get('tool_calls') and messages[insert_pos].get('role') == 'assistant':\n                messages[insert_pos]['tool_calls'] = 
_deserialize_tool_call_arguments(entry_meta['tool_calls'])\n\n        # Expand tool_sequence from metadata (inserted AFTER assistant so that\n        # the final order is: user → tool_calls → tool_results → final_answer)\n        meta_key = f\"assistant_{row_idx}\"\n        tool_seq = metadata.get(meta_key, {}).get('tool_sequence', [])\n        if tool_seq:\n            for msg in reversed(_expand_tool_sequence(tool_seq)):\n                messages.insert(insert_pos, msg)\n\n        if entry_meta.get('role') == 'system':\n            if user_msg:\n                messages.insert(insert_pos, {\"role\": \"system\", \"content\": user_msg})\n        elif user_msg not in ['', '<|BEGIN-VISIBLE-CHAT|>']:\n            # Check for user message attachments in metadata\n            user_key = f\"user_{row_idx}\"\n            enhanced_user_msg = user_msg\n\n            # Add attachment content if present AND if past attachments are enabled\n            if user_key in metadata and \"attachments\" in metadata[user_key]:\n                attachments_text = \"\"\n                image_refs = \"\"\n\n                for attachment in metadata[user_key][\"attachments\"]:\n                    if attachment.get(\"type\") == \"image\":\n                        # Add image reference for multimodal models\n                        image_refs += \"<__media__>\"\n                    elif state.get('include_past_attachments', True):\n                        # Handle text/PDF attachments\n                        filename = attachment.get(\"name\", \"file\")\n                        content = attachment.get(\"content\", \"\")\n                        if attachment.get(\"type\") == \"text/html\" and attachment.get(\"url\"):\n                            attachments_text += f\"\\nName: {filename}\\nURL: {attachment['url']}\\nContents:\\n\\n=====\\n{content}\\n=====\\n\\n\"\n                        else:\n                            attachments_text += f\"\\nName: 
{filename}\\nContents:\\n\\n=====\\n{content}\\n=====\\n\\n\"\n\n                if image_refs:\n                    enhanced_user_msg = f\"{image_refs}\\n\\n{enhanced_user_msg}\"\n                if attachments_text:\n                    enhanced_user_msg += f\"\\n\\nATTACHMENTS:\\n{attachments_text}\"\n\n            messages.insert(insert_pos, {\"role\": \"user\", \"content\": enhanced_user_msg})\n\n    # Handle the current user input\n    user_input = user_input.strip()\n\n    # Check if we have attachments\n    if not (impersonate or _continue):\n        has_attachments = False\n        if len(history_data.get('metadata', {})) > 0:\n            current_row_idx = len(history)\n            user_key = f\"user_{current_row_idx}\"\n            has_attachments = user_key in metadata and \"attachments\" in metadata[user_key]\n\n        if user_input or has_attachments:\n            # For the current user input being processed, check if we need to add attachments\n            if len(history_data.get('metadata', {})) > 0:\n                current_row_idx = len(history)\n                user_key = f\"user_{current_row_idx}\"\n\n                if user_key in metadata and \"attachments\" in metadata[user_key]:\n                    attachments_text = \"\"\n                    image_refs = \"\"\n\n                    for attachment in metadata[user_key][\"attachments\"]:\n                        if attachment.get(\"type\") == \"image\":\n                            image_refs += \"<__media__>\"\n                        else:\n                            filename = attachment.get(\"name\", \"file\")\n                            content = attachment.get(\"content\", \"\")\n                            if attachment.get(\"type\") == \"text/html\" and attachment.get(\"url\"):\n                                attachments_text += f\"\\nName: {filename}\\nURL: {attachment['url']}\\nContents:\\n\\n=====\\n{content}\\n=====\\n\\n\"\n                            else:\n                 
               attachments_text += f\"\\nName: {filename}\\nContents:\\n\\n=====\\n{content}\\n=====\\n\\n\"\n\n                    if image_refs:\n                        user_input = f\"{image_refs}\\n\\n{user_input}\"\n                    if attachments_text:\n                        user_input += f\"\\n\\nATTACHMENTS:\\n{attachments_text}\"\n\n            messages.append({\"role\": \"user\", \"content\": user_input})\n\n        # Expand tool_sequence for the current entry (excluded from the\n        # history loop during regenerate — needed so the model sees prior\n        # tool calls and results when re-generating the final answer).\n        current_tool_seq = metadata.get(f\"assistant_{len(history)}\", {}).get('tool_sequence', [])\n        messages.extend(_expand_tool_sequence(current_tool_seq))\n\n    if impersonate and state['mode'] != 'chat-instruct':\n        messages.append({\"role\": \"user\", \"content\": \"fake user message replace me\"})\n\n    def make_prompt(messages):\n        last_message = messages[-1].copy()\n        if _continue:\n            if state['mode'] == 'chat-instruct':\n                messages = messages[:-1]\n            else:\n                messages[-1][\"content\"] = \"fake assistant message replace me\"\n                messages.append({\"role\": \"assistant\", \"content\": \"this will get deleted\"})\n\n        if state['mode'] != 'chat-instruct':\n            add_generation_prompt = (not _continue and not impersonate)\n        else:\n            add_generation_prompt = False\n\n        prompt = renderer(\n            messages=messages,\n            add_generation_prompt=add_generation_prompt\n        )\n\n        if state['mode'] == 'chat-instruct':\n            command = state['chat-instruct_command']\n            command = command.replace('<|character|>', state['name2'] if not impersonate else state['name1'])\n            command = command.replace('<|prompt|>', prompt)\n            command = 
replace_character_names(command, state['name1'], state['name2'])\n\n            outer_messages = []\n            if state['custom_system_message'].strip() != '':\n                outer_messages.append({\"role\": \"system\", \"content\": state['custom_system_message']})\n\n            outer_messages.append({\"role\": \"user\", \"content\": command})\n            if _continue:\n                outer_messages.append(last_message.copy())\n                outer_messages[-1][\"content\"] = \"fake assistant message replace me\"\n                outer_messages.append({\"role\": \"assistant\", \"content\": \"this will get deleted\"})\n\n            prompt = instruct_renderer(\n                messages=outer_messages,\n                add_generation_prompt=not _continue\n            )\n\n        if _continue:\n            prompt = prompt.split(\"fake assistant message replace me\", 1)[0]\n\n            content = last_message.get(\"content\", \"\")\n            partial_thought = last_message.get(\"thinking\", \"\") or last_message.get(\"reasoning_content\", \"\")\n\n            # Handle partial thinking blocks (GPT-OSS and Seed-OSS)\n            if not content and partial_thought and partial_thought.strip():\n                search_string = partial_thought.strip()\n                index = prompt.rfind(search_string)\n                if index != -1:\n                    prompt = prompt[:index] + partial_thought\n                else:\n                    # Fallback if search fails: just append the thought\n                    prompt += partial_thought\n            else:\n                # All other cases\n                prompt += content\n\n        if impersonate:\n            prompt = prompt.split(\"fake user message replace me\", 1)[0]\n            prompt += user_input\n\n        if state['mode'] in ['chat', 'chat-instruct'] and not impersonate and not _continue:\n            prompt += apply_extensions('bot_prefix', \"\", state)\n\n        return prompt\n\n    prompt = 
make_prompt(messages)\n\n    # Handle truncation\n    if shared.tokenizer is not None:\n        max_length = get_max_prompt_length(state)\n        encoded_length = get_encoded_length(prompt)\n        while len(messages) > 0 and encoded_length > max_length:\n\n            # Remove old message, save system message\n            if len(messages) > 2 and messages[0]['role'] == 'system':\n                messages.pop(1)\n\n            # Remove old message when no system message is present\n            elif len(messages) > 1 and messages[0]['role'] != 'system':\n                messages.pop(0)\n\n            # Resort to truncating the user input\n            else:\n                user_message = messages[-1]['content']\n\n                # Bisect the truncation point\n                left, right = 0, len(user_message)\n\n                while left < right:\n                    mid = (left + right + 1) // 2\n\n                    messages[-1]['content'] = user_message[:mid]\n                    prompt = make_prompt(messages)\n                    encoded_length = get_encoded_length(prompt)\n\n                    if encoded_length <= max_length:\n                        left = mid\n                    else:\n                        right = mid - 1\n\n                messages[-1]['content'] = user_message[:left]\n                prompt = make_prompt(messages)\n                encoded_length = get_encoded_length(prompt)\n                if encoded_length > max_length:\n                    logger.error(f\"Failed to build the chat prompt. 
The input is too long for the available context length.\\n\\nTruncation length: {state['truncation_length']}\\nmax_new_tokens: {state['max_new_tokens']} (is it too high?)\\nAvailable context length: {max_length}\\n\")\n                    raise ValueError\n                else:\n                    # Calculate token counts for the log message\n                    original_user_tokens = get_encoded_length(user_message)\n                    truncated_user_tokens = get_encoded_length(user_message[:left])\n                    total_context = max_length + state['max_new_tokens']\n\n                    logger.warning(\n                        f\"User message truncated from {original_user_tokens} to {truncated_user_tokens} tokens. \"\n                        f\"Context full: {max_length} input tokens ({total_context} total, {state['max_new_tokens']} for output). \"\n                        f\"Increase ctx-size while loading the model to avoid truncation.\"\n                    )\n\n                    break\n\n            prompt = make_prompt(messages)\n            encoded_length = get_encoded_length(prompt)\n\n    if also_return_rows:\n        return prompt, [message['content'] for message in messages]\n    else:\n        return prompt\n\n\ndef count_prompt_tokens(text_input, state):\n    \"\"\"Count tokens for current history + input including attachments\"\"\"\n    if shared.tokenizer is None:\n        return \"Tokenizer not available\"\n\n    try:\n        # Handle dict format with text and files\n        files = []\n        if isinstance(text_input, dict):\n            files = text_input.get('files', [])\n            text = text_input.get('text', '')\n        else:\n            text = text_input\n            files = []\n\n        # Create temporary history copy to add attachments\n        temp_history = copy.deepcopy(state['history'])\n        if 'metadata' not in temp_history:\n            temp_history['metadata'] = {}\n\n        # Process attachments if any\n       
 if files:\n            row_idx = len(temp_history['internal'])\n            for file_path in files:\n                add_message_attachment(temp_history, row_idx, file_path, is_user=True)\n\n        # Create temp state with modified history\n        temp_state = copy.deepcopy(state)\n        temp_state['history'] = temp_history\n\n        # Build prompt using existing logic\n        prompt = generate_chat_prompt(text, temp_state)\n        current_tokens = get_encoded_length(prompt)\n        max_tokens = temp_state['truncation_length']\n\n        percentage = (current_tokens / max_tokens) * 100 if max_tokens > 0 else 0\n\n        return f\"History + Input:<br/>{current_tokens:,} / {max_tokens:,} tokens ({percentage:.1f}%)\"\n\n    except Exception as e:\n        logger.error(f\"Error counting tokens: {e}\")\n        return f\"Error: {str(e)}\"\n\n\ndef get_stopping_strings(state):\n    stopping_strings = []\n    renderers = []\n\n    if state['mode'] in ['instruct', 'chat-instruct']:\n        template = get_compiled_template(state['instruction_template_str'])\n        renderer = partial(template.render, add_generation_prompt=False, bos_token=shared.bos_token, eos_token=shared.eos_token)\n        renderers.append(renderer)\n\n    if state['mode'] in ['chat']:\n        template = get_compiled_template(state['chat_template_str'])\n        renderer = partial(template.render, add_generation_prompt=False, name1=state['name1'], name2=state['name2'])\n        renderers.append(renderer)\n\n    fake_messages = [\n        {\"role\": \"user\", \"content\": \"first user message\"},\n        {\"role\": \"assistant\", \"content\": \"first assistant message\"},\n        {\"role\": \"user\", \"content\": \"second user message\"},\n        {\"role\": \"assistant\", \"content\": \"second assistant message\"},\n    ]\n\n    stopping_strings = []\n    for renderer in renderers:\n        prompt = renderer(messages=fake_messages)\n\n        # Find positions of each message content\n      
  first_user_end = prompt.find(\"first user message\") + len(\"first user message\")\n        first_assistant_start = prompt.find(\"first assistant message\")\n        first_assistant_end = prompt.find(\"first assistant message\") + len(\"first assistant message\")\n        second_user_start = prompt.find(\"second user message\")\n        second_assistant_end = prompt.find(\"second assistant message\") + len(\"second assistant message\")\n\n        # Extract pieces of text potentially containing unique stopping strings\n        texts = [\n            prompt[first_user_end:first_assistant_start],\n            prompt[first_assistant_end:second_user_start],\n            prompt[second_assistant_end:]\n        ]\n\n        for text in texts:\n            stripped_text = text.strip()\n            if stripped_text.startswith(\"<\") and \">\" in stripped_text:\n                stopping_strings.append(stripped_text.split(\">\")[0] + \">\")\n            elif stripped_text.startswith(\"[\") and \"]\" in stripped_text:\n                stopping_strings.append(stripped_text.split(\"]\")[0] + \"]\")\n            elif stripped_text.startswith(\"(\") and \")\" in stripped_text:\n                stopping_strings.append(stripped_text.split(\")\")[0] + \")\")\n            elif stripped_text.startswith(\"{\") and \"}\" in stripped_text:\n                stopping_strings.append(stripped_text.split(\"}\")[0] + \"}\")\n            elif \":\" in text:\n                stopping_strings.append(text.split(\":\")[0] + \":\")\n\n    if 'stopping_strings' in state and isinstance(state['stopping_strings'], list):\n        stopping_strings += state.pop('stopping_strings')\n\n    # Remove redundant items that start with another item\n    result = [item for item in stopping_strings if not any(item.startswith(other) and item != other for other in stopping_strings)]\n    result = list(set(result))\n\n    # Handle GPT-OSS as a special case\n    if '<|channel|>final<|message|>' in 
state['instruction_template_str'] and \"<|end|>\" in result:\n        result.remove(\"<|end|>\")\n        result.append(\"<|result|>\")\n        result = list(set(result))\n\n    if shared.args.verbose:\n        logger.info(\"STOPPING_STRINGS=\")\n        pprint.PrettyPrinter(indent=4, sort_dicts=False).pprint(result)\n        print()\n\n    return result\n\n\ndef add_message_version(history, role, row_idx, is_current=True):\n    key = f\"{role}_{row_idx}\"\n    if 'metadata' not in history:\n        history['metadata'] = {}\n    if key not in history['metadata']:\n        history['metadata'][key] = {}\n\n    if \"versions\" not in history['metadata'][key]:\n        history['metadata'][key][\"versions\"] = []\n\n    # Determine which index to use for content based on role\n    content_idx = 0 if role == 'user' else 1\n    current_content = history['internal'][row_idx][content_idx]\n    current_visible = history['visible'][row_idx][content_idx]\n\n    history['metadata'][key][\"versions\"].append({\n        \"content\": current_content,\n        \"visible_content\": current_visible,\n        \"timestamp\": get_current_timestamp()\n    })\n\n    if is_current:\n        # Set the current_version_index to the newly added version (which is now the last one).\n        history['metadata'][key][\"current_version_index\"] = len(history['metadata'][key][\"versions\"]) - 1\n\n\ndef add_message_attachment(history, row_idx, file_path, is_user=True):\n    \"\"\"Add a file attachment to a message in history metadata\"\"\"\n    if 'metadata' not in history:\n        history['metadata'] = {}\n\n    key = f\"{'user' if is_user else 'assistant'}_{row_idx}\"\n\n    if key not in history['metadata']:\n        history['metadata'][key] = {\"timestamp\": get_current_timestamp()}\n    if \"attachments\" not in history['metadata'][key]:\n        history['metadata'][key][\"attachments\"] = []\n\n    # Get file info using pathlib\n    path = Path(file_path)\n    filename = path.name\n    
file_extension = path.suffix.lower()\n\n    try:\n        # Handle image files\n        if file_extension in ['.jpg', '.jpeg', '.png', '.webp', '.bmp', '.gif']:\n            # Convert image to base64\n            with open(path, 'rb') as f:\n                image_data = base64.b64encode(f.read()).decode('utf-8')\n\n            # Determine MIME type from extension\n            mime_type_map = {\n                '.jpg': 'image/jpeg',\n                '.jpeg': 'image/jpeg',\n                '.png': 'image/png',\n                '.webp': 'image/webp',\n                '.bmp': 'image/bmp',\n                '.gif': 'image/gif'\n            }\n            mime_type = mime_type_map.get(file_extension, 'image/jpeg')\n\n            # Format as data URL\n            data_url = f\"data:{mime_type};base64,{image_data}\"\n\n            # Generate unique image ID\n            image_id = len([att for att in history['metadata'][key][\"attachments\"] if att.get(\"type\") == \"image\"]) + 1\n\n            attachment = {\n                \"name\": filename,\n                \"type\": \"image\",\n                \"image_data\": data_url,\n                \"image_id\": image_id,\n            }\n        elif file_extension == '.pdf':\n            # Process PDF file\n            content = extract_pdf_text(path)\n            attachment = {\n                \"name\": filename,\n                \"type\": \"application/pdf\",\n                \"content\": content,\n            }\n        elif file_extension == '.docx':\n            content = extract_docx_text(path)\n            attachment = {\n                \"name\": filename,\n                \"type\": \"application/docx\",\n                \"content\": content,\n            }\n        else:\n            # Default handling for text files\n            with open(path, 'r', encoding='utf-8') as f:\n                content = f.read()\n\n            attachment = {\n                \"name\": filename,\n                \"type\": \"text/plain\",\n 
               \"content\": content,\n            }\n\n        history['metadata'][key][\"attachments\"].append(attachment)\n        return attachment  # Return the attachment for reuse\n    except Exception as e:\n        logger.error(f\"Error processing attachment {filename}: {e}\")\n        return None\n\n\ndef extract_pdf_text(pdf_path):\n    \"\"\"Extract text from a PDF file\"\"\"\n    import pymupdf\n\n    text = \"\"\n    try:\n        with pymupdf.open(pdf_path) as doc:\n            for page in doc:\n                text += page.get_text() + \"\\n\\n\"\n\n        return text.strip()\n    except Exception as e:\n        logger.error(f\"Error extracting text from PDF: {e}\")\n        return f\"[Error extracting PDF text: {str(e)}]\"\n\n\ndef extract_docx_text(docx_path):\n    \"\"\"\n    Extract text from a .docx file, including headers,\n    body (paragraphs and tables), and footers.\n    \"\"\"\n    try:\n        import docx\n\n        doc = docx.Document(docx_path)\n        parts = []\n\n        # 1) Extract non-empty header paragraphs from each section\n        for section in doc.sections:\n            for para in section.header.paragraphs:\n                text = para.text.strip()\n                if text:\n                    parts.append(text)\n\n        # 2) Extract body blocks (paragraphs and tables) in document order\n        parent_elm = doc.element.body\n        for child in parent_elm.iterchildren():\n            if isinstance(child, docx.oxml.text.paragraph.CT_P):\n                para = docx.text.paragraph.Paragraph(child, doc)\n                text = para.text.strip()\n                if text:\n                    parts.append(text)\n\n            elif isinstance(child, docx.oxml.table.CT_Tbl):\n                table = docx.table.Table(child, doc)\n                for row in table.rows:\n                    cells = [cell.text.strip() for cell in row.cells]\n                    parts.append(\"\\t\".join(cells))\n\n        # 3) Extract 
non-empty footer paragraphs from each section\n        for section in doc.sections:\n            for para in section.footer.paragraphs:\n                text = para.text.strip()\n                if text:\n                    parts.append(text)\n\n        return \"\\n\".join(parts)\n\n    except Exception as e:\n        logger.error(f\"Error extracting text from DOCX: {e}\")\n        return f\"[Error extracting DOCX text: {str(e)}]\"\n\n\ndef generate_search_query(user_message, state):\n    \"\"\"Generate a search query from user message using the LLM\"\"\"\n    # Augment the user message with search instruction\n    augmented_message = f\"{user_message}\\n\\n=====\\n\\nPlease turn the message above into a short web search query in the same language as the message. Respond with only the search query, nothing else.\"\n\n    # Use a minimal state for search query generation but keep the full history\n    search_state = state.copy()\n    search_state['auto_max_new_tokens'] = True\n    search_state['enable_thinking'] = False\n    search_state['reasoning_effort'] = 'low'\n    search_state['start_with'] = \"\"\n\n    # Generate the full prompt using existing history + augmented message\n    formatted_prompt = generate_chat_prompt(augmented_message, search_state)\n\n    query = \"\"\n    for reply in generate_reply(formatted_prompt, search_state, stopping_strings=[], is_chat=True):\n        query = reply\n\n    # Check for thinking block delimiters and extract content after them\n    if \"</think>\" in query:\n        query = query.rsplit(\"</think>\", 1)[1]\n    elif \"<|start|>assistant<|channel|>final<|message|>\" in query:\n        query = query.rsplit(\"<|start|>assistant<|channel|>final<|message|>\", 1)[1]\n    elif \"<|channel|>final<|message|>\" in query:\n        query = query.rsplit(\"<|channel|>final<|message|>\", 1)[1]\n    elif \"</seed:think>\" in query:\n        query = query.rsplit(\"</seed:think>\", 1)[1]\n\n    # Strip and remove surrounding quotes if 
present\n    query = query.strip()\n    if len(query) >= 2 and query.startswith('\"') and query.endswith('\"'):\n        query = query[1:-1]\n\n    return query\n\n\ndef chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_message=True, for_ui=False):\n    # Handle dict format with text and files\n    files = []\n    if isinstance(text, dict):\n        files = text.get('files', [])\n        text = text.get('text', '')\n\n    history = state['history']\n    output = copy.deepcopy(history)\n    output = apply_extensions('history', output)\n    state = apply_extensions('state', state)\n\n    # Handle GPT-OSS as a special case\n    if '<|channel|>final<|message|>' in state['instruction_template_str']:\n        state['skip_special_tokens'] = False\n\n    # Let the jinja2 template handle the BOS token\n    if state['mode'] in ['instruct', 'chat-instruct']:\n        state['add_bos_token'] = False\n\n    # Initialize metadata if not present\n    if 'metadata' not in output:\n        output['metadata'] = {}\n\n    visible_text = None\n    stopping_strings = get_stopping_strings(state)\n    is_stream = state['stream']\n\n    # Prepare the input\n    if not (regenerate or _continue):\n        visible_text = html.escape(text)\n\n        # Process file attachments and store in metadata\n        row_idx = len(output['internal'])\n\n        # Add attachments to metadata only, not modifying the message text\n        for file_path in files:\n            add_message_attachment(output, row_idx, file_path, is_user=True)\n\n        # Add web search results as attachments if enabled\n        if state.get('enable_web_search', False):\n            search_query = generate_search_query(text, state)\n            add_web_search_attachments(output, row_idx, text, search_query, state)\n\n        # Apply extensions\n        text, visible_text = apply_extensions('chat_input', text, visible_text, state)\n        text = apply_extensions('input', text, state, is_chat=True)\n\n     
   # Current row index\n        output['internal'].append([text, ''])\n        output['visible'].append([visible_text, ''])\n        # Add metadata with timestamp\n        update_message_metadata(output['metadata'], \"user\", row_idx, timestamp=get_current_timestamp())\n\n        # *Is typing...*\n        if loading_message:\n            yield {\n                'visible': output['visible'][:-1] + [[output['visible'][-1][0], shared.processing_message]],\n                'internal': output['internal'],\n                'metadata': output['metadata']\n            }\n    else:\n        text, visible_text = output['internal'][-1][0], output['visible'][-1][0]\n        if regenerate and not state.get('_tool_turn'):\n            row_idx = len(output['internal']) - 1\n\n            # Store the old response as a version before regenerating\n            if not output['metadata'].get(f\"assistant_{row_idx}\", {}).get('versions'):\n                add_message_version(output, \"assistant\", row_idx, is_current=False)\n\n            # Add new empty version (will be filled during streaming)\n            key = f\"assistant_{row_idx}\"\n            output['metadata'][key][\"versions\"].append({\n                \"content\": \"\",\n                \"visible_content\": \"\",\n                \"timestamp\": get_current_timestamp()\n            })\n            output['metadata'][key][\"current_version_index\"] = len(output['metadata'][key][\"versions\"]) - 1\n\n            if loading_message:\n                yield {\n                    'visible': output['visible'][:-1] + [[visible_text, shared.processing_message]],\n                    'internal': output['internal'][:-1] + [[text, '']],\n                    'metadata': output['metadata']\n                }\n        elif _continue:\n            last_reply = [output['internal'][-1][1], output['visible'][-1][1]]\n            if loading_message:\n                yield {\n                    'visible': output['visible'][:-1] + 
[[visible_text, last_reply[1] + '...']],\n                    'internal': output['internal'],\n                    'metadata': output['metadata']\n                }\n\n    row_idx = len(output['internal']) - 1\n\n    # Collect image attachments for multimodal generation from the entire history\n    all_image_attachments = []\n    if 'metadata' in output:\n        for i in range(len(output['internal'])):\n            user_key = f\"user_{i}\"\n            if user_key in output['metadata'] and \"attachments\" in output['metadata'][user_key]:\n                for attachment in output['metadata'][user_key][\"attachments\"]:\n                    if attachment.get(\"type\") == \"image\":\n                        all_image_attachments.append(attachment)\n\n    # Add all collected image attachments to state for the generation\n    if all_image_attachments:\n        state['image_attachments'] = all_image_attachments\n\n    # Generate the prompt\n    kwargs = {\n        '_continue': _continue,\n        'history': output if _continue else {\n            k: (v[:-1] if k in ['internal', 'visible'] else v)\n            for k, v in output.items()\n        }\n    }\n\n    prompt = apply_extensions('custom_generate_chat_prompt', text, state, **kwargs)\n    if prompt is None:\n        prompt = generate_chat_prompt(text, state, **kwargs)\n\n    # Add timestamp for assistant's response at the start of generation\n    update_message_metadata(output['metadata'], \"assistant\", row_idx, timestamp=get_current_timestamp(), model_name=shared.model_name)\n\n    # Detect if the template appended a thinking start tag to the prompt\n    thinking_prefix = None\n    if not _continue:\n        stripped_prompt = prompt.rstrip('\\n')\n        for start_tag, end_tag, content_tag in THINKING_FORMATS:\n            if start_tag is not None and stripped_prompt.endswith(start_tag):\n                thinking_prefix = start_tag\n                break\n\n    # When tools are active, buffer streaming output 
during potential tool\n    # call generation to prevent raw markup from leaking into the display.\n    _check_tool_markers = bool(state.get('tools'))\n    _last_visible_before_tool_buffer = None\n    if _check_tool_markers:\n        from modules.tool_parsing import streaming_tool_buffer_check, detect_tool_call_format\n        _tool_names = [t['function']['name'] for t in state['tools'] if 'function' in t and 'name' in t['function']]\n        _template_str = state.get('instruction_template_str', '') if state.get('mode') == 'instruct' else state.get('chat_template_str', '')\n        _, _streaming_markers, _check_bare_names = detect_tool_call_format(_template_str)\n\n    # Generate\n    reply = None\n    for j, reply in enumerate(generate_reply(prompt, state, stopping_strings=stopping_strings, is_chat=True, for_ui=for_ui)):\n\n        # Prepend thinking tag if the template appended it to the prompt\n        if thinking_prefix:\n            reply = thinking_prefix + reply\n\n        # Extract the reply\n        if state['mode'] in ['chat', 'chat-instruct']:\n            if not _continue:\n                reply = reply.lstrip()\n\n            if reply.startswith(state['name2'] + ':'):\n                reply = reply[len(state['name2'] + ':'):]\n            elif reply.startswith(state['name1'] + ':'):\n                reply = reply[len(state['name1'] + ':'):]\n\n            visible_reply = re.sub(\"(<USER>|<user>|{{user}})\", state['name1'], reply)\n        else:\n            visible_reply = reply\n\n        visible_reply = html.escape(visible_reply)\n\n        if shared.stop_everything:\n            if not state.get('_skip_output_extensions'):\n                output['visible'][-1][1] = apply_extensions('output', output['visible'][-1][1], state, is_chat=True)\n\n            yield output\n            return\n\n        if _continue:\n            output['internal'][-1] = [text, last_reply[0] + reply]\n            output['visible'][-1] = [visible_text, last_reply[1] + 
visible_reply]\n        elif not (j == 0 and visible_reply.strip() == ''):\n            output['internal'][-1] = [text, reply.lstrip(' ')]\n            output['visible'][-1] = [visible_text, visible_reply.lstrip(' ')]\n\n        # Keep version metadata in sync during streaming (for regeneration)\n        if regenerate and not state.get('_tool_turn'):\n            row_idx = len(output['internal']) - 1\n            key = f\"assistant_{row_idx}\"\n            current_idx = output['metadata'][key]['current_version_index']\n            output['metadata'][key]['versions'][current_idx].update({\n                'content': output['internal'][row_idx][1],\n                'visible_content': output['visible'][row_idx][1]\n            })\n\n        if is_stream:\n            if _check_tool_markers:\n                if streaming_tool_buffer_check(output['internal'][-1][1], markers=_streaming_markers, tool_names=_tool_names, check_bare_names=_check_bare_names):\n                    continue\n                _last_visible_before_tool_buffer = output['visible'][-1][1]\n\n            yield output\n\n    if _continue:\n        # Reprocess the entire internal text for extensions (like translation).\n        # Skip entirely when the visible text contains <tool_call> markers,\n        # since those only exist in visible (internal is cleared after each tool\n        # execution) and rebuilding from internal would destroy them. 
Output\n        # extensions also can't handle the raw <tool_call> markup safely.\n        if '<tool_call>' not in output['visible'][-1][1]:\n            full_internal = output['internal'][-1][1]\n            if state['mode'] in ['chat', 'chat-instruct']:\n                full_visible = re.sub(\"(<USER>|<user>|{{user}})\", state['name1'], full_internal)\n            else:\n                full_visible = full_internal\n\n            full_visible = html.escape(full_visible)\n            if not state.get('_skip_output_extensions'):\n                output['visible'][-1][1] = apply_extensions('output', full_visible, state, is_chat=True)\n    else:\n        if not state.get('_skip_output_extensions'):\n            output['visible'][-1][1] = apply_extensions('output', output['visible'][-1][1], state, is_chat=True)\n\n    # Final sync for version metadata (in case streaming was disabled)\n    if regenerate and not state.get('_tool_turn'):\n        row_idx = len(output['internal']) - 1\n        key = f\"assistant_{row_idx}\"\n        current_idx = output['metadata'][key]['current_version_index']\n        output['metadata'][key]['versions'][current_idx].update({\n            'content': output['internal'][row_idx][1],\n            'visible_content': output['visible'][row_idx][1]\n        })\n\n    # When tool markers were detected during streaming, restore the last\n    # visible text from before buffering started so raw markup doesn't flash\n    # in the UI.  
The internal text is left intact so the caller can still\n    # parse tool calls from it.\n    if is_stream and _check_tool_markers and streaming_tool_buffer_check(output['internal'][-1][1], markers=_streaming_markers, tool_names=_tool_names, check_bare_names=_check_bare_names):\n        output['visible'][-1][1] = _last_visible_before_tool_buffer or ''\n\n    yield output\n\n\ndef impersonate_wrapper(textbox, state):\n    text = textbox['text']\n    static_output = chat_html_wrapper(state['history'], state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu'])\n\n    prompt = generate_chat_prompt('', state, impersonate=True)\n    stopping_strings = get_stopping_strings(state)\n\n    textbox['text'] = text + '...'\n    yield textbox, static_output\n    reply = None\n    for reply in generate_reply(prompt + text, state, stopping_strings=stopping_strings, is_chat=True):\n        textbox['text'] = (text + reply).lstrip(' ')\n        yield textbox, static_output\n        if shared.stop_everything:\n            return\n\n\ndef generate_chat_reply(text, state, regenerate=False, _continue=False, loading_message=True, for_ui=False):\n    history = state['history']\n    if regenerate or _continue:\n        text = ''\n        if (len(history['visible']) == 1 and not history['visible'][0][0]) or len(history['internal']) == 0:\n            yield history\n            return\n\n    for history in chatbot_wrapper(text, state, regenerate=regenerate, _continue=_continue, loading_message=loading_message, for_ui=for_ui):\n        yield history\n\n\ndef character_is_loaded(state, raise_exception=False):\n    if state['mode'] in ['chat', 'chat-instruct'] and state['name2'] == '':\n        logger.error('It looks like no character is loaded. 
Please load one under Parameters > Character.')\n        if raise_exception:\n            raise ValueError\n\n        return False\n    else:\n        return True\n\n\ndef generate_chat_reply_wrapper(text, state, regenerate=False, _continue=False):\n    '''\n    Same as above but returns HTML for the UI.\n    When tools are selected, wraps generation in a loop that detects\n    tool calls, executes them, and re-generates until the model stops.\n    All tool output is consolidated into a single visible chat bubble\n    using metadata['assistant_N']['tool_sequence'].\n    '''\n\n    if not character_is_loaded(state):\n        return\n\n    if state['start_with'] != '' and not _continue:\n        if regenerate:\n            text, state['history'] = remove_last_message(state['history'])\n            regenerate = False\n\n        _continue = True\n        send_dummy_message(text, state)\n        send_dummy_reply(state['start_with'], state)\n\n    # On regenerate, clear old tool_sequence metadata so it gets rebuilt.\n    # Save it first so it can be stored per-version below.\n    # This must happen after the start_with logic above, which may remove\n    # and re-add messages, changing which row we operate on.\n    _old_tool_sequence = None\n    if regenerate:\n        history = state['history']\n        meta = history.get('metadata', {})\n        row_idx = len(history['internal']) - 1\n        if row_idx >= 0:\n            _old_tool_sequence = meta.get(f'assistant_{row_idx}', {}).pop('tool_sequence', None)\n\n    # Load tools if any are selected\n    selected = state.get('selected_tools', [])\n    parse_tool_call = None\n    _tool_parsers = None\n    if selected:\n        from modules.tool_use import load_tools, execute_tool\n        from modules.tool_parsing import parse_tool_call, get_tool_call_id, detect_tool_call_format\n\n    if selected:\n        tool_defs, tool_executors = load_tools(selected)\n        state['tools'] = tool_defs\n        tool_func_names = 
[t['function']['name'] for t in tool_defs]\n        _template_str = state.get('instruction_template_str', '') if state.get('mode') == 'instruct' else state.get('chat_template_str', '')\n        _tool_parsers, _, _ = detect_tool_call_format(_template_str)\n    else:\n        tool_func_names = None\n\n    visible_prefix = []  # Accumulated tool call summaries + results\n    last_save_time = time.monotonic()\n    save_interval = 8\n    _tool_turn = 0\n    while True:\n        history = state['history']\n\n        # Turn 0: use original flags; turns 2+: regenerate into the same entry.\n        # _tool_turn tells chatbot_wrapper to skip version creation/sync so\n        # that intermediate tool-loop regenerations don't pollute swipe history.\n        if _tool_turn > 0:\n            state['_tool_turn'] = True\n            state['_skip_output_extensions'] = True\n\n        regen = regenerate if _tool_turn == 0 else True\n        cont = _continue if _tool_turn == 0 else False\n        cur_text = text if _tool_turn == 0 else ''\n\n        for i, history in enumerate(generate_chat_reply(cur_text, state, regen, cont, loading_message=True, for_ui=True)):\n            # Prepend accumulated tool output to visible reply for display.\n            # Save and restore the original to prevent the markers from leaking\n            # back into chatbot_wrapper's shared output object, which would cause\n            # duplication on the next yield.\n            _original_visible = history['visible'][-1][1] if visible_prefix else None\n            if visible_prefix:\n                history['visible'][-1][1] = '\\n\\n'.join(visible_prefix + [_original_visible])\n\n            yield chat_html_wrapper(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu'], last_message_only=(i > 0)), history\n\n            if visible_prefix:\n                history['visible'][-1][1] = _original_visible\n\n            if i == 0:\n                # Save old 
tool_sequence into version 0 (created by chatbot_wrapper\n                # on the first yield).  Only needed on the first regeneration when\n                # versions didn't previously exist.\n                if _old_tool_sequence is not None and _tool_turn == 0:\n                    _ri = len(history['internal']) - 1\n                    _versions = history.get('metadata', {}).get(f'assistant_{_ri}', {}).get('versions', [])\n                    if _versions and 'tool_sequence' not in _versions[0]:\n                        _versions[0]['tool_sequence'] = _old_tool_sequence\n                    _old_tool_sequence = None\n\n                time.sleep(0.125)\n\n            current_time = time.monotonic()\n            if i == 0 or (current_time - last_save_time) >= save_interval:\n                save_history(history, state['unique_id'], state['character_menu'], state['mode'])\n                last_save_time = current_time\n\n            # Early stop on tool call detection\n            if tool_func_names and parse_tool_call(history['internal'][-1][1], tool_func_names, parsers=_tool_parsers):\n                break\n\n        # Save the model's visible output before re-applying visible_prefix,\n        # so we can extract thinking content from just this turn's output.\n        _model_visible = history['visible'][-1][1]\n\n        # Recover visible_prefix from existing visible text (e.g. on Continue\n        # after a previous session had tool calls). 
Extract all <tool_call>\n        # blocks and any text between them (thinking blocks, intermediate text).\n        if tool_func_names and not visible_prefix and _model_visible:\n            tc_matches = list(re.finditer(r'<tool_call>.*?</tool_call>', _model_visible, re.DOTALL))\n            if tc_matches:\n                prefix_end = tc_matches[-1].end()\n                prefix = _model_visible[:prefix_end].strip()\n                if prefix:\n                    visible_prefix = [prefix]\n                _model_visible = _model_visible[prefix_end:].strip()\n\n        # Re-apply visible prefix to the final state after streaming completes.\n        # This is safe because we're no longer sharing the object with chatbot_wrapper.\n        if visible_prefix:\n            history['visible'][-1][1] = '\\n\\n'.join(visible_prefix + [_model_visible])\n\n        if tool_func_names:\n            save_history(history, state['unique_id'], state['character_menu'], state['mode'])\n\n        # Check for tool calls\n        if not tool_func_names or shared.stop_everything:\n            break\n\n        answer = history['internal'][-1][1]\n        parsed_calls, content_prefix = parse_tool_call(answer, tool_func_names, return_prefix=True, parsers=_tool_parsers) if answer else (None, '')\n\n        if not parsed_calls:\n            break  # No tool calls — done\n\n        # --- Process tool calls ---\n        row_idx = len(history['internal']) - 1\n        meta = history.get('metadata', {})\n        seq = meta.setdefault(f'assistant_{row_idx}', {}).setdefault('tool_sequence', [])\n\n        def _render():\n            return chat_html_wrapper(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu'])\n\n        # Serialize tool calls and build display headers in one pass\n        serialized = []\n        tc_headers = []\n        for tc in parsed_calls:\n            tc['id'] = get_tool_call_id()\n            fn_name = tc['function']['name']\n 
           fn_args = tc['function'].get('arguments', {})\n\n            serialized.append({\n                'id': tc['id'],\n                'type': 'function',\n                'function': {\n                    'name': fn_name,\n                    'arguments': json.dumps(fn_args) if isinstance(fn_args, dict) else fn_args\n                }\n            })\n\n            if isinstance(fn_args, dict) and fn_args:\n                args_summary = ', '.join(f'{k}={json.dumps(v, ensure_ascii=False)}' for k, v in fn_args.items())\n            elif isinstance(fn_args, dict):\n                args_summary = ''\n            else:\n                args_summary = str(fn_args)\n\n            tc_headers.append(f'{fn_name}({args_summary})')\n\n        seq_entry = {'tool_calls': serialized}\n        if content_prefix.strip():\n            # Strip GPT-OSS channel tokens so they don't get double-wrapped\n            # by the template (which adds its own channel markup).\n            clean = content_prefix.strip()\n            if '<|channel|>' in clean and '<|message|>' in clean:\n                inner = clean.split('<|message|>', 1)[1]\n                if '<|end|>' in inner:\n                    inner = inner.split('<|end|>', 1)[0]\n                clean = inner.strip()\n            if clean:\n                seq_entry['content'] = clean\n        seq.append(seq_entry)\n\n        # Clear internal (raw tool markup)\n        history['internal'][-1][1] = ''\n\n        # Preserve thinking block and intermediate text from this turn.\n        # content_prefix is the raw text before tool call syntax (returned\n        # by parse_tool_call); HTML-escape it and extract thinking to get\n        # the content the user should see.\n        content_text = html.escape(content_prefix)\n        thinking_content, intermediate = extract_thinking_block(content_text)\n        if thinking_content:\n            visible_prefix.append(f'&lt;think&gt;\\n{thinking_content}\\n&lt;/think&gt;')\n        if 
intermediate and intermediate.strip():\n            visible_prefix.append(intermediate.strip())\n\n        # Show placeholder accordions with \"...\" before execution starts\n        # (tool calls may be slow, e.g. web search).\n        pending_placeholders = [f'<tool_call>{h}\\n...\\n</tool_call>' for h in tc_headers]\n        history['visible'][-1][1] = '\\n\\n'.join(visible_prefix + pending_placeholders)\n        yield _render(), history\n\n        # Execute tools, store results, and replace placeholders with real results\n        for i, tc in enumerate(parsed_calls):\n            # Check for stop request before each tool execution\n            if shared.stop_everything:\n                for j in range(i, len(parsed_calls)):\n                    seq.append({'role': 'tool', 'content': 'Tool execution was cancelled by the user.', 'tool_call_id': parsed_calls[j]['id']})\n                    pending_placeholders[j] = f'<tool_call>{tc_headers[j]}\\nCancelled\\n</tool_call>'\n\n                history['visible'][-1][1] = '\\n\\n'.join(visible_prefix + pending_placeholders)\n                yield _render(), history\n                break\n\n            fn_name = tc['function']['name']\n            fn_args = tc['function'].get('arguments', {})\n            result = execute_tool(fn_name, fn_args, tool_executors)\n\n            seq.append({'role': 'tool', 'content': result, 'tool_call_id': tc['id']})\n            try:\n                pretty_result = json.dumps(json.loads(result), indent=2, ensure_ascii=False)\n            except (json.JSONDecodeError, TypeError):\n                pretty_result = result\n\n            # Replace the placeholder with the real result\n            pending_placeholders[i] = f'<tool_call>{tc_headers[i]}\\n{pretty_result}\\n</tool_call>'\n            history['visible'][-1][1] = '\\n\\n'.join(visible_prefix + pending_placeholders)\n            yield _render(), history\n\n        # Move completed tool calls into visible_prefix for next turns\n     
   visible_prefix.extend(pending_placeholders)\n        history['visible'][-1][1] = '\\n\\n'.join(visible_prefix)\n        save_history(history, state['unique_id'], state['character_menu'], state['mode'])\n\n        state['history'] = history\n        _tool_turn += 1\n\n    state.pop('_tool_turn', None)\n\n    # If output extensions were deferred during tool turns, apply them now\n    # to the final model response only (not to tool call markers).\n    if state.pop('_skip_output_extensions', None):\n        _model_visible = apply_extensions('output', _model_visible, state, is_chat=True)\n        if visible_prefix:\n            history['visible'][-1][1] = '\\n\\n'.join(visible_prefix + [_model_visible])\n        else:\n            history['visible'][-1][1] = _model_visible\n\n        yield chat_html_wrapper(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu']), history\n\n    state['history'] = history\n\n    # Sync version metadata so swipes show the full visible (with tool prefix)\n    if visible_prefix and history.get('metadata'):\n        row_idx = len(history['internal']) - 1\n        key = f\"assistant_{row_idx}\"\n        meta_entry = history['metadata'].get(key, {})\n        if 'versions' in meta_entry and 'current_version_index' in meta_entry:\n            current_idx = meta_entry['current_version_index']\n            if current_idx < len(meta_entry['versions']):\n                version_update = {\n                    'content': history['internal'][row_idx][1],\n                    'visible_content': history['visible'][row_idx][1]\n                }\n                ts = meta_entry.get('tool_sequence')\n                if ts is not None:\n                    version_update['tool_sequence'] = ts\n                meta_entry['versions'][current_idx].update(version_update)\n\n    save_history(history, state['unique_id'], state['character_menu'], state['mode'])\n\n\ndef remove_last_message(history):\n    if 
'metadata' not in history:\n        history['metadata'] = {}\n\n    if len(history['visible']) > 0 and history['internal'][-1][0] != '<|BEGIN-VISIBLE-CHAT|>':\n        row_idx = len(history['internal']) - 1\n        last = history['visible'].pop()\n        history['internal'].pop()\n\n        # Remove metadata directly by known keys\n        if f\"user_{row_idx}\" in history['metadata']:\n            del history['metadata'][f\"user_{row_idx}\"]\n        if f\"assistant_{row_idx}\" in history['metadata']:\n            del history['metadata'][f\"assistant_{row_idx}\"]\n    else:\n        last = ['', '']\n\n    return html.unescape(last[0]), history\n\n\ndef send_dummy_message(text, state):\n    history = state['history']\n\n    # Handle both dict and string inputs\n    if isinstance(text, dict):\n        text = text['text']\n\n    # Initialize metadata if not present\n    if 'metadata' not in history:\n        history['metadata'] = {}\n\n    row_idx = len(history['internal'])\n    history['visible'].append([html.escape(text), ''])\n    history['internal'].append([apply_extensions('input', text, state, is_chat=True), ''])\n    update_message_metadata(history['metadata'], \"user\", row_idx, timestamp=get_current_timestamp())\n\n    return history\n\n\ndef send_dummy_reply(text, state):\n    history = state['history']\n\n    # Handle both dict and string inputs\n    if isinstance(text, dict):\n        text = text['text']\n\n    # Initialize metadata if not present\n    if 'metadata' not in history:\n        history['metadata'] = {}\n\n    if len(history['visible']) > 0 and not history['visible'][-1][1] == '':\n        row_idx = len(history['internal'])\n        history['visible'].append(['', ''])\n        history['internal'].append(['', ''])\n        # We don't need to add system metadata\n\n    row_idx = len(history['internal']) - 1\n    history['visible'][-1][1] = html.escape(text)\n    history['internal'][-1][1] = apply_extensions('input', text, state, 
is_chat=True)\n    update_message_metadata(history['metadata'], \"assistant\", row_idx, timestamp=get_current_timestamp())\n\n    return history\n\n\ndef redraw_html(history, name1, name2, mode, style, character, reset_cache=False):\n    return chat_html_wrapper(history, name1, name2, mode, style, character, reset_cache=reset_cache)\n\n\ndef start_new_chat(state, unique_id=None):\n    mode = state['mode']\n    # Initialize with empty metadata dictionary\n    history = {'internal': [], 'visible': [], 'metadata': {}}\n\n    if mode != 'instruct':\n        greeting = replace_character_names(state['greeting'], state['name1'], state['name2'])\n        if greeting != '':\n            history['internal'] += [['<|BEGIN-VISIBLE-CHAT|>', greeting]]\n            history['visible'] += [['', apply_extensions('output', html.escape(greeting), state, is_chat=True)]]\n\n            # Add timestamp for assistant's greeting\n            update_message_metadata(history['metadata'], \"assistant\", 0, timestamp=get_current_timestamp())\n\n    if unique_id is None:\n        unique_id = datetime.now().strftime('%Y%m%d-%H-%M-%S')\n\n    save_history(history, unique_id, state['character_menu'], state['mode'])\n\n    return history\n\n\ndef get_history_file_path(unique_id, character, mode):\n    if mode == 'instruct':\n        p = shared.user_data_dir / 'logs' / 'instruct' / f'{unique_id}.json'\n    else:\n        p = shared.user_data_dir / 'logs' / 'chat' / character / f'{unique_id}.json'\n\n    return p\n\n\ndef save_history(history, unique_id, character, mode):\n    if shared.args.multi_user:\n        return\n\n    if unique_id and unique_id.startswith('incognito-'):\n        return\n\n    p = get_history_file_path(unique_id, character, mode)\n    if not p.parent.is_dir():\n        p.parent.mkdir(parents=True)\n\n    with _history_file_lock:\n        with open(p, 'w', encoding='utf-8') as f:\n            f.write(json.dumps(history, indent=4, ensure_ascii=False))\n\n\ndef 
rename_history(old_id, new_id, character, mode):\n    if shared.args.multi_user:\n        return\n\n    old_p = get_history_file_path(old_id, character, mode)\n    new_p = get_history_file_path(new_id, character, mode)\n    if new_p.parent != old_p.parent:\n        logger.error(f\"The following path is not allowed: \\\"{new_p}\\\".\")\n    elif new_p == old_p:\n        logger.info(\"The provided path is identical to the old one.\")\n    elif new_p.exists():\n        logger.error(f\"The new path already exists and will not be overwritten: \\\"{new_p}\\\".\")\n    else:\n        logger.info(f\"Renaming \\\"{old_p}\\\" to \\\"{new_p}\\\"\")\n        old_p.rename(new_p)\n\n\ndef get_paths(state):\n    if state['mode'] == 'instruct':\n        return (shared.user_data_dir / 'logs' / 'instruct').glob('*.json')\n    else:\n        character = state['character_menu']\n\n        # Handle obsolete filenames and paths\n        old_p = shared.user_data_dir / 'logs' / f'{character}_persistent.json'\n        new_p = shared.user_data_dir / 'logs' / f'persistent_{character}.json'\n        if old_p.exists():\n            logger.warning(f\"Renaming \\\"{old_p}\\\" to \\\"{new_p}\\\"\")\n            old_p.rename(new_p)\n\n        if new_p.exists():\n            unique_id = datetime.now().strftime('%Y%m%d-%H-%M-%S')\n            p = get_history_file_path(unique_id, character, state['mode'])\n            logger.warning(f\"Moving \\\"{new_p}\\\" to \\\"{p}\\\"\")\n            p.parent.mkdir(exist_ok=True)\n            new_p.rename(p)\n\n        return (shared.user_data_dir / 'logs' / 'chat' / character).glob('*.json')\n\n\ndef find_all_histories(state):\n    if shared.args.multi_user:\n        return ['']\n\n    paths = get_paths(state)\n    histories = sorted(paths, key=lambda x: x.stat().st_mtime, reverse=True)\n    return [path.stem for path in histories]\n\n\ndef find_all_histories_with_first_prompts(state):\n    if shared.args.multi_user:\n        return []\n\n    paths = 
get_paths(state)\n    histories = sorted(paths, key=lambda x: x.stat().st_mtime, reverse=True)\n\n    result = []\n    for i, path in enumerate(histories):\n        filename = path.stem\n        file_content = \"\"\n        with open(path, 'r', encoding='utf-8') as f:\n            file_content = f.read()\n\n        if state['search_chat'] and state['search_chat'] not in file_content:\n            continue\n\n        data = json.loads(file_content)\n        if re.match(r'^[0-9]{8}-[0-9]{2}-[0-9]{2}-[0-9]{2}$', filename):\n            first_prompt = \"\"\n            if data and 'visible' in data and len(data['visible']) > 0:\n                if len(data['internal']) > 0 and data['internal'][0][0] == '<|BEGIN-VISIBLE-CHAT|>':\n                    if len(data['visible']) > 1:\n                        first_prompt = html.unescape(data['visible'][1][0])\n                    elif i == 0:\n                        first_prompt = \"New chat\"\n                else:\n                    first_prompt = html.unescape(data['visible'][0][0])\n            elif i == 0:\n                first_prompt = \"New chat\"\n        else:\n            first_prompt = filename\n\n        first_prompt = first_prompt.strip()\n\n        # Truncate the first prompt if it's longer than 30 characters\n        if len(first_prompt) > 30:\n            first_prompt = first_prompt[:30 - 3] + '...'\n\n        result.append((first_prompt, filename))\n\n    return result\n\n\ndef load_latest_history(state):\n    '''\n    Loads the latest history for the given character in chat or chat-instruct\n    mode, or the latest instruct history for instruct mode.\n    '''\n\n    if shared.args.multi_user:\n        return start_new_chat(state), None\n\n    histories = find_all_histories(state)\n\n    if len(histories) > 0:\n        # Try to load the last visited chat for this character/mode\n        chat_state = load_last_chat_state()\n        key = get_chat_state_key(state['character_menu'], state['mode'])\n        
last_chat_id = chat_state.get(\"last_chats\", {}).get(key)\n\n        # If we have a stored last chat and it still exists, use it\n        if last_chat_id and last_chat_id in histories:\n            unique_id = last_chat_id\n        else:\n            # Fall back to most recent (current behavior)\n            unique_id = histories[0]\n\n        history = load_history(unique_id, state['character_menu'], state['mode'])\n        return history, unique_id\n    else:\n        return start_new_chat(state), None\n\n\ndef load_history_after_deletion(state, idx):\n    '''\n    Loads the latest history for the given character in chat or chat-instruct\n    mode, or the latest instruct history for instruct mode.\n    '''\n    import gradio as gr\n\n    if shared.args.multi_user:\n        return start_new_chat(state)\n\n    histories = find_all_histories_with_first_prompts(state)\n    idx = min(int(idx), len(histories) - 1)\n    idx = max(0, idx)\n\n    if len(histories) > 0:\n        history = load_history(histories[idx][1], state['character_menu'], state['mode'])\n    else:\n        history = start_new_chat(state)\n        histories = find_all_histories_with_first_prompts(state)\n\n    return history, gr.update(choices=histories, value=histories[idx][1])\n\n\ndef update_character_menu_after_deletion(idx):\n    import gradio as gr\n    characters = utils.get_available_characters()\n    idx = min(int(idx), len(characters) - 1)\n    idx = max(0, idx)\n    return gr.update(choices=characters, value=characters[idx])\n\n\ndef get_chat_state_key(character, mode):\n    \"\"\"Generate a key for storing last chat state\"\"\"\n    if mode == 'instruct':\n        return 'instruct'\n    else:\n        return f\"chat_{character}\"\n\n\ndef load_last_chat_state():\n    \"\"\"Load the last chat state from file\"\"\"\n    state_file = shared.user_data_dir / 'logs' / 'chat_state.json'\n    if state_file.exists():\n        try:\n            with open(state_file, 'r', encoding='utf-8') as f:\n   
             return json.loads(f.read())\n        except Exception:\n            pass\n\n    return {\"last_chats\": {}}\n\n\ndef save_last_chat_state(character, mode, unique_id):\n    \"\"\"Save the last visited chat for a character/mode\"\"\"\n    if shared.args.multi_user:\n        return\n\n    if unique_id and unique_id.startswith('incognito-'):\n        return\n\n    state = load_last_chat_state()\n    key = get_chat_state_key(character, mode)\n    state[\"last_chats\"][key] = unique_id\n\n    state_file = shared.user_data_dir / 'logs' / 'chat_state.json'\n    state_file.parent.mkdir(exist_ok=True)\n    with open(state_file, 'w', encoding='utf-8') as f:\n        f.write(json.dumps(state, indent=2))\n\n\ndef load_history(unique_id, character, mode):\n    p = get_history_file_path(unique_id, character, mode)\n\n    if not p.exists():\n        return {'internal': [], 'visible': [], 'metadata': {}}\n\n    f = json.loads(open(p, 'rb').read())\n    if 'internal' in f and 'visible' in f:\n        history = f\n    else:\n        history = {\n            'internal': f['data'],\n            'visible': f['data_visible']\n        }\n\n    # Add metadata if it doesn't exist\n    if 'metadata' not in history:\n        history['metadata'] = {}\n        # Add placeholder timestamps for existing messages\n        for i, (user_msg, asst_msg) in enumerate(history['internal']):\n            if user_msg and user_msg != '<|BEGIN-VISIBLE-CHAT|>':\n                update_message_metadata(history['metadata'], \"user\", i, timestamp=\"\")\n            if asst_msg:\n                update_message_metadata(history['metadata'], \"assistant\", i, timestamp=\"\")\n\n    return history\n\n\ndef load_history_json(file, history):\n    try:\n        file = file.decode('utf-8')\n        f = json.loads(file)\n        if 'internal' in f and 'visible' in f:\n            history = f\n        else:\n            history = {\n                'internal': f['data'],\n                'visible': 
f['data_visible']\n            }\n\n        # Add metadata if it doesn't exist\n        if 'metadata' not in history:\n            history['metadata'] = {}\n            # Add placeholder timestamps\n            for i, (user_msg, asst_msg) in enumerate(history['internal']):\n                if user_msg and user_msg != '<|BEGIN-VISIBLE-CHAT|>':\n                    update_message_metadata(history['metadata'], \"user\", i, timestamp=\"\")\n                if asst_msg:\n                    update_message_metadata(history['metadata'], \"assistant\", i, timestamp=\"\")\n\n        return history\n    except Exception:\n        return history\n\n\ndef delete_history(unique_id, character, mode):\n    p = get_history_file_path(unique_id, character, mode)\n    delete_file(p)\n\n\ndef replace_character_names(text, name1, name2):\n    text = text.replace('{{user}}', name1).replace('{{char}}', name2)\n    return text.replace('<USER>', name1).replace('<BOT>', name2)\n\n\ndef generate_pfp_cache(character):\n    cache_folder = Path(shared.args.disk_cache_dir)\n    if not cache_folder.exists():\n        cache_folder.mkdir()\n\n    for path in [shared.user_data_dir / 'characters' / f\"{character}.{extension}\" for extension in ['png', 'jpg', 'jpeg']]:\n        if path.exists():\n            original_img = Image.open(path)\n            # Define file paths\n            pfp_path = Path(f'{cache_folder}/pfp_character.png')\n            thumb_path = Path(f'{cache_folder}/pfp_character_thumb.png')\n\n            # Save main picture and thumbnail\n            original_img.save(pfp_path, format='PNG')\n            thumb = make_thumbnail(original_img)\n            thumb.save(thumb_path, format='PNG')\n\n            # Return the path to the thumbnail, not the in-memory PIL Image object.\n            return str(thumb_path)\n\n    return None\n\n\ndef load_character(character, name1, name2):\n    context = greeting = \"\"\n    greeting_field = 'greeting'\n    picture = None\n\n    filepath = 
None\n    for extension in [\"yml\", \"yaml\", \"json\"]:\n        filepath = shared.user_data_dir / 'characters' / f'{character}.{extension}'\n        if filepath.exists():\n            break\n\n    if filepath is None or not filepath.exists():\n        logger.error(f\"Could not find the character \\\"{character}\\\" inside {shared.user_data_dir}/characters. No character has been loaded.\")\n        raise ValueError\n\n    file_contents = open(filepath, 'r', encoding='utf-8').read()\n    data = json.loads(file_contents) if extension == \"json\" else yaml.safe_load(file_contents)\n    cache_folder = Path(shared.args.disk_cache_dir)\n\n    for path in [Path(f\"{cache_folder}/pfp_character.png\"), Path(f\"{cache_folder}/pfp_character_thumb.png\")]:\n        if path.exists():\n            path.unlink()\n\n    picture = generate_pfp_cache(character)\n\n    # Finding the bot's name\n    for k in ['name', 'bot', '<|bot|>', 'char_name']:\n        if k in data and data[k] != '':\n            name2 = data[k]\n            break\n\n    # Find the user name (if any)\n    for k in ['your_name', 'user', '<|user|>']:\n        if k in data and data[k] != '':\n            name1 = data[k]\n            break\n\n    if 'context' in data:\n        context = data['context'].strip()\n    elif \"char_persona\" in data:\n        context = build_pygmalion_style_context(data)\n        greeting_field = 'char_greeting'\n\n    greeting = data.get(greeting_field, greeting)\n    return name1, name2, picture, greeting, context\n\n\ndef restore_character_for_ui(state):\n    \"\"\"Reset character fields to the currently loaded character's saved values\"\"\"\n    if state['character_menu'] and state['character_menu'] != 'None':\n        try:\n            name1, name2, picture, greeting, context = load_character(state['character_menu'], state['name1'], state['name2'])\n\n            state['name2'] = name2\n            state['greeting'] = greeting\n            state['context'] = context\n            
state['character_picture'] = picture  # This triggers cache update via generate_pfp_cache\n\n            return state, name2, context, greeting, picture\n\n        except Exception as e:\n            logger.error(f\"Failed to reset character '{state['character_menu']}': {e}\")\n            return clear_character_for_ui(state)\n    else:\n        return clear_character_for_ui(state)\n\n\ndef clear_character_for_ui(state):\n    \"\"\"Clear all character fields and picture cache\"\"\"\n    state['name2'] = shared.settings['name2']\n    state['context'] = shared.settings['context']\n    state['greeting'] = shared.settings['greeting']\n    state['character_picture'] = None\n\n    # Clear the cache files\n    cache_folder = Path(shared.args.disk_cache_dir)\n    for cache_file in ['pfp_character.png', 'pfp_character_thumb.png']:\n        cache_path = Path(f'{cache_folder}/{cache_file}')\n        if cache_path.exists():\n            cache_path.unlink()\n\n    return state, state['name2'], state['context'], state['greeting'], None\n\n\n@functools.cache\ndef load_character_memoized(character, name1, name2):\n    return load_character(character, name1, name2)\n\n\n@functools.cache\ndef load_instruction_template_memoized(template):\n    from modules.models_settings import load_instruction_template\n    return load_instruction_template(template)\n\n\ndef upload_character(file, img_path, tavern=False):\n    import gradio as gr\n    img = open_image_safely(img_path)\n    decoded_file = file if isinstance(file, str) else file.decode('utf-8')\n    try:\n        data = json.loads(decoded_file)\n    except Exception:\n        data = yaml.safe_load(decoded_file)\n\n    if 'char_name' in data:\n        name = sanitize_filename(data['char_name'])\n        greeting = data['char_greeting']\n        context = build_pygmalion_style_context(data)\n        yaml_data = generate_character_yaml(name, greeting, context)\n    else:\n        name = sanitize_filename(data['name'])\n        yaml_data 
= generate_character_yaml(data['name'], data['greeting'], data['context'])\n\n    outfile_name = name\n    i = 1\n    while (shared.user_data_dir / 'characters' / f'{outfile_name}.yaml').exists():\n        outfile_name = f'{name}_{i:03d}'\n        i += 1\n\n    with open(shared.user_data_dir / 'characters' / f'{outfile_name}.yaml', 'w', encoding='utf-8') as f:\n        f.write(yaml_data)\n\n    if img is not None:\n        img.save(shared.user_data_dir / 'characters' / f'{outfile_name}.png')\n\n    logger.info(f'New character saved to \"{shared.user_data_dir}/characters/{outfile_name}.yaml\".')\n    return gr.update(value=outfile_name, choices=get_available_characters())\n\n\ndef build_pygmalion_style_context(data):\n    context = \"\"\n    if 'char_persona' in data and data['char_persona'] != '':\n        context += f\"{data['char_name']}'s Persona: {data['char_persona']}\\n\"\n\n    if 'world_scenario' in data and data['world_scenario'] != '':\n        context += f\"Scenario: {data['world_scenario']}\\n\"\n\n    if 'example_dialogue' in data and data['example_dialogue'] != '':\n        context += f\"{data['example_dialogue'].strip()}\\n\"\n\n    context = f\"{context.strip()}\\n\"\n    return context\n\n\ndef upload_tavern_character(img_path, _json):\n    _json = {'char_name': _json['name'], 'char_persona': _json['description'], 'char_greeting': _json['first_mes'], 'example_dialogue': _json['mes_example'], 'world_scenario': _json['scenario']}\n    return upload_character(json.dumps(_json), img_path, tavern=True)\n\n\ndef check_tavern_character(img_path):\n    import gradio as gr\n    img = open_image_safely(img_path)\n\n    if img is None:\n        return \"Invalid or disallowed image file.\", None, None, gr.update(interactive=False)\n\n    if \"chara\" not in img.info:\n        return \"Not a TavernAI card\", None, None, gr.update(interactive=False)\n\n    decoded_string = base64.b64decode(img.info['chara']).replace(b'\\\\r\\\\n', b'\\\\n')\n    _json = 
json.loads(decoded_string)\n    if \"data\" in _json:\n        _json = _json[\"data\"]\n\n    return _json['name'], _json['description'], _json, gr.update(interactive=True)\n\n\ndef upload_your_profile_picture(img_path):\n    img = open_image_safely(img_path)\n    cache_folder = Path(shared.args.disk_cache_dir)\n    if not cache_folder.exists():\n        cache_folder.mkdir()\n\n    if img is None:\n        if Path(f\"{cache_folder}/pfp_me.png\").exists():\n            Path(f\"{cache_folder}/pfp_me.png\").unlink()\n    else:\n        img = make_thumbnail(img)\n        img.save(Path(f'{cache_folder}/pfp_me.png'))\n        logger.info(f'Profile picture saved to \"{cache_folder}/pfp_me.png\"')\n\n\ndef generate_character_yaml(name, greeting, context):\n    data = {\n        'name': name,\n        'greeting': greeting,\n        'context': context,\n    }\n\n    data = {k: v for k, v in data.items() if v}  # Strip falsy\n    return yaml.dump(data, sort_keys=False, width=float(\"inf\"))\n\n\ndef generate_instruction_template_yaml(instruction_template):\n    data = {\n        'instruction_template': instruction_template\n    }\n\n    return my_yaml_output(data)\n\n\ndef save_character(name, greeting, context, picture, filename):\n    filename = sanitize_filename(filename)\n    if filename == \"\":\n        logger.error(\"The filename is empty, so the character will not be saved.\")\n        return\n\n    data = generate_character_yaml(name, greeting, context)\n    filepath = shared.user_data_dir / 'characters' / f'{filename}.yaml'\n    save_file(filepath, data)\n    path_to_img = shared.user_data_dir / 'characters' / f'{filename}.png'\n    if picture is not None:\n        # Copy the image file from its source path to the character folder\n        shutil.copy(picture, path_to_img)\n        logger.info(f'Saved {path_to_img}.')\n\n\ndef delete_character(name, instruct=False):\n    name = sanitize_filename(name)\n    # Check for character data files\n    for extension in 
[\"yml\", \"yaml\", \"json\"]:\n        delete_file(shared.user_data_dir / 'characters' / f'{name}.{extension}')\n\n    # Check for character image files\n    for extension in [\"png\", \"jpg\", \"jpeg\"]:\n        delete_file(shared.user_data_dir / 'characters' / f'{name}.{extension}')\n\n\ndef generate_user_pfp_cache(user):\n    \"\"\"Generate cached profile picture for user\"\"\"\n    cache_folder = Path(shared.args.disk_cache_dir)\n    if not cache_folder.exists():\n        cache_folder.mkdir()\n\n    for path in [shared.user_data_dir / 'users' / f\"{user}.{extension}\" for extension in ['png', 'jpg', 'jpeg']]:\n        if path.exists():\n            original_img = Image.open(path)\n            # Define file paths\n            pfp_path = Path(f'{cache_folder}/pfp_me.png')\n\n            # Save thumbnail\n            thumb = make_thumbnail(original_img)\n            thumb.save(pfp_path, format='PNG')\n            logger.info(f'User profile picture cached to \"{pfp_path}\"')\n\n            return str(pfp_path)\n\n    return None\n\n\ndef load_user(user_name, name1, user_bio):\n    \"\"\"Load user profile from YAML file\"\"\"\n    picture = None\n\n    filepath = None\n    for extension in [\"yml\", \"yaml\", \"json\"]:\n        filepath = shared.user_data_dir / 'users' / f'{user_name}.{extension}'\n        if filepath.exists():\n            break\n\n    if filepath is None or not filepath.exists():\n        logger.error(f\"Could not find the user \\\"{user_name}\\\" inside {shared.user_data_dir}/users. 
No user has been loaded.\")\n        raise ValueError\n\n    with open(filepath, 'r', encoding='utf-8') as f:\n        file_contents = f.read()\n\n    extension = filepath.suffix[1:]  # Remove the leading dot\n    data = json.loads(file_contents) if extension == \"json\" else yaml.safe_load(file_contents)\n\n    # Clear existing user picture cache\n    cache_folder = Path(shared.args.disk_cache_dir)\n    pfp_path = Path(f\"{cache_folder}/pfp_me.png\")\n    if pfp_path.exists():\n        pfp_path.unlink()\n\n    # Generate new picture cache\n    picture = generate_user_pfp_cache(user_name)\n\n    # Get user name\n    if 'name' in data and data['name'] != '':\n        name1 = data['name']\n\n    # Get user bio\n    if 'user_bio' in data:\n        user_bio = data['user_bio']\n\n    return name1, user_bio, picture\n\n\ndef generate_user_yaml(name, user_bio):\n    \"\"\"Generate YAML content for user profile\"\"\"\n    data = {\n        'name': name,\n        'user_bio': user_bio,\n    }\n\n    return yaml.dump(data, sort_keys=False, width=float(\"inf\"))\n\n\ndef save_user(name, user_bio, picture, filename):\n    \"\"\"Save user profile to YAML file\"\"\"\n    filename = sanitize_filename(filename)\n    if filename == \"\":\n        logger.error(\"The filename is empty, so the user will not be saved.\")\n        return\n\n    # Ensure the users directory exists\n    users_dir = shared.user_data_dir / 'users'\n    users_dir.mkdir(parents=True, exist_ok=True)\n\n    data = generate_user_yaml(name, user_bio)\n    filepath = shared.user_data_dir / 'users' / f'{filename}.yaml'\n    save_file(filepath, data)\n\n    path_to_img = shared.user_data_dir / 'users' / f'{filename}.png'\n    if picture is not None:\n        # Copy the image file from its source path to the users folder\n        shutil.copy(picture, path_to_img)\n        logger.info(f'Saved user profile picture to {path_to_img}.')\n\n\ndef delete_user(name):\n    \"\"\"Delete user profile files\"\"\"\n    name = 
sanitize_filename(name)\n    # Check for user data files\n    for extension in [\"yml\", \"yaml\", \"json\"]:\n        delete_file(shared.user_data_dir / 'users' / f'{name}.{extension}')\n\n    # Check for user image files\n    for extension in [\"png\", \"jpg\", \"jpeg\"]:\n        delete_file(shared.user_data_dir / 'users' / f'{name}.{extension}')\n\n\ndef update_user_menu_after_deletion(idx):\n    \"\"\"Update user menu after a user is deleted\"\"\"\n    import gradio as gr\n    users = get_available_users()\n    if len(users) == 0:\n        # Create a default user if none exist\n        save_user('You', '', None, 'Default')\n        users = get_available_users()\n\n    idx = min(int(idx), len(users) - 1)\n    idx = max(0, idx)\n    return gr.update(choices=users, value=users[idx])\n\n\ndef handle_user_menu_change(state):\n    \"\"\"Handle user menu selection change\"\"\"\n    try:\n        name1, user_bio, picture = load_user(state['user_menu'], state['name1'], state['user_bio'])\n\n        return [\n            name1,\n            user_bio,\n            picture\n        ]\n    except Exception as e:\n        logger.error(f\"Failed to load user '{state['user_menu']}': {e}\")\n        return [\n            state['name1'],\n            state['user_bio'],\n            None\n        ]\n\n\ndef handle_save_user_click(name1):\n    \"\"\"Handle save user button click\"\"\"\n    import gradio as gr\n    return [\n        name1,\n        gr.update(visible=True)\n    ]\n\n\ndef my_yaml_output(data):\n    '''\n    pyyaml is very inconsistent with multiline strings.\n    for simple instruction template outputs, this is enough.\n    '''\n    result = \"\"\n    for k in data:\n        result += k + \": |-\\n\"\n        for line in data[k].splitlines():\n            result += \"  \" + line.rstrip(' ') + \"\\n\"\n\n    return result\n\n\ndef handle_send_dummy_message_click(text, state):\n    history = send_dummy_message(text, state)\n    save_history(history, 
state['unique_id'], state['character_menu'], state['mode'])\n    html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu'])\n\n    return [history, html, {\"text\": \"\", \"files\": []}]\n\n\ndef handle_send_dummy_reply_click(text, state):\n    history = send_dummy_reply(text, state)\n    save_history(history, state['unique_id'], state['character_menu'], state['mode'])\n    html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu'])\n\n    return [history, html, {\"text\": \"\", \"files\": []}]\n\n\ndef handle_remove_last_click(state):\n    last_input, history = remove_last_message(state['history'])\n    save_history(history, state['unique_id'], state['character_menu'], state['mode'])\n    html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu'])\n\n    return [history, html, {\"text\": last_input, \"files\": []}]\n\n\ndef handle_unique_id_select(state):\n    history = load_history(state['unique_id'], state['character_menu'], state['mode'])\n    html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu'])\n\n    # Save this as the last visited chat\n    save_last_chat_state(state['character_menu'], state['mode'], state['unique_id'])\n\n    convert_to_markdown.cache_clear()\n\n    return [history, html]\n\n\ndef handle_start_new_chat_click(state):\n    import gradio as gr\n    history = start_new_chat(state)\n    histories = find_all_histories_with_first_prompts(state)\n    html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu'])\n\n    convert_to_markdown.cache_clear()\n\n    if len(histories) > 0:\n        past_chats_update = gr.update(choices=histories, value=histories[0][1])\n    else:\n        past_chats_update = gr.update(choices=histories)\n\n    return 
[history, html, past_chats_update]\n\n\ndef handle_start_incognito_chat_click(state):\n    import gradio as gr\n    unique_id = 'incognito-' + datetime.now().strftime('%Y%m%d-%H-%M-%S')\n    history = start_new_chat(state, unique_id=unique_id)\n    html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu'])\n\n    convert_to_markdown.cache_clear()\n\n    histories = find_all_histories_with_first_prompts(state)\n    past_chats_update = gr.update(choices=histories, value=unique_id)\n\n    return [history, html, past_chats_update]\n\n\ndef handle_delete_chat_confirm_click(state):\n    filtered_histories = find_all_histories_with_first_prompts(state)\n    filtered_ids = [h[1] for h in filtered_histories]\n\n    if state['unique_id'] not in filtered_ids:\n        # Incognito or unknown chat — just load the most recent saved chat\n        index = '0'\n    else:\n        index = str(filtered_ids.index(state['unique_id']))\n\n    delete_history(state['unique_id'], state['character_menu'], state['mode'])\n    history, unique_id = load_history_after_deletion(state, index)\n    html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu'])\n\n    convert_to_markdown.cache_clear()\n\n    return [history, html, unique_id]\n\n\ndef handle_branch_chat_click(state):\n    import gradio as gr\n    branch_from_index = state['branch_index']\n    if branch_from_index == -1:\n        history = state['history']\n    else:\n        history = state['history']\n        history['visible'] = history['visible'][:branch_from_index + 1]\n        history['internal'] = history['internal'][:branch_from_index + 1]\n        # Prune the metadata dictionary to remove entries beyond the branch point\n        if 'metadata' in history:\n            history['metadata'] = {k: v for k, v in history['metadata'].items() if int(k.split('_')[-1]) <= branch_from_index}\n\n    prefix = 
'incognito-' if state['unique_id'] and state['unique_id'].startswith('incognito-') else ''\n    new_unique_id = prefix + datetime.now().strftime('%Y%m%d-%H-%M-%S')\n    save_history(history, new_unique_id, state['character_menu'], state['mode'])\n\n    histories = find_all_histories_with_first_prompts(state)\n    html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu'])\n\n    convert_to_markdown.cache_clear()\n\n    past_chats_update = gr.update(choices=histories, value=new_unique_id)\n\n    return [history, html, past_chats_update, -1]\n\n\ndef handle_edit_message_click(state):\n    history = state['history']\n    message_index = int(state['edit_message_index'])\n    new_text = state['edit_message_text']\n    role = state['edit_message_role']  # \"user\" or \"assistant\"\n\n    if message_index >= len(history['internal']):\n        html_output = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu'])\n        return [history, html_output]\n\n    role_idx = 0 if role == \"user\" else 1\n\n    if 'metadata' not in history:\n        history['metadata'] = {}\n\n    key = f\"{role}_{message_index}\"\n    if key not in history['metadata']:\n        history['metadata'][key] = {}\n\n    # If no versions exist yet for this message, store the current (pre-edit) content as the first version.\n    if \"versions\" not in history['metadata'][key] or not history['metadata'][key][\"versions\"]:\n        original_content = history['internal'][message_index][role_idx]\n        original_visible = history['visible'][message_index][role_idx]\n        original_timestamp = history['metadata'][key].get('timestamp', get_current_timestamp())\n\n        version_entry = {\n            \"content\": original_content,\n            \"visible_content\": original_visible,\n            \"timestamp\": original_timestamp\n        }\n        ts = 
history['metadata'][key].get('tool_sequence')\n        if ts is not None:\n            version_entry['tool_sequence'] = ts\n        history['metadata'][key][\"versions\"] = [version_entry]\n\n    history['internal'][message_index][role_idx] = apply_extensions('input', new_text, state, is_chat=True)\n    history['visible'][message_index][role_idx] = html.escape(new_text)\n    history['metadata'][key].pop('tool_sequence', None)\n\n    add_message_version(history, role, message_index, is_current=True)\n\n    save_history(history, state['unique_id'], state['character_menu'], state['mode'])\n    html_output = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu'])\n\n    return [history, html_output]\n\n\ndef handle_navigate_version_click(state):\n    history = state['history']\n    message_index = int(state['navigate_message_index'])\n    direction = state['navigate_direction']\n    role = state['navigate_message_role']\n\n    if not role:\n        logger.error(\"Role not provided for version navigation.\")\n        html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu'])\n        return [history, html]\n\n    key = f\"{role}_{message_index}\"\n    if 'metadata' not in history or key not in history['metadata'] or 'versions' not in history['metadata'][key]:\n        html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu'])\n        return [history, html]\n\n    metadata = history['metadata'][key]\n    versions = metadata['versions']\n    # Default to the last version if current_version_index is not set\n    current_idx = metadata.get('current_version_index', len(versions) - 1 if versions else 0)\n\n    if direction == 'left':\n        new_idx = max(0, current_idx - 1)\n    else:  # right\n        new_idx = min(len(versions) - 1, current_idx + 1)\n\n    if new_idx == current_idx:\n        
html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu'])\n        return [history, html]\n\n    msg_content_idx = 0 if role == 'user' else 1  # 0 for user content, 1 for assistant content in the pair\n    version_to_load = versions[new_idx]\n    history['internal'][message_index][msg_content_idx] = version_to_load['content']\n    history['visible'][message_index][msg_content_idx] = version_to_load['visible_content']\n    metadata['current_version_index'] = new_idx\n\n    # Restore per-version tool_sequence so follow-up prompts see consistent context\n    version_ts = version_to_load.get('tool_sequence')\n    if version_ts is not None:\n        metadata['tool_sequence'] = version_ts\n    else:\n        metadata.pop('tool_sequence', None)\n\n    update_message_metadata(history['metadata'], role, message_index, timestamp=version_to_load['timestamp'])\n\n    # Redraw and save\n    html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu'])\n    save_history(history, state['unique_id'], state['character_menu'], state['mode'])\n\n    return [history, html]\n\n\ndef handle_rename_chat_click():\n    import gradio as gr\n    return [\n        gr.update(value=\"My New Chat\"),\n        gr.update(visible=True),\n    ]\n\n\ndef handle_rename_chat_confirm(rename_to, state):\n    import gradio as gr\n\n    if state['unique_id'] and state['unique_id'].startswith('incognito-'):\n        return [\n            gr.update(),\n            gr.update(visible=False),\n        ]\n\n    rename_history(state['unique_id'], rename_to, state['character_menu'], state['mode'])\n    histories = find_all_histories_with_first_prompts(state)\n\n    return [\n        gr.update(choices=histories, value=rename_to),\n        gr.update(visible=False),\n    ]\n\n\ndef handle_search_chat_change(state):\n    import gradio as gr\n    histories = 
find_all_histories_with_first_prompts(state)\n    return gr.update(choices=histories)\n\n\ndef handle_upload_chat_history(load_chat_history, state):\n    import gradio as gr\n    history = start_new_chat(state)\n    history = load_history_json(load_chat_history, history)\n    save_history(history, state['unique_id'], state['character_menu'], state['mode'])\n    histories = find_all_histories_with_first_prompts(state)\n\n    html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu'])\n\n    convert_to_markdown.cache_clear()\n\n    if len(histories) > 0:\n        past_chats_update = gr.update(choices=histories, value=histories[0][1])\n    else:\n        past_chats_update = gr.update(choices=histories)\n\n    return [\n        history,\n        html,\n        past_chats_update\n    ]\n\n\ndef handle_character_menu_change(state):\n    import gradio as gr\n    name1, name2, picture, greeting, context = load_character(state['character_menu'], state['name1'], state['name2'])\n\n    state['name1'] = name1\n    state['name2'] = name2\n    state['character_picture'] = picture\n    state['greeting'] = greeting\n    state['context'] = context\n\n    history, loaded_unique_id = load_latest_history(state)\n    histories = find_all_histories_with_first_prompts(state)\n    html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu'])\n\n    convert_to_markdown.cache_clear()\n\n    if len(histories) > 0:\n        past_chats_update = gr.update(choices=histories, value=loaded_unique_id or histories[0][1])\n    else:\n        past_chats_update = gr.update(choices=histories)\n\n    return [\n        history,\n        html,\n        name1,\n        name2,\n        picture,\n        greeting,\n        context,\n        past_chats_update\n    ]\n\n\ndef handle_character_picture_change(picture_path):\n    \"\"\"Update or clear cache when character picture changes\"\"\"\n  
  picture = open_image_safely(picture_path)\n    cache_folder = Path(shared.args.disk_cache_dir)\n    if not cache_folder.exists():\n        cache_folder.mkdir()\n\n    if picture is not None:\n        # Save to cache\n        picture.save(Path(f'{cache_folder}/pfp_character.png'), format='PNG')\n        thumb = make_thumbnail(picture)\n        thumb.save(Path(f'{cache_folder}/pfp_character_thumb.png'), format='PNG')\n    else:\n        # Remove cache files when picture is cleared\n        for cache_file in ['pfp_character.png', 'pfp_character_thumb.png']:\n            cache_path = Path(f'{cache_folder}/{cache_file}')\n            if cache_path.exists():\n                cache_path.unlink()\n\n\ndef handle_mode_change(state):\n    import gradio as gr\n    history, loaded_unique_id = load_latest_history(state)\n    histories = find_all_histories_with_first_prompts(state)\n\n    # Ensure character picture cache exists\n    if state['mode'] in ['chat', 'chat-instruct'] and state['character_menu'] and state['character_menu'] != 'None':\n        generate_pfp_cache(state['character_menu'])\n\n    html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu'])\n\n    convert_to_markdown.cache_clear()\n\n    if len(histories) > 0:\n        past_chats_update = gr.update(choices=histories, value=loaded_unique_id or histories[0][1])\n    else:\n        past_chats_update = gr.update(choices=histories)\n\n    return [\n        history,\n        html,\n        gr.update(visible=state['mode'] != 'instruct'),\n        gr.update(visible=state['mode'] == 'chat-instruct'),\n        past_chats_update\n    ]\n\n\ndef handle_save_character_click(name2):\n    import gradio as gr\n    return [\n        name2,\n        gr.update(visible=True)\n    ]\n\n\ndef handle_load_template_click(instruction_template):\n    from modules.models_settings import load_instruction_template\n    output = 
load_instruction_template(instruction_template)\n    return [\n        output,\n        \"Select template to load...\"\n    ]\n\n\ndef handle_save_template_click(instruction_template_str):\n    import gradio as gr\n    contents = generate_instruction_template_yaml(instruction_template_str)\n    root = str(shared.user_data_dir / 'instruction-templates') + '/'\n    return [\n        \"My Template.yaml\",\n        root,\n        contents,\n        root,\n        gr.update(visible=True)\n    ]\n\n\ndef handle_delete_template_click(template):\n    import gradio as gr\n    root = str(shared.user_data_dir / 'instruction-templates') + '/'\n    return [\n        f\"{template}.yaml\",\n        root,\n        root,\n        gr.update(visible=False)\n    ]\n\n\ndef handle_your_picture_change(picture, state):\n    upload_your_profile_picture(picture)\n    html = redraw_html(state['history'], state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu'], reset_cache=True)\n\n    return html\n\n\ndef handle_send_instruction_click(state):\n    import gradio as gr\n    state['mode'] = 'instruct'\n    state['history'] = {'internal': [], 'visible': [], 'metadata': {}}\n\n    output = generate_chat_prompt(\"Input\", state)\n\n    if state[\"show_two_notebook_columns\"]:\n        return gr.update(), output, \"\"\n    else:\n        return output, gr.update(), gr.update()\n\n\ndef handle_send_chat_click(state):\n    import gradio as gr\n    output = generate_chat_prompt(\"\", state, _continue=True)\n\n    if state[\"show_two_notebook_columns\"]:\n        return gr.update(), output, \"\"\n    else:\n        return output, gr.update(), gr.update()\n"
  },
  {
    "path": "modules/evaluate.py",
    "content": "import datetime\nfrom pathlib import Path\n\nimport pandas as pd\nfrom tqdm import tqdm\n\nfrom modules import shared\nfrom modules.logging_colors import logger\nfrom modules.models import load_model, unload_model\nfrom modules.models_settings import get_model_metadata, update_model_parameters\nfrom modules.text_generation import encode\n\n\ndef load_past_evaluations():\n    if (shared.user_data_dir / 'logs' / 'evaluations.csv').exists():\n        df = pd.read_csv(shared.user_data_dir / 'logs' / 'evaluations.csv', dtype=str)\n        df['Perplexity'] = pd.to_numeric(df['Perplexity'])\n        return df\n    else:\n        return pd.DataFrame(columns=['Model', 'LoRAs', 'Dataset', 'Perplexity', 'stride', 'max_length', 'Date', 'Comment'])\n\n\npast_evaluations = load_past_evaluations()\n\n\ndef save_past_evaluations(df):\n    global past_evaluations\n    past_evaluations = df\n    filepath = shared.user_data_dir / 'logs' / 'evaluations.csv'\n    filepath.parent.mkdir(parents=True, exist_ok=True)\n    df.to_csv(filepath, index=False)\n\n\ndef calculate_perplexity(models, input_dataset, stride, _max_length):\n    '''\n    Based on:\n    https://huggingface.co/docs/transformers/perplexity#calculating-ppl-with-fixedlength-models\n    '''\n\n    import torch\n    from datasets import load_dataset\n\n    from modules.torch_utils import clear_torch_cache\n\n    if shared.args.loader == \"llama.cpp\":\n        logger.error(\"Perplexity evaluation is not implemented for the llama.cpp loader.\")\n        raise ValueError\n\n    if not shared.args.no_use_fast:\n        logger.warning(\"--no_use_fast is not set. 
If tokenizing the input dataset takes a long time, try reloading the model with that option set/checked.\")\n\n    global past_evaluations\n    cumulative_log = ''\n    cumulative_log += \"Loading the input dataset...\\n\\n\"\n    yield cumulative_log\n\n    # Copied from https://github.com/qwopqwop200/GPTQ-for-LLaMa/blob/triton/utils/datautils.py\n    if input_dataset == 'wikitext':\n        data = load_dataset('wikitext', 'wikitext-2-raw-v1', split='test')\n        text = \"\\n\\n\".join(data['text'])\n    elif input_dataset == 'ptb':\n        data = load_dataset('ptb_text_only', 'penn_treebank', split='validation')\n        text = \"\\n\\n\".join(data['sentence'])\n    elif input_dataset == 'ptb_new':\n        data = load_dataset('ptb_text_only', 'penn_treebank', split='test')\n        text = \" \".join(data['sentence'])\n    else:\n        with open(shared.user_data_dir / 'training' / 'datasets' / f'{input_dataset}.txt', 'r', encoding='utf-8') as f:\n            text = f.read()\n\n    for model in models:\n        if is_in_past_evaluations(model, input_dataset, stride, _max_length):\n            cumulative_log += f\"`{model}` has already been tested. Ignoring.\\n\\n\"\n            yield cumulative_log\n            continue\n\n        if model != 'current model':\n            try:\n                yield cumulative_log + f\"Loading `{model}`...\\n\\n\"\n                model_settings = get_model_metadata(model)\n                shared.settings.update({k: v for k, v in model_settings.items() if k in shared.settings})  # hijacking the interface defaults\n                update_model_parameters(model_settings)  # hijacking the command-line arguments\n                unload_model()\n                shared.model, shared.tokenizer = load_model(model)\n            except Exception:\n                cumulative_log += f\"Failed to load `{model}`. 
Moving on.\\n\\n\"\n                yield cumulative_log\n                continue\n\n        cumulative_log += f\"Processing `{shared.model_name}`...\\n\\n\"\n        yield cumulative_log + \"Tokenizing the input dataset...\\n\\n\"\n        encodings = encode(text, add_special_tokens=False)\n        seq_len = encodings.shape[1]\n        if _max_length:\n            max_length = _max_length\n        elif hasattr(shared.model.config, 'max_position_embeddings'):\n            max_length = shared.model.config.max_position_embeddings\n        else:\n            max_length = 2048\n\n        nlls = []\n        prev_end_loc = 0\n        for begin_loc in tqdm(range(0, seq_len, stride)):\n            yield cumulative_log + f\"Evaluating... {100*begin_loc/seq_len:.2f}%\"\n            end_loc = min(begin_loc + max_length, seq_len)\n            trg_len = end_loc - prev_end_loc  # may be different from stride on last loop\n            input_ids = encodings[:, begin_loc:end_loc]\n            target_ids = input_ids.clone()\n            target_ids[:, :-trg_len] = -100\n            clear_torch_cache()\n            with torch.no_grad():\n                outputs = shared.model(input_ids=input_ids, labels=target_ids)\n\n                # loss is calculated using CrossEntropyLoss which averages over valid labels\n                # N.B. 
the model only calculates loss over trg_len - 1 labels, because it internally shifts the labels\n                # to the left by 1.\n                neg_log_likelihood = outputs.loss\n\n            nlls.append(neg_log_likelihood)\n            prev_end_loc = end_loc\n            if end_loc == seq_len:\n                break\n\n        ppl = torch.exp(torch.stack(nlls).mean())\n\n        add_entry_to_past_evaluations(float(ppl), shared.model_name, input_dataset, stride, _max_length)\n        save_past_evaluations(past_evaluations)\n\n        message = f\"The perplexity for `{shared.model_name}` is: {float(ppl)}\"\n        logger.info(message)\n\n        cumulative_log += f\"{message}\\n\\n\"\n        yield cumulative_log\n\n\ndef add_entry_to_past_evaluations(perplexity, model, dataset, stride, max_length):\n    global past_evaluations\n    entry = {\n        'Model': model,\n        'LoRAs': ', '.join(shared.lora_names) or '-',\n        'Dataset': dataset,\n        'Perplexity': perplexity,\n        'stride': str(stride),\n        'max_length': str(max_length),\n        'Date': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),\n        'Comment': ''\n    }\n    past_evaluations = pd.concat([past_evaluations, pd.DataFrame([entry])], ignore_index=True)\n\n\ndef is_in_past_evaluations(model, dataset, stride, max_length):\n    entries = past_evaluations[(past_evaluations['Model'] == model) &\n                               (past_evaluations['Dataset'] == dataset) &\n                               (past_evaluations['max_length'] == str(max_length)) &\n                               (past_evaluations['stride'] == str(stride))]\n\n    if entries.shape[0] > 0:\n        return True\n    else:\n        return False\n\n\ndef generate_markdown_table():\n    sorted_df = past_evaluations.sort_values(by=['Dataset', 'stride', 'Perplexity', 'Date'])\n    return sorted_df\n"
  },
  {
    "path": "modules/exllamav3.py",
    "content": "import math\nimport queue\nimport threading\nimport traceback\nfrom pathlib import Path\nfrom typing import Any, List, Tuple\n\nimport torch\n\nfrom exllamav3 import Cache, Config, Generator, Model, Tokenizer\nfrom exllamav3.cache import CacheLayer_fp16, CacheLayer_quant\nfrom exllamav3.generator import Job\nfrom exllamav3.generator.filter import Filter\nfrom exllamav3.generator.sampler import (\n    CustomSampler,\n    SS_AdaptiveP,\n    SS_Argmax,\n    SS_MinP,\n    SS_PresFreqP,\n    SS_RepP,\n    SS_Sample,\n    SS_Temperature,\n    SS_TopK,\n    SS_TopP\n)\nfrom modules import shared\nfrom modules.image_utils import (\n    convert_image_attachments_to_pil,\n    convert_openai_messages_to_images\n)\nfrom modules.logging_colors import logger\nfrom modules.text_generation import get_max_prompt_length\n\ntry:\n    import flash_attn\nexcept Exception:\n    logger.warning('Failed to load flash-attention due to the following error:\\n')\n    traceback.print_exc()\n\n\nclass LogitBiasFilter(Filter):\n    \"\"\"Filter subclass that applies a static additive logit bias mask.\"\"\"\n\n    def __init__(self, tokenizer, logit_bias_dict):\n        super().__init__(tokenizer=tokenizer, trigger_token=None, prefix_str=None, eos_after_completed=False)\n        self.logit_bias_dict = logit_bias_dict\n        self._mask = None\n\n    def reset(self): pass\n    def accept_token(self, token): pass\n    def is_completed(self): return False\n    def use_background_worker(self): return False\n\n    def get_next_logit_mask(self):\n        if self._mask is None:\n            self._mask = torch.zeros((1, self.vocab_size), dtype=self.logits_dtype)\n            for token_id_str, bias in self.logit_bias_dict.items():\n                token_id = int(token_id_str)\n                if 0 <= token_id < self.vocab_size:\n                    self._mask[0, token_id] = bias\n        return self._mask\n\n\nclass ConcurrentGenerator:\n    def __init__(self, generator):\n        
self.generator = generator\n        self.lock = threading.Lock()\n        self.job_queues = {}\n        self.active = True\n        self.has_jobs = threading.Event()\n        self.thread = threading.Thread(target=self._iterate_loop, daemon=True)\n        self.thread.start()\n\n    def _iterate_loop(self):\n        while self.active:\n            self.has_jobs.wait(timeout=0.5)\n            with self.lock:\n                if not self.job_queues:\n                    self.has_jobs.clear()\n                    continue\n                try:\n                    results = self.generator.iterate()\n                except Exception:\n                    logger.error(\"Exception in ConcurrentGenerator iterate loop:\\n\" + traceback.format_exc())\n                    for q in self.job_queues.values():\n                        q.put(None)\n                    self.job_queues.clear()\n                    self.generator.clear_queue()\n                    self.has_jobs.clear()\n                    continue\n            for result in results:\n                job = result[\"job\"]\n                q = self.job_queues.get(job)\n                if q:\n                    q.put(result)\n                    if result.get(\"eos\"):\n                        self.job_queues.pop(job, None)\n            if not self.job_queues:\n                self.has_jobs.clear()\n\n    def submit(self, job) -> queue.Queue:\n        q = queue.Queue()\n        with self.lock:\n            self.job_queues[job] = q\n            self.generator.enqueue(job)\n        self.has_jobs.set()\n        return q\n\n    def cancel(self, job):\n        with self.lock:\n            if job in self.job_queues:\n                self.generator.cancel(job)\n                self.job_queues[job].put(None)\n                del self.job_queues[job]\n\n    def stop(self):\n        self.active = False\n        self.has_jobs.set()\n        self.thread.join(timeout=5)\n\n\nclass Exllamav3Model:\n    def __init__(self):\n        
pass\n\n    @property\n    def device(self) -> torch.device:\n        return torch.device(0)\n\n    @classmethod\n    def from_pretrained(cls, path_to_model):\n        path_to_model = Path(f'{shared.args.model_dir}') / Path(path_to_model)\n\n        # Reset global MMTokenAllocator to prevent token ID corruption when switching models\n        from exllamav3.tokenizer.mm_embedding import (\n            FIRST_MM_EMBEDDING_INDEX,\n            global_allocator\n        )\n        global_allocator.next_token_index = FIRST_MM_EMBEDDING_INDEX\n\n        config = Config.from_directory(str(path_to_model))\n        model = Model.from_config(config)\n\n        # Calculate the closest multiple of 256 at or above the chosen value\n        max_tokens = shared.args.ctx_size\n        if max_tokens % 256 != 0:\n            adjusted_tokens = ((max_tokens // 256) + 1) * 256\n            logger.warning(f\"max_num_tokens must be a multiple of 256. Adjusting from {max_tokens} to {adjusted_tokens}\")\n            max_tokens = adjusted_tokens\n\n        # Parse cache type\n        cache_type = shared.args.cache_type.lower()\n        cache_kwargs = {}\n        if cache_type == 'fp16':\n            layer_type = CacheLayer_fp16\n        elif cache_type.startswith('q'):\n            layer_type = CacheLayer_quant\n            if '_' in cache_type:\n                # Different bits for k and v (e.g., q4_q8)\n                k_part, v_part = cache_type.split('_')\n                k_bits = int(k_part[1:])\n                v_bits = int(v_part[1:])\n            else:\n                # Same bits for k and v (e.g., q4)\n                k_bits = v_bits = int(cache_type[1:])\n\n            # Validate bit ranges\n            if not (2 <= k_bits <= 8 and 2 <= v_bits <= 8):\n                logger.warning(f\"Invalid quantization bits: k_bits={k_bits}, v_bits={v_bits}. Must be between 2 and 8. 
Falling back to fp16.\")\n                layer_type = CacheLayer_fp16\n            else:\n                cache_kwargs = {'k_bits': k_bits, 'v_bits': v_bits}\n        else:\n            logger.warning(f\"Unrecognized cache type: {cache_type}. Falling back to fp16.\")\n            layer_type = CacheLayer_fp16\n\n        cache = Cache(model, max_num_tokens=max_tokens, layer_type=layer_type, **cache_kwargs)\n\n        load_params = {'progressbar': True}\n        split = None\n        if shared.args.gpu_split:\n            split = [float(alloc) for alloc in shared.args.gpu_split.split(\",\")]\n            load_params['use_per_device'] = split\n\n        # Tensor-parallelism\n        if shared.args.enable_tp:\n            load_params['tensor_p'] = True\n            load_params['tp_backend'] = shared.args.tp_backend\n\n        # Load vision and draft before the main model so autosplit\n        # accounts for their VRAM usage.\n\n        # Load vision model component (ExLlamaV3 native)\n        vision_model = None\n        if \"vision_config\" in config.config_dict:\n            logger.info(\"Vision component detected in model config. Attempting to load...\")\n            try:\n                vision_model = Model.from_config(config, component=\"vision\")\n                vision_model.load(progressbar=True)\n                logger.info(\"Vision model loaded successfully.\")\n            except Exception as e:\n                logger.warning(f\"Vision model loading failed (multimodal disabled): {e}\")\n        else:\n            logger.info(\"No vision component in model config. 
Skipping multimodal setup.\")\n\n        # Initialize draft model for speculative decoding\n        draft_model = None\n        draft_cache = None\n        if shared.args.model_draft and shared.args.model_draft.lower() not in [\"\", \"none\"]:\n            logger.info(f\"Loading draft model for speculative decoding: {shared.args.model_draft}\")\n\n            draft_path = Path(shared.args.model_draft)\n            if not draft_path.is_dir():\n                draft_path = Path(f'{shared.args.model_dir}') / Path(shared.args.model_draft)\n\n            if not draft_path.is_dir():\n                logger.warning(f\"Draft model not found at {draft_path}, speculative decoding disabled.\")\n            else:\n                draft_config = Config.from_directory(str(draft_path))\n                draft_model = Model.from_config(draft_config)\n                draft_cache = Cache(draft_model, max_num_tokens=max_tokens, layer_type=layer_type, **cache_kwargs)\n\n                draft_load_params = {'progressbar': True}\n                if split:\n                    draft_load_params['use_per_device'] = split\n\n                draft_model.load(**draft_load_params)\n                logger.info(f\"Draft model loaded successfully. 
Max speculative tokens: {shared.args.draft_max}\")\n\n        # Load main model last\n        model.load(**load_params)\n        tokenizer = Tokenizer.from_config(config)\n\n        generator = Generator(\n            model=model,\n            cache=cache,\n            tokenizer=tokenizer,\n            draft_model=draft_model,\n            draft_cache=draft_cache,\n            num_draft_tokens=shared.args.draft_max if draft_model is not None else 0,\n        )\n\n        result = cls()\n        result.model = model\n        result.cache = cache\n        result.tokenizer = tokenizer\n        result.generator = generator\n        result.parallel_generator = ConcurrentGenerator(generator)\n        result.config = config\n        result.max_tokens = max_tokens\n        result.vision_model = vision_model\n        result.draft_model = draft_model\n        result.draft_cache = draft_cache\n\n        return result, result\n\n    def is_multimodal(self) -> bool:\n        \"\"\"Check if this model supports multimodal input.\"\"\"\n        return hasattr(self, 'vision_model') and self.vision_model is not None\n\n    def _process_images_for_generation(self, prompt: str, state: dict) -> Tuple[str, List[Any]]:\n        \"\"\"\n        Process all possible image inputs and return modified prompt + embeddings.\n        Returns: (processed_prompt, image_embeddings)\n        \"\"\"\n        # Collect images from various sources using shared utilities\n        pil_images = []\n\n        # From webui image_attachments (preferred format)\n        if 'image_attachments' in state and state['image_attachments']:\n            pil_images.extend(convert_image_attachments_to_pil(state['image_attachments']))\n        # From OpenAI API raw_images\n        elif 'raw_images' in state and state['raw_images']:\n            pil_images.extend(state['raw_images'])\n        # From OpenAI API messages format\n        elif 'messages' in state and state['messages']:\n            
pil_images.extend(convert_openai_messages_to_images(state['messages']))\n\n        if not pil_images:\n            return prompt, []\n\n        # ExLlamaV3-specific: Generate embeddings\n        try:\n            # Use pre-computed embeddings if available (proper MMEmbedding lifetime)\n            if 'image_embeddings' in state and state['image_embeddings']:\n                # Use existing embeddings - this preserves MMEmbedding lifetime\n                image_embeddings = state['image_embeddings']\n            else:\n                # Do not reset the cache/allocator index; it causes token ID conflicts during generation.\n                logger.info(f\"Processing {len(pil_images)} image(s) with ExLlamaV3 vision model\")\n                image_embeddings = [\n                    self.vision_model.get_image_embeddings(tokenizer=self.tokenizer, image=img)\n                    for img in pil_images\n                ]\n\n            # ExLlamaV3-specific: Handle prompt processing with placeholders\n            placeholders = [ie.text_alias for ie in image_embeddings]\n\n            if '<__media__>' in prompt:\n                # Web chat: Replace <__media__> placeholders\n                for alias in placeholders:\n                    prompt = prompt.replace('<__media__>', alias, 1)\n                logger.info(f\"Replaced {len(placeholders)} <__media__> placeholder(s)\")\n            else:\n                # API: Prepend embedding aliases\n                combined_placeholders = \"\\n\".join(placeholders)\n                prompt = combined_placeholders + \"\\n\" + prompt\n                logger.info(f\"Prepended {len(placeholders)} embedding(s) to prompt\")\n\n            return prompt, image_embeddings\n\n        except Exception as e:\n            logger.error(f\"Failed to process images: {e}\")\n            return prompt, []\n\n    def generate_with_streaming(self, prompt, state):\n        \"\"\"\n        Generate text with streaming using native ExLlamaV3 API\n      
  \"\"\"\n\n        if shared.is_multimodal:\n            # Process images and modify prompt (ExLlamaV3-specific)\n            prompt, image_embeddings = self._process_images_for_generation(prompt, state)\n        else:\n            image_embeddings = []\n\n        # Greedy decoding is a special case\n        if state['temperature'] == 0:\n            sampler = CustomSampler([SS_Argmax()])\n        else:\n            # 1. Create a list of all active, unordered samplers\n            unordered_samplers = []\n\n            # Penalties\n            penalty_range = state['repetition_penalty_range']\n            if penalty_range <= 0:\n                penalty_range = int(10e7)  # Use large number for \"full context\"\n            rep_decay = 0  # Not a configurable parameter\n\n            # Add penalty samplers if they are active\n            if state['repetition_penalty'] != 1.0:\n                unordered_samplers.append(SS_RepP(state['repetition_penalty'], penalty_range, rep_decay))\n            if state['presence_penalty'] != 0.0 or state['frequency_penalty'] != 0.0:\n                unordered_samplers.append(SS_PresFreqP(state['presence_penalty'], state['frequency_penalty'], penalty_range, rep_decay))\n\n            # Standard samplers\n            if state['top_k'] > 0:\n                unordered_samplers.append(SS_TopK(state['top_k']))\n            if state['top_p'] < 1.0:\n                unordered_samplers.append(SS_TopP(state['top_p']))\n            if state['min_p'] > 0.0:\n                unordered_samplers.append(SS_MinP(state['min_p']))\n\n            # Temperature (SS_NoOp is returned if temp is 1.0)\n            unordered_samplers.append(SS_Temperature(state['temperature']))\n\n            # 2. 
Define the mapping from class names to the priority list keys\n            class_name_to_nickname = {\n                'SS_RepP': 'repetition_penalty',\n                'SS_PresFreqP': 'presence_frequency_penalty',\n                'SS_TopK': 'top_k',\n                'SS_TopP': 'top_p',\n                'SS_MinP': 'min_p',\n                'SS_Temperature': 'temperature',\n            }\n\n            # 3. Get the priority list and handle temperature_last\n            default_priority = ['repetition_penalty', 'presence_frequency_penalty', 'top_k', 'top_p', 'min_p', 'temperature']\n            sampler_priority = list(state.get('sampler_priority') or default_priority)\n\n            if state['temperature_last'] and 'temperature' in sampler_priority:\n                sampler_priority.append(sampler_priority.pop(sampler_priority.index('temperature')))\n\n            # The preset system uses separate 'presence_penalty' and\n            # 'frequency_penalty', but ExLlamaV3 has a single combined\n            # SS_PresFreqP sampler. Normalize to the combined name.\n            sampler_priority = ['presence_frequency_penalty' if x in ('presence_penalty', 'frequency_penalty') else x for x in sampler_priority]\n\n            # 4. Sort the unordered list based on the priority list\n            def custom_sort_key(sampler_obj):\n                class_name = sampler_obj.__class__.__name__\n                nickname = class_name_to_nickname.get(class_name)\n                if nickname and nickname in sampler_priority:\n                    return sampler_priority.index(nickname)\n                return -1\n\n            ordered_samplers = sorted(unordered_samplers, key=custom_sort_key)\n\n            # 5. 
Add the final sampling stage and build the sampler\n            if state.get('adaptive_target', 0) > 0:\n                ordered_samplers.append(SS_AdaptiveP(state['adaptive_target'], state['adaptive_decay']))\n            else:\n                ordered_samplers.append(SS_Sample())\n\n            sampler = CustomSampler(ordered_samplers)\n\n        # Encode prompt with embeddings (ExLlamaV3-specific)\n        input_ids = self.tokenizer.encode(\n            prompt,\n            add_bos=state['add_bos_token'],\n            encode_special_tokens=True,\n            embeddings=image_embeddings,\n        )\n\n        input_ids = input_ids[:, -get_max_prompt_length(state):]\n\n        self._last_prompt_token_count = input_ids.shape[-1]\n\n        # Determine max_new_tokens\n        if state['auto_max_new_tokens']:\n            max_new_tokens = state['truncation_length'] - self._last_prompt_token_count\n        else:\n            max_new_tokens = state['max_new_tokens']\n\n        # Use full EOS token list from config (may contain multiple IDs)\n        stop_conditions = []\n        if not state['ban_eos_token']:\n            for eos_id in self.config.eos_token_id_list:\n                if eos_id is not None:\n                    stop_conditions.append(eos_id)\n\n        # Build filters for logit_bias (OpenAI API)\n        filters = []\n        logit_bias = state.get('logit_bias')\n        if logit_bias:\n            filters.append(LogitBiasFilter(self.tokenizer, logit_bias))\n\n        # Logprobs support (OpenAI API)\n        logprobs = state.get('logprobs', 0) or 0\n        return_top_tokens = logprobs if logprobs > 0 else 0\n\n        seed = state.get('seed', -1)\n        job = Job(\n            input_ids=input_ids,\n            max_new_tokens=max_new_tokens,\n            decode_special_tokens=not state['skip_special_tokens'],\n            embeddings=image_embeddings if image_embeddings else None,\n            sampler=sampler,\n            seed=seed if seed >= 0 else 
None,\n            stop_conditions=stop_conditions if stop_conditions else None,\n            filters=filters if filters else None,\n            return_top_tokens=return_top_tokens,\n            return_probs=return_top_tokens > 0,\n        )\n\n        # Stream generation\n        response_text = \"\"\n        stop_event = state.get('stop_event')\n        self.last_completion_probabilities = []\n\n        result_queue = self.parallel_generator.submit(job)\n        try:\n            while True:\n                if shared.stop_everything or (stop_event and stop_event.is_set()):\n                    break\n                try:\n                    result = result_queue.get(timeout=0.1)\n                except queue.Empty:\n                    continue\n                if result is None or result.get(\"eos\"):\n                    # Capture logprobs from the final eos result too\n                    if result is not None and return_top_tokens > 0:\n                        self._capture_logprobs(result)\n                    break\n                chunk = result.get(\"text\", \"\")\n\n                # Capture logprobs from streaming results\n                if return_top_tokens > 0:\n                    self._capture_logprobs(result)\n\n                if chunk:\n                    response_text += chunk\n                    yield response_text\n        finally:\n            self.parallel_generator.cancel(job)\n\n    def _capture_logprobs(self, result):\n        \"\"\"Convert ExLlamav3 top-k token data to the shared logprobs format.\"\"\"\n        top_k_tokens = result.get(\"top_k_tokens\")\n        top_k_probs = result.get(\"top_k_probs\")\n        if top_k_tokens is None or top_k_probs is None:\n            return\n\n        id_to_piece = self.tokenizer.get_id_to_piece_list(True)\n        # top_k_tokens shape: (batch, seq_len, k), top_k_probs same\n        for seq_idx in range(top_k_tokens.shape[1]):\n            entry = {\"top_logprobs\": []}\n            for k_idx 
in range(top_k_tokens.shape[2]):\n                token_id = top_k_tokens[0, seq_idx, k_idx].item()\n                prob = top_k_probs[0, seq_idx, k_idx].item()\n                token_str = id_to_piece[token_id] if token_id < len(id_to_piece) else f\"<{token_id}>\"\n                logprob = math.log(prob) if prob > 0 else float(\"-inf\")\n                entry[\"top_logprobs\"].append({\"token\": token_str, \"logprob\": logprob})\n            self.last_completion_probabilities.append(entry)\n\n    def generate(self, prompt, state):\n        output = \"\"\n        for chunk in self.generate_with_streaming(prompt, state):\n            output = chunk\n\n        return output\n\n    def get_logits(self, token_ids, **kwargs):\n        \"\"\"\n        Process a batch of token_ids and return the logits for the last token.\n        This will reset and overwrite the model's cache.\n        \"\"\"\n        # Initialize a single params dictionary that will be updated in-place\n        params = {\n            \"cache\": self.cache,\n            \"reconstruct\": False,\n            \"attn_mode\": \"flash_attn\",\n            \"batch_shape\": (1, self.max_tokens),\n            \"past_len\": 0\n        }\n        params.update(kwargs)\n\n        # Process prefix tokens to fill the cache and generate recurrent state\n        if token_ids.shape[-1] > 1:\n            prefix_ids = token_ids[:, :-1]\n\n            # This forward call updates the 'params' dict with the recurrent state\n            self.model.forward(\n                input_ids=prefix_ids,\n                params=params\n            )\n\n            # Update past_len for the next call\n            params[\"past_len\"] = prefix_ids.shape[-1]\n\n        # Process the last token, now using the state-filled 'params' dict\n        last_token_ids = token_ids[:, -1:]\n        logits = self.model.forward(\n            input_ids=last_token_ids,\n            params=params\n        )\n\n        return logits.float().cpu()\n\n    
def encode(self, string, **kwargs):\n        add_bos = kwargs.pop('add_bos', True)\n        return self.tokenizer.encode(string, add_bos=add_bos, **kwargs)\n\n    def decode(self, ids, **kwargs):\n        if isinstance(ids, torch.Tensor) and ids.dim() == 0:\n            ids = ids.view(1)\n\n        return self.tokenizer.decode(ids, **kwargs)\n\n    @property\n    def last_prompt_token_count(self):\n        return getattr(self, '_last_prompt_token_count', 0)\n\n    def unload(self):\n        logger.info(\"Unloading ExLlamaV3 model components...\")\n\n        if hasattr(self, 'parallel_generator') and self.parallel_generator is not None:\n            try:\n                self.parallel_generator.stop()\n            except Exception as e:\n                logger.warning(f\"Error stopping parallel generator: {e}\")\n            self.parallel_generator = None\n\n        if hasattr(self, 'vision_model') and self.vision_model is not None:\n            try:\n                del self.vision_model\n            except Exception as e:\n                logger.warning(f\"Error unloading vision model: {e}\")\n            self.vision_model = None\n\n        if hasattr(self, 'draft_model') and self.draft_model is not None:\n            try:\n                self.draft_model.unload()\n                del self.draft_model\n            except Exception as e:\n                logger.warning(f\"Error unloading draft model: {e}\")\n            self.draft_model = None\n\n        if hasattr(self, 'draft_cache') and self.draft_cache is not None:\n            self.draft_cache = None\n\n        if hasattr(self, 'model') and self.model is not None:\n            try:\n                self.model.unload()\n                del self.model\n            except Exception as e:\n                logger.warning(f\"Error unloading main model: {e}\")\n\n            self.model = None\n\n        if hasattr(self, 'cache') and self.cache is not None:\n            self.cache = None\n\n        if hasattr(self, 
'generator') and self.generator is not None:\n            self.generator = None\n\n        if hasattr(self, 'tokenizer') and self.tokenizer is not None:\n            self.tokenizer = None\n"
  },
  {
    "path": "modules/exllamav3_hf.py",
    "content": "import os\nimport traceback\nfrom pathlib import Path\nfrom typing import Any, Dict, Optional, Union\n\nimport torch\nfrom torch.nn import CrossEntropyLoss\nfrom transformers import (\n    GenerationConfig,\n    GenerationMixin,\n    PretrainedConfig,\n    PreTrainedModel\n)\nfrom transformers.modeling_outputs import CausalLMOutputWithPast\n\nfrom exllamav3 import Cache, Config, Model\nfrom exllamav3.cache import CacheLayer_fp16, CacheLayer_quant\nfrom modules import shared\nfrom modules.logging_colors import logger\n\ntry:\n    import flash_attn\nexcept Exception:\n    logger.warning('Failed to load flash-attention due to the following error:\\n')\n    traceback.print_exc()\n\n\nclass Exllamav3HF(PreTrainedModel, GenerationMixin):\n    def __init__(self, model_dir):\n        hf_config = PretrainedConfig.from_pretrained(model_dir)\n        super().__init__(hf_config)\n\n        exl3_config = Config.from_directory(model_dir)\n\n        self.generation_config = GenerationConfig()\n        self.ex_model = Model.from_config(exl3_config)\n\n        # Calculate the closest multiple of 256 at or above the chosen value\n        max_tokens = shared.args.ctx_size\n        if max_tokens % 256 != 0:\n            adjusted_tokens = ((max_tokens // 256) + 1) * 256\n            logger.warning(f\"max_num_tokens must be a multiple of 256. 
Adjusting from {max_tokens} to {adjusted_tokens}\")\n            max_tokens = adjusted_tokens\n\n        # Parse cache type\n        cache_type = shared.args.cache_type.lower()\n        cache_kwargs = {}\n        if cache_type == 'fp16':\n            layer_type = CacheLayer_fp16\n        elif cache_type.startswith('q'):\n            layer_type = CacheLayer_quant\n            if '_' in cache_type:\n                # Different bits for k and v (e.g., q4_q8)\n                k_part, v_part = cache_type.split('_')\n                k_bits = int(k_part[1:])\n                v_bits = int(v_part[1:])\n            else:\n                # Same bits for k and v (e.g., q4)\n                k_bits = v_bits = int(cache_type[1:])\n\n            # Validate bit ranges\n            if not (2 <= k_bits <= 8 and 2 <= v_bits <= 8):\n                logger.warning(f\"Invalid quantization bits: k_bits={k_bits}, v_bits={v_bits}. Must be between 2 and 8. Falling back to fp16.\")\n                layer_type = CacheLayer_fp16\n            else:\n                cache_kwargs = {'k_bits': k_bits, 'v_bits': v_bits}\n        else:\n            logger.warning(f\"Unrecognized cache type: {cache_type}. 
Falling back to fp16.\")\n            layer_type = CacheLayer_fp16\n\n        self.ex_cache = Cache(self.ex_model, max_num_tokens=max_tokens, layer_type=layer_type, **cache_kwargs)\n\n        # Create load parameters dictionary\n        load_params = {'progressbar': True}\n        if shared.args.gpu_split:\n            split = [float(alloc) for alloc in shared.args.gpu_split.split(\",\")]\n            load_params['use_per_device'] = split\n\n        # Tensor-parallelism\n        if shared.args.enable_tp:\n            load_params['tensor_p'] = True\n            load_params['tp_backend'] = shared.args.tp_backend\n\n        self.ex_model.load(**load_params)\n        self.past_seq = None\n        self.max_tokens = max_tokens\n        self.layer_type = layer_type\n        self.cache_kwargs = cache_kwargs\n\n        if shared.args.cfg_cache:\n            self.ex_cache_negative = Cache(self.ex_model, max_num_tokens=max_tokens, layer_type=layer_type, **cache_kwargs)\n            self.past_seq_negative = None\n\n    def _validate_model_class(self):\n        pass\n\n    def _validate_model_kwargs(self, model_kwargs: Dict[str, Any]):\n        pass\n\n    def prepare_inputs_for_generation(self, input_ids, **kwargs):\n        return {'input_ids': input_ids, **kwargs}\n\n    @property\n    def device(self) -> torch.device:\n        return torch.device(0)\n\n    def __call__(self, *args, **kwargs):\n        use_cache = kwargs.get('use_cache', True)\n        labels = kwargs.get('labels', None)\n        past_key_values = kwargs.get('past_key_values', None)\n\n        if len(args) > 0:\n            if not shared.args.cfg_cache:\n                logger.error(\"Please enable the cfg-cache option to use CFG with ExLlamav3_HF.\")\n                return\n\n            input_ids = args[0]\n            is_negative = True\n            past_seq = self.past_seq_negative\n            ex_cache = self.ex_cache_negative\n        else:\n            input_ids = kwargs['input_ids']\n            
is_negative = False\n            past_seq = self.past_seq\n            ex_cache = self.ex_cache\n\n        seq = input_ids[0].tolist()\n        if is_negative and past_key_values is not None:\n            seq = past_key_values + seq\n\n        seq_tensor = torch.tensor(seq)\n        reset = True\n\n        # Maximum number of tokens to process in a single forward pass\n        max_chunk_size = 2048\n\n        # Make the forward call\n        if labels is None:\n            if past_seq is not None:\n                min_length = min(past_seq.shape[0], seq_tensor.shape[0])\n                indices = torch.nonzero(~torch.eq(past_seq[:min_length], seq_tensor[:min_length]))\n                if len(indices) > 0:\n                    longest_prefix = indices[0].item()\n                else:\n                    longest_prefix = min_length\n\n                if longest_prefix > 0:\n                    reset = False\n                    current_len = longest_prefix\n                    remaining_tokens = len(seq_tensor) - longest_prefix - 1\n\n                    if remaining_tokens > 0:\n                        # Process tokens from longest_prefix to second-to-last token\n                        tokens_to_process = seq_tensor[longest_prefix:-1]\n\n                        # Use prefill() to fill the cache without computing logits\n                        for i in range(0, tokens_to_process.shape[0], max_chunk_size):\n                            chunk = tokens_to_process[i:i + max_chunk_size]\n                            self.ex_model.prefill(\n                                input_ids=chunk.view(1, -1),\n                                params={\n                                    \"attn_mode\": \"flash_attn\",\n                                    \"cache\": ex_cache,\n                                    \"past_len\": longest_prefix + i,\n                                    \"batch_shape\": (1, self.max_tokens),\n                                }\n                            
)\n\n                        current_len = longest_prefix + remaining_tokens\n\n            if reset:\n                if len(seq_tensor) > 1:\n                    # Process all tokens except the last one\n                    tokens_to_process = seq_tensor[:-1]\n\n                    # Use prefill() to fill the cache without computing logits\n                    current_len = 0\n                    for i in range(0, tokens_to_process.shape[0], max_chunk_size):\n                        chunk = tokens_to_process[i:i + max_chunk_size]\n                        self.ex_model.prefill(\n                            input_ids=chunk.view(1, -1),\n                            params={\n                                \"attn_mode\": \"flash_attn\",\n                                \"cache\": ex_cache,\n                                \"past_len\": current_len,\n                                \"batch_shape\": (1, self.max_tokens),\n                            }\n                        )\n                        current_len += chunk.shape[0]\n                else:\n                    current_len = 0\n\n            # Process the last token and get logits\n            logits = self.ex_model.forward(\n                input_ids=seq_tensor[-1:].view(1, -1),\n                params={\n                    \"attn_mode\": \"flash_attn\",\n                    \"cache\": ex_cache,\n                    \"past_len\": current_len,\n                    \"batch_shape\": (1, self.max_tokens),\n                }\n            ).to(input_ids.device).float()\n        else:\n            # Labels path: use cache for cross-chunk attention.\n            tokens_to_process = seq_tensor\n            all_logits = None\n            current_len = 0\n\n            for i in range(0, tokens_to_process.shape[0], max_chunk_size):\n                chunk = tokens_to_process[i:i + max_chunk_size]\n                chunk_logits = self.ex_model.forward(\n                    input_ids=chunk.view(1, -1),\n               
     params={\n                        \"attn_mode\": \"flash_attn\",\n                        \"cache\": ex_cache,\n                        \"past_len\": current_len,\n                        \"batch_shape\": (1, self.max_tokens),\n                    }\n                ).float()\n                current_len += chunk.shape[0]\n\n                if all_logits is None:\n                    all_logits = chunk_logits\n                else:\n                    all_logits = torch.cat([all_logits, chunk_logits], dim=1)\n\n            logits = all_logits\n\n        if is_negative:\n            self.past_seq_negative = seq_tensor\n        else:\n            self.past_seq = seq_tensor\n\n        if torch.cuda.is_available():\n            torch.cuda.synchronize()\n\n        loss = None\n        if labels is not None:\n            # Shift so that tokens < n predict n\n            shift_logits = logits[..., :-1, :].contiguous()\n            shift_labels = labels[..., 1:].contiguous()\n            # Flatten the tokens\n            loss_fct = CrossEntropyLoss()\n            shift_logits = shift_logits.view(-1, logits.shape[-1])\n            shift_labels = shift_labels.view(-1)\n            # Enable model parallelism\n            shift_labels = shift_labels.to(shift_logits.device)\n            loss = loss_fct(shift_logits, shift_labels)\n\n        return CausalLMOutputWithPast(logits=logits, past_key_values=seq if use_cache else None, loss=loss)\n\n    @classmethod\n    def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.PathLike]], *model_args, **kwargs):\n        assert len(model_args) == 0 and len(kwargs) == 0, \"extra args is currently not supported\"\n        if isinstance(pretrained_model_name_or_path, str):\n            pretrained_model_name_or_path = Path(pretrained_model_name_or_path)\n\n        pretrained_model_name_or_path = Path(f'{shared.args.model_dir}') / Path(pretrained_model_name_or_path)\n\n        return 
Exllamav3HF(pretrained_model_name_or_path)\n\n    def unload(self):\n        \"\"\"Properly unload the ExllamaV3 model and free GPU memory.\"\"\"\n        if hasattr(self, 'ex_model') and self.ex_model is not None:\n            self.ex_model.unload()\n            self.ex_model = None\n\n        if hasattr(self, 'ex_cache') and self.ex_cache is not None:\n            self.ex_cache = None\n\n        # Clean up any additional ExllamaV3 resources\n        if hasattr(self, 'past_seq'):\n            self.past_seq = None\n        if hasattr(self, 'past_seq_negative'):\n            self.past_seq_negative = None\n        if hasattr(self, 'ex_cache_negative'):\n            self.ex_cache_negative = None\n"
  },
  {
    "path": "modules/extensions.py",
    "content": "import importlib\nimport importlib.util\nimport sys\nimport traceback\nfrom functools import partial\nfrom inspect import signature\nfrom pathlib import Path\n\nimport modules.shared as shared\nfrom modules.logging_colors import logger\n\nstate = {}\navailable_extensions = []\nsetup_called = set()\n\n\ndef apply_settings(extension, name):\n    if not hasattr(extension, 'params'):\n        return\n\n    for param in extension.params:\n        _id = f\"{name}-{param}\"\n        shared.default_settings[_id] = extension.params[param]\n        if _id in shared.settings:\n            extension.params[param] = shared.settings[_id]\n\n\ndef load_extensions():\n    global state, setup_called\n    state = {}\n\n    for i, name in enumerate(shared.args.extensions):\n        if name not in available_extensions:\n            continue\n\n        if name != 'api':\n            logger.info(f'Loading the extension \"{name}\"')\n\n        try:\n            # Prefer user extension, fall back to system extension\n            user_script_path = shared.user_data_dir / 'extensions' / name / 'script.py'\n            if user_script_path.exists():\n                spec = importlib.util.spec_from_file_location(\n                    f\"user_ext_{name}\",\n                    str(user_script_path)\n                )\n                extension = importlib.util.module_from_spec(spec)\n                sys.modules[spec.name] = extension\n                spec.loader.exec_module(extension)\n            else:\n                extension = importlib.import_module(f\"extensions.{name}.script\")\n\n            if extension not in setup_called:\n                apply_settings(extension, name)\n                if hasattr(extension, \"setup\"):\n                    extension.setup()\n                setup_called.add(extension)\n\n            state[name] = [True, i, extension]  # Store extension object\n\n        except ModuleNotFoundError:\n            extension_location = 
shared.user_data_dir / 'extensions' / name if user_script_path.exists() else Path('extensions') / name\n            windows_path = str(extension_location).replace('/', '\\\\')\n            logger.error(\n                f\"Could not import the requirements for '{name}'. Make sure to install the requirements for the extension.\\n\\n\"\n                f\"* To install requirements automatically, launch the update_wizard script for your OS and:\\n\\n\"\n                f\"1. Choose option B (Install/update extensions requirements)\\n\"\n                f\"2. Select '{name}' from the extension list\\n\\n\"\n                f\"* To install requirements manually, launch the cmd script for your OS and paste the following command:\\n\\n\"\n                f\"Linux / Mac:\\n\\n\"\n                f\"pip install -r {extension_location}/requirements.txt --upgrade\\n\\n\"\n                f\"Windows:\\n\\n\"\n                f\"pip install -r {windows_path}\\\\requirements.txt --upgrade\\n\"\n            )\n            raise\n\n        except Exception:\n            logger.error(f'Failed to load the extension \"{name}\".')\n            traceback.print_exc()\n\n\n# This iterator returns the extensions in the order specified in the command-line\ndef iterator():\n    for name in sorted(state, key=lambda x: state[x][1]):\n        if state[name][0]:\n            yield state[name][2], name  # Use stored extension object\n\n\n# Extension functions that map string -> string\ndef _apply_string_extensions(function_name, text, state, is_chat=False):\n    for extension, _ in iterator():\n        if hasattr(extension, function_name):\n            func = getattr(extension, function_name)\n\n            # Handle old extensions without the 'state' arg or\n            # the 'is_chat' kwarg\n            count = 0\n            has_chat = False\n            for k in signature(func).parameters:\n                if k == 'is_chat':\n                    has_chat = True\n                else:\n        
            count += 1\n\n            if count == 2:\n                args = [text, state]\n            else:\n                args = [text]\n\n            if has_chat:\n                kwargs = {'is_chat': is_chat}\n            else:\n                kwargs = {}\n\n            text = func(*args, **kwargs)\n\n    return text\n\n\n# Extension functions that map string -> string\ndef _apply_chat_input_extensions(text, visible_text, state):\n    for extension, _ in iterator():\n        if hasattr(extension, 'chat_input_modifier'):\n            text, visible_text = extension.chat_input_modifier(text, visible_text, state)\n\n    return text, visible_text\n\n\n# custom_generate_chat_prompt handling - currently only the first one will work\ndef _apply_custom_generate_chat_prompt(text, state, **kwargs):\n    for extension, _ in iterator():\n        if hasattr(extension, 'custom_generate_chat_prompt'):\n            return extension.custom_generate_chat_prompt(text, state, **kwargs)\n\n    return None\n\n\n# Extension that modifies the input parameters before they are used\ndef _apply_state_modifier_extensions(state):\n    for extension, _ in iterator():\n        if hasattr(extension, \"state_modifier\"):\n            state = getattr(extension, \"state_modifier\")(state)\n\n    return state\n\n\n# Extension that modifies the chat history before it is used\ndef _apply_history_modifier_extensions(history):\n    for extension, _ in iterator():\n        if hasattr(extension, \"history_modifier\"):\n            history = getattr(extension, \"history_modifier\")(history)\n\n    return history\n\n\n# Extension functions that override the default tokenizer output - The order of execution is not defined\ndef _apply_tokenizer_extensions(function_name, state, prompt, input_ids, input_embeds):\n    for extension, _ in iterator():\n        if hasattr(extension, function_name):\n            prompt, input_ids, input_embeds = getattr(extension, function_name)(state, prompt, input_ids, 
input_embeds)\n\n    return prompt, input_ids, input_embeds\n\n\n# Allow extensions to add their own logits processors to the stack being run.\n# Each extension would call `processor_list.append({their LogitsProcessor}())`.\ndef _apply_logits_processor_extensions(function_name, processor_list, input_ids):\n    for extension, _ in iterator():\n        if hasattr(extension, function_name):\n            result = getattr(extension, function_name)(processor_list, input_ids)\n            if type(result) is list:\n                processor_list = result\n\n    return processor_list\n\n\n# Get prompt length in tokens after applying extension functions which override the default tokenizer output\n# currently only the first one will work\ndef _apply_custom_tokenized_length(prompt):\n    for extension, _ in iterator():\n        if hasattr(extension, 'custom_tokenized_length'):\n            return getattr(extension, 'custom_tokenized_length')(prompt)\n\n    return None\n\n\n# Custom generate reply handling - currently only the first one will work\ndef _apply_custom_generate_reply():\n    for extension, _ in iterator():\n        if hasattr(extension, 'custom_generate_reply'):\n            return getattr(extension, 'custom_generate_reply')\n\n    return None\n\n\ndef _apply_custom_css():\n    all_css = ''\n    for extension, _ in iterator():\n        if hasattr(extension, 'custom_css'):\n            all_css += getattr(extension, 'custom_css')()\n\n    return all_css\n\n\ndef _apply_custom_js():\n    all_js = ''\n    for extension, _ in iterator():\n        if hasattr(extension, 'custom_js'):\n            all_js += getattr(extension, 'custom_js')()\n\n    return all_js\n\n\ndef create_extensions_block():\n    import gradio as gr\n    to_display = []\n    for extension, name in iterator():\n        if hasattr(extension, \"ui\") and not (hasattr(extension, 'params') and extension.params.get('is_tab', False)):\n            to_display.append((extension, name))\n\n    # Creating the 
extension ui elements\n    if len(to_display) > 0:\n        with gr.Column(elem_id=\"extensions\"):\n            for row in to_display:\n                extension, _ = row\n                extension.ui()\n\n\ndef create_extensions_tabs():\n    import gradio as gr\n    for extension, name in iterator():\n        if hasattr(extension, \"ui\") and (hasattr(extension, 'params') and extension.params.get('is_tab', False)):\n            display_name = getattr(extension, 'params', {}).get('display_name', name)\n            with gr.Tab(display_name, elem_classes=\"extension-tab\"):\n                extension.ui()\n\n\nEXTENSION_MAP = {\n    \"input\": partial(_apply_string_extensions, \"input_modifier\"),\n    \"output\": partial(_apply_string_extensions, \"output_modifier\"),\n    \"chat_input\": _apply_chat_input_extensions,\n    \"state\": _apply_state_modifier_extensions,\n    \"history\": _apply_history_modifier_extensions,\n    \"bot_prefix\": partial(_apply_string_extensions, \"bot_prefix_modifier\"),\n    \"tokenizer\": partial(_apply_tokenizer_extensions, \"tokenizer_modifier\"),\n    'logits_processor': partial(_apply_logits_processor_extensions, 'logits_processor_modifier'),\n    \"custom_generate_chat_prompt\": _apply_custom_generate_chat_prompt,\n    \"custom_generate_reply\": _apply_custom_generate_reply,\n    \"tokenized_length\": _apply_custom_tokenized_length,\n    \"css\": _apply_custom_css,\n    \"js\": _apply_custom_js\n}\n\n\ndef apply_extensions(typ, *args, **kwargs):\n    if typ not in EXTENSION_MAP:\n        raise ValueError(f\"Invalid extension type {typ}\")\n\n    return EXTENSION_MAP[typ](*args, **kwargs)\n"
  },
  {
    "path": "modules/grammar/grammar_utils.py",
    "content": "'''\nThis file has been 100% copied from this PR to the Transformers library:\nhttps://github.com/huggingface/transformers/pull/27557\n\nAuthor: Saibo-creator\nAuthor GitHub: https://github.com/Saibo-creator\n\nAll credits go to the author.\n'''\n\nimport logging\nimport re\nimport time\nfrom abc import ABC\nfrom functools import lru_cache\nfrom typing import Dict, List\n\nimport torch\n\nfrom modules import shared\n\nlogger = logging.getLogger(__name__)\n\n\n########################\n# EBNF Grammar Parsing #\n########################\n\nEND_OF_ALTERNATE_MARKER = 0\nEND_OF_RULE_MARKER = 0\nTO_BE_FILLED_MARKER = 0\nREF_RULE_MARKER = 1\nLITERAL_MARKER = 2\n\n\nclass ParseState:\n    def __init__(self):\n        self.symbol_ids = {}\n        self.grammar_encoding = []  # old name: out_grammar\n\n\ndef get_symbol_id(state, src):\n    if src not in state.symbol_ids:\n        state.symbol_ids[src] = len(state.symbol_ids)\n    return state.symbol_ids[src]\n\n\ndef generate_symbol_id(state, base_name):\n    next_id = len(state.symbol_ids)\n    state.symbol_ids[base_name + \"_\" + str(next_id)] = next_id\n    return next_id\n\n\ndef is_word_char(c):\n    return c.isalnum() or c == \"-\" or c == \"_\"\n\n\ndef hex_to_int(c):\n    if c.isdigit():\n        return int(c)\n    elif \"a\" <= c.lower() <= \"f\":\n        return ord(c.lower()) - ord(\"a\") + 10\n    raise RuntimeError(\"unknown hex char \" + c)\n\n\ndef remove_leading_white_space(src, newline_ok):\n    \"\"\"\n    Skips over whitespace and comments in the input string.\n    This function processes the input string, skipping over any spaces, tabs,\n    and content following a '#' character, which denotes a comment. The parsing\n    of a comment continues until the end of the line (denoted by newline characters\n    '\\r' or '\\n'). 
If the 'newline_ok' parameter is set to False, the function\n    will stop processing and return the remaining string upon encountering a\n    newline character, otherwise it will skip over newline characters as well.\n    Parameters:\n    src (str): The input string to be processed.\n    newline_ok (bool): A flag indicating whether encountering a newline character\n                       should stop the parsing (False) or if it should be skipped (True).\n    Returns:\n    str: The remaining portion of the input string after skipping whitespace and comments.\n    \"\"\"\n    pos = 0\n    while pos < len(src) and (src[pos].isspace() or src[pos] == \"#\"):\n        if src[pos] == \"#\":\n            while pos < len(src) and src[pos] not in (\"\\r\", \"\\n\"):\n                pos += 1\n        else:\n            if not newline_ok and src[pos] in (\"\\r\", \"\\n\"):\n                break\n            pos += 1\n    return src[pos:]\n\n\ndef parse_name(src):\n    pos = 0\n    while pos < len(src) and is_word_char(src[pos]):\n        pos += 1\n    if pos == 0:\n        raise RuntimeError(\"expecting name at \" + src)\n    return src[:pos], src[pos:]\n\n\ndef read_hex(s):\n    val = 0\n    for c in s:\n        val = (val << 4) + hex_to_int(c)\n    return chr(val)\n\n\ndef parse_char(src):\n    \"\"\"\n    parse the leading char from the input string\n    :param src:\n    :return: char, remaining_src\n    \"\"\"\n\n    # if we have a backslash, it's maybe an escape\n    if src[0] == \"\\\\\":\n        esc = src[1]\n        if esc == \"x\":\n            return read_hex(src[2:4]), src[4:]\n        elif esc == \"u\":\n            return read_hex(src[2:6]), src[6:]\n        elif esc == \"U\":\n            return read_hex(src[2:10]), src[10:]\n        elif esc in ('\"', \"[\", \"]\", \"\\\\\", \"-\"):\n            return esc, src[2:]\n        elif esc == \"r\":\n            return \"\\r\", src[2:]\n        elif esc == \"n\":\n            return \"\\n\", src[2:]\n        elif 
esc == \"t\":\n            return \"\\t\", src[2:]\n        elif esc == \"\\\\\":\n            return \"\\\\\", src[2:]\n        raise RuntimeError(\"unknown escape at \" + src)\n    elif src:\n        return src[0], src[1:]\n    raise RuntimeError(\"unexpected end of input\")\n\n\ndef parse_sequence(state, src, rule_name, outbuf, is_nested):\n    out_start_pos = len(outbuf)\n\n    # sequence size, will be replaced at end when known\n    outbuf.append(TO_BE_FILLED_MARKER)\n\n    last_sym_start = len(outbuf)\n    remaining_src = src\n    while remaining_src:\n        if remaining_src[0] == '\"':  # literal string\n            remaining_src = remaining_src[1:]\n            last_sym_start = len(outbuf)\n            while remaining_src[0] != '\"':\n                char, remaining_src = parse_char(remaining_src)\n\n                # each char of a literal is encoded as a \"range\" of char - char\n                outbuf.append(LITERAL_MARKER)\n                outbuf.append(ord(char))\n                outbuf.append(ord(char))\n            remaining_src = remove_leading_white_space(remaining_src[1:], is_nested)\n        elif remaining_src[0] == \"[\":  # char range(s)\n            remaining_src = remaining_src[1:]\n            last_sym_start = len(outbuf)\n            # num chars in range - replaced at end of loop\n            outbuf.append(TO_BE_FILLED_MARKER)\n            while remaining_src[0] != \"]\":\n                char, remaining_src = parse_char(remaining_src)\n\n                outbuf.append(ord(char))\n                if remaining_src[0] == \"-\" and remaining_src[1] != \"]\":\n                    endchar_pair, remaining_src = parse_char(remaining_src[1:])\n                    outbuf.append(ord(endchar_pair))\n                else:\n                    # chars that aren't part of a c1-c2 range are just doubled (i.e., c-c)\n                    outbuf.append(ord(char))\n            # replace num chars with actual\n            outbuf[last_sym_start] = len(outbuf) 
- last_sym_start - 1\n            remaining_src = remove_leading_white_space(remaining_src[1:], is_nested)\n        elif is_word_char(remaining_src[0]):  # rule reference\n            name, remaining_src = parse_name(remaining_src)\n            ref_rule_id = get_symbol_id(state, name)\n            remaining_src = remove_leading_white_space(remaining_src, is_nested)\n            last_sym_start = len(outbuf)\n            outbuf.append(REF_RULE_MARKER)\n            outbuf.append(ref_rule_id)\n        elif remaining_src[0] == \"(\":  # grouping\n            # parse nested alternates into synthesized rule\n            remaining_src = remove_leading_white_space(remaining_src[1:], True)\n            sub_rule_id = generate_symbol_id(state, rule_name)\n            remaining_src = parse_alternates(state, remaining_src, rule_name, sub_rule_id, True)\n            last_sym_start = len(outbuf)\n            # output reference to synthesized rule\n            outbuf.append(REF_RULE_MARKER)\n            outbuf.append(sub_rule_id)\n            if remaining_src[0] != \")\":\n                raise RuntimeError(\"expecting ')' at \" + remaining_src)\n            remaining_src = remove_leading_white_space(remaining_src[1:], is_nested)\n        elif remaining_src[0] in (\"*\", \"+\", \"?\"):  # repetition operator\n            if len(outbuf) - out_start_pos - 1 == 0:\n                raise RuntimeError(\"expecting preceeding item to */+/? at \" + remaining_src)\n            out_grammar = state.grammar_encoding\n\n            # apply transformation to previous symbol (last_sym_start -\n            # end) according to rewrite rules:\n            # S* --> S' ::= S S' |\n            # S+ --> S' ::= S S' | S\n            # S? 
--> S' ::= S |\n            sub_rule_id = generate_symbol_id(state, rule_name)\n            out_grammar.append(sub_rule_id)\n            sub_rule_start = len(out_grammar)\n            # placeholder for size of 1st alternate\n            out_grammar.append(TO_BE_FILLED_MARKER)\n            # add preceding symbol to generated rule\n            out_grammar.extend(outbuf[last_sym_start:])\n            if remaining_src[0] in (\"*\", \"+\"):\n                # cause generated rule to recurse\n                out_grammar.append(REF_RULE_MARKER)\n                out_grammar.append(sub_rule_id)\n            # apply actual size\n            out_grammar[sub_rule_start] = len(out_grammar) - sub_rule_start\n            # mark end of 1st alternate\n            out_grammar.append(END_OF_ALTERNATE_MARKER)\n            sub_rule_start = len(out_grammar)\n            # placeholder for size of 2nd alternate\n            out_grammar.append(TO_BE_FILLED_MARKER)\n            if remaining_src[0] == \"+\":\n                # add preceding symbol as alternate only for '+'\n                out_grammar.extend(outbuf[last_sym_start:])\n            # apply actual size of 2nd alternate\n            out_grammar[sub_rule_start] = len(out_grammar) - sub_rule_start\n            # mark end of 2nd alternate, then end of rule\n            out_grammar.append(END_OF_ALTERNATE_MARKER)\n            out_grammar.append(END_OF_RULE_MARKER)\n\n            # in original rule, replace previous symbol with reference to generated rule\n            outbuf[last_sym_start:] = [1, sub_rule_id]\n\n            remaining_src = remove_leading_white_space(remaining_src[1:], is_nested)\n        else:\n            break\n    # apply actual size of this alternate sequence\n    outbuf[out_start_pos] = len(outbuf) - out_start_pos\n    # mark end of alternate\n    outbuf.append(END_OF_ALTERNATE_MARKER)\n    return remaining_src\n\n\ndef parse_alternates(state, src, rule_name, rule_id, is_nested):\n    outbuf = []\n    
remaining_src = parse_sequence(state, src, rule_name, outbuf, is_nested)\n    while remaining_src and remaining_src[0] == \"|\":\n        remaining_src = remove_leading_white_space(remaining_src[1:], True)\n        remaining_src = parse_sequence(state, remaining_src, rule_name, outbuf, is_nested)\n\n    state.grammar_encoding.append(rule_id)\n    state.grammar_encoding.extend(outbuf)\n    state.grammar_encoding.append(0)\n    return remaining_src\n\n\ndef parse_rule(state, src):\n    name, remaining_src = parse_name(src)\n    remaining_src = remove_leading_white_space(remaining_src, False)\n    rule_id = get_symbol_id(state, name)\n\n    if remaining_src[:3] != \"::=\":\n        raise RuntimeError(\"expecting ::= at \" + remaining_src)\n    remaining_src = remove_leading_white_space(remaining_src[3:], True)\n\n    remaining_src = parse_alternates(state, remaining_src, name, rule_id, False)\n\n    if remaining_src and remaining_src[0] == \"\\r\":\n        remaining_src = remaining_src[2:] if remaining_src[1] == \"\\n\" else remaining_src[1:]\n    elif remaining_src and remaining_src[0] == \"\\n\":\n        remaining_src = remaining_src[1:]\n    elif remaining_src:\n        raise RuntimeError(\"expecting newline or end at \" + remaining_src)\n    return remove_leading_white_space(remaining_src, True)\n\n\ndef parse_ebnf(src):\n    try:\n        state = ParseState()\n        grammar_repr = remove_leading_white_space(src, True)\n        last_grammar_repr = \"\"\n        while grammar_repr:\n            if last_grammar_repr:\n                last_parsed_rule_len = len(last_grammar_repr) - len(grammar_repr)\n                logger.debug(f\"last_parsed_rule: {last_grammar_repr[:last_parsed_rule_len]}\")\n            last_grammar_repr = grammar_repr\n            grammar_repr = parse_rule(state, grammar_repr)\n        state.grammar_encoding.append(0xFFFF)\n        return state\n    except RuntimeError as err:\n        logger.warning(\"error parsing grammar:\", err)\n        
return ParseState()\n\n\ndef print_rule(file, grammar_encoding, index, symbol_id_names):\n    rule_id = grammar_encoding[index]\n    print(f\"<{index}>{symbol_id_names[rule_id]} ::=\", end=\" \", file=file)\n    pos = index + 1\n    while grammar_encoding[pos]:\n        if pos - 1 > index:\n            print(\"|\", end=\" \", file=file)\n        pos += 1  # sequence size, not needed here\n        while grammar_encoding[pos]:\n            if grammar_encoding[pos] == REF_RULE_MARKER:\n                ref_rule_id = grammar_encoding[pos + 1]\n                print(\n                    f\"<{pos}>{symbol_id_names[ref_rule_id]}\",\n                    end=\" \",\n                    file=file,\n                )\n                pos += 2\n            else:\n                print(\"<{}>[\".format(pos), end=\"\", file=file)\n                num_chars = grammar_encoding[pos]\n                pos += 1\n\n                for i in range(0, num_chars, 2):\n                    print(\"{}-\".format(chr(grammar_encoding[pos + i])), end=\"\", file=file)\n                    if i + 1 < num_chars:\n                        print(\"{}\".format(chr(grammar_encoding[pos + i + 1])), end=\"\", file=file)\n                print(\"]\", end=\" \", file=file)\n                pos += num_chars\n        pos += 1\n    print(file=file)\n    return pos + 1\n\n\ndef print_grammar(file, state):\n    pos = 0\n    symbol_id_names = {v: k for k, v in state.symbol_ids.items()}\n    print(\"Grammar Rules:\", file=file)\n\n    while state.grammar_encoding[pos] != 0xFFFF:\n        pos = print_rule(file, state.grammar_encoding, pos, symbol_id_names)\n    pos = 0\n    print(\"\\nBinary representation:\", file=file)\n    while state.grammar_encoding[pos] != 0xFFFF:\n        print(f\"{state.grammar_encoding[pos]:04x}\", end=\" \", file=file)\n        pos += 1\n    print(\"ffff\\n\")\n\n\n###################################\n# EBNF Grammar Parsing ends here  #\n###################################\n\n\nclass 
GrammarConstraint(ABC):\n    def __init__(self, grammar_str, start_rule_name, tokenizer):\n        self.tt = 0\n        self.nt = 0\n        state = parse_ebnf(grammar_str)\n        grammar_encoding = state.grammar_encoding\n        self.start_rule_id = state.symbol_ids.get(start_rule_name)\n\n        self.eos_token_id = tokenizer.eos_token_id\n        self.token_trie = TokenTrie(tokenizer)\n        self.tokenizer = tokenizer\n        self.grammar_encoding = grammar_encoding\n\n        pos = 0\n        rules: Dict[int, int] = {}\n\n        while grammar_encoding[pos] != 0xFFFF:\n            rule_id = grammar_encoding[pos]\n\n            # Store the current position in the 'rules' list at the index corresponding to rule_id.\n            # This effectively maps each rule_id to its position in the grammar encoding.\n            rules[rule_id] = pos\n            pos += 1\n\n            # Continue to the next rule in the encoding.\n            # The loop advances by the size indicated at the current position (grammar_encoding[pos])\n            # plus one for the size field itself.\n            while grammar_encoding[pos]:\n                pos += 1 + grammar_encoding[pos]\n            # Now we're at the end of the rule,\n            # so advance to the next rule by skipping the 0, which means 'end of rule'.\n            pos += 1\n\n        self.start_rule_pos = rules[self.start_rule_id]\n        self.rules_pos_dict: Dict[int, int] = rules\n\n    def init_stacks(self):\n        # suppose the start rule position is 0, then grammar_encoding[0] = rule_id\n        # grammar_encoding[1] = rule_size\n        # grammar_encoding[2] = rule_type\n        # this is why we need to add 2 to the start rule position\n        stack = [self.start_rule_pos + 2]\n        # convert to tuple for caching(immutable)\n        return self.advance_stack(tuple(stack))\n\n    # For each stack, resolve rules to find the actual characters that are\n    # accepted by this stack (not the set of 
sub-rules).\n    # This is where the parsing happens.\n    # The parsing is a top-down, left-to-right, depth-first traversal of the\n    # grammar.\n    @lru_cache(maxsize=32768)\n    def advance_stack(self, stack):\n        stack = list(stack)\n        # If the stack is empty, we're done. Because no more tokens should be accepted.\n        if len(stack) == 0:\n            return [stack]\n\n        # Get the top of the stack.\n        pos = stack[-1]\n\n        # If the stack head is a terminal(literal), we can resolve it immediately.\n        # literal is marked with 2 in the grammar encoding.\n        if self.grammar_encoding[pos] > 1:\n            return [stack]\n\n        # The stack head is a nonterminal (a rule reference, 1 in the grammar encoding).\n        # Resolving this rule gives a set of one or more possible positions\n        # (e.g. two in `a ::= b | c`)\n        # We pop the current rule off the stack and, for each option, push:\n        # - the symbol following this symbol in the current rule; then\n        # - the first symbol of the resolved rule.\n        referenced_rule_id = self.grammar_encoding[pos + 1]\n\n        # subpos should points to the size of the subrule\n        subpos = self.rules_pos_dict[referenced_rule_id] + 1\n        stacks: List[List[int]] = []\n\n        # do depth-first search to find all possible rules and check the next terminal\n        # When this value is non-zero, it indicates that subpos is not yet at the end of the rule, so we can continue.\n        # here subpos is a pointer, and the value in the rule encoding can never be 0 except for the end of the rule.\n        while self.grammar_encoding[subpos]:\n            new_stack = stack[:-1]\n            if self.grammar_encoding[pos + 2]:\n                # check if there is a next symbol in the current rule, e.g. 
`a ::= b c | d`\n                # if yes, push the pos to rule_size to the stack\n                new_stack.append(pos + 2)\n\n            # if the type of the next symbol is not \"empty\", push the first symbol of the resolved rule to the stack\n            if self.grammar_encoding[subpos + 1]:\n                new_stack.append(subpos + 1)\n            stacks.extend(self.advance_stack(tuple(new_stack)))\n            # The increment subpos += self.grammar_encoding[subpos] + 1\n            # moves subpos forward in the grammar encoding array to the next alternative in the current rule.\n            subpos += self.grammar_encoding[subpos] + 1\n        return stacks\n\n    def accept_char(self, *args, **kwargs):\n        \"\"\"Process a byte according to the grammar rules.\"\"\"\n        raise NotImplementedError\n\n    def accept_token_id(self, *args, **kwargs):\n        \"\"\"Process a token according to the grammar rules.\"\"\"\n        raise NotImplementedError\n\n    def filter_vocab(self, *args, **kwargs):\n        raise NotImplementedError\n\n\nclass IncrementalGrammarConstraint(GrammarConstraint):\n    def __init__(self, grammar_str, start_rule_name, tokenizer):\n        super().__init__(grammar_str, start_rule_name, tokenizer)\n\n    def accept_char(self, char, stacks):\n        byte = char if isinstance(char, int) else ord(char)\n        new_stacks = []\n        for stack in stacks:\n            # stack is empty\n            if not stack:\n                continue\n\n            pos = stack[-1]\n            num_chars = self.grammar_encoding[pos]\n\n            # to make pos point to the size of the char range rule\n            pos += 1\n            found = False\n            for i in range(0, num_chars, 2):\n                if self.grammar_encoding[pos + i] <= byte and byte <= self.grammar_encoding[pos + i + 1]:\n                    found = True\n                    break\n                if self.grammar_encoding[pos + i] >= byte and byte >= 
self.grammar_encoding[pos + i + 1]:\n                    found = True\n                    break\n            if not found:\n                continue\n\n            pos += num_chars\n            new_stack = stack[:-1]\n            if self.grammar_encoding[pos]:\n                new_stack.append(pos)\n            new_stacks.extend(self.advance_stack(tuple(new_stack)))\n\n        return new_stacks\n\n    def accept_string(self, string: str, stacks: List[List[int]]):\n        for char in string:\n            stacks = self.accept_char(char, stacks)\n        return stacks\n\n    def accept_token_id(self, token_id: int, stacks: List[List[int]]):\n        if token_id == self.eos_token_id:\n            if stacks and all(len(stack) != 0 for stack in stacks):\n                raise Exception(\n                    f\"At least one of the stack should be empty when EOS is reached. However, \"\n                    f\"the stacks are {stacks}\"\n                )\n            return []\n\n        for byte in self.token_trie.id2str(token_id):\n            stacks = self.accept_char(byte, stacks)\n            # check updated stacks\n            # TODO, I commented this out because it will fail when the stack is empty\n            # empty stack means the end of the grammar\n            # assert stacks != []\n\n        return stacks\n\n    def accept_token_ids(self, token_ids: List[int], stacks: List[List[int]], as_string=True):\n        if as_string:\n            string = self.tokenizer.decode(token_ids)\n            stacks = self.accept_string(string, stacks)\n        else:\n            for token_id in token_ids:\n                stacks = self.accept_token_id(token_id, stacks)\n        return stacks\n\n    def batch_filter_vocab(self, batch_stacks, device):\n        batch_acceptance = []\n        for stacks in batch_stacks:\n            batch_acceptance.append(self.filter_vocab(stacks, device))\n        return torch.stack(batch_acceptance)\n\n    def filter_vocab(self, stacks, 
device):\n        if not stacks:  # Check if stacks is empty\n            # Handle the empty case: for example, return a tensor of False\n            # The size of the tensor should match the size of your vocabulary\n            vocab_size = len(self.token_trie)\n            logger.debug(f\"sum of acceptance: {0}\")\n            return torch.zeros(vocab_size, dtype=torch.bool, device=device)\n\n        acceptance_matrix = torch.cat([self.token_acceptance_for_stack(tuple(stack), device) for stack in stacks])\n        # Merge stacks: any True => True\n        acceptance = acceptance_matrix.reshape(len(stacks), -1).any(dim=0)\n        logger.debug(f\"sum of acceptance: {acceptance.sum()}\")\n        return acceptance\n\n    # For each sub-rule in the grammar, cache whether each byte is accepted.\n    @lru_cache(maxsize=None)\n    def pos_char_acceptance(self, pos, char):\n        byte = char if isinstance(char, int) else ord(char)\n        num_chars = self.grammar_encoding[pos]\n        pos += 1\n        for i in range(0, num_chars, 2):\n            start = self.grammar_encoding[pos + i]\n            end = self.grammar_encoding[pos + i + 1]\n            if byte >= start and byte <= end:\n                return True\n            if byte <= start and byte >= end:\n                return True\n        return False\n\n    # Probably this should be configurable. 
If the grammar has an exceedingly\n    # large number of states, the correct setting is a tradeoff between GPU\n    # RAM usage and recomputation time.\n    #\n    # The main variable that pushes usage up here is number of states in the\n    # grammar.\n    @lru_cache(maxsize=32768)\n    def token_acceptance_for_stack(self, stack, device):\n        st = time.time()\n        stack = list(stack)  # needs to come in as a tuple for lru_cache\n\n        accepts = [False] * len(self.token_trie)\n        accepts[self.eos_token_id] = len(stack) == 0\n        if len(stack) == 0:\n            logger.debug(\"empty stack\")\n\n        def traverse_trie(trie, stacks):\n            for byte, next_trie in trie.items():\n                if byte == LEAF:\n                    token_id = next_trie\n                    if token_id != self.eos_token_id:\n                        accepts[token_id] = bool(stacks)\n                    continue\n\n                new_stacks = []\n                for stk in stacks:\n                    if not stk:\n                        continue\n\n                    pos = stk[-1]\n                    num_chars = self.grammar_encoding[pos]\n\n                    if not self.pos_char_acceptance(pos, byte):\n                        continue\n\n                    pos += num_chars + 1\n                    new_stack = stk[:-1]\n                    if self.grammar_encoding[pos]:\n                        new_stack.append(pos)\n                    new_stacks.extend(self.advance_stack(tuple(new_stack)))\n\n                if new_stacks:\n                    traverse_trie(next_trie, new_stacks)\n\n        traverse_trie(self.token_trie.trie, [stack])\n\n        et = time.time() - st\n        x = torch.tensor(accepts, dtype=torch.bool, device=device)\n        self.tt += et\n        self.nt += 1\n        return x\n\n\nclass StaticGrammarConstraint(GrammarConstraint):\n    def __init__(self, grammar_str, start_rule_name, tokenizer):\n        
super().__init__(grammar_str, start_rule_name, tokenizer)\n\n    def accept_char(self):\n        raise NotImplementedError\n\n\n#################\n# DATA STRUCTURES\n#################\n\n\nLEAF = -1\n\n\nclass TokenTrie:\n    def __init__(self, tokenizer):\n        self.eos_token_id = tokenizer.eos_token_id\n        self.tokens = []\n        self.trie = {}\n        self.load_tokens(tokenizer)\n\n    def id2str(self, token_id):\n        return self.tokens[token_id]\n\n    def __len__(self):\n        return len(self.tokens)\n\n    def load_tokens(self, tokenizer):\n        def replace_hex(match):\n            hex_value = match.group(1)\n            return chr(int(hex_value, 16))\n\n        if \"gpt2\" in tokenizer.__class__.__name__.lower():\n            special = tokenizer.additional_special_tokens_ids\n\n            # Here, the decoder does a string replace on a bunch of sequences\n            # like ' .' for '.'. This interferes with our assumptions, where a\n            # token should always have exactly one representation.\n            # Fortunately(?) text-generation-inference doesn't seem to run this\n            # cleanup, so we get extraneous spaces. 
So, in order to generate\n            # the right token set for TGI, we have to skip the space trimming.\n            # See:\n            # https://github.com/huggingface/transformers/blob/main/src/transformers/tokenization_utils_base.py#L3588-L3600\n            def fmt_token(id):\n                if id in special:\n                    return None\n                return bytes(tokenizer.decode([id], clean_up_tokenization_spaces=False), \"utf-8\")\n\n        elif \"llama\" in tokenizer.__class__.__name__.lower():\n\n            def fmt_token(id):\n                token = tokenizer.convert_ids_to_tokens(id)\n                token = re.sub(r\"<0x([0-9a-fA-F]{2})>\", replace_hex, token)\n                token = token.replace(\"▁\", \" \")\n                return token\n\n        else:\n            print(\"Warning: unrecognized tokenizer: using default token formatting\")\n\n            def fmt_token(id):\n                token = tokenizer.convert_ids_to_tokens(id)\n                return token\n\n        # note: vocab_size doesn't work here because there are also\n        # get_added_vocab() tokens\n        self.tokens = [fmt_token(i) for i in range(len(tokenizer.get_vocab()))]\n        for token_id, token_bytes in enumerate(self.tokens):\n            if token_bytes is not None:\n                self.insert_into_trie(self.trie, token_bytes, token_id)\n\n    def insert_into_trie(self, trie, token_bytes, token_id):\n        current = trie\n        for byte in token_bytes:\n            if byte not in current:\n                current[byte] = {}\n            current = current[byte]\n        current[LEAF] = token_id\n\n\n@lru_cache(maxsize=5)\ndef initialize_grammar(grammar_string):\n    return IncrementalGrammarConstraint(grammar_string.strip(), start_rule_name=\"root\", tokenizer=shared.tokenizer)\n"
  },
  {
    "path": "modules/grammar/logits_process.py",
    "content": "'''\nThis file has been 100% copied from this PR to the Transformers library:\nhttps://github.com/huggingface/transformers/pull/27557\n\nAuthor: Saibo-creator\nAuthor GitHub: https://github.com/Saibo-creator\n\nAll credits go to the author.\n'''\n\nimport math\n\nimport torch\nfrom transformers.generation.logits_process import LogitsProcessor\nfrom transformers.utils import add_start_docstrings\n\nLOGITS_PROCESSOR_INPUTS_DOCSTRING = r\"\"\"\n    Args:\n        input_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`):\n            Indices of input sequence tokens in the vocabulary. [What are input IDs?](../glossary#input-ids)\n        scores (`torch.FloatTensor` of shape `(batch_size, config.vocab_size)`):\n            Prediction scores of a language modeling head. These can be logits for each vocabulary when not using beam\n            search or log softmax for each vocabulary token when using beam search\n\n    Return:\n        `torch.FloatTensor` of shape `(batch_size, config.vocab_size)`: The processed prediction scores.\n\n\"\"\"\n\n\nclass GrammarConstrainedLogitsProcessor(LogitsProcessor):\n    def __init__(self, grammar_constraint):\n        self.last_size = None\n        self.grammar_constraint = grammar_constraint\n        self.batch_stacks = None\n\n    def filter_logits(self, logits, device):\n        # resolve each stack to a tensor of True/False for each token\n        # indicating acceptance\n        # acceptance = self.grammar_acceptor.filter_vocab(self.stacks, device)\n        acceptance = self.grammar_constraint.batch_filter_vocab(self.batch_stacks, device)\n        # logger.debug(acceptance)\n        # Logits to -inf where False\n        logits[~acceptance] = -math.inf\n\n    # TODO: batching\n    def process_logits(self, input_ids, scores, parse_start_index=None):\n        \"\"\"\n        :param input_ids:\n        :param scores:\n        :param parse_start_index: default None, which means generate from scratch. 
Set to 0 to parse all input_ids\n        :return:\n        \"\"\"\n        # we dynamically create stacks at the first call, so that we know the batch size and beam size\n        if self.batch_stacks is None:\n            self.batch_stacks = [self.grammar_constraint.init_stacks() for _ in range(len(input_ids))]\n\n        # if self.last_size is not set (which would be the case when processing the first token).\n        # In this case, do nothing.\n        if self.last_size is None:\n            prefix_to_parse = [\n                single_input_ids[parse_start_index:] if parse_start_index is not None else []\n                for single_input_ids in input_ids\n            ]\n            # self.grammar_acceptor.accept_token_ids(prefix_to_parse, self.stacks)\n            self.batch_stacks = [\n                self.grammar_constraint.accept_token_ids(prefix, stack)\n                for prefix, stack in zip(prefix_to_parse, self.batch_stacks)\n            ]\n        #  if the length of the current input IDs (input_ids[0]) is exactly one more than self.last_size.\n        #  This is expected in a scenario where inputs are processed incrementally, one token at a time.\n        elif len(input_ids[0]) == self.last_size + 1:\n            # self.stacks = self.grammar_acceptor.accept_token_id(input_ids[0][-1], self.stacks)\n            self.batch_stacks = [\n                self.grammar_constraint.accept_token_id(single_input_ids[-1], stack)\n                for single_input_ids, stack in zip(input_ids, self.batch_stacks)\n            ]\n        #  ensure that the input size is consistent with the expected incremental processing\n        #  (i.e., one token at a time).\n        else:\n            # here we check if the input_ids are one token longer than the last time we processed\n            # but we don't check if input_ids are actually valid.\n            # Imagine a scenario where we generate 10 tokens, then we replace the 10 generated tokens with 10 new tokens.\n          
  # In this case, the input_ids will be consistent with the last_size, but the input_ids are not valid.\n            # However, should we really check if the input_ids are valid here?\n            # If we do, then we need to reparse the whole input_ids at each call, which is not efficient.\n            # Maybe we should just trust the user to provide valid input_ids?\n            # The conclusion is that, we assume the input_ids are valid, and our generation will be correct.\n            # If the input_ids are not valid, then the generation result will be wrong and we don't take responsibility for that.\n            raise RuntimeError(\n                \"Input ID's length is inconsistent with the current state of \"\n                \"the GrammarConstrainedLogitsProcessor. If you want to process \"\n                \"another input sequence, please instantiate a new \"\n                \"GrammarConstrainedLogitsProcessor.\"\n            )\n\n        self.filter_logits(scores, scores.device)\n\n        self.last_size = len(input_ids[0])\n        return scores\n\n    @add_start_docstrings(LOGITS_PROCESSOR_INPUTS_DOCSTRING)\n    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor:\n        return self.process_logits(input_ids, scores)\n"
  },
  {
    "path": "modules/html_generator.py",
    "content": "import datetime\nimport functools\nimport html\nimport os\nimport re\nimport time\nfrom pathlib import Path\n\nimport markdown\nfrom PIL import Image, ImageOps\n\nfrom modules import shared\nfrom modules.reasoning import extract_reasoning\nfrom modules.sane_markdown_lists import SaneListExtension\nfrom modules.utils import get_available_chat_styles\n\n# This is to store the paths to the thumbnails of the profile pictures\nimage_cache = {}\n\n\ndef minify_css(css: str) -> str:\n    # Step 1: Remove comments\n    css = re.sub(r'/\\*.*?\\*/', '', css, flags=re.DOTALL)\n\n    # Step 2: Remove leading and trailing whitespace\n    css = re.sub(r'^[ \\t]*|[ \\t]*$', '', css, flags=re.MULTILINE)\n\n    # Step 3: Remove spaces after specific characters ({ : ; ,})\n    css = re.sub(r'([:{;,])\\s+', r'\\1', css)\n\n    # Step 4: Remove spaces before `{`\n    css = re.sub(r'\\s+{', '{', css)\n\n    # Step 5: Remove empty lines\n    css = re.sub(r'^\\s*$', '', css, flags=re.MULTILINE)\n\n    # Step 6: Collapse all lines into one\n    css = re.sub(r'\\n', '', css)\n\n    return css\n\n\nwith open(Path(__file__).resolve().parent / '../css/html_readable_style.css', 'r', encoding='utf-8') as f:\n    readable_css = f.read()\nwith open(Path(__file__).resolve().parent / '../css/html_instruct_style.css', 'r', encoding='utf-8') as f:\n    instruct_css = f.read()\n\n# Custom chat styles\nchat_styles = {}\nfor k in get_available_chat_styles():\n    with open(Path(f'css/chat_style-{k}.css'), 'r', encoding='utf-8') as f:\n        chat_styles[k] = f.read()\n\n# Handle styles that derive from other styles\nfor k in chat_styles:\n    lines = chat_styles[k].split('\\n')\n    input_string = lines[0]\n    match = re.search(r'chat_style-([a-z\\-]*)\\.css', input_string)\n\n    if match:\n        style = match.group(1)\n        chat_styles[k] = chat_styles.get(style, '') + '\\n\\n' + '\\n'.join(lines[1:])\n\n# Reduce the size of the CSS sources above\nreadable_css = 
minify_css(readable_css)\ninstruct_css = minify_css(instruct_css)\nfor k in chat_styles:\n    chat_styles[k] = minify_css(chat_styles[k])\n\n\ndef fix_newlines(string):\n    string = string.replace('\\n', '\\n\\n')\n    string = re.sub(r\"\\n{3,}\", \"\\n\\n\", string)\n    string = string.strip()\n    return string\n\n\ndef replace_quotes(text):\n    # Define a list of quote pairs (opening and closing), using HTML entities\n    quote_pairs = [\n        ('&quot;', '&quot;'),  # Double quotes\n        ('&ldquo;', '&rdquo;'),  # Unicode left and right double quotation marks\n        ('&lsquo;', '&rsquo;'),  # Unicode left and right single quotation marks\n        ('&laquo;', '&raquo;'),  # French quotes\n        ('&bdquo;', '&ldquo;'),  # German quotes\n        ('&lsquo;', '&rsquo;'),  # Alternative single quotes\n        ('&#8220;', '&#8221;'),  # Unicode quotes (numeric entities)\n        ('&#x201C;', '&#x201D;'),  # Unicode quotes (hex entities)\n        ('\\u201C', '\\u201D'),  # Unicode quotes (literal chars)\n    ]\n\n    # Create a regex pattern that matches any of the quote pairs, including newlines\n    pattern = '|'.join(f'({re.escape(open_q)})(.*?)({re.escape(close_q)})' for open_q, close_q in quote_pairs)\n\n    # Replace matched patterns with <q> tags, keeping original quotes\n    def replacer(m):\n        # Find the first non-None group set\n        for i in range(1, len(m.groups()), 3):  # Step through each sub-pattern's groups\n            if m.group(i):  # If this sub-pattern matched\n                return f'<q>{m.group(i)}{m.group(i + 1)}{m.group(i + 2)}</q>'\n\n        return m.group(0)  # Fallback (shouldn't happen)\n\n    replaced_text = re.sub(pattern, replacer, text, flags=re.DOTALL)\n    return replaced_text\n\n\ndef replace_blockquote(m):\n    return m.group().replace('\\n', '\\n> ').replace('\\\\begin{blockquote}', '').replace('\\\\end{blockquote}', '')\n\n\ndef extract_thinking_block(string):\n    \"\"\"Extract thinking blocks from the 
beginning of an HTML-escaped string.\"\"\"\n    return extract_reasoning(string, html_escaped=True)\n\n\n\ndef build_tool_call_block(header, body, message_id, index):\n    \"\"\"Build HTML for a tool call accordion block.\"\"\"\n    block_id = f\"tool-call-{message_id}-{index}\"\n\n    if body == '...':\n        # Pending placeholder — no expandable body, just title with ellipsis\n        return f'''\n        <details class=\"thinking-block\" data-block-id=\"{block_id}\">\n            <summary class=\"thinking-header\">\n                {tool_svg_small}\n                <span class=\"thinking-title\">{html.escape(header)} ...</span>\n            </summary>\n        </details>\n        '''\n\n    # Build a plain <pre> directly to avoid highlight.js auto-detection\n    escaped_body = html.escape(body)\n    return f'''\n    <details class=\"thinking-block\" data-block-id=\"{block_id}\">\n        <summary class=\"thinking-header\">\n            {tool_svg_small}\n            <span class=\"thinking-title\">{html.escape(header)}</span>\n        </summary>\n        <div class=\"thinking-content pretty_scrollbar\"><pre><code class=\"nohighlight\">{escaped_body}</code></pre></div>\n    </details>\n    '''\n\n\ndef build_thinking_block(thinking_content, message_id, has_remaining_content, thinking_index=0):\n    \"\"\"Build HTML for a thinking block.\"\"\"\n    if thinking_content is None:\n        return None\n\n    # Process the thinking content through markdown\n    thinking_html = process_markdown_content(thinking_content)\n\n    # Generate unique ID for the thinking block\n    block_id = f\"thinking-{message_id}-{thinking_index}\"\n\n    # Check if thinking is complete or still in progress\n    is_streaming = not has_remaining_content\n    title_text = \"Thinking...\" if is_streaming else \"Thought\"\n\n    return f'''\n    <details class=\"thinking-block\" data-block-id=\"{block_id}\" data-streaming=\"{str(is_streaming).lower()}\">\n        <summary 
class=\"thinking-header\">\n            {info_svg_small}\n            <span class=\"thinking-title\">{title_text}</span>\n        </summary>\n        <div class=\"thinking-content pretty_scrollbar\">{thinking_html}</div>\n    </details>\n    '''\n\n\ndef build_main_content_block(content):\n    \"\"\"Build HTML for the main content block.\"\"\"\n    if not content:\n        return \"\"\n\n    return process_markdown_content(content)\n\n\ndef process_markdown_content(string):\n    \"\"\"\n    Process a string through the markdown conversion pipeline.\n    Uses robust manual parsing to ensure correct LaTeX and Code Block rendering.\n    \"\"\"\n    if not string:\n        return \"\"\n\n    # Define unique placeholders for LaTeX asterisks and underscores\n    LATEX_ASTERISK_PLACEHOLDER = \"LATEXASTERISKPLACEHOLDER\"\n    LATEX_UNDERSCORE_PLACEHOLDER = \"LATEXUNDERSCOREPLACEHOLDER\"\n\n    def protect_asterisks_underscores_in_latex(match):\n        \"\"\"A replacer function for re.sub to protect asterisks and underscores in multiple LaTeX formats.\"\"\"\n        # Check which delimiter group was captured\n        if match.group(1) is not None:  # Content from $$...$$\n            content = match.group(1)\n            modified_content = content.replace('*', LATEX_ASTERISK_PLACEHOLDER)\n            modified_content = modified_content.replace('_', LATEX_UNDERSCORE_PLACEHOLDER)\n            return f'{modified_content}'\n        elif match.group(2) is not None:  # Content from \\[...\\]\n            content = match.group(2)\n            modified_content = content.replace('*', LATEX_ASTERISK_PLACEHOLDER)\n            modified_content = modified_content.replace('_', LATEX_UNDERSCORE_PLACEHOLDER)\n            return f'\\\\[{modified_content}\\\\]'\n        elif match.group(3) is not None:  # Content from \\(...\\)\n            content = match.group(3)\n            modified_content = content.replace('*', LATEX_ASTERISK_PLACEHOLDER)\n            modified_content = 
modified_content.replace('_', LATEX_UNDERSCORE_PLACEHOLDER)\n            return f'\\\\({modified_content}\\\\)'\n\n        return match.group(0)  # Fallback\n\n    # Make \\[ \\]  LaTeX equations inline\n    pattern = r'^\\s*\\\\\\[\\s*\\n([\\s\\S]*?)\\n\\s*\\\\\\]\\s*$'\n    replacement = r'\\\\[ \\1 \\\\]'\n    string = re.sub(pattern, replacement, string, flags=re.MULTILINE)\n\n    # Escape backslashes\n    string = string.replace('\\\\', '\\\\\\\\')\n\n    # Quote to <q></q>\n    string = replace_quotes(string)\n\n    # Blockquote\n    string = re.sub(r'(^|[\\n])&gt;', r'\\1>', string)\n    pattern = re.compile(r'\\\\begin{blockquote}(.*?)\\\\end{blockquote}', re.DOTALL)\n    string = pattern.sub(replace_blockquote, string)\n\n    # Code block standardization\n    string = string.replace('\\\\begin{code}', '```')\n    string = string.replace('\\\\end{code}', '```')\n    string = string.replace('\\\\begin{align*}', '$$')\n    string = string.replace('\\\\end{align*}', '$$')\n    string = string.replace('\\\\begin{align}', '$$')\n    string = string.replace('\\\\end{align}', '$$')\n    string = string.replace('\\\\begin{equation}', '$$')\n    string = string.replace('\\\\end{equation}', '$$')\n    string = string.replace('\\\\begin{equation*}', '$$')\n    string = string.replace('\\\\end{equation*}', '$$')\n    string = re.sub(r\"(.)```\", r\"\\1\\n```\", string)\n\n    # Protect asterisks and underscores within all LaTeX blocks before markdown conversion\n    latex_pattern = re.compile(r'((?:^|[\\r\\n\\s])\\$\\$[^`]*?\\$\\$)|\\\\\\[(.*?)\\\\\\]|\\\\\\((.*?)\\\\\\)',\n                               re.DOTALL)\n    string = latex_pattern.sub(protect_asterisks_underscores_in_latex, string)\n\n    result = ''\n    is_code = False\n    is_latex = False\n\n    # Manual line iteration for robust structure parsing\n    for line in string.split('\\n'):\n        stripped_line = line.strip()\n\n        if stripped_line.startswith('```'):\n            is_code = not 
is_code\n        elif stripped_line.startswith('$$') and (stripped_line == \"$$\" or not stripped_line.endswith('$$')):\n            is_latex = not is_latex\n        elif stripped_line.endswith('$$'):\n            is_latex = False\n        elif stripped_line.startswith('\\\\\\\\[') and not stripped_line.endswith('\\\\\\\\]'):\n            is_latex = True\n        elif stripped_line.startswith('\\\\\\\\]'):\n            is_latex = False\n        elif stripped_line.endswith('\\\\\\\\]'):\n            is_latex = False\n\n        result += line\n\n        # Don't add an extra \\n for code, LaTeX, or tables\n        if is_code or is_latex or line.startswith('|'):\n            result += '\\n'\n        # Also don't add an extra \\n for lists\n        elif stripped_line.startswith('-') or stripped_line.startswith('*') or stripped_line.startswith('+') or stripped_line.startswith('>') or re.match(r'\\d+\\.', stripped_line):\n            result += '  \\n'\n        else:\n            result += '  \\n'\n\n    result = result.strip()\n    if is_code:\n        result += '\\n```'  # Unfinished code block\n\n    # Unfinished list, like \"\\n1.\". 
A |delete| string is added and then\n    # removed to force a <ol> or <ul> to be generated instead of a <p>.\n    list_item_pattern = r'(\\n\\d+\\.?|\\n\\s*[-*+]\\s*([*_~]{1,3})?)$'\n    if re.search(list_item_pattern, result):\n        delete_str = '|delete|'\n\n        if re.search(r'(\\d+\\.?)$', result) and not result.endswith('.'):\n            result += '.'\n\n        # Add the delete string after the list item\n        result = re.sub(list_item_pattern, r'\\g<1> ' + delete_str, result)\n\n        # Convert to HTML using markdown\n        html_output = markdown.markdown(result, extensions=['fenced_code', 'tables', SaneListExtension()])\n\n        # Remove the delete string from the HTML output\n        pos = html_output.rfind(delete_str)\n        if pos > -1:\n            html_output = html_output[:pos] + html_output[pos + len(delete_str):]\n    else:\n        # Convert to HTML using markdown\n        html_output = markdown.markdown(result, extensions=['fenced_code', 'tables', SaneListExtension()])\n\n    # Restore the LaTeX asterisks and underscores after markdown conversion\n    html_output = html_output.replace(LATEX_ASTERISK_PLACEHOLDER, '*')\n    html_output = html_output.replace(LATEX_UNDERSCORE_PLACEHOLDER, '_')\n\n    # Remove extra newlines before </code>\n    html_output = re.sub(r'\\s*</code>', '</code>', html_output)\n\n    # Unescape code blocks\n    pattern = re.compile(r'<code[^>]*>(.*?)</code>', re.DOTALL)\n    html_output = pattern.sub(lambda x: html.unescape(x.group()), html_output)\n\n    # Unescape backslashes\n    html_output = html_output.replace('\\\\\\\\', '\\\\')\n\n    # Wrap tables in a scrollable div\n    html_output = html_output.replace('<table>', '<div class=\"table-wrapper pretty_scrollbar\"><table>').replace('</table>', '</table></div>')\n\n    return html_output\n\n\n@functools.lru_cache(maxsize=None)\ndef convert_to_markdown(string, message_id=None):\n    \"\"\"\n    Convert a string to markdown HTML with support for 
multiple block types.\n    Blocks are assembled in order: thinking, main content, etc.\n    \"\"\"\n    if not string:\n        return \"\"\n\n    # Use a default message ID if none provided\n    if message_id is None:\n        message_id = \"unknown\"\n\n    # Find tool call blocks by position, then process the text segments\n    # between them using extract_thinking_block (which supports all\n    # THINKING_FORMATS, including end-only variants like Qwen's).\n    tool_call_pattern = re.compile(r'<tool_call>(.*?)\\n(.*?)\\n</tool_call>', re.DOTALL)\n    tool_calls = list(tool_call_pattern.finditer(string))\n\n    if not tool_calls:\n        # No tool calls — use original single-pass extraction\n        thinking_content, remaining_content = extract_thinking_block(string)\n        blocks = []\n        thinking_html = build_thinking_block(thinking_content, message_id, bool(remaining_content))\n        if thinking_html:\n            blocks.append(thinking_html)\n\n        main_html = build_main_content_block(remaining_content)\n        if main_html:\n            blocks.append(main_html)\n\n        return ''.join(blocks)\n\n    # Split string into text segments around tool_call blocks and\n    # run extract_thinking_block on each segment for full format support.\n    html_parts = []\n    last_end = 0\n    tool_idx = 0\n    think_idx = 0\n\n    def process_text_segment(text, is_last_segment):\n        \"\"\"Process a text segment between tool_call blocks for thinking content.\"\"\"\n        nonlocal think_idx\n        if not text.strip():\n            return\n\n        while text.strip():\n            thinking_content, remaining = extract_thinking_block(text)\n            if thinking_content is None:\n                break\n            has_remaining = bool(remaining.strip()) or not is_last_segment\n            html_parts.append(build_thinking_block(thinking_content, message_id, has_remaining, think_idx))\n            think_idx += 1\n            text = remaining\n\n       
 if text.strip():\n            html_parts.append(process_markdown_content(text))\n\n    for tc in tool_calls:\n        # Process text before this tool_call\n        process_text_segment(string[last_end:tc.start()], is_last_segment=False)\n\n        # Add tool call accordion\n        header = tc.group(1).strip()\n        body = tc.group(2).strip()\n        html_parts.append(build_tool_call_block(header, body, message_id, tool_idx))\n        tool_idx += 1\n        last_end = tc.end()\n\n    # Process text after the last tool_call\n    process_text_segment(string[last_end:], is_last_segment=True)\n\n    return ''.join(html_parts)\n\n\ndef convert_to_markdown_wrapped(string, message_id=None, use_cache=True):\n    '''\n    Used to avoid caching convert_to_markdown calls during streaming.\n    '''\n\n    if use_cache:\n        return convert_to_markdown(string, message_id=message_id)\n\n    return convert_to_markdown.__wrapped__(string, message_id=message_id)\n\n\ndef generate_basic_html(string):\n    convert_to_markdown.cache_clear()\n    string = convert_to_markdown(string)\n    string = f'<style>{readable_css}</style><div class=\"readable-container\">{string}</div>'\n    return string\n\n\ndef make_thumbnail(image):\n    image = image.resize((350, round(image.size[1] / image.size[0] * 350)), Image.Resampling.LANCZOS)\n    if image.size[1] > 470:\n        image = ImageOps.fit(image, (350, 470), Image.LANCZOS)\n\n    return image\n\n\ndef get_image_cache(path):\n    cache_folder = Path(shared.args.disk_cache_dir)\n    if not cache_folder.exists():\n        cache_folder.mkdir()\n\n    mtime = os.stat(path).st_mtime\n    if (path in image_cache and mtime != image_cache[path][0]) or (path not in image_cache):\n        img = make_thumbnail(Image.open(path))\n\n        old_p = Path(f'{cache_folder}/{path.name}_cache.png')\n        p = Path(f'{cache_folder}/cache_{path.name}.png')\n        if old_p.exists():\n            old_p.rename(p)\n\n        output_file = p\n        
img.convert('RGBA').save(output_file, format='PNG')\n        image_cache[path] = [mtime, output_file.as_posix()]\n\n    return image_cache[path][1]\n\n\ncopy_svg = '''<svg xmlns=\"http://www.w3.org/2000/svg\" width=\"20\" height=\"20\" viewBox=\"0 0 24 24\" fill=\"none\" stroke=\"currentColor\" stroke-width=\"2\" stroke-linecap=\"round\" stroke-linejoin=\"round\" class=\"tabler-icon tabler-icon-copy\"><path d=\"M8 8m0 2a2 2 0 0 1 2 -2h8a2 2 0 0 1 2 2v8a2 2 0 0 1 -2 2h-8a2 2 0 0 1 -2 -2z\"></path><path d=\"M16 8v-2a2 2 0 0 0 -2 -2h-8a2 2 0 0 0 -2 2v8a2 2 0 0 0 2 2h2\"></path></svg>'''\nrefresh_svg = '''<svg xmlns=\"http://www.w3.org/2000/svg\" width=\"20\" height=\"20\" viewBox=\"0 0 24 24\" fill=\"none\" stroke=\"currentColor\" stroke-width=\"2\" stroke-linecap=\"round\" stroke-linejoin=\"round\" class=\"tabler-icon tabler-icon-repeat\"><path d=\"M4 12v-3a3 3 0 0 1 3 -3h13m-3 -3l3 3l-3 3\"></path><path d=\"M20 12v3a3 3 0 0 1 -3 3h-13m3 3l-3 -3l3 -3\"></path></svg>'''\ncontinue_svg = '''<svg  xmlns=\"http://www.w3.org/2000/svg\"  width=\"20\"  height=\"20\"  viewBox=\"0 0 24 24\"  fill=\"none\"  stroke=\"currentColor\"  stroke-width=\"2\"  stroke-linecap=\"round\"  stroke-linejoin=\"round\"  class=\"icon icon-tabler icons-tabler-outline icon-tabler-player-play\"><path stroke=\"none\" d=\"M0 0h24v24H0z\" fill=\"none\"/><path d=\"M7 4v16l13 -8z\" /></svg>'''\nremove_svg = '''<svg  xmlns=\"http://www.w3.org/2000/svg\"  width=\"20\"  height=\"20\"  viewBox=\"0 0 24 24\"  fill=\"none\"  stroke=\"currentColor\"  stroke-width=\"2\"  stroke-linecap=\"round\"  stroke-linejoin=\"round\"  class=\"icon icon-tabler icons-tabler-outline icon-tabler-trash\"><path stroke=\"none\" d=\"M0 0h24v24H0z\" fill=\"none\"/><path d=\"M4 7l16 0\" /><path d=\"M10 11l0 6\" /><path d=\"M14 11l0 6\" /><path d=\"M5 7l1 12a2 2 0 0 0 2 2h8a2 2 0 0 0 2 -2l1 -12\" /><path d=\"M9 7v-3a1 1 0 0 1 1 -1h4a1 1 0 0 1 1 1v3\" /></svg>'''\nbranch_svg = '''<svg  xmlns=\"http://www.w3.org/2000/svg\"  
width=\"24\"  height=\"24\"  viewBox=\"0 0 24 24\"  fill=\"none\"  stroke=\"currentColor\"  stroke-width=\"2\"  stroke-linecap=\"round\"  stroke-linejoin=\"round\"  class=\"icon icon-tabler icons-tabler-outline icon-tabler-git-branch\"><path stroke=\"none\" d=\"M0 0h24v24H0z\" fill=\"none\"/><path d=\"M7 18m-2 0a2 2 0 1 0 4 0a2 2 0 1 0 -4 0\" /><path d=\"M7 6m-2 0a2 2 0 1 0 4 0a2 2 0 1 0 -4 0\" /><path d=\"M17 6m-2 0a2 2 0 1 0 4 0a2 2 0 1 0 -4 0\" /><path d=\"M7 8l0 8\" /><path d=\"M9 18h6a2 2 0 0 0 2 -2v-5\" /><path d=\"M14 14l3 -3l3 3\" /></svg>'''\nedit_svg = '''<svg xmlns=\"http://www.w3.org/2000/svg\" width=\"20\" height=\"20\" viewBox=\"0 0 24 24\" fill=\"none\" stroke=\"currentColor\" stroke-width=\"2\" stroke-linecap=\"round\" stroke-linejoin=\"round\" class=\"tabler-icon tabler-icon-pencil\"><path d=\"M4 20h4l10.5 -10.5a2.828 2.828 0 1 0 -4 -4l-10.5 10.5v4\"></path><path d=\"M13.5 6.5l4 4\"></path></svg>'''\ninfo_svg = '''<svg xmlns=\"http://www.w3.org/2000/svg\" width=\"20\" height=\"20\" viewBox=\"0 0 24 24\" fill=\"none\" stroke=\"currentColor\" stroke-width=\"2\" stroke-linecap=\"round\" stroke-linejoin=\"round\" class=\"thinking-icon tabler-icon tabler-icon-info-circle\"><path stroke=\"none\" d=\"M0 0h24v24H0z\" fill=\"none\"/><path d=\"M12 2a10 10 0 0 1 0 20a10 10 0 0 1 0 -20z\" /><path d=\"M12 16v-4\" /><path d=\"M12 8h.01\" /></svg>'''\ninfo_svg_small = '''<svg xmlns=\"http://www.w3.org/2000/svg\" width=\"16\" height=\"16\" viewBox=\"0 0 24 24\" fill=\"none\" stroke=\"currentColor\" stroke-width=\"2\" stroke-linecap=\"round\" stroke-linejoin=\"round\" class=\"thinking-icon tabler-icon tabler-icon-info-circle\"><path stroke=\"none\" d=\"M0 0h24v24H0z\" fill=\"none\"/><path d=\"M12 2a10 10 0 0 1 0 20a10 10 0 0 1 0 -20z\" /><path d=\"M12 16v-4\" /><path d=\"M12 8h.01\" /></svg>'''\ntool_svg_small = '''<svg xmlns=\"http://www.w3.org/2000/svg\" width=\"16\" height=\"16\" viewBox=\"0 0 24 24\" fill=\"none\" stroke=\"currentColor\" stroke-width=\"2\" 
stroke-linecap=\"round\" stroke-linejoin=\"round\" class=\"thinking-icon tabler-icon tabler-icon-tool\"><path stroke=\"none\" d=\"M0 0h24v24H0z\" fill=\"none\"/><path d=\"M7 10h3v-3l-3.5 -3.5a6 6 0 0 1 8 8l6 6a2 2 0 0 1 -3 3l-6 -6a6 6 0 0 1 -8 -8l3.5 3.5\" /></svg>'''\nattachment_svg = '''<svg xmlns=\"http://www.w3.org/2000/svg\" width=\"16\" height=\"16\" viewBox=\"0 0 24 24\" fill=\"none\" stroke=\"currentColor\" stroke-width=\"2\" stroke-linecap=\"round\" stroke-linejoin=\"round\"><path d=\"M21.44 11.05l-9.19 9.19a6 6 0 0 1-8.48-8.48l9.19-9.19a4 4 0 0 1 5.66 5.66l-9.2 9.19a2 2 0 0 1-2.83-2.83l8.49-8.48\"></path></svg>'''\n\ncopy_button = f'<button class=\"footer-button footer-copy-button\" title=\"Copy\" onclick=\"copyToClipboard(this)\">{copy_svg}</button>'\nbranch_button = f'<button class=\"footer-button footer-branch-button\" title=\"Branch here\" onclick=\"branchHere(this)\">{branch_svg}</button>'\nedit_button = f'<button class=\"footer-button footer-edit-button\" title=\"Edit\" onclick=\"editHere(this)\">{edit_svg}</button>'\nrefresh_button = f'<button class=\"footer-button footer-refresh-button\" title=\"Regenerate\" onclick=\"regenerateClick()\">{refresh_svg}</button>'\ncontinue_button = f'<button class=\"footer-button footer-continue-button\" title=\"Continue\" onclick=\"continueClick()\">{continue_svg}</button>'\nremove_button = f'<button class=\"footer-button footer-remove-button\" title=\"Remove last reply\" onclick=\"removeLastClick()\">{remove_svg}</button>'\ninfo_button = f'<button class=\"footer-button footer-info-button\" title=\"message\">{info_svg}</button>'\n\n\ndef format_message_timestamp(history, role, index, tooltip_include_timestamp=True):\n    \"\"\"Get a formatted timestamp HTML span for a message if available\"\"\"\n    key = f\"{role}_{index}\"\n    if 'metadata' in history and key in history['metadata'] and history['metadata'][key].get('timestamp'):\n        timestamp = history['metadata'][key]['timestamp']\n        tooltip_text = 
get_message_tooltip(history, role, index, include_timestamp=tooltip_include_timestamp)\n        title_attr = f' title=\"{html.escape(tooltip_text)}\"' if tooltip_text else ''\n        return f\"<span class='timestamp'{title_attr}>{timestamp}</span>\"\n\n    return \"\"\n\n\ndef format_message_attachments(history, role, index):\n    \"\"\"Get formatted HTML for message attachments if available\"\"\"\n    key = f\"{role}_{index}\"\n    if 'metadata' in history and key in history['metadata'] and 'attachments' in history['metadata'][key]:\n        attachments = history['metadata'][key]['attachments']\n        if not attachments:\n            return \"\"\n\n        attachments_html = '<div class=\"message-attachments\">'\n        for attachment in attachments:\n            name = html.escape(attachment[\"name\"])\n\n            if attachment.get(\"type\") == \"image\":\n                image_data = attachment.get(\"image_data\", \"\")\n                attachments_html += (\n                    f'<div class=\"attachment-box image-attachment\">'\n                    f'<img src=\"{image_data}\" alt=\"{name}\" class=\"image-preview\" />'\n                    f'<div class=\"attachment-name\">{name}</div>'\n                    f'</div>'\n                )\n            else:\n                # Make clickable if URL exists (web search)\n                if \"url\" in attachment:\n                    name = f'<a href=\"{html.escape(attachment[\"url\"])}\" target=\"_blank\" rel=\"noopener noreferrer\">{name}</a>'\n\n                attachments_html += (\n                    f'<div class=\"attachment-box\">'\n                    f'<div class=\"attachment-icon\">{attachment_svg}</div>'\n                    f'<div class=\"attachment-name\">{name}</div>'\n                    f'</div>'\n                )\n\n        attachments_html += '</div>'\n        return attachments_html\n\n    return \"\"\n\n\ndef get_message_tooltip(history, role, index, include_timestamp=True):\n    \"\"\"Get 
tooltip text combining timestamp and model name for a message\"\"\"\n    key = f\"{role}_{index}\"\n    if 'metadata' not in history or key not in history['metadata']:\n        return \"\"\n\n    meta = history['metadata'][key]\n    tooltip_parts = []\n\n    if include_timestamp and meta.get('timestamp'):\n        tooltip_parts.append(meta['timestamp'])\n    if meta.get('model_name'):\n        tooltip_parts.append(f\"Model: {meta['model_name']}\")\n\n    return \" | \".join(tooltip_parts)\n\n\ndef get_version_navigation_html(history, i, role):\n    \"\"\"Generate simple navigation arrows for message versions\"\"\"\n    key = f\"{role}_{i}\"\n    metadata = history.get('metadata', {})\n\n    if key not in metadata or 'versions' not in metadata[key]:\n        return \"\"\n\n    versions = metadata[key]['versions']\n    # Default to the last version if current_version_index isn't set in metadata\n    current_idx = metadata[key].get('current_version_index', len(versions) - 1 if versions else 0)\n\n    if len(versions) <= 1:\n        return \"\"\n\n    left_disabled = ' disabled' if current_idx == 0 else ''\n    right_disabled = ' disabled' if current_idx >= len(versions) - 1 else ''\n\n    left_arrow = f'<button class=\"footer-button version-nav-button\"{left_disabled} onclick=\"navigateVersion(this, \\'left\\')\" title=\"Previous version\">&lt;</button>'\n    right_arrow = f'<button class=\"footer-button version-nav-button\"{right_disabled} onclick=\"navigateVersion(this, \\'right\\')\" title=\"Next version\">&gt;</button>'\n    position = f'<span class=\"version-position\">{current_idx + 1}/{len(versions)}</span>'\n\n    return f'<div class=\"version-navigation\">{left_arrow}{position}{right_arrow}</div>'\n\n\ndef actions_html(history, i, role, info_message=\"\"):\n    action_buttons = \"\"\n    version_nav_html = \"\"\n\n    if role == \"assistant\":\n        action_buttons = (\n            f'{copy_button}'\n            f'{edit_button}'\n            
f'{refresh_button if i == len(history[\"visible\"]) - 1 else \"\"}'\n            f'{continue_button if i == len(history[\"visible\"]) - 1 else \"\"}'\n            f'{remove_button if i == len(history[\"visible\"]) - 1 else \"\"}'\n            f'{branch_button}'\n        )\n\n        version_nav_html = get_version_navigation_html(history, i, \"assistant\")\n    elif role == \"user\":\n        action_buttons = (\n            f'{copy_button}'\n            f'{edit_button}'\n        )\n\n        version_nav_html = get_version_navigation_html(history, i, \"user\")\n\n    return (f'<div class=\"message-actions\">'\n            f'{action_buttons}'\n            f'{info_message}'\n            f'</div>'\n            f'{version_nav_html}')\n\n\ndef generate_instruct_html(history, last_message_only=False):\n    if not last_message_only:\n        output = f'<style>{instruct_css}</style><div class=\"chat\" id=\"chat\" data-mode=\"instruct\"><div class=\"messages\">'\n    else:\n        output = \"\"\n\n    def create_message(role, content, raw_content):\n        \"\"\"Inner function that captures variables from outer scope.\"\"\"\n        class_name = \"user-message\" if role == \"user\" else \"assistant-message\"\n\n        # Get role-specific data\n        timestamp = format_message_timestamp(history, role, i)\n        attachments = format_message_attachments(history, role, i)\n\n        # Create info button if timestamp exists\n        info_message = \"\"\n        if timestamp:\n            tooltip_text = get_message_tooltip(history, role, i)\n            info_message = info_button.replace('title=\"message\"', f'title=\"{html.escape(tooltip_text)}\"')\n\n        return (\n            f'<div class=\"{class_name}\" '\n            f'data-raw=\"{html.escape(raw_content, quote=True)}\"'\n            f'data-index={i}>'\n            f'<div class=\"text\">'\n            f'<div class=\"message-body\">{content}</div>'\n            f'{attachments}'\n            f'{actions_html(history, 
i, role, info_message)}'\n            f'</div>'\n            f'</div>'\n        )\n\n    # Determine range\n    start_idx = len(history['visible']) - 1 if last_message_only else 0\n    end_idx = len(history['visible'])\n\n    for i in range(start_idx, end_idx):\n        row_visible = history['visible'][i]\n        row_internal = history['internal'][i]\n\n        # Convert content\n        if last_message_only:\n            converted_visible = [None, convert_to_markdown_wrapped(row_visible[1], message_id=i, use_cache=i != len(history['visible']) - 1)]\n        else:\n            converted_visible = [convert_to_markdown_wrapped(entry, message_id=i, use_cache=i != len(history['visible']) - 1) for entry in row_visible]\n\n        # Generate messages\n        if not last_message_only and converted_visible[0]:\n            output += create_message(\"user\", converted_visible[0], row_internal[0])\n\n        output += create_message(\"assistant\", converted_visible[1], row_internal[1])\n\n    if not last_message_only:\n        output += \"</div></div>\"\n\n    return output\n\n\ndef get_character_image_with_cache_buster():\n    \"\"\"Get character image URL with cache busting based on file modification time\"\"\"\n    cache_path = shared.user_data_dir / \"cache\" / \"pfp_character_thumb.png\"\n    if cache_path.exists():\n        mtime = int(cache_path.stat().st_mtime)\n        return f'<img src=\"file/{shared.user_data_dir}/cache/pfp_character_thumb.png?{mtime}\" class=\"pfp_character\">'\n\n    return ''\n\n\ndef generate_cai_chat_html(history, name1, name2, style, character, reset_cache=False, last_message_only=False):\n    if not last_message_only:\n        output = f'<style>{chat_styles[style]}</style><div class=\"chat\" id=\"chat\"><div class=\"messages\">'\n    else:\n        output = \"\"\n\n    img_bot = get_character_image_with_cache_buster()\n\n    def create_message(role, content, raw_content):\n        \"\"\"Inner function for CAI-style messages.\"\"\"\n       
 circle_class = \"circle-you\" if role == \"user\" else \"circle-bot\"\n        name = name1 if role == \"user\" else name2\n\n        # Get role-specific data\n        timestamp = format_message_timestamp(history, role, i, tooltip_include_timestamp=False)\n        attachments = format_message_attachments(history, role, i)\n\n        # Get appropriate image\n        if role == \"user\":\n            img = (f'<img src=\"file/{shared.user_data_dir}/cache/pfp_me.png?{time.time() if reset_cache else \"\"}\">'\n                   if (shared.user_data_dir / \"cache\" / \"pfp_me.png\").exists() else '')\n        else:\n            img = img_bot\n\n        return (\n            f'<div class=\"message\" '\n            f'data-raw=\"{html.escape(raw_content, quote=True)}\"'\n            f'data-index={i}>'\n            f'<div class=\"{circle_class}\">{img}</div>'\n            f'<div class=\"text\">'\n            f'<div class=\"username\">{name}{timestamp}</div>'\n            f'<div class=\"message-body\">{content}</div>'\n            f'{attachments}'\n            f'{actions_html(history, i, role)}'\n            f'</div>'\n            f'</div>'\n        )\n\n    # Determine range\n    start_idx = len(history['visible']) - 1 if last_message_only else 0\n    end_idx = len(history['visible'])\n\n    for i in range(start_idx, end_idx):\n        row_visible = history['visible'][i]\n        row_internal = history['internal'][i]\n\n        # Convert content\n        if last_message_only:\n            converted_visible = [None, convert_to_markdown_wrapped(row_visible[1], message_id=i, use_cache=i != len(history['visible']) - 1)]\n        else:\n            converted_visible = [convert_to_markdown_wrapped(entry, message_id=i, use_cache=i != len(history['visible']) - 1) for entry in row_visible]\n\n        # Generate messages\n        if not last_message_only and converted_visible[0]:\n            output += create_message(\"user\", converted_visible[0], row_internal[0])\n\n        output 
+= create_message(\"assistant\", converted_visible[1], row_internal[1])\n\n    if not last_message_only:\n        output += \"</div></div>\"\n\n    return output\n\n\ndef time_greeting():\n    current_hour = datetime.datetime.now().hour\n    if 5 <= current_hour < 12:\n        return \"Good morning!\"\n    elif 12 <= current_hour < 18:\n        return \"Good afternoon!\"\n    else:\n        return \"Good evening!\"\n\n\ndef chat_html_wrapper(history, name1, name2, mode, style, character, reset_cache=False, last_message_only=False):\n    if len(history['visible']) == 0:\n        greeting = f\"<div class=\\\"welcome-greeting\\\">{time_greeting()} How can I help you today?</div>\"\n        result = f'<div class=\"chat\" id=\"chat\">{greeting}</div>'\n    elif mode == 'instruct':\n        result = generate_instruct_html(history, last_message_only=last_message_only)\n    else:\n        result = generate_cai_chat_html(history, name1, name2, style, character, reset_cache=reset_cache, last_message_only=last_message_only)\n\n    return {'html': result, 'last_message_only': last_message_only}\n"
  },
  {
    "path": "modules/image_models.py",
    "content": "import time\n\nimport modules.shared as shared\nfrom modules.logging_colors import logger\nfrom modules.utils import resolve_model_path\n\n\ndef get_quantization_config(quant_method):\n    \"\"\"\n    Get the appropriate quantization config based on the selected method.\n    Applies quantization to both the transformer and the text_encoder.\n    \"\"\"\n    import torch\n    # Import BitsAndBytesConfig from BOTH libraries to be safe\n    from diffusers import BitsAndBytesConfig as DiffusersBnBConfig\n    from diffusers import TorchAoConfig\n    from diffusers.quantizers import PipelineQuantizationConfig\n    from transformers import BitsAndBytesConfig as TransformersBnBConfig\n\n    if quant_method == 'none' or not quant_method:\n        return None\n\n    # Bitsandbytes 8-bit quantization\n    elif quant_method == 'bnb-8bit':\n        return PipelineQuantizationConfig(\n            quant_mapping={\n                \"transformer\": DiffusersBnBConfig(\n                    load_in_8bit=True\n                ),\n                \"text_encoder\": TransformersBnBConfig(\n                    load_in_8bit=True\n                )\n            }\n        )\n\n    # Bitsandbytes 4-bit quantization\n    elif quant_method == 'bnb-4bit':\n        return PipelineQuantizationConfig(\n            quant_mapping={\n                \"transformer\": DiffusersBnBConfig(\n                    load_in_4bit=True,\n                    bnb_4bit_quant_type=\"nf4\",\n                    bnb_4bit_compute_dtype=torch.bfloat16,\n                    bnb_4bit_use_double_quant=True\n                ),\n                \"text_encoder\": TransformersBnBConfig(\n                    load_in_4bit=True,\n                    bnb_4bit_quant_type=\"nf4\",\n                    bnb_4bit_compute_dtype=torch.bfloat16,\n                    bnb_4bit_use_double_quant=True\n                )\n            }\n        )\n\n    # torchao int8 weight-only\n    elif quant_method == 'torchao-int8wo':\n     
   return PipelineQuantizationConfig(\n            quant_mapping={\n                \"transformer\": TorchAoConfig(\"int8wo\"),\n                \"text_encoder\": TorchAoConfig(\"int8wo\")\n            }\n        )\n\n    # torchao fp4 (e2m1)\n    elif quant_method == 'torchao-fp4':\n        return PipelineQuantizationConfig(\n            quant_mapping={\n                \"transformer\": TorchAoConfig(\"fp4_e2m1\"),\n                \"text_encoder\": TorchAoConfig(\"fp4_e2m1\")\n            }\n        )\n\n    # torchao float8 weight-only\n    elif quant_method == 'torchao-float8wo':\n        return PipelineQuantizationConfig(\n            quant_mapping={\n                \"transformer\": TorchAoConfig(\"float8wo\"),\n                \"text_encoder\": TorchAoConfig(\"float8wo\")\n            }\n        )\n\n    else:\n        logger.warning(f\"Unknown quantization method: {quant_method}. Loading without quantization.\")\n        return None\n\n\ndef get_pipeline_type(pipe):\n    \"\"\"\n    Detect the pipeline type based on the loaded pipeline class.\n\n    Returns:\n        str: 'zimage', 'qwenimage', or 'unknown'\n    \"\"\"\n    class_name = pipe.__class__.__name__\n    if class_name == 'ZImagePipeline':\n        return 'zimage'\n    elif class_name == 'QwenImagePipeline':\n        return 'qwenimage'\n    else:\n        return 'unknown'\n\n\ndef load_image_model(model_name, dtype='bfloat16', attn_backend='sdpa', cpu_offload=False, compile_model=False, quant_method='none'):\n    \"\"\"\n    Load a diffusers image generation model.\n\n    Args:\n        model_name: Name of the model directory\n        dtype: 'bfloat16' or 'float16'\n        attn_backend: 'sdpa' or 'flash_attention_2'\n        cpu_offload: Enable CPU offloading for low VRAM\n        compile_model: Compile the model for faster inference (slow first run)\n        quant_method: 'none', 'bnb-8bit', 'bnb-4bit', or torchao options (int8wo, fp4, float8wo)\n    \"\"\"\n    import torch\n    from diffusers 
import DiffusionPipeline\n\n    from modules.torch_utils import get_device\n\n    logger.info(f\"Loading image model \\\"{model_name}\\\" with quantization: {quant_method}\")\n    t0 = time.time()\n\n    dtype_map = {\"bfloat16\": torch.bfloat16, \"float16\": torch.float16}\n    target_dtype = dtype_map.get(dtype, torch.bfloat16)\n\n    model_path = resolve_model_path(model_name, image_model=True)\n\n    try:\n        # Get quantization config based on selected method\n        pipeline_quant_config = get_quantization_config(quant_method)\n\n        # Load the pipeline\n        load_kwargs = {\n            \"torch_dtype\": target_dtype,\n            \"low_cpu_mem_usage\": True,\n        }\n\n        if pipeline_quant_config is not None:\n            load_kwargs[\"quantization_config\"] = pipeline_quant_config\n\n        # Use DiffusionPipeline for automatic pipeline detection\n        # This handles both ZImagePipeline and QwenImagePipeline\n        pipe = DiffusionPipeline.from_pretrained(\n            str(model_path),\n            **load_kwargs\n        )\n\n        pipeline_type = get_pipeline_type(pipe)\n\n        if not cpu_offload:\n            pipe.to(get_device())\n\n        modules = [\"transformer\", \"unet\"]\n\n        # Set attention backend\n        if attn_backend == 'flash_attention_2':\n            for name in modules:\n                mod = getattr(pipe, name, None)\n                if hasattr(mod, \"set_attention_backend\"):\n                    mod.set_attention_backend(\"flash\")\n                    break\n\n        # Compile model\n        if compile_model:\n            for name in modules:\n                mod = getattr(pipe, name, None)\n                if hasattr(mod, \"compile\"):\n                    logger.info(\"Compiling model (first run will be slow)...\")\n                    mod.compile()\n                    break\n\n        if cpu_offload:\n            pipe.enable_model_cpu_offload()\n\n        shared.image_model = pipe\n        
shared.image_model_name = model_name\n        shared.image_pipeline_type = pipeline_type\n\n        logger.info(f\"Loaded image model \\\"{model_name}\\\" in {(time.time() - t0):.2f} seconds.\")\n        return pipe\n\n    except Exception as e:\n        logger.error(f\"Failed to load image model: {str(e)}\")\n        return None\n\n\ndef unload_image_model():\n    \"\"\"Unload the current image model and free VRAM.\"\"\"\n    if shared.image_model is None:\n        return\n\n    del shared.image_model\n    shared.image_model = None\n    shared.image_model_name = 'None'\n    shared.image_pipeline_type = None\n\n    from modules.torch_utils import clear_torch_cache\n    clear_torch_cache()\n\n    logger.info(\"Image model unloaded.\")\n"
  },
  {
    "path": "modules/image_utils.py",
    "content": "import base64\nimport io\nimport os\nfrom pathlib import Path\nfrom typing import Any, List, Tuple\n\nfrom PIL import Image\n\nfrom modules.logging_colors import logger\n\n\ndef open_image_safely(path):\n    if path is None or not isinstance(path, str) or not Path(path).exists():\n        return None\n\n    if os.path.islink(path):\n        return None\n\n    try:\n        return Image.open(path)\n    except Exception as e:\n        logger.error(f\"Failed to open image file: {path}. Reason: {e}\")\n        return None\n\n\ndef convert_pil_to_base64(image: Image.Image) -> str:\n    \"\"\"Converts a PIL Image to a base64 encoded string.\"\"\"\n    buffered = io.BytesIO()\n    # Save image to an in-memory bytes buffer in PNG format\n    image.save(buffered, format=\"PNG\")\n    # Encode the bytes to a base64 string\n    return base64.b64encode(buffered.getvalue()).decode('utf-8')\n\n\ndef decode_base64_image(base64_string: str) -> Image.Image:\n    \"\"\"Decodes a base64 string to a PIL Image.\"\"\"\n    try:\n        if base64_string.startswith('data:image/'):\n            base64_string = base64_string.split(',', 1)[1]\n\n        image_data = base64.b64decode(base64_string)\n        image = Image.open(io.BytesIO(image_data))\n        return image\n    except Exception as e:\n        logger.error(f\"Failed to decode base64 image: {e}\")\n        raise ValueError(f\"Invalid base64 image data: {e}\")\n\n\ndef process_message_content(content: Any) -> Tuple[str, List[Image.Image]]:\n    \"\"\"\n    Processes message content that may contain text and images.\n    Returns: A tuple of (text_content, list_of_pil_images).\n    \"\"\"\n    if isinstance(content, str):\n        return content, []\n\n    if isinstance(content, list):\n        text_parts = []\n        images = []\n        for item in content:\n            if not isinstance(item, dict):\n                continue\n\n            item_type = item.get('type', '')\n            if item_type == 'text':\n   
             text_parts.append(item.get('text', ''))\n            elif item_type == 'image_url':\n                image_url_data = item.get('image_url', {})\n                image_url = image_url_data.get('url', '')\n\n                if image_url.startswith('data:image/'):\n                    try:\n                        images.append(decode_base64_image(image_url))\n                    except Exception as e:\n                        logger.warning(f\"Failed to process a base64 image: {e}\")\n                elif image_url.startswith('http'):\n                    # Support external URLs\n                    try:\n                        import requests\n                        from urllib.parse import urljoin\n                        from modules.web_search import _validate_url\n                        _validate_url(image_url)\n                        url = image_url\n                        for _ in range(5):\n                            response = requests.get(url, timeout=10, allow_redirects=False)\n                            if response.is_redirect and 'Location' in response.headers:\n                                url = urljoin(url, response.headers['Location'])\n                                _validate_url(url)\n                            else:\n                                break\n\n                        response.raise_for_status()\n                        image_data = response.content\n                        image = Image.open(io.BytesIO(image_data))\n                        images.append(image)\n                        logger.info(\"Successfully loaded external image from URL\")\n                    except Exception as e:\n                        logger.warning(f\"Failed to fetch external image: {e}\")\n                else:\n                    logger.warning(f\"Unsupported image URL format: {image_url[:70]}...\")\n\n        return ' '.join(text_parts), images\n\n    return str(content), []\n\n\ndef 
convert_image_attachments_to_pil(image_attachments: List[dict]) -> List[Image.Image]:\n    \"\"\"Convert webui image_attachments format to PIL Images.\"\"\"\n    pil_images = []\n    for attachment in image_attachments:\n        if attachment.get('type') == 'image' and 'image_data' in attachment:\n            try:\n                image = decode_base64_image(attachment['image_data'])\n                if image.mode != 'RGB':\n                    image = image.convert('RGB')\n                pil_images.append(image)\n            except Exception as e:\n                logger.warning(f\"Failed to process image attachment: {e}\")\n    return pil_images\n\n\ndef convert_openai_messages_to_images(messages: List[dict]) -> List[Image.Image]:\n    \"\"\"Convert OpenAI messages format to PIL Images.\"\"\"\n    all_images = []\n    for message in messages:\n        if isinstance(message, dict) and 'content' in message:\n            _, images = process_message_content(message['content'])\n            all_images.extend(images)\n    return all_images\n"
  },
  {
    "path": "modules/llama_cpp_server.py",
    "content": "import json\nimport os\nimport pprint\nimport re\nimport socket\nimport subprocess\nimport sys\nimport threading\nimport time\nfrom pathlib import Path\nfrom typing import Any, List\n\nimport llama_cpp_binaries\nimport requests\n\nfrom modules import shared\nfrom modules.image_utils import (\n    convert_image_attachments_to_pil,\n    convert_openai_messages_to_images,\n    convert_pil_to_base64\n)\nfrom modules.logging_colors import logger\nfrom modules.utils import resolve_model_path\n\nllamacpp_valid_cache_types = {\"fp16\", \"q8_0\", \"q4_0\"}\n\n\nclass LlamaServer:\n    def __init__(self, model_path, server_path=None):\n        \"\"\"\n        Initialize and start a server for llama.cpp models.\n        \"\"\"\n        self.model_path = model_path\n        self.server_path = server_path\n        self.port = self._find_available_port()\n        self.process = None\n        self.session = requests.Session()\n        self.vocabulary_size = None\n        self.n_ctx = None\n        self.bos_token = \"<s>\"\n        self.last_prompt_token_count = 0\n\n        # Start the server\n        self._start_server()\n\n    def encode(self, text, add_bos_token=False, **kwargs):\n        if self.bos_token and text.startswith(self.bos_token):\n            add_bos_token = False\n\n        url = f\"http://127.0.0.1:{self.port}/tokenize\"\n        payload = {\n            \"content\": text,\n            \"add_special\": add_bos_token,\n        }\n\n        response = self.session.post(url, json=payload)\n        result = response.json()\n        return result.get(\"tokens\", [])\n\n    def decode(self, token_ids, **kwargs):\n        url = f\"http://127.0.0.1:{self.port}/detokenize\"\n        payload = {\n            \"tokens\": token_ids,\n        }\n\n        response = self.session.post(url, json=payload)\n        result = response.json()\n        return result.get(\"content\", \"\")\n\n    def prepare_payload(self, state):\n        payload = {\n            
\"temperature\": state[\"temperature\"] if not state[\"dynamic_temperature\"] else (state[\"dynatemp_low\"] + state[\"dynatemp_high\"]) / 2,\n            \"dynatemp_range\": 0 if not state[\"dynamic_temperature\"] else (state[\"dynatemp_high\"] - state[\"dynatemp_low\"]) / 2,\n            \"dynatemp_exponent\": state[\"dynatemp_exponent\"],\n            \"top_k\": state[\"top_k\"],\n            \"top_p\": state[\"top_p\"],\n            \"min_p\": state[\"min_p\"],\n            \"top_n_sigma\": state[\"top_n_sigma\"] if state[\"top_n_sigma\"] > 0 else -1,\n            \"adaptive_target\": state[\"adaptive_target\"] if state[\"adaptive_target\"] > 0 else -1,\n            \"adaptive_decay\": state[\"adaptive_decay\"],\n            \"typical_p\": state[\"typical_p\"],\n            \"repeat_penalty\": state[\"repetition_penalty\"],\n            \"repeat_last_n\": state[\"repetition_penalty_range\"],\n            \"presence_penalty\": state[\"presence_penalty\"],\n            \"frequency_penalty\": state[\"frequency_penalty\"],\n            \"dry_multiplier\": state[\"dry_multiplier\"],\n            \"dry_base\": state[\"dry_base\"],\n            \"dry_allowed_length\": state[\"dry_allowed_length\"],\n            \"dry_penalty_last_n\": state[\"repetition_penalty_range\"],\n            \"xtc_probability\": state[\"xtc_probability\"],\n            \"xtc_threshold\": state[\"xtc_threshold\"],\n            \"mirostat\": state[\"mirostat_mode\"],\n            \"mirostat_tau\": state[\"mirostat_tau\"],\n            \"mirostat_eta\": state[\"mirostat_eta\"],\n            \"grammar\": state[\"grammar_string\"],\n            \"seed\": state[\"seed\"],\n            \"ignore_eos\": state[\"ban_eos_token\"],\n        }\n\n        # DRY\n        dry_sequence_breakers = state['dry_sequence_breakers']\n        if not dry_sequence_breakers.startswith(\"[\"):\n            dry_sequence_breakers = \"[\" + dry_sequence_breakers + \"]\"\n\n        dry_sequence_breakers = 
json.loads(dry_sequence_breakers)\n        payload[\"dry_sequence_breakers\"] = dry_sequence_breakers\n\n        # Sampler order\n        if state[\"sampler_priority\"]:\n            samplers = state[\"sampler_priority\"]\n            samplers = samplers.split(\"\\n\") if isinstance(samplers, str) else samplers\n            filtered_samplers = []\n\n            penalty_found = False\n            for s in samplers:\n                if s.strip() in [\"dry\", \"top_k\", \"top_p\", \"top_n_sigma\", \"min_p\", \"temperature\", \"xtc\"]:\n                    filtered_samplers.append(s.strip())\n                elif s.strip() == \"typical_p\":\n                    filtered_samplers.append(\"typ_p\")\n                elif not penalty_found and s.strip() == \"repetition_penalty\":\n                    filtered_samplers.append(\"penalties\")\n                    penalty_found = True\n\n            # Move temperature to the end if temperature_last is true and temperature exists in the list\n            if state[\"temperature_last\"] and \"temperature\" in filtered_samplers:\n                filtered_samplers.remove(\"temperature\")\n                filtered_samplers.append(\"temperature\")\n\n            # adaptive-p replaces the default dist sampler; llama.cpp always\n            # places it at the end of the chain regardless of position, so we\n            # activate it based on the parameter value rather than sampler order.\n            if state.get(\"adaptive_target\", 0) > 0:\n                filtered_samplers.append(\"adaptive_p\")\n\n            payload[\"samplers\"] = filtered_samplers\n\n        logit_bias = []\n        if state['custom_token_bans']:\n            logit_bias.extend([[int(token_id.strip()), False] for token_id in state['custom_token_bans'].split(',') if token_id.strip()])\n\n        if state.get('logit_bias'):\n            for token_id_str, bias in state['logit_bias'].items():\n                logit_bias.append([int(token_id_str), bias])\n\n        if 
logit_bias:\n            payload[\"logit_bias\"] = logit_bias\n\n        n_probs = state.get('logprobs', 0)\n        if n_probs and n_probs > 0:\n            payload[\"n_probs\"] = n_probs\n\n        return payload\n\n    def _process_images_for_generation(self, state: dict) -> List[Any]:\n        \"\"\"\n        Process all possible image inputs and return PIL images\n        \"\"\"\n        pil_images = []\n        # Source 1: Web UI (from chatbot_wrapper)\n        if 'image_attachments' in state and state['image_attachments']:\n            pil_images.extend(convert_image_attachments_to_pil(state['image_attachments']))\n        # Source 2: Chat Completions API (/v1/chat/completions)\n        elif 'history' in state and state.get('history', {}).get('messages'):\n            pil_images.extend(convert_openai_messages_to_images(state['history']['messages']))\n        # Source 3: Legacy Completions API (/v1/completions)\n        elif 'raw_images' in state and state['raw_images']:\n            pil_images.extend(state.get('raw_images', []))\n\n        return pil_images\n\n    def is_multimodal(self) -> bool:\n        \"\"\"Check if this model supports multimodal input.\"\"\"\n        return shared.args.mmproj not in [None, 'None']\n\n    def generate_with_streaming(self, prompt, state):\n        url = f\"http://127.0.0.1:{self.port}/completion\"\n        payload = self.prepare_payload(state)\n\n        pil_images = []\n\n        if shared.is_multimodal:\n            pil_images = self._process_images_for_generation(state)\n\n        if pil_images:\n            # Multimodal case\n            IMAGE_TOKEN_COST_ESTIMATE = 600  # A safe, conservative estimate per image\n\n            base64_images = [convert_pil_to_base64(img) for img in pil_images]\n            payload[\"prompt\"] = {\n                \"prompt_string\": prompt,\n                \"multimodal_data\": base64_images\n            }\n\n            # Calculate an estimated token count\n            text_tokens = 
self.encode(prompt, add_bos_token=state[\"add_bos_token\"])\n            self.last_prompt_token_count = len(text_tokens) + (len(pil_images) * IMAGE_TOKEN_COST_ESTIMATE)\n        else:\n            # Text only case\n            token_ids = self.encode(prompt, add_bos_token=state[\"add_bos_token\"])\n            self.last_prompt_token_count = len(token_ids)\n            payload[\"prompt\"] = token_ids\n\n        if state['auto_max_new_tokens']:\n            max_new_tokens = state['truncation_length'] - self.last_prompt_token_count\n        else:\n            max_new_tokens = state['max_new_tokens']\n\n        payload.update({\n            \"n_predict\": max_new_tokens,\n            \"stream\": True,\n            \"cache_prompt\": True\n        })\n\n        if shared.args.verbose:\n            logger.info(\"GENERATE_PARAMS=\")\n            printable_payload = {k: v for k, v in payload.items() if k != \"prompt\"}\n            pprint.PrettyPrinter(indent=4, sort_dicts=False).pprint(printable_payload)\n            print()\n\n        # Make the generation request\n        response = self.session.post(url, json=payload, stream=True)\n        try:\n            if response.status_code == 400 and response.json().get(\"error\", {}).get(\"type\") == \"exceed_context_size_error\":\n                logger.error(\"The request exceeds the available context size, try increasing it\")\n                return\n            else:\n                response.raise_for_status()  # Raise an exception for HTTP errors\n\n            full_text = \"\"\n            self.last_completion_probabilities = []\n\n            # Process the streaming response\n            stop_event = state.get('stop_event')\n            for line in response.iter_lines():\n                if shared.stop_everything or (stop_event and stop_event.is_set()):\n                    break\n\n                if not line:\n                    continue\n\n                try:\n                    line = line.decode('utf-8')\n\n    
                # Check if the line starts with \"data: \" and remove it\n                    if line.startswith('data: '):\n                        line = line[6:]  # Remove the \"data: \" prefix\n\n                    # Parse the JSON data\n                    data = json.loads(line)\n\n                    # Extract the token content\n                    if data.get('content', ''):\n                        full_text += data['content']\n                        yield full_text\n\n                    # Capture logprobs if present\n                    if 'completion_probabilities' in data:\n                        self.last_completion_probabilities.extend(data['completion_probabilities'])\n\n                    # Check if generation is complete\n                    if data.get('stop', False):\n                        break\n\n                except json.JSONDecodeError as e:\n                    # Log the error and the problematic line\n                    print(f\"JSON decode error: {e}\")\n                    print(f\"Problematic line: {line}\")\n                    continue\n        finally:\n            response.close()\n\n    def generate(self, prompt, state):\n        output = \"\"\n        for output in self.generate_with_streaming(prompt, state):\n            pass\n\n        return output\n\n    def get_logits(self, prompt, state, n_probs=128, use_samplers=False):\n        \"\"\"Get the logits/probabilities for the next token after a prompt\"\"\"\n        url = f\"http://127.0.0.1:{self.port}/completion\"\n\n        payload = self.prepare_payload(state)\n        payload.update({\n            \"prompt\": self.encode(prompt, add_bos_token=state[\"add_bos_token\"]),\n            \"n_predict\": 0,\n            \"logprobs\": True,\n            \"n_probs\": n_probs,\n            \"stream\": False,\n            \"post_sampling_probs\": use_samplers,\n        })\n\n        if shared.args.verbose and use_samplers:\n            logger.info(\"GENERATE_PARAMS=\")\n       
     printable_payload = {k: v for k, v in payload.items() if k != \"prompt\"}\n            pprint.PrettyPrinter(indent=4, sort_dicts=False).pprint(printable_payload)\n            print()\n\n        for retry in range(5):\n            response = self.session.post(url, json=payload)\n            result = response.json()\n\n            if \"completion_probabilities\" in result:\n                if use_samplers:\n                    return result[\"completion_probabilities\"][0][\"top_probs\"]\n                else:\n                    return result[\"completion_probabilities\"][0][\"top_logprobs\"]\n\n            time.sleep(0.05)\n        else:\n            raise Exception(f\"Unexpected response format: 'completion_probabilities' not found in {result}\")\n\n    def _get_vocabulary_size(self):\n        \"\"\"Get and store the model's vocabulary size (n_vocab).\"\"\"\n        url = f\"http://127.0.0.1:{self.port}/v1/models\"\n        response = self.session.get(url).json()\n\n        if \"data\" in response and len(response[\"data\"]) > 0:\n            model_info = response[\"data\"][0]\n            if \"meta\" in model_info and \"n_vocab\" in model_info[\"meta\"]:\n                self.vocabulary_size = model_info[\"meta\"][\"n_vocab\"]\n\n    def _get_bos_token(self):\n        \"\"\"Get and store the model's BOS token and context size.\"\"\"\n        url = f\"http://127.0.0.1:{self.port}/props\"\n        response = self.session.get(url).json()\n        if \"bos_token\" in response:\n            self.bos_token = response[\"bos_token\"]\n\n        # Get actual n_ctx from the server (important when --fit auto-selects it)\n        n_ctx = response.get(\"default_generation_settings\", {}).get(\"n_ctx\")\n        if n_ctx:\n            self.n_ctx = n_ctx\n\n    def _is_port_available(self, port):\n        \"\"\"Check if a port is available for use.\"\"\"\n        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:\n            try:\n                s.bind(('', 
port))\n                return True\n            except OSError:\n                return False\n\n    def _find_available_port(self):\n        \"\"\"Find an available port, preferring main port + 5.\"\"\"\n        preferred_port = shared.args.api_port + 5\n        if self._is_port_available(preferred_port):\n            return preferred_port\n\n        # Fall back to OS-assigned random port\n        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:\n            s.bind(('', 0))\n            return s.getsockname()[1]\n\n    def _start_server(self):\n        \"\"\"Start the llama.cpp server and wait until it's ready.\"\"\"\n        # Determine the server path\n        if self.server_path is None:\n            self.server_path = llama_cpp_binaries.get_binary_path()\n\n        # Build the command\n        cmd = [\n            self.server_path,\n            \"--model\", self.model_path,\n            \"--batch-size\", str(shared.args.batch_size),\n            \"--ubatch-size\", str(shared.args.ubatch_size),\n            \"--port\", str(self.port),\n            \"--no-webui\",\n            \"--flash-attn\", \"on\",\n        ]\n\n        if shared.args.ctx_size > 0:\n            cmd += [\"--ctx-size\", str(shared.args.ctx_size)]\n        elif shared.args.gpu_layers >= 0:\n            cmd += [\"--ctx-size\", \"8192\"]\n\n        if shared.args.gpu_layers >= 0:\n            cmd += [\"--gpu-layers\", str(shared.args.gpu_layers), \"--fit\", \"off\"]\n        else:\n            cmd += [\"--fit\", \"on\"]\n            cmd += [\"--fit-ctx\", \"8192\"]\n            if shared.args.fit_target:\n                cmd += [\"--fit-target\", shared.args.fit_target]\n\n        if shared.args.threads > 0:\n            cmd += [\"--threads\", str(shared.args.threads)]\n        if shared.args.threads_batch > 0:\n            cmd += [\"--threads-batch\", str(shared.args.threads_batch)]\n        if shared.args.cpu_moe:\n            cmd.append(\"--cpu-moe\")\n        if 
shared.args.no_mmap:\n            cmd.append(\"--no-mmap\")\n        if shared.args.mlock:\n            cmd.append(\"--mlock\")\n        if shared.args.tensor_split:\n            cmd += [\"--tensor-split\", shared.args.tensor_split]\n        if shared.args.numa:\n            cmd += [\"--numa\", \"distribute\"]\n        if shared.args.no_kv_offload:\n            cmd.append(\"--no-kv-offload\")\n        if shared.args.row_split:\n            cmd += [\"--split-mode\", \"row\"]\n        cache_type = \"fp16\"\n        if shared.args.cache_type != \"fp16\" and shared.args.cache_type in llamacpp_valid_cache_types:\n            cmd += [\"--cache-type-k\", shared.args.cache_type, \"--cache-type-v\", shared.args.cache_type]\n            cache_type = shared.args.cache_type\n        if shared.args.mmproj not in [None, 'None']:\n            path = Path(shared.args.mmproj)\n            if not path.exists():\n                path = shared.user_data_dir / 'mmproj' / shared.args.mmproj\n\n            if path.exists():\n                cmd += [\"--mmproj\", str(path)]\n        if shared.args.model_draft not in [None, 'None']:\n            path = resolve_model_path(shared.args.model_draft)\n\n            if path.is_file():\n                model_file = path\n            else:\n                model_file = sorted(path.glob('*.gguf'))[0]\n\n            cmd += [\"--model-draft\", str(model_file)]\n            if shared.args.draft_max > 0:\n                cmd += [\"--draft-max\", str(shared.args.draft_max)]\n            if shared.args.gpu_layers_draft > 0:\n                cmd += [\"--gpu-layers-draft\", str(shared.args.gpu_layers_draft)]\n            if shared.args.device_draft:\n                cmd += [\"--device-draft\", shared.args.device_draft]\n            if shared.args.ctx_size_draft > 0:\n                cmd += [\"--ctx-size-draft\", str(shared.args.ctx_size_draft)]\n        if shared.args.spec_type != 'none':\n            cmd += [\"--spec-type\", shared.args.spec_type]\n       
     cmd += [\"--draft-max\", str(shared.args.draft_max)]\n            cmd += [\"--spec-ngram-size-n\", str(shared.args.spec_ngram_size_n)]\n            cmd += [\"--spec-ngram-size-m\", str(shared.args.spec_ngram_size_m)]\n            cmd += [\"--spec-ngram-min-hits\", str(shared.args.spec_ngram_min_hits)]\n        cmd += [\"--parallel\", str(shared.args.parallel)]\n        if shared.args.streaming_llm:\n            cmd += [\"--cache-reuse\", \"1\"]\n            cmd += [\"--swa-full\"]\n        if shared.args.extra_flags:\n            # Clean up the input\n            extra_flags = shared.args.extra_flags.strip()\n            if extra_flags.startswith('\"') and extra_flags.endswith('\"'):\n                extra_flags = extra_flags[1:-1].strip()\n            elif extra_flags.startswith(\"'\") and extra_flags.endswith(\"'\"):\n                extra_flags = extra_flags[1:-1].strip()\n\n            for flag_item in extra_flags.split(','):\n                flag_item = flag_item.strip()\n                if '=' in flag_item:\n                    flag, value = flag_item.split('=', 1)\n                    flag = flag.strip()\n                    value = value.strip()\n                    if len(flag) <= 3:\n                        cmd += [f\"-{flag}\", value]\n                    else:\n                        cmd += [f\"--{flag}\", value]\n                else:\n                    if len(flag_item) <= 3:\n                        cmd.append(f\"-{flag_item}\")\n                    else:\n                        cmd.append(f\"--{flag_item}\")\n\n        env = os.environ.copy()\n        if os.name == 'posix':\n            current_path = env.get('LD_LIBRARY_PATH', '')\n            if current_path:\n                env['LD_LIBRARY_PATH'] = f\"{current_path}:{os.path.dirname(self.server_path)}\"\n            else:\n                env['LD_LIBRARY_PATH'] = os.path.dirname(self.server_path)\n\n        if shared.args.verbose:\n            logger.info(\"llama-server command-line 
flags:\")\n            print(' '.join(str(item) for item in cmd[1:]))\n            print()\n\n        gpu_layers_str = \"auto\" if shared.args.gpu_layers < 0 else str(shared.args.gpu_layers)\n        ctx_size_str = \"auto\" if shared.args.ctx_size == 0 and shared.args.gpu_layers < 0 else str(shared.args.ctx_size or 8192)\n        logger.info(f\"Using gpu_layers={gpu_layers_str} | ctx_size={ctx_size_str} | cache_type={cache_type}\")\n        # Start the server with pipes for output\n        self.process = subprocess.Popen(\n            cmd,\n            stderr=subprocess.PIPE,\n            bufsize=0,\n            env=env\n        )\n\n        threading.Thread(target=filter_stderr_with_progress, args=(self.process.stderr,), daemon=True).start()\n\n        # Wait for server to be healthy\n        health_url = f\"http://127.0.0.1:{self.port}/health\"\n        while True:\n            # Check if process is still alive\n            if self.process.poll() is not None:\n                # Process has terminated\n                exit_code = self.process.poll()\n                raise RuntimeError(f\"Server process terminated unexpectedly with exit code: {exit_code}\")\n\n            try:\n                response = self.session.get(health_url)\n                if response.status_code == 200:\n                    break\n            except Exception:\n                pass\n\n            time.sleep(1)\n\n        # Server is now healthy, get model info\n        self._get_vocabulary_size()\n        self._get_bos_token()\n        return self.port\n\n    def __enter__(self):\n        \"\"\"Support for context manager.\"\"\"\n        return self\n\n    def __exit__(self, exc_type, exc_val, exc_tb):\n        \"\"\"Support for context manager.\"\"\"\n        self.stop()\n\n    def __del__(self):\n        \"\"\"Cleanup when the object is deleted.\"\"\"\n        self.stop()\n\n    def stop(self):\n        \"\"\"Stop the server process.\"\"\"\n        if self.process:\n            
self.process.terminate()\n            try:\n                self.process.wait(timeout=5)\n            except subprocess.TimeoutExpired:\n                self.process.kill()\n                self.process.wait(timeout=5)\n\n            self.process = None\n\n\ndef filter_stderr_with_progress(process_stderr):\n    \"\"\"\n    Reads stderr lines from a process, filters out noise, and displays progress updates\n    inline (overwriting the same line) until completion.\n    \"\"\"\n    progress_re = re.compile(r'slot update_slots: id.*progress = (\\d+\\.\\d+)')\n    ansi_re = re.compile(r'\\x1b\\[[0-9;]*[a-zA-Z]')\n    log_prefix_re = re.compile(r'^[IWED] ')\n    last_was_progress = False\n\n    try:\n        # Read in binary mode and decode manually\n        buffer = b\"\"\n        while True:\n            # Read chunks aggressively to prevent buffer overflow\n            chunk = process_stderr.read(4096)\n            if not chunk:\n                break\n\n            buffer += chunk\n\n            # Process complete lines\n            while b'\\n' in buffer:\n                line_bytes, buffer = buffer.split(b'\\n', 1)\n                try:\n                    line = line_bytes.decode('utf-8', errors='replace').strip('\\r\\n')\n                    line = log_prefix_re.sub('', ansi_re.sub('', line))\n                    if line:  # Process non-empty lines\n                        match = progress_re.search(line)\n\n                        if match:\n                            progress = float(match.group(1))\n\n                            # Extract just the part from \"prompt processing\" onwards\n                            prompt_processing_idx = line.find('prompt processing')\n                            if prompt_processing_idx != -1:\n                                display_line = line[prompt_processing_idx:]\n                            else:\n                                display_line = line  # fallback to full line\n\n                            # choose 
carriage return for in-progress or newline at completion\n                            end_char = '\\r' if progress < 1.0 else '\\n'\n                            print(display_line, end=end_char, file=sys.stderr, flush=True)\n                            last_was_progress = (progress < 1.0)\n\n                        # skip noise lines\n                        elif not (line.startswith(('srv ', 'slot ')) or 'log_server_r: request: GET /health' in line or 'No parser definition detected' in line):\n                            # if we were in progress, finish that line first\n                            if last_was_progress:\n                                print(file=sys.stderr)\n\n                            print(line, file=sys.stderr, flush=True)\n                            last_was_progress = False\n\n                except Exception:\n                    continue\n\n    except (ValueError, IOError):\n        pass\n    finally:\n        try:\n            process_stderr.close()\n        except Exception:\n            pass\n"
  },
  {
    "path": "modules/loaders.py",
    "content": "import functools\nfrom collections import OrderedDict\n\nloaders_and_params = OrderedDict({\n    'llama.cpp': [\n        'gpu_layers',\n        'fit_target',\n        'cpu_moe',\n        'threads',\n        'threads_batch',\n        'batch_size',\n        'ubatch_size',\n        'ctx_size',\n        'cache_type',\n        'tensor_split',\n        'extra_flags',\n        'streaming_llm',\n        'row_split',\n        'no_kv_offload',\n        'no_mmap',\n        'mlock',\n        'numa',\n        'parallel',\n        'model_draft',\n        'draft_max',\n        'gpu_layers_draft',\n        'device_draft',\n        'ctx_size_draft',\n        'ngram_header',\n        'spec_type',\n        'spec_ngram_size_n',\n        'spec_ngram_size_m',\n        'spec_ngram_min_hits',\n        'speculative_decoding_accordion',\n        'mmproj',\n        'mmproj_accordion',\n        'vram_info',\n    ],\n    'Transformers': [\n        'gpu_split',\n        'cpu_memory',\n        'compute_dtype',\n        'quant_type',\n        'load_in_8bit',\n        'load_in_4bit',\n        'attn_implementation',\n        'cpu',\n        'disk',\n        'use_double_quant',\n        'bf16',\n        'no_use_fast',\n    ],\n    'ExLlamav3_HF': [\n        'ctx_size',\n        'cache_type',\n        'gpu_split',\n        'cfg_cache',\n        'no_use_fast',\n        'enable_tp',\n        'tp_backend',\n    ],\n    'ExLlamav3': [\n        'ctx_size',\n        'cache_type',\n        'gpu_split',\n        'model_draft',\n        'draft_max',\n        'speculative_decoding_accordion',\n        'enable_tp',\n        'tp_backend',\n    ],\n    'TensorRT-LLM': [\n        'ctx_size',\n        'tensorrt_llm_info',\n    ]\n})\n\n\ndef transformers_samplers():\n    return {\n        'temperature',\n        'dynatemp_low',\n        'dynatemp_high',\n        'dynatemp_exponent',\n        'smoothing_factor',\n        'smoothing_curve',\n        'min_p',\n        'top_p',\n        'top_k',\n       
 'typical_p',\n        'xtc_threshold',\n        'xtc_probability',\n        'epsilon_cutoff',\n        'eta_cutoff',\n        'tfs',\n        'top_a',\n        'top_n_sigma',\n        'adaptive_target',\n        'adaptive_decay',\n        'dry_multiplier',\n        'dry_allowed_length',\n        'dry_base',\n        'repetition_penalty',\n        'frequency_penalty',\n        'presence_penalty',\n        'encoder_repetition_penalty',\n        'no_repeat_ngram_size',\n        'repetition_penalty_range',\n        'penalty_alpha',\n        'guidance_scale',\n        'mirostat_mode',\n        'mirostat_tau',\n        'mirostat_eta',\n        'prompt_lookup_num_tokens',\n        'do_sample',\n        'dynamic_temperature',\n        'temperature_last',\n        'auto_max_new_tokens',\n        'ban_eos_token',\n        'add_bos_token',\n        'enable_thinking',\n        'reasoning_effort',\n        'skip_special_tokens',\n        'static_cache',\n        'seed',\n        'sampler_priority',\n        'custom_token_bans',\n        'negative_prompt',\n        'dry_sequence_breakers',\n        'grammar_string',\n        'grammar_file_row',\n    }\n\n\nloaders_samplers = {\n    'Transformers': transformers_samplers(),\n    'ExLlamav3_HF': {\n        'temperature',\n        'dynatemp_low',\n        'dynatemp_high',\n        'dynatemp_exponent',\n        'smoothing_factor',\n        'smoothing_curve',\n        'min_p',\n        'top_p',\n        'top_k',\n        'typical_p',\n        'xtc_threshold',\n        'xtc_probability',\n        'epsilon_cutoff',\n        'eta_cutoff',\n        'tfs',\n        'top_a',\n        'top_n_sigma',\n        'adaptive_target',\n        'adaptive_decay',\n        'dry_multiplier',\n        'dry_allowed_length',\n        'dry_base',\n        'repetition_penalty',\n        'frequency_penalty',\n        'presence_penalty',\n        'encoder_repetition_penalty',\n        'no_repeat_ngram_size',\n        'repetition_penalty_range',\n        
'guidance_scale',\n        'mirostat_mode',\n        'mirostat_tau',\n        'mirostat_eta',\n        'do_sample',\n        'dynamic_temperature',\n        'temperature_last',\n        'auto_max_new_tokens',\n        'ban_eos_token',\n        'add_bos_token',\n        'enable_thinking',\n        'reasoning_effort',\n        'skip_special_tokens',\n        'seed',\n        'sampler_priority',\n        'custom_token_bans',\n        'negative_prompt',\n        'dry_sequence_breakers',\n        'grammar_string',\n        'grammar_file_row',\n    },\n    'ExLlamav3': {\n        'temperature',\n        'min_p',\n        'top_p',\n        'top_k',\n        'adaptive_target',\n        'adaptive_decay',\n        'repetition_penalty',\n        'frequency_penalty',\n        'presence_penalty',\n        'repetition_penalty_range',\n        'temperature_last',\n        'sampler_priority',\n        'auto_max_new_tokens',\n        'ban_eos_token',\n        'add_bos_token',\n        'enable_thinking',\n        'reasoning_effort',\n        'seed',\n        'skip_special_tokens',\n    },\n    'llama.cpp': {\n        'temperature',\n        'dynatemp_low',\n        'dynatemp_high',\n        'dynatemp_exponent',\n        'min_p',\n        'top_p',\n        'top_k',\n        'typical_p',\n        'xtc_threshold',\n        'xtc_probability',\n        'top_n_sigma',\n        'adaptive_target',\n        'adaptive_decay',\n        'dry_multiplier',\n        'dry_allowed_length',\n        'dry_base',\n        'repetition_penalty',\n        'frequency_penalty',\n        'presence_penalty',\n        'repetition_penalty_range',\n        'mirostat_mode',\n        'mirostat_tau',\n        'mirostat_eta',\n        'dynamic_temperature',\n        'temperature_last',\n        'auto_max_new_tokens',\n        'ban_eos_token',\n        'add_bos_token',\n        'enable_thinking',\n        'reasoning_effort',\n        'seed',\n        'sampler_priority',\n        'custom_token_bans',\n        
'dry_sequence_breakers',\n        'grammar_string',\n        'grammar_file_row',\n    },\n    'TensorRT-LLM': {\n        'temperature',\n        'top_p',\n        'top_k',\n        'min_p',\n        'repetition_penalty',\n        'frequency_penalty',\n        'presence_penalty',\n        'no_repeat_ngram_size',\n        'auto_max_new_tokens',\n        'ban_eos_token',\n        'add_bos_token',\n        'skip_special_tokens',\n        'seed',\n    }\n}\n\n\n@functools.cache\ndef list_all_samplers():\n    all_samplers = set()\n    for k in loaders_samplers:\n        for sampler in loaders_samplers[k]:\n            all_samplers.add(sampler)\n\n    return sorted(all_samplers)\n\n\ndef blacklist_samplers(loader, dynamic_temperature):\n    import gradio as gr\n    all_samplers = list_all_samplers()\n    output = []\n\n    for sampler in all_samplers:\n        if loader == 'All' or sampler in loaders_samplers[loader]:\n            if sampler.startswith('dynatemp'):\n                output.append(gr.update(visible=dynamic_temperature))\n            else:\n                output.append(gr.update(visible=True))\n        else:\n            output.append(gr.update(visible=False))\n\n    return output\n\n\n@functools.cache\ndef get_all_params():\n    all_params = set()\n    for k in loaders_and_params:\n        for el in loaders_and_params[k]:\n            all_params.add(el)\n\n    return sorted(all_params)\n\n\ndef list_model_elements():\n    return [\n        'filter_by_loader',\n        'loader',\n        'cpu_memory',\n        'gpu_layers',\n        'fit_target',\n        'cpu_moe',\n        'threads',\n        'threads_batch',\n        'batch_size',\n        'ubatch_size',\n        'ctx_size',\n        'cache_type',\n        'tensor_split',\n        'extra_flags',\n        'streaming_llm',\n        'gpu_split',\n        'compute_dtype',\n        'quant_type',\n        'load_in_8bit',\n        'load_in_4bit',\n        'attn_implementation',\n        'cpu',\n        
'disk',\n        'row_split',\n        'no_kv_offload',\n        'no_mmap',\n        'mlock',\n        'numa',\n        'parallel',\n        'use_double_quant',\n        'bf16',\n        'enable_tp',\n        'tp_backend',\n        'cfg_cache',\n        'no_use_fast',\n        'model_draft',\n        'draft_max',\n        'gpu_layers_draft',\n        'device_draft',\n        'ctx_size_draft',\n        'spec_type',\n        'spec_ngram_size_n',\n        'spec_ngram_size_m',\n        'spec_ngram_min_hits',\n        'mmproj',\n    ]\n\n\ndef make_loader_params_visible(loader):\n    import gradio as gr\n    params = []\n    all_params = get_all_params()\n    if loader in loaders_and_params:\n        params = loaders_and_params[loader]\n\n    return [gr.update(visible=True) if k in params else gr.update(visible=False) for k in all_params]\n"
  },
  {
    "path": "modules/logging_colors.py",
    "content": "import logging\n\nlogger = logging.getLogger('text-generation-webui')\n\n\ndef setup_logging():\n    '''\n    Copied from: https://github.com/vladmandic/automatic\n\n    All credits to vladmandic.\n    '''\n\n    class RingBuffer(logging.StreamHandler):\n        def __init__(self, capacity):\n            super().__init__()\n            self.capacity = capacity\n            self.buffer = []\n            self.formatter = logging.Formatter('{ \"asctime\":\"%(asctime)s\", \"created\":%(created)f, \"facility\":\"%(name)s\", \"pid\":%(process)d, \"tid\":%(thread)d, \"level\":\"%(levelname)s\", \"module\":\"%(module)s\", \"func\":\"%(funcName)s\", \"msg\":\"%(message)s\" }')\n\n        def emit(self, record):\n            msg = self.format(record)\n            # self.buffer.append(json.loads(msg))\n            self.buffer.append(msg)\n            if len(self.buffer) > self.capacity:\n                self.buffer.pop(0)\n\n        def get(self):\n            return self.buffer\n\n    from rich.console import Console\n    from rich.logging import RichHandler\n    from rich.pretty import install as pretty_install\n    from rich.theme import Theme\n    from rich.traceback import install as traceback_install\n\n    level = logging.DEBUG\n    logger.setLevel(logging.DEBUG)  # log to file is always at level debug for facility `sd`\n    console = Console(log_time=True, log_time_format='%H:%M:%S-%f', theme=Theme({\n        \"traceback.border\": \"black\",\n        \"traceback.border.syntax_error\": \"black\",\n        \"inspect.value.border\": \"black\",\n    }))\n    logging.basicConfig(level=logging.ERROR, format='%(asctime)s | %(name)s | %(levelname)s | %(module)s | %(message)s', handlers=[logging.NullHandler()])  # redirect default logger to null\n    pretty_install(console=console)\n    traceback_install(console=console, extra_lines=1, max_frames=10, width=console.width, word_wrap=False, indent_guides=False, suppress=[])\n    while logger.hasHandlers() and 
len(logger.handlers) > 0:\n        logger.removeHandler(logger.handlers[0])\n\n    # handlers\n    rh = RichHandler(show_time=True, omit_repeated_times=False, show_level=True, show_path=False, markup=False, rich_tracebacks=True, log_time_format='%H:%M:%S-%f', level=level, console=console)\n    rh.setLevel(level)\n    logger.addHandler(rh)\n\n    rb = RingBuffer(100)  # 100 entries default in log ring buffer\n    rb.setLevel(level)\n    logger.addHandler(rb)\n    logger.buffer = rb.buffer\n\n    # overrides\n    logging.getLogger(\"urllib3\").setLevel(logging.ERROR)\n    logging.getLogger(\"httpx\").setLevel(logging.ERROR)\n    logging.getLogger(\"diffusers\").setLevel(logging.ERROR)\n    logging.getLogger(\"torch\").setLevel(logging.ERROR)\n    logging.getLogger(\"lycoris\").handlers = logger.handlers\n\n\nsetup_logging()\n"
  },
  {
    "path": "modules/logits.py",
    "content": "import time\nimport traceback\n\nimport numpy as np\n\nfrom modules import models, shared\nfrom modules.logging_colors import logger\nfrom modules.models import load_model\nfrom modules.text_generation import generate_reply\nfrom modules.utils import check_model_loaded\n\nglobal_scores = None\n\n\ndef get_next_logits(*args, **kwargs):\n    if shared.args.idle_timeout > 0 and shared.model is None and shared.model_name not in [None, 'None']:\n        shared.model, shared.tokenizer = load_model(shared.model_name)\n\n    needs_lock = not args[2]  # use_samplers\n    if needs_lock:\n        shared.generation_lock.acquire()\n\n    try:\n        result = _get_next_logits(*args, **kwargs)\n    except Exception:\n        traceback.print_exc()\n        result = None\n\n    if needs_lock:\n        models.last_generation_time = time.time()\n        shared.generation_lock.release()\n\n    return result\n\n\ndef _get_next_logits(prompt, state, use_samplers, previous, top_logits=25, return_dict=False):\n    model_is_loaded, error_message = check_model_loaded()\n    if not model_is_loaded:\n        return error_message, previous\n\n    # llama.cpp case\n    if shared.model.__class__.__name__ == 'LlamaServer':\n        logprobs = shared.model.get_logits(prompt, state, n_probs=top_logits, use_samplers=use_samplers)\n\n        if return_dict:\n            output = {}\n            for entry in logprobs:\n                token = repr(entry['token'])\n                if len(token) > 2 and token.startswith(\"'\") and token.endswith(\"'\"):\n                    token = token[1:-1]\n\n                prob = entry['prob'] if use_samplers else np.exp(entry['logprob'])\n                output[token] = prob\n            return output\n        else:\n            output = ''\n            for entry in logprobs:\n                token = repr(entry['token'])\n                if len(token) > 2 and token.startswith(\"'\") and token.endswith(\"'\"):\n                    token = 
token[1:-1]\n\n                prob = entry['prob'] if use_samplers else np.exp(entry['logprob'])\n                output += f\"{prob:.5f}  -  {token}\\n\"\n            return output, previous\n\n    # All other model types\n    else:\n        import torch\n\n        from modules import sampler_hijack\n        from modules.torch_utils import get_device\n\n        is_non_hf_exllamav3 = shared.model.__class__.__name__ == 'Exllamav3Model'\n\n        if not use_samplers:\n            state = {'stream': True}\n\n        if use_samplers:\n            state['max_new_tokens'] = 1\n            state['auto_max_new_tokens'] = False\n            state.setdefault('stream', True)\n            for _ in generate_reply(prompt, state):\n                pass\n\n            scores = sampler_hijack.global_scores[-1]\n        else:\n            if is_non_hf_exllamav3:\n                device = get_device()\n                tokens = shared.tokenizer.encode(prompt)\n                if device:\n                    tokens = tokens.to(device)\n\n                scores = shared.model.get_logits(tokens)[-1][-1]\n            else:\n                device = get_device()\n                tokens = shared.tokenizer.encode(prompt, return_tensors='pt')\n                if device:\n                    tokens = tokens.to(device)\n\n                output = shared.model(input_ids=tokens)\n                scores = output['logits'][-1][-1]\n\n        probs = torch.softmax(scores.detach(), dim=-1, dtype=torch.float)\n        topk_values, topk_indices = torch.topk(probs, k=top_logits, largest=True, sorted=True)\n        if hasattr(shared.tokenizer, 'convert_ids_to_tokens'):\n            tokens = [shared.tokenizer.convert_ids_to_tokens(int(i)) for i in topk_indices]\n        else:\n            tokens = [shared.tokenizer.decode(i) for i in topk_indices]\n\n        if return_dict:\n            topk_values = [float(i) for i in topk_values]\n            output = {}\n            for row in list(zip(topk_values, 
tokens)):\n                key = row[1]\n                if isinstance(key, bytes):\n                    try:\n                        key = key.decode()\n                    except Exception:\n                        key = key.decode('latin')\n\n                output[key] = row[0]\n\n            return output\n        else:\n            topk_values = [f\"{float(i):.5f}\" for i in topk_values]\n            output = ''\n            for row in list(zip(topk_values, tokens)):\n                output += f\"{row[0]}  -  {repr(row[1])}\\n\"\n\n            return output, previous\n"
  },
  {
    "path": "modules/metadata_gguf.py",
    "content": "import struct\nfrom enum import IntEnum\n\n\nclass GGUFValueType(IntEnum):\n    UINT8 = 0\n    INT8 = 1\n    UINT16 = 2\n    INT16 = 3\n    UINT32 = 4\n    INT32 = 5\n    FLOAT32 = 6\n    BOOL = 7\n    STRING = 8\n    ARRAY = 9\n    UINT64 = 10\n    INT64 = 11\n    FLOAT64 = 12\n\n\n_simple_value_packing = {\n    GGUFValueType.UINT8: \"<B\",\n    GGUFValueType.INT8: \"<b\",\n    GGUFValueType.UINT16: \"<H\",\n    GGUFValueType.INT16: \"<h\",\n    GGUFValueType.UINT32: \"<I\",\n    GGUFValueType.INT32: \"<i\",\n    GGUFValueType.FLOAT32: \"<f\",\n    GGUFValueType.UINT64: \"<Q\",\n    GGUFValueType.INT64: \"<q\",\n    GGUFValueType.FLOAT64: \"<d\",\n    GGUFValueType.BOOL: \"?\",\n}\n\nvalue_type_info = {\n    GGUFValueType.UINT8: 1,\n    GGUFValueType.INT8: 1,\n    GGUFValueType.UINT16: 2,\n    GGUFValueType.INT16: 2,\n    GGUFValueType.UINT32: 4,\n    GGUFValueType.INT32: 4,\n    GGUFValueType.FLOAT32: 4,\n    GGUFValueType.UINT64: 8,\n    GGUFValueType.INT64: 8,\n    GGUFValueType.FLOAT64: 8,\n    GGUFValueType.BOOL: 1,\n}\n\n\ndef get_single(value_type, file):\n    if value_type == GGUFValueType.STRING:\n        value_length = struct.unpack(\"<Q\", file.read(8))[0]\n        value = file.read(value_length)\n        try:\n            value = value.decode('utf-8')\n        except Exception:\n            pass\n    else:\n        type_str = _simple_value_packing.get(value_type)\n        bytes_length = value_type_info.get(value_type)\n        value = struct.unpack(type_str, file.read(bytes_length))[0]\n\n    return value\n\n\ndef load_metadata(fname):\n    metadata = {}\n    with open(fname, 'rb') as file:\n        GGUF_MAGIC = struct.unpack(\"<I\", file.read(4))[0]\n        GGUF_VERSION = struct.unpack(\"<I\", file.read(4))[0]\n        ti_data_count = struct.unpack(\"<Q\", file.read(8))[0]\n        kv_data_count = struct.unpack(\"<Q\", file.read(8))[0]\n\n        if GGUF_VERSION == 1:\n            raise Exception('You are using an outdated GGUF, 
please download a new one.')\n\n        for i in range(kv_data_count):\n            key_length = struct.unpack(\"<Q\", file.read(8))[0]\n            key = file.read(key_length)\n\n            value_type = GGUFValueType(struct.unpack(\"<I\", file.read(4))[0])\n            if value_type == GGUFValueType.ARRAY:\n                ltype = GGUFValueType(struct.unpack(\"<I\", file.read(4))[0])\n                length = struct.unpack(\"<Q\", file.read(8))[0]\n\n                arr = [get_single(ltype, file) for _ in range(length)]\n                metadata[key.decode()] = arr\n            else:\n                value = get_single(value_type, file)\n                metadata[key.decode()] = value\n\n    return metadata\n"
  },
  {
    "path": "modules/models.py",
    "content": "import sys\nimport time\n\nimport modules.shared as shared\nfrom modules.logging_colors import logger\nfrom modules.models_settings import get_model_metadata\nfrom modules.utils import resolve_model_path\n\nlast_generation_time = time.time()\n\n\ndef load_model(model_name, loader=None):\n    logger.info(f\"Loading \\\"{model_name}\\\"\")\n    t0 = time.time()\n\n    shared.is_seq2seq = False\n    shared.model_name = model_name\n    load_func_map = {\n        'llama.cpp': llama_cpp_server_loader,\n        'Transformers': transformers_loader,\n        'ExLlamav3_HF': ExLlamav3_HF_loader,\n        'ExLlamav3': ExLlamav3_loader,\n        'TensorRT-LLM': TensorRT_LLM_loader,\n    }\n\n    metadata = get_model_metadata(model_name)\n    if loader is None:\n        if shared.args.loader is not None:\n            loader = shared.args.loader\n        else:\n            loader = metadata['loader']\n            if loader is None:\n                logger.error('The path to the model does not exist. 
Exiting.')\n                raise ValueError\n\n    if loader != 'llama.cpp' and 'sampler_hijack' not in sys.modules:\n        from modules import sampler_hijack\n        sampler_hijack.hijack_samplers()\n\n    shared.args.loader = loader\n    if loader != 'llama.cpp' and shared.args.ctx_size == 0:\n        shared.args.ctx_size = 8192\n\n    output = load_func_map[loader](model_name)\n    if type(output) is tuple:\n        model, tokenizer = output\n    else:\n        model = output\n        if model is not None:\n            from modules.transformers_loader import load_tokenizer\n            tokenizer = load_tokenizer(model_name)\n\n    if model is None:\n        return None, None\n\n    shared.settings.update({k: v for k, v in metadata.items() if k in shared.settings})\n    if loader.lower().startswith('exllama') or loader.lower().startswith('tensorrt') or loader == 'llama.cpp':\n        if shared.args.ctx_size > 0:\n            shared.settings['truncation_length'] = shared.args.ctx_size\n        elif loader == 'llama.cpp' and hasattr(model, 'n_ctx') and model.n_ctx:\n            shared.settings['truncation_length'] = model.n_ctx\n\n    shared.is_multimodal = False\n    if loader.lower() in ('exllamav3', 'llama.cpp') and hasattr(model, 'is_multimodal'):\n        shared.is_multimodal = model.is_multimodal()\n\n    logger.info(f\"Loaded \\\"{model_name}\\\" in {(time.time()-t0):.2f} seconds.\")\n    logger.info(f\"LOADER: \\\"{loader}\\\"\")\n    logger.info(f\"TRUNCATION LENGTH: {shared.settings['truncation_length']}\")\n    logger.info(f\"INSTRUCTION TEMPLATE: \\\"{metadata['instruction_template']}\\\"\")\n    return model, tokenizer\n\n\ndef llama_cpp_server_loader(model_name):\n    from modules.llama_cpp_server import LlamaServer\n\n    path = resolve_model_path(model_name)\n\n    if path.is_file():\n        model_file = path\n    else:\n        gguf_files = sorted(path.glob('*.gguf'))\n        if not gguf_files:\n            logger.error(f\"No .gguf models 
found in the directory: {path}\")\n            return None, None\n\n        model_file = gguf_files[0]\n\n    try:\n        model = LlamaServer(model_file)\n        return model, model\n    except Exception as e:\n        logger.error(f\"Error loading the model with llama.cpp: {str(e)}\")\n        return None, None\n\n\ndef transformers_loader(model_name):\n    from modules.transformers_loader import load_model_HF\n    return load_model_HF(model_name)\n\n\ndef ExLlamav3_HF_loader(model_name):\n    from modules.exllamav3_hf import Exllamav3HF\n\n    return Exllamav3HF.from_pretrained(model_name)\n\n\ndef ExLlamav3_loader(model_name):\n    from modules.exllamav3 import Exllamav3Model\n\n    model, tokenizer = Exllamav3Model.from_pretrained(model_name)\n    return model, tokenizer\n\n\ndef TensorRT_LLM_loader(model_name):\n    try:\n        from modules.tensorrt_llm import TensorRTLLMModel\n    except ModuleNotFoundError:\n        raise ModuleNotFoundError(\"Failed to import 'tensorrt_llm'. 
Please install it manually following the instructions in the TensorRT-LLM GitHub repository.\")\n\n    model = TensorRTLLMModel.from_pretrained(model_name)\n    return model, model.tokenizer\n\n\ndef unload_model(keep_model_name=False):\n    if shared.model is None:\n        return\n\n    model_class_name = shared.model.__class__.__name__\n    is_llamacpp = (model_class_name == 'LlamaServer')\n\n    if model_class_name in ['Exllamav3Model', 'Exllamav3HF', 'TensorRTLLMModel']:\n        shared.model.unload()\n    elif model_class_name == 'LlamaServer':\n        shared.model.stop()\n\n    shared.model = shared.tokenizer = None\n    shared.lora_names = []\n    shared.model_dirty_from_training = False\n\n    if not is_llamacpp:\n        from modules.torch_utils import clear_torch_cache\n        clear_torch_cache()\n\n    if not keep_model_name:\n        shared.model_name = 'None'\n\n\ndef reload_model():\n    unload_model()\n    shared.model, shared.tokenizer = load_model(shared.model_name)\n\n\ndef unload_model_if_idle():\n    global last_generation_time\n\n    logger.info(f\"Setting a timeout of {shared.args.idle_timeout} minutes to unload the model in case of inactivity.\")\n\n    while True:\n        shared.generation_lock.acquire()\n        try:\n            if time.time() - last_generation_time > shared.args.idle_timeout * 60:\n                if shared.model is not None:\n                    logger.info(\"Unloading the model for inactivity.\")\n                    unload_model(keep_model_name=True)\n        finally:\n            shared.generation_lock.release()\n\n        time.sleep(60)\n"
  },
  {
    "path": "modules/models_settings.py",
    "content": "import functools\nimport json\nimport re\nfrom math import floor\nfrom pathlib import Path\n\nimport yaml\n\nfrom modules import loaders, metadata_gguf, shared\nfrom modules.logging_colors import logger\nfrom modules.utils import resolve_model_path\n\n\ndef get_fallback_settings():\n    return {\n        'bf16': False,\n        'ctx_size': 8192,\n        'truncation_length': shared.settings['truncation_length'],\n        'truncation_length_info': shared.settings['truncation_length'],\n        'skip_special_tokens': shared.settings['skip_special_tokens'],\n    }\n\n\ndef get_model_metadata(model):\n    model_path = resolve_model_path(model)\n    model_settings = {}\n\n    # Get settings from user_data/models/config.yaml and user_data/models/config-user.yaml\n    settings = shared.model_config\n    for pat in settings:\n        if re.match(pat.lower(), Path(model).name.lower()):\n            for k in settings[pat]:\n                model_settings[k] = settings[pat][k]\n\n    path = model_path / 'config.json'\n    if path.exists():\n        hf_metadata = json.loads(open(path, 'r', encoding='utf-8').read())\n    else:\n        hf_metadata = None\n\n    if 'loader' not in model_settings:\n        quant_method = None if hf_metadata is None else hf_metadata.get(\"quantization_config\", {}).get(\"quant_method\", None)\n        model_settings['loader'] = infer_loader(\n            model,\n            model_settings,\n            hf_quant_method=quant_method\n        )\n\n    # GGUF metadata\n    if model_settings['loader'] == 'llama.cpp':\n        path = model_path\n        if path.is_file():\n            model_file = path\n        else:\n            gguf_files = list(path.glob('*.gguf'))\n            if not gguf_files:\n                error_msg = f\"No .gguf models found in directory: {path}\"\n                logger.error(error_msg)\n                raise FileNotFoundError(error_msg)\n\n            model_file = gguf_files[0]\n\n        metadata = 
load_gguf_metadata_with_cache(model_file)\n\n        for k in metadata:\n            if k.endswith('.context_length'):\n                model_settings['ctx_size'] = 0\n                model_settings['truncation_length_info'] = metadata[k]\n            elif k.endswith('.block_count'):\n                model_settings['gpu_layers'] = -1\n                model_settings['max_gpu_layers'] = metadata[k] + 1\n\n        if 'tokenizer.chat_template' in metadata:\n            template = metadata['tokenizer.chat_template']\n            if 'tokenizer.ggml.eos_token_id' in metadata:\n                eos_token = metadata['tokenizer.ggml.tokens'][metadata['tokenizer.ggml.eos_token_id']]\n            else:\n                eos_token = \"\"\n\n            if 'tokenizer.ggml.bos_token_id' in metadata:\n                bos_token = metadata['tokenizer.ggml.tokens'][metadata['tokenizer.ggml.bos_token_id']]\n            else:\n                bos_token = \"\"\n\n            shared.bos_token = bos_token\n            shared.eos_token = eos_token\n\n            template = re.sub(r\"\\{\\{-?\\s*raise_exception\\(.*?\\)\\s*-?\\}\\}\", \"\", template, flags=re.DOTALL)\n            template = re.sub(r'raise_exception\\([^)]*\\)', \"''\", template)\n            model_settings['instruction_template'] = 'Custom (obtained from model metadata)'\n            model_settings['instruction_template_str'] = template\n\n    else:\n        # Transformers metadata\n        if hf_metadata is not None:\n            metadata = json.loads(open(path, 'r', encoding='utf-8').read())\n            if 'pretrained_config' in metadata:\n                metadata = metadata['pretrained_config']\n\n            for k in ['max_position_embeddings', 'model_max_length', 'max_seq_len']:\n                if k in metadata:\n                    value = metadata[k]\n                elif k in metadata.get('text_config', {}):\n                    value = metadata['text_config'][k]\n                else:\n                    
continue\n\n                model_settings['truncation_length'] = value\n                model_settings['truncation_length_info'] = value\n                model_settings['ctx_size'] = min(value, 8192)\n                break\n\n            if 'torch_dtype' in metadata and metadata['torch_dtype'] == 'bfloat16':\n                model_settings['bf16'] = True\n\n    # Try to find the Jinja instruct template\n    path = model_path / 'tokenizer_config.json'\n    template = None\n\n    # 1. Prioritize reading from chat_template.jinja if it exists\n    jinja_path = model_path / 'chat_template.jinja'\n    if jinja_path.exists():\n        with open(jinja_path, 'r', encoding='utf-8') as f:\n            template = f.read()\n\n    # 2. If no .jinja file, try chat_template.json\n    if template is None:\n        json_template_path = model_path / 'chat_template.json'\n        if json_template_path.exists():\n            with open(json_template_path, 'r', encoding='utf-8') as f:\n                json_data = json.load(f)\n                if 'chat_template' in json_data:\n                    template = json_data['chat_template']\n\n    # 3. Fall back to tokenizer_config.json metadata\n    if path.exists():\n        metadata = json.loads(open(path, 'r', encoding='utf-8').read())\n\n        # Only read from metadata if we haven't already loaded from .jinja or .json\n        if template is None and 'chat_template' in metadata:\n            template = metadata['chat_template']\n            if isinstance(template, list):\n                template = template[0]['template']\n\n        # 4. 
If a template was found from any source, process it\n        if template:\n            shared.bos_token = '<s>'\n            shared.eos_token = '</s>'\n\n            for k in ['eos_token', 'bos_token']:\n                if k in metadata:\n                    value = metadata[k]\n                    if isinstance(value, dict):\n                        value = value['content']\n\n                    setattr(shared, k, value)\n\n            template = re.sub(r\"\\{\\{-?\\s*raise_exception\\(.*?\\)\\s*-?\\}\\}\", \"\", template, flags=re.DOTALL)\n            template = re.sub(r'raise_exception\\([^)]*\\)', \"''\", template)\n            model_settings['instruction_template'] = 'Custom (obtained from model metadata)'\n            model_settings['instruction_template_str'] = template\n\n    if 'instruction_template' not in model_settings:\n        model_settings['instruction_template'] = 'Alpaca'\n\n    # Apply user settings from user_data/models/config-user.yaml\n    settings = shared.user_config\n    for pat in settings:\n        if re.match(pat.lower(), Path(model).name.lower()):\n            for k in settings[pat]:\n                new_k = k\n                if k == 'n_gpu_layers':\n                    new_k = 'gpu_layers'\n\n                model_settings[new_k] = settings[pat][k]\n\n    # Load instruction template if defined by name rather than by value\n    if model_settings['instruction_template'] != 'Custom (obtained from model metadata)':\n        model_settings['instruction_template_str'] = load_instruction_template(model_settings['instruction_template'])\n\n    return model_settings\n\n\ndef infer_loader(model_name, model_settings, hf_quant_method=None):\n    path_to_model = resolve_model_path(model_name)\n    if not path_to_model.exists():\n        loader = None\n    elif shared.args.portable:\n        loader = 'llama.cpp'\n    elif len(list(path_to_model.glob('*.gguf'))) > 0:\n        loader = 'llama.cpp'\n    elif re.match(r'.*\\.gguf', 
model_name.lower()):\n        loader = 'llama.cpp'\n    elif hf_quant_method == 'exl3':\n        loader = 'ExLlamav3'\n    elif re.match(r'.*exl3', model_name.lower()):\n        loader = 'ExLlamav3'\n    else:\n        loader = 'Transformers'\n\n    return loader\n\n\ndef update_model_parameters(state, initial=False):\n    '''\n    UI: update the command-line arguments based on the interface values\n    '''\n    elements = loaders.list_model_elements()  # the names of the parameters\n\n    for i, element in enumerate(elements):\n        if element not in state:\n            continue\n\n        value = state[element]\n        if initial and element in shared.provided_arguments:\n            continue\n\n        if element == 'cpu_memory' and value == 0:\n            value = vars(shared.args_defaults)[element]\n\n        setattr(shared.args, element, value)\n\n\ndef apply_model_settings_to_state(model, state):\n    '''\n    UI: update the state variable with the model settings\n    '''\n    import gradio as gr\n    model_settings = get_model_metadata(model)\n    if 'loader' in model_settings:\n        loader = model_settings.pop('loader')\n        if not (loader == 'ExLlamav3_HF' and state['loader'] == 'ExLlamav3'):\n            state['loader'] = loader\n\n    for k in model_settings:\n        if k in state and k != 'gpu_layers':  # Skip gpu_layers, handle separately\n            state[k] = model_settings[k]\n\n    # Handle GPU layers and VRAM update for llama.cpp\n    if state['loader'] == 'llama.cpp' and 'gpu_layers' in model_settings:\n        gpu_layers = model_settings['gpu_layers']  # -1 (auto) by default, or user-saved value\n        max_layers = model_settings.get('max_gpu_layers', 256)\n        state['gpu_layers'] = gr.update(value=gpu_layers, maximum=max_layers)\n\n        vram_info = update_gpu_layers_and_vram(\n            state['loader'],\n            model,\n            gpu_layers,\n            state['ctx_size'],\n            state['cache_type'],\n       
 )\n\n        state['vram_info'] = vram_info\n\n    return state\n\n\ndef save_model_settings(model, state):\n    '''\n    Save the settings for this model to user_data/models/config-user.yaml\n    '''\n    if model == 'None':\n        yield (\"Not saving the settings because no model is selected in the menu.\")\n        return\n\n    user_config = shared.load_user_config()\n    model_regex = Path(model).name + '$'  # For exact matches\n    if model_regex not in user_config:\n        user_config[model_regex] = {}\n\n    for k in loaders.list_model_elements():\n        if k == 'loader' or k in loaders.loaders_and_params[state['loader']]:\n            user_config[model_regex][k] = state[k]\n\n    shared.user_config = user_config\n\n    output = yaml.dump(user_config, sort_keys=False)\n    p = Path(f'{shared.args.model_dir}/config-user.yaml')\n    with open(p, 'w') as f:\n        f.write(output)\n\n    yield (f\"Settings for `{model}` saved to `{p}`.\")\n\n\ndef save_instruction_template(model, template):\n    '''\n    Similar to the function above, but it saves only the instruction template.\n    '''\n    if model == 'None':\n        yield (\"Not saving the template because no model is selected in the menu.\")\n        return\n\n    user_config = shared.load_user_config()\n    model_regex = Path(model).name + '$'  # For exact matches\n    if model_regex not in user_config:\n        user_config[model_regex] = {}\n\n    if template == 'None':\n        user_config[model_regex].pop('instruction_template', None)\n    else:\n        user_config[model_regex]['instruction_template'] = template\n\n    shared.user_config = user_config\n\n    output = yaml.dump(user_config, sort_keys=False)\n    p = Path(f'{shared.args.model_dir}/config-user.yaml')\n    with open(p, 'w') as f:\n        f.write(output)\n\n    if template == 'None':\n        yield (f\"Instruction template for `{model}` unset in `{p}`, as the value for template was `{template}`.\")\n    else:\n        yield 
(f\"Instruction template for `{model}` saved to `{p}` as `{template}`.\")\n\n\n@functools.lru_cache(maxsize=1)\ndef load_gguf_metadata_with_cache(model_file):\n    return metadata_gguf.load_metadata(model_file)\n\n\ndef get_model_size_mb(model_file: Path) -> float:\n    filename = model_file.name\n\n    # Check for multipart pattern\n    match = re.match(r'(.+)-\\d+-of-\\d+\\.gguf$', filename)\n\n    if match:\n        # It's a multipart file, find all matching parts\n        base_pattern = match.group(1)\n        part_files = sorted(model_file.parent.glob(f'{base_pattern}-*-of-*.gguf'))\n        total_size = sum(p.stat().st_size for p in part_files)\n    else:\n        # Single part\n        total_size = model_file.stat().st_size\n\n    return total_size / (1024 ** 2)  # Return size in MB\n\n\ndef estimate_vram(gguf_file, gpu_layers, ctx_size, cache_type):\n    model_file = resolve_model_path(gguf_file)\n    metadata = load_gguf_metadata_with_cache(model_file)\n    size_in_mb = get_model_size_mb(model_file)\n\n    # Extract values from metadata\n    n_layers = None\n    n_kv_heads = None\n    n_attention_heads = None  # Fallback for models without separate KV heads\n    embedding_dim = None\n\n    for key, value in metadata.items():\n        if key.endswith('.block_count'):\n            n_layers = value\n        elif key.endswith('.attention.head_count_kv'):\n            n_kv_heads = max(value) if isinstance(value, list) else value\n        elif key.endswith('.attention.head_count'):\n            n_attention_heads = max(value) if isinstance(value, list) else value\n        elif key.endswith('.embedding_length'):\n            embedding_dim = value\n\n    if n_kv_heads is None:\n        n_kv_heads = n_attention_heads\n\n    if gpu_layers > n_layers:\n        gpu_layers = n_layers\n\n    # Convert cache_type to numeric\n    if cache_type == 'q4_0':\n        cache_type = 4\n    elif cache_type == 'q8_0':\n        cache_type = 8\n    else:\n        cache_type = 16\n\n  
  # Derived features\n    size_per_layer = size_in_mb / max(n_layers, 1e-6)\n    kv_cache_factor = n_kv_heads * cache_type * ctx_size\n    embedding_per_context = embedding_dim / ctx_size\n\n    # Calculate VRAM using the model\n    # Details: https://oobabooga.github.io/blog/posts/gguf-vram-formula/\n    vram = (\n        (size_per_layer - 17.99552795246051 + 3.148552680382576e-05 * kv_cache_factor)\n        * (gpu_layers + max(0.9690636483914102, cache_type - (floor(50.77817218646521 * embedding_per_context) + 9.987899908205632)))\n        + 1516.522943869404\n    )\n\n    return vram\n\n\ndef update_gpu_layers_and_vram(loader, model, gpu_layers, ctx_size, cache_type):\n    \"\"\"\n    Compute the estimated VRAM usage for the given GPU layers and return\n    an HTML string for the UI display.\n    \"\"\"\n    if loader != 'llama.cpp' or model in [\"None\", None] or not model.endswith(\".gguf\") or gpu_layers < 0 or ctx_size == 0:\n        return f\"<div id=\\\"vram-info\\\"'>Estimated VRAM to load the model: <span class=\\\"value\\\">auto</span></div>\"\n\n    vram_usage = estimate_vram(model, gpu_layers, ctx_size, cache_type)\n    return f\"<div id=\\\"vram-info\\\"'>Estimated VRAM to load the model: <span class=\\\"value\\\">{vram_usage:.0f} MiB</span></div>\"\n\n\ndef load_instruction_template(template):\n    if template == 'None':\n        return ''\n\n    for filepath in [shared.user_data_dir / 'instruction-templates' / f'{template}.yaml', shared.user_data_dir / 'instruction-templates' / 'Alpaca.yaml']:\n        if filepath.exists():\n            break\n    else:\n        return ''\n\n    with open(filepath, 'r', encoding='utf-8') as f:\n        file_contents = f.read()\n    data = yaml.safe_load(file_contents)\n    if 'instruction_template' in data:\n        return data['instruction_template']\n    else:\n        return _jinja_template_from_old_format(data)\n\n\ndef _jinja_template_from_old_format(params, verbose=False):\n    MASTER_TEMPLATE = \"\"\"\n{%- 
set ns = namespace(found=false) -%}\n{%- for message in messages -%}\n    {%- if message['role'] == 'system' -%}\n        {%- set ns.found = true -%}\n    {%- endif -%}\n{%- endfor -%}\n{%- if not ns.found -%}\n    {{- '<|PRE-SYSTEM|>' + '<|SYSTEM-MESSAGE|>' + '<|POST-SYSTEM|>' -}}\n{%- endif %}\n{%- for message in messages %}\n    {%- if message['role'] == 'system' -%}\n        {{- '<|PRE-SYSTEM|>' + message['content'] + '<|POST-SYSTEM|>' -}}\n    {%- else -%}\n        {%- if message['role'] == 'user' -%}\n            {{-'<|PRE-USER|>' + message['content'] + '<|POST-USER|>'-}}\n        {%- else -%}\n            {{-'<|PRE-ASSISTANT|>' + message['content'] + '<|POST-ASSISTANT|>' -}}\n        {%- endif -%}\n    {%- endif -%}\n{%- endfor -%}\n{%- if add_generation_prompt -%}\n    {{-'<|PRE-ASSISTANT-GENERATE|>'-}}\n{%- endif -%}\n\"\"\"\n\n    if 'context' in params and '<|system-message|>' in params['context']:\n        pre_system = params['context'].split('<|system-message|>')[0]\n        post_system = params['context'].split('<|system-message|>')[1]\n    else:\n        pre_system = ''\n        post_system = ''\n\n    pre_user = params['turn_template'].split('<|user-message|>')[0].replace('<|user|>', params['user'])\n    post_user = params['turn_template'].split('<|user-message|>')[1].split('<|bot|>')[0]\n\n    pre_assistant = '<|bot|>' + params['turn_template'].split('<|bot-message|>')[0].split('<|bot|>')[1]\n    pre_assistant = pre_assistant.replace('<|bot|>', params['bot'])\n    post_assistant = params['turn_template'].split('<|bot-message|>')[1]\n\n    def preprocess(string):\n        return string.replace('\\n', '\\\\n').replace('\\'', '\\\\\\'')\n\n    pre_system = preprocess(pre_system)\n    post_system = preprocess(post_system)\n    pre_user = preprocess(pre_user)\n    post_user = preprocess(post_user)\n    pre_assistant = preprocess(pre_assistant)\n    post_assistant = preprocess(post_assistant)\n\n    if verbose:\n        print(\n            '\\n',\n       
     repr(pre_system) + '\\n',\n            repr(post_system) + '\\n',\n            repr(pre_user) + '\\n',\n            repr(post_user) + '\\n',\n            repr(pre_assistant) + '\\n',\n            repr(post_assistant) + '\\n',\n        )\n\n    result = MASTER_TEMPLATE\n    if 'system_message' in params:\n        result = result.replace('<|SYSTEM-MESSAGE|>', preprocess(params['system_message']))\n    else:\n        result = result.replace('<|SYSTEM-MESSAGE|>', '')\n\n    result = result.replace('<|PRE-SYSTEM|>', pre_system)\n    result = result.replace('<|POST-SYSTEM|>', post_system)\n    result = result.replace('<|PRE-USER|>', pre_user)\n    result = result.replace('<|POST-USER|>', post_user)\n    result = result.replace('<|PRE-ASSISTANT|>', pre_assistant)\n    result = result.replace('<|PRE-ASSISTANT-GENERATE|>', pre_assistant.rstrip(' '))\n    result = result.replace('<|POST-ASSISTANT|>', post_assistant)\n\n    result = result.strip()\n\n    return result\n"
  },
  {
    "path": "modules/paths.py",
    "content": "import sys\nfrom pathlib import Path\n\n\ndef resolve_user_data_dir():\n    \"\"\"\n    Resolve the user_data directory path. Order of precedence:\n    1. --user-data-dir CLI flag (pre-parsed from sys.argv before argparse)\n    2. In --portable mode, prefer ../user_data if it exists\n    3. Default: 'user_data'\n    \"\"\"\n    script_dir = Path(__file__).resolve().parent.parent\n\n    # Check sys.argv for --user-data-dir before argparse runs\n    for i, arg in enumerate(sys.argv):\n        if arg == '--user-data-dir' and i + 1 < len(sys.argv):\n            return Path(sys.argv[i + 1])\n        elif arg.startswith('--user-data-dir='):\n            return Path(arg.split('=', 1)[1])\n\n    # In portable mode, prefer ../user_data if it exists\n    is_portable = '--portable' in sys.argv\n    if is_portable:\n        parent_path = script_dir.parent / 'user_data'\n        if parent_path.exists():\n            return parent_path\n\n    return Path('user_data')\n"
  },
  {
    "path": "modules/presets.py",
    "content": "import functools\nimport pprint\nfrom pathlib import Path\n\nimport yaml\n\nfrom modules import shared\nfrom modules.loaders import loaders_samplers\nfrom modules.logging_colors import logger\n\n\ndefault_preset_values = {\n    'temperature': 1,\n    'dynatemp_low': 1,\n    'dynatemp_high': 1,\n    'dynatemp_exponent': 1,\n    'smoothing_factor': 0,\n    'smoothing_curve': 1,\n    'top_p': 1,\n    'top_k': 0,\n    'min_p': 0,\n    'top_n_sigma': 0,\n    'typical_p': 1,\n    'xtc_threshold': 0.1,\n    'xtc_probability': 0,\n    'epsilon_cutoff': 0,\n    'eta_cutoff': 0,\n    'tfs': 1,\n    'top_a': 0,\n    'adaptive_target': 0,\n    'adaptive_decay': 0.9,\n    'dry_multiplier': 0,\n    'dry_allowed_length': 2,\n    'dry_base': 1.75,\n    'repetition_penalty': 1,\n    'frequency_penalty': 0,\n    'presence_penalty': 0,\n    'encoder_repetition_penalty': 1,\n    'no_repeat_ngram_size': 0,\n    'repetition_penalty_range': 1024,\n    'penalty_alpha': 0,\n    'guidance_scale': 1,\n    'mirostat_mode': 0,\n    'mirostat_tau': 5,\n    'mirostat_eta': 0.1,\n    'do_sample': True,\n    'dynamic_temperature': False,\n    'temperature_last': False,\n    'sampler_priority': 'repetition_penalty\\npresence_penalty\\nfrequency_penalty\\ndry\\ntop_n_sigma\\ntemperature\\ndynamic_temperature\\nquadratic_sampling\\ntop_k\\ntop_p\\ntypical_p\\nepsilon_cutoff\\neta_cutoff\\ntfs\\ntop_a\\nmin_p\\nadaptive_p\\nmirostat\\nxtc\\nencoder_repetition_penalty\\nno_repeat_ngram',\n    'dry_sequence_breakers': '\"\\\\n\", \":\", \"\\\\\"\", \"*\"',\n}\n\n\ndef default_preset():\n    result = dict(default_preset_values)\n\n    if shared.args.portable:\n        samplers = result['sampler_priority'].split('\\n')\n        samplers = [sampler for sampler in samplers if sampler in [\"dry\", \"top_k\", \"top_p\", \"top_n_sigma\", \"min_p\", \"temperature\", \"xtc\", \"typical_p\", \"repetition_penalty\"]]\n        result['sampler_priority'] = '\\n'.join(samplers)\n\n    return 
result\n\n\ndef presets_params():\n    return [k for k in default_preset()]\n\n\ndef load_preset(name, verbose=False):\n    generate_params = default_preset()\n    if name not in ['None', None, '']:\n        path = shared.user_data_dir / 'presets' / f'{name}.yaml'\n        if path.exists():\n            with open(path, 'r') as infile:\n                preset = yaml.safe_load(infile)\n\n            for k in preset:\n                generate_params[k] = preset[k]\n        else:\n            logger.error(f\"The preset \\\"{name}\\\" does not exist under \\\"{path}\\\". Using the default parameters.\")\n\n    if verbose:\n        logger.info(f\"\\\"{name}\\\" preset:\")\n        pprint.PrettyPrinter(indent=4, width=1, sort_dicts=False).pprint(remove_defaults(generate_params))\n\n    return generate_params\n\n\n@functools.cache\ndef load_preset_memoized(name):\n    return load_preset(name)\n\n\ndef load_preset_for_ui(name, state):\n    generate_params = load_preset(name, verbose=True)\n    state.update(generate_params)\n    return state, *[generate_params[k] for k in presets_params()]\n\n\ndef reset_preset_for_ui(name, state):\n    \"\"\"Reset current preset to its saved values from file\"\"\"\n    generate_params = load_preset(name, verbose=True)\n    state.update(generate_params)\n    return state, *[generate_params[k] for k in presets_params()]\n\n\ndef neutralize_samplers_for_ui(state):\n    \"\"\"Set all samplers to their default/neutral values\"\"\"\n    generate_params = default_preset()\n    state.update(generate_params)\n    return state, *[generate_params[k] for k in presets_params()]\n\n\ndef loader_contains(sampler):\n    if sampler == 'dynamic_temperature' and 'dynatemp_low' in loaders_samplers[shared.args.loader]:\n        return True\n    else:\n        return sampler in loaders_samplers[shared.args.loader]\n\n\ndef remove_defaults(state):\n    defaults = default_preset()\n    data = {k: state[k] for k in presets_params()}\n\n    for k in 
list(data.keys()):\n        if data[k] == defaults[k]:\n            del data[k]\n\n    return data\n\n\ndef generate_preset_yaml(state):\n    data = remove_defaults(state)\n    return yaml.dump(data, sort_keys=False)\n"
  },
  {
    "path": "modules/prompts.py",
    "content": "from pathlib import Path\n\nfrom modules import shared, utils\nfrom modules.text_generation import get_encoded_length\n\n\ndef load_prompt(fname):\n    if not fname:\n        # Create new file\n        new_name = utils.current_time()\n        prompt_path = shared.user_data_dir / \"logs\" / \"notebook\" / f\"{new_name}.txt\"\n        prompt_path.parent.mkdir(parents=True, exist_ok=True)\n        initial_content = \"In this story,\"\n        prompt_path.write_text(initial_content, encoding='utf-8')\n\n        # Update settings to point to new file\n        shared.settings['prompt-notebook'] = new_name\n\n        return initial_content\n\n    file_path = shared.user_data_dir / 'logs' / 'notebook' / f'{fname}.txt'\n    if file_path.exists():\n        with open(file_path, 'r', encoding='utf-8') as f:\n            text = f.read()\n            text = text.rstrip()\n\n            return text\n    else:\n        return ''\n\n\ndef count_tokens(text):\n    try:\n        tokens = get_encoded_length(text)\n        return str(tokens)\n    except Exception:\n        return '0'\n"
  },
  {
    "path": "modules/reasoning.py",
    "content": "import html as html_module\n\n# Thinking block format definitions: (start_tag, end_tag, content_start_tag)\n# Use None for start_tag to match from beginning (end-only formats should be listed last)\nTHINKING_FORMATS = [\n    ('<think>', '</think>', None),\n    ('<|channel|>analysis<|message|>', '<|end|>', '<|channel|>final<|message|>'),\n    ('<|channel|>commentary<|message|>', '<|end|>', '<|channel|>final<|message|>'),\n    ('<seed:think>', '</seed:think>', None),\n    ('<|think|>', '<|end|>', '<|content|>'),  # Solar Open\n    # ('Thinking Process:', '</think>', None),  # Qwen3.5 verbose thinking outside tags -- removed: too prone to false positives in streaming\n    (None, '</think>', None),  # End-only variant (e.g., Qwen3-next)\n]\n\n\ndef extract_reasoning(text, html_escaped=False):\n    \"\"\"Extract reasoning/thinking blocks from the beginning of a string.\n\n    When html_escaped=True, tags are HTML-escaped before searching\n    (for use on already-escaped UI strings).\n\n    Returns (reasoning_content, final_content) where reasoning_content is\n    None if no thinking block is found.\n    \"\"\"\n    if not text:\n        return None, text\n\n    esc = html_module.escape if html_escaped else lambda s: s\n\n    for start_tag, end_tag, content_tag in THINKING_FORMATS:\n        end_esc = esc(end_tag)\n        content_esc = esc(content_tag) if content_tag else None\n\n        if start_tag is None:\n            # End-only format: require end tag, start from beginning\n            end_pos = text.find(end_esc)\n            if end_pos == -1:\n                continue\n            thought_start = 0\n        else:\n            # Normal format: require start tag\n            start_esc = esc(start_tag)\n            start_pos = text.find(start_esc)\n            if start_pos == -1:\n                # During streaming, the start tag may be arriving partially.\n                # If the text is a prefix of a start tag, return empty content\n                
# to prevent the partial tag from leaking.\n                stripped = text.strip()\n                if stripped and start_esc.startswith(stripped):\n                    return '', ''\n                continue\n            thought_start = start_pos + len(start_esc)\n            end_pos = text.find(end_esc, thought_start)\n\n        if end_pos == -1:\n            # End tag missing - check if content tag can serve as fallback\n            if content_esc:\n                content_pos = text.find(content_esc, thought_start)\n                if content_pos != -1:\n                    thought_end = content_pos\n                    content_start = content_pos + len(content_esc)\n                else:\n                    thought_end = len(text)\n                    content_start = len(text)\n            else:\n                thought_end = len(text)\n                content_start = len(text)\n        else:\n            thought_end = end_pos\n            if content_esc:\n                content_pos = text.find(content_esc, end_pos)\n                if content_pos != -1:\n                    content_start = content_pos + len(content_esc)\n                else:\n                    # Content tag expected but not yet present (e.g. 
partial\n                    # streaming) — suppress intermediate tags between end_tag\n                    # and content_tag so they don't leak as content.\n                    content_start = len(text)\n            else:\n                content_start = end_pos + len(end_esc)\n\n        return text[thought_start:thought_end], text[content_start:]\n\n    # Handle standalone GPT-OSS final channel marker without a preceding\n    # analysis/commentary block (the model skipped thinking entirely).\n    for marker in ['<|start|>assistant<|channel|>final<|message|>', '<|channel|>final<|message|>']:\n        marker_esc = esc(marker)\n        pos = text.find(marker_esc)\n        if pos != -1:\n            before = text[:pos].strip()\n            after = text[pos + len(marker_esc):]\n            return (before if before else None), after\n\n    return None, text\n"
  },
  {
    "path": "modules/sampler_hijack.py",
    "content": "import json\nimport math\nimport pprint\nimport random\n\nimport torch\nimport transformers\nfrom transformers.generation.logits_process import (\n    LogitNormalization,\n    LogitsProcessor,\n    LogitsProcessorList\n)\n\nfrom modules import shared\nfrom modules.logging_colors import logger\nfrom modules.torch_utils import get_device\n\noriginal_init = transformers.GenerationConfig.__init__\noriginal_get_logits_processor = transformers.GenerationMixin._get_logits_processor\n\nglobal_scores = None\n\n\nclass TemperatureLogitsWarperCustom(LogitsProcessor):\n    '''\n    A copy of the original Transformers temperature logits warper.\n    '''\n\n    def __init__(self, temperature: float):\n        if not isinstance(temperature, float) or not (temperature > 0):\n            except_msg = (\n                f\"`temperature` (={temperature}) has to be a strictly positive float, otherwise your next token \"\n                \"scores will be invalid.\"\n            )\n            if isinstance(temperature, float) and temperature == 0.0:\n                except_msg += \" If you're looking for greedy decoding strategies, set `do_sample=False`.\"\n\n            raise ValueError(except_msg)\n\n        self.temperature = temperature\n\n    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor:\n        scores = scores / self.temperature\n        return scores\n\n\nclass DynamicTemperatureLogitsWarper(LogitsProcessor):\n    '''\n    Dynamic temperature.\n    '''\n\n    def __init__(self, dynatemp_low: float, dynatemp_high: float, dynatemp_exponent: float):\n        self.dynatemp_low = dynatemp_low\n        self.dynatemp_high = dynatemp_high\n        self.dynatemp_exponent = dynatemp_exponent\n\n    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor:\n        min_temp = self.dynatemp_low\n        max_temp = self.dynatemp_high\n        exponent_val = self.dynatemp_exponent\n\n    
    # Convert logits to probabilities\n        probs = torch.softmax(scores, dim=-1)\n\n        # Calculate entropy of the softmax probabilities\n        entropy = -1.0 * torch.where(probs > 0, probs * torch.log(probs), torch.zeros_like(probs)).sum()\n\n        # Guard against future possible division by zero\n        entropy = max(entropy, torch.tensor(1e-10))  # Ensures entropy is slightly greater than 0\n\n        # Any logits which are not -Infinity will be considered for calculating max entropy.\n        num_valid_tokens = torch.sum(scores > -float('inf')).item()\n\n        # Now, calculate the max entropy by using only the valid tokens' count\n        max_entropy = math.log(num_valid_tokens)\n\n        # Guard against future possible division by zero\n        max_entropy = max_entropy if max_entropy > 0.0 else 1e-10\n\n        # Normalize the entropy\n        normalized_entropy = entropy / max_entropy\n\n        # Map the normalized entropy to the desired temperature range using the power function\n        dyn_temp = min_temp + (max_temp - min_temp) * (normalized_entropy.pow(exponent_val))\n\n        # Apply the dynamically calculated temperature scaling\n        scores = scores / dyn_temp\n\n        # print(\"----------------------\\nTemperature from generation_config:\", self.temperature)\n        # print(\"min_temp:\", min_temp)\n        # print(\"max_temp:\", max_temp)\n        # print(\"Entropy:\", entropy.item())\n        # print(\"Max Possible Entropy considering valid tokens only:\", max_entropy)\n        # print(\"Normalized Entropy:\", normalized_entropy.item())\n        # print(\"Dynamic Temperature (dyn_temp):\", dyn_temp.item())\n        # print(\"----------------------\")\n\n        # max_prob_token_id = torch.argmax(scores, dim=-1)  # Get the token ID with the highest probability\n        # max_prob_token = shared.tokenizer.convert_ids_to_tokens(int(max_prob_token_id))  # Convert ID to token\n        # print(\"--- T=\", float(dyn_temp), 
\"token=\", max_prob_token, \"min=\", min_temp, \"max=\", max_temp, \"exponent=\", exponent_val)\n\n        return scores\n\n\nclass QuadraticSamplingLogitsWarper(LogitsProcessor):\n    '''\n    Quadratic sampling with smoothing factor and smoothing curve parameters.\n    '''\n\n    def __init__(self, smoothing_factor, smoothing_curve):\n        self.smoothing_factor = smoothing_factor\n        self.smoothing_curve = smoothing_curve\n\n    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor:\n\n        # Compute necessary values\n        max_logit = scores.max()\n        diff = scores - max_logit\n        k = (3 - self.smoothing_curve) / 2\n        s = (self.smoothing_curve - 1) / 2\n\n        # Apply transformation to non-negative infinity values\n        transformed_logits = torch.where(\n            scores != float('-inf'),\n            -(k * self.smoothing_factor * diff**2) + (s * self.smoothing_factor * diff**3) + max_logit,\n            scores\n        )\n\n        return transformed_logits\n\n\nclass TailFreeLogitsWarper(LogitsProcessor):\n    def __init__(self, tfs: float, filter_value: float = -float(\"Inf\"), min_tokens_to_keep: int = 1):\n        tfs = float(tfs)\n        if tfs < 0 or tfs > 1.0:\n            raise ValueError(f\"`tfs` has to be a float >= 0 and <= 1, but is {tfs}\")\n        self.tfs = tfs\n        self.filter_value = filter_value\n        self.min_tokens_to_keep = min_tokens_to_keep\n\n    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor:\n        sorted_logits, sorted_indices = torch.sort(scores, descending=True)\n        probs = sorted_logits.softmax(dim=-1)\n\n        # Compute second derivative normalized CDF\n        d2 = probs.diff().diff().abs()\n        normalized_d2 = d2 / d2.sum(dim=-1, keepdim=True)\n        normalized_d2_cdf = normalized_d2.cumsum(dim=-1)\n\n        # Remove tokens with CDF value above the threshold (token with 0 are 
kept)\n        sorted_indices_to_remove = normalized_d2_cdf > self.tfs\n\n        # Centre the distribution around the cutoff as in the original implementation of the algorithm\n        sorted_indices_to_remove = torch.cat(\n            (\n                torch.zeros(scores.shape[0], 1, dtype=torch.bool, device=scores.device),\n                sorted_indices_to_remove,\n                torch.ones(scores.shape[0], 1, dtype=torch.bool, device=scores.device),\n            ),\n            dim=-1,\n        )\n\n        if self.min_tokens_to_keep > 1:\n            # Keep at least min_tokens_to_keep\n            sorted_indices_to_remove[..., : self.min_tokens_to_keep] = 0\n\n        indices_to_remove = sorted_indices_to_remove.scatter(1, sorted_indices, sorted_indices_to_remove)\n        scores = scores.masked_fill(indices_to_remove, self.filter_value)\n        return scores\n\n\nclass TopALogitsWarper(LogitsProcessor):\n    def __init__(self, top_a: float, filter_value: float = -float(\"Inf\"), min_tokens_to_keep: int = 1):\n        top_a = float(top_a)\n        if top_a < 0 or top_a > 1.0:\n            raise ValueError(f\"`top_a` has to be a float >= 0 and <= 1, but is {top_a}\")\n        self.top_a = top_a\n        self.filter_value = filter_value\n        self.min_tokens_to_keep = min_tokens_to_keep\n\n    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor:\n        sorted_logits, sorted_indices = torch.sort(scores, descending=True)\n        probs = sorted_logits.softmax(dim=-1)\n\n        # Remove tokens with probability less than top_a*(max(probs))^2 (token with 0 are kept)\n        probs_max = probs[..., 0, None]\n        sorted_indices_to_remove = probs < probs_max * probs_max * self.top_a\n\n        if self.min_tokens_to_keep > 1:\n            # Keep at least min_tokens_to_keep\n            sorted_indices_to_remove[..., : self.min_tokens_to_keep] = 0\n\n        indices_to_remove = sorted_indices_to_remove.scatter(1, 
sorted_indices, sorted_indices_to_remove)\n        scores = scores.masked_fill(indices_to_remove, self.filter_value)\n        return scores\n\n\nclass TopNSigmaLogitsWarper(LogitsProcessor):\n    def __init__(self, n_sigma: float = 2.0, filter_value: float = -float(\"Inf\"), min_tokens_to_keep: int = 1):\n        \"\"\"\n        Initialize Top-nσ Sampling logits warper.\n\n        Args:\n            n_sigma: The threshold multiplier for standard deviation\n            filter_value: Value to assign to filtered logits\n            min_tokens_to_keep: Minimum number of tokens to keep\n        \"\"\"\n        if n_sigma < 0:\n            raise ValueError(f\"`n_sigma` must be a non-negative float, but is {n_sigma}\")\n        self.n_sigma = n_sigma\n        self.filter_value = filter_value\n        self.min_tokens_to_keep = min_tokens_to_keep\n\n    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor:\n        # Calculate max of logits\n        max_logit = torch.max(scores, dim=-1, keepdim=True)[0]\n\n        # Calculate standard deviation only on finite values\n        finite_mask = torch.isfinite(scores)\n        finite_scores = scores.masked_fill(~finite_mask, 0.0)\n        std_logit = torch.std(finite_scores, dim=-1, keepdim=True)\n\n        # Create mask where tokens with logits >= max_logit - n_sigma * std_logit are kept\n        threshold = max_logit - self.n_sigma * std_logit\n        indices_to_remove = scores < threshold\n\n        if self.min_tokens_to_keep > 1:\n            # Keep at least min_tokens_to_keep tokens\n            top_k_indices = torch.topk(scores, self.min_tokens_to_keep, dim=-1)[1]\n            indices_to_remove.scatter_(-1, top_k_indices, False)\n\n        # Apply mask by setting filtered tokens to filter_value\n        scores = scores.masked_fill(indices_to_remove, self.filter_value)\n\n        return scores\n\n\nclass AdaptivePLogitsWarper(LogitsProcessor):\n    '''\n    Adaptive-p sampling. 
A stateful sampler that favors tokens near a target\n    probability, using an EMA-based control loop to adapt over time.\n\n    Matches the llama.cpp implementation from PR #17927.\n    '''\n\n    DISTRIBUTION_WIDTH = 0.3\n    PEAK_LOGIT_VALUE = 5.0\n    SHARPNESS = 10.0\n    INV_WIDTH = 1.0 / DISTRIBUTION_WIDTH\n\n    def __init__(self, adaptive_target, adaptive_decay, filter_value=-float(\"Inf\"), min_tokens_to_keep=1):\n        self.target = adaptive_target\n        self.decay = min(adaptive_decay, 0.99)\n        self.filter_value = filter_value\n        self.min_tokens_to_keep = min_tokens_to_keep\n\n        # Initialize EMA at equilibrium (as if target was already achieved)\n        if self.decay < 1.0:\n            self.weighted_sum = self.target / (1.0 - self.decay)\n            self.total_weight = 1.0 / (1.0 - self.decay)\n        else:\n            self.weighted_sum = 0.0\n            self.total_weight = 0.0\n\n    def __call__(self, input_ids, scores):\n        logits = scores[0]\n\n        # Compute original probabilities (before transform)\n        probs = torch.softmax(logits, dim=-1)\n\n        # Compute adapted target using proportional control on the EMA\n        if self.total_weight > 0:\n            ema_avg = self.weighted_sum / self.total_weight\n        else:\n            ema_avg = self.target\n\n        adapted_target = max(0.0, min(1.0, 2.0 * self.target - ema_avg))\n\n        # Adaptive probability transform:\n        # quadratic near target for fine differentiation, transitioning\n        # to linear decay in the tails for proper suppression after softmax\n        dist = torch.abs((probs - adapted_target) * self.INV_WIDTH)\n        new_logits = self.PEAK_LOGIT_VALUE - self.SHARPNESS * dist * dist / (1.0 + dist)\n\n        # Preserve already-masked tokens (-inf logits from prior samplers)\n        new_logits = torch.where(torch.isfinite(logits), new_logits, logits)\n\n        # Softmax and sample from the transformed distribution\n        
new_probs = torch.softmax(new_logits, dim=-1)\n        selected = torch.multinomial(new_probs, num_samples=1, replacement=True)\n\n        # Update EMA with the original probability of the selected token\n        original_prob = probs[selected[0]].item()\n        self.weighted_sum = original_prob + self.decay * self.weighted_sum\n        self.total_weight = 1.0 + self.decay * self.total_weight\n\n        # Mask all tokens except the selected one\n        indices_to_remove = torch.ones_like(scores[0], dtype=torch.bool)\n        indices_to_remove[selected[0]] = False\n        indices_to_remove = indices_to_remove.unsqueeze(0)\n        scores = scores.masked_fill(indices_to_remove, self.filter_value)\n        return scores\n\n\n# Exclude Top Choices (XTC)\nclass XTCLogitsWarper(LogitsProcessor):\n    def __init__(self, threshold: float, probability: float, filter_value: float = -float(\"Inf\")):\n        self.threshold = threshold\n        self.probability = probability\n        self.filter_value = filter_value\n        self.special_token_ids = [\n            shared.tokenizer.encode(\"\\n\")[-1],\n        ]\n\n        if shared.tokenizer.eos_token_id is not None:\n            self.special_token_ids.append(shared.tokenizer.eos_token_id)\n\n    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor:\n        # `random` returns values in the half-open range [0, 1), so setting `probability`\n        # to 0 means the sampler never takes action, while setting it to 1 means the sampler\n        # always takes action.\n        #\n        # Note that while XTC is most intuitively described as \"if multiple tokens meet\n        # the threshold, then with probability...\", reversing the two conditions is logically\n        # equivalent, and improves performance because processing can immediately be stopped\n        # if the random check fails.\n        if random.random() >= self.probability:\n            return scores\n\n        
sorted_logits, sorted_indices = torch.sort(scores, descending=True)\n        probs = sorted_logits.softmax(dim=-1)\n\n        sorted_indices_to_remove = torch.full_like(probs, False, dtype=torch.bool)\n\n        # This operation sets exactly those indices to `True` for which the next index has\n        # probability above the threshold. Since `probs` is sorted, those are the indices\n        # of all tokens that meet the threshold, *except* the least probable one.\n        sorted_indices_to_remove[..., :-1] = probs[..., 1:] >= self.threshold\n\n        # Convert sorted_indices_to_remove to the original indices\n        indices_to_remove = sorted_indices_to_remove.scatter(1, sorted_indices, sorted_indices_to_remove)\n\n        # If newline or EOS tokens would be removed, return the original scores\n        if indices_to_remove[:, self.special_token_ids].any():\n            return scores\n\n        # Otherwise, remove tokens with the mask\n        scores = scores.masked_fill(indices_to_remove, self.filter_value)\n        return scores\n\n\nclass DRYLogitsProcessor(LogitsProcessor):\n    def __init__(self, multiplier: float, base: float, allowed_length: int, sequence_breakers: set[int], _range: int):\n        self.multiplier = multiplier\n        self.base = base\n        self.allowed_length = allowed_length\n        self.sequence_breakers = sequence_breakers\n        self._range = _range\n\n    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor:\n        if self._range > 0:\n            input_ids = input_ids[:, -self._range:]\n\n        for input_ids_row, scores_row in zip(input_ids, scores):\n            # Use normal Python data types for improved performance\n            input_ids = input_ids_row.tolist()\n\n            last_token = input_ids[-1]\n            if last_token in self.sequence_breakers:\n                continue\n\n            # Exclude the last token as it always matches.\n            match_indices = []\n   
         for idx, val in enumerate(input_ids[:-1]):\n                if val == last_token:\n                    match_indices.append(idx)\n\n            # Stores the maximum matching sequence length\n            # for each token immediately following the sequence in the input.\n            match_lengths = {}\n\n            for i in match_indices:\n                next_token = input_ids[i + 1]\n\n                if next_token in self.sequence_breakers:\n                    continue\n\n                # We have already found that `last_token` matches at this index,\n                # so the match is at least of length 1.\n                match_length = 1\n\n                # Extend the match backwards (at most to 50 to prevent exponent overflow at penalty calculation) (this cap also improves performance on worst case)\n                while match_length < 50:\n                    j = i - match_length\n                    if j < 0:\n                        # Start of input reached.\n                        break\n\n                    previous_token = input_ids[-(match_length + 1)]\n                    if input_ids[j] != previous_token:\n                        # Start of match reached.\n                        break\n\n                    if previous_token in self.sequence_breakers:\n                        # Sequence-breaking token reached.\n                        break\n\n                    match_length += 1\n\n                if next_token in match_lengths:\n                    match_lengths[next_token] = max(match_length, match_lengths[next_token])\n                else:\n                    match_lengths[next_token] = match_length\n\n            # Apply penalties.\n            for token, match_length in match_lengths.items():\n                if match_length >= self.allowed_length:\n                    penalty = self.multiplier * self.base ** (match_length - self.allowed_length)\n                    scores_row[token] -= penalty\n\n        return 
scores\n\n\nclass MirostatLogitsWarper(LogitsProcessor):\n    def __init__(self, mirostat_mode: int, mirostat_tau: float, mirostat_eta: float, filter_value: float = -float(\"Inf\"), min_tokens_to_keep: int = 1):\n        if mirostat_mode not in [2]:\n            raise ValueError(f\"`mirostat` has to be a an integer 2, but is {mirostat_mode}\")\n\n        self.mirostat_mode = mirostat_mode\n        self.mirostat_eta = mirostat_eta\n        self.mirostat_tau = mirostat_tau\n        self.filter_value = filter_value\n        self.min_tokens_to_keep = min_tokens_to_keep\n        self.mu = 2 * self.mirostat_tau\n        self.e = 0\n\n    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor:\n        logits = scores[0]\n        sorted_logits, sorted_indices = torch.sort(logits, descending=True)\n        prob_original = torch.softmax(sorted_logits, dim=-1).tolist()  # candidates\n\n        # Truncate the words with surprise values greater than mu\n        for i, candidate in enumerate(prob_original):\n            if candidate > 0 and -math.log2(candidate) > self.mu:\n                if (i == 0):\n                    sorted_logits = sorted_logits[:1]\n                else:\n                    sorted_logits = sorted_logits[:i]\n                break\n\n        # Normalize the probabilities of the remaining words\n        prob_topk = torch.softmax(sorted_logits, dim=0)\n        prev_i = torch.multinomial(prob_topk, num_samples=1, replacement=True)\n        device = get_device()\n        if device:\n            prob_topk = prob_topk.to(device)\n            prev_i = prev_i.to(device)\n\n        observed_surprise = -math.log2(prob_topk[prev_i])\n        self.e = observed_surprise - self.mirostat_tau\n\n        # Update mu using the learning rate and error\n        self.mu -= self.mirostat_eta * self.e\n\n        sorted_indices_to_remove = torch.ones_like(scores[0], dtype=torch.bool)\n        sorted_indices_to_remove[prev_i] = False\n\n 
       indices_to_remove = sorted_indices_to_remove.unsqueeze(0).scatter(1, sorted_indices.unsqueeze(0), sorted_indices_to_remove.unsqueeze(0))\n        scores = scores.masked_fill(indices_to_remove, self.filter_value)\n        return scores\n\n\nclass SpyLogitsWarper(LogitsProcessor):\n    def __init__(self):\n        pass\n\n    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor:\n        global global_scores\n        global_scores = scores\n        return scores\n\n\nclass RepetitionPenaltyLogitsProcessorWithRange(LogitsProcessor):\n    def __init__(self, penalty: float, _range: int):\n        if not (penalty > 0):\n            raise ValueError(f\"`penalty` has to be strictly positive, but is {penalty}\")\n        self.penalty = penalty\n        self._range = _range\n\n    def apply_repetition_penalty(self, input_ids_row, scores_row):\n        unique_ids = torch.unique(input_ids_row)\n        score = torch.gather(scores_row, 0, unique_ids)\n\n        # Apply multiplicative repetition penalty\n        score = torch.where(score < 0, score * self.penalty, score / self.penalty)\n        scores_row.scatter_(0, unique_ids, score)\n        return scores_row\n\n    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor:\n        input_ids = input_ids[:, -self._range:]\n        for input_ids_row, scores_row in zip(input_ids, scores):\n            scores_row = self.apply_repetition_penalty(input_ids_row, scores_row)\n\n        return scores\n\n\nclass PresencePenaltyLogitsProcessor(LogitsProcessor):\n    def __init__(self, presence_penalty: float, _range: int):\n        self.presence_penalty = presence_penalty\n        self._range = _range\n\n    def apply_presence_penalty(self, input_ids_row, scores_row):\n        unique_ids, counts = torch.unique(input_ids_row, return_counts=True)\n\n        # Apply presence penalty\n        raw_presence_penalty = (counts > 0).to(scores_row.dtype)\n    
    presence_penalty = raw_presence_penalty * self.presence_penalty\n        scores_row.scatter_add_(0, unique_ids, -presence_penalty)\n        return scores_row\n\n    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor:\n        input_ids = input_ids[:, -self._range:]\n        for input_ids_row, scores_row in zip(input_ids, scores):\n            scores_row = self.apply_presence_penalty(input_ids_row, scores_row)\n        return scores\n\n\nclass FrequencyPenaltyLogitsProcessor(LogitsProcessor):\n    def __init__(self, frequency_penalty: float, _range: int):\n        self.frequency_penalty = frequency_penalty\n        self._range = _range\n\n    def apply_frequency_penalty(self, input_ids_row, scores_row):\n        unique_ids, counts = torch.unique(input_ids_row, return_counts=True)\n\n        # Apply frequency penalty\n        raw_frequency_penalty = counts.to(scores_row.dtype)\n        frequency_penalty = raw_frequency_penalty * self.frequency_penalty\n        scores_row.scatter_add_(0, unique_ids, -frequency_penalty)\n        return scores_row\n\n    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor:\n        input_ids = input_ids[:, -self._range:]\n        for input_ids_row, scores_row in zip(input_ids, scores):\n            scores_row = self.apply_frequency_penalty(input_ids_row, scores_row)\n        return scores\n\n\ndef get_logits_processor_patch(self, **kwargs):\n    generation_config = kwargs['generation_config']\n\n    # Parameter sanitization\n    if isinstance(generation_config.temperature, int):\n        generation_config.temperature = float(generation_config.temperature)  # Must be float\n\n    # Get the original warpers\n    warpers = original_get_logits_processor(self, **kwargs)\n\n    for i in range(len(warpers) - 1, -1, -1):\n        # Replace temperature with our modified class.\n        if warpers[i].__class__.__name__ == 'TemperatureLogitsWarper':\n      
      warpers[i] = TemperatureLogitsWarperCustom(\n                generation_config.temperature,\n            )\n\n        # Stuff we don't need\n        elif warpers[i].__class__.__name__ in ['RepetitionPenaltyLogitsProcessor']:\n            del warpers[i]\n\n    # Add custom warpers\n    warpers_to_add = LogitsProcessorList()\n    min_tokens_to_keep = 2 if generation_config.num_beams > 1 else 1\n\n    if generation_config.repetition_penalty is not None and generation_config.repetition_penalty != 1.0:\n        warpers_to_add.append(\n            RepetitionPenaltyLogitsProcessorWithRange(\n                penalty=generation_config.repetition_penalty,\n                _range=generation_config.repetition_penalty_range\n            )\n        )\n\n    if generation_config.presence_penalty is not None and generation_config.presence_penalty != 0.0:\n        warpers_to_add.append(\n            PresencePenaltyLogitsProcessor(\n                presence_penalty=generation_config.presence_penalty,\n                _range=generation_config.repetition_penalty_range\n            )\n        )\n\n    if generation_config.frequency_penalty is not None and generation_config.frequency_penalty != 0.0:\n        warpers_to_add.append(\n            FrequencyPenaltyLogitsProcessor(\n                frequency_penalty=generation_config.frequency_penalty,\n                _range=generation_config.repetition_penalty_range\n            )\n        )\n\n    if generation_config.dry_multiplier is not None and generation_config.dry_multiplier > 0.0:\n        dry_sequence_breakers = generation_config.dry_sequence_breakers\n\n        # Support both JSON array notation and comma-separated strings.\n        if not dry_sequence_breakers.startswith(\"[\"):\n            dry_sequence_breakers = \"[\" + dry_sequence_breakers + \"]\"\n\n        sequence_breaker_strings = json.loads(dry_sequence_breakers)\n        # Prefix with 'a' to get the correct encoding of the token at the end of a text.\n        
sequence_breakers = {\n            shared.tokenizer.encode(f'a{s}')[-1] for s in sequence_breaker_strings\n        }\n\n        warpers.append(\n            DRYLogitsProcessor(\n                multiplier=generation_config.dry_multiplier,\n                base=generation_config.dry_base,\n                allowed_length=generation_config.dry_allowed_length,\n                sequence_breakers=sequence_breakers,\n                _range=generation_config.repetition_penalty_range,\n            )\n        )\n\n    if generation_config.tfs is not None and 0.0 <= generation_config.tfs < 1.0:\n        warpers_to_add.append(\n            TailFreeLogitsWarper(\n                tfs=generation_config.tfs,\n                min_tokens_to_keep=min_tokens_to_keep\n            )\n        )\n\n    if generation_config.top_a is not None and 0.0 < generation_config.top_a <= 1.0:\n        warpers_to_add.append(\n            TopALogitsWarper(\n                top_a=generation_config.top_a,\n                min_tokens_to_keep=min_tokens_to_keep\n            )\n        )\n\n    if generation_config.top_n_sigma is not None and generation_config.top_n_sigma > 0.0:\n        warpers_to_add.append(\n            TopNSigmaLogitsWarper(\n                n_sigma=generation_config.top_n_sigma,\n                min_tokens_to_keep=min_tokens_to_keep\n            )\n        )\n\n    if generation_config.adaptive_target is not None and generation_config.adaptive_target > 0.0:\n        warpers_to_add.append(\n            AdaptivePLogitsWarper(\n                adaptive_target=generation_config.adaptive_target,\n                adaptive_decay=generation_config.adaptive_decay,\n                min_tokens_to_keep=min_tokens_to_keep\n            )\n        )\n\n    if generation_config.xtc_probability is not None and generation_config.xtc_probability > 0:\n        warpers_to_add.append(\n            XTCLogitsWarper(\n                threshold=generation_config.xtc_threshold,\n                
probability=generation_config.xtc_probability,\n            )\n        )\n\n    if generation_config.dynamic_temperature:\n        warpers_to_add.append(\n            DynamicTemperatureLogitsWarper(\n                dynatemp_low=generation_config.dynatemp_low,\n                dynatemp_high=generation_config.dynatemp_high,\n                dynatemp_exponent=generation_config.dynatemp_exponent,\n            )\n        )\n\n    if generation_config.smoothing_factor > 0:\n        warpers_to_add.append(\n            QuadraticSamplingLogitsWarper(\n                smoothing_factor=generation_config.smoothing_factor,\n                smoothing_curve=generation_config.smoothing_curve\n            )\n        )\n\n    if generation_config.mirostat_mode is not None and generation_config.mirostat_mode == 2:\n        warpers_to_add.append(\n            MirostatLogitsWarper(\n                mirostat_mode=generation_config.mirostat_mode,\n                mirostat_eta=generation_config.mirostat_eta,\n                mirostat_tau=generation_config.mirostat_tau,\n                min_tokens_to_keep=min_tokens_to_keep\n            )\n        )\n\n    if len(warpers) > 0 and isinstance(warpers[-1], LogitNormalization):\n        normalize = warpers.pop(-1)\n    else:\n        normalize = None\n\n    warpers += warpers_to_add\n\n    # Sort the samplers.\n    sampler_priority = generation_config.sampler_priority\n\n    # Handle temperature_last\n    if generation_config.temperature_last:\n        for param_name in ['temperature', 'dynamic_temperature', 'quadratic_sampling']:\n            if param_name in sampler_priority:\n                index = sampler_priority.index(param_name)\n                sampler_priority.append(sampler_priority.pop(index))\n            else:\n                sampler_priority.append(param_name)\n\n    class_name_to_nickname = {\n        'DynamicTemperatureLogitsWarper': 'dynamic_temperature',\n        'EpsilonLogitsWarper': 'epsilon_cutoff',\n        
'EtaLogitsWarper': 'eta_cutoff',\n        'MinPLogitsWarper': 'min_p',\n        'MirostatLogitsWarper': 'mirostat',\n        'QuadraticSamplingLogitsWarper': 'quadratic_sampling',\n        'TailFreeLogitsWarper': 'tfs',\n        'TemperatureLogitsWarperCustom': 'temperature',\n        'TopALogitsWarper': 'top_a',\n        'TopNSigmaLogitsWarper': 'top_n_sigma',\n        'AdaptivePLogitsWarper': 'adaptive_p',\n        'TopKLogitsWarper': 'top_k',\n        'TopPLogitsWarper': 'top_p',\n        'TypicalLogitsWarper': 'typical_p',\n        'XTCLogitsWarper': 'xtc',\n        'RepetitionPenaltyLogitsProcessorWithRange': 'repetition_penalty',\n        'PresencePenaltyLogitsProcessor': 'presence_penalty',\n        'FrequencyPenaltyLogitsProcessor': 'frequency_penalty',\n        'DRYLogitsProcessor': 'dry',\n        'EncoderRepetitionPenaltyLogitsProcessor': 'encoder_repetition_penalty',\n        'NoRepeatNGramLogitsProcessor': 'no_repeat_ngram',\n    }\n\n    def custom_sort_key(obj):\n        class_name = obj.__class__.__name__\n\n        # Return -1 if class_name is not mapped\n        if class_name not in class_name_to_nickname or class_name_to_nickname[class_name] not in sampler_priority:\n            return -1\n\n        return sampler_priority.index(class_name_to_nickname[class_name])\n\n    # Sort the list using the custom key function\n    warpers = sorted(warpers, key=custom_sort_key)\n    if shared.args.verbose:\n        logger.info(\"WARPERS=\")\n        pprint.PrettyPrinter(indent=4, sort_dicts=False).pprint([x.__class__.__name__ for x in warpers])\n        print()\n\n    if normalize is not None:\n        warpers.append(normalize)\n\n    warpers.append(SpyLogitsWarper())\n    warpers = LogitsProcessorList(warpers)\n    return warpers\n\n\ndef generation_config_init_patch(self, **kwargs):\n    original_init(self, **kwargs)\n    self.min_p = kwargs.pop(\"min_p\", 0.0)\n    self.dynamic_temperature = kwargs.pop(\"dynamic_temperature\", False)\n    
self.dynatemp_low = kwargs.pop(\"dynatemp_low\", 1)\n    self.dynatemp_high = kwargs.pop(\"dynatemp_high\", 1)\n    self.dynatemp_exponent = kwargs.pop(\"dynatemp_exponent\", 1)\n    self.smoothing_factor = kwargs.pop(\"smoothing_factor\", 0.0)\n    self.smoothing_curve = kwargs.pop(\"smoothing_curve\", 1.0)\n    self.tfs = kwargs.pop(\"tfs\", 1.0)\n    self.top_a = kwargs.pop(\"top_a\", 0.0)\n    self.top_n_sigma = kwargs.pop(\"top_n_sigma\", 0.0)\n    self.adaptive_target = kwargs.pop(\"adaptive_target\", 0.0)\n    self.adaptive_decay = kwargs.pop(\"adaptive_decay\", 0.9)\n    self.mirostat_mode = kwargs.pop(\"mirostat_mode\", 0)\n    self.mirostat_eta = kwargs.pop(\"mirostat_eta\", 0.1)\n    self.mirostat_tau = kwargs.pop(\"mirostat_tau\", 5)\n    self.repetition_penalty_range = kwargs.pop(\"repetition_penalty_range\", 0)\n    self.presence_penalty = kwargs.pop(\"presence_penalty\", 0)\n    self.frequency_penalty = kwargs.pop(\"frequency_penalty\", 0)\n    self.dry_multiplier = kwargs.pop(\"dry_multiplier\", 0.0)\n    self.dry_base = kwargs.pop(\"dry_base\", 1.75)\n    self.dry_allowed_length = kwargs.pop(\"dry_allowed_length\", 2)\n    self.dry_sequence_breakers = kwargs.pop(\"dry_sequence_breakers\", '\"\\\\n\", \":\", \"\\\\\"\", \"*\"')\n    self.xtc_threshold = kwargs.pop(\"xtc_threshold\", 0.1)\n    self.xtc_probability = kwargs.pop(\"xtc_probability\", 0)\n    self.temperature_last = kwargs.pop(\"temperature_last\", False)\n    self.sampler_priority = kwargs.pop(\"sampler_priority\", ['repetition_penalty', 'presence_penalty', 'frequency_penalty', 'dry', 'temperature', 'dynamic_temperature', 'quadratic_sampling', 'top_n_sigma', 'top_k', 'top_p', 'typical_p', 'epsilon_cutoff', 'eta_cutoff', 'tfs', 'top_a', 'min_p', 'adaptive_p', 'mirostat', 'xtc', 'encoder_repetition_penalty', 'no_repeat_ngram'])\n\n\ndef hijack_samplers():\n    transformers.GenerationMixin._get_logits_processor = get_logits_processor_patch\n    transformers.GenerationConfig.__init__ = 
generation_config_init_patch\n"
  },
  {
    "path": "modules/sane_markdown_lists.py",
    "content": "# Code based on the Sane List Extension for Python-Markdown\n# =======================================\n\n# Modify the behavior of Lists in Python-Markdown to act in a sane manner.\n\n# See https://Python-Markdown.github.io/extensions/sane_lists\n# for documentation.\n\n# Original code Copyright 2011 [Waylan Limberg](http://achinghead.com)\n\n# All changes Copyright 2011-2014 The Python Markdown Project\n\n# License: [BSD](https://opensource.org/licenses/bsd-license.php)\n\n\"\"\"\nModify the behavior of Lists in Python-Markdown to act in a sane manner.\n\"\"\"\n\nfrom __future__ import annotations\n\nimport re\nimport xml.etree.ElementTree as etree\nfrom typing import TYPE_CHECKING\n\nfrom markdown import Extension\nfrom markdown.blockparser import BlockParser\nfrom markdown.blockprocessors import (\n    ListIndentProcessor,\n    OListProcessor,\n    ParagraphProcessor\n)\n\nif TYPE_CHECKING:  # pragma: no cover\n    from markdown import blockparser\n\n\n# The min. number of added leading spaces needed to start a nested list\nMIN_NESTED_LIST_INDENT = 2\nassert MIN_NESTED_LIST_INDENT > 1, \"'MIN_NESTED_LIST_INDENT' must be > 1\"\n\n\nclass SaneListIndentProcessor(ListIndentProcessor):\n    \"\"\" Process children of list items.\n\n    Example\n\n        * a list item\n            process this part\n\n            or this part\n\n    \"\"\"\n\n    def __init__(self, *args):\n        super().__init__(*args)\n        self.INDENT_RE = re.compile(r'^(([ ])+)')\n\n    def test(self, parent: etree.Element, block: str) -> bool:\n        return block.startswith(' ' * MIN_NESTED_LIST_INDENT) and \\\n            not self.parser.state.isstate('detabbed') and \\\n            (parent.tag in self.ITEM_TYPES or (len(parent) and parent[-1] is not None and (parent[-1].tag in\n                                                                                           self.LIST_TYPES)))\n\n    def get_level(self, parent: etree.Element, block: str) -> tuple[int, 
etree.Element]:\n        \"\"\" Get level of indentation based on list level. \"\"\"\n        # Get indent level\n        m = self.INDENT_RE.match(block)\n        if m:\n            indent_level = len(m.group(1)) / MIN_NESTED_LIST_INDENT\n        else:\n            indent_level = 0\n        if self.parser.state.isstate('list'):\n            # We're in a tight-list - so we already are at correct parent.\n            level = 1\n        else:\n            # We're in a loose-list - so we need to find parent.\n            level = 0\n        # Step through children of tree to find matching indent level.\n        while indent_level > level:\n            child = self.lastChild(parent)\n            if child is not None and (child.tag in self.LIST_TYPES or child.tag in self.ITEM_TYPES):\n                if child.tag in self.LIST_TYPES:\n                    level += 1\n                parent = child\n            else:\n                # No more child levels. If we're short of `indent_level`,\n                # we have a code block. So we stop here.\n                break\n        return level, parent\n\n    def detab(self, text: str, length: int | None = None) -> tuple[str, str]:\n        \"\"\" Remove a tab from the front of each line of the given text. \"\"\"\n        if length is None:\n            length = MIN_NESTED_LIST_INDENT\n        newtext = []\n        lines = text.split('\\n')\n        for line in lines:\n            if line.startswith(' ' * length):\n                newtext.append(line[length:])\n            elif not line.strip():\n                newtext.append('')\n            else:\n                break\n        return '\\n'.join(newtext), '\\n'.join(lines[len(newtext):])\n\n    def looseDetab(self, text: str, level: int = 1) -> str:\n        \"\"\" Remove indentation from front of lines but allowing dedented lines. 
\"\"\"\n        lines = text.split('\\n')\n        for i in range(len(lines)):\n            if lines[i].startswith(' ' * MIN_NESTED_LIST_INDENT * level):\n                lines[i] = lines[i][MIN_NESTED_LIST_INDENT * level:]\n        return '\\n'.join(lines)\n\n\nclass SaneOListProcessor(OListProcessor):\n    \"\"\" Override `SIBLING_TAGS` to not include `ul` and set `LAZY_OL` to `False`. \"\"\"\n\n    SIBLING_TAGS = ['ol']\n    \"\"\" Exclude `ul` from list of siblings. \"\"\"\n    LAZY_OL = False\n    \"\"\" Disable lazy list behavior. \"\"\"\n\n    def __init__(self, parser: blockparser.BlockParser):\n        super().__init__(parser)\n        max_list_start_indent = self.tab_length\n        # Detect an item (e.g., `1. item`)\n        self.RE = re.compile(r'^[ ]{0,%d}[\\*_]{0,2}\\d+\\.[ ]+(.*)' % max_list_start_indent)\n        # Detect items on secondary lines. they can be of either list type.\n        self.CHILD_RE = re.compile(r'^[ ]{0,%d}([\\*_]{0,2})((\\d+\\.))[ ]+(.*)' % (MIN_NESTED_LIST_INDENT - 1))\n        # Detect indented (nested) items of either type\n        self.INDENT_RE = re.compile(r'^[ ]{%d,%d}[\\*_]{0,2}((\\d+\\.)|[*+-])[ ]+.*' %\n                                    (MIN_NESTED_LIST_INDENT, self.tab_length * 2))\n\n    def run(self, parent: etree.Element, blocks: list[str]) -> None:\n        # Check for multiple items in one block.\n        items = self.get_items(blocks.pop(0))\n        sibling = self.lastChild(parent)\n\n        if sibling is not None and sibling.tag in self.SIBLING_TAGS:\n            # Previous block was a list item, so set that as parent\n            lst = sibling\n            # make sure previous item is in a `p` - if the item has text,\n            # then it isn't in a `p`\n            if lst[-1].text:\n                # since it's possible there are other children for this\n                # sibling, we can't just `SubElement` the `p`, we need to\n                # insert it as the first item.\n                p = 
etree.Element('p')\n                p.text = lst[-1].text\n                lst[-1].text = ''\n                lst[-1].insert(0, p)\n            # if the last item has a tail, then the tail needs to be put in a `p`\n            # likely only when a header is not followed by a blank line\n            lch = self.lastChild(lst[-1])\n            if lch is not None and lch.tail:\n                p = etree.SubElement(lst[-1], 'p')\n                p.text = lch.tail.lstrip()\n                lch.tail = ''\n\n            # parse first block differently as it gets wrapped in a `p`.\n            li = etree.SubElement(lst, 'li')\n            self.parser.state.set('looselist')\n            firstitem = items.pop(0)\n            self.parser.parseBlocks(li, [firstitem])\n            self.parser.state.reset()\n        elif parent.tag in ['ol', 'ul']:\n            # this catches the edge case of a multi-item indented list whose\n            # first item is in a blank parent-list item:\n            #     * * subitem1\n            #         * subitem2\n            # see also `ListIndentProcessor`\n            lst = parent\n        else:\n            # This is a new list so create parent with appropriate tag.\n            lst = etree.SubElement(parent, self.TAG)\n            # Check if a custom start integer is set\n            if not self.LAZY_OL and self.STARTSWITH != '1':\n                lst.attrib['start'] = self.STARTSWITH\n\n        self.parser.state.set('list')\n        # Loop through items in block, recursively parsing each with the\n        # appropriate parent.\n        for item in items:\n            if item.startswith(\" \" * MIN_NESTED_LIST_INDENT):\n                # Item is indented. Parse with last item as parent\n                self.parser.parseBlocks(lst[-1], [item])\n            else:\n                # New item. 
Create `li` and parse with it as parent\n                li = etree.SubElement(lst, 'li')\n                self.parser.parseBlocks(li, [item])\n        self.parser.state.reset()\n\n    def looseDetab(self, text: str, indent_length: int, level: int = 1) -> str:\n        \"\"\" Remove indentation from front of lines but allowing dedented lines. \"\"\"\n        lines = text.split('\\n')\n        for i in range(len(lines)):\n            if lines[i].startswith(' ' * indent_length * level):\n                lines[i] = lines[i][indent_length * level:]\n        return '\\n'.join(lines)\n\n    def get_items(self, block: str) -> list[str]:\n        \"\"\" Break a block into list items. \"\"\"\n        # If first level of list is indented, remove that indentation\n        if (indent_len := len(block) - len(block.lstrip())) > 0:\n            block = self.looseDetab(block, indent_len)\n        items = []\n        for line in block.split('\\n'):\n            m = self.CHILD_RE.match(line)\n            if m:\n                # This is a new list item\n                # Check first item for the start index\n                if not items:\n                    # Detect the integer value of first list item\n                    INTEGER_RE = re.compile(r'(\\d+)')\n                    self.STARTSWITH = INTEGER_RE.match(m.group(2)).group()\n                # Append to the list\n                items.append(m.group(1) + m.group(4))\n            elif self.INDENT_RE.match(line):\n                # This is an indented (possibly nested) item.\n                if items[-1].startswith(' ' * MIN_NESTED_LIST_INDENT):\n                    # Previous item was indented. Append to that item.\n                    items[-1] = '{}\\n{}'.format(items[-1], line)\n                else:\n                    items.append(line)\n            else:\n                # This is another line of previous item. 
Append to that item.\n                items[-1] = '{}\\n{}'.format(items[-1], line)\n        return items\n\n\nclass SaneUListProcessor(SaneOListProcessor):\n    \"\"\" Override `SIBLING_TAGS` to not include `ol`. \"\"\"\n\n    TAG: str = 'ul'\n    SIBLING_TAGS = ['ul']\n    \"\"\" Exclude `ol` from list of siblings. \"\"\"\n\n    def __init__(self, parser: blockparser.BlockParser):\n        super().__init__(parser)\n        # Detect an item (e.g., `- item` or `+ item` or `* item`).\n        max_list_start_indent = self.tab_length\n        self.RE = re.compile(r'^[ ]{0,%d}[*+-][ ]+(.*)' % max_list_start_indent)\n        self.CHILD_RE = re.compile(r'^[ ]{0,%d}(([*+-]))[ ]+(.*)' % (MIN_NESTED_LIST_INDENT - 1))\n\n    def get_items(self, block: str) -> list[str]:\n        \"\"\" Break a block into list items. \"\"\"\n        # If first level of list is indented, remove that indentation\n        if (indent_len := len(block) - len(block.lstrip())) > 0:\n            block = self.looseDetab(block, indent_len)\n        items = []\n        for line in block.split('\\n'):\n            m = self.CHILD_RE.match(line)\n            if m:\n                # Append to the list\n                items.append(m.group(3))\n            elif self.INDENT_RE.match(line):\n                # This is an indented (possibly nested) item.\n                if items[-1].startswith(' ' * MIN_NESTED_LIST_INDENT):\n                    # Previous item was indented. Append to that item.\n                    items[-1] = '{}\\n{}'.format(items[-1], line)\n                else:\n                    items.append(line)\n            else:\n                # This is another line of previous item. Append to that item.\n                items[-1] = '{}\\n{}'.format(items[-1], line)\n        return items\n\n\nclass SaneParagraphProcessor(ParagraphProcessor):\n    \"\"\" Process Paragraph blocks. 
\"\"\"\n\n    def __init__(self, parser: BlockParser):\n        super().__init__(parser)\n        max_list_start_indent = self.tab_length\n        self.LIST_RE = re.compile(r\"\\s{2}\\n(\\s{0,%d}[\\d+*-])\" % max_list_start_indent)\n\n    def run(self, parent: etree.Element, blocks: list[str]) -> None:\n        block = blocks.pop(0)\n        if block.strip():\n            # Not a blank block. Add to parent, otherwise throw it away.\n            if self.parser.state.isstate('list'):\n                # The parent is a tight-list.\n                #\n                # Check for any children. This will likely only happen in a\n                # tight-list when a header isn't followed by a blank line.\n                # For example:\n                #\n                #     * # Header\n                #     Line 2 of list item - not part of header.\n                sibling = self.lastChild(parent)\n                if sibling is not None:\n                    # Insert after sibling.\n                    if sibling.tail:\n                        sibling.tail = '{}\\n{}'.format(sibling.tail, block)\n                    else:\n                        sibling.tail = '\\n%s' % block\n                else:\n                    # Append to parent.text\n                    if parent.text:\n                        parent.text = '{}\\n{}'.format(parent.text, block)\n                    else:\n                        parent.text = block.lstrip()\n            else:\n                # Check if paragraph contains a list\n                next_list_block = None\n                if list_match := self.LIST_RE.search(block):\n                    list_start = list_match.end() - len(list_match.group(1))\n                    next_list_block = block[list_start:]\n                    block = block[:list_start]\n\n                # Create a regular paragraph\n                p = etree.SubElement(parent, 'p')\n                p.text = block.lstrip()\n\n                # If a list was found, parse 
its block separately with the paragraph as the parent\n                if next_list_block:\n                    self.parser.parseBlocks(p, [next_list_block])\n\n\nclass SaneListExtension(Extension):\n    \"\"\" Add sane lists to Markdown. \"\"\"\n\n    def extendMarkdown(self, md):\n        \"\"\" Override existing Processors. \"\"\"\n        md.parser.blockprocessors.register(SaneListIndentProcessor(md.parser), 'indent', 90)\n        md.parser.blockprocessors.register(SaneOListProcessor(md.parser), 'olist', 40)\n        md.parser.blockprocessors.register(SaneUListProcessor(md.parser), 'ulist', 30)\n        md.parser.blockprocessors.register(SaneParagraphProcessor(md.parser), 'paragraph', 10)\n\n        # Disable uncommon indented codeblocks (as opposed to fenced codeblocks delimited by \"```\")\n        md.parser.blockprocessors.deregister('code')\n\n\ndef makeExtension(**kwargs):  # pragma: no cover\n    return SaneListExtension(**kwargs)\n"
  },
  {
    "path": "modules/shared.py",
    "content": "import argparse\nimport copy\nimport os\nimport shlex\nimport sys\nfrom collections import OrderedDict\nfrom pathlib import Path\n\nimport yaml\n\nfrom modules.logging_colors import logger\nfrom modules.paths import resolve_user_data_dir\nfrom modules.presets import default_preset, default_preset_values\n\n# Resolve user_data directory early (before argparse defaults are set)\nuser_data_dir = resolve_user_data_dir()\n\n# Text model variables\nmodel = None\ntokenizer = None\nmodel_name = 'None'\nis_seq2seq = False\nis_multimodal = False\nmodel_dirty_from_training = False\nlora_names = []\nbos_token = '<s>'\neos_token = '</s>'\n\n# Image model variables\nimage_model = None\nimage_model_name = 'None'\nimage_pipeline_type = None\n\n# Generation variables\nstop_everything = False\ngeneration_lock = None\nprocessing_message = ''\n\n# UI variables\ngradio = {}\npersistent_interface_state = {}\nneed_restart = False\n\n# Parser copied from https://github.com/vladmandic/automatic\nparser = argparse.ArgumentParser(description=\"Text Generation Web UI\", conflict_handler='resolve', add_help=True, formatter_class=lambda prog: argparse.HelpFormatter(prog, max_help_position=55, indent_increment=2, width=200))\n\n# Basic settings\ngroup = parser.add_argument_group('Basic settings')\ngroup.add_argument('--user-data-dir', type=str, default=str(user_data_dir), help='Path to the user data directory. Default: auto-detected.')\ngroup.add_argument('--multi-user', action='store_true', help='Multi-user mode. Chat histories are not saved or automatically loaded. Best suited for small trusted teams.')\ngroup.add_argument('--model', type=str, help='Name of the model to load by default.')\ngroup.add_argument('--lora', type=str, nargs='+', help='The list of LoRAs to load. 
If you want to load more than one LoRA, write the names separated by spaces.')\ngroup.add_argument('--model-dir', type=str, default=str(user_data_dir / 'models'), help='Path to directory with all the models.')\ngroup.add_argument('--lora-dir', type=str, default=str(user_data_dir / 'loras'), help='Path to directory with all the loras.')\ngroup.add_argument('--model-menu', action='store_true', help='Show a model menu in the terminal when the web UI is first launched.')\ngroup.add_argument('--settings', type=str, help='Load the default interface settings from this yaml file. See user_data/settings-template.yaml for an example. If you create a file called user_data/settings.yaml, this file will be loaded by default without the need to use the --settings flag.')\ngroup.add_argument('--extensions', type=str, nargs='+', help='The list of extensions to load. If you want to load more than one extension, write the names separated by spaces.')\ngroup.add_argument('--verbose', action='store_true', help='Print the prompts to the terminal.')\ngroup.add_argument('--idle-timeout', type=int, default=0, help='Unload model after this many minutes of inactivity. 
It will be automatically reloaded when you try to use it again.')\n\n# Image generation\ngroup = parser.add_argument_group('Image model')\ngroup.add_argument('--image-model', type=str, help='Name of the image model to select on startup (overrides saved setting).')\ngroup.add_argument('--image-model-dir', type=str, default=str(user_data_dir / 'image_models'), help='Path to directory with all the image models.')\ngroup.add_argument('--image-dtype', type=str, default=None, choices=['bfloat16', 'float16'], help='Data type for image model.')\ngroup.add_argument('--image-attn-backend', type=str, default=None, choices=['flash_attention_2', 'sdpa'], help='Attention backend for image model.')\ngroup.add_argument('--image-cpu-offload', action='store_true', help='Enable CPU offloading for image model.')\ngroup.add_argument('--image-compile', action='store_true', help='Compile the image model for faster inference.')\ngroup.add_argument('--image-quant', type=str, default=None,\n                   choices=['none', 'bnb-8bit', 'bnb-4bit', 'torchao-int8wo', 'torchao-fp4', 'torchao-float8wo'],\n                   help='Quantization method for image model.')\n\n# Model loader\ngroup = parser.add_argument_group('Model loader')\ngroup.add_argument('--loader', type=str, help='Choose the model loader manually, otherwise, it will get autodetected. Valid options: Transformers, llama.cpp, ExLlamav3_HF, ExLlamav3, TensorRT-LLM.')\n\n# Cache\ngroup = parser.add_argument_group('Context and cache')\ngroup.add_argument('--ctx-size', '--n_ctx', '--max_seq_len', type=int, default=0, metavar='N', help='Context size in tokens. 0 = auto for llama.cpp (requires gpu-layers=-1), 8192 for other loaders.')\ngroup.add_argument('--cache-type', '--cache_type', type=str, default='fp16', metavar='N', help='KV cache type; valid options: llama.cpp - fp16, q8_0, q4_0; ExLlamaV3 - fp16, q2 to q8 (can specify k_bits and v_bits separately, e.g. 
q4_q8).')\n\n# Speculative decoding\ngroup = parser.add_argument_group('Speculative decoding')\ngroup.add_argument('--model-draft', type=str, default=None, help='Path to the draft model for speculative decoding.')\ngroup.add_argument('--draft-max', type=int, default=4, help='Number of tokens to draft for speculative decoding.')\ngroup.add_argument('--gpu-layers-draft', type=int, default=256, help='Number of layers to offload to the GPU for the draft model.')\ngroup.add_argument('--device-draft', type=str, default=None, help='Comma-separated list of devices to use for offloading the draft model. Example: CUDA0,CUDA1')\ngroup.add_argument('--ctx-size-draft', type=int, default=0, help='Size of the prompt context for the draft model. If 0, uses the same as the main model.')\ngroup.add_argument('--spec-type', type=str, default='none', choices=['none', 'ngram-mod', 'ngram-simple', 'ngram-map-k', 'ngram-map-k4v', 'ngram-cache'], help='Draftless speculative decoding type. Recommended: ngram-mod.')\ngroup.add_argument('--spec-ngram-size-n', type=int, default=24, help='N-gram lookup size for ngram speculative decoding.')\ngroup.add_argument('--spec-ngram-size-m', type=int, default=48, help='Draft n-gram size for ngram speculative decoding.')\ngroup.add_argument('--spec-ngram-min-hits', type=int, default=1, help='Minimum n-gram hits for ngram-map speculative decoding.')\n\n# llama.cpp\ngroup = parser.add_argument_group('llama.cpp')\ngroup.add_argument('--gpu-layers', '--n-gpu-layers', type=int, default=-1, metavar='N', help='Number of layers to offload to the GPU. 
-1 = auto.')\ngroup.add_argument('--cpu-moe', action='store_true', help='Move the experts to the CPU (for MoE models).')\ngroup.add_argument('--mmproj', type=str, default=None, help='Path to the mmproj file for vision models.')\ngroup.add_argument('--streaming-llm', action='store_true', help='Activate StreamingLLM to avoid re-evaluating the entire prompt when old messages are removed.')\ngroup.add_argument('--tensor-split', type=str, default=None, help='Split the model across multiple GPUs. Comma-separated list of proportions. Example: 60,40.')\ngroup.add_argument('--row-split', action='store_true', help='Split the model by rows across GPUs. This may improve multi-gpu performance.')\ngroup.add_argument('--no-mmap', action='store_true', help='Prevent mmap from being used.')\ngroup.add_argument('--mlock', action='store_true', help='Force the system to keep the model in RAM.')\ngroup.add_argument('--no-kv-offload', action='store_true', help='Do not offload the  K, Q, V to the GPU. This saves VRAM but reduces the performance.')\ngroup.add_argument('--batch-size', type=int, default=1024, help='Maximum number of prompt tokens to batch together when calling llama-server. This is the application level batch size.')\ngroup.add_argument('--ubatch-size', type=int, default=1024, help='Maximum number of prompt tokens to batch together when calling llama-server. This is the max physical batch size for computation (device level).')\ngroup.add_argument('--threads', type=int, default=0, help='Number of threads to use.')\ngroup.add_argument('--threads-batch', type=int, default=0, help='Number of threads to use for batches/prompt processing.')\ngroup.add_argument('--numa', action='store_true', help='Activate NUMA task allocation for llama.cpp.')\ngroup.add_argument('--parallel', type=int, default=1, help='Number of parallel request slots. The context size is divided equally among slots. 
For example, to have 4 slots with 8192 context each, set ctx_size to 32768.')\ngroup.add_argument('--fit-target', type=str, default='512', help='Target VRAM margin per device for auto GPU layers, comma-separated list of values in MiB. A single value is broadcast across all devices.')\ngroup.add_argument('--extra-flags', type=str, default=None, help='Extra flags to pass to llama-server. Format: \"flag1=value1,flag2,flag3=value3\". Example: \"override-tensor=exps=CPU\"')\n\n# Transformers/Accelerate\ngroup = parser.add_argument_group('Transformers/Accelerate')\ngroup.add_argument('--cpu', action='store_true', help='Use the CPU to generate text. Warning: Training on CPU is extremely slow.')\ngroup.add_argument('--cpu-memory', type=float, default=0, help='Maximum CPU memory in GiB. Use this for CPU offloading.')\ngroup.add_argument('--disk', action='store_true', help='If the model is too large for your GPU(s) and CPU combined, send the remaining layers to the disk.')\ngroup.add_argument('--disk-cache-dir', type=str, default=str(user_data_dir / 'cache'), help='Directory to save the disk cache to.')\ngroup.add_argument('--load-in-8bit', action='store_true', help='Load the model with 8-bit precision (using bitsandbytes).')\ngroup.add_argument('--bf16', action='store_true', help='Load the model with bfloat16 precision. Requires NVIDIA Ampere GPU.')\ngroup.add_argument('--no-cache', action='store_true', help='Set use_cache to False while generating text. This reduces VRAM usage slightly, but it comes at a performance cost.')\ngroup.add_argument('--trust-remote-code', action='store_true', help='Set trust_remote_code=True while loading the model. Necessary for some models.')\ngroup.add_argument('--force-safetensors', action='store_true', help='Set use_safetensors=True while loading the model. This prevents arbitrary code execution.')\ngroup.add_argument('--no_use_fast', action='store_true', help='Set use_fast=False while loading the tokenizer (it\\'s True by default). 
Use this if you have any problems related to use_fast.')\ngroup.add_argument('--attn-implementation', type=str, default='sdpa', metavar=\"IMPLEMENTATION\", help='Attention implementation. Valid options: sdpa, eager, flash_attention_2.')\n\n# bitsandbytes 4-bit\ngroup = parser.add_argument_group('bitsandbytes 4-bit')\ngroup.add_argument('--load-in-4bit', action='store_true', help='Load the model with 4-bit precision (using bitsandbytes).')\ngroup.add_argument('--use_double_quant', action='store_true', help='use_double_quant for 4-bit.')\ngroup.add_argument('--compute_dtype', type=str, default='float16', help='compute dtype for 4-bit. Valid options: bfloat16, float16, float32.')\ngroup.add_argument('--quant_type', type=str, default='nf4', help='quant_type for 4-bit. Valid options: nf4, fp4.')\n\n# ExLlamaV3\ngroup = parser.add_argument_group('ExLlamaV3')\ngroup.add_argument('--gpu-split', type=str, help='Comma-separated list of VRAM (in GB) to use per GPU device for model layers. Example: 20,7,7.')\ngroup.add_argument('--enable-tp', '--enable_tp', action='store_true', help='Enable Tensor Parallelism (TP) to split the model across GPUs.')\ngroup.add_argument('--tp-backend', type=str, default='native', help='The backend for tensor parallelism. Valid options: native, nccl. Default: native.')\ngroup.add_argument('--cfg-cache', action='store_true', help='Create an additional cache for CFG negative prompts. Necessary to use CFG with that loader.')\n\n# Gradio\ngroup = parser.add_argument_group('Gradio')\ngroup.add_argument('--listen', action='store_true', help='Make the web UI reachable from your local network.')\ngroup.add_argument('--listen-port', type=int, help='The listening port that the server will use.')\ngroup.add_argument('--listen-host', type=str, help='The hostname that the server will use.')\ngroup.add_argument('--share', action='store_true', help='Create a public URL. 
This is useful for running the web UI on Google Colab or similar.')\ngroup.add_argument('--auto-launch', action='store_true', default=False, help='Open the web UI in the default browser upon launch.')\ngroup.add_argument('--gradio-auth', type=str, help='Set Gradio authentication password in the format \"username:password\". Multiple credentials can also be supplied with \"u1:p1,u2:p2,u3:p3\".', default=None)\ngroup.add_argument('--gradio-auth-path', type=str, help='Set the Gradio authentication file path. The file should contain one or more user:password pairs in the same format as above.', default=None)\ngroup.add_argument('--ssl-keyfile', type=str, help='The path to the SSL certificate key file.', default=None)\ngroup.add_argument('--ssl-certfile', type=str, help='The path to the SSL certificate cert file.', default=None)\ngroup.add_argument('--subpath', type=str, help='Customize the subpath for gradio, use with reverse proxy')\ngroup.add_argument('--old-colors', action='store_true', help='Use the legacy Gradio colors, before the December/2024 update.')\ngroup.add_argument('--portable', action='store_true', help='Hide features not available in portable mode like training.')\n\n# API\ngroup = parser.add_argument_group('API')\ngroup.add_argument('--api', action='store_true', help='Enable the API extension.')\ngroup.add_argument('--public-api', action='store_true', help='Create a public URL for the API using Cloudflare.')\ngroup.add_argument('--public-api-id', type=str, help='Tunnel ID for named Cloudflare Tunnel. Use together with public-api option.', default=None)\ngroup.add_argument('--api-port', type=int, default=5000, help='The listening port for the API.')\ngroup.add_argument('--api-key', type=str, default='', help='API authentication key.')\ngroup.add_argument('--admin-key', type=str, default='', help='API authentication key for admin tasks like loading and unloading models. 
If not set, will be the same as --api-key.')\ngroup.add_argument('--api-enable-ipv6', action='store_true', help='Enable IPv6 for the API')\ngroup.add_argument('--api-disable-ipv4', action='store_true', help='Disable IPv4 for the API')\ngroup.add_argument('--nowebui', action='store_true', help='Do not launch the Gradio UI. Useful for launching the API in standalone mode.')\n\n# API generation defaults\n_d = default_preset_values\ngroup = parser.add_argument_group('API generation defaults')\ngroup.add_argument('--temperature', type=float, default=_d['temperature'], metavar='N', help='Temperature')\ngroup.add_argument('--dynatemp-low', type=float, default=_d['dynatemp_low'], metavar='N', help='Dynamic temperature low')\ngroup.add_argument('--dynatemp-high', type=float, default=_d['dynatemp_high'], metavar='N', help='Dynamic temperature high')\ngroup.add_argument('--dynatemp-exponent', type=float, default=_d['dynatemp_exponent'], metavar='N', help='Dynamic temperature exponent')\ngroup.add_argument('--smoothing-factor', type=float, default=_d['smoothing_factor'], metavar='N', help='Smoothing factor')\ngroup.add_argument('--smoothing-curve', type=float, default=_d['smoothing_curve'], metavar='N', help='Smoothing curve')\ngroup.add_argument('--top-p', type=float, default=_d['top_p'], metavar='N', help='Top P')\ngroup.add_argument('--top-k', type=int, default=_d['top_k'], metavar='N', help='Top K')\ngroup.add_argument('--min-p', type=float, default=_d['min_p'], metavar='N', help='Min P')\ngroup.add_argument('--top-n-sigma', type=float, default=_d['top_n_sigma'], metavar='N', help='Top N Sigma')\ngroup.add_argument('--typical-p', type=float, default=_d['typical_p'], metavar='N', help='Typical P')\ngroup.add_argument('--xtc-threshold', type=float, default=_d['xtc_threshold'], metavar='N', help='XTC threshold')\ngroup.add_argument('--xtc-probability', type=float, default=_d['xtc_probability'], metavar='N', help='XTC probability')\ngroup.add_argument('--epsilon-cutoff', 
type=float, default=_d['epsilon_cutoff'], metavar='N', help='Epsilon cutoff')\ngroup.add_argument('--eta-cutoff', type=float, default=_d['eta_cutoff'], metavar='N', help='Eta cutoff')\ngroup.add_argument('--tfs', type=float, default=_d['tfs'], metavar='N', help='TFS')\ngroup.add_argument('--top-a', type=float, default=_d['top_a'], metavar='N', help='Top A')\ngroup.add_argument('--adaptive-target', type=float, default=_d['adaptive_target'], metavar='N', help='Adaptive target')\ngroup.add_argument('--adaptive-decay', type=float, default=_d['adaptive_decay'], metavar='N', help='Adaptive decay')\ngroup.add_argument('--dry-multiplier', type=float, default=_d['dry_multiplier'], metavar='N', help='DRY multiplier')\ngroup.add_argument('--dry-allowed-length', type=int, default=_d['dry_allowed_length'], metavar='N', help='DRY allowed length')\ngroup.add_argument('--dry-base', type=float, default=_d['dry_base'], metavar='N', help='DRY base')\ngroup.add_argument('--repetition-penalty', type=float, default=_d['repetition_penalty'], metavar='N', help='Repetition penalty')\ngroup.add_argument('--frequency-penalty', type=float, default=_d['frequency_penalty'], metavar='N', help='Frequency penalty')\ngroup.add_argument('--presence-penalty', type=float, default=_d['presence_penalty'], metavar='N', help='Presence penalty')\ngroup.add_argument('--encoder-repetition-penalty', type=float, default=_d['encoder_repetition_penalty'], metavar='N', help='Encoder repetition penalty')\ngroup.add_argument('--no-repeat-ngram-size', type=int, default=_d['no_repeat_ngram_size'], metavar='N', help='No repeat ngram size')\ngroup.add_argument('--repetition-penalty-range', type=int, default=_d['repetition_penalty_range'], metavar='N', help='Repetition penalty range')\ngroup.add_argument('--penalty-alpha', type=float, default=_d['penalty_alpha'], metavar='N', help='Penalty alpha')\ngroup.add_argument('--guidance-scale', type=float, default=_d['guidance_scale'], metavar='N', help='Guidance 
scale')\ngroup.add_argument('--mirostat-mode', type=int, default=_d['mirostat_mode'], metavar='N', help='Mirostat mode')\ngroup.add_argument('--mirostat-tau', type=float, default=_d['mirostat_tau'], metavar='N', help='Mirostat tau')\ngroup.add_argument('--mirostat-eta', type=float, default=_d['mirostat_eta'], metavar='N', help='Mirostat eta')\ngroup.add_argument('--do-sample', action=argparse.BooleanOptionalAction, default=_d['do_sample'], help='Do sample')\ngroup.add_argument('--dynamic-temperature', action=argparse.BooleanOptionalAction, default=_d['dynamic_temperature'], help='Dynamic temperature')\ngroup.add_argument('--temperature-last', action=argparse.BooleanOptionalAction, default=_d['temperature_last'], help='Temperature last')\ngroup.add_argument('--sampler-priority', type=str, default=_d['sampler_priority'], metavar='N', help='Sampler priority')\ngroup.add_argument('--dry-sequence-breakers', type=str, default=_d['dry_sequence_breakers'], metavar='N', help='DRY sequence breakers')\ngroup.add_argument('--enable-thinking', action=argparse.BooleanOptionalAction, default=True, help='Enable thinking')\ngroup.add_argument('--reasoning-effort', type=str, default='medium', metavar='N', help='Reasoning effort')\ngroup.add_argument('--chat-template-file', type=str, default=None, help='Path to a chat template file (.jinja, .jinja2, or .yaml) to use as the default instruction template for API requests. 
Overrides the model\\'s built-in template.')\n\n# Handle CMD_FLAGS.txt\ncmd_flags_path = user_data_dir / \"CMD_FLAGS.txt\"\nif cmd_flags_path.exists():\n    with cmd_flags_path.open('r', encoding='utf-8') as f:\n        cmd_flags = ' '.join(\n            line.strip().rstrip('\\\\').strip()\n            for line in f\n            if line.strip().rstrip('\\\\').strip() and not line.strip().startswith('#')\n        )\n\n    if cmd_flags:\n        # Command-line takes precedence over CMD_FLAGS.txt\n        sys.argv = [sys.argv[0]] + shlex.split(cmd_flags) + sys.argv[1:]\n\n\nargs = parser.parse_args()\nuser_data_dir = Path(args.user_data_dir)  # Update from parsed args (may differ from pre-parse)\noriginal_args = copy.deepcopy(args)\nargs_defaults = parser.parse_args([])\n\n# Create a mapping of all argument aliases to their canonical names\nalias_to_dest = {}\nfor action in parser._actions:\n    for opt in action.option_strings:\n        alias_to_dest[opt.lstrip('-').replace('-', '_')] = action.dest\n\nprovided_arguments = []\nfor arg in sys.argv[1:]:\n    arg = arg.lstrip('-').replace('-', '_')\n    if arg in alias_to_dest:\n        provided_arguments.append(alias_to_dest[arg])\n    elif hasattr(args, arg):\n        provided_arguments.append(arg)\n\n# Default generation parameters\nneutral_samplers = default_preset()\n\n# UI defaults\nsettings = {\n    'show_controls': True,\n    'start_with': '',\n    'mode': 'instruct',\n    'chat_style': 'cai-chat',\n    'chat-instruct_command': 'Continue the chat dialogue below. Write a single reply for the character \"<|character|>\". 
Reply directly, without starting the reply with the character name.\\n\\n<|prompt|>',\n    'enable_web_search': False,\n    'web_search_pages': 3,\n    'selected_tools': [],\n    'prompt-notebook': '',\n    'preset': 'Top-P' if (user_data_dir / 'presets/Top-P.yaml').exists() else None,\n    'max_new_tokens': 512,\n    'max_new_tokens_min': 1,\n    'max_new_tokens_max': 4096,\n    'prompt_lookup_num_tokens': 0,\n    'max_tokens_second': 0,\n    'auto_max_new_tokens': True,\n    'ban_eos_token': False,\n    'add_bos_token': True,\n    'enable_thinking': True,\n    'reasoning_effort': 'medium',\n    'skip_special_tokens': True,\n    'stream': True,\n    'static_cache': False,\n    'truncation_length': 8192,\n    'seed': -1,\n    'custom_stopping_strings': '',\n    'custom_token_bans': '',\n    'negative_prompt': '',\n    'dark_theme': True,\n    'show_two_notebook_columns': False,\n    'paste_to_attachment': False,\n    'include_past_attachments': True,\n\n    # Generation parameters - Curve shape\n    'temperature': neutral_samplers['temperature'],\n    'dynatemp_low': neutral_samplers['dynatemp_low'],\n    'dynatemp_high': neutral_samplers['dynatemp_high'],\n    'dynatemp_exponent': neutral_samplers['dynatemp_exponent'],\n    'smoothing_factor': neutral_samplers['smoothing_factor'],\n    'smoothing_curve': neutral_samplers['smoothing_curve'],\n\n    # Generation parameters - Curve cutoff\n    'top_p': 0.95,\n    'top_k': neutral_samplers['top_k'],\n    'min_p': neutral_samplers['min_p'],\n    'top_n_sigma': neutral_samplers['top_n_sigma'],\n    'typical_p': neutral_samplers['typical_p'],\n    'xtc_threshold': neutral_samplers['xtc_threshold'],\n    'xtc_probability': neutral_samplers['xtc_probability'],\n    'epsilon_cutoff': neutral_samplers['epsilon_cutoff'],\n    'eta_cutoff': neutral_samplers['eta_cutoff'],\n    'tfs': neutral_samplers['tfs'],\n    'top_a': neutral_samplers['top_a'],\n    'adaptive_target': neutral_samplers['adaptive_target'],\n    
'adaptive_decay': neutral_samplers['adaptive_decay'],\n\n    # Generation parameters - Repetition suppression\n    'dry_multiplier': neutral_samplers['dry_multiplier'],\n    'dry_allowed_length': neutral_samplers['dry_allowed_length'],\n    'dry_base': neutral_samplers['dry_base'],\n    'repetition_penalty': neutral_samplers['repetition_penalty'],\n    'frequency_penalty': neutral_samplers['frequency_penalty'],\n    'presence_penalty': neutral_samplers['presence_penalty'],\n    'encoder_repetition_penalty': neutral_samplers['encoder_repetition_penalty'],\n    'no_repeat_ngram_size': neutral_samplers['no_repeat_ngram_size'],\n    'repetition_penalty_range': neutral_samplers['repetition_penalty_range'],\n\n    # Generation parameters - Alternative sampling methods\n    'penalty_alpha': neutral_samplers['penalty_alpha'],\n    'guidance_scale': neutral_samplers['guidance_scale'],\n    'mirostat_mode': neutral_samplers['mirostat_mode'],\n    'mirostat_tau': neutral_samplers['mirostat_tau'],\n    'mirostat_eta': neutral_samplers['mirostat_eta'],\n\n    # Generation parameters - Other options\n    'do_sample': neutral_samplers['do_sample'],\n    'dynamic_temperature': neutral_samplers['dynamic_temperature'],\n    'temperature_last': neutral_samplers['temperature_last'],\n    'sampler_priority': neutral_samplers['sampler_priority'],\n    'dry_sequence_breakers': neutral_samplers['dry_sequence_breakers'],\n    'grammar_string': '',\n\n    # Character settings\n    'character': 'Assistant',\n    'user': 'Default',\n    'name1': 'You',\n    'name2': 'AI',\n    'user_bio': '',\n    'context': 'The following is a conversation with an AI Large Language Model. The AI has been trained to answer questions, provide recommendations, and help with decision making. The AI follows user requests. 
The AI thinks outside the box.',\n    'greeting': 'How can I help you today?',\n    'custom_system_message': '',\n    'instruction_template_str': \"{%- set ns = namespace(found=false) -%}\\n{%- for message in messages -%}\\n    {%- if message['role'] == 'system' -%}\\n        {%- set ns.found = true -%}\\n    {%- endif -%}\\n{%- endfor -%}\\n{%- if not ns.found -%}\\n    {{- '' + 'Below is an instruction that describes a task. Write a response that appropriately completes the request.' + '\\\\n\\\\n' -}}\\n{%- endif %}\\n{%- for message in messages %}\\n    {%- if message['role'] == 'system' -%}\\n        {{- '' + message['content'] + '\\\\n\\\\n' -}}\\n    {%- else -%}\\n        {%- if message['role'] == 'user' -%}\\n            {{-'### Instruction:\\\\n' + message['content'] + '\\\\n\\\\n'-}}\\n        {%- else -%}\\n            {{-'### Response:\\\\n' + message['content'] + '\\\\n\\\\n' -}}\\n        {%- endif -%}\\n    {%- endif -%}\\n{%- endfor -%}\\n{%- if add_generation_prompt -%}\\n    {{-'### Response:\\\\n'-}}\\n{%- endif -%}\",\n    'chat_template_str': \"{%- for message in messages %}\\n    {%- if message['role'] == 'system' -%}\\n        {%- if message['content'] -%}\\n            {{- message['content'] + '\\\\n\\\\n' -}}\\n        {%- endif -%}\\n        {%- if user_bio -%}\\n            {{- user_bio + '\\\\n\\\\n' -}}\\n        {%- endif -%}\\n    {%- elif message['role'] == 'tool' -%}\\n        {{- '[Tool result: ' + message['content'] + ']\\\\n' -}}\\n    {%- elif message['role'] == 'user' -%}\\n        {{- name1 + ': ' + message['content'] + '\\\\n'-}}\\n    {%- elif message['tool_calls'] is defined and message['tool_calls'] -%}\\n        {%- for tc in message['tool_calls'] -%}\\n            {{- '[Calling: ' + tc['function']['name'] + '(' + tc['function']['arguments'] + ')]\\\\n' -}}\\n        {%- endfor -%}\\n    {%- else -%}\\n        {{- name2 + ': ' + message['content'] + '\\\\n' -}}\\n    {%- endif -%}\\n{%- endfor -%}\\n{%- if 
add_generation_prompt %}\\n    {{- name2 + ':' -}}\\n{%- endif %}\",\n\n    # Extensions\n    'default_extensions': [],\n\n    # Image generation settings\n    'image_prompt': '',\n    'image_neg_prompt': '',\n    'image_width': 1024,\n    'image_height': 1024,\n    'image_aspect_ratio': '1:1 Square',\n    'image_steps': 9,\n    'image_cfg_scale': 0.0,\n    'image_seed': -1,\n    'image_batch_size': 1,\n    'image_batch_count': 1,\n    'image_llm_variations': False,\n    'image_llm_variations_prompt': 'Write a variation of the image generation prompt above. Consider the intent of the user with that prompt and write something that will likely please them, with added details. Output only the new prompt. Do not add any explanations, prefixes, or additional text.',\n    'image_model_menu': 'None',\n    'image_dtype': 'bfloat16',\n    'image_attn_backend': 'flash_attention_2',\n    'image_cpu_offload': False,\n    'image_compile': False,\n    'image_quant': 'none',\n}\n\ndefault_settings = copy.deepcopy(settings)\n\n\ndef do_cmd_flags_warnings():\n    # Validate --chat-template-file\n    if args.chat_template_file and not Path(args.chat_template_file).is_file():\n        logger.error(f\"--chat-template-file: file not found: {args.chat_template_file}\")\n        sys.exit(1)\n\n    # Security warnings\n    if args.trust_remote_code:\n        logger.warning(\n            \"The `--trust-remote-code` flag is enabled.\\n\"\n            \"This allows models to execute arbitrary code on your machine.\\n\\n\"\n            \"1. Only use with models from sources you fully trust.\\n\"\n            \"2. Set an access password with `--gradio-auth`.\"\n        )\n\n    if 'COLAB_GPU' not in os.environ and not args.nowebui:\n        if args.share:\n            logger.warning(\"The gradio \\\"share link\\\" feature uses a proprietary executable to create a reverse tunnel. 
Use it with care.\")\n        if any((args.listen, args.share)) and not any((args.gradio_auth, args.gradio_auth_path)):\n            logger.warning(\"You are potentially exposing the web UI to the entire internet without any access password.\\nYou can create one with the \\\"--gradio-auth\\\" flag like this:\\n\\n--gradio-auth username:password\\n\\nMake sure to replace username:password with your own.\")\n    if args.multi_user:\n        logger.warning(\n            'Multi-user mode is enabled. Known limitations:'\n            '\\n- The Stop button stops generation for all users, not just you.'\n            '\\n- Chat history is not saved and will be lost on page refresh.'\n            '\\n- Only one user can generate at a time unless using a parallel-capable backend (e.g. llama.cpp with --parallel N for N > 1, or ExLlamaV3).'\n            '\\n\\nThis mode works best for small trusted teams.'\n            '\\n\\nDo not expose publicly. Grayed-out actions can easily be bypassed client-side.\\n'\n        )\n\n\ndef apply_image_model_cli_overrides():\n    \"\"\"Apply command-line overrides for image model settings.\"\"\"\n    if args.image_model is not None:\n        settings['image_model_menu'] = args.image_model\n    if args.image_dtype is not None:\n        settings['image_dtype'] = args.image_dtype\n    if args.image_attn_backend is not None:\n        settings['image_attn_backend'] = args.image_attn_backend\n    if args.image_cpu_offload:\n        settings['image_cpu_offload'] = True\n    if args.image_compile:\n        settings['image_compile'] = True\n    if args.image_quant is not None:\n        settings['image_quant'] = args.image_quant\n\n\ndef fix_loader_name(name):\n    if not name:\n        return name\n\n    name = name.lower()\n    if name in ['llama.cpp', 'llamacpp', 'llama-cpp', 'llama cpp']:\n        return 'llama.cpp'\n    elif name in ['transformers', 'huggingface', 'hf', 'hugging_face', 'hugging face']:\n        return 'Transformers'\n    elif 
name in ['exllamav3-hf', 'exllamav3_hf', 'exllama-v3-hf', 'exllama_v3_hf', 'exllama-v3_hf', 'exllama3-hf', 'exllama3_hf', 'exllama-3-hf', 'exllama_3_hf', 'exllama-3_hf']:\n        return 'ExLlamav3_HF'\n    elif name in ['exllamav3']:\n        return 'ExLlamav3'\n    elif name in ['tensorrt', 'tensorrtllm', 'tensorrt_llm', 'tensorrt-llm', 'tensort', 'tensortllm']:\n        return 'TensorRT-LLM'\n\n\ndef add_extension(name, last=False):\n    if args.extensions is None:\n        args.extensions = [name]\n    elif last:\n        args.extensions = [x for x in args.extensions if x != name]\n        args.extensions.append(name)\n    elif name not in args.extensions:\n        args.extensions.append(name)\n\n\ndef is_chat():\n    return True\n\n\ndef load_user_config():\n    '''\n    Loads custom model-specific settings\n    '''\n    if Path(f'{args.model_dir}/config-user.yaml').exists():\n        file_content = open(f'{args.model_dir}/config-user.yaml', 'r').read().strip()\n\n        if file_content:\n            user_config = yaml.safe_load(file_content)\n        else:\n            user_config = {}\n    else:\n        user_config = {}\n\n    return user_config\n\n\nargs.loader = fix_loader_name(args.loader)\n\n# Activate the API extension\nif args.api or args.public_api:\n    add_extension('openai', last=True)\n\n# Load model-specific settings\np = Path(f'{args.model_dir}/config.yaml')\nif p.exists():\n    model_config = yaml.safe_load(open(p, 'r').read())\nelse:\n    model_config = {}\ndel p\n\n\n# Load custom model-specific settings\nuser_config = load_user_config()\n\nmodel_config = OrderedDict(model_config)\nuser_config = OrderedDict(user_config)\n"
  },
  {
    "path": "modules/tensorrt_llm.py",
    "content": "from pathlib import Path\n\nfrom tensorrt_llm._tensorrt_engine import LLM\nfrom tensorrt_llm.llmapi import SamplingParams\n\nfrom modules import shared\nfrom modules.logging_colors import logger\n\n\nclass TensorRTLLMModel:\n    def __init__(self):\n        pass\n\n    @classmethod\n    def from_pretrained(cls, path_to_model):\n        path_to_model = Path(f'{shared.args.model_dir}') / Path(path_to_model)\n\n        llm = LLM(\n            model=str(path_to_model),\n            skip_tokenizer_init=False,\n        )\n\n        result = cls()\n        result.llm = llm\n        result.tokenizer = llm.tokenizer\n        return result\n\n    def generate_with_streaming(self, prompt, state):\n        sampling_params = SamplingParams(\n            max_tokens=state['max_new_tokens'] if not state['auto_max_new_tokens']\n                       else state['truncation_length'] - len(shared.tokenizer.encode(prompt)),\n            end_id=shared.tokenizer.eos_token_id,\n            temperature=state['temperature'],\n            top_k=state['top_k'],\n            top_p=state['top_p'],\n            min_p=state['min_p'],\n            repetition_penalty=state['repetition_penalty'],\n            presence_penalty=state['presence_penalty'],\n            frequency_penalty=state['frequency_penalty'],\n            no_repeat_ngram_size=state['no_repeat_ngram_size'] if state['no_repeat_ngram_size'] > 0 else None,\n            seed=state['seed'],\n            ignore_eos=state['ban_eos_token'],\n            add_special_tokens=state['add_bos_token'],\n            skip_special_tokens=state['skip_special_tokens'],\n        )\n\n        stop_event = state.get('stop_event')\n        result = self.llm.generate_async(prompt, sampling_params=sampling_params, streaming=True)\n\n        cumulative_reply = ''\n        for output in result:\n            if shared.stop_everything or (stop_event and stop_event.is_set()):\n                result.abort()\n                break\n\n            
text_diff = output.outputs[0].text_diff\n            if text_diff:\n                cumulative_reply += text_diff\n                yield cumulative_reply\n\n    def generate(self, prompt, state):\n        output = ''\n        for output in self.generate_with_streaming(prompt, state):\n            pass\n\n        return output\n\n    def unload(self):\n        if hasattr(self, 'llm') and self.llm is not None:\n            self.llm.shutdown()\n            self.llm = None\n"
  },
  {
    "path": "modules/text_generation.py",
    "content": "import ast\nimport copy\nimport html\nimport pprint\nimport random\nimport time\nimport traceback\n\nimport numpy as np\n\nimport modules.shared as shared\nfrom modules import models\nfrom modules.callbacks import Iteratorize\nfrom modules.extensions import apply_extensions\nfrom modules.html_generator import generate_basic_html\nfrom modules.logging_colors import logger\nfrom modules.utils import check_model_loaded\n\n\ndef generate_reply(*args, **kwargs):\n    if shared.args.idle_timeout > 0 and shared.model is None and shared.model_name not in [None, 'None']:\n        from modules.models import load_model\n        shared.model, shared.tokenizer = load_model(shared.model_name)\n\n    state = args[1] if len(args) > 1 else kwargs.get('state', {})\n    use_parallel = (\n        state.get('stop_event') is not None\n        and shared.model.__class__.__name__ in ['Exllamav3Model', 'LlamaServer', 'TensorRTLLMModel']\n        and (shared.model.__class__.__name__ != 'LlamaServer' or shared.args.parallel > 1)\n    )\n\n    if not use_parallel:\n        shared.generation_lock.acquire()\n\n    try:\n        for result in _generate_reply(*args, **kwargs):\n            yield result\n    finally:\n        models.last_generation_time = time.time()\n        if not use_parallel:\n            shared.generation_lock.release()\n\n\ndef _generate_reply(question, state, stopping_strings=None, is_chat=False, escape_html=False, for_ui=False):\n    # Find the appropriate generation function\n    generate_func = apply_extensions('custom_generate_reply')\n    if generate_func is None:\n        model_is_loaded, error_message = check_model_loaded()\n        if not model_is_loaded:\n            yield ''\n            return\n\n        if shared.model.__class__.__name__ in ['LlamaServer', 'Exllamav3Model', 'TensorRTLLMModel']:\n            generate_func = generate_reply_custom\n        else:\n            generate_func = generate_reply_HF\n\n    if generate_func != 
generate_reply_HF and shared.args.verbose:\n        logger.info(\"PROMPT=\")\n        print_prompt(question)\n\n    # Prepare the input\n    original_question = question\n    if not is_chat:\n        state = apply_extensions('state', state)\n        question = apply_extensions('input', question, state)\n\n    # Find the stopping strings\n    all_stop_strings = []\n    for st in (stopping_strings, state['custom_stopping_strings']):\n        if type(st) is str:\n            st = ast.literal_eval(f\"[{st}]\")\n\n        if type(st) is list and len(st) > 0:\n            all_stop_strings += st\n\n    shared.stop_everything = False\n    reply = ''\n    is_stream = state['stream']\n    if len(all_stop_strings) > 0 and not state['stream']:\n        original_logits_processor = state.get('logits_processor')\n        stop_event_ref = state.pop('stop_event', None)\n        state = copy.deepcopy(state)\n        if stop_event_ref is not None:\n            state['stop_event'] = stop_event_ref\n        if original_logits_processor is not None:\n            state['logits_processor'] = original_logits_processor\n        state['stream'] = True\n\n    # Generate\n    last_update = -1\n    latency_threshold = 1 / 1000\n    for reply in generate_func(question, original_question, state, stopping_strings, is_chat=is_chat):\n        cur_time = time.monotonic()\n        reply, stop_found = apply_stopping_strings(reply, all_stop_strings)\n        if escape_html:\n            reply = html.escape(reply)\n\n        if is_stream:\n            # Limit number of tokens/second to make text readable in real time\n            if state['max_tokens_second'] > 0:\n                diff = 1 / state['max_tokens_second'] - (cur_time - last_update)\n                if diff > 0:\n                    time.sleep(diff)\n\n                last_update = time.monotonic()\n                yield reply\n\n            # Limit updates to avoid lag in the Gradio UI\n            # API updates are not limited\n            
else:\n                # If 'generate_func' takes less than 0.001 seconds to yield the next token\n                # (equivalent to more than 1000 tok/s), assume that the UI is lagging behind and skip yielding\n                if (cur_time - last_update) > latency_threshold:\n                    yield reply\n                last_update = time.monotonic()\n\n        stop_event = state.get('stop_event')\n        if stop_found or shared.stop_everything or (stop_event and stop_event.is_set()):\n            break\n\n    if not is_chat:\n        reply = apply_extensions('output', reply, state)\n\n    yield reply\n\n\ndef encode(prompt, add_special_tokens=True, add_bos_token=True, truncation_length=None):\n    if shared.tokenizer is None:\n        raise ValueError('No tokenizer is loaded')\n\n    # llama.cpp case\n    if shared.model.__class__.__name__ == 'LlamaServer':\n        input_ids = shared.tokenizer.encode(str(prompt), add_bos_token=add_bos_token)\n        input_ids = np.array(input_ids).reshape(1, len(input_ids))\n\n        if truncation_length is not None:\n            input_ids = input_ids[:, -truncation_length:]\n\n        return input_ids\n\n    # All other model types\n    else:\n        import torch\n\n        from modules.torch_utils import get_device\n\n        if shared.model.__class__.__name__ in ['Exllamav3Model', 'TensorRTLLMModel']:\n            input_ids = shared.tokenizer.encode(str(prompt))\n            if shared.model.__class__.__name__ not in ['Exllamav3Model']:\n                input_ids = np.array(input_ids).reshape(1, len(input_ids))\n        else:\n            input_ids = shared.tokenizer.encode(str(prompt), return_tensors='pt', add_special_tokens=add_special_tokens)\n            if hasattr(shared.tokenizer, 'bos_token_id') and shared.tokenizer.bos_token_id is not None:\n                if add_bos_token:\n                    # Add BOS token if missing\n                    if (len(input_ids[0]) > 0 and input_ids[0][0] != 
shared.tokenizer.bos_token_id) or len(input_ids[0]) == 0:\n                        bos_tensor = torch.tensor([[shared.tokenizer.bos_token_id]])\n                        input_ids = torch.cat((bos_tensor, input_ids), 1)\n\n                # Always prevent double BOS tokens (regardless of add_bos_token setting)\n                while len(input_ids[0]) > 1 and input_ids[0][0] == shared.tokenizer.bos_token_id and input_ids[0][1] == shared.tokenizer.bos_token_id:\n                    input_ids = input_ids[:, 1:]\n\n        if truncation_length is not None:\n            input_ids = input_ids[:, -truncation_length:]\n\n        if shared.model.__class__.__name__ in ['Exllamav3Model', 'TensorRTLLMModel'] or shared.args.cpu:\n            return input_ids\n        else:\n            device = get_device()\n            if device:\n                return input_ids.to(device)\n\n            return input_ids\n\n\ndef decode(output_ids, skip_special_tokens=True):\n    if shared.tokenizer is None:\n        raise ValueError('No tokenizer is loaded')\n\n    return shared.tokenizer.decode(output_ids, skip_special_tokens=skip_special_tokens)\n\n\ndef get_encoded_length(prompt):\n    length_after_extensions = apply_extensions('tokenized_length', prompt)\n    if length_after_extensions is not None:\n        return length_after_extensions\n\n    return len(encode(prompt)[0])\n\n\ndef get_token_ids(prompt):\n    tokens = encode(prompt)[0]\n    decoded_tokens = [shared.tokenizer.decode([int(i)]) for i in tokens]\n\n    output = ''\n    for row in list(zip(tokens, decoded_tokens)):\n        output += f\"{str(int(row[0])).ljust(5)}  -  {repr(row[1])}\\n\"\n\n    return output\n\n\ndef get_max_prompt_length(state):\n    return state['truncation_length'] - state['max_new_tokens']\n\n\ndef generate_reply_wrapper(question, state, stopping_strings=None):\n    \"\"\"\n    Returns formatted outputs for the UI\n    \"\"\"\n    reply = question if not shared.is_seq2seq else ''\n    yield 
formatted_outputs(reply, shared.model_name)\n\n    for reply in generate_reply(question, state, stopping_strings, is_chat=False, escape_html=True, for_ui=True):\n        if not shared.is_seq2seq:\n            reply = question + reply\n\n        yield formatted_outputs(reply, shared.model_name)\n\n\ndef formatted_outputs(reply, model_name):\n    return html.unescape(reply), generate_basic_html(reply)\n\n\ndef set_manual_seed(seed):\n    seed = int(seed)\n    if seed == -1:\n        seed = random.randint(1, 2**31)\n\n    if shared.args.loader != 'llama.cpp':\n        import torch\n        from transformers import is_torch_npu_available, is_torch_xpu_available\n\n        torch.manual_seed(seed)\n        if torch.cuda.is_available():\n            torch.cuda.manual_seed_all(seed)\n        elif is_torch_xpu_available():\n            torch.xpu.manual_seed_all(seed)\n        elif is_torch_npu_available():\n            torch.npu.manual_seed_all(seed)\n\n    return seed\n\n\ndef stop_everything_event():\n    shared.stop_everything = True\n\n\ndef apply_stopping_strings(reply, all_stop_strings):\n    stop_found = False\n    for string in all_stop_strings:\n        idx = reply.find(string)\n        if idx != -1:\n            reply = reply[:idx]\n            stop_found = True\n            break\n\n    if not stop_found:\n        # If something like \"\\nYo\" is generated just before \"\\nYou:\"\n        # is completed, trim it\n        for string in all_stop_strings:\n            for j in range(len(string) - 1, 0, -1):\n                if reply[-j:] == string[:j]:\n                    reply = reply[:-j]\n                    break\n            else:\n                continue\n\n            break\n\n    return reply, stop_found\n\n\ndef get_reply_from_output_ids(output_ids, state=None, starting_from=0):\n    import torch\n\n    if torch.cuda.is_available():\n        torch.cuda.synchronize()\n\n    reply = decode(output_ids[starting_from:], state['skip_special_tokens'] if state 
else True)\n\n    # Handle tokenizers that do not add the leading space for the first token\n    if (hasattr(shared.tokenizer, 'convert_ids_to_tokens') and len(output_ids) > starting_from) and not reply.startswith(' '):\n        first_token = shared.tokenizer.convert_ids_to_tokens(int(output_ids[starting_from]))\n        if isinstance(first_token, (bytes,)):\n            # try to decode the bytes to a string\n            # if it fails, which means it's not a string in this turn, just ignore it\n            try:\n                first_token = first_token.decode('utf8')\n            except UnicodeDecodeError:\n                first_token = ''\n\n        if first_token.startswith('▁'):\n            reply = ' ' + reply\n\n    return reply\n\n\ndef generate_reply_HF(question, original_question, state, stopping_strings=None, is_chat=False):\n    import torch\n    import transformers\n    from transformers import LogitsProcessorList\n\n    from modules.grammar.grammar_utils import initialize_grammar\n    from modules.grammar.logits_process import (\n        GrammarConstrainedLogitsProcessor\n    )\n    from modules.torch_utils import clear_torch_cache, get_device\n    from modules.transformers_loader import (\n        Stream,\n        _StopEverythingStoppingCriteria\n    )\n\n    if shared.args.loader == 'Transformers':\n        clear_torch_cache()\n\n    seed = set_manual_seed(state['seed'])\n\n    generate_params = {}\n    for k in [\n        'temperature',\n        'dynatemp_low',\n        'dynatemp_high',\n        'dynatemp_exponent',\n        'smoothing_factor',\n        'smoothing_curve',\n        'min_p',\n        'top_p',\n        'top_k',\n        'typical_p',\n        'xtc_threshold',\n        'xtc_probability',\n        'tfs',\n        'top_a',\n        'top_n_sigma',\n        'adaptive_target',\n        'adaptive_decay',\n        'dry_multiplier',\n        'dry_allowed_length',\n        'dry_base',\n        'repetition_penalty',\n        'frequency_penalty',\n 
       'presence_penalty',\n        'encoder_repetition_penalty',\n        'no_repeat_ngram_size',\n        'repetition_penalty_range',\n        'penalty_alpha',\n        'guidance_scale',\n        'mirostat_mode',\n        'mirostat_tau',\n        'mirostat_eta',\n        'max_new_tokens',\n        'do_sample',\n        'dynamic_temperature',\n        'temperature_last',\n        'dry_sequence_breakers',\n    ]:\n        if k in state:\n            generate_params[k] = state[k]\n\n    for k in ['epsilon_cutoff', 'eta_cutoff']:\n        if state[k] > 0:\n            generate_params[k] = state[k] * 1e-4\n\n    if state['prompt_lookup_num_tokens'] > 0:\n        generate_params['prompt_lookup_num_tokens'] = state['prompt_lookup_num_tokens']\n\n    if state['ban_eos_token']:\n        generate_params['suppress_tokens'] = [shared.tokenizer.eos_token_id]\n\n    if state['static_cache']:\n        generate_params['cache_implementation'] = 'static'\n\n    if isinstance(state['sampler_priority'], list) and len(state['sampler_priority']) > 0:\n        generate_params['sampler_priority'] = state['sampler_priority']\n    elif isinstance(state['sampler_priority'], str) and state['sampler_priority'].strip() != '':\n        generate_params['sampler_priority'] = [x.strip() for x in state['sampler_priority'].replace('\\n', ',').split(',') if x.strip()]\n\n    if state['custom_token_bans']:\n        to_ban = [int(x.strip()) for x in state['custom_token_bans'].split(',') if x.strip()]\n        if len(to_ban) > 0:\n            if generate_params.get('suppress_tokens', None):\n                generate_params['suppress_tokens'] += to_ban\n            else:\n                generate_params['suppress_tokens'] = to_ban\n\n    if state['negative_prompt'] != '':\n        generate_params['negative_prompt_ids'] = encode(state['negative_prompt'])\n\n    generate_params.update({'use_cache': not shared.args.no_cache})\n\n    # Encode the input\n    input_ids = encode(question, 
add_bos_token=state['add_bos_token'], truncation_length=get_max_prompt_length(state))\n    output = input_ids[0]\n    if state['auto_max_new_tokens']:\n        generate_params['max_new_tokens'] = state['truncation_length'] - input_ids.shape[-1]\n\n    # Add the encoded tokens to generate_params\n    question, input_ids, inputs_embeds = apply_extensions('tokenizer', state, question, input_ids, None)\n    original_input_ids = input_ids\n    generate_params.update({'inputs': input_ids})\n    if inputs_embeds is not None:\n        generate_params.update({'inputs_embeds': inputs_embeds})\n\n    # Stopping criteria / eos token\n    eos_token_ids = [shared.tokenizer.eos_token_id] if shared.tokenizer.eos_token_id is not None else []\n    generate_params['eos_token_id'] = eos_token_ids\n    generate_params['stopping_criteria'] = transformers.StoppingCriteriaList()\n    generate_params['stopping_criteria'].append(_StopEverythingStoppingCriteria())\n\n    # Logits processor\n    processor = state.get('logits_processor', LogitsProcessorList([]))\n    if not isinstance(processor, LogitsProcessorList):\n        processor = LogitsProcessorList([processor])\n\n    # Grammar\n    if state['grammar_string'].strip() != '':\n        grammar = initialize_grammar(state['grammar_string'])\n        grammar_processor = GrammarConstrainedLogitsProcessor(grammar)\n        processor.append(grammar_processor)\n\n    apply_extensions('logits_processor', processor, input_ids)\n    generate_params['logits_processor'] = processor\n\n    if shared.args.verbose:\n        logger.info(\"GENERATE_PARAMS=\")\n        filtered_params = {key: value for key, value in generate_params.items() if not isinstance(value, torch.Tensor)}\n        pprint.PrettyPrinter(indent=4, sort_dicts=False).pprint(filtered_params)\n        print()\n\n        logger.info(\"PROMPT=\")\n        print_prompt(decode(input_ids[0], skip_special_tokens=False))\n\n    t0 = time.time()\n    try:\n        if not is_chat and not 
shared.is_seq2seq:\n            yield ''\n\n        # Generate the entire reply at once.\n        if not state['stream']:\n            with torch.no_grad():\n                output = shared.model.generate(**generate_params)[0]\n                device = get_device()\n                if device:\n                    output = output.to(device)\n\n            starting_from = 0 if shared.is_seq2seq else len(input_ids[0])\n            yield get_reply_from_output_ids(output, state, starting_from=starting_from)\n\n        # Stream the reply 1 token at a time.\n        # This is based on the trick of using 'stopping_criteria' to create an iterator.\n        else:\n\n            def generate_with_callback(callback=None, *args, **kwargs):\n                kwargs['stopping_criteria'].append(Stream(callback_func=callback))\n                with torch.no_grad():\n                    shared.model.generate(**kwargs)\n\n            def generate_with_streaming(**kwargs):\n                return Iteratorize(generate_with_callback, [], kwargs, callback=None)\n\n            with generate_with_streaming(**generate_params) as generator:\n                cumulative_reply = ''\n                starting_from = 0 if shared.is_seq2seq else len(input_ids[0])\n                for output in generator:\n                    if output[-1] in eos_token_ids:\n                        break\n\n                    new_content = get_reply_from_output_ids(output, state, starting_from=starting_from)\n                    # check the partial unicode character\n                    if chr(0xfffd) in new_content:\n                        continue\n\n                    cumulative_reply += new_content\n                    starting_from = len(output)\n                    yield cumulative_reply\n\n    except Exception:\n        traceback.print_exc()\n    finally:\n        t1 = time.time()\n        original_tokens = len(original_input_ids[0])\n        new_tokens = len(output) - (original_tokens if not 
shared.is_seq2seq else 0)\n        logger.info(f'Output generated in {(t1-t0):.2f} seconds ({new_tokens/(t1-t0):.2f} tokens/s, {new_tokens} tokens, context {original_tokens}, seed {seed})')\n        return\n\n\ndef generate_reply_custom(question, original_question, state, stopping_strings=None, is_chat=False):\n    \"\"\"\n    For models that do not use the transformers library for sampling\n    \"\"\"\n\n    stop_event_ref = state.pop('stop_event', None)\n    state = copy.deepcopy(state)\n    if stop_event_ref is not None:\n        state['stop_event'] = stop_event_ref\n    state['seed'] = set_manual_seed(state['seed'])\n    t0 = time.time()\n    reply = ''\n    try:\n        if not is_chat:\n            yield ''\n\n        if not state['stream']:\n            reply = shared.model.generate(question, state)\n            yield reply\n        else:\n            for reply in shared.model.generate_with_streaming(question, state):\n                yield reply\n\n    except Exception:\n        traceback.print_exc()\n    finally:\n        t1 = time.time()\n\n        if hasattr(shared.model, 'last_prompt_token_count'):\n            original_tokens = shared.model.last_prompt_token_count\n            new_tokens = len(encode(reply)[0]) if reply else 0\n        else:\n            original_tokens = len(encode(original_question)[0])\n            new_tokens = len(encode(original_question + reply)[0]) - original_tokens\n\n        logger.info(f'Output generated in {(t1-t0):.2f} seconds ({new_tokens/(t1-t0):.2f} tokens/s, {new_tokens} tokens, context {original_tokens}, seed {state[\"seed\"]})')\n        return\n\n\ndef print_prompt(prompt, max_chars=-1):\n    DARK_YELLOW = \"\\033[38;5;3m\"\n    RESET = \"\\033[0m\"\n\n    if max_chars > 0 and len(prompt) > max_chars:\n        half_chars = max_chars // 2\n        hidden_len = len(prompt[half_chars:-half_chars])\n        hidden_msg = f\"{DARK_YELLOW}[...{hidden_len} characters hidden...]{RESET}\"\n        print(prompt[:half_chars] + 
hidden_msg + prompt[-half_chars:])\n    else:\n        print(prompt)\n\n    print()\n"
  },
  {
    "path": "modules/tool_parsing.py",
    "content": "import json\nimport random\nimport re\n\n\ndef get_tool_call_id() -> str:\n    letter_bytes = \"abcdefghijklmnopqrstuvwxyz0123456789\"\n    b = [random.choice(letter_bytes) for _ in range(8)]\n    return \"call_\" + \"\".join(b).lower()\n\n\n# All known opening markers for tool calls across model formats.\nTOOL_CALL_OPENING_MARKERS = [\n    '<tool_call>',\n    '<function_call>',\n    '<minimax:tool_call>',\n    '<|tool_call_begin|>',\n    '<|tool_calls_section_begin|>',\n    '<｜tool▁call▁begin｜>',\n    '<｜tool▁calls▁begin｜>',\n    '[TOOL_CALLS]',\n    'to=functions.',\n    '<|channel|>commentary',\n]\n\n\ndef streaming_tool_buffer_check(text, markers=None, tool_names=None, check_bare_names=False):\n    '''\n    Check whether streaming output should be withheld because it may\n    contain tool-call markup.\n\n    Args:\n        text: Full accumulated internal text.\n        markers: Template-specific markers for partial-prefix matching.\n                 If None, falls back to TOOL_CALL_OPENING_MARKERS.\n        tool_names: List of tool function names.\n        check_bare_names: Whether to do partial-prefix matching on tool\n                          names (for models with unknown template format).\n    '''\n    # Full marker found in text → buffer permanently.\n    # Always checks ALL known markers regardless of template (cheap safety net).\n    for marker in TOOL_CALL_OPENING_MARKERS:\n        if marker in text:\n            return True\n\n    # Bare function-name full match: \"get_weather{...}\" or \"get_weather {...}\"\n    if tool_names:\n        for name in tool_names:\n            if name + '{' in text or name + ' {' in text:\n                return True\n\n    # Partial-prefix matching: only for template-specific markers.\n    for marker in (markers if markers is not None else TOOL_CALL_OPENING_MARKERS):\n        for prefix_len in range(min(len(marker) - 1, len(text)), 0, -1):\n            if text.endswith(marker[:prefix_len]):\n              
  return True\n\n    # Bare-name partial matching: only when template format is unknown.\n    if check_bare_names and tool_names:\n        for name in tool_names:\n            if text.endswith(name):\n                return True\n            for prefix_len in range(min(len(name) - 1, len(text)), 0, -1):\n                if text.endswith(name[:prefix_len]):\n                    return True\n\n    return False\n\n\ndef check_and_sanitize_tool_call_candidate(candidate_dict: dict, tool_names: list[str]):\n    # check if property 'function' exists and is a dictionary, otherwise adapt dict\n    if 'function' not in candidate_dict and 'name' in candidate_dict and isinstance(candidate_dict['name'], str):\n        candidate_dict = {\"type\": \"function\", \"function\": candidate_dict}\n    if 'function' in candidate_dict and isinstance(candidate_dict['function'], str):\n        candidate_dict['name'] = candidate_dict['function']\n        del candidate_dict['function']\n        candidate_dict = {\"type\": \"function\", \"function\": candidate_dict}\n    if 'function' in candidate_dict and isinstance(candidate_dict['function'], dict):\n        # check if 'name' exists within 'function' and is part of known tools\n        if 'name' in candidate_dict['function'] and candidate_dict['function']['name'] in tool_names:\n            candidate_dict[\"type\"] = \"function\"  # ensure required property 'type' exists and has the right value\n            # map property 'parameters' used by some older models to 'arguments'\n            if \"arguments\" not in candidate_dict[\"function\"] and \"parameters\" in candidate_dict[\"function\"]:\n                candidate_dict[\"function\"][\"arguments\"] = candidate_dict[\"function\"][\"parameters\"]\n                del candidate_dict[\"function\"][\"parameters\"]\n            return candidate_dict\n    return None\n\n\ndef _extract_balanced_json(text: str, start: int) -> str | None:\n    \"\"\"Extract a balanced JSON object from text starting 
at the given position.\n\n    Walks through the string tracking brace depth and string boundaries\n    to correctly handle arbitrary nesting levels.\n    \"\"\"\n    if start >= len(text) or text[start] != '{':\n        return None\n    depth = 0\n    in_string = False\n    escape_next = False\n    for i in range(start, len(text)):\n        c = text[i]\n        if escape_next:\n            escape_next = False\n            continue\n        if c == '\\\\' and in_string:\n            escape_next = True\n            continue\n        if c == '\"':\n            in_string = not in_string\n            continue\n        if in_string:\n            continue\n        if c == '{':\n            depth += 1\n        elif c == '}':\n            depth -= 1\n            if depth == 0:\n                return text[start:i + 1]\n    return None\n\n\ndef _parse_channel_tool_calls(answer: str, tool_names: list[str]):\n    \"\"\"Parse channel-based tool calls used by GPT-OSS and similar models.\n\n    Format:\n        <|start|>assistant to=functions.func_name<|channel|>commentary json<|message|>{\"arg\": \"value\"}\n    or:\n        <|channel|>commentary to=functions.func_name <|constrain|>json<|message|>{\"arg\": \"value\"}\n    \"\"\"\n    matches = []\n    start_pos = None\n    # Pattern 1: to=functions.NAME before <|channel|> (GPT-OSS primary format)\n    # Pattern 2: to=functions.NAME after <|channel|> (alternative format)\n    patterns = [\n        r'to=functions\\.([^<\\s]+)\\s*<\\|channel\\|>[^<]*<\\|message\\|>',\n        r'<\\|channel\\|>\\w+ to=functions\\.([^<\\s]+).*?<\\|message\\|>',\n    ]\n    for pattern in patterns:\n        for m in re.finditer(pattern, answer):\n            func_name = m.group(1).strip()\n            if func_name not in tool_names:\n                continue\n            json_str = _extract_balanced_json(answer, m.end())\n            if json_str is None:\n                continue\n            try:\n                arguments = json.loads(json_str)\n    
            if start_pos is None:\n                    prefix = answer.rfind('<|start|>assistant', 0, m.start())\n                    start_pos = prefix if prefix != -1 else m.start()\n                matches.append({\n                    \"type\": \"function\",\n                    \"function\": {\n                        \"name\": func_name,\n                        \"arguments\": arguments\n                    }\n                })\n            except json.JSONDecodeError:\n                pass\n        if matches:\n            break\n    return matches, start_pos\n\n\ndef _parse_mistral_token_tool_calls(answer: str, tool_names: list[str]):\n    \"\"\"Parse Mistral/Devstral-style tool calls with [TOOL_CALLS] and [ARGS] special tokens.\n\n    Format:\n        [TOOL_CALLS]func_name[ARGS]{\"arg\": \"value\"}\n    \"\"\"\n    matches = []\n    start_pos = None\n    for m in re.finditer(\n        r'\\[TOOL_CALLS\\]\\s*(\\S+?)\\s*\\[ARGS\\]\\s*',\n        answer\n    ):\n        func_name = m.group(1).strip()\n        if func_name not in tool_names:\n            continue\n        json_str = _extract_balanced_json(answer, m.end())\n        if json_str is None:\n            continue\n        try:\n            arguments = json.loads(json_str)\n            if start_pos is None:\n                start_pos = m.start()\n            matches.append({\n                \"type\": \"function\",\n                \"function\": {\n                    \"name\": func_name,\n                    \"arguments\": arguments\n                }\n            })\n        except json.JSONDecodeError:\n            pass\n    return matches, start_pos\n\n\ndef _parse_bare_name_tool_calls(answer: str, tool_names: list[str]):\n    \"\"\"Parse bare function-name style tool calls used by Mistral and similar models.\n\n    Format:\n        functionName{\"arg\": \"value\"}\n    Multiple calls are concatenated directly or separated by whitespace.\n    \"\"\"\n    matches = []\n    start_pos = None\n    # 
Match tool name followed by opening brace, then extract balanced JSON\n    escaped_names = [re.escape(name) for name in tool_names]\n    pattern = r'(?:' + '|'.join(escaped_names) + r')\\s*\\{'\n    for match in re.finditer(pattern, answer):\n        text = match.group(0)\n        name = None\n        for n in tool_names:\n            if text.startswith(n):\n                name = n\n                break\n        if not name:\n            continue\n        brace_start = match.end() - 1\n        json_str = _extract_balanced_json(answer, brace_start)\n        if json_str is None:\n            continue\n        try:\n            arguments = json.loads(json_str)\n            if start_pos is None:\n                start_pos = match.start()\n            matches.append({\n                \"type\": \"function\",\n                \"function\": {\n                    \"name\": name,\n                    \"arguments\": arguments\n                }\n            })\n        except json.JSONDecodeError:\n            pass\n    return matches, start_pos\n\n\ndef _parse_xml_param_tool_calls(answer: str, tool_names: list[str]):\n    \"\"\"Parse XML-parameter style tool calls used by Qwen3.5 and similar models.\n\n    Format:\n        <tool_call>\n        <function=function_name>\n        <parameter=param_name>value</parameter>\n        </function>\n        </tool_call>\n    \"\"\"\n    matches = []\n    start_pos = None\n    for tc_match in re.finditer(r'<tool_call>\\s*(.*?)\\s*</tool_call>', answer, re.DOTALL):\n        tc_content = tc_match.group(1)\n        func_match = re.search(r'<function=([^>]+)>', tc_content)\n        if not func_match:\n            continue\n        func_name = func_match.group(1).strip()\n        if func_name not in tool_names:\n            continue\n        arguments = {}\n        for param_match in re.finditer(r'<parameter=([^>]+)>\\s*(.*?)\\s*</parameter>', tc_content, re.DOTALL):\n            param_name = param_match.group(1).strip()\n            
param_value = param_match.group(2).strip()\n            try:\n                param_value = json.loads(param_value)\n            except (json.JSONDecodeError, ValueError):\n                pass  # keep as string\n            arguments[param_name] = param_value\n        if start_pos is None:\n            start_pos = tc_match.start()\n        matches.append({\n            \"type\": \"function\",\n            \"function\": {\n                \"name\": func_name,\n                \"arguments\": arguments\n            }\n        })\n    return matches, start_pos\n\n\ndef _parse_kimi_tool_calls(answer: str, tool_names: list[str]):\n    \"\"\"Parse Kimi-K2-style tool calls using pipe-delimited tokens.\n\n    Format:\n        <|tool_calls_section_begin|>\n        <|tool_call_begin|>functions.func_name:index<|tool_call_argument_begin|>{\"arg\": \"value\"}<|tool_call_end|>\n        <|tool_calls_section_end|>\n    \"\"\"\n    matches = []\n    start_pos = None\n    for m in re.finditer(\n        r'<\\|tool_call_begin\\|>\\s*(?:functions\\.)?(\\S+?)(?::\\d+)?\\s*<\\|tool_call_argument_begin\\|>\\s*',\n        answer\n    ):\n        func_name = m.group(1).strip()\n        if func_name not in tool_names:\n            continue\n        json_str = _extract_balanced_json(answer, m.end())\n        if json_str is None:\n            continue\n        try:\n            arguments = json.loads(json_str)\n            if start_pos is None:\n                # Check for section begin marker before the call marker\n                section = answer.rfind('<|tool_calls_section_begin|>', 0, m.start())\n                start_pos = section if section != -1 else m.start()\n            matches.append({\n                \"type\": \"function\",\n                \"function\": {\n                    \"name\": func_name,\n                    \"arguments\": arguments\n                }\n            })\n        except json.JSONDecodeError:\n            pass\n    return matches, start_pos\n\n\ndef 
_parse_minimax_tool_calls(answer: str, tool_names: list[str]):\n    \"\"\"Parse MiniMax-style tool calls using invoke/parameter XML tags.\n\n    Format:\n        <minimax:tool_call>\n        <invoke name=\"function_name\">\n        <parameter name=\"param_name\">value</parameter>\n        </invoke>\n        </minimax:tool_call>\n    \"\"\"\n    matches = []\n    start_pos = None\n    for tc_match in re.finditer(r'<minimax:tool_call>\\s*(.*?)\\s*</minimax:tool_call>', answer, re.DOTALL):\n        tc_content = tc_match.group(1)\n        # Split on <invoke> to handle multiple parallel calls in one block\n        for invoke_match in re.finditer(r'<invoke\\s+name=\"([^\"]+)\">(.*?)</invoke>', tc_content, re.DOTALL):\n            func_name = invoke_match.group(1).strip()\n            if func_name not in tool_names:\n                continue\n            invoke_body = invoke_match.group(2)\n            arguments = {}\n            for param_match in re.finditer(r'<parameter\\s+name=\"([^\"]+)\">\\s*(.*?)\\s*</parameter>', invoke_body, re.DOTALL):\n                param_name = param_match.group(1).strip()\n                param_value = param_match.group(2).strip()\n                try:\n                    param_value = json.loads(param_value)\n                except (json.JSONDecodeError, ValueError):\n                    pass  # keep as string\n                arguments[param_name] = param_value\n            if start_pos is None:\n                start_pos = tc_match.start()\n            matches.append({\n                \"type\": \"function\",\n                \"function\": {\n                    \"name\": func_name,\n                    \"arguments\": arguments\n                }\n            })\n    return matches, start_pos\n\n\ndef _parse_deep_seek_tool_calls(answer: str, tool_names: list[str]):\n    \"\"\"Parse DeepSeek-style tool calls using fullwidth Unicode token delimiters.\n\n    Format:\n        
<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>func_name<｜tool▁sep｜>{\"arg\": \"value\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>\n    \"\"\"\n    matches = []\n    start_pos = None\n    for m in re.finditer(\n        r'<｜tool▁call▁begin｜>\\s*(\\S+?)\\s*<｜tool▁sep｜>\\s*',\n        answer\n    ):\n        func_name = m.group(1).strip()\n        if func_name not in tool_names:\n            continue\n        json_str = _extract_balanced_json(answer, m.end())\n        if json_str is None:\n            continue\n        try:\n            arguments = json.loads(json_str)\n            if start_pos is None:\n                # Check for section begin marker before the call marker\n                section = answer.rfind('<｜tool▁calls▁begin｜>', 0, m.start())\n                start_pos = section if section != -1 else m.start()\n            matches.append({\n                \"type\": \"function\",\n                \"function\": {\n                    \"name\": func_name,\n                    \"arguments\": arguments\n                }\n            })\n        except json.JSONDecodeError:\n            pass\n    return matches, start_pos\n\n\ndef _parse_glm_tool_calls(answer: str, tool_names: list[str]):\n    \"\"\"Parse GLM-style tool calls using arg_key/arg_value XML pairs.\n\n    Format:\n        <tool_call>function_name\n        <arg_key>key1</arg_key>\n        <arg_value>value1</arg_value>\n        </tool_call>\n    \"\"\"\n    matches = []\n    start_pos = None\n    for tc_match in re.finditer(r'<tool_call>\\s*(.*?)\\s*</tool_call>', answer, re.DOTALL):\n        tc_content = tc_match.group(1)\n        # First non-tag text is the function name\n        name_match = re.match(r'([^<\\s]+)', tc_content.strip())\n        if not name_match:\n            continue\n        func_name = name_match.group(1).strip()\n        if func_name not in tool_names:\n            continue\n        # Extract arg_key/arg_value pairs\n        keys = [k.group(1).strip() for k in 
re.finditer(r'<arg_key>\\s*(.*?)\\s*</arg_key>', tc_content, re.DOTALL)]\n        vals = [v.group(1).strip() for v in re.finditer(r'<arg_value>\\s*(.*?)\\s*</arg_value>', tc_content, re.DOTALL)]\n        if len(keys) != len(vals):\n            continue\n        arguments = {}\n        for k, v in zip(keys, vals):\n            try:\n                v = json.loads(v)\n            except (json.JSONDecodeError, ValueError):\n                pass  # keep as string\n            arguments[k] = v\n        if start_pos is None:\n            start_pos = tc_match.start()\n        matches.append({\n            \"type\": \"function\",\n            \"function\": {\n                \"name\": func_name,\n                \"arguments\": arguments\n            }\n        })\n    return matches, start_pos\n\n\ndef _parse_pythonic_tool_calls(answer: str, tool_names: list[str]):\n    \"\"\"Parse pythonic-style tool calls used by Llama 4 and similar models.\n\n    Format:\n        [func_name(param1=\"value1\", param2=\"value2\"), func_name2(...)]\n    \"\"\"\n    matches = []\n    start_pos = None\n    # Match a bracketed list of function calls\n    bracket_match = re.search(r'\\[([^\\[\\]]+)\\]', answer)\n    if not bracket_match:\n        return matches, start_pos\n\n    inner = bracket_match.group(1)\n\n    # Build pattern for known tool names\n    escaped_names = [re.escape(name) for name in tool_names]\n    name_pattern = '|'.join(escaped_names)\n\n    for call_match in re.finditer(\n        r'(' + name_pattern + r')\\(([^)]*)\\)',\n        inner\n    ):\n        func_name = call_match.group(1)\n        params_str = call_match.group(2).strip()\n        arguments = {}\n\n        if params_str:\n            # Parse key=\"value\" pairs, handling commas inside quoted values\n            for param_match in re.finditer(\n                r'(\\w+)\\s*=\\s*(\"(?:[^\"\\\\]|\\\\.)*\"|\\'(?:[^\\'\\\\]|\\\\.)*\\'|[^,\\)]+)',\n                params_str\n            ):\n                param_name 
= param_match.group(1)\n                param_value = param_match.group(2).strip()\n                # Strip surrounding quotes\n                if (param_value.startswith('\"') and param_value.endswith('\"')) or \\\n                   (param_value.startswith(\"'\") and param_value.endswith(\"'\")):\n                    param_value = param_value[1:-1]\n                # Try to parse as JSON for numeric/bool/null values\n                try:\n                    param_value = json.loads(param_value)\n                except (json.JSONDecodeError, ValueError):\n                    pass\n                arguments[param_name] = param_value\n\n        if start_pos is None:\n            start_pos = bracket_match.start()\n        matches.append({\n            \"type\": \"function\",\n            \"function\": {\n                \"name\": func_name,\n                \"arguments\": arguments\n            }\n        })\n\n    return matches, start_pos\n\n\n# Format registry: maps template substrings to the parser and streaming\n# markers for that format.  
When a format's hints are NOT found in the\n# template, its parser and markers are excluded.\nTOOL_CALL_FORMATS = [\n    {\n        'template_hints': ['tool▁call▁begin', 'tool▁calls▁begin'],\n        'parser': _parse_deep_seek_tool_calls,\n        'markers': ['<｜tool▁call▁begin｜>', '<｜tool▁calls▁begin｜>'],\n    },\n    {\n        'template_hints': ['<|tool_call_begin|>', 'tool_calls_section'],\n        'parser': _parse_kimi_tool_calls,\n        'markers': ['<|tool_call_begin|>', '<|tool_calls_section_begin|>'],\n    },\n    {\n        'template_hints': ['to=functions.', '<|channel|>'],\n        'parser': _parse_channel_tool_calls,\n        'markers': ['to=functions.', '<|channel|>commentary'],\n    },\n    {\n        'template_hints': ['minimax:tool_call'],\n        'parser': _parse_minimax_tool_calls,\n        'markers': ['<minimax:tool_call>'],\n    },\n    {\n        'template_hints': ['<arg_key>'],\n        'parser': _parse_glm_tool_calls,\n        'markers': ['<tool_call>'],\n    },\n    {\n        'template_hints': ['<tool_call>'],\n        'parser': _parse_xml_param_tool_calls,\n        'markers': ['<tool_call>'],\n    },\n    {\n        'template_hints': ['[TOOL_CALLS]'],\n        'parser': _parse_mistral_token_tool_calls,\n        'markers': ['[TOOL_CALLS]'],\n    },\n    {\n        'template_hints': ['<function_call>'],\n        'parser': None,\n        'markers': ['<function_call>'],\n    },\n]\n\n# Default ordered list of all specialized parsers.\nALL_PARSERS = [\n    _parse_deep_seek_tool_calls,\n    _parse_kimi_tool_calls,\n    _parse_channel_tool_calls,\n    _parse_minimax_tool_calls,\n    _parse_glm_tool_calls,\n    _parse_xml_param_tool_calls,\n    _parse_mistral_token_tool_calls,\n    _parse_bare_name_tool_calls,\n    _parse_pythonic_tool_calls,\n]\n\n\ndef detect_tool_call_format(template_str):\n    \"\"\"Inspect a chat/instruction template to determine which tool call\n    formats are relevant.\n\n    Uses an exclude-based approach: starts with 
all parsers/markers,\n    then removes the ones whose hints are not found in the template.\n\n    Returns (parsers, streaming_markers, check_bare_names).\n    \"\"\"\n    if not template_str:\n        return None, TOOL_CALL_OPENING_MARKERS, True\n\n    matched_any = False\n    exclude_parsers = []\n    exclude_markers = []\n    matched_markers = []\n\n    for fmt in TOOL_CALL_FORMATS:\n        if any(hint in template_str for hint in fmt['template_hints']):\n            matched_any = True\n            matched_markers.extend(fmt['markers'])\n        else:\n            if fmt['parser'] is not None:\n                exclude_parsers.append(fmt['parser'])\n            exclude_markers.extend(fmt['markers'])\n\n    if not matched_any:\n        return None, TOOL_CALL_OPENING_MARKERS, True\n\n    parsers = [p for p in ALL_PARSERS if p not in exclude_parsers]\n    markers = [m for m in TOOL_CALL_OPENING_MARKERS if m not in exclude_markers or m in matched_markers]\n\n    return parsers, markers, False\n\n\ndef parse_tool_call(answer: str, tool_names: list[str], return_prefix: bool = False, parsers: list = None):\n    matches = []\n    start_pos = None\n\n    def _return(matches, start_pos):\n        if return_prefix:\n            prefix = answer[:start_pos] if matches and start_pos is not None else ''\n            return matches, prefix\n        return matches\n\n    # Try specialized parsers.\n    for parser in (parsers if parsers is not None else ALL_PARSERS):\n        matches, start_pos = parser(answer, tool_names)\n        if matches:\n            return _return(matches, start_pos)\n\n    # Generic fallback: regex pattern to find the JSON content wrapped in <function>, <tools>, <tool_call>, and other tags observed from various models\n    patterns = [r\"(```[^\\n]*)\\n(.*?)```\", r\"<([^>]+)>(.*?)</\\1>\"]\n\n    for pattern in patterns:\n        for match in re.finditer(pattern, answer, re.DOTALL):\n            if match.group(2) is None:\n                continue\n        
    # remove backtick wraps if present\n            candidate = re.sub(r\"^```(json|xml|python[^\\n]*)\\n\", \"\", match.group(2).strip())\n            candidate = re.sub(r\"```$\", \"\", candidate.strip())\n            # unwrap inner tags\n            candidate = re.sub(pattern, r\"\\2\", candidate.strip(), flags=re.DOTALL)\n            # llm might have generated multiple json objects separated by linebreaks, check for this pattern and try parsing each object individually\n            if re.search(r\"\\}\\s*\\n\\s*\\{\", candidate) is not None:\n                candidate = re.sub(r\"\\}\\s*\\n\\s*\\{\", \"},\\n{\", candidate)\n            if not candidate.strip().startswith(\"[\"):\n                candidate = \"[\" + candidate + \"]\"\n\n            candidates = []\n            try:\n                # parse the candidate JSON into a dictionary\n                candidates = json.loads(candidate)\n                if not isinstance(candidates, list):\n                    candidates = [candidates]\n            except json.JSONDecodeError:\n                # Ignore invalid JSON silently\n                continue\n\n            for candidate_dict in candidates:\n                checked_candidate = check_and_sanitize_tool_call_candidate(candidate_dict, tool_names)\n                if checked_candidate is not None:\n                    if start_pos is None:\n                        start_pos = match.start()\n                    matches.append(checked_candidate)\n\n        # last resort if nothing has been mapped: LLM might have produced plain json tool call without xml-like tags\n        if len(matches) == 0:\n            try:\n                candidate = answer\n                # llm might have generated multiple json objects separated by linebreaks, check for this pattern and try parsing each object individually\n                if re.search(r\"\\}\\s*\\n\\s*\\{\", candidate) is not None:\n                    candidate = re.sub(r\"\\}\\s*\\n\\s*\\{\", \"},\\n{\", 
candidate)\n                if not candidate.strip().startswith(\"[\"):\n                    candidate = \"[\" + candidate + \"]\"\n                # parse the candidate JSON into a dictionary\n                candidates = json.loads(candidate)\n                if not isinstance(candidates, list):\n                    candidates = [candidates]\n                for candidate_dict in candidates:\n                    checked_candidate = check_and_sanitize_tool_call_candidate(candidate_dict, tool_names)\n                    if checked_candidate is not None:\n                        matches.append(checked_candidate)\n            except json.JSONDecodeError:\n                # Ignore invalid JSON silently\n                pass\n\n    return _return(matches, start_pos)\n"
  },
  {
    "path": "modules/tool_use.py",
    "content": "import importlib.util\nimport json\n\nfrom modules import shared\nfrom modules.logging_colors import logger\nfrom modules.utils import natural_keys, sanitize_filename\n\n\ndef get_available_tools():\n    \"\"\"Return sorted list of tool script names from user_data/tools/*.py.\"\"\"\n    tools_dir = shared.user_data_dir / 'tools'\n    tools_dir.mkdir(parents=True, exist_ok=True)\n    return sorted((p.stem for p in tools_dir.glob('*.py')), key=natural_keys)\n\n\ndef load_tools(selected_names):\n    \"\"\"\n    Import selected tool scripts and return their definitions and executors.\n    Returns (tool_defs, executors) where:\n      - tool_defs: list of OpenAI-format tool dicts\n      - executors: dict mapping function_name -> execute callable\n    \"\"\"\n    tool_defs = []\n    executors = {}\n    for name in selected_names:\n        name = sanitize_filename(name)\n        if not name:\n            continue\n\n        path = shared.user_data_dir / 'tools' / f'{name}.py'\n        if not path.exists():\n            continue\n\n        try:\n            spec = importlib.util.spec_from_file_location(f\"tool_{name}\", str(path))\n            module = importlib.util.module_from_spec(spec)\n            spec.loader.exec_module(module)\n        except Exception:\n            logger.exception(f'Failed to load tool script \"{name}\"')\n            continue\n\n        tool_def = getattr(module, 'tool', None)\n        execute_fn = getattr(module, 'execute', None)\n        if tool_def is None or execute_fn is None:\n            logger.warning(f'Tool \"{name}\" is missing a \"tool\" dict or \"execute\" function.')\n            continue\n\n        func_name = tool_def.get('function', {}).get('name', name)\n        if func_name in executors:\n            logger.warning(f'Tool \"{name}\" declares function name \"{func_name}\" which conflicts with an already loaded tool. 
Skipping.')\n            continue\n        tool_defs.append(tool_def)\n        executors[func_name] = execute_fn\n\n    return tool_defs, executors\n\n\ndef execute_tool(func_name, arguments, executors):\n    \"\"\"Execute a tool by function name. Returns result as a JSON string.\"\"\"\n    fn = executors.get(func_name)\n    if fn is None:\n        return json.dumps({\"error\": f\"Unknown tool: {func_name}\"})\n\n    try:\n        if isinstance(arguments, str):\n            arguments = json.loads(arguments)\n        result = fn(arguments)\n        return json.dumps(result) if not isinstance(result, str) else result\n    except Exception as e:\n        logger.exception(f'Tool \"{func_name}\" execution failed')\n        return json.dumps({\"error\": str(e)})\n"
  },
  {
    "path": "modules/torch_utils.py",
    "content": "import gc\n\nimport torch\nfrom accelerate.utils import is_npu_available, is_xpu_available\nfrom transformers import is_torch_npu_available, is_torch_xpu_available\n\nfrom modules import shared\n\n\ndef get_device():\n    if hasattr(shared.model, 'device'):\n        return shared.model.device\n    elif torch.cuda.is_available():\n        return torch.device('cuda')\n    elif torch.backends.mps.is_available():\n        return torch.device('mps')\n    elif is_torch_xpu_available():\n        return torch.device('xpu:0')\n    elif is_torch_npu_available():\n        return torch.device('npu:0')\n    else:\n        return None\n\n\ndef clear_torch_cache():\n    gc.collect()\n    if not shared.args.cpu:\n        if torch.cuda.is_available():\n            torch.cuda.empty_cache()\n        elif is_xpu_available():\n            torch.xpu.empty_cache()\n        elif is_npu_available():\n            torch.npu.empty_cache()\n        elif torch.backends.mps.is_available():\n            if hasattr(torch.backends.mps, 'empty_cache'):\n                torch.backends.mps.empty_cache()\n"
  },
  {
    "path": "modules/training.py",
    "content": "import os\n\nos.environ[\"WANDB_MODE\"] = \"offline\"\n# os.environ[\"WANDB_DISABLED\"] = \"true\"\n\nimport json\nimport math\nimport random\nimport shutil\nimport sys\nimport threading\nimport time\nimport traceback\nfrom datetime import datetime\nfrom pathlib import Path\n\nimport yaml\nimport gradio as gr\n\nfrom modules import shared, ui, utils\nfrom modules.evaluate import (\n    calculate_perplexity,\n    generate_markdown_table,\n    save_past_evaluations\n)\nfrom modules.logging_colors import logger\nfrom modules.models import reload_model\n\nPARAMETERS = [\"lora_name\", \"always_override\", \"all_linear\", \"q_proj_en\", \"v_proj_en\", \"k_proj_en\", \"o_proj_en\", \"gate_proj_en\", \"down_proj_en\", \"up_proj_en\", \"save_steps\", \"micro_batch_size\", \"batch_size\", \"epochs\", \"learning_rate\", \"lr_scheduler_type\", \"lora_rank\", \"lora_alpha\", \"lora_dropout\", \"cutoff_len\", \"dataset\", \"eval_dataset\", \"format\", \"eval_steps\", \"text_dataset\", \"higher_rank_limit\", \"warmup_steps\", \"optimizer\", \"stride_length\", \"stop_at_loss\", \"add_eos_token\", \"excess_length\", \"report_to\"]\nWANT_INTERRUPT = False\n\ntrain_log = {}\ntrain_template = {}\n\n\ndef create_ui():\n    mu = shared.args.multi_user\n    with gr.Tab(\"Training\", elem_id=\"training-tab\"):\n        with gr.Tab('Train LoRA', elem_id='lora-train-tab'):\n            tmp = gr.State('')\n            with gr.Row():\n                with gr.Column():\n                    gr.Markdown(\"[Tutorial](https://github.com/oobabooga/text-generation-webui/wiki/05-%E2%80%90-Training-Tab)\")\n\n                    with gr.Row():\n                        copy_from = gr.Dropdown(label='Copy parameters from', value='None', choices=utils.get_available_loras(), elem_classes=['slim-dropdown'], interactive=not mu)\n                        ui.create_refresh_button(copy_from, lambda: None, lambda: {'choices': utils.get_available_loras()}, 'refresh-button', interactive=not mu)\n\n 
                   with gr.Row():\n                        with gr.Column(scale=5):\n                            lora_name = gr.Textbox(label='Name', info='The name of your new LoRA file')\n                        with gr.Column():\n                            always_override = gr.Checkbox(label='Override Existing Files', value=False, info='If the name is the same, checking will replace the existing file, and unchecking will load and continue from it (the rank must be the same).', elem_classes=['no-background'])\n\n                    with gr.Accordion(label='Target Modules', open=False, elem_classes='tgw-accordion'):\n                        gr.Markdown(\"Selects which modules to target in training. Targeting more modules is closer to a full fine-tune at the cost of increased VRAM and adapter size.\")\n                        all_linear = gr.Checkbox(label='Target all linear layers', value=True, info='Targets every nn.Linear layer except lm_head. Works for any model architecture. When checked, the individual module checkboxes below are ignored.', elem_classes=['no-background'])\n                        with gr.Row():\n                            with gr.Column():\n                                q_proj_en = gr.Checkbox(label='Enable q_proj', value=True)\n                            with gr.Column():\n                                v_proj_en = gr.Checkbox(label='Enable v_proj', value=True)\n                            with gr.Column():\n                                k_proj_en = gr.Checkbox(label='Enable k_proj', value=False)\n                            with gr.Column():\n                                o_proj_en = gr.Checkbox(label='Enable o_proj', value=False)\n                            with gr.Column():\n                                gate_proj_en = gr.Checkbox(label='Enable gate_proj', value=False)\n                            with gr.Column():\n                                down_proj_en = gr.Checkbox(label='Enable down_proj', value=False)\n             
               with gr.Column():\n                                up_proj_en = gr.Checkbox(label='Enable up_proj', value=False)\n\n                    with gr.Row():\n                        with gr.Column():\n                            lora_rank = gr.Slider(label='LoRA Rank', value=8, minimum=0, maximum=1024, step=4, info='Also called dimension count. Higher values = larger file, more content control. Smaller values = smaller file, less control. Use 4 or 8 for style, 128 or 256 to teach, 1024+ for fine-detail on big data. More VRAM is needed for higher ranks.')\n                            lora_alpha = gr.Slider(label='LoRA Alpha', value=16, minimum=0, maximum=2048, step=4, info='This divided by the rank becomes the scaling of the LoRA. Higher means stronger. A good standard value is twice your Rank.')\n                            batch_size = gr.Slider(label='Batch Size', value=32, minimum=0, maximum=1024, step=4, info='Global batch size. The two batch sizes together determine gradient accumulation (gradientAccum = batch / microBatch). Higher gradient accum values lead to better quality training.')\n                            micro_batch_size = gr.Slider(label='Micro Batch Size', value=4, minimum=1, maximum=128, step=1, info='Per-device batch size (NOTE: multiple devices not yet implemented). Increasing this will increase VRAM usage.')\n                            cutoff_len = gr.Slider(label='Cutoff Length', minimum=0, maximum=4096, value=512, step=32, info='Maximum sequence length in tokens. For instruction datasets, conversations longer than this are dropped. For text datasets, documents are split into chunks of this size. Higher values require more VRAM.')\n\n                        with gr.Column():\n                            save_steps = gr.Number(label='Save every n steps', value=0, info='If above 0, a full training checkpoint (adapter weights, optimizer, scheduler) will be saved every time this many steps pass. 
Training can be resumed from these checkpoints.')\n\n                            epochs = gr.Number(label='Epochs', value=3, info='Number of times every entry in the dataset should be fed into training. So 1 means feed each item in once, 5 means feed it in five times, etc.')\n                            learning_rate = gr.Textbox(label='Learning Rate', value='3e-4', info='In scientific notation. 3e-4 is a good starting base point. 1e-2 is extremely high, 1e-6 is extremely low.')\n                            with gr.Row():\n                                lr_scheduler_type = gr.Dropdown(label='LR Scheduler', value='cosine', choices=['linear', 'constant', 'constant_with_warmup', 'cosine', 'cosine_with_restarts', 'polynomial', 'inverse_sqrt'], info='Learning rate scheduler - defines how the learning rate changes over time. \"Constant\" means never change, \"linear\" means to go in a straight line from the learning rate down to 0, cosine follows a curve, etc.', elem_classes=['slim-dropdown'])\n\n                    with gr.Accordion(label='Advanced Options', open=False, elem_classes='tgw-accordion'):\n                        with gr.Row():\n                            with gr.Column():\n                                lora_dropout = gr.Slider(label='LoRA Dropout', minimum=0.0, maximum=1.0, step=0.025, value=0.0, info='Percentage probability for dropout of LoRA layers. This can help reduce overfitting. Most users should leave at default.')\n                                stop_at_loss = gr.Slider(label='Stop at loss', minimum=0.0, maximum=3.0, step=0.1, value=0.00, info='The process will automatically stop once the desired loss value is reached. 
(reasonable numbers are 1.5-1.8)')\n                                with gr.Row():\n                                    optimizer = gr.Dropdown(label='Optimizer', value='adamw_torch', choices=['adamw_hf', 'adamw_torch', 'adamw_torch_fused', 'adamw_torch_xla', 'adamw_apex_fused', 'adafactor', 'adamw_bnb_8bit', 'adamw_anyprecision', 'sgd', 'adagrad'], info='Optimizer algorithm. adamw_torch is the standard choice. adamw_bnb_8bit uses less VRAM. adafactor is memory-efficient for large models.', elem_classes=['slim-dropdown'])\n\n                            with gr.Column():\n                                warmup_steps = gr.Number(label='Warmup Steps', value=100, info='For this many steps at the start, the learning rate is gradually ramped up from 0 to the target value. This prevents unstable updates early in training.')\n\n                                add_eos_token = gr.Checkbox(label='Add EOS token', value=True, info=\"Adds EOS token for each document in text datasets.\")\n                                excess_length = gr.Dropdown(label='Excess length', value='drop', choices=['drop', 'truncate'], info='What to do with conversations that exceed the cutoff length. \"Drop\" removes them entirely (recommended). \"Truncate\" cuts from the right, which may produce incomplete responses.', elem_classes=['slim-dropdown'])\n\n                                higher_rank_limit = gr.Checkbox(label='Enable higher ranks', value=False, info='If checked, changes Rank/Alpha slider above to go much higher. 
This will not work without a datacenter-class GPU.')\n                                report_to = gr.Radio(label=\"Save detailed logs with\", value=\"None\", choices=[\"None\", \"wandb\", \"tensorboard\"], interactive=True)\n\n                with gr.Column():\n                    with gr.Tab(label='Chat Dataset'):\n                        with gr.Row():\n                            dataset = gr.Dropdown(choices=utils.get_chat_datasets(str(shared.user_data_dir / 'training/datasets')), value='None', label='Dataset File', info='A JSON file with chat conversations (messages or ShareGPT format). Each row is one conversation.', elem_classes=['slim-dropdown'], interactive=not mu)\n                            ui.create_refresh_button(dataset, lambda: None, lambda: {'choices': utils.get_chat_datasets(str(shared.user_data_dir / 'training/datasets'))}, 'refresh-button', interactive=not mu)\n\n                        with gr.Row():\n                            format = gr.Dropdown(choices=get_instruction_templates(), value='None', label='Instruction Template', info='Select an instruction template for formatting the dataset, or \"Chat Template\" to use the model\\'s built-in chat template.', elem_classes=['slim-dropdown'], interactive=not mu)\n                            ui.create_refresh_button(format, lambda: None, lambda: {'choices': get_instruction_templates()}, 'refresh-button', interactive=not mu)\n\n                    with gr.Tab(label=\"Text Dataset\"):\n                        with gr.Row():\n                            text_dataset = gr.Dropdown(choices=utils.get_text_datasets(str(shared.user_data_dir / 'training/datasets')), value='None', label='Dataset File', info='A JSON file with a \"text\" key per row, for pretraining-style training. 
Each row is one document.', elem_classes=['slim-dropdown'], interactive=not mu)\n                            ui.create_refresh_button(text_dataset, lambda: None, lambda: {'choices': utils.get_text_datasets(str(shared.user_data_dir / 'training/datasets'))}, 'refresh-button', interactive=not mu)\n\n                        stride_length = gr.Slider(label='Stride Length', minimum=0, maximum=2048, value=256, step=32, info='Overlap between chunks in tokens. 0 = no overlap. Values like 256 or 512 help preserve context across chunk boundaries.')\n\n                    with gr.Row():\n                        eval_dataset = gr.Dropdown(choices=utils.get_datasets(str(shared.user_data_dir / 'training/datasets'), 'json'), value='None', label='Evaluation Dataset', info='The (optional) dataset file used to evaluate the model after training.', elem_classes=['slim-dropdown'], interactive=not mu)\n                        ui.create_refresh_button(eval_dataset, lambda: None, lambda: {'choices': utils.get_datasets(str(shared.user_data_dir / 'training/datasets'), 'json')}, 'refresh-button', interactive=not mu)\n\n                    eval_steps = gr.Number(label='Evaluate every n steps', value=100, info='If an evaluation dataset is given, test it every time this many steps pass.')\n\n                    with gr.Row():\n                        start_button = gr.Button(\"Start LoRA Training\", variant='primary', interactive=not mu)\n                        stop_button = gr.Button(\"Interrupt\", interactive=not mu)\n\n                    output = gr.Markdown(value=\"Ready\")\n\n        with gr.Tab('Perplexity evaluation', elem_id='evaluate-tab'):\n            with gr.Row():\n                with gr.Column():\n                    models = gr.Dropdown(utils.get_available_models(), label='Models', multiselect=True, interactive=not mu)\n                    evaluate_text_file = gr.Dropdown(choices=['wikitext', 'ptb', 'ptb_new'] + utils.get_datasets(str(shared.user_data_dir / 
'training/datasets'), 'txt')[1:], value='wikitext', label='Input dataset', info=f'The raw text file on which the model will be evaluated. The first options are automatically downloaded: wikitext, ptb, and ptb_new. The next options are your local text files under {shared.user_data_dir}/training/datasets.', interactive=not mu)\n                    with gr.Row():\n                        with gr.Column():\n                            stride_length = gr.Slider(label='Stride', minimum=0, maximum=32768, value=512, step=256, info='Used to make the evaluation faster at the cost of accuracy. 1 = slowest but most accurate. 512 is a common value.')\n\n                        with gr.Column():\n                            max_length = gr.Number(label='max_length', precision=0, step=256, value=0, info='The context for each evaluation. If set to 0, the maximum context length for the model will be used.')\n\n                    with gr.Row():\n                        start_current_evaluation = gr.Button(\"Evaluate loaded model\", interactive=not mu)\n                        start_evaluation = gr.Button(\"Evaluate selected models\", interactive=not mu)\n                        stop_evaluation = gr.Button(\"Interrupt\", interactive=not mu)\n\n                with gr.Column():\n                    evaluation_log = gr.Markdown(value='')\n\n            evaluation_table = gr.Dataframe(value=generate_markdown_table(), interactive=True)\n            with gr.Row():\n                save_comments = gr.Button('Save comments', elem_classes=\"small-button\", interactive=not mu)\n                refresh_table = gr.Button('Refresh the table', elem_classes=\"small-button\", interactive=not mu)\n\n    # Training events\n    all_params = [lora_name, always_override, all_linear, q_proj_en, v_proj_en, k_proj_en, o_proj_en, gate_proj_en, down_proj_en, up_proj_en, save_steps, micro_batch_size, batch_size, epochs, learning_rate, lr_scheduler_type, lora_rank, lora_alpha, lora_dropout, cutoff_len, 
dataset, eval_dataset, format, eval_steps, text_dataset, higher_rank_limit, warmup_steps, optimizer, stride_length, stop_at_loss, add_eos_token, excess_length, report_to]\n\n    copy_from.change(do_copy_params, [copy_from] + all_params, all_params)\n    start_button.click(do_train, all_params, output)\n    stop_button.click(do_interrupt, None, None, queue=False)\n    higher_rank_limit.change(change_rank_limit, [higher_rank_limit], [lora_rank, lora_alpha])\n\n    # Evaluation events. For some reason, the interrupt event\n    # doesn't work with the .then() syntax, so I write them one\n    # by one in this ugly but functional way.\n    ev = start_evaluation.click(calculate_perplexity, [models, evaluate_text_file, stride_length, max_length], evaluation_log, show_progress=False)\n    ev.then(generate_markdown_table, None, evaluation_table, show_progress=False)\n\n    ev_cur = start_current_evaluation.click(\n        lambda: ['current model'], None, tmp).then(\n        calculate_perplexity, [tmp, evaluate_text_file, stride_length, max_length], evaluation_log, show_progress=False)\n\n    ev_cur.then(generate_markdown_table, None, evaluation_table, show_progress=False)\n\n    stop_evaluation.click(None, None, None, cancels=[ev, ev_cur], queue=False)\n    refresh_table.click(generate_markdown_table, None, evaluation_table, show_progress=True)\n    save_comments.click(\n        save_past_evaluations, evaluation_table, None).then(\n        lambda: \"Comments saved.\", None, evaluation_log, show_progress=False)\n\n\ndef do_interrupt():\n    global WANT_INTERRUPT\n    WANT_INTERRUPT = True\n\n\ndef do_copy_params(lora_name: str, *args):\n    f_name = f\"{shared.args.lora_dir}/{clean_path(None, lora_name)}/training_parameters.json\"\n    if Path(f_name).is_file():\n        with open(f_name, 'r', encoding='utf-8') as format_file:\n            params: dict[str, str] = json.load(format_file)\n    else:\n        params = {}\n\n    result = list()\n    for i in range(0, 
len(PARAMETERS)):\n        key = PARAMETERS[i]\n        if key in params:\n            result.append(params[key])\n        else:\n            result.append(args[i])\n\n    return result\n\n\ndef change_rank_limit(use_higher_ranks: bool):\n    mult = 2 if use_higher_ranks else 1\n    return {\"maximum\": 1024 * mult, \"__type__\": \"update\"}, {\"maximum\": 2048 * mult, \"__type__\": \"update\"}\n\n\ndef clean_path(base_path: str, path: str):\n    \"\"\"Strips unusual symbols and forcibly builds a path as relative to the intended directory.\"\"\"\n    path = path.replace('\\\\', '/').replace('..', '_')\n    if base_path is None:\n        return path\n\n    return f'{Path(base_path).absolute()}/{path}'\n\n\ndef get_instruction_templates():\n    path = shared.user_data_dir / 'instruction-templates'\n    names = set()\n    for ext in ['yaml', 'yml', 'jinja', 'jinja2']:\n        for f in path.glob(f'*.{ext}'):\n            names.add(f.stem)\n    return ['None', 'Chat Template'] + sorted(names, key=utils.natural_keys)\n\n\ndef load_template(name):\n    \"\"\"Load a Jinja2 template string from {user_data_dir}/instruction-templates/.\"\"\"\n    path = shared.user_data_dir / 'instruction-templates'\n    for ext in ['jinja', 'jinja2', 'yaml', 'yml']:\n        filepath = path / f'{name}.{ext}'\n        if filepath.exists():\n            if ext in ['jinja', 'jinja2']:\n                return filepath.read_text(encoding='utf-8')\n            else:\n                data = yaml.safe_load(filepath.read_text(encoding='utf-8'))\n                return data.get('instruction_template', '')\n    return ''\n\n\ndef backup_adapter(input_folder):\n    # Get the creation date of the adapter file (safetensors or bin)\n    try:\n        adapter_file = Path(f\"{input_folder}/adapter_model.safetensors\")\n        if not adapter_file.is_file():\n            adapter_file = Path(f\"{input_folder}/adapter_model.bin\")\n        if adapter_file.is_file():\n\n            logger.info(\"Backing up 
existing LoRA adapter\")\n            creation_date = datetime.fromtimestamp(adapter_file.stat().st_ctime)\n            creation_date_str = creation_date.strftime(\"Backup-%Y-%m-%d\")\n\n            # Create the new subfolder\n            subfolder_path = Path(f\"{input_folder}/{creation_date_str}\")\n            subfolder_path.mkdir(parents=True, exist_ok=True)\n\n            # Check if the file already exists in the subfolder\n            backup_adapter_file = subfolder_path / adapter_file.name\n            if backup_adapter_file.is_file():\n                print(\" - Backup already exists. Skipping backup process.\")\n                return\n\n            # Copy existing files to the new subfolder\n            existing_files = Path(input_folder).iterdir()\n            for file in existing_files:\n                if file.is_file():\n                    shutil.copy2(file, subfolder_path)\n    except Exception as e:\n        print(\"An error occurred in backup_adapter:\", str(e))\n\n\ndef calc_trainable_parameters(model):\n    trainable_params = 0\n    all_param = 0\n    for _, param in model.named_parameters():\n        num_params = param.numel()\n        # if using DS Zero 3 and the weights are initialized empty\n        if num_params == 0 and hasattr(param, \"ds_numel\"):\n            num_params = param.ds_numel\n\n        all_param += num_params\n        if param.requires_grad:\n            trainable_params += num_params\n\n    return trainable_params, all_param\n\n\ndef do_train(lora_name: str, always_override: bool, all_linear: bool, q_proj_en: bool, v_proj_en: bool, k_proj_en: bool, o_proj_en: bool, gate_proj_en: bool, down_proj_en: bool, up_proj_en: bool, save_steps: int, micro_batch_size: int, batch_size: int, epochs: int, learning_rate: str, lr_scheduler_type: str, lora_rank: int, lora_alpha: int, lora_dropout: float, cutoff_len: int, dataset: str, eval_dataset: str, format: str, eval_steps: int, text_dataset: str, higher_rank_limit: bool, warmup_steps: 
int, optimizer: str, stride_length: int, stop_at_loss: float, add_eos_token: bool, excess_length: str, report_to: str):\n\n    import torch\n    import transformers\n    from datasets import Dataset, load_dataset\n    from peft import (\n        LoraConfig,\n        get_peft_model,\n        prepare_model_for_kbit_training,\n        set_peft_model_state_dict\n    )\n\n    global WANT_INTERRUPT\n    WANT_INTERRUPT = False\n\n    # == Input validation / processing ==\n    yield \"Preparing the input...\"\n\n    if shared.args.loader == 'llama.cpp':\n        yield \"Error: LoRA training requires a model loaded with the Transformers loader. GGUF models are not supported for training.\"\n        return\n\n    lora_file_path = clean_path(None, lora_name)\n    if lora_file_path.strip() == '':\n        yield \"Missing or invalid LoRA file name input.\"\n        return\n\n    lora_file_path = f\"{Path(shared.args.lora_dir)}/{lora_file_path}\"\n    actual_lr = float(learning_rate)\n    model_type = type(shared.model).__name__\n\n    if model_type == \"PeftModelForCausalLM\":\n        if len(shared.lora_names) > 0:\n            yield \"You are trying to train a LoRA while you already have another LoRA loaded. This will work, but may have unexpected effects. *(Will continue anyway in 5 seconds, press `Interrupt` to stop.)*\"\n            logger.warning(\"Training LoRA over top of another LoRA. May have unexpected effects.\")\n        else:\n            yield \"Model ID not matched due to LoRA loading. Consider reloading base model. *(Will continue anyway in 5 seconds, press `Interrupt` to stop.)*\"\n            logger.warning(\"Model ID not matched due to LoRA loading. 
Consider reloading base model.\")\n\n        time.sleep(5)\n\n    if cutoff_len <= 0 or micro_batch_size <= 0 or batch_size <= 0 or actual_lr <= 0 or lora_rank <= 0 or lora_alpha <= 0:\n        yield \"Cannot input zeroes.\"\n        return\n\n    gradient_accumulation_steps = max(1, batch_size // micro_batch_size)\n    original_chat_template = getattr(shared.tokenizer, 'chat_template', None)\n    if shared.tokenizer.pad_token_id is None:\n        shared.tokenizer.pad_token_id = shared.tokenizer.eos_token_id\n    shared.tokenizer.padding_side = \"right\"\n\n    def list_target_modules():\n        if all_linear:\n            return \"all-linear\"\n\n        target_mods = [f\"{name}_proj\" for name, enabled in {\n            \"q\": q_proj_en, \"k\": k_proj_en, \"v\": v_proj_en, \"o\": o_proj_en,\n            \"gate\": gate_proj_en, \"down\": down_proj_en, \"up\": up_proj_en,\n        }.items() if enabled]\n        return target_mods\n\n    def normalize_messages(data_point):\n        \"\"\"Convert a dataset row to OpenAI messages format for apply_chat_template().\"\"\"\n        if \"messages\" in data_point:\n            return data_point[\"messages\"]\n\n        if \"conversations\" in data_point:\n            role_map = {\"human\": \"user\", \"gpt\": \"assistant\"}\n            return [\n                {\"role\": role_map.get(turn.get(\"from\", \"\"), turn.get(\"from\", \"\")), \"content\": turn[\"value\"]}\n                for turn in data_point[\"conversations\"]\n            ]\n\n        raise RuntimeError(\n            f'Dataset row must contain \"messages\" or \"conversations\" key. 
'\n            f'Found: {list(data_point.keys())}'\n        )\n\n    def tokenize_conversation(data_point):\n        \"\"\"Tokenize using apply_chat_template() with assistant-only label masking.\"\"\"\n        messages = normalize_messages(data_point)\n        full_ids = list(shared.tokenizer.apply_chat_template(messages, tokenize=True, return_dict=False))\n\n        # Build labels: -100 for everything, then unmask assistant turns.\n        # This assumes apply_chat_template(messages[:i]) is a token-for-token\n        # prefix of apply_chat_template(messages[:i+1]), which holds for all\n        # standard chat templates (Llama, ChatML, Mistral, etc.).\n        labels = [-100] * len(full_ids)\n        for i, msg in enumerate(messages):\n            if msg[\"role\"] == \"assistant\":\n                # Tokens up to where this assistant turn starts\n                header_ids = shared.tokenizer.apply_chat_template(\n                    messages[:i], tokenize=True, return_dict=False, add_generation_prompt=True\n                )\n                # Tokens through end of this assistant turn\n                through_ids = shared.tokenizer.apply_chat_template(\n                    messages[:i + 1], tokenize=True, return_dict=False\n                )\n                # Unmask assistant tokens\n                start = len(header_ids)\n                end = min(len(through_ids), len(full_ids))\n                labels[start:end] = full_ids[start:end]\n\n        if len(full_ids) > cutoff_len:\n            if excess_length == 'truncate':\n                full_ids = full_ids[:cutoff_len]\n                labels = labels[:cutoff_len]\n            else:\n                return {\"input_ids\": [], \"labels\": [], \"attention_mask\": []}\n\n        return {\n            \"input_ids\": full_ids,\n            \"labels\": labels,\n            \"attention_mask\": [1] * len(full_ids),\n        }\n\n    train_template.clear()\n\n    # == Prep the dataset, format, etc ==\n    
has_text_dataset = text_dataset not in ['None', '']\n    has_chat_dataset = dataset not in ['None', '']\n    if has_text_dataset and has_chat_dataset:\n        yield \"Error: select either a Chat Dataset or a Text Dataset, not both.\"\n        return\n\n    def tokenize_text_data(data):\n        \"\"\"Tokenize text dataset rows, concatenate, and split into chunks.\"\"\"\n        all_tokens = []\n        for row in data:\n            tokens = shared.tokenizer.encode(row['text'])\n            if add_eos_token:\n                tokens.append(shared.tokenizer.eos_token_id)\n            all_tokens.extend(tokens)\n\n        stride = int(stride_length)\n        step = cutoff_len - stride if stride > 0 else cutoff_len\n\n        if step <= 0:\n            return None, \"Error: stride length must be smaller than cutoff length.\"\n        if len(all_tokens) < cutoff_len:\n            return None, \"Error: dataset is too short to fill even one chunk of the given cutoff length.\"\n\n        chunks = []\n        for start in range(0, len(all_tokens), step):\n            chunk = all_tokens[start:start + cutoff_len]\n            if len(chunk) == 0:\n                break\n            if len(chunk) < cutoff_len:\n                pad_len = cutoff_len - len(chunk)\n                chunks.append({\n                    \"input_ids\": chunk + [shared.tokenizer.pad_token_id] * pad_len,\n                    \"labels\": list(chunk) + [-100] * pad_len,\n                    \"attention_mask\": [1] * len(chunk) + [0] * pad_len,\n                })\n            else:\n                chunks.append({\n                    \"input_ids\": chunk,\n                    \"labels\": list(chunk),\n                    \"attention_mask\": [1] * cutoff_len,\n                })\n\n        return Dataset.from_list(chunks), None\n\n    if has_text_dataset:\n        train_template[\"template_type\"] = \"text_dataset\"\n        logger.info(\"Loading text dataset\")\n        data = load_dataset(\"json\", 
data_files=clean_path(str(shared.user_data_dir / 'training/datasets'), f'{text_dataset}.json'))\n\n        if \"text\" not in data['train'].column_names:\n            yield \"Error: text dataset must have a \\\"text\\\" key per row.\"\n            return\n\n        train_data, err = tokenize_text_data(data['train'])\n        if err:\n            yield err\n            return\n\n        if eval_dataset == 'None':\n            eval_data = None\n        else:\n            eval_raw = load_dataset(\"json\", data_files=clean_path(str(shared.user_data_dir / 'training/datasets'), f'{eval_dataset}.json'))\n            if \"text\" not in eval_raw['train'].column_names:\n                yield \"Error: evaluation dataset must have a \\\"text\\\" key per row.\"\n                return\n            eval_data, err = tokenize_text_data(eval_raw['train'])\n            if err:\n                yield err\n                return\n    elif has_chat_dataset:\n        if format in ['None', '']:\n            yield \"Missing format choice input, cannot continue.\"\n            return\n\n        if format == 'Chat Template':\n            if not getattr(shared.tokenizer, 'chat_template', None):\n                yield \"Error: this model's tokenizer does not have a chat template. 
Select an instruction template instead, or load an instruct/chat model.\"\n                return\n        else:\n            # Load custom instruction template and set on tokenizer\n            template_str = load_template(format)\n            if not template_str:\n                yield f\"Error: could not load instruction template '{format}'.\"\n                return\n            shared.tokenizer.chat_template = template_str\n\n        # Unified path — both cases use tokenize_conversation()\n        train_template[\"template_type\"] = \"chat_template\"\n\n        logger.info(\"Loading JSON dataset with chat template format\")\n        data = load_dataset(\"json\", data_files=clean_path(str(shared.user_data_dir / 'training/datasets'), f'{dataset}.json'))\n\n        # Validate the first row\n        try:\n            normalize_messages(data['train'][0])\n        except (RuntimeError, KeyError, IndexError) as e:\n            yield f\"Error: {e}\"\n            return\n\n        total = len(data['train'])\n        train_data = data['train'].map(\n            tokenize_conversation,\n            remove_columns=data['train'].column_names,\n            new_fingerprint='%030x' % random.randrange(16**30)\n        )\n        train_data = train_data.filter(lambda x: len(x['input_ids']) > 0)\n        dropped = total - len(train_data)\n        if dropped > 0:\n            logger.warning(f\"Dropped {dropped}/{total} conversations exceeding cutoff length of {cutoff_len} tokens.\")\n        if len(train_data) == 0:\n            yield f\"Error: all {total} conversations exceed the cutoff length of {cutoff_len} tokens. 
Increase the cutoff length or shorten your data.\"\n            return\n\n        if eval_dataset == 'None':\n            eval_data = None\n        else:\n            eval_data = load_dataset(\"json\", data_files=clean_path(str(shared.user_data_dir / 'training/datasets'), f'{eval_dataset}.json'))\n            eval_data = eval_data['train'].map(\n                tokenize_conversation,\n                remove_columns=eval_data['train'].column_names,\n                new_fingerprint='%030x' % random.randrange(16**30)\n            )\n            eval_data = eval_data.filter(lambda x: len(x['input_ids']) > 0)\n    else:\n        yield \"No dataset selected. Choose a Chat Dataset or a Text Dataset.\"\n        return\n\n    # == We MUST reload model if it went through any previous training, even failed one ==\n    if shared.model_dirty_from_training:\n        selected_model = shared.model_name\n        if selected_model:\n            print(\"\\033[1;31;1m(Model has been modified by previous training, it needs to be reloaded...)\\033[0;37;0m\")\n            try:\n                yield f\"Reloading {selected_model}...\"\n                reload_model()\n                if shared.model is not None:\n                    print(\"Model reloaded OK, continue with training.\")\n                else:\n                    yield f\"Failed to load {selected_model}.\"\n                    return\n            except Exception:\n                exc = traceback.format_exc()\n                logger.error('Failed to reload the model.')\n                print(exc)\n                yield exc.replace('\\n', '\\n\\n')\n                return\n\n    # == Start prepping the model itself ==\n    if not hasattr(shared.model, 'lm_head') or hasattr(shared.model.lm_head, 'weight'):\n        logger.info(\"Getting model ready\")\n        if 'quantization_config' in shared.model.config.to_dict():\n            prepare_model_for_kbit_training(shared.model)\n\n    # base model is now frozen and should not 
be reused for any other LoRA training than this one\n    shared.model_dirty_from_training = True\n\n    logger.info(\"Preparing for training\")\n    target_modules = list_target_modules()\n    if not target_modules:\n        yield \"No target modules selected. Enable at least one module or check 'Target all linear layers'.\"\n        return\n\n    config = LoraConfig(\n        r=lora_rank,\n        lora_alpha=lora_alpha,\n        target_modules=target_modules,\n        lora_dropout=lora_dropout,\n        bias=\"none\",\n        task_type=\"CAUSAL_LM\"\n    )\n\n    # == Backup the existing adapter ==\n    if not always_override:\n        backup_adapter(lora_file_path)\n\n    # == get model trainable params\n    model_trainable_params, model_all_params = calc_trainable_parameters(shared.model)\n\n    # == Determine if we can resume from a checkpoint ==\n    resume_checkpoint = None\n    try:\n        logger.info(\"Creating LoRA model\")\n        lora_model = get_peft_model(shared.model, config)\n        if not always_override and Path(lora_file_path).exists():\n            # Look for HF Trainer checkpoint dirs (full resumption)\n            checkpoints = sorted(Path(lora_file_path).glob(\"checkpoint-*\"), key=os.path.getmtime)\n            if checkpoints:\n                resume_checkpoint = str(checkpoints[-1])\n                logger.info(f\"Will resume from checkpoint: {resume_checkpoint}\")\n            else:\n                # Legacy fallback: load bare adapter weights only\n                safetensors_path = Path(f\"{lora_file_path}/adapter_model.safetensors\")\n                bin_path = Path(f\"{lora_file_path}/adapter_model.bin\")\n                if safetensors_path.is_file():\n                    logger.info(\"Loading existing LoRA data (safetensors)\")\n                    from safetensors.torch import load_file\n                    state_dict_peft = load_file(str(safetensors_path))\n                    set_peft_model_state_dict(lora_model, 
state_dict_peft)\n                elif bin_path.is_file():\n                    logger.info(\"Loading existing LoRA data (bin)\")\n                    state_dict_peft = torch.load(str(bin_path), weights_only=True)\n                    set_peft_model_state_dict(lora_model, state_dict_peft)\n    except Exception:\n        yield traceback.format_exc().replace('\\n', '\\n\\n')\n        return\n\n    class Tracked():\n        def __init__(self):\n            self.current_steps = 0\n            self.max_steps = 0\n            self.did_save = False\n\n    tracked = Tracked()\n    actual_save_steps = math.ceil(save_steps / gradient_accumulation_steps)\n\n    class Callbacks(transformers.TrainerCallback):\n        def on_step_begin(self, args: transformers.TrainingArguments, state: transformers.TrainerState, control: transformers.TrainerControl, **kwargs):\n            tracked.current_steps = state.global_step * gradient_accumulation_steps\n            tracked.max_steps = state.max_steps * gradient_accumulation_steps\n            if WANT_INTERRUPT:\n                control.should_epoch_stop = True\n                control.should_training_stop = True\n\n        def on_substep_end(self, args: transformers.TrainingArguments, state: transformers.TrainerState, control: transformers.TrainerControl, **kwargs):\n            tracked.current_steps += 1\n            if WANT_INTERRUPT:\n                control.should_epoch_stop = True\n                control.should_training_stop = True\n\n        def on_log(self, args: transformers.TrainingArguments, state: transformers.TrainerState, control: transformers.TrainerControl, logs, **kwargs):\n            train_log.update(logs)\n            train_log.update({\"current_steps\": tracked.current_steps})\n            if WANT_INTERRUPT:\n                print(\"\\033[1;31;1mInterrupted by user\\033[0;37;0m\")\n\n            print(f\"\\033[1;30;40mStep: {tracked.current_steps} \\033[0;37;0m\", end='')\n            if 'loss' in logs:\n            
    loss = float(logs['loss'])\n                if stop_at_loss > 0 and loss <= stop_at_loss:\n                    control.should_epoch_stop = True\n                    control.should_training_stop = True\n                    print(f\"\\033[1;31;1mStop Loss {stop_at_loss} reached.\\033[0;37;0m\")\n\n        def on_save(self, args: transformers.TrainingArguments, state: transformers.TrainerState, control: transformers.TrainerControl, **kwargs):\n            checkpoint_dir = Path(args.output_dir) / f\"checkpoint-{state.global_step}\"\n            if checkpoint_dir.exists():\n                with open(checkpoint_dir / \"training_log.json\", 'w', encoding='utf-8') as file:\n                    json.dump(train_log, file, indent=2)\n                with open(checkpoint_dir / \"training_prompt.json\", 'w', encoding='utf-8') as file:\n                    json.dump(train_template, file, indent=2)\n\n    # Fix training for mixed precision models\n    for param in shared.model.parameters():\n        if param.requires_grad:\n            param.data = param.data.float()\n\n    lora_model.config.use_cache = False\n\n    def collate_fn(batch):\n        max_len = max(len(item['input_ids']) for item in batch)\n        input_ids, labels, attention_mask = [], [], []\n        for item in batch:\n            pad_len = max_len - len(item['input_ids'])\n            input_ids.append(item['input_ids'] + [shared.tokenizer.pad_token_id] * pad_len)\n            labels.append(item['labels'] + [-100] * pad_len)\n            attention_mask.append(item['attention_mask'] + [0] * pad_len)\n        return {\n            'input_ids': torch.tensor(input_ids),\n            'labels': torch.tensor(labels),\n            'attention_mask': torch.tensor(attention_mask),\n        }\n\n    trainer = transformers.Trainer(\n        model=lora_model,\n        train_dataset=train_data,\n        eval_dataset=eval_data,\n        args=transformers.TrainingArguments(\n            report_to=report_to if report_to != 
\"None\" else \"none\",\n            per_device_train_batch_size=micro_batch_size,\n            gradient_accumulation_steps=gradient_accumulation_steps,\n            warmup_steps=math.ceil(warmup_steps / gradient_accumulation_steps),\n            num_train_epochs=epochs,\n            learning_rate=actual_lr,\n            fp16=False if shared.args.cpu or shared.args.bf16 else True,\n            bf16=shared.args.bf16,\n            optim=optimizer,\n            logging_steps=1,\n            eval_strategy=\"steps\" if eval_data is not None else \"no\",\n            eval_steps=math.ceil(eval_steps / gradient_accumulation_steps) if eval_data is not None else None,\n            save_strategy=\"steps\" if save_steps > 0 or eval_data is not None else \"no\",\n            save_steps=actual_save_steps if save_steps > 0 else None,\n            output_dir=lora_file_path,\n            lr_scheduler_type=lr_scheduler_type,\n            load_best_model_at_end=eval_data is not None,\n            # TODO: Enable multi-device support\n            ddp_find_unused_parameters=None,\n            use_cpu=shared.args.cpu,\n            remove_unused_columns=False,\n        ),\n        data_collator=collate_fn,\n        callbacks=[Callbacks()]\n    )\n\n    # == Save parameters for reuse ==\n    with open(f\"{lora_file_path}/training_parameters.json\", 'w', encoding='utf-8') as file:\n        local_vars = locals()\n        json.dump({x: local_vars[x] for x in PARAMETERS}, file, indent=2)\n\n    # == Save training prompt ==\n    with open(f\"{lora_file_path}/training_prompt.json\", 'w', encoding='utf-8') as file:\n        json.dump(train_template, file, indent=2)\n\n    # == Main run and monitor loop ==\n    logger.info(\"Starting training\")\n    yield \"Starting...\"\n\n    lora_trainable_param, lora_all_param = calc_trainable_parameters(lora_model)\n\n    if target_modules == \"all-linear\":\n        projections_string = \"all-linear\"\n    else:\n        projections_string = \", 
\".join([projection.replace(\"_proj\", \"\") for projection in target_modules])\n\n    print(f\"Training '{model_type}' model using ({projections_string}) projections\")\n\n    if lora_all_param > 0:\n        print(f\"Trainable params: {lora_trainable_param:,d} ({100 * lora_trainable_param / lora_all_param:.4f} %), All params: {lora_all_param:,d} (Model: {model_all_params:,d})\")\n\n    train_log.update({\"base_model_name\": shared.model_name})\n    train_log.update({\"base_model_class\": shared.model.__class__.__name__})\n    train_log.update({\"base_loaded_in_4bit\": getattr(lora_model, \"is_loaded_in_4bit\", False)})\n    train_log.update({\"base_loaded_in_8bit\": getattr(lora_model, \"is_loaded_in_8bit\", False)})\n    train_log.update({\"projections\": projections_string})\n\n    if stop_at_loss > 0:\n        print(f\"Monitoring loss \\033[1;31;1m(Auto-Stop at: {stop_at_loss})\\033[0;37;0m\")\n\n    if WANT_INTERRUPT:\n        yield \"Interrupted before start.\"\n        return\n\n    def log_train_dataset(trainer):\n        decoded_entries = []\n        # Try to decode the entries and write the log file\n        try:\n            # Iterate over the first 10 elements in the dataset (or fewer if there are less than 10)\n            for i in range(min(10, len(trainer.train_dataset))):\n                decoded_text = shared.tokenizer.decode(trainer.train_dataset[i]['input_ids'])\n                decoded_entries.append({\"value\": decoded_text})\n\n            # Write the log file\n            (shared.user_data_dir / 'logs').mkdir(exist_ok=True)\n            with open(shared.user_data_dir / 'logs' / 'train_dataset_sample.json', 'w') as json_file:\n                json.dump(decoded_entries, json_file, indent=4)\n\n            logger.info(f\"Log file 'train_dataset_sample.json' created in the '{shared.user_data_dir}/logs' directory.\")\n        except Exception as e:\n            logger.error(f\"Failed to create log file due to error: {e}\")\n\n    thread_error = 
None\n\n    def threaded_run():\n        nonlocal thread_error\n        try:\n            log_train_dataset(trainer)\n            trainer.train(resume_from_checkpoint=resume_checkpoint)\n            # Note: save in the thread in case the gradio thread breaks (eg browser closed)\n            lora_model.save_pretrained(lora_file_path)\n            tracked.did_save = True\n            logger.info(\"LoRA training run is completed and saved.\")\n            # Save log\n            with open(f\"{lora_file_path}/training_log.json\", 'w', encoding='utf-8') as file:\n                json.dump(train_log, file, indent=2)\n        except Exception as e:\n            thread_error = e\n            logger.error(f\"Training error: {e}\")\n\n    thread = threading.Thread(target=threaded_run)\n    thread.start()\n    last_step = 0\n    start_time = time.perf_counter()\n\n    while thread.is_alive():\n        time.sleep(0.5)\n        if WANT_INTERRUPT:\n            yield \"Interrupting, please wait... *(Run will stop after the current training step completes.)*\"\n\n        elif tracked.current_steps != last_step:\n            last_step = tracked.current_steps\n            time_elapsed = time.perf_counter() - start_time\n            if time_elapsed <= 0:\n                timer_info = \"\"\n                total_time_estimate = 999\n            else:\n                its = tracked.current_steps / time_elapsed\n                if its > 1:\n                    timer_info = f\"`{its:.2f}` it/s\"\n                else:\n                    timer_info = f\"`{1.0/its:.2f}` s/it\"\n\n                total_time_estimate = (1.0 / its) * (tracked.max_steps)\n\n            yield f\"Running... **{tracked.current_steps}** / **{tracked.max_steps}** ... {timer_info}, {format_time(time_elapsed)} / {format_time(total_time_estimate)} ... 
{format_time(total_time_estimate - time_elapsed)} remaining\"\n\n    # Check for errors from the training thread\n    if thread_error is not None:\n        yield f\"Training failed: {thread_error}\"\n        return\n\n    # Saving in the train thread might fail if an error occurs, so save here if so.\n    if not tracked.did_save:\n        logger.info(\"Training complete, saving\")\n        lora_model.save_pretrained(lora_file_path)\n\n    # Restore the original chat_template if we changed it for training\n    if shared.tokenizer is not None and hasattr(shared.tokenizer, 'chat_template'):\n        shared.tokenizer.chat_template = original_chat_template\n\n    if WANT_INTERRUPT:\n        logger.info(\"Training interrupted.\")\n        yield f\"Interrupted. Incomplete LoRA saved to `{lora_file_path}`.\"\n    else:\n        logger.info(\"Training complete!\")\n        yield f\"Done! LoRA saved to `{lora_file_path}`.\\n\\nBefore testing your new LoRA, make sure to first reload the model, as it is currently dirty from training.\"\n\n\ndef format_time(seconds: float):\n    if seconds < 120:\n        return f\"`{seconds:.0f}` seconds\"\n\n    minutes = seconds / 60\n    if minutes < 120:\n        return f\"`{minutes:.0f}` minutes\"\n\n    hours = minutes / 60\n    return f\"`{hours:.0f}` hours\"\n"
  },
  {
    "path": "modules/transformers_loader.py",
    "content": "import pprint\nfrom pathlib import Path\n\nimport torch\nimport torch.nn.functional as F\nimport transformers\nfrom accelerate import infer_auto_device_map, init_empty_weights\nfrom accelerate.utils import is_xpu_available\nfrom transformers import (\n    AutoConfig,\n    AutoModel,\n    AutoModelForCausalLM,\n    AutoModelForSeq2SeqLM,\n    AutoTokenizer,\n    BitsAndBytesConfig,\n    LogitsProcessor\n)\n\nimport modules.shared as shared\nfrom modules.logging_colors import logger\nfrom modules.text_generation import get_reply_from_output_ids\nfrom modules.torch_utils import get_device\n\ntransformers.logging.set_verbosity_error()\n\n\nclass _StopEverythingStoppingCriteria(transformers.StoppingCriteria):\n    def __init__(self):\n        transformers.StoppingCriteria.__init__(self)\n\n    def __call__(self, input_ids: torch.LongTensor, _scores: torch.FloatTensor) -> bool:\n        return shared.stop_everything\n\n\nclass Stream(transformers.StoppingCriteria):\n    def __init__(self, callback_func=None):\n        self.callback_func = callback_func\n\n    def __call__(self, input_ids, scores) -> bool:\n        if self.callback_func is not None:\n            self.callback_func(input_ids[0])\n\n        return False\n\n\nclass LogitsBiasProcessor(LogitsProcessor):\n    def __init__(self, logit_bias={}):\n        self.logit_bias = logit_bias\n        if self.logit_bias:\n            self.keys = list([int(key) for key in self.logit_bias.keys()])\n            values = [self.logit_bias[str(key)] for key in self.keys]\n            self.values = torch.tensor(values, dtype=torch.float, device=shared.model.device)\n\n    def __call__(self, input_ids: torch.LongTensor, logits: torch.FloatTensor) -> torch.FloatTensor:\n        if self.logit_bias:\n            logits[0, self.keys] += self.values\n\n        return logits\n\n    def __repr__(self):\n        return f\"<{self.__class__.__name__}(logit_bias={self.logit_bias})>\"\n\n\nclass 
LogprobProcessor(LogitsProcessor):\n    def __init__(self, logprobs=None):\n        self.logprobs = logprobs\n        self.token_alternatives = {}\n        self.token_alternatives_history = []\n\n    def __call__(self, input_ids: torch.LongTensor, logits: torch.FloatTensor) -> torch.FloatTensor:\n        if self.logprobs is not None:  # 0-5\n            log_e_probabilities = F.log_softmax(logits, dim=1)\n            top_values, top_indices = torch.topk(log_e_probabilities, k=self.logprobs)\n            top_tokens = [get_reply_from_output_ids([tok]) for tok in top_indices[0]]\n            top_probs = [float(x) for x in top_values[0]]\n            self.token_alternatives = dict(zip(top_tokens, top_probs))\n            self.token_alternatives_history.append(self.token_alternatives)\n\n        return logits\n\n    def __repr__(self):\n        return f\"<{self.__class__.__name__}(logprobs={self.logprobs}, token_alternatives={self.token_alternatives})>\"\n\n\ndef load_tokenizer(model_name, tokenizer_dir=None):\n    if tokenizer_dir:\n        path_to_model = Path(tokenizer_dir)\n    else:\n        path_to_model = Path(f\"{shared.args.model_dir}/{model_name}/\")\n\n    tokenizer = None\n    if path_to_model.exists():\n        if shared.args.no_use_fast:\n            logger.info('Loading the tokenizer with use_fast=False.')\n\n        tokenizer = AutoTokenizer.from_pretrained(\n            path_to_model,\n            trust_remote_code=shared.original_args.trust_remote_code,\n            use_fast=not shared.args.no_use_fast\n        )\n\n    return tokenizer\n\n\ndef load_model_HF(model_name):\n    torch._dynamo.config.disable = True\n\n    path_to_model = Path(f'{shared.args.model_dir}/{model_name}')\n    params = {\n        'low_cpu_mem_usage': True,\n        'attn_implementation': shared.args.attn_implementation,\n        'torch_dtype': torch.bfloat16 if shared.args.bf16 else torch.float16,\n    }\n\n    if shared.original_args.trust_remote_code:\n        
params['trust_remote_code'] = True\n\n    if shared.args.force_safetensors:\n        params['force_safetensors'] = True\n\n    config = AutoConfig.from_pretrained(path_to_model, trust_remote_code=shared.original_args.trust_remote_code)\n\n    if 'chatglm' in model_name.lower():\n        LoaderClass = AutoModel\n    else:\n        if config.to_dict().get('is_encoder_decoder', False):\n            LoaderClass = AutoModelForSeq2SeqLM\n            shared.is_seq2seq = True\n        else:\n            LoaderClass = AutoModelForCausalLM\n\n    # Determine if we should use default loading\n    should_use_default_loading = not any([\n        shared.args.cpu,\n        shared.args.load_in_8bit,\n        shared.args.load_in_4bit,\n        shared.args.disk,\n        shared.args.cpu_memory is not None,\n    ])\n\n    # Load the model without any special settings\n    if should_use_default_loading:\n        params['device_map'] = 'auto'\n\n        logger.info(\"TRANSFORMERS_PARAMS=\")\n        pprint.PrettyPrinter(indent=4, sort_dicts=False).pprint(params)\n        print()\n\n        model = LoaderClass.from_pretrained(path_to_model, **params)\n        if not (hasattr(model, 'is_loaded_in_4bit') and model.is_loaded_in_4bit):\n            device = get_device()\n            if device:\n                model = model.to(device)\n\n    # Load with quantization and/or offloading\n    else:\n        if not any((shared.args.cpu, torch.cuda.is_available(), is_xpu_available(), torch.backends.mps.is_available())):\n            logger.warning('torch.cuda.is_available() and is_xpu_available() returned False. This means that no GPU has been detected. 
Falling back to CPU mode.')\n            shared.args.cpu = True\n\n        if shared.args.cpu:\n            params['torch_dtype'] = torch.float32\n        else:\n            params['device_map'] = 'auto'\n            if x := get_max_memory_dict():\n                params['max_memory'] = x\n\n            if shared.args.load_in_4bit:\n                # See https://github.com/huggingface/transformers/pull/23479/files\n                # and https://huggingface.co/blog/4bit-transformers-bitsandbytes\n                quantization_config_params = {\n                    'load_in_4bit': True,\n                    'bnb_4bit_compute_dtype': eval(f\"torch.{shared.args.compute_dtype}\") if shared.args.compute_dtype in [\"bfloat16\", \"float16\", \"float32\"] else None,\n                    'bnb_4bit_quant_type': shared.args.quant_type,\n                    'bnb_4bit_use_double_quant': shared.args.use_double_quant,\n                    'llm_int8_enable_fp32_cpu_offload': True\n                }\n                params['quantization_config'] = BitsAndBytesConfig(**quantization_config_params)\n\n            elif shared.args.load_in_8bit:\n                if shared.args.gpu_split:\n                    params['quantization_config'] = BitsAndBytesConfig(load_in_8bit=True, llm_int8_enable_fp32_cpu_offload=True)\n                else:\n                    params['quantization_config'] = BitsAndBytesConfig(load_in_8bit=True)\n\n                if params.get('max_memory') is not None:\n                    with init_empty_weights():\n                        model = LoaderClass.from_config(config, trust_remote_code=params.get('trust_remote_code'))\n\n                    model.tie_weights()\n                    params['device_map'] = infer_auto_device_map(\n                        model,\n                        dtype=torch.int8,\n                        max_memory=params.get('max_memory'),\n                        no_split_module_classes=model._no_split_modules\n                    )\n\n   
         if shared.args.disk:\n                params['offload_folder'] = str(Path(shared.args.disk_cache_dir))\n\n        logger.info(\"TRANSFORMERS_PARAMS=\")\n        pprint.PrettyPrinter(indent=4, sort_dicts=False).pprint(params)\n        print()\n        model = LoaderClass.from_pretrained(path_to_model, **params)\n\n    return model\n\n\ndef get_max_memory_dict():\n    max_memory = {}\n    if shared.args.cpu_memory > 0:\n        max_memory['cpu'] = f'{shared.args.cpu_memory}GiB'\n\n    if shared.args.gpu_split:\n        for i, memory in enumerate(shared.args.gpu_split.split(',')):\n            max_memory[i] = f'{memory}GiB'\n\n    return max_memory if len(max_memory) > 0 else None\n"
  },
  {
    "path": "modules/ui.py",
    "content": "import copy\nimport threading\nfrom pathlib import Path\n\nimport gradio as gr\nimport yaml\n\nimport extensions\nimport modules.extensions as extensions_module\nfrom modules import shared\nfrom modules.chat import load_history\nfrom modules.utils import gradio\n\n# Global state for auto-saving UI settings with debouncing\n_auto_save_timer = None\n_auto_save_lock = threading.Lock()\n_last_interface_state = None\n_last_preset = None\n_last_extensions = None\n_last_show_controls = None\n_last_theme_state = None\n\nwith open(Path(__file__).resolve().parent / '../css/NotoSans/stylesheet.css', 'r', encoding='utf-8') as f:\n    css = f.read()\nwith open(Path(__file__).resolve().parent / '../css/main.css', 'r', encoding='utf-8') as f:\n    css += f.read()\nwith open(Path(__file__).resolve().parent / '../css/katex/katex.min.css', 'r', encoding='utf-8') as f:\n    css += f.read()\nwith open(Path(__file__).resolve().parent / '../css/highlightjs/highlightjs-copy.min.css', 'r', encoding='utf-8') as f:\n    css += f.read()\nwith open(Path(__file__).resolve().parent / '../js/main.js', 'r', encoding='utf-8') as f:\n    js = f.read()\nwith open(Path(__file__).resolve().parent / '../js/global_scope_js.js', 'r', encoding='utf-8') as f:\n    global_scope_js = f.read()\nwith open(Path(__file__).resolve().parent / '../js/save_files.js', 'r', encoding='utf-8') as f:\n    save_files_js = f.read()\nwith open(Path(__file__).resolve().parent / '../js/switch_tabs.js', 'r', encoding='utf-8') as f:\n    switch_tabs_js = f.read()\nwith open(Path(__file__).resolve().parent / '../js/show_controls.js', 'r', encoding='utf-8') as f:\n    show_controls_js = f.read()\nwith open(Path(__file__).resolve().parent / '../js/update_big_picture.js', 'r', encoding='utf-8') as f:\n    update_big_picture_js = f.read()\nwith open(Path(__file__).resolve().parent / '../js/dark_theme.js', 'r', encoding='utf-8') as f:\n    dark_theme_js = f.read()\n\nrefresh_symbol = '🔄'\ndelete_symbol = 
'🗑️'\nsave_symbol = '💾'\n\ntheme = gr.themes.Default(\n    font=['Noto Sans', 'Helvetica', 'ui-sans-serif', 'system-ui', 'sans-serif'],\n    font_mono=['IBM Plex Mono', 'ui-monospace', 'Consolas', 'monospace'],\n).set(\n    border_color_primary='#c5c5d2',\n    button_large_padding='6px 12px',\n    body_text_color_subdued='#484848',\n    background_fill_secondary='#eaeaea',\n    background_fill_primary='var(--neutral-50)',\n    body_background_fill=\"white\",\n    block_background_fill=\"#f4f4f4\",\n    body_text_color=\"#333\",\n    button_secondary_background_fill=\"#f4f4f4\",\n    button_secondary_border_color=\"var(--border-color-primary)\"\n)\n\nif not shared.args.old_colors:\n    theme = theme.set(\n        # General Colors\n        border_color_primary='#c5c5d2',\n        body_text_color_subdued='#484848',\n        background_fill_secondary='#eaeaea',\n        background_fill_secondary_dark='var(--selected-item-color-dark, #282930)',\n        background_fill_primary='var(--neutral-50)',\n        background_fill_primary_dark='var(--darker-gray, #1C1C1D)',\n        body_background_fill=\"white\",\n        block_background_fill=\"transparent\",\n        body_text_color='rgb(64, 64, 64)',\n        button_secondary_background_fill=\"white\",\n        button_secondary_border_color=\"var(--border-color-primary)\",\n        input_shadow=\"none\",\n        button_shadow_hover=\"none\",\n\n        # Dark Mode Colors\n        input_background_fill_dark='var(--darker-gray, #1C1C1D)',\n        checkbox_background_color_dark='var(--darker-gray, #1C1C1D)',\n        block_background_fill_dark='transparent',\n        block_border_color_dark='transparent',\n        input_border_color_dark='var(--border-color-dark, #525252)',\n        input_border_color_focus_dark='var(--border-color-dark, #525252)',\n        checkbox_border_color_dark='var(--border-color-dark, #525252)',\n        border_color_primary_dark='var(--border-color-dark, #525252)',\n        
button_secondary_border_color_dark='var(--border-color-dark, #525252)',\n        body_background_fill_dark='var(--dark-gray, #212125)',\n        button_primary_background_fill_dark='transparent',\n        button_secondary_background_fill_dark='transparent',\n        checkbox_label_background_fill_dark='transparent',\n        button_cancel_background_fill_dark='transparent',\n        button_secondary_background_fill_hover_dark='var(--selected-item-color-dark, #282930)',\n        checkbox_label_background_fill_hover_dark='var(--selected-item-color-dark, #282930)',\n        table_even_background_fill_dark='var(--darker-gray, #1C1C1D)',\n        table_odd_background_fill_dark='var(--selected-item-color-dark, #282930)',\n        code_background_fill_dark='var(--darker-gray, #1C1C1D)',\n\n        # Shadows and Radius\n        checkbox_label_shadow='none',\n        block_shadow='none',\n        block_shadow_dark='none',\n        input_shadow_focus='none',\n        input_shadow_focus_dark='none',\n        button_large_radius='0.375rem',\n        button_large_padding='6px 12px',\n        input_radius='0.375rem',\n        block_radius='0',\n    )\n\nif (shared.user_data_dir / \"notification.mp3\").exists():\n    audio_notification_js = \"document.querySelector('#audio_notification audio')?.play();\"\nelse:\n    audio_notification_js = \"\"\n\n\ndef list_model_elements():\n    from modules.loaders import list_model_elements\n    return list_model_elements()\n\n\ndef list_interface_input_elements():\n    elements = [\n        'temperature',\n        'dynatemp_low',\n        'dynatemp_high',\n        'dynatemp_exponent',\n        'smoothing_factor',\n        'smoothing_curve',\n        'min_p',\n        'top_p',\n        'top_k',\n        'typical_p',\n        'xtc_threshold',\n        'xtc_probability',\n        'epsilon_cutoff',\n        'eta_cutoff',\n        'tfs',\n        'top_a',\n        'top_n_sigma',\n        'adaptive_target',\n        'adaptive_decay',\n        
'dry_multiplier',\n        'dry_allowed_length',\n        'dry_base',\n        'repetition_penalty',\n        'frequency_penalty',\n        'presence_penalty',\n        'encoder_repetition_penalty',\n        'no_repeat_ngram_size',\n        'repetition_penalty_range',\n        'penalty_alpha',\n        'guidance_scale',\n        'mirostat_mode',\n        'mirostat_tau',\n        'mirostat_eta',\n        'max_new_tokens',\n        'prompt_lookup_num_tokens',\n        'max_tokens_second',\n        'do_sample',\n        'dynamic_temperature',\n        'temperature_last',\n        'auto_max_new_tokens',\n        'ban_eos_token',\n        'add_bos_token',\n        'enable_thinking',\n        'reasoning_effort',\n        'skip_special_tokens',\n        'stream',\n        'static_cache',\n        'truncation_length',\n        'seed',\n        'sampler_priority',\n        'custom_stopping_strings',\n        'custom_token_bans',\n        'negative_prompt',\n        'dry_sequence_breakers',\n        'grammar_string',\n        'navigate_message_index',\n        'navigate_direction',\n        'navigate_message_role',\n        'edit_message_index',\n        'edit_message_text',\n        'edit_message_role',\n        'branch_index',\n        'enable_web_search',\n        'web_search_pages',\n    ]\n\n    # Chat elements\n    elements += [\n        'history',\n        'search_chat',\n        'unique_id',\n        'textbox',\n        'start_with',\n        'selected_tools',\n        'mode',\n        'chat_style',\n        'chat-instruct_command',\n        'character_menu',\n        'user_menu',\n        'name2',\n        'context',\n        'greeting',\n        'name1',\n        'user_bio',\n        'custom_system_message',\n        'instruction_template_str',\n        'chat_template_str',\n    ]\n\n    # Notebook/default elements\n    elements += [\n        'textbox-default',\n        'textbox-notebook',\n        'prompt_menu-default',\n        'prompt_menu-notebook',\n        
'output_textbox',\n    ]\n\n    # Model elements\n    elements += list_model_elements()\n\n    # Other elements\n    elements += [\n        'show_two_notebook_columns',\n        'paste_to_attachment',\n        'include_past_attachments',\n    ]\n\n    if not shared.args.portable:\n        # Image generation elements\n        elements += [\n            'image_prompt',\n            'image_neg_prompt',\n            'image_width',\n            'image_height',\n            'image_aspect_ratio',\n            'image_steps',\n            'image_cfg_scale',\n            'image_seed',\n            'image_batch_size',\n            'image_batch_count',\n            'image_llm_variations',\n            'image_llm_variations_prompt',\n            'image_model_menu',\n            'image_dtype',\n            'image_attn_backend',\n            'image_compile',\n            'image_cpu_offload',\n            'image_quant',\n        ]\n\n    return elements\n\n\ndef gather_interface_values(*args):\n    interface_elements = list_interface_input_elements()\n\n    output = {}\n    for element, value in zip(interface_elements, args):\n        output[element] = value\n\n    if not shared.args.multi_user:\n        shared.persistent_interface_state = output\n\n        # Remove the chat input, as it gets cleared after this function call\n        shared.persistent_interface_state.pop('textbox')\n\n    # Prevent history loss if backend is restarted but UI is not refreshed\n    if (output['history'] is None or (len(output['history'].get('visible', [])) == 0 and len(output['history'].get('internal', [])) == 0)) and output['unique_id'] is not None:\n        output['history'] = load_history(output['unique_id'], output['character_menu'], output['mode'])\n\n    return output\n\n\ndef apply_interface_values(state, use_persistent=False):\n    if use_persistent:\n        state = shared.persistent_interface_state\n        if 'textbox-default' in state and 'prompt_menu-default' in state:\n            
state.pop('prompt_menu-default')\n\n        if 'textbox-notebook' in state and 'prompt_menu-notebook' in state:\n            state.pop('prompt_menu-notebook')\n\n    elements = list_interface_input_elements()\n\n    if len(state) == 0:\n        return [gr.update() for k in elements]  # Dummy, do nothing\n    else:\n        return [state[k] if k in state else gr.update() for k in elements]\n\n\ndef save_settings(state, preset, extensions_list, show_controls, theme_state, manual_save=False):\n    output = copy.deepcopy(shared.settings)\n    exclude = []\n    for k in state:\n        if k in shared.settings and k not in exclude:\n            output[k] = state[k]\n\n    if preset:\n        output['preset'] = preset\n    output['prompt-notebook'] = state['prompt_menu-default'] if state['show_two_notebook_columns'] else state['prompt_menu-notebook']\n    if state.get('character_menu'):\n        output['character'] = state['character_menu']\n    if state.get('user_menu'):\n        output['user'] = state['user_menu']\n    output['seed'] = int(output['seed'])\n    output['custom_stopping_strings'] = output.get('custom_stopping_strings') or ''\n    output['custom_token_bans'] = output.get('custom_token_bans') or ''\n    output['show_controls'] = show_controls\n    output['dark_theme'] = True if theme_state == 'dark' else False\n    output.pop('instruction_template_str')\n    output.pop('truncation_length')\n\n    # Handle extensions and extension parameters\n    if manual_save:\n        # Save current extensions and their parameter values\n        output['default_extensions'] = extensions_list\n\n        for extension_name in extensions_list:\n            extension = getattr(extensions, extension_name, None)\n            if extension:\n                extension = extension.script\n                if hasattr(extension, 'params'):\n                    params = getattr(extension, 'params')\n                    for param in params:\n                        _id = 
f\"{extension_name}-{param}\"\n                        # Only save if different from default value\n                        if param not in shared.default_settings or params[param] != shared.default_settings[param]:\n                            output[_id] = params[param]\n    else:\n        # Preserve existing extensions and extension parameters during autosave\n        settings_path = shared.user_data_dir / 'settings.yaml'\n        if settings_path.exists():\n            try:\n                with open(settings_path, 'r', encoding='utf-8') as f:\n                    existing_settings = yaml.safe_load(f.read()) or {}\n\n                # Preserve default_extensions\n                if 'default_extensions' in existing_settings:\n                    output['default_extensions'] = existing_settings['default_extensions']\n\n                # Preserve extension parameter values\n                for key, value in existing_settings.items():\n                    if any(key.startswith(f\"{ext_name}-\") for ext_name in extensions_module.available_extensions):\n                        output[key] = value\n            except Exception:\n                pass  # If we can't read the file, just don't modify extensions\n\n    # Do not save unchanged settings\n    for key in list(output.keys()):\n        if key in shared.default_settings and output[key] == shared.default_settings[key]:\n            output.pop(key)\n\n    return yaml.dump(output, sort_keys=False, width=float(\"inf\"), allow_unicode=True)\n\n\ndef store_current_state_and_debounce(interface_state, preset, extensions, show_controls, theme_state):\n    \"\"\"Store current state and trigger debounced save\"\"\"\n    global _auto_save_timer, _last_interface_state, _last_preset, _last_extensions, _last_show_controls, _last_theme_state\n\n    if shared.args.multi_user:\n        return\n\n    # Store the current state in global variables\n    _last_interface_state = interface_state\n    _last_preset = preset\n    
_last_extensions = extensions\n    _last_show_controls = show_controls\n    _last_theme_state = theme_state\n\n    # Reset the debounce timer\n    with _auto_save_lock:\n        if _auto_save_timer is not None:\n            _auto_save_timer.cancel()\n\n        _auto_save_timer = threading.Timer(1.0, _perform_debounced_save)\n        _auto_save_timer.start()\n\n\ndef _perform_debounced_save():\n    \"\"\"Actually perform the save using the stored state\"\"\"\n    global _auto_save_timer\n\n    try:\n        if _last_interface_state is not None:\n            contents = save_settings(_last_interface_state, _last_preset, _last_extensions, _last_show_controls, _last_theme_state, manual_save=False)\n            settings_path = shared.user_data_dir / 'settings.yaml'\n            settings_path.parent.mkdir(exist_ok=True)\n            with open(settings_path, 'w', encoding='utf-8') as f:\n                f.write(contents)\n    except Exception as e:\n        print(f\"Auto-save failed: {e}\")\n    finally:\n        with _auto_save_lock:\n            _auto_save_timer = None\n\n\ndef setup_auto_save():\n    \"\"\"Attach auto-save to key UI elements\"\"\"\n    if shared.args.multi_user:\n        return\n\n    change_elements = [\n        # Chat tab (ui_chat.py)\n        'start_with',\n        'enable_web_search',\n        'web_search_pages',\n        'mode',\n        'chat_style',\n        'chat-instruct_command',\n        'character_menu',\n        'user_menu',\n        'name1',\n        'name2',\n        'context',\n        'greeting',\n        'user_bio',\n        'custom_system_message',\n        'chat_template_str',\n        'selected_tools',\n\n        # Parameters tab (ui_parameters.py) - Generation parameters\n        'preset_menu',\n        'temperature',\n        'dynatemp_low',\n        'dynatemp_high',\n        'dynatemp_exponent',\n        'smoothing_factor',\n        'smoothing_curve',\n        'min_p',\n        'top_p',\n        'top_k',\n        'typical_p',\n   
     'xtc_threshold',\n        'xtc_probability',\n        'epsilon_cutoff',\n        'eta_cutoff',\n        'tfs',\n        'top_a',\n        'top_n_sigma',\n        'adaptive_target',\n        'adaptive_decay',\n        'dry_multiplier',\n        'dry_allowed_length',\n        'dry_base',\n        'repetition_penalty',\n        'frequency_penalty',\n        'presence_penalty',\n        'encoder_repetition_penalty',\n        'no_repeat_ngram_size',\n        'repetition_penalty_range',\n        'penalty_alpha',\n        'guidance_scale',\n        'mirostat_mode',\n        'mirostat_tau',\n        'mirostat_eta',\n        'max_new_tokens',\n        'prompt_lookup_num_tokens',\n        'max_tokens_second',\n        'do_sample',\n        'dynamic_temperature',\n        'temperature_last',\n        'auto_max_new_tokens',\n        'ban_eos_token',\n        'add_bos_token',\n        'enable_thinking',\n        'reasoning_effort',\n        'skip_special_tokens',\n        'stream',\n        'static_cache',\n        'seed',\n        'sampler_priority',\n        'custom_stopping_strings',\n        'custom_token_bans',\n        'negative_prompt',\n        'dry_sequence_breakers',\n        'grammar_string',\n\n        # Default tab (ui_default.py)\n        'prompt_menu-default',\n\n        # Notebook tab (ui_notebook.py)\n        'prompt_menu-notebook',\n\n        # Session tab (ui_session.py)\n        'show_controls',\n        'theme_state',\n        'show_two_notebook_columns',\n        'paste_to_attachment',\n        'include_past_attachments',\n\n    ]\n\n    if not shared.args.portable:\n        # Image generation tab (ui_image_generation.py)\n        change_elements += [\n            'image_prompt',\n            'image_neg_prompt',\n            'image_width',\n            'image_height',\n            'image_aspect_ratio',\n            'image_steps',\n            'image_cfg_scale',\n            'image_seed',\n            'image_batch_size',\n            
'image_batch_count',\n            'image_llm_variations',\n            'image_llm_variations_prompt',\n            'image_model_menu',\n            'image_dtype',\n            'image_attn_backend',\n            'image_compile',\n            'image_cpu_offload',\n            'image_quant',\n        ]\n\n    for element_name in change_elements:\n        if element_name in shared.gradio:\n            shared.gradio[element_name].change(\n                gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(\n                store_current_state_and_debounce, gradio('interface_state', 'preset_menu', 'extensions_menu', 'show_controls', 'theme_state'), None, show_progress=False)\n\n\ndef create_refresh_button(refresh_component, refresh_method, refreshed_args, elem_class, interactive=True):\n    \"\"\"\n    Copied from https://github.com/AUTOMATIC1111/stable-diffusion-webui\n    \"\"\"\n    def refresh():\n        refresh_method()\n        args = refreshed_args() if callable(refreshed_args) else refreshed_args\n\n        return gr.update(**(args or {}))\n\n    refresh_button = gr.Button(refresh_symbol, elem_classes=elem_class, interactive=interactive)\n    refresh_button.click(\n        fn=lambda: {k: tuple(v) if type(k) is list else v for k, v in refresh().items()},\n        inputs=[],\n        outputs=[refresh_component]\n    )\n\n    return refresh_button\n"
  },
  {
    "path": "modules/ui_chat.py",
    "content": "import json\nfrom functools import partial\nfrom pathlib import Path\n\nimport gradio as gr\nfrom PIL import Image\n\nfrom modules import chat, shared, ui, utils\nfrom modules.html_generator import chat_html_wrapper\nfrom modules.text_generation import stop_everything_event\nfrom modules.utils import gradio\n\ninputs = ('Chat input', 'interface_state')\nreload_arr = ('history', 'name1', 'name2', 'mode', 'chat_style', 'character_menu')\n\n\ndef create_ui():\n    mu = shared.args.multi_user\n\n    shared.gradio['Chat input'] = gr.State()\n    shared.gradio['history'] = gr.State({'internal': [], 'visible': [], 'metadata': {}})\n    shared.gradio['display'] = gr.Headless(value={})\n\n    with gr.Tab('Chat', elem_id='chat-tab'):\n        with gr.Row(elem_id='past-chats-row', elem_classes=['pretty_scrollbar']):\n            with gr.Column():\n                with gr.Row(elem_id='past-chats-buttons'):\n                    shared.gradio['branch_chat'] = gr.Button('Branch', elem_classes=['refresh-button', 'refresh-button-medium'], elem_id='Branch', interactive=not mu)\n                    shared.gradio['rename_chat'] = gr.Button('Rename', elem_classes=['refresh-button', 'refresh-button-medium'], interactive=not mu)\n                    shared.gradio['delete_chat'] = gr.Button('🗑️', visible=False, elem_classes='refresh-button', interactive=not mu, elem_id='delete_chat')\n                    shared.gradio['Start new chat'] = gr.Button('New chat', elem_classes=['refresh-button', 'refresh-button-medium', 'focus-on-chat-input'], elem_id='new-chat-btn')\n                    shared.gradio['Start incognito chat'] = gr.Button('Incognito chat', visible=False, elem_id='incognito-chat-btn')\n                    shared.gradio['branch_index'] = gr.Number(value=-1, precision=0, visible=False, elem_id=\"Branch-index\", interactive=True)\n\n                shared.gradio['search_chat'] = gr.Textbox(placeholder='Search chats...', max_lines=1, elem_id='search_chat')\n\n         
       with gr.Row(elem_id='delete-chat-row', visible=False) as shared.gradio['delete-chat-row']:\n                    shared.gradio['delete_chat-cancel'] = gr.Button('Cancel', elem_classes=['refresh-button', 'focus-on-chat-input'], elem_id='delete_chat-cancel')\n                    shared.gradio['delete_chat-confirm'] = gr.Button('Confirm', variant='stop', elem_classes=['refresh-button', 'focus-on-chat-input'], elem_id='delete_chat-confirm')\n\n                with gr.Row(elem_id='rename-row', visible=False) as shared.gradio['rename-row']:\n                    shared.gradio['rename_to'] = gr.Textbox(label='Rename to:', placeholder='New name', elem_classes=['no-background'])\n                    with gr.Row():\n                        shared.gradio['rename_to-cancel'] = gr.Button('Cancel', elem_classes=['refresh-button', 'focus-on-chat-input'])\n                        shared.gradio['rename_to-confirm'] = gr.Button('Confirm', elem_classes=['refresh-button', 'focus-on-chat-input'], variant='primary')\n\n                with gr.Row():\n                    shared.gradio['unique_id'] = gr.Radio(label=\"\", elem_classes=['slim-dropdown', 'pretty_scrollbar'], interactive=not mu, elem_id='past-chats')\n\n        with gr.Row():\n            with gr.Column(elem_id='chat-col'):\n                shared.gradio['html_display'] = gr.HTML(value=chat_html_wrapper({'internal': [], 'visible': [], 'metadata': {}}, '', '', 'chat', 'cai-chat', '')['html'], visible=True)\n                with gr.Row(elem_id=\"chat-input-row\"):\n                    with gr.Column(scale=1, elem_id='gr-hover-container'):\n                        gr.HTML(value='<div class=\"hover-element\" onclick=\"void(0)\"><span style=\"width: 100px; display: block\" id=\"hover-element-button\">&#9776;</span><div class=\"hover-menu\" id=\"hover-menu\"></div>', elem_id='gr-hover')\n\n                    with gr.Column(scale=10, elem_id='chat-input-container'):\n                        shared.gradio['textbox'] = 
gr.MultimodalTextbox(label='', placeholder='Send a message', file_types=['text', '.pdf', 'image'], file_count=\"multiple\", elem_id='chat-input', elem_classes=['add_scrollbar'])\n                        shared.gradio['typing-dots'] = gr.HTML(value='<div class=\"typing\"><span></span><span class=\"dot1\"></span><span class=\"dot2\"></span></div>', label='typing', elem_id='typing-container')\n\n                    with gr.Column(scale=1, elem_id='generate-stop-container'):\n                        with gr.Row():\n                            shared.gradio['Stop'] = gr.Button('Stop', elem_id='stop', visible=False)\n                            shared.gradio['Generate'] = gr.Button('Send', elem_id='Generate', variant='primary')\n\n        # Hover menu buttons\n        with gr.Column(elem_id='chat-buttons'):\n            shared.gradio['Regenerate'] = gr.Button('Regenerate (Ctrl + Enter)', elem_id='Regenerate')\n            shared.gradio['Continue'] = gr.Button('Continue (Alt + Enter)', elem_id='Continue')\n            shared.gradio['Remove last'] = gr.Button('Remove last reply (Ctrl + Shift + Backspace)', elem_id='Remove-last')\n            shared.gradio['Impersonate'] = gr.Button('Impersonate (Ctrl + Shift + M)', elem_id='Impersonate')\n            shared.gradio['Send dummy message'] = gr.Button('Send dummy message')\n            shared.gradio['Send dummy reply'] = gr.Button('Send dummy reply')\n            shared.gradio['send-chat-to-notebook'] = gr.Button('Send to Notebook')\n            shared.gradio['show_controls'] = gr.Checkbox(value=shared.settings['show_controls'], label='Show controls (Ctrl+S)', elem_id='show-controls')\n\n        with gr.Row(elem_id='chat-controls', elem_classes=['pretty_scrollbar']):\n            with gr.Column():\n                with gr.Row():\n                    shared.gradio['start_with'] = gr.Textbox(label='Start reply with', placeholder='Sure thing!', value=shared.settings['start_with'], elem_classes=['add_scrollbar'])\n\n               
 gr.HTML(\"<div class='sidebar-vertical-separator'></div>\")\n\n                shared.gradio['reasoning_effort'] = gr.Dropdown(value=shared.settings['reasoning_effort'], choices=['low', 'medium', 'high'], label='Reasoning effort', info='Used by GPT-OSS.')\n                shared.gradio['enable_thinking'] = gr.Checkbox(value=shared.settings['enable_thinking'], label='Enable thinking', info='Used by Seed-OSS and pre-2507 Qwen3.')\n\n                gr.HTML(\"<div class='sidebar-vertical-separator'></div>\")\n\n                shared.gradio['enable_web_search'] = gr.Checkbox(value=shared.settings.get('enable_web_search', False), label='Activate web search', elem_id='web-search')\n                with gr.Row(visible=shared.settings.get('enable_web_search', False)) as shared.gradio['web_search_row']:\n                    shared.gradio['web_search_pages'] = gr.Number(value=shared.settings.get('web_search_pages', 3), precision=0, label='Number of pages to download', minimum=1, maximum=10)\n\n                gr.HTML(\"<div class='sidebar-vertical-separator'></div>\")\n\n                from modules.tool_use import get_available_tools\n                shared.gradio['selected_tools'] = gr.CheckboxGroup(choices=get_available_tools(), value=shared.settings.get('selected_tools', []), label='Tools', info='Functions the model can call during generation.', elem_id='tools-group')\n                shared.gradio['tools_refresh'] = gr.Button('Refresh list', elem_id='tools-refresh-btn', visible=False)\n                shared.gradio['tools_refresh'].click(fn=lambda: gr.update(choices=get_available_tools()), inputs=[], outputs=[shared.gradio['selected_tools']])\n\n                def sync_web_tools(selected):\n                    if 'web_search' in selected and 'fetch_webpage' not in selected and 'fetch_webpage' in get_available_tools():\n                        selected.append('fetch_webpage')\n\n                    return gr.update(value=selected)\n\n                
shared.gradio['selected_tools'].change(fn=sync_web_tools, inputs=[shared.gradio['selected_tools']], outputs=[shared.gradio['selected_tools']], show_progress=False)\n\n                gr.HTML(\"<div class='sidebar-vertical-separator'></div>\")\n\n                with gr.Row():\n                    shared.gradio['mode'] = gr.Radio(choices=['instruct', 'chat-instruct', 'chat'], value=None, label='Mode', info='In instruct and chat-instruct modes, the template under Parameters > Instruction template is used.', elem_id='chat-mode')\n\n                with gr.Row():\n                    shared.gradio['chat_style'] = gr.Dropdown(choices=utils.get_available_chat_styles(), label='Chat style', value=shared.settings['chat_style'], visible=shared.settings['mode'] != 'instruct')\n\n                with gr.Row():\n                    shared.gradio['chat-instruct_command'] = gr.Textbox(value=shared.settings['chat-instruct_command'], lines=12, label='Command for chat-instruct mode', info='<|character|> and <|prompt|> get replaced with the bot name and the regular chat prompt respectively.', visible=shared.settings['mode'] == 'chat-instruct', elem_classes=['add_scrollbar'])\n\n                gr.HTML(\"<div class='sidebar-vertical-separator'></div>\")\n\n                with gr.Row():\n                    shared.gradio['count_tokens'] = gr.Button('Count tokens', size='sm')\n\n                shared.gradio['token_display'] = gr.HTML(value='', elem_classes='token-display')\n\n        # Hidden elements for version navigation and editing\n        with gr.Row(visible=False):\n            shared.gradio['navigate_message_index'] = gr.Number(value=-1, precision=0, elem_id=\"Navigate-message-index\")\n            shared.gradio['navigate_direction'] = gr.Textbox(value=\"\", elem_id=\"Navigate-direction\")\n            shared.gradio['navigate_message_role'] = gr.Textbox(value=\"\", elem_id=\"Navigate-message-role\")\n            shared.gradio['navigate_version'] = 
gr.Button(elem_id=\"Navigate-version\")\n            shared.gradio['edit_message_index'] = gr.Number(value=-1, precision=0, elem_id=\"Edit-message-index\")\n            shared.gradio['edit_message_text'] = gr.Textbox(value=\"\", elem_id=\"Edit-message-text\")\n            shared.gradio['edit_message_role'] = gr.Textbox(value=\"\", elem_id=\"Edit-message-role\")\n            shared.gradio['edit_message'] = gr.Button(elem_id=\"Edit-message\")\n\n\ndef create_character_settings_ui():\n    mu = shared.args.multi_user\n    with gr.Tab('Character', elem_id=\"character-tab\"):\n        with gr.Row():\n            with gr.Column(scale=8):\n                with gr.Tab(\"Character\"):\n                    with gr.Row():\n                        shared.gradio['character_menu'] = gr.Dropdown(value=shared.settings['character'], choices=utils.get_available_characters(), label='Character', elem_id='character-menu', info='Used in chat and chat-instruct modes.', elem_classes='slim-dropdown')\n                        ui.create_refresh_button(shared.gradio['character_menu'], lambda: None, lambda: {'choices': utils.get_available_characters()}, 'refresh-button', interactive=not mu)\n                        shared.gradio['save_character'] = gr.Button('💾', elem_classes='refresh-button', elem_id=\"save-character\", interactive=not mu)\n                        shared.gradio['delete_character'] = gr.Button('🗑️', elem_classes='refresh-button', interactive=not mu)\n                        shared.gradio['restore_character'] = gr.Button('Restore character', elem_classes='refresh-button', interactive=True, elem_id='restore-character')\n\n                    shared.gradio['name2'] = gr.Textbox(value=shared.settings['name2'], lines=1, label='Character\\'s name')\n                    shared.gradio['context'] = gr.Textbox(value=shared.settings['context'], lines=10, label='Context', elem_classes=['add_scrollbar'], elem_id=\"character-context\")\n                    shared.gradio['greeting'] = 
gr.Textbox(value=shared.settings['greeting'], lines=5, label='Greeting', elem_classes=['add_scrollbar'], elem_id=\"character-greeting\")\n\n                with gr.Tab(\"User\"):\n                    with gr.Row():\n                        shared.gradio['user_menu'] = gr.Dropdown(value=shared.settings['user'], choices=utils.get_available_users(), label='User', elem_id='user-menu', info='Select a user profile.', elem_classes='slim-dropdown')\n                        ui.create_refresh_button(shared.gradio['user_menu'], lambda: None, lambda: {'choices': utils.get_available_users()}, 'refresh-button', interactive=not mu)\n                        shared.gradio['save_user'] = gr.Button('💾', elem_classes='refresh-button', elem_id=\"save-user\", interactive=not mu)\n                        shared.gradio['delete_user'] = gr.Button('🗑️', elem_classes='refresh-button', interactive=not mu)\n\n                    shared.gradio['name1'] = gr.Textbox(value=shared.settings['name1'], lines=1, label='Name')\n                    shared.gradio['user_bio'] = gr.Textbox(value=shared.settings['user_bio'], lines=10, label='Description', info='Here you can optionally write a description of yourself.', placeholder='{{user}}\\'s personality: ...', elem_classes=['add_scrollbar'], elem_id=\"user-description\")\n\n                with gr.Tab('Chat history'):\n                    with gr.Row():\n                        with gr.Column():\n                            shared.gradio['save_chat_history'] = gr.Button(value='Save history')\n\n                        with gr.Column():\n                            shared.gradio['load_chat_history'] = gr.File(type='binary', file_types=['.json', '.txt'], label='Upload History JSON')\n\n                with gr.Tab('Upload character'):\n                    with gr.Tab('YAML or JSON'):\n                        with gr.Row():\n                            shared.gradio['upload_json'] = gr.File(type='binary', file_types=['.json', '.yaml'], label='JSON or YAML 
File', interactive=not mu)\n                            shared.gradio['upload_img_bot'] = gr.Image(type='filepath', label='Profile Picture (optional)', interactive=not mu)\n\n                        shared.gradio['Submit character'] = gr.Button(value='Submit', interactive=False)\n\n                    with gr.Tab('TavernAI PNG'):\n                        with gr.Row():\n                            with gr.Column():\n                                shared.gradio['upload_img_tavern'] = gr.Image(type='filepath', label='TavernAI PNG File', elem_id='upload_img_tavern', interactive=not mu)\n                                shared.gradio['tavern_json'] = gr.State()\n                            with gr.Column():\n                                shared.gradio['tavern_name'] = gr.Textbox(value='', lines=1, label='Name', interactive=False)\n                                shared.gradio['tavern_desc'] = gr.Textbox(value='', lines=10, label='Description', interactive=False, elem_classes=['add_scrollbar'])\n\n                        shared.gradio['Submit tavern character'] = gr.Button(value='Submit', interactive=False)\n\n            with gr.Column(scale=1):\n                shared.gradio['character_picture'] = gr.Image(label='Character picture', type='filepath', interactive=not mu)\n                shared.gradio['your_picture'] = gr.Image(label='Your picture', type='filepath', value=Image.open(shared.user_data_dir / 'cache' / 'pfp_me.png') if (shared.user_data_dir / 'cache' / 'pfp_me.png').exists() else None, interactive=not mu)\n\n\ndef create_chat_settings_ui():\n    mu = shared.args.multi_user\n    with gr.Tab('Instruction template'):\n        with gr.Row():\n            with gr.Column():\n                with gr.Row():\n                    shared.gradio['instruction_template'] = gr.Dropdown(choices=utils.get_available_instruction_templates(), label='Saved instruction templates', info=\"After selecting the template, click on \\\"Load\\\" to load and apply it.\", value='None', 
elem_classes='slim-dropdown')\n                    ui.create_refresh_button(shared.gradio['instruction_template'], lambda: None, lambda: {'choices': utils.get_available_instruction_templates()}, 'refresh-button', interactive=not mu)\n                    shared.gradio['load_template'] = gr.Button(\"Load\", elem_classes='refresh-button')\n                    shared.gradio['save_template'] = gr.Button('💾', elem_classes='refresh-button', interactive=not mu)\n                    shared.gradio['delete_template'] = gr.Button('🗑️ ', elem_classes='refresh-button', interactive=not mu)\n\n            with gr.Column():\n                pass\n\n        with gr.Row():\n            with gr.Column():\n                shared.gradio['instruction_template_str'] = gr.Textbox(value=shared.settings['instruction_template_str'], label='Instruction template', lines=24, info='This gets autodetected; you usually don\\'t need to change it. Used in instruct and chat-instruct modes.', elem_classes=['add_scrollbar', 'monospace'], elem_id='instruction-template-str')\n                with gr.Row():\n                    shared.gradio['send_instruction_to_notebook'] = gr.Button('Send to notebook', elem_classes=['small-button'])\n\n            with gr.Column():\n                shared.gradio['chat_template_str'] = gr.Textbox(value=shared.settings['chat_template_str'], label='Chat template', lines=22, elem_classes=['add_scrollbar', 'monospace'], info='Defines how the chat prompt in chat/chat-instruct modes is generated.', elem_id='chat-template-str')\n\n\ndef create_event_handlers():\n\n    # Obsolete variables, kept for compatibility with old extensions\n    shared.input_params = gradio(inputs)\n    shared.reload_inputs = gradio(reload_arr)\n\n    # Morph HTML updates instead of updating everything\n    shared.gradio['display'].change(None, gradio('display'), None, js=\"(data) => handleMorphdomUpdate(data)\")\n\n    shared.gradio['Generate'].click(\n        ui.gather_interface_values, 
gradio(shared.input_elements), gradio('interface_state')).then(\n        lambda x: (x, {\"text\": \"\", \"files\": []}), gradio('textbox'), gradio('Chat input', 'textbox'), show_progress=False).then(\n        lambda: None, None, None, js='() => document.getElementById(\"chat\").parentNode.parentNode.parentNode.classList.add(\"_generating\")').then(\n        chat.generate_chat_reply_wrapper, gradio(inputs), gradio('display', 'history'), show_progress=False).then(\n        None, None, None, js='() => document.getElementById(\"chat\").parentNode.parentNode.parentNode.classList.remove(\"_generating\")').then(\n        None, None, None, js=f'() => {{{ui.audio_notification_js}}}')\n\n    shared.gradio['textbox'].submit(\n        ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(\n        lambda x: (x, {\"text\": \"\", \"files\": []}), gradio('textbox'), gradio('Chat input', 'textbox'), show_progress=False).then(\n        lambda: None, None, None, js='() => document.getElementById(\"chat\").parentNode.parentNode.parentNode.classList.add(\"_generating\")').then(\n        chat.generate_chat_reply_wrapper, gradio(inputs), gradio('display', 'history'), show_progress=False).then(\n        None, None, None, js='() => document.getElementById(\"chat\").parentNode.parentNode.parentNode.classList.remove(\"_generating\")').then(\n        None, None, None, js=f'() => {{{ui.audio_notification_js}}}')\n\n    shared.gradio['Regenerate'].click(\n        ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(\n        lambda: None, None, None, js='() => document.getElementById(\"chat\").parentNode.parentNode.parentNode.classList.add(\"_generating\")').then(\n        partial(chat.generate_chat_reply_wrapper, regenerate=True), gradio(inputs), gradio('display', 'history'), show_progress=False).then(\n        None, None, None, js='() => 
document.getElementById(\"chat\").parentNode.parentNode.parentNode.classList.remove(\"_generating\")').then(\n        None, None, None, js=f'() => {{{ui.audio_notification_js}}}')\n\n    shared.gradio['Continue'].click(\n        ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(\n        lambda: None, None, None, js='() => document.getElementById(\"chat\").parentNode.parentNode.parentNode.classList.add(\"_generating\")').then(\n        partial(chat.generate_chat_reply_wrapper, _continue=True), gradio(inputs), gradio('display', 'history'), show_progress=False).then(\n        None, None, None, js='() => document.getElementById(\"chat\").parentNode.parentNode.parentNode.classList.remove(\"_generating\")').then(\n        None, None, None, js=f'() => {{{ui.audio_notification_js}}}')\n\n    shared.gradio['Impersonate'].click(\n        ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(\n        lambda x: x, gradio('textbox'), gradio('Chat input'), show_progress=False).then(\n        lambda: None, None, None, js='() => document.getElementById(\"chat\").parentNode.parentNode.parentNode.classList.add(\"_generating\")').then(\n        chat.impersonate_wrapper, gradio(inputs), gradio('textbox', 'display'), show_progress=False).then(\n        None, None, None, js='() => document.getElementById(\"chat\").parentNode.parentNode.parentNode.classList.remove(\"_generating\")').then(\n        None, None, None, js=f'() => {{{ui.audio_notification_js}}}')\n\n    shared.gradio['Send dummy message'].click(\n        ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(\n        chat.handle_send_dummy_message_click, gradio('textbox', 'interface_state'), gradio('history', 'display', 'textbox'), show_progress=False)\n\n    shared.gradio['Send dummy reply'].click(\n        ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(\n        
chat.handle_send_dummy_reply_click, gradio('textbox', 'interface_state'), gradio('history', 'display', 'textbox'), show_progress=False)\n\n    shared.gradio['Remove last'].click(\n        ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(\n        chat.handle_remove_last_click, gradio('interface_state'), gradio('history', 'display', 'textbox'), show_progress=False)\n\n    shared.gradio['Stop'].click(\n        stop_everything_event, None, None, queue=False).then(\n        chat.redraw_html, gradio(reload_arr), gradio('display'), show_progress=False)\n\n    if not shared.args.multi_user:\n        shared.gradio['unique_id'].select(\n            ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(\n            chat.handle_unique_id_select, gradio('interface_state'), gradio('history', 'display'), show_progress=False)\n\n    shared.gradio['Start new chat'].click(\n        ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(\n        chat.handle_start_new_chat_click, gradio('interface_state'), gradio('history', 'display', 'unique_id'), show_progress=False)\n\n    shared.gradio['Start incognito chat'].click(\n        ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(\n        chat.handle_start_incognito_chat_click, gradio('interface_state'), gradio('history', 'display', 'unique_id'), show_progress=False)\n\n    shared.gradio['delete_chat-confirm'].click(\n        ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(\n        chat.handle_delete_chat_confirm_click, gradio('interface_state'), gradio('history', 'display', 'unique_id'), show_progress=False)\n\n    shared.gradio['branch_chat'].click(\n        ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(\n        chat.handle_branch_chat_click, gradio('interface_state'), gradio('history', 
'display', 'unique_id', 'branch_index'), show_progress=False)\n\n    shared.gradio['rename_chat'].click(chat.handle_rename_chat_click, None, gradio('rename_to', 'rename-row'), show_progress=False)\n    shared.gradio['rename_to-cancel'].click(lambda: gr.update(visible=False), None, gradio('rename-row'), show_progress=False)\n    shared.gradio['rename_to-confirm'].click(\n        ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(\n        chat.handle_rename_chat_confirm, gradio('rename_to', 'interface_state'), gradio('unique_id', 'rename-row'))\n\n    shared.gradio['rename_to'].submit(\n        ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(\n        chat.handle_rename_chat_confirm, gradio('rename_to', 'interface_state'), gradio('unique_id', 'rename-row'), show_progress=False)\n\n    shared.gradio['search_chat'].change(\n        ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(\n        chat.handle_search_chat_change, gradio('interface_state'), gradio('unique_id'), show_progress=False)\n\n    shared.gradio['load_chat_history'].upload(\n        ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(\n        chat.handle_upload_chat_history, gradio('load_chat_history', 'interface_state'), gradio('history', 'display', 'unique_id'), show_progress=False).then(\n        None, None, None, js=f'() => {{{ui.switch_tabs_js}; switch_to_chat()}}')\n\n    shared.gradio['character_menu'].change(\n        ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(\n        chat.handle_character_menu_change, gradio('interface_state'), gradio('history', 'display', 'name1', 'name2', 'character_picture', 'greeting', 'context', 'unique_id'), show_progress=False).then(\n        None, None, None, js=f'() => {{{ui.update_big_picture_js}; updateBigPicture()}}')\n\n    
shared.gradio['character_picture'].change(chat.handle_character_picture_change, gradio('character_picture'), None, show_progress=False)\n\n    shared.gradio['mode'].change(\n        ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(\n        chat.handle_mode_change, gradio('interface_state'), gradio('history', 'display', 'chat_style', 'chat-instruct_command', 'unique_id'), show_progress=False).then(\n        None, gradio('mode'), None, js=\"(mode) => {const characterContainer = document.getElementById('character-menu').parentNode.parentNode; const isInChatTab = document.querySelector('#chat-controls').contains(characterContainer); if (isInChatTab) { characterContainer.style.display = mode === 'instruct' ? 'none' : ''; } if (mode === 'instruct') document.querySelectorAll('.bigProfilePicture').forEach(el => el.remove());}\")\n\n    shared.gradio['chat_style'].change(chat.redraw_html, gradio(reload_arr), gradio('display'), show_progress=False)\n\n    shared.gradio['navigate_version'].click(\n        ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(\n        chat.handle_navigate_version_click, gradio('interface_state'), gradio('history', 'display'), show_progress=False)\n\n    shared.gradio['edit_message'].click(\n        ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(\n        chat.handle_edit_message_click, gradio('interface_state'), gradio('history', 'display'), show_progress=False)\n\n    # Save/delete a character\n    shared.gradio['save_character'].click(chat.handle_save_character_click, gradio('name2'), gradio('save_character_filename', 'character_saver'), show_progress=False)\n    shared.gradio['delete_character'].click(lambda: gr.update(visible=True), None, gradio('character_deleter'), show_progress=False)\n    shared.gradio['load_template'].click(chat.handle_load_template_click, gradio('instruction_template'), 
gradio('instruction_template_str', 'instruction_template'), show_progress=False)\n    shared.gradio['save_template'].click(\n        ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(\n        chat.handle_save_template_click, gradio('instruction_template_str'), gradio('save_filename', 'save_root', 'save_contents', 'save_root_state', 'file_saver'), show_progress=False)\n\n    shared.gradio['restore_character'].click(\n        ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(\n        chat.restore_character_for_ui, gradio('interface_state'), gradio('interface_state', 'name2', 'context', 'greeting', 'character_picture'), show_progress=False)\n\n    shared.gradio['delete_template'].click(chat.handle_delete_template_click, gradio('instruction_template'), gradio('delete_filename', 'delete_root', 'delete_root_state', 'file_deleter'), show_progress=False)\n    shared.gradio['save_chat_history'].click(\n        lambda x: json.dumps(x, indent=4), gradio('history'), gradio('temporary_text')).then(\n        None, gradio('temporary_text', 'character_menu', 'mode'), None, js=f'(hist, char, mode) => {{{ui.save_files_js}; saveHistory(hist, char, mode)}}')\n\n    shared.gradio['Submit character'].click(\n        chat.upload_character, gradio('upload_json', 'upload_img_bot'), gradio('character_menu'), show_progress=False).then(\n        None, None, None, js=f'() => {{{ui.switch_tabs_js}; switch_to_character()}}')\n\n    shared.gradio['Submit tavern character'].click(\n        chat.upload_tavern_character, gradio('upload_img_tavern', 'tavern_json'), gradio('character_menu'), show_progress=False).then(\n        None, None, None, js=f'() => {{{ui.switch_tabs_js}; switch_to_character()}}')\n\n    shared.gradio['upload_json'].upload(lambda: gr.update(interactive=True), None, gradio('Submit character'))\n    shared.gradio['upload_json'].clear(lambda: gr.update(interactive=False), None, gradio('Submit 
character'))\n    shared.gradio['upload_img_tavern'].upload(chat.check_tavern_character, gradio('upload_img_tavern'), gradio('tavern_name', 'tavern_desc', 'tavern_json', 'Submit tavern character'), show_progress=False)\n    shared.gradio['upload_img_tavern'].clear(lambda: (None, None, None, gr.update(interactive=False)), None, gradio('tavern_name', 'tavern_desc', 'tavern_json', 'Submit tavern character'), show_progress=False)\n    shared.gradio['your_picture'].change(\n        ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(\n        chat.handle_your_picture_change, gradio('your_picture', 'interface_state'), gradio('display'), show_progress=False)\n\n    shared.gradio['send_instruction_to_notebook'].click(\n        ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(\n        chat.handle_send_instruction_click, gradio('interface_state'), gradio('textbox-notebook', 'textbox-default', 'output_textbox'), show_progress=False).then(\n        None, None, None, js=f'() => {{{ui.switch_tabs_js}; switch_to_notebook()}}')\n\n    shared.gradio['send-chat-to-notebook'].click(\n        ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(\n        chat.handle_send_chat_click, gradio('interface_state'), gradio('textbox-notebook', 'textbox-default', 'output_textbox'), show_progress=False).then(\n        None, None, None, js=f'() => {{{ui.switch_tabs_js}; switch_to_notebook()}}')\n\n    shared.gradio['show_controls'].change(None, gradio('show_controls'), None, js=f'(x) => {{{ui.show_controls_js}; toggle_controls(x)}}')\n\n    shared.gradio['count_tokens'].click(\n        ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(\n        chat.count_prompt_tokens, gradio('textbox', 'interface_state'), gradio('token_display'), show_progress=False)\n\n    shared.gradio['enable_web_search'].change(\n        lambda x: 
gr.update(visible=x),\n        gradio('enable_web_search'),\n        gradio('web_search_row')\n    )\n\n    # User menu event handlers\n    shared.gradio['user_menu'].change(\n        ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(\n        chat.handle_user_menu_change, gradio('interface_state'), gradio('name1', 'user_bio', 'your_picture'), show_progress=False)\n\n    shared.gradio['save_user'].click(chat.handle_save_user_click, gradio('name1'), gradio('save_user_filename', 'user_saver'), show_progress=False)\n    shared.gradio['delete_user'].click(lambda: gr.update(visible=True), None, gradio('user_deleter'), show_progress=False)\n"
  },
  {
    "path": "modules/ui_default.py",
    "content": "from pathlib import Path\n\nimport gradio as gr\n\nfrom modules import logits, shared, ui, utils\nfrom modules.prompts import count_tokens, load_prompt\nfrom modules.text_generation import (\n    generate_reply_wrapper,\n    get_token_ids,\n    stop_everything_event\n)\nfrom modules.ui_notebook import store_notebook_state_and_debounce\nfrom modules.utils import gradio\n\ninputs = ('textbox-default', 'interface_state')\noutputs = ('output_textbox', 'html-default')\n\n\ndef create_ui():\n    mu = shared.args.multi_user\n    with gr.Row(visible=shared.settings['show_two_notebook_columns']) as shared.gradio['default-tab']:\n        with gr.Row():\n            with gr.Column():\n                with gr.Row():\n                    shared.gradio['textbox-default'] = gr.Textbox(value=\"\", lines=27, label='Input', elem_classes=['textbox_default', 'add_scrollbar'])\n                    shared.gradio['token-counter-default'] = gr.HTML(value=\"<span>0</span>\", elem_id=\"default-token-counter\")\n\n                with gr.Row():\n                    shared.gradio['Continue-default'] = gr.Button('Continue')\n                    shared.gradio['Stop-default'] = gr.Button('Stop', elem_id='stop', visible=False)\n                    shared.gradio['Generate-default'] = gr.Button('Generate', variant='primary')\n\n                with gr.Row():\n                    shared.gradio['prompt_menu-default'] = gr.Dropdown(choices=utils.get_available_prompts(), value=shared.settings['prompt-notebook'], label='Prompt', elem_classes='slim-dropdown')\n                    ui.create_refresh_button(shared.gradio['prompt_menu-default'], lambda: None, lambda: {'choices': utils.get_available_prompts()}, 'refresh-button', interactive=not mu)\n                    shared.gradio['new_prompt-default'] = gr.Button('New', elem_classes='refresh-button', interactive=not mu)\n                    shared.gradio['rename_prompt-default'] = gr.Button('Rename', elem_classes='refresh-button', 
interactive=not mu)\n                    shared.gradio['delete_prompt-default'] = gr.Button('🗑️', elem_classes='refresh-button', interactive=not mu)\n\n                    # Rename elements (initially hidden)\n                    shared.gradio['rename_prompt_to-default'] = gr.Textbox(label=\"New name\", elem_classes=['no-background'], visible=False)\n                    shared.gradio['rename_prompt-cancel-default'] = gr.Button('Cancel', elem_classes=['refresh-button'], visible=False)\n                    shared.gradio['rename_prompt-confirm-default'] = gr.Button('Confirm', elem_classes=['refresh-button'], variant='primary', visible=False)\n\n                    # Delete confirmation elements (initially hidden)\n                    shared.gradio['delete_prompt-cancel-default'] = gr.Button('Cancel', elem_classes=['refresh-button'], visible=False)\n                    shared.gradio['delete_prompt-confirm-default'] = gr.Button('Confirm', variant='stop', elem_classes=['refresh-button'], visible=False)\n\n            with gr.Column():\n                with gr.Tab('Raw'):\n                    shared.gradio['output_textbox'] = gr.Textbox(lines=27, label='Output', elem_id='textbox-default', elem_classes=['textbox_default_output', 'add_scrollbar'])\n\n                with gr.Tab('Markdown'):\n                    shared.gradio['markdown_render-default'] = gr.Button('Render')\n                    shared.gradio['markdown-default'] = gr.Markdown()\n\n                with gr.Tab('HTML'):\n                    shared.gradio['html-default'] = gr.HTML()\n\n                with gr.Tab('Logits'):\n                    with gr.Row():\n                        with gr.Column(scale=10):\n                            shared.gradio['get_logits-default'] = gr.Button('Get next token probabilities')\n                        with gr.Column(scale=1):\n                            shared.gradio['use_samplers-default'] = gr.Checkbox(label='Use samplers', value=True, elem_classes=['no-background'])\n\n 
                   with gr.Row():\n                        shared.gradio['logits-default'] = gr.Textbox(lines=23, label='Output', elem_classes=['textbox_logits', 'add_scrollbar'])\n                        shared.gradio['logits-default-previous'] = gr.Textbox(lines=23, label='Previous output', elem_classes=['textbox_logits', 'add_scrollbar'])\n\n                with gr.Tab('Tokens'):\n                    shared.gradio['get_tokens-default'] = gr.Button('Get token IDs for the input')\n                    shared.gradio['tokens-default'] = gr.Textbox(lines=23, label='Tokens', elem_classes=['textbox_logits', 'add_scrollbar', 'monospace'])\n\n\ndef create_event_handlers():\n    shared.gradio['Generate-default'].click(\n        ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(\n        lambda: [gr.update(visible=True), gr.update(visible=False)], None, gradio('Stop-default', 'Generate-default')).then(\n        generate_reply_wrapper, gradio('textbox-default', 'interface_state'), gradio(outputs), show_progress=False).then(\n        lambda state, left, right: state.update({'textbox-default': left, 'output_textbox': right}), gradio('interface_state', 'textbox-default', 'output_textbox'), None).then(\n        lambda: [gr.update(visible=False), gr.update(visible=True)], None, gradio('Stop-default', 'Generate-default')).then(\n        None, None, None, js=f'() => {{{ui.audio_notification_js}}}')\n\n    shared.gradio['textbox-default'].submit(\n        ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(\n        lambda: [gr.update(visible=True), gr.update(visible=False)], None, gradio('Stop-default', 'Generate-default')).then(\n        generate_reply_wrapper, gradio('textbox-default', 'interface_state'), gradio(outputs), show_progress=False).then(\n        lambda state, left, right: state.update({'textbox-default': left, 'output_textbox': right}), gradio('interface_state', 'textbox-default', 
'output_textbox'), None).then(\n        lambda: [gr.update(visible=False), gr.update(visible=True)], None, gradio('Stop-default', 'Generate-default')).then(\n        None, None, None, js=f'() => {{{ui.audio_notification_js}}}')\n\n    shared.gradio['Continue-default'].click(\n        ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(\n        lambda: [gr.update(visible=True), gr.update(visible=False)], None, gradio('Stop-default', 'Generate-default')).then(\n        generate_reply_wrapper, gradio('output_textbox', 'interface_state'), gradio(outputs), show_progress=False).then(\n        lambda state, left, right: state.update({'textbox-default': left, 'output_textbox': right}), gradio('interface_state', 'textbox-default', 'output_textbox'), None).then(\n        lambda: [gr.update(visible=False), gr.update(visible=True)], None, gradio('Stop-default', 'Generate-default')).then(\n        None, None, None, js=f'() => {{{ui.audio_notification_js}}}')\n\n    shared.gradio['Stop-default'].click(stop_everything_event, None, None, queue=False)\n    shared.gradio['markdown_render-default'].click(lambda x: x, gradio('output_textbox'), gradio('markdown-default'), queue=False)\n    shared.gradio['prompt_menu-default'].change(lambda x: (load_prompt(x), \"\"), gradio('prompt_menu-default'), gradio('textbox-default', 'output_textbox'), show_progress=False)\n    shared.gradio['new_prompt-default'].click(handle_new_prompt, None, gradio('prompt_menu-default'), show_progress=False)\n\n    # Input change handler to save input (reusing notebook's debounced saving)\n    shared.gradio['textbox-default'].change(\n        store_notebook_state_and_debounce,\n        gradio('textbox-default', 'prompt_menu-default'),\n        None,\n        show_progress=False\n    )\n\n    shared.gradio['delete_prompt-default'].click(\n        lambda: [gr.update(visible=False), gr.update(visible=True), gr.update(visible=True)],\n        None,\n        
gradio('delete_prompt-default', 'delete_prompt-cancel-default', 'delete_prompt-confirm-default'),\n        show_progress=False)\n\n    shared.gradio['delete_prompt-cancel-default'].click(\n        lambda: [gr.update(visible=True), gr.update(visible=False), gr.update(visible=False)],\n        None,\n        gradio('delete_prompt-default', 'delete_prompt-cancel-default', 'delete_prompt-confirm-default'),\n        show_progress=False)\n\n    shared.gradio['delete_prompt-confirm-default'].click(\n        handle_delete_prompt_confirm_default,\n        gradio('prompt_menu-default'),\n        gradio('prompt_menu-default', 'delete_prompt-default', 'delete_prompt-cancel-default', 'delete_prompt-confirm-default'),\n        show_progress=False)\n\n    shared.gradio['rename_prompt-default'].click(\n        handle_rename_prompt_click_default,\n        gradio('prompt_menu-default'),\n        gradio('rename_prompt_to-default', 'rename_prompt-default', 'rename_prompt-cancel-default', 'rename_prompt-confirm-default'),\n        show_progress=False)\n\n    shared.gradio['rename_prompt-cancel-default'].click(\n        lambda: [gr.update(visible=False), gr.update(visible=True), gr.update(visible=False), gr.update(visible=False)],\n        None,\n        gradio('rename_prompt_to-default', 'rename_prompt-default', 'rename_prompt-cancel-default', 'rename_prompt-confirm-default'),\n        show_progress=False)\n\n    shared.gradio['rename_prompt-confirm-default'].click(\n        handle_rename_prompt_confirm_default,\n        gradio('rename_prompt_to-default', 'prompt_menu-default'),\n        gradio('prompt_menu-default', 'rename_prompt_to-default', 'rename_prompt-default', 'rename_prompt-cancel-default', 'rename_prompt-confirm-default'),\n        show_progress=False)\n\n    shared.gradio['textbox-default'].change(lambda x: f\"<span>{count_tokens(x)}</span>\", gradio('textbox-default'), gradio('token-counter-default'), show_progress=False)\n    shared.gradio['get_logits-default'].click(\n   
     ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(\n        logits.get_next_logits, gradio('textbox-default', 'interface_state', 'use_samplers-default', 'logits-default'), gradio('logits-default', 'logits-default-previous'), show_progress=False)\n\n    shared.gradio['get_tokens-default'].click(get_token_ids, gradio('textbox-default'), gradio('tokens-default'), show_progress=False)\n\n\ndef handle_new_prompt():\n    new_name = utils.current_time()\n\n    # Create the new prompt file\n    prompt_path = shared.user_data_dir / \"logs\" / \"notebook\" / f\"{new_name}.txt\"\n    prompt_path.parent.mkdir(parents=True, exist_ok=True)\n    prompt_path.write_text(\"In this story,\", encoding='utf-8')\n\n    return gr.update(choices=utils.get_available_prompts(), value=new_name)\n\n\ndef handle_delete_prompt_confirm_default(prompt_name):\n    available_prompts = utils.get_available_prompts()\n    current_index = available_prompts.index(prompt_name) if prompt_name in available_prompts else 0\n\n    (shared.user_data_dir / \"logs\" / \"notebook\" / f\"{prompt_name}.txt\").unlink(missing_ok=True)\n    available_prompts = utils.get_available_prompts()\n\n    if available_prompts:\n        new_value = available_prompts[min(current_index, len(available_prompts) - 1)]\n    else:\n        new_value = utils.current_time()\n        (shared.user_data_dir / \"logs\" / \"notebook\").mkdir(parents=True, exist_ok=True)\n        (shared.user_data_dir / \"logs\" / \"notebook\" / f\"{new_value}.txt\").write_text(\"In this story,\")\n        available_prompts = [new_value]\n\n    return [\n        gr.update(choices=available_prompts, value=new_value),\n        gr.update(visible=True),\n        gr.update(visible=False),\n        gr.update(visible=False)\n    ]\n\n\ndef handle_rename_prompt_click_default(current_name):\n    return [\n        gr.update(value=current_name, visible=True),\n        gr.update(visible=False),\n        
gr.update(visible=True),\n        gr.update(visible=True)\n    ]\n\n\ndef handle_rename_prompt_confirm_default(new_name, current_name):\n    old_path = shared.user_data_dir / \"logs\" / \"notebook\" / f\"{current_name}.txt\"\n    new_path = shared.user_data_dir / \"logs\" / \"notebook\" / f\"{new_name}.txt\"\n\n    if old_path.exists() and not new_path.exists():\n        old_path.rename(new_path)\n\n    available_prompts = utils.get_available_prompts()\n    return [\n        gr.update(choices=available_prompts, value=new_name),\n        gr.update(visible=False),\n        gr.update(visible=True),\n        gr.update(visible=False),\n        gr.update(visible=False)\n    ]\n"
  },
  {
    "path": "modules/ui_file_saving.py",
    "content": "import traceback\n\nimport gradio as gr\n\nfrom modules import chat, presets, shared, ui, utils\nfrom modules.utils import gradio, sanitize_filename\n\n\ndef create_ui():\n    mu = shared.args.multi_user\n\n    # Server-side per-session root paths for the generic file saver/deleter.\n    # Set by the handler that opens the dialog, read by the confirm handler.\n    # Using gr.State so they are session-scoped and safe for multi-user.\n    shared.gradio['save_root_state'] = gr.State(None)\n    shared.gradio['delete_root_state'] = gr.State(None)\n\n    # Text file saver\n    with gr.Group(visible=False, elem_classes='file-saver') as shared.gradio['file_saver']:\n        shared.gradio['save_filename'] = gr.Textbox(lines=1, label='File name')\n        shared.gradio['save_root'] = gr.Textbox(lines=1, label='File folder', info='For reference. Unchangeable.', interactive=False)\n        shared.gradio['save_contents'] = gr.Textbox(lines=10, label='File contents')\n        with gr.Row():\n            shared.gradio['save_cancel'] = gr.Button('Cancel', elem_classes=\"small-button\")\n            shared.gradio['save_confirm'] = gr.Button('Save', elem_classes=\"small-button\", variant='primary', interactive=not mu)\n\n    # Text file deleter\n    with gr.Group(visible=False, elem_classes='file-saver') as shared.gradio['file_deleter']:\n        shared.gradio['delete_filename'] = gr.Textbox(lines=1, label='File name')\n        shared.gradio['delete_root'] = gr.Textbox(lines=1, label='File folder', info='For reference. 
Unchangeable.', interactive=False)\n        with gr.Row():\n            shared.gradio['delete_cancel'] = gr.Button('Cancel', elem_classes=\"small-button\")\n            shared.gradio['delete_confirm'] = gr.Button('Delete', elem_classes=\"small-button\", variant='stop', interactive=not mu)\n\n    # Character saver/deleter\n    with gr.Group(visible=False, elem_classes='file-saver') as shared.gradio['character_saver']:\n        shared.gradio['save_character_filename'] = gr.Textbox(lines=1, label='File name', info=f'The character will be saved to your {shared.user_data_dir}/characters folder with this base filename.')\n        with gr.Row():\n            shared.gradio['save_character_cancel'] = gr.Button('Cancel', elem_classes=\"small-button\")\n            shared.gradio['save_character_confirm'] = gr.Button('Save', elem_classes=\"small-button\", variant='primary', interactive=not mu)\n\n    with gr.Group(visible=False, elem_classes='file-saver') as shared.gradio['character_deleter']:\n        gr.Markdown('Confirm the character deletion?')\n        with gr.Row():\n            shared.gradio['delete_character_cancel'] = gr.Button('Cancel', elem_classes=\"small-button\")\n            shared.gradio['delete_character_confirm'] = gr.Button('Delete', elem_classes=\"small-button\", variant='stop', interactive=not mu)\n\n    # User saver/deleter\n    with gr.Group(visible=False, elem_classes='file-saver') as shared.gradio['user_saver']:\n        shared.gradio['save_user_filename'] = gr.Textbox(lines=1, label='File name', info=f'The user profile will be saved to your {shared.user_data_dir}/users folder with this base filename.')\n        with gr.Row():\n            shared.gradio['save_user_cancel'] = gr.Button('Cancel', elem_classes=\"small-button\")\n            shared.gradio['save_user_confirm'] = gr.Button('Save', elem_classes=\"small-button\", variant='primary', interactive=not mu)\n\n    with gr.Group(visible=False, elem_classes='file-saver') as 
shared.gradio['user_deleter']:\n        gr.Markdown('Confirm the user deletion?')\n        with gr.Row():\n            shared.gradio['delete_user_cancel'] = gr.Button('Cancel', elem_classes=\"small-button\")\n            shared.gradio['delete_user_confirm'] = gr.Button('Delete', elem_classes=\"small-button\", variant='stop', interactive=not mu)\n\n    # Preset saver\n    with gr.Group(visible=False, elem_classes='file-saver') as shared.gradio['preset_saver']:\n        shared.gradio['save_preset_filename'] = gr.Textbox(lines=1, label='File name', info=f'The preset will be saved to your {shared.user_data_dir}/presets folder with this base filename.')\n        shared.gradio['save_preset_contents'] = gr.Textbox(lines=10, label='File contents')\n        with gr.Row():\n            shared.gradio['save_preset_cancel'] = gr.Button('Cancel', elem_classes=\"small-button\")\n            shared.gradio['save_preset_confirm'] = gr.Button('Save', elem_classes=\"small-button\", variant='primary', interactive=not mu)\n\n\ndef create_event_handlers():\n    shared.gradio['save_preset'].click(\n        ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(\n        handle_save_preset_click, gradio('interface_state'), gradio('save_preset_contents', 'save_preset_filename', 'preset_saver'), show_progress=False)\n\n    shared.gradio['delete_preset'].click(handle_delete_preset_click, gradio('preset_menu'), gradio('delete_filename', 'delete_root', 'delete_root_state', 'file_deleter'), show_progress=False)\n    shared.gradio['save_grammar'].click(handle_save_grammar_click, gradio('grammar_string'), gradio('save_contents', 'save_filename', 'save_root', 'save_root_state', 'file_saver'), show_progress=False)\n    shared.gradio['delete_grammar'].click(handle_delete_grammar_click, gradio('grammar_file'), gradio('delete_filename', 'delete_root', 'delete_root_state', 'file_deleter'), show_progress=False)\n\n    
shared.gradio['save_preset_confirm'].click(handle_save_preset_confirm_click, gradio('save_preset_filename', 'save_preset_contents'), gradio('preset_menu', 'preset_saver'), show_progress=False)\n    shared.gradio['save_confirm'].click(handle_save_confirm_click, gradio('save_root_state', 'save_filename', 'save_contents'), gradio('save_root_state', 'file_saver'), show_progress=False)\n    shared.gradio['delete_confirm'].click(handle_delete_confirm_click, gradio('delete_root_state', 'delete_filename'), gradio('delete_root_state', 'file_deleter'), show_progress=False)\n    shared.gradio['save_character_confirm'].click(handle_save_character_confirm_click, gradio('name2', 'greeting', 'context', 'character_picture', 'save_character_filename'), gradio('character_menu', 'character_saver'), show_progress=False)\n    shared.gradio['delete_character_confirm'].click(handle_delete_character_confirm_click, gradio('character_menu'), gradio('character_menu', 'character_deleter'), show_progress=False)\n\n    shared.gradio['save_preset_cancel'].click(lambda: gr.update(visible=False), None, gradio('preset_saver'), show_progress=False)\n    shared.gradio['save_cancel'].click(lambda: gr.update(visible=False), None, gradio('file_saver'))\n    shared.gradio['delete_cancel'].click(lambda: gr.update(visible=False), None, gradio('file_deleter'))\n    shared.gradio['save_character_cancel'].click(lambda: gr.update(visible=False), None, gradio('character_saver'), show_progress=False)\n    shared.gradio['delete_character_cancel'].click(lambda: gr.update(visible=False), None, gradio('character_deleter'), show_progress=False)\n\n    # User save/delete event handlers\n    shared.gradio['save_user_confirm'].click(handle_save_user_confirm_click, gradio('name1', 'user_bio', 'your_picture', 'save_user_filename'), gradio('user_menu', 'user_saver'), show_progress=False)\n    shared.gradio['delete_user_confirm'].click(handle_delete_user_confirm_click, gradio('user_menu'), gradio('user_menu', 
'user_deleter'), show_progress=False)\n    shared.gradio['save_user_cancel'].click(lambda: gr.update(visible=False), None, gradio('user_saver'), show_progress=False)\n    shared.gradio['delete_user_cancel'].click(lambda: gr.update(visible=False), None, gradio('user_deleter'), show_progress=False)\n\n\ndef handle_save_preset_confirm_click(filename, contents):\n    try:\n        filename = sanitize_filename(filename)\n        utils.save_file(str(shared.user_data_dir / \"presets\" / f\"{filename}.yaml\"), contents)\n        available_presets = utils.get_available_presets()\n        output = gr.update(choices=available_presets, value=filename)\n    except Exception:\n        output = gr.update()\n        traceback.print_exc()\n\n    return [\n        output,\n        gr.update(visible=False)\n    ]\n\n\ndef handle_save_confirm_click(root_state, filename, contents):\n    try:\n        if root_state is None:\n            return None, gr.update(visible=False)\n\n        filename = sanitize_filename(filename)\n        utils.save_file(root_state + filename, contents)\n    except Exception:\n        traceback.print_exc()\n\n    return None, gr.update(visible=False)\n\n\ndef handle_delete_confirm_click(root_state, filename):\n    try:\n        if root_state is None:\n            return None, gr.update(visible=False)\n\n        filename = sanitize_filename(filename)\n        utils.delete_file(root_state + filename)\n    except Exception:\n        traceback.print_exc()\n\n    return None, gr.update(visible=False)\n\n\ndef handle_save_character_confirm_click(name2, greeting, context, character_picture, filename):\n    try:\n        chat.save_character(name2, greeting, context, character_picture, filename)\n        available_characters = utils.get_available_characters()\n        output = gr.update(choices=available_characters, value=filename)\n    except Exception:\n        output = gr.update()\n        traceback.print_exc()\n\n    return [\n        output,\n        
gr.update(visible=False)\n    ]\n\n\ndef handle_delete_character_confirm_click(character):\n    try:\n        index = str(utils.get_available_characters().index(character))\n        chat.delete_character(character)\n        output = chat.update_character_menu_after_deletion(index)\n    except Exception:\n        output = gr.update()\n        traceback.print_exc()\n\n    return [\n        output,\n        gr.update(visible=False)\n    ]\n\n\ndef handle_save_preset_click(state):\n    contents = presets.generate_preset_yaml(state)\n    return [\n        contents,\n        \"My Preset\",\n        gr.update(visible=True)\n    ]\n\n\ndef handle_delete_preset_click(preset):\n    root = str(shared.user_data_dir / \"presets\") + \"/\"\n    return [\n        f\"{preset}.yaml\",\n        root,\n        root,\n        gr.update(visible=True)\n    ]\n\n\ndef handle_save_grammar_click(grammar_string):\n    root = str(shared.user_data_dir / \"grammars\") + \"/\"\n    return [\n        grammar_string,\n        \"My Fancy Grammar.gbnf\",\n        root,\n        root,\n        gr.update(visible=True)\n    ]\n\n\ndef handle_delete_grammar_click(grammar_file):\n    root = str(shared.user_data_dir / \"grammars\") + \"/\"\n    return [\n        grammar_file,\n        root,\n        root,\n        gr.update(visible=True)\n    ]\n\n\ndef handle_save_user_confirm_click(name1, user_bio, your_picture, filename):\n    try:\n        chat.save_user(name1, user_bio, your_picture, filename)\n        available_users = utils.get_available_users()\n        output = gr.update(choices=available_users, value=filename)\n    except Exception:\n        output = gr.update()\n        traceback.print_exc()\n\n    return [\n        output,\n        gr.update(visible=False)\n    ]\n\n\ndef handle_delete_user_confirm_click(user):\n    try:\n        index = str(utils.get_available_users().index(user))\n        chat.delete_user(user)\n        output = chat.update_user_menu_after_deletion(index)\n    except 
Exception:\n        output = gr.update()\n        traceback.print_exc()\n\n    return [\n        output,\n        gr.update(visible=False)\n    ]\n"
  },
  {
    "path": "modules/ui_image_generation.py",
    "content": "import json\nimport os\nimport random\nimport time\nimport traceback\nfrom datetime import datetime\nfrom pathlib import Path\n\nimport gradio as gr\nfrom PIL.PngImagePlugin import PngInfo\n\nfrom modules import shared, ui, utils\nfrom modules.image_models import (\n    get_pipeline_type,\n    load_image_model,\n    unload_image_model\n)\nfrom modules.image_utils import open_image_safely\nfrom modules.logging_colors import logger\nfrom modules.text_generation import stop_everything_event\nfrom modules.utils import check_model_loaded, gradio\n\nASPECT_RATIOS = {\n    \"1:1 Square\": (1, 1),\n    \"16:9 Cinema\": (16, 9),\n    \"9:16 Mobile\": (9, 16),\n    \"4:3 Photo\": (4, 3),\n    \"Custom\": None,\n}\n\nSTEP = 16\nIMAGES_PER_PAGE = 32\n\n# Settings keys to save in PNG metadata (Generate tab only)\nMETADATA_SETTINGS_KEYS = [\n    'image_prompt',\n    'image_neg_prompt',\n    'image_width',\n    'image_height',\n    'image_aspect_ratio',\n    'image_steps',\n    'image_seed',\n    'image_cfg_scale',\n]\n\n# Cache for all image paths\n_image_cache = []\n_cache_timestamp = 0\n\n\ndef round_to_step(value, step=STEP):\n    return round(value / step) * step\n\n\ndef clamp(value, min_val, max_val):\n    return max(min_val, min(max_val, value))\n\n\ndef apply_aspect_ratio(aspect_ratio, current_width, current_height):\n    if aspect_ratio == \"Custom\" or aspect_ratio not in ASPECT_RATIOS:\n        return current_width, current_height\n\n    w_ratio, h_ratio = ASPECT_RATIOS[aspect_ratio]\n\n    if w_ratio == h_ratio:\n        base = min(current_width, current_height)\n        new_width = base\n        new_height = base\n    elif w_ratio < h_ratio:\n        new_width = current_width\n        new_height = round_to_step(current_width * h_ratio / w_ratio)\n    else:\n        new_height = current_height\n        new_width = round_to_step(current_height * w_ratio / h_ratio)\n\n    new_width = clamp(new_width, 256, 2048)\n    new_height = clamp(new_height, 256, 
2048)\n\n    return int(new_width), int(new_height)\n\n\ndef update_height_from_width(width, aspect_ratio):\n    if aspect_ratio == \"Custom\" or aspect_ratio not in ASPECT_RATIOS:\n        return gr.update()\n\n    w_ratio, h_ratio = ASPECT_RATIOS[aspect_ratio]\n    new_height = round_to_step(width * h_ratio / w_ratio)\n    new_height = clamp(new_height, 256, 2048)\n\n    return int(new_height)\n\n\ndef update_width_from_height(height, aspect_ratio):\n    if aspect_ratio == \"Custom\" or aspect_ratio not in ASPECT_RATIOS:\n        return gr.update()\n\n    w_ratio, h_ratio = ASPECT_RATIOS[aspect_ratio]\n    new_width = round_to_step(height * w_ratio / h_ratio)\n    new_width = clamp(new_width, 256, 2048)\n\n    return int(new_width)\n\n\ndef swap_dimensions_and_update_ratio(width, height, aspect_ratio):\n    new_width, new_height = height, width\n\n    new_ratio = \"Custom\"\n    for name, ratios in ASPECT_RATIOS.items():\n        if ratios is None:\n            continue\n        w_r, h_r = ratios\n        expected_height = new_width * h_r / w_r\n        if abs(expected_height - new_height) < STEP:\n            new_ratio = name\n            break\n\n    return new_width, new_height, new_ratio\n\n\ndef build_generation_metadata(state, actual_seed):\n    \"\"\"Build metadata dict from generation settings.\"\"\"\n    metadata = {}\n    for key in METADATA_SETTINGS_KEYS:\n        if key in state:\n            metadata[key] = state[key]\n\n    # Store the actual seed used (not -1)\n    metadata['image_seed'] = actual_seed\n    metadata['generated_at'] = datetime.now().isoformat()\n    metadata['model'] = shared.image_model_name\n\n    return metadata\n\n\ndef save_generated_images(images, state, actual_seed):\n    \"\"\"Save images with generation metadata embedded in PNG. 
Returns list of saved file paths.\"\"\"\n    if shared.args.multi_user:\n        return []\n\n    date_str = datetime.now().strftime(\"%Y-%m-%d\")\n    folder_path = str(shared.user_data_dir / \"image_outputs\" / date_str)\n    os.makedirs(folder_path, exist_ok=True)\n\n    metadata = build_generation_metadata(state, actual_seed)\n    metadata_json = json.dumps(metadata, ensure_ascii=False)\n\n    saved_paths = []\n    for idx, img in enumerate(images):\n        timestamp = datetime.now().strftime(\"%H-%M-%S\")\n        filename = f\"TGW_{timestamp}_{actual_seed:010d}_{idx:03d}.png\"\n        filepath = os.path.join(folder_path, filename)\n\n        # Create PNG metadata\n        png_info = PngInfo()\n        png_info.add_text(\"image_gen_settings\", metadata_json)\n\n        # Save with metadata\n        img.save(filepath, pnginfo=png_info)\n        saved_paths.append(filepath)\n\n    return saved_paths\n\n\ndef read_image_metadata(image_path):\n    \"\"\"Read generation metadata from PNG file.\"\"\"\n    try:\n        img = open_image_safely(image_path)\n        if img is None:\n            return None\n        try:\n            if hasattr(img, 'text') and 'image_gen_settings' in img.text:\n                return json.loads(img.text['image_gen_settings'])\n        finally:\n            img.close()\n    except Exception as e:\n        logger.debug(f\"Could not read metadata from {image_path}: {e}\")\n    return None\n\n\ndef format_metadata_for_display(metadata):\n    \"\"\"Format metadata as readable text.\"\"\"\n    if not metadata:\n        return \"No generation settings found in this image.\"\n\n    lines = []\n\n    # Display in a nice order\n    display_order = [\n        ('image_prompt', 'Prompt'),\n        ('image_neg_prompt', 'Negative Prompt'),\n        ('image_width', 'Width'),\n        ('image_height', 'Height'),\n        ('image_aspect_ratio', 'Aspect Ratio'),\n        ('image_steps', 'Steps'),\n        ('image_cfg_scale', 'CFG Scale'),\n        
('image_seed', 'Seed'),\n        ('model', 'Model'),\n        ('generated_at', 'Generated At'),\n    ]\n\n    for key, label in display_order:\n        if key in metadata:\n            value = metadata[key]\n            if key in ['image_prompt', 'image_neg_prompt'] and value:\n                # Truncate long prompts for display\n                if len(str(value)) > 200:\n                    value = str(value)[:200] + \"...\"\n            lines.append(f\"**{label}:** {value}\")\n\n    return \"\\n\\n\".join(lines)\n\n\ndef get_all_history_images(force_refresh=False):\n    \"\"\"Get all history images sorted by modification time (newest first). Uses caching.\"\"\"\n    global _image_cache, _cache_timestamp\n\n    output_dir = str(shared.user_data_dir / \"image_outputs\")\n    if not os.path.exists(output_dir):\n        return []\n\n    # Check if we need to refresh cache\n    current_time = time.time()\n    if not force_refresh and _image_cache and (current_time - _cache_timestamp) < 2:\n        return _image_cache\n\n    image_files = []\n    for root, _, files in os.walk(output_dir):\n        for file in files:\n            if file.endswith((\".png\", \".jpg\", \".jpeg\")):\n                full_path = os.path.join(root, file)\n                image_files.append((full_path, os.path.getmtime(full_path)))\n\n    image_files.sort(key=lambda x: x[1], reverse=True)\n    _image_cache = [x[0] for x in image_files]\n    _cache_timestamp = current_time\n\n    return _image_cache\n\n\ndef get_paginated_images(page=0, force_refresh=False):\n    \"\"\"Get images for a specific page.\"\"\"\n    all_images = get_all_history_images(force_refresh)\n    total_images = len(all_images)\n    total_pages = max(1, (total_images + IMAGES_PER_PAGE - 1) // IMAGES_PER_PAGE)\n\n    # Clamp page to valid range\n    page = max(0, min(page, total_pages - 1))\n\n    start_idx = page * IMAGES_PER_PAGE\n    end_idx = min(start_idx + IMAGES_PER_PAGE, total_images)\n\n    page_images = 
all_images[start_idx:end_idx]\n\n    return page_images, page, total_pages, total_images\n\n\ndef get_initial_page_info():\n    \"\"\"Get page info string for initial load.\"\"\"\n    _, page, total_pages, total_images = get_paginated_images(0)\n    return f\"Page {page + 1} of {total_pages} ({total_images} total images)\"\n\n\ndef refresh_gallery(current_page=0):\n    \"\"\"Refresh gallery with current page.\"\"\"\n    images, page, total_pages, total_images = get_paginated_images(current_page, force_refresh=True)\n    page_info = f\"Page {page + 1} of {total_pages} ({total_images} total images)\"\n    return images, page, page_info\n\n\ndef go_to_page(page_num, current_page):\n    \"\"\"Go to a specific page (1-indexed input).\"\"\"\n    try:\n        page = int(page_num) - 1  # Convert to 0-indexed\n    except (ValueError, TypeError):\n        page = current_page\n\n    images, page, total_pages, total_images = get_paginated_images(page)\n    page_info = f\"Page {page + 1} of {total_pages} ({total_images} total images)\"\n    return images, page, page_info\n\n\ndef next_page(current_page):\n    \"\"\"Go to next page.\"\"\"\n    images, page, total_pages, total_images = get_paginated_images(current_page + 1)\n    page_info = f\"Page {page + 1} of {total_pages} ({total_images} total images)\"\n    return images, page, page_info\n\n\ndef prev_page(current_page):\n    \"\"\"Go to previous page.\"\"\"\n    images, page, total_pages, total_images = get_paginated_images(current_page - 1)\n    page_info = f\"Page {page + 1} of {total_pages} ({total_images} total images)\"\n    return images, page, page_info\n\n\ndef on_gallery_select(evt: gr.SelectData, current_page):\n    \"\"\"Handle image selection from gallery.\"\"\"\n    if evt.index is None:\n        return \"\", \"Select an image to view its settings\"\n\n    if not _image_cache:\n        get_all_history_images()\n\n    all_images = _image_cache\n    total_images = len(all_images)\n\n    # Calculate the actual 
index in the full list\n    start_idx = current_page * IMAGES_PER_PAGE\n    actual_idx = start_idx + evt.index\n\n    if actual_idx >= total_images:\n        return \"\", \"Image not found\"\n\n    image_path = all_images[actual_idx]\n    metadata = read_image_metadata(image_path)\n    metadata_display = format_metadata_for_display(metadata)\n\n    return image_path, metadata_display\n\n\ndef send_to_generate(selected_image_path):\n    \"\"\"Load settings from selected image and return updates for all Generate tab inputs.\"\"\"\n    if not selected_image_path or not os.path.exists(selected_image_path):\n        return [gr.update()] * 8 + [\"No image selected\"]\n\n    metadata = read_image_metadata(selected_image_path)\n    if not metadata:\n        return [gr.update()] * 8 + [\"No settings found in this image\"]\n\n    # Return updates for each input element in order\n    updates = [\n        gr.update(value=metadata.get('image_prompt', '')),\n        gr.update(value=metadata.get('image_neg_prompt', '')),\n        gr.update(value=metadata.get('image_width', 1024)),\n        gr.update(value=metadata.get('image_height', 1024)),\n        gr.update(value=metadata.get('image_aspect_ratio', '1:1 Square')),\n        gr.update(value=metadata.get('image_steps', 9)),\n        gr.update(value=metadata.get('image_seed', -1)),\n        gr.update(value=metadata.get('image_cfg_scale', 0.0)),\n    ]\n\n    status = f\"✓ Settings loaded from image (seed: {metadata.get('image_seed', 'unknown')})\"\n    return updates + [status]\n\n\ndef read_dropped_image_metadata(image_path):\n    \"\"\"Read metadata from a dropped/uploaded image.\"\"\"\n    if not image_path:\n        return \"Drop an image to view its generation settings.\"\n\n    metadata = read_image_metadata(image_path)\n    return format_metadata_for_display(metadata)\n\n\ndef create_ui():\n    if shared.settings['image_model_menu'] != 'None':\n        shared.image_model_name = shared.settings['image_model_menu']\n\n    with 
gr.Tab(\"Image AI\", elem_id=\"image-ai-tab\"):\n        with gr.Tabs():\n            # TAB 1: GENERATE\n            with gr.TabItem(\"Generate\"):\n                with gr.Row():\n                    with gr.Column(scale=4, min_width=350):\n                        shared.gradio['image_prompt'] = gr.Textbox(\n                            label=\"Prompt\",\n                            placeholder=\"Describe your imagination...\",\n                            lines=3,\n                            autofocus=True,\n                            value=shared.settings['image_prompt']\n                        )\n                        shared.gradio['image_neg_prompt'] = gr.Textbox(\n                            label=\"Negative Prompt\",\n                            placeholder=\"Low quality...\",\n                            lines=3,\n                            value=shared.settings['image_neg_prompt']\n                        )\n                        shared.gradio['image_llm_variations'] = gr.Checkbox(\n                            value=shared.settings['image_llm_variations'],\n                            label='LLM Prompt Variations',\n                            elem_id=\"llm-prompt-variations\",\n                        )\n                        shared.gradio['image_llm_variations_prompt'] = gr.Textbox(\n                            value=shared.settings['image_llm_variations_prompt'],\n                            label='Variation Prompt',\n                            lines=3,\n                            placeholder='Instructions for generating prompt variations...',\n                            visible=shared.settings['image_llm_variations'],\n                            info='Use the loaded LLM to generate creative prompt variations for each sequential batch.'\n                        )\n\n                        shared.gradio['image_generate_btn'] = gr.Button(\"Generate\", variant=\"primary\", size=\"lg\")\n                        shared.gradio['image_stop_btn'] 
= gr.Button(\"Stop\", size=\"lg\", visible=False)\n                        shared.gradio['image_progress'] = gr.HTML(\n                            value=progress_bar_html(),\n                            elem_id=\"image-progress\"\n                        )\n\n                        gr.Markdown(\"### Dimensions\")\n                        with gr.Row():\n                            with gr.Column():\n                                shared.gradio['image_width'] = gr.Slider(256, 2048, value=shared.settings['image_width'], step=STEP, label=\"Width\")\n                            with gr.Column():\n                                shared.gradio['image_height'] = gr.Slider(256, 2048, value=shared.settings['image_height'], step=STEP, label=\"Height\")\n                            shared.gradio['image_swap_btn'] = gr.Button(\"⇄ Swap\", elem_classes='refresh-button', scale=0, min_width=80, elem_id=\"swap-height-width\")\n\n                        with gr.Row():\n                            shared.gradio['image_aspect_ratio'] = gr.Radio(\n                                choices=[\"1:1 Square\", \"16:9 Cinema\", \"9:16 Mobile\", \"4:3 Photo\", \"Custom\"],\n                                value=shared.settings['image_aspect_ratio'],\n                                label=\"Aspect Ratio\",\n                                interactive=True\n                            )\n\n                        gr.Markdown(\"### Config\")\n                        with gr.Row():\n                            with gr.Column():\n                                shared.gradio['image_steps'] = gr.Slider(1, 100, value=shared.settings['image_steps'], step=1, label=\"Steps\")\n                                shared.gradio['image_cfg_scale'] = gr.Slider(\n                                    0.0, 10.0,\n                                    value=shared.settings['image_cfg_scale'],\n                                    step=0.1,\n                                    label=\"CFG Scale\",\n                     
               info=\"Z-Image Turbo: 0.0 | Qwen: 4.0\"\n                                )\n                                shared.gradio['image_seed'] = gr.Number(label=\"Seed\", value=shared.settings['image_seed'], precision=0, info=\"-1 = Random\")\n\n                            with gr.Column():\n                                shared.gradio['image_batch_size'] = gr.Slider(1, 32, value=shared.settings['image_batch_size'], step=1, label=\"Batch Size (VRAM Heavy)\", info=\"Generates N images at once.\")\n                                shared.gradio['image_batch_count'] = gr.Slider(1, 128, value=shared.settings['image_batch_count'], step=1, label=\"Sequential Count (Loop)\", info=\"Repeats the generation N times.\")\n\n                    with gr.Column(scale=6, min_width=500):\n                        with gr.Column(elem_classes=[\"viewport-container\"]):\n                            shared.gradio['image_output_gallery'] = gr.Gallery(label=\"Output\", show_label=False, columns=2, rows=2, height=\"80vh\", object_fit=\"contain\", preview=True, elem_id=\"image-output-gallery\")\n\n            # TAB 2: GALLERY (with pagination)\n            with gr.TabItem(\"Gallery\"):\n                with gr.Row():\n                    with gr.Column(scale=3):\n                        # Pagination controls\n                        with gr.Row():\n                            shared.gradio['image_refresh_history'] = gr.Button(\"🔄 Refresh\", elem_classes=\"refresh-button\")\n                            shared.gradio['image_prev_page'] = gr.Button(\"◀ Prev Page\", elem_classes=\"refresh-button\")\n                            shared.gradio['image_page_info'] = gr.Markdown(value=get_initial_page_info, elem_id=\"image-page-info\")\n                            shared.gradio['image_next_page'] = gr.Button(\"Next Page ▶\", elem_classes=\"refresh-button\")\n                            shared.gradio['image_page_input'] = gr.Number(value=1, label=\"Page\", precision=0, minimum=1, scale=0, 
min_width=80)\n                            shared.gradio['image_go_to_page'] = gr.Button(\"Go\", elem_classes=\"refresh-button\", scale=0, min_width=50)\n\n                        # State for current page and selected image path\n                        shared.gradio['image_current_page'] = gr.State(value=0)\n                        shared.gradio['image_selected_path'] = gr.State(value=\"\")\n\n                        # Paginated gallery using gr.Gallery\n                        shared.gradio['image_history_gallery'] = gr.Gallery(\n                            value=lambda: get_paginated_images(0)[0],\n                            label=\"Image History\",\n                            show_label=False,\n                            columns=6,\n                            object_fit=\"cover\",\n                            height=\"auto\",\n                            allow_preview=True,\n                            elem_id=\"image-history-gallery\"\n                        )\n\n                    with gr.Column(scale=1):\n                        gr.Markdown(\"### Generation Settings\")\n                        shared.gradio['image_settings_display'] = gr.Markdown(\"Select an image to view its settings\")\n                        shared.gradio['image_send_to_generate'] = gr.Button(\"Send to Generate\", variant=\"primary\")\n                        shared.gradio['image_gallery_status'] = gr.Markdown(\"\")\n\n                        gr.Markdown(\"### Import Image\")\n                        shared.gradio['image_drop_upload'] = gr.Image(\n                            label=\"Drop image here to view settings\",\n                            type=\"filepath\",\n                            height=150\n                        )\n\n            # TAB 3: MODEL\n            with gr.TabItem(\"Model\"):\n                with gr.Row():\n                    with gr.Column():\n                        with gr.Row():\n                            shared.gradio['image_model_menu'] = 
gr.Dropdown(\n                                choices=utils.get_available_image_models(),\n                                value=shared.settings['image_model_menu'],\n                                label='Model',\n                                elem_classes='slim-dropdown'\n                            )\n                            shared.gradio['image_refresh_models'] = gr.Button(\"🔄\", elem_classes='refresh-button', scale=0, min_width=40)\n                            shared.gradio['image_load_model'] = gr.Button(\"Load\", variant='primary', elem_classes='refresh-button')\n                            shared.gradio['image_unload_model'] = gr.Button(\"Unload\", elem_classes='refresh-button')\n\n                        gr.Markdown(\"## Settings\")\n                        with gr.Row():\n                            with gr.Column():\n                                shared.gradio['image_quant'] = gr.Dropdown(\n                                    label='Quantization',\n                                    choices=['none', 'bnb-8bit', 'bnb-4bit', 'torchao-int8wo', 'torchao-fp4', 'torchao-float8wo'],\n                                    value=shared.settings['image_quant'],\n                                    info='BnB: bitsandbytes quantization. 
torchao: int8wo, fp4, float8wo.'\n                                )\n\n                                shared.gradio['image_dtype'] = gr.Dropdown(\n                                    choices=['bfloat16', 'float16'],\n                                    value=shared.settings['image_dtype'],\n                                    label='Data Type',\n                                    info='bfloat16 recommended for modern GPUs'\n                                )\n                                shared.gradio['image_attn_backend'] = gr.Dropdown(\n                                    choices=['sdpa', 'flash_attention_2'],\n                                    value=shared.settings['image_attn_backend'],\n                                    label='Attention Backend',\n                                    info='SDPA is default. Flash Attention requires compatible GPU.'\n                                )\n                            with gr.Column():\n                                shared.gradio['image_compile'] = gr.Checkbox(\n                                    value=shared.settings['image_compile'],\n                                    label='Compile Model',\n                                    info='Faster inference after first run. First run will be slow.'\n                                )\n                                shared.gradio['image_cpu_offload'] = gr.Checkbox(\n                                    value=shared.settings['image_cpu_offload'],\n                                    label='CPU Offload',\n                                    info='Enable for low VRAM GPUs. Slower but uses less memory.'\n                                )\n\n                    with gr.Column():\n                        shared.gradio['image_download_path'] = gr.Textbox(\n                            label=\"Download model\",\n                            placeholder=\"Tongyi-MAI/Z-Image-Turbo\",\n                            info=\"Enter HuggingFace path. Use : for branch, e.g. 
user/model:main\"\n                        )\n                        shared.gradio['image_download_btn'] = gr.Button(\"Download\", variant='primary')\n                        shared.gradio['image_model_status'] = gr.Markdown(value=\"\")\n\n\ndef create_event_handlers():\n    # Dimension controls\n    shared.gradio['image_aspect_ratio'].change(\n        apply_aspect_ratio,\n        gradio('image_aspect_ratio', 'image_width', 'image_height'),\n        gradio('image_width', 'image_height'),\n        show_progress=False\n    )\n\n    shared.gradio['image_width'].release(\n        update_height_from_width,\n        gradio('image_width', 'image_aspect_ratio'),\n        gradio('image_height'),\n        show_progress=False\n    )\n\n    shared.gradio['image_height'].release(\n        update_width_from_height,\n        gradio('image_height', 'image_aspect_ratio'),\n        gradio('image_width'),\n        show_progress=False\n    )\n\n    shared.gradio['image_swap_btn'].click(\n        swap_dimensions_and_update_ratio,\n        gradio('image_width', 'image_height', 'image_aspect_ratio'),\n        gradio('image_width', 'image_height', 'image_aspect_ratio'),\n        show_progress=False\n    )\n\n    # Generation\n    shared.gradio['image_generate_btn'].click(\n        ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(\n        lambda: [gr.update(visible=True), gr.update(visible=False)], None, gradio('image_stop_btn', 'image_generate_btn')).then(\n        generate, gradio('interface_state'), gradio('image_output_gallery', 'image_progress'), show_progress=False).then(\n        lambda: [gr.update(visible=False), gr.update(visible=True)], None, gradio('image_stop_btn', 'image_generate_btn'))\n\n    shared.gradio['image_prompt'].submit(\n        ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(\n        lambda: [gr.update(visible=True), gr.update(visible=False)], None, gradio('image_stop_btn', 
'image_generate_btn')).then(\n        generate, gradio('interface_state'), gradio('image_output_gallery', 'image_progress'), show_progress=False).then(\n        lambda: [gr.update(visible=False), gr.update(visible=True)], None, gradio('image_stop_btn', 'image_generate_btn'))\n\n    shared.gradio['image_neg_prompt'].submit(\n        ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(\n        lambda: [gr.update(visible=True), gr.update(visible=False)], None, gradio('image_stop_btn', 'image_generate_btn')).then(\n        generate, gradio('interface_state'), gradio('image_output_gallery', 'image_progress'), show_progress=False).then(\n        lambda: [gr.update(visible=False), gr.update(visible=True)], None, gradio('image_stop_btn', 'image_generate_btn'))\n\n    # Stop button\n    shared.gradio['image_stop_btn'].click(\n        stop_everything_event, None, None, show_progress=False\n    )\n\n    # Model management\n    shared.gradio['image_refresh_models'].click(\n        lambda: gr.update(choices=utils.get_available_image_models()),\n        None,\n        gradio('image_model_menu'),\n        show_progress=False\n    )\n\n    shared.gradio['image_load_model'].click(\n        load_image_model_wrapper,\n        gradio('image_model_menu', 'image_dtype', 'image_attn_backend', 'image_cpu_offload', 'image_compile', 'image_quant'),\n        gradio('image_model_status'),\n        show_progress=True\n    )\n\n    shared.gradio['image_unload_model'].click(\n        unload_image_model_wrapper,\n        None,\n        gradio('image_model_status'),\n        show_progress=False\n    )\n\n    shared.gradio['image_download_btn'].click(\n        download_image_model_wrapper,\n        gradio('image_download_path'),\n        gradio('image_model_status', 'image_model_menu'),\n        show_progress=True\n    )\n\n    # Gallery pagination handlers\n    shared.gradio['image_refresh_history'].click(\n        refresh_gallery,\n        
gradio('image_current_page'),\n        gradio('image_history_gallery', 'image_current_page', 'image_page_info'),\n        show_progress=False\n    )\n\n    shared.gradio['image_next_page'].click(\n        next_page,\n        gradio('image_current_page'),\n        gradio('image_history_gallery', 'image_current_page', 'image_page_info'),\n        show_progress=False\n    )\n\n    shared.gradio['image_prev_page'].click(\n        prev_page,\n        gradio('image_current_page'),\n        gradio('image_history_gallery', 'image_current_page', 'image_page_info'),\n        show_progress=False\n    )\n\n    shared.gradio['image_go_to_page'].click(\n        go_to_page,\n        gradio('image_page_input', 'image_current_page'),\n        gradio('image_history_gallery', 'image_current_page', 'image_page_info'),\n        show_progress=False\n    )\n\n    # Image selection from gallery\n    shared.gradio['image_history_gallery'].select(\n        on_gallery_select,\n        gradio('image_current_page'),\n        gradio('image_selected_path', 'image_settings_display'),\n        show_progress=False\n    )\n\n    # Send to Generate\n    shared.gradio['image_send_to_generate'].click(\n        send_to_generate,\n        gradio('image_selected_path'),\n        gradio(\n            'image_prompt',\n            'image_neg_prompt',\n            'image_width',\n            'image_height',\n            'image_aspect_ratio',\n            'image_steps',\n            'image_seed',\n            'image_cfg_scale',\n            'image_gallery_status'\n        ),\n        js=f'() => {{{ui.switch_tabs_js}; switch_to_image_ai_generate()}}',\n        show_progress=False\n    )\n\n    shared.gradio['image_drop_upload'].change(\n        read_dropped_image_metadata,\n        gradio('image_drop_upload'),\n        gradio('image_settings_display'),\n        show_progress=False\n    )\n\n    # LLM Variations visibility toggle\n    shared.gradio['image_llm_variations'].change(\n        lambda x: 
gr.update(visible=x),\n        gradio('image_llm_variations'),\n        gradio('image_llm_variations_prompt'),\n        show_progress=False\n    )\n\n\ndef generate_prompt_variation(state):\n    \"\"\"Generate a creative variation of the image prompt using the LLM.\"\"\"\n    from modules.chat import generate_chat_prompt\n    from modules.text_generation import generate_reply\n\n    prompt = state['image_prompt']\n\n    # Check if LLM is loaded\n    model_loaded, _ = check_model_loaded()\n    if not model_loaded:\n        logger.warning(\"No LLM loaded for prompt variation. Using original prompt.\")\n        return prompt\n\n    # Get the custom variation prompt or use default\n    variation_instruction = state.get('image_llm_variations_prompt', '')\n    if not variation_instruction:\n        variation_instruction = 'Write a variation of the image generation prompt above. Consider the intent of the user with that prompt and write something that will likely please them, with added details. Output only the new prompt. 
Do not add any explanations, prefixes, or additional text.'\n\n    augmented_message = f\"{prompt}\\n\\n=====\\n\\n{variation_instruction}\"\n\n    # Use minimal state for generation\n    var_state = state.copy()\n    var_state['history'] = {'internal': [], 'visible': [], 'metadata': {}}\n    var_state['auto_max_new_tokens'] = True\n    var_state['enable_thinking'] = False\n    var_state['reasoning_effort'] = 'low'\n    var_state['start_with'] = \"\"\n\n    formatted_prompt = generate_chat_prompt(augmented_message, var_state)\n\n    variation = \"\"\n    for reply in generate_reply(formatted_prompt, var_state, stopping_strings=[], is_chat=True):\n        variation = reply\n\n    # Strip thinking blocks if present\n    if \"</think>\" in variation:\n        variation = variation.rsplit(\"</think>\", 1)[1]\n    elif \"<|start|>assistant<|channel|>final<|message|>\" in variation:\n        variation = variation.rsplit(\"<|start|>assistant<|channel|>final<|message|>\", 1)[1]\n    elif \"<|channel|>final<|message|>\" in variation:\n        variation = variation.rsplit(\"<|channel|>final<|message|>\", 1)[1]\n    elif \"</seed:think>\" in variation:\n        variation = variation.rsplit(\"</seed:think>\", 1)[1]\n\n    variation = variation.strip()\n    if len(variation) >= 2 and variation.startswith('\"') and variation.endswith('\"'):\n        variation = variation[1:-1]\n\n    if variation:\n        logger.info(\"Prompt variation:\")\n        print(variation)\n        return variation\n\n    return prompt\n\n\ndef progress_bar_html(progress=0, text=\"\"):\n    \"\"\"Generate HTML for progress bar. 
Empty div when progress <= 0.\"\"\"\n    if progress <= 0:\n        return '<div class=\"image-ai-separator\"></div>'\n\n    return f'''<div class=\"image-ai-progress-wrapper\">\n        <div class=\"image-ai-progress-track\">\n            <div class=\"image-ai-progress-fill\" style=\"width: {progress * 100:.1f}%;\"></div>\n        </div>\n        <div class=\"image-ai-progress-text\">{text}</div>\n    </div>'''\n\n\ndef generate(state, save_images=True):\n    \"\"\"\n    Generate images using the loaded model.\n    Automatically adjusts parameters based on pipeline type.\n    \"\"\"\n    import queue\n    import threading\n\n    import torch\n\n    from modules.torch_utils import clear_torch_cache, get_device\n\n    try:\n        model_name = state['image_model_menu']\n\n        if not model_name or model_name == 'None':\n            logger.error(\"No image model selected. Go to the Model tab and select a model.\")\n            yield [], progress_bar_html()\n            return\n\n        if shared.image_model is None:\n            result = load_image_model(\n                model_name,\n                dtype=state['image_dtype'],\n                attn_backend=state['image_attn_backend'],\n                cpu_offload=state['image_cpu_offload'],\n                compile_model=state['image_compile'],\n                quant_method=state['image_quant']\n            )\n            if result is None:\n                logger.error(f\"Failed to load model `{model_name}`.\")\n                yield [], progress_bar_html()\n                return\n\n            shared.image_model_name = model_name\n\n        seed = state['image_seed']\n        if seed == -1:\n            seed = random.randint(0, 2**32 - 1)\n\n        device = get_device()\n        if device is None:\n            device = \"cpu\"\n        generator = torch.Generator(device)\n\n        all_images = []\n\n        # Get pipeline type for parameter adjustment\n        pipeline_type = getattr(shared, 
'image_pipeline_type', None)\n        if pipeline_type is None:\n            pipeline_type = get_pipeline_type(shared.image_model)\n\n        prompt = state['image_prompt']\n\n        shared.stop_everything = False\n\n        batch_count = int(state['image_batch_count'])\n        steps_per_batch = int(state['image_steps'])\n        total_steps = steps_per_batch * batch_count\n\n        # Queue for progress updates from callback\n        progress_queue = queue.Queue()\n\n        def interrupt_callback(pipe, step_index, timestep, callback_kwargs):\n            if shared.stop_everything:\n                pipe._interrupt = True\n            progress_queue.put(step_index + 1)\n            return callback_kwargs\n\n        gen_kwargs = {\n            \"prompt\": prompt,\n            \"negative_prompt\": state['image_neg_prompt'],\n            \"height\": int(state['image_height']),\n            \"width\": int(state['image_width']),\n            \"num_inference_steps\": steps_per_batch,\n            \"num_images_per_prompt\": int(state['image_batch_size']),\n            \"generator\": generator,\n            \"callback_on_step_end\": interrupt_callback,\n        }\n\n        cfg_val = state.get('image_cfg_scale', 0.0)\n        if pipeline_type == 'qwenimage':\n            gen_kwargs[\"true_cfg_scale\"] = cfg_val\n        else:\n            gen_kwargs[\"guidance_scale\"] = cfg_val\n\n        t0 = time.time()\n\n        for batch_idx in range(batch_count):\n            if shared.stop_everything:\n                break\n\n            generator.manual_seed(int(seed + batch_idx))\n\n            # Generate prompt variation if enabled\n            if state['image_llm_variations']:\n                gen_kwargs[\"prompt\"] = generate_prompt_variation(state)\n\n            # Run generation in thread so we can yield progress\n            result_holder = []\n            error_holder = []\n\n            def run_batch():\n                try:\n                    # Apply magic suffix 
only at generation time for qwenimage\n                    clean_prompt = gen_kwargs[\"prompt\"]\n                    if pipeline_type == 'qwenimage':\n                        magic_suffix = \", Ultra HD, 4K, cinematic composition\"\n                        if magic_suffix.strip(\", \") not in clean_prompt:\n                            gen_kwargs[\"prompt\"] = clean_prompt + magic_suffix\n\n                    result_holder.extend(shared.image_model(**gen_kwargs).images)\n                    gen_kwargs[\"prompt\"] = clean_prompt  # restore\n                except Exception as e:\n                    error_holder.append(e)\n\n            thread = threading.Thread(target=run_batch)\n            thread.start()\n\n            # Yield progress updates while generation runs\n            while thread.is_alive():\n                try:\n                    step = progress_queue.get(timeout=0.1)\n                    absolute_step = batch_idx * steps_per_batch + step\n                    pct = absolute_step / total_steps\n                    text = f\"Batch {batch_idx + 1}/{batch_count} — Step {step}/{steps_per_batch}\"\n                    yield all_images, progress_bar_html(pct, text)\n                except queue.Empty:\n                    pass\n\n            thread.join()\n\n            if error_holder:\n                raise error_holder[0]\n\n            # Save this batch's images with the actual prompt and seed used\n            if save_images:\n                batch_seed = seed + batch_idx\n                original_prompt = state['image_prompt']\n                state['image_prompt'] = gen_kwargs[\"prompt\"]\n                saved_paths = save_generated_images(result_holder, state, batch_seed)\n                state['image_prompt'] = original_prompt\n                # Use file paths so gallery serves actual PNGs with metadata\n                all_images.extend(saved_paths)\n            else:\n                # Fallback to PIL objects if not saving\n                
all_images.extend(result_holder)\n\n            yield all_images, progress_bar_html((batch_idx + 1) / batch_count, f\"Batch {batch_idx + 1}/{batch_count} complete\")\n\n        t1 = time.time()\n\n        total_images = batch_count * int(state['image_batch_size'])\n        logger.info(f'Generated {total_images} {\"image\" if total_images == 1 else \"images\"} in {(t1 - t0):.2f} seconds ({total_steps / (t1 - t0):.2f} steps/s, seed {seed})')\n\n        yield all_images, progress_bar_html()\n        clear_torch_cache()\n\n    except Exception as e:\n        logger.error(f\"Image generation failed: {e}\")\n        traceback.print_exc()\n        yield [], progress_bar_html()\n        clear_torch_cache()\n\n\ndef load_image_model_wrapper(model_name, dtype, attn_backend, cpu_offload, compile_model, quant_method):\n    if not model_name or model_name == 'None':\n        yield \"No model selected\"\n        return\n\n    try:\n        yield f\"Loading `{model_name}`...\"\n        unload_image_model()\n\n        result = load_image_model(\n            model_name,\n            dtype=dtype,\n            attn_backend=attn_backend,\n            cpu_offload=cpu_offload,\n            compile_model=compile_model,\n            quant_method=quant_method\n        )\n\n        if result is not None:\n            shared.image_model_name = model_name\n            yield f\"✓ Loaded **{model_name}** (quantization: {quant_method})\"\n        else:\n            yield f\"✗ Failed to load `{model_name}`\"\n    except Exception:\n        yield f\"Error:\\n```\\n{traceback.format_exc()}\\n```\"\n\n\ndef unload_image_model_wrapper():\n    previous_name = shared.image_model_name\n    unload_image_model()\n    if previous_name != 'None':\n        return f\"Model: **{previous_name}** (unloaded)\"\n    return \"No model loaded\"\n\n\ndef download_image_model_wrapper(model_path):\n    from huggingface_hub import snapshot_download\n\n    if not model_path:\n        yield \"No model specified\", 
gr.update()\n        return\n\n    try:\n        model_path = model_path.strip()\n        if model_path.startswith('https://huggingface.co/'):\n            model_path = model_path[len('https://huggingface.co/'):]\n        elif model_path.startswith('huggingface.co/'):\n            model_path = model_path[len('huggingface.co/'):]\n\n        if ':' in model_path:\n            model_id, branch = model_path.rsplit(':', 1)\n        else:\n            model_id, branch = model_path, 'main'\n\n        folder_name = model_id.replace('/', '_')\n        output_folder = Path(shared.args.image_model_dir) / folder_name\n\n        yield f\"Downloading `{model_id}` (branch: {branch})...\", gr.update()\n\n        snapshot_download(\n            repo_id=model_id,\n            revision=branch,\n            local_dir=output_folder,\n            local_dir_use_symlinks=False,\n        )\n\n        new_choices = utils.get_available_image_models()\n        yield f\"✓ Downloaded to `{output_folder}`\", gr.update(choices=new_choices, value=folder_name)\n    except Exception:\n        yield f\"Error:\\n```\\n{traceback.format_exc()}\\n```\", gr.update()\n"
  },
  {
    "path": "modules/ui_model_menu.py",
    "content": "import importlib\nimport math\nimport queue\nimport threading\nimport traceback\nfrom functools import partial\nfrom pathlib import Path\n\nimport gradio as gr\n\nfrom modules import loaders, shared, ui, utils\nfrom modules.logging_colors import logger\nfrom modules.LoRA import add_lora_to_model\nfrom modules.models import load_model, unload_model\nfrom modules.models_settings import (\n    apply_model_settings_to_state,\n    get_model_metadata,\n    save_instruction_template,\n    save_model_settings,\n    update_gpu_layers_and_vram,\n    update_model_parameters\n)\nfrom modules.utils import gradio\n\n\ndef create_ui():\n    mu = shared.args.multi_user\n\n    with gr.Tab(\"Model\", elem_id=\"model-tab\"):\n        with gr.Row():\n            with gr.Column():\n                with gr.Row():\n                    shared.gradio['model_menu'] = gr.Dropdown(choices=utils.get_available_models(), value=lambda: shared.model_name, label='Model', elem_classes='slim-dropdown', interactive=not mu)\n                    ui.create_refresh_button(shared.gradio['model_menu'], lambda: None, lambda: {'choices': utils.get_available_models()}, 'refresh-button', interactive=not mu)\n                    shared.gradio['load_model'] = gr.Button(\"Load\", elem_classes='refresh-button', interactive=not mu)\n                    shared.gradio['unload_model'] = gr.Button(\"Unload\", elem_classes='refresh-button', interactive=not mu)\n                    shared.gradio['save_model_settings'] = gr.Button(\"Save settings\", elem_classes='refresh-button', interactive=not mu)\n\n                shared.gradio['loader'] = gr.Dropdown(label=\"Model loader\", choices=loaders.loaders_and_params.keys() if not shared.args.portable else ['llama.cpp'], value=None)\n                with gr.Blocks():\n                    gr.Markdown(\"## Main options\")\n                    with gr.Row():\n                        with gr.Column():\n                            shared.gradio['gpu_layers'] = 
gr.Slider(label=\"gpu-layers\", minimum=-1, maximum=get_initial_gpu_layers_max(), step=1, value=shared.args.gpu_layers, info='Number of layers to offload to the GPU. -1 = auto.')\n                            shared.gradio['ctx_size'] = gr.Slider(label='ctx-size', minimum=0, maximum=1048576, step=1024, value=shared.args.ctx_size, info='Context length. 0 = auto for llama.cpp (requires gpu-layers=-1), 8192 for other loaders. Common values: 4096, 8192, 16384, 32768, 65536, 131072.')\n                            shared.gradio['gpu_split'] = gr.Textbox(label='gpu-split', info='Comma-separated list of VRAM (in GB) to use per GPU. Example: 20,7,7')\n                            shared.gradio['attn_implementation'] = gr.Dropdown(label=\"attn-implementation\", choices=['sdpa', 'eager', 'flash_attention_2'], value=shared.args.attn_implementation, info='Attention implementation.')\n                            shared.gradio['cache_type'] = gr.Dropdown(label=\"cache-type\", choices=['fp16', 'q8_0', 'q4_0', 'fp8', 'q8', 'q7', 'q6', 'q5', 'q4', 'q3', 'q2'], value=shared.args.cache_type, allow_custom_value=True, info='Valid options: llama.cpp - fp16, q8_0, q4_0; ExLlamaV3 - fp16, q2 to q8. For ExLlamaV3, you can type custom combinations for separate k/v bits (e.g. q4_q8).')\n                            shared.gradio['fit_target'] = gr.Textbox(label='fit-target', value=shared.args.fit_target, info='Target VRAM margin per device for auto GPU layers (MiB). Comma-separated list for multiple devices.')\n                            shared.gradio['tp_backend'] = gr.Dropdown(label=\"tp-backend\", choices=['native', 'nccl'], value=shared.args.tp_backend, info='The backend for tensor parallelism.')\n\n                        with gr.Column():\n                            shared.gradio['vram_info'] = gr.HTML(value=get_initial_vram_info())\n                            shared.gradio['cpu_moe'] = gr.Checkbox(label=\"cpu-moe\", value=shared.args.cpu_moe, info='Move the experts to the CPU. 
Saves VRAM on MoE models.')\n                            shared.gradio['streaming_llm'] = gr.Checkbox(label=\"streaming-llm\", value=shared.args.streaming_llm, info='Activate StreamingLLM to avoid re-evaluating the entire prompt when old messages are removed.')\n                            shared.gradio['load_in_8bit'] = gr.Checkbox(label=\"load-in-8bit\", value=shared.args.load_in_8bit)\n                            shared.gradio['load_in_4bit'] = gr.Checkbox(label=\"load-in-4bit\", value=shared.args.load_in_4bit)\n                            shared.gradio['use_double_quant'] = gr.Checkbox(label=\"use_double_quant\", value=shared.args.use_double_quant, info='Used by load-in-4bit.')\n                            shared.gradio['enable_tp'] = gr.Checkbox(label=\"enable_tp\", value=shared.args.enable_tp, info='Enable tensor parallelism (TP).')\n                            shared.gradio['tensorrt_llm_info'] = gr.Markdown(\n                                '* TensorRT-LLM has to be installed manually: `pip install tensorrt_llm==1.1.0 --extra-index-url https://pypi.nvidia.com`.\\n\\n'\n                                '* You can load either a pre-built TensorRT engine or a regular HF model. '\n                                'HF models will be compiled to a TensorRT engine automatically on each load (this can take a while).'\n                            )\n\n                            # Multimodal\n                            with gr.Accordion(\"Multimodal (vision)\", open=False, elem_classes='tgw-accordion') as shared.gradio['mmproj_accordion']:\n                                with gr.Row():\n                                    shared.gradio['mmproj'] = gr.Dropdown(label=\"mmproj file\", choices=utils.get_available_mmproj(), value=lambda: shared.args.mmproj or 'None', elem_classes='slim-dropdown', info=f'Select a file that matches your model. 
Must be placed in {shared.user_data_dir}/mmproj/', interactive=not mu)\n                                    ui.create_refresh_button(shared.gradio['mmproj'], lambda: None, lambda: {'choices': utils.get_available_mmproj()}, 'refresh-button', interactive=not mu)\n\n                            # Speculative decoding\n                            with gr.Accordion(\"Speculative decoding\", open=False, elem_classes='tgw-accordion') as shared.gradio['speculative_decoding_accordion']:\n                                shared.gradio['draft_max'] = gr.Number(label=\"draft-max\", precision=0, step=1, value=shared.args.draft_max, info='Maximum number of tokens to draft for speculative decoding. Recommended: 4 for draft model, 64 for n-gram.')\n\n                                gr.Markdown('#### Draft model')\n                                with gr.Row():\n                                    shared.gradio['model_draft'] = gr.Dropdown(label=\"model-draft\", choices=['None'] + utils.get_available_models(), value=lambda: shared.args.model_draft, elem_classes='slim-dropdown', info='Draft model. Must share the same vocabulary as the main model.', interactive=not mu)\n                                    ui.create_refresh_button(shared.gradio['model_draft'], lambda: None, lambda: {'choices': ['None'] + utils.get_available_models()}, 'refresh-button', interactive=not mu)\n\n                                shared.gradio['gpu_layers_draft'] = gr.Slider(label=\"gpu-layers-draft\", minimum=0, maximum=256, value=shared.args.gpu_layers_draft, info='Number of layers to offload to the GPU for the draft model.')\n                                shared.gradio['device_draft'] = gr.Textbox(label=\"device-draft\", value=shared.args.device_draft, info='Comma-separated list of devices to use for offloading the draft model. 
Example: CUDA0,CUDA1')\n                                shared.gradio['ctx_size_draft'] = gr.Number(label=\"ctx-size-draft\", precision=0, step=256, value=shared.args.ctx_size_draft, info='Size of the prompt context for the draft model. If 0, uses the same as the main model.')\n\n                                shared.gradio['ngram_header'] = gr.Markdown('#### N-gram (draftless)')\n                                shared.gradio['spec_type'] = gr.Dropdown(label=\"spec-type\", choices=['none', 'ngram-mod', 'ngram-simple', 'ngram-map-k', 'ngram-map-k4v', 'ngram-cache'], value=shared.args.spec_type, info='Draftless speculative decoding type. Recommended: ngram-mod.')\n                                shared.gradio['spec_ngram_size_n'] = gr.Number(label=\"spec-ngram-size-n\", precision=0, step=1, value=shared.args.spec_ngram_size_n, info='N-gram lookup size for speculative decoding.', visible=shared.args.spec_type != 'none')\n                                shared.gradio['spec_ngram_size_m'] = gr.Number(label=\"spec-ngram-size-m\", precision=0, step=1, value=shared.args.spec_ngram_size_m, info='Draft n-gram size for speculative decoding.', visible=shared.args.spec_type != 'none')\n                                shared.gradio['spec_ngram_min_hits'] = gr.Number(label=\"spec-ngram-min-hits\", precision=0, step=1, value=shared.args.spec_ngram_min_hits, info='Minimum n-gram hits for ngram-map speculative decoding.', visible=shared.args.spec_type != 'none')\n\n                    gr.Markdown(\"## Other options\")\n                    with gr.Accordion(\"See more options\", open=False, elem_classes='tgw-accordion'):\n                        with gr.Row():\n                            with gr.Column():\n                                shared.gradio['parallel'] = gr.Slider(label=\"parallel\", minimum=1, step=1, maximum=64, value=shared.args.parallel, info='Number of parallel request slots for the API. The context size is divided equally among slots. 
For example, to have 4 slots with 8192 context each, set ctx_size to 32768.')\n                                shared.gradio['threads'] = gr.Slider(label=\"threads\", minimum=0, step=1, maximum=256, value=shared.args.threads)\n                                shared.gradio['threads_batch'] = gr.Slider(label=\"threads_batch\", minimum=0, step=1, maximum=256, value=shared.args.threads_batch)\n                                shared.gradio['batch_size'] = gr.Slider(label=\"batch_size\", minimum=1, maximum=4096, step=1, value=shared.args.batch_size)\n                                shared.gradio['ubatch_size'] = gr.Slider(label=\"ubatch_size\", minimum=1, maximum=4096, step=1, value=shared.args.ubatch_size)\n                                shared.gradio['tensor_split'] = gr.Textbox(label='tensor_split', info='List of proportions to split the model across multiple GPUs. Example: 60,40')\n                                shared.gradio['extra_flags'] = gr.Textbox(label='extra-flags', info='Additional flags to pass to llama-server. Format: \"flag1=value1,flag2,flag3=value3\". Example: \"override-tensor=exps=CPU\"', value=shared.args.extra_flags)\n                                shared.gradio['cpu_memory'] = gr.Number(label=\"Maximum CPU memory in GiB. 
Use this for CPU offloading.\", value=shared.args.cpu_memory)\n                                shared.gradio['compute_dtype'] = gr.Dropdown(label=\"compute_dtype\", choices=[\"bfloat16\", \"float16\", \"float32\"], value=shared.args.compute_dtype, info='Used by load-in-4bit.')\n                                shared.gradio['quant_type'] = gr.Dropdown(label=\"quant_type\", choices=[\"nf4\", \"fp4\"], value=shared.args.quant_type, info='Used by load-in-4bit.')\n\n                            with gr.Column():\n                                shared.gradio['cpu'] = gr.Checkbox(label=\"cpu\", value=shared.args.cpu, info='Use PyTorch in CPU mode.')\n                                shared.gradio['disk'] = gr.Checkbox(label=\"disk\", value=shared.args.disk)\n                                shared.gradio['row_split'] = gr.Checkbox(label=\"row_split\", value=shared.args.row_split, info='Split the model by rows across GPUs. This may improve multi-gpu performance.')\n                                shared.gradio['no_kv_offload'] = gr.Checkbox(label=\"no_kv_offload\", value=shared.args.no_kv_offload, info='Do not offload the  K, Q, V to the GPU. 
This saves VRAM but reduces the performance.')\n                                shared.gradio['no_mmap'] = gr.Checkbox(label=\"no-mmap\", value=shared.args.no_mmap)\n                                shared.gradio['mlock'] = gr.Checkbox(label=\"mlock\", value=shared.args.mlock)\n                                shared.gradio['numa'] = gr.Checkbox(label=\"numa\", value=shared.args.numa, info='NUMA support can help on some systems with non-uniform memory access.')\n                                shared.gradio['bf16'] = gr.Checkbox(label=\"bf16\", value=shared.args.bf16)\n                                shared.gradio['cfg_cache'] = gr.Checkbox(label=\"cfg-cache\", value=shared.args.cfg_cache, info='Necessary to use CFG with this loader.')\n                                shared.gradio['no_use_fast'] = gr.Checkbox(label=\"no_use_fast\", value=shared.args.no_use_fast, info='Set use_fast=False while loading the tokenizer.')\n                                if not shared.args.portable:\n                                    with gr.Row():\n                                        shared.gradio['lora_menu'] = gr.Dropdown(multiselect=True, choices=utils.get_available_loras(), value=shared.lora_names, label='LoRA(s)', elem_classes='slim-dropdown', interactive=not mu)\n                                        ui.create_refresh_button(shared.gradio['lora_menu'], lambda: None, lambda: {'choices': utils.get_available_loras(), 'value': shared.lora_names}, 'refresh-button', interactive=not mu)\n                                        shared.gradio['lora_menu_apply'] = gr.Button(value='Apply LoRAs', elem_classes='refresh-button', interactive=not mu)\n\n            with gr.Column():\n                with gr.Tab(\"Download\"):\n                    shared.gradio['custom_model_menu'] = gr.Textbox(label=\"Download model or LoRA\", info=\"Enter the Hugging Face username/model path, for instance: facebook/galactica-125m. 
To specify a branch, add it at the end after a \\\":\\\" character like this: facebook/galactica-125m:main. To download a single file, enter its name in the second box.\", interactive=not mu)\n                    shared.gradio['download_specific_file'] = gr.Textbox(placeholder=\"File name (for GGUF models)\", show_label=False, max_lines=1, interactive=not mu)\n                    with gr.Row():\n                        shared.gradio['download_model_button'] = gr.Button(\"Download\", variant='primary', interactive=not mu)\n                        shared.gradio['get_file_list'] = gr.Button(\"Get file list\", interactive=not mu)\n\n                with gr.Tab(\"Customize instruction template\"):\n                    with gr.Row():\n                        shared.gradio['customized_template'] = gr.Dropdown(choices=utils.get_available_instruction_templates(), value='None', label='Select the desired instruction template', elem_classes='slim-dropdown')\n                        ui.create_refresh_button(shared.gradio['customized_template'], lambda: None, lambda: {'choices': utils.get_available_instruction_templates()}, 'refresh-button', interactive=not mu)\n\n                    shared.gradio['customized_template_submit'] = gr.Button(\"Submit\", variant=\"primary\", interactive=not mu)\n                    gr.Markdown(\"This allows you to set a customized template for the model currently selected in the \\\"Model loader\\\" menu. 
Whenever the model gets loaded, this template will be used in place of the template specified in the model's metadata, which sometimes is wrong.\")\n\n                with gr.Row():\n                    shared.gradio['model_status'] = gr.Markdown('No model is loaded' if shared.model_name == 'None' else 'Ready')\n\n\ndef create_event_handlers():\n    mu = shared.args.multi_user\n    if mu:\n        return\n\n    shared.gradio['loader'].change(loaders.make_loader_params_visible, gradio('loader'), gradio(loaders.get_all_params()), show_progress=False)\n\n    # In this event handler, the interface state is read and updated\n    # with the model defaults (if any), and then the model is loaded\n    shared.gradio['model_menu'].change(\n        ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(\n        handle_load_model_event_initial, gradio('model_menu', 'interface_state'), gradio(ui.list_interface_input_elements()) + gradio('interface_state') + gradio('vram_info'), show_progress=False).then(\n        partial(load_model_wrapper, autoload=False), gradio('model_menu', 'loader'), gradio('model_status'), show_progress=True).success(\n        handle_load_model_event_final, gradio('truncation_length', 'loader', 'interface_state'), gradio('truncation_length', 'filter_by_loader'), show_progress=False)\n\n    shared.gradio['load_model'].click(\n        ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(\n        update_model_parameters, gradio('interface_state'), None).then(\n        partial(load_model_wrapper, autoload=True), gradio('model_menu', 'loader'), gradio('model_status'), show_progress=True).success(\n        handle_load_model_event_final, gradio('truncation_length', 'loader', 'interface_state'), gradio('truncation_length', 'filter_by_loader'), show_progress=False)\n\n    shared.gradio['unload_model'].click(handle_unload_model_click, None, gradio('model_status'), 
show_progress=False).then(\n        update_gpu_layers_and_vram, gradio('loader', 'model_menu', 'gpu_layers', 'ctx_size', 'cache_type'), gradio('vram_info'), show_progress=False)\n\n    shared.gradio['save_model_settings'].click(\n        ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(\n        save_model_settings, gradio('model_menu', 'interface_state'), gradio('model_status'), show_progress=False)\n\n    # For ctx_size and cache_type - update VRAM display\n    for param in ['ctx_size', 'cache_type']:\n        shared.gradio[param].change(\n            update_gpu_layers_and_vram,\n            gradio('loader', 'model_menu', 'gpu_layers', 'ctx_size', 'cache_type'),\n            gradio('vram_info'), show_progress=False)\n\n    # For manual gpu_layers changes - only update VRAM\n    shared.gradio['gpu_layers'].change(\n        update_gpu_layers_and_vram,\n        gradio('loader', 'model_menu', 'gpu_layers', 'ctx_size', 'cache_type'),\n        gradio('vram_info'), show_progress=False)\n\n    if not shared.args.portable:\n        shared.gradio['lora_menu_apply'].click(load_lora_wrapper, gradio('lora_menu'), gradio('model_status'), show_progress=False)\n\n    shared.gradio['spec_type'].change(\n        lambda x: [gr.update(visible=x != 'none')] * 3,\n        gradio('spec_type'),\n        gradio('spec_ngram_size_n', 'spec_ngram_size_m', 'spec_ngram_min_hits'),\n        show_progress=False\n    )\n\n    shared.gradio['download_model_button'].click(download_model_wrapper, gradio('custom_model_menu', 'download_specific_file'), gradio('model_status'), show_progress=True)\n    shared.gradio['get_file_list'].click(partial(download_model_wrapper, return_links=True), gradio('custom_model_menu', 'download_specific_file'), gradio('model_status'), show_progress=True)\n    shared.gradio['customized_template_submit'].click(save_instruction_template, gradio('model_menu', 'customized_template'), gradio('model_status'), 
show_progress=True)\n\n\ndef load_model_wrapper(selected_model, loader, autoload=False):\n    try:\n        settings = get_model_metadata(selected_model)\n    except FileNotFoundError:\n        exc = traceback.format_exc()\n        yield exc.replace('\\n', '\\n\\n')\n        return\n\n    if not autoload:\n        yield \"### {}\\n\\n- Settings updated: Click \\\"Load\\\" to load the model\\n- Max sequence length: {}\".format(selected_model, settings['truncation_length_info'])\n        return\n\n    if selected_model == 'None':\n        yield \"No model selected\"\n    else:\n        try:\n            yield f\"Loading `{selected_model}`...\"\n            unload_model()\n            if selected_model != '':\n                shared.model, shared.tokenizer = load_model(selected_model, loader)\n\n            if shared.model is not None:\n                yield f\"Successfully loaded `{selected_model}`.\"\n            else:\n                yield f\"Failed to load `{selected_model}`.\"\n        except Exception:\n            exc = traceback.format_exc()\n            logger.error('Failed to load the model.')\n            print(exc)\n            yield exc.replace('\\n', '\\n\\n')\n\n\ndef load_lora_wrapper(selected_loras):\n    yield (\"Applying the following LoRAs to {}:\\n\\n{}\".format(shared.model_name, '\\n'.join(selected_loras)))\n    add_lora_to_model(selected_loras)\n    yield (\"Successfuly applied the LoRAs\")\n\n\ndef download_model_wrapper(repo_id, specific_file, progress=gr.Progress(), return_links=False, check=False):\n    downloader_module = importlib.import_module(\"download-model\")\n    downloader = downloader_module.ModelDownloader()\n    update_queue = queue.Queue()\n\n    try:\n        # Handle direct GGUF URLs\n        if repo_id.startswith(\"https://\") and (\"huggingface.co\" in repo_id) and (repo_id.endswith(\".gguf\") or repo_id.endswith(\".gguf?download=true\")):\n            try:\n                path = repo_id.split(\"huggingface.co/\")[1]\n    
            parts = path.split(\"/\")\n                if len(parts) >= 2:\n                    extracted_repo_id = f\"{parts[0]}/{parts[1]}\"\n                    filename = repo_id.split(\"/\")[-1].replace(\"?download=true\", \"\")\n                    repo_id = extracted_repo_id\n                    specific_file = filename\n            except Exception as e:\n                yield f\"Error parsing GGUF URL: {e}\"\n                progress(0.0)\n                return\n\n        if not repo_id:\n            yield \"Please enter a model path.\"\n            progress(0.0)\n            return\n\n        repo_id = repo_id.strip()\n        specific_file = specific_file.strip()\n\n        progress(0.0, \"Preparing download...\")\n\n        model, branch = downloader.sanitize_model_and_branch_names(repo_id, None)\n        yield \"Getting download links from Hugging Face...\"\n        links, sha256, is_lora, is_llamacpp, file_sizes = downloader.get_download_links_from_huggingface(model, branch, text_only=False, specific_file=specific_file)\n\n        if not links:\n            yield \"No files found to download for the given model/criteria.\"\n            progress(0.0)\n            return\n\n        # Check for multiple GGUF files\n        gguf_files = [link for link in links if link.lower().endswith('.gguf')]\n        if len(gguf_files) > 1 and not specific_file:\n            # Sort by size in ascending order\n            gguf_data = []\n            for i, link in enumerate(links):\n                if link.lower().endswith('.gguf'):\n                    file_size = file_sizes[i]\n                    gguf_data.append((file_size, link))\n\n            gguf_data.sort(key=lambda x: x[0])\n\n            output = \"Multiple GGUF files found. 
Please copy one of the following filenames to the 'File name' field above:\\n\\n```\\n\"\n            for file_size, link in gguf_data:\n                size_str = format_file_size(file_size)\n                output += f\"{size_str} - {Path(link).name}\\n\"\n\n            output += \"```\"\n            yield output\n            return\n\n        if return_links:\n            # Sort by size in ascending order\n            file_data = list(zip(file_sizes, links))\n            file_data.sort(key=lambda x: x[0])\n\n            output = \"```\\n\"\n            for file_size, link in file_data:\n                size_str = format_file_size(file_size)\n                output += f\"{size_str} - {Path(link).name}\\n\"\n\n            output += \"```\"\n            yield output\n            return\n\n        yield \"Determining output folder...\"\n        output_folder = downloader.get_output_folder(\n            model, branch, is_lora, is_llamacpp=is_llamacpp,\n            model_dir=shared.args.model_dir if shared.args.model_dir != shared.args_defaults.model_dir else None\n        )\n\n        if output_folder == shared.user_data_dir / \"models\":\n            output_folder = Path(shared.args.model_dir)\n        elif output_folder == shared.user_data_dir / \"loras\":\n            output_folder = Path(shared.args.lora_dir)\n\n        if check:\n            yield \"Checking previously downloaded files...\"\n            progress(0.5, \"Verifying files...\")\n            downloader.check_model_files(model, branch, links, sha256, output_folder)\n            progress(1.0, \"Verification complete.\")\n            yield \"File check complete.\"\n            return\n\n        yield \"\"\n        progress(0.0, \"Download starting...\")\n\n        def downloader_thread_target():\n            try:\n                downloader.download_model_files(\n                    model, branch, links, sha256, output_folder,\n                    progress_queue=update_queue,\n                    
threads=4,\n                    is_llamacpp=is_llamacpp,\n                    specific_file=specific_file\n                )\n                update_queue.put((\"COMPLETED\", f\"Model successfully saved to `{output_folder}/`.\"))\n            except Exception as e:\n                tb_str = traceback.format_exc().replace('\\n', '\\n\\n')\n                update_queue.put((\"ERROR\", tb_str))\n\n        download_thread = threading.Thread(target=downloader_thread_target)\n        download_thread.start()\n\n        while True:\n            try:\n                message = update_queue.get(timeout=0.2)\n                if not isinstance(message, tuple) or len(message) != 2:\n                    continue\n\n                msg_identifier, data = message\n\n                if msg_identifier == \"COMPLETED\":\n                    progress(1.0, \"Download complete!\")\n                    yield data\n                    break\n                elif msg_identifier == \"ERROR\":\n                    progress(0.0, \"Error occurred\")\n                    yield data\n                    break\n                elif isinstance(msg_identifier, float):\n                    progress_value = msg_identifier\n                    description_str = data\n                    progress(progress_value, f\"Downloading: {description_str}\")\n\n            except queue.Empty:\n                if not download_thread.is_alive():\n                    yield \"Download process finished.\"\n                    break\n\n        download_thread.join()\n\n    except Exception as e:\n        progress(0.0)\n        tb_str = traceback.format_exc().replace('\\n', '\\n\\n')\n        yield tb_str\n\n\ndef update_truncation_length(current_length, state):\n    if 'loader' in state:\n        if state['loader'].lower().startswith('exllama') or state['loader'] == 'llama.cpp':\n            if state['ctx_size'] > 0:\n                return state['ctx_size']\n\n            # ctx_size == 0 means auto: use the actual 
value from the server\n            return shared.settings['truncation_length']\n\n    return current_length\n\n\ndef get_initial_vram_info():\n    if shared.model_name != 'None' and shared.args.loader == 'llama.cpp':\n        return update_gpu_layers_and_vram(\n            shared.args.loader,\n            shared.model_name,\n            shared.args.gpu_layers,\n            shared.args.ctx_size,\n            shared.args.cache_type,\n        )\n\n    return \"<div id=\\\"vram-info\\\"'>Estimated VRAM to load the model:</div>\"\n\n\ndef get_initial_gpu_layers_max():\n    if shared.model_name != 'None' and shared.args.loader == 'llama.cpp':\n        model_settings = get_model_metadata(shared.model_name)\n        return model_settings.get('max_gpu_layers', 256)\n\n    return 256\n\n\ndef handle_load_model_event_initial(model, state):\n    state = apply_model_settings_to_state(model, state)\n    output = ui.apply_interface_values(state)\n    update_model_parameters(state)  # This updates the command-line flags\n\n    vram_info = state.get('vram_info', \"<div id=\\\"vram-info\\\"'>Estimated VRAM to load the model:</div>\")\n    return output + [state] + [vram_info]\n\n\ndef handle_load_model_event_final(truncation_length, loader, state):\n    truncation_length = update_truncation_length(truncation_length, state)\n    return [truncation_length, loader]\n\n\ndef handle_unload_model_click():\n    unload_model()\n    return \"Model unloaded\"\n\n\ndef format_file_size(size_bytes):\n    \"\"\"Convert bytes to human readable format with 2 decimal places for GB and above\"\"\"\n    if size_bytes == 0:\n        return \"0 B\"\n\n    size_names = [\"B\", \"KB\", \"MB\", \"GB\", \"TB\"]\n    i = int(math.floor(math.log(size_bytes, 1024)))\n    p = math.pow(1024, i)\n    s = size_bytes / p\n\n    if i >= 3:  # GB or TB\n        return f\"{s:.2f} {size_names[i]}\"\n    else:\n        return f\"{s:.1f} {size_names[i]}\"\n"
  },
  {
    "path": "modules/ui_notebook.py",
    "content": "import threading\nimport time\nfrom pathlib import Path\n\nimport gradio as gr\n\nfrom modules import logits, shared, ui, utils\nfrom modules.prompts import count_tokens, load_prompt\nfrom modules.text_generation import (\n    generate_reply_wrapper,\n    get_token_ids,\n    stop_everything_event\n)\nfrom modules.utils import gradio\n\n_notebook_file_lock = threading.Lock()\n_notebook_auto_save_timer = None\n_last_notebook_text = None\n_last_notebook_prompt = None\n\ninputs = ('textbox-notebook', 'interface_state')\noutputs = ('textbox-notebook', 'html-notebook')\n\n\ndef create_ui():\n    mu = shared.args.multi_user\n    with gr.Row(visible=not shared.settings['show_two_notebook_columns']) as shared.gradio['notebook-tab']:\n        shared.gradio['last_input-notebook'] = gr.State('')\n        with gr.Row():\n            with gr.Column(scale=4):\n                with gr.Tab('Raw'):\n                    with gr.Row():\n                        shared.gradio['textbox-notebook'] = gr.Textbox(label=\"\", value=\"\", lines=27, elem_id='textbox-notebook', elem_classes=['textbox', 'add_scrollbar'])\n                        shared.gradio['token-counter-notebook'] = gr.HTML(value=\"<span>0</span>\", elem_id=\"notebook-token-counter\")\n\n                with gr.Tab('Markdown'):\n                    shared.gradio['markdown_render-notebook'] = gr.Button('Render')\n                    shared.gradio['markdown-notebook'] = gr.Markdown()\n\n                with gr.Tab('HTML'):\n                    shared.gradio['html-notebook'] = gr.HTML()\n\n                with gr.Tab('Logits'):\n                    with gr.Row():\n                        with gr.Column(scale=10):\n                            shared.gradio['get_logits-notebook'] = gr.Button('Get next token probabilities')\n                        with gr.Column(scale=1):\n                            shared.gradio['use_samplers-notebook'] = gr.Checkbox(label='Use samplers', value=True, 
elem_classes=['no-background'])\n\n                    with gr.Row():\n                        shared.gradio['logits-notebook'] = gr.Textbox(lines=23, label='Output', elem_classes=['textbox_logits_notebook', 'add_scrollbar'])\n                        shared.gradio['logits-notebook-previous'] = gr.Textbox(lines=23, label='Previous output', elem_classes=['textbox_logits_notebook', 'add_scrollbar'])\n\n                with gr.Tab('Tokens'):\n                    shared.gradio['get_tokens-notebook'] = gr.Button('Get token IDs for the input')\n                    shared.gradio['tokens-notebook'] = gr.Textbox(lines=23, label='Tokens', elem_classes=['textbox_logits_notebook', 'add_scrollbar', 'monospace'])\n\n                with gr.Row():\n                    shared.gradio['Undo'] = gr.Button('Undo', elem_classes='small-button')\n                    shared.gradio['Regenerate-notebook'] = gr.Button('Regenerate', elem_classes='small-button')\n                    shared.gradio['Stop-notebook'] = gr.Button('Stop', visible=False, elem_classes='small-button', elem_id='stop')\n                    shared.gradio['Generate-notebook'] = gr.Button('Generate', variant='primary', elem_classes='small-button')\n\n            with gr.Column(scale=1):\n                gr.HTML('<div style=\"padding-bottom: 13px\"></div>')\n                with gr.Row():\n                    shared.gradio['prompt_menu-notebook'] = gr.Dropdown(choices=utils.get_available_prompts(), value=shared.settings['prompt-notebook'], label='Prompt', elem_classes='slim-dropdown')\n\n                with gr.Row():\n                    ui.create_refresh_button(shared.gradio['prompt_menu-notebook'], lambda: None, lambda: {'choices': utils.get_available_prompts()}, ['refresh-button'], interactive=not mu)\n                    shared.gradio['new_prompt-notebook'] = gr.Button('New', elem_classes=['refresh-button'], interactive=not mu)\n                    shared.gradio['rename_prompt-notebook'] = gr.Button('Rename', 
elem_classes=['refresh-button'], interactive=not mu)\n                    shared.gradio['delete_prompt-notebook'] = gr.Button('🗑️', elem_classes=['refresh-button'], interactive=not mu)\n                    shared.gradio['delete_prompt-confirm-notebook'] = gr.Button('Confirm', variant='stop', elem_classes=['refresh-button'], visible=False)\n                    shared.gradio['delete_prompt-cancel-notebook'] = gr.Button('Cancel', elem_classes=['refresh-button'], visible=False)\n\n                with gr.Row(visible=False) as shared.gradio['rename-row-notebook']:\n                    shared.gradio['rename_prompt_to-notebook'] = gr.Textbox(label=\"New name\", elem_classes=['no-background'])\n                    shared.gradio['rename_prompt-cancel-notebook'] = gr.Button('Cancel', elem_classes=['refresh-button'])\n                    shared.gradio['rename_prompt-confirm-notebook'] = gr.Button('Confirm', elem_classes=['refresh-button'], variant='primary')\n\n\ndef create_event_handlers():\n    shared.gradio['Generate-notebook'].click(\n        lambda x: x, gradio('textbox-notebook'), gradio('last_input-notebook')).then(\n        ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(\n        lambda: [gr.update(visible=True), gr.update(visible=False)], None, gradio('Stop-notebook', 'Generate-notebook')).then(\n        generate_and_save_wrapper_notebook, gradio('textbox-notebook', 'interface_state', 'prompt_menu-notebook'), gradio(outputs), show_progress=False).then(\n        lambda state, text: state.update({'textbox-notebook': text}), gradio('interface_state', 'textbox-notebook'), None).then(\n        lambda: [gr.update(visible=False), gr.update(visible=True)], None, gradio('Stop-notebook', 'Generate-notebook')).then(\n        None, None, None, js=f'() => {{{ui.audio_notification_js}}}')\n\n    shared.gradio['textbox-notebook'].submit(\n        lambda x: x, gradio('textbox-notebook'), gradio('last_input-notebook')).then(\n        
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(\n        lambda: [gr.update(visible=True), gr.update(visible=False)], None, gradio('Stop-notebook', 'Generate-notebook')).then(\n        generate_and_save_wrapper_notebook, gradio('textbox-notebook', 'interface_state', 'prompt_menu-notebook'), gradio(outputs), show_progress=False).then(\n        lambda state, text: state.update({'textbox-notebook': text}), gradio('interface_state', 'textbox-notebook'), None).then(\n        lambda: [gr.update(visible=False), gr.update(visible=True)], None, gradio('Stop-notebook', 'Generate-notebook')).then(\n        None, None, None, js=f'() => {{{ui.audio_notification_js}}}')\n\n    shared.gradio['Regenerate-notebook'].click(\n        lambda x: x, gradio('last_input-notebook'), gradio('textbox-notebook'), show_progress=False).then(\n        ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(\n        lambda: [gr.update(visible=True), gr.update(visible=False)], None, gradio('Stop-notebook', 'Generate-notebook')).then(\n        generate_and_save_wrapper_notebook, gradio('textbox-notebook', 'interface_state', 'prompt_menu-notebook'), gradio(outputs), show_progress=False).then(\n        lambda state, text: state.update({'textbox-notebook': text}), gradio('interface_state', 'textbox-notebook'), None).then(\n        lambda: [gr.update(visible=False), gr.update(visible=True)], None, gradio('Stop-notebook', 'Generate-notebook')).then(\n        None, None, None, js=f'() => {{{ui.audio_notification_js}}}')\n\n    shared.gradio['Undo'].click(\n        lambda x: x, gradio('last_input-notebook'), gradio('textbox-notebook'), show_progress=False).then(\n        lambda state, text: state.update({'textbox-notebook': text}), gradio('interface_state', 'textbox-notebook'), None)\n\n    shared.gradio['markdown_render-notebook'].click(lambda x: x, gradio('textbox-notebook'), gradio('markdown-notebook'), queue=False)\n    
shared.gradio['Stop-notebook'].click(stop_everything_event, None, None, queue=False)\n    shared.gradio['prompt_menu-notebook'].change(load_prompt, gradio('prompt_menu-notebook'), gradio('textbox-notebook'), show_progress=False)\n    shared.gradio['new_prompt-notebook'].click(handle_new_prompt, None, gradio('prompt_menu-notebook'), show_progress=False)\n\n    shared.gradio['delete_prompt-notebook'].click(\n        lambda: [gr.update(visible=False), gr.update(visible=True), gr.update(visible=True)],\n        None,\n        gradio('delete_prompt-notebook', 'delete_prompt-cancel-notebook', 'delete_prompt-confirm-notebook'),\n        show_progress=False)\n\n    shared.gradio['delete_prompt-cancel-notebook'].click(\n        lambda: [gr.update(visible=True), gr.update(visible=False), gr.update(visible=False)],\n        None,\n        gradio('delete_prompt-notebook', 'delete_prompt-cancel-notebook', 'delete_prompt-confirm-notebook'),\n        show_progress=False)\n\n    shared.gradio['delete_prompt-confirm-notebook'].click(\n        handle_delete_prompt_confirm_notebook,\n        gradio('prompt_menu-notebook'),\n        gradio('prompt_menu-notebook', 'delete_prompt-notebook', 'delete_prompt-cancel-notebook', 'delete_prompt-confirm-notebook'),\n        show_progress=False)\n\n    shared.gradio['rename_prompt-notebook'].click(\n        handle_rename_prompt_click_notebook,\n        gradio('prompt_menu-notebook'),\n        gradio('rename_prompt_to-notebook', 'rename_prompt-notebook', 'rename-row-notebook'),\n        show_progress=False)\n\n    shared.gradio['rename_prompt-cancel-notebook'].click(\n        lambda: [gr.update(visible=True), gr.update(visible=False)],\n        None,\n        gradio('rename_prompt-notebook', 'rename-row-notebook'),\n        show_progress=False)\n\n    shared.gradio['rename_prompt-confirm-notebook'].click(\n        handle_rename_prompt_confirm_notebook,\n        gradio('rename_prompt_to-notebook', 'prompt_menu-notebook'),\n        
gradio('prompt_menu-notebook', 'rename_prompt-notebook', 'rename-row-notebook'),\n        show_progress=False)\n\n    shared.gradio['textbox-notebook'].input(lambda x: f\"<span>{count_tokens(x)}</span>\", gradio('textbox-notebook'), gradio('token-counter-notebook'), show_progress=False)\n    shared.gradio['textbox-notebook'].change(\n        store_notebook_state_and_debounce,\n        gradio('textbox-notebook', 'prompt_menu-notebook'),\n        None,\n        show_progress=False\n    )\n\n    shared.gradio['get_logits-notebook'].click(\n        ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(\n        logits.get_next_logits, gradio('textbox-notebook', 'interface_state', 'use_samplers-notebook', 'logits-notebook'), gradio('logits-notebook', 'logits-notebook-previous'), show_progress=False)\n\n    shared.gradio['get_tokens-notebook'].click(get_token_ids, gradio('textbox-notebook'), gradio('tokens-notebook'), show_progress=False)\n\n\ndef generate_and_save_wrapper_notebook(textbox_content, interface_state, prompt_name):\n    \"\"\"Generate reply and automatically save the result for notebook mode with periodic saves\"\"\"\n    last_save_time = time.monotonic()\n    save_interval = 8\n    output = textbox_content\n\n    # Initial autosave\n    safe_autosave_prompt(output, prompt_name)\n\n    for i, (output, html_output) in enumerate(generate_reply_wrapper(textbox_content, interface_state)):\n        yield output, html_output\n\n        current_time = time.monotonic()\n        # Save on first iteration or if save_interval seconds have passed\n        if i == 0 or (current_time - last_save_time) >= save_interval:\n            safe_autosave_prompt(output, prompt_name)\n            last_save_time = current_time\n\n    # Final autosave\n    safe_autosave_prompt(output, prompt_name)\n\n\ndef handle_new_prompt():\n    new_name = utils.current_time()\n\n    # Create the new prompt file\n    prompt_path = shared.user_data_dir / 
\"logs\" / \"notebook\" / f\"{new_name}.txt\"\n    prompt_path.parent.mkdir(parents=True, exist_ok=True)\n    prompt_path.write_text(\"In this story,\", encoding='utf-8')\n\n    return gr.update(choices=utils.get_available_prompts(), value=new_name)\n\n\ndef handle_delete_prompt_confirm_notebook(prompt_name):\n    available_prompts = utils.get_available_prompts()\n    current_index = available_prompts.index(prompt_name) if prompt_name in available_prompts else 0\n\n    (shared.user_data_dir / \"logs\" / \"notebook\" / f\"{prompt_name}.txt\").unlink(missing_ok=True)\n    available_prompts = utils.get_available_prompts()\n\n    if available_prompts:\n        new_value = available_prompts[min(current_index, len(available_prompts) - 1)]\n    else:\n        new_value = utils.current_time()\n        (shared.user_data_dir / \"logs\" / \"notebook\").mkdir(parents=True, exist_ok=True)\n        (shared.user_data_dir / \"logs\" / \"notebook\" / f\"{new_value}.txt\").write_text(\"In this story,\")\n        available_prompts = [new_value]\n\n    return [\n        gr.update(choices=available_prompts, value=new_value),\n        gr.update(visible=True),\n        gr.update(visible=False),\n        gr.update(visible=False)\n    ]\n\n\ndef handle_rename_prompt_click_notebook(current_name):\n    return [\n        gr.update(value=current_name),\n        gr.update(visible=False),\n        gr.update(visible=True)\n    ]\n\n\ndef handle_rename_prompt_confirm_notebook(new_name, current_name):\n    old_path = shared.user_data_dir / \"logs\" / \"notebook\" / f\"{current_name}.txt\"\n    new_path = shared.user_data_dir / \"logs\" / \"notebook\" / f\"{new_name}.txt\"\n\n    if old_path.exists() and not new_path.exists():\n        old_path.rename(new_path)\n\n    available_prompts = utils.get_available_prompts()\n    return [\n        gr.update(choices=available_prompts, value=new_name),\n        gr.update(visible=True),\n        gr.update(visible=False)\n    ]\n\n\ndef autosave_prompt(text, 
prompt_name):\n    \"\"\"Automatically save the text to the selected prompt file\"\"\"\n    if prompt_name and text.strip():\n        prompt_path = shared.user_data_dir / \"logs\" / \"notebook\" / f\"{prompt_name}.txt\"\n        prompt_path.parent.mkdir(parents=True, exist_ok=True)\n        prompt_path.write_text(text, encoding='utf-8')\n\n\ndef safe_autosave_prompt(content, prompt_name):\n    \"\"\"Thread-safe wrapper for autosave_prompt to prevent file corruption\"\"\"\n    with _notebook_file_lock:\n        autosave_prompt(content, prompt_name)\n\n\ndef store_notebook_state_and_debounce(text, prompt_name):\n    \"\"\"Store current notebook state and trigger debounced save\"\"\"\n    global _notebook_auto_save_timer, _last_notebook_text, _last_notebook_prompt\n\n    if shared.args.multi_user:\n        return\n\n    _last_notebook_text = text\n    _last_notebook_prompt = prompt_name\n\n    if _notebook_auto_save_timer is not None:\n        _notebook_auto_save_timer.cancel()\n\n    _notebook_auto_save_timer = threading.Timer(1.0, _perform_notebook_debounced_save)\n    _notebook_auto_save_timer.start()\n\n\ndef _perform_notebook_debounced_save():\n    \"\"\"Actually perform the notebook save using the stored state\"\"\"\n    try:\n        if _last_notebook_text is not None and _last_notebook_prompt is not None:\n            safe_autosave_prompt(_last_notebook_text, _last_notebook_prompt)\n    except Exception as e:\n        print(f\"Notebook auto-save failed: {e}\")\n"
  },
  {
    "path": "modules/ui_parameters.py",
    "content": "from pathlib import Path\n\nimport gradio as gr\n\nfrom modules import loaders, presets, shared, ui, ui_chat, utils\nfrom modules.utils import gradio\n\n\ndef create_ui():\n    mu = shared.args.multi_user\n    with gr.Tab(\"Parameters\", elem_id=\"parameters\"):\n        with gr.Tab(\"Generation\"):\n            with gr.Row():\n                with gr.Column():\n                    with gr.Row():\n                        shared.gradio['preset_menu'] = gr.Dropdown(choices=utils.get_available_presets(), value=shared.settings['preset'], label='Preset', elem_classes='slim-dropdown')\n                        ui.create_refresh_button(shared.gradio['preset_menu'], lambda: None, lambda: {'choices': utils.get_available_presets()}, 'refresh-button', interactive=not mu)\n                        shared.gradio['save_preset'] = gr.Button('💾', elem_classes='refresh-button', interactive=not mu)\n                        shared.gradio['delete_preset'] = gr.Button('🗑️', elem_classes='refresh-button', interactive=not mu)\n                        shared.gradio['reset_preset'] = gr.Button('Restore preset', elem_classes='refresh-button', interactive=True)\n                        shared.gradio['neutralize_samplers'] = gr.Button('Neutralize samplers', elem_classes='refresh-button', interactive=True)\n\n                with gr.Column():\n                    shared.gradio['filter_by_loader'] = gr.Dropdown(label=\"Filter by loader\", choices=[\"All\"] + list(loaders.loaders_and_params.keys()) if not shared.args.portable else ['llama.cpp'], value=\"All\", elem_classes='slim-dropdown')\n\n            with gr.Row():\n                with gr.Column():\n                    with gr.Row():\n                        with gr.Column():\n                            gr.Markdown('## Curve shape')\n                            shared.gradio['temperature'] = gr.Slider(0.01, 5, value=shared.settings['temperature'], step=0.01, label='temperature')\n                            
shared.gradio['dynatemp_low'] = gr.Slider(0.01, 5, value=shared.settings['dynatemp_low'], step=0.01, label='dynatemp_low', visible=shared.settings['dynamic_temperature'])\n                            shared.gradio['dynatemp_high'] = gr.Slider(0.01, 5, value=shared.settings['dynatemp_high'], step=0.01, label='dynatemp_high', visible=shared.settings['dynamic_temperature'])\n                            shared.gradio['dynatemp_exponent'] = gr.Slider(0.01, 5, value=shared.settings['dynatemp_exponent'], step=0.01, label='dynatemp_exponent', visible=shared.settings['dynamic_temperature'])\n                            shared.gradio['smoothing_factor'] = gr.Slider(0.0, 10.0, value=shared.settings['smoothing_factor'], step=0.01, label='smoothing_factor', info='Activates Quadratic Sampling.')\n                            shared.gradio['smoothing_curve'] = gr.Slider(1.0, 10.0, value=shared.settings['smoothing_curve'], step=0.01, label='smoothing_curve', info='Adjusts the dropoff curve of Quadratic Sampling.')\n                            shared.gradio['dynamic_temperature'] = gr.Checkbox(value=shared.settings['dynamic_temperature'], label='dynamic_temperature')\n\n                            gr.Markdown('## Curve cutoff')\n                            shared.gradio['top_p'] = gr.Slider(0.0, 1.0, value=shared.settings['top_p'], step=0.01, label='top_p')\n                            shared.gradio['top_k'] = gr.Slider(0, 200, value=shared.settings['top_k'], step=1, label='top_k')\n                            shared.gradio['min_p'] = gr.Slider(0.0, 1.0, value=shared.settings['min_p'], step=0.01, label='min_p')\n                            shared.gradio['top_n_sigma'] = gr.Slider(0.0, 5.0, value=shared.settings['top_n_sigma'], step=0.01, label='top_n_sigma')\n                            shared.gradio['typical_p'] = gr.Slider(0.0, 1.0, value=shared.settings['typical_p'], step=0.01, label='typical_p')\n                            shared.gradio['xtc_threshold'] = gr.Slider(0, 0.5, 
value=shared.settings['xtc_threshold'], step=0.01, label='xtc_threshold', info='If 2 or more tokens have probability above this threshold, consider removing all but the last one.')\n                            shared.gradio['xtc_probability'] = gr.Slider(0, 1, value=shared.settings['xtc_probability'], step=0.01, label='xtc_probability', info='Probability that the removal will actually happen. 0 disables the sampler. 1 makes it always happen.')\n                            shared.gradio['epsilon_cutoff'] = gr.Slider(0, 9, value=shared.settings['epsilon_cutoff'], step=0.01, label='epsilon_cutoff')\n                            shared.gradio['eta_cutoff'] = gr.Slider(0, 20, value=shared.settings['eta_cutoff'], step=0.01, label='eta_cutoff')\n                            shared.gradio['tfs'] = gr.Slider(0.0, 1.0, value=shared.settings['tfs'], step=0.01, label='tfs')\n                            shared.gradio['top_a'] = gr.Slider(0.0, 1.0, value=shared.settings['top_a'], step=0.01, label='top_a')\n\n                            gr.Markdown('## Repetition suppression')\n                            shared.gradio['dry_multiplier'] = gr.Slider(0, 5, value=shared.settings['dry_multiplier'], step=0.01, label='dry_multiplier', info='Set to greater than 0 to enable DRY. 
Recommended value: 0.8.')\n                            shared.gradio['dry_allowed_length'] = gr.Slider(1, 20, value=shared.settings['dry_allowed_length'], step=1, label='dry_allowed_length', info='Longest sequence that can be repeated without being penalized.')\n                            shared.gradio['dry_base'] = gr.Slider(1, 4, value=shared.settings['dry_base'], step=0.01, label='dry_base', info='Controls how fast the penalty grows with increasing sequence length.')\n                            shared.gradio['repetition_penalty'] = gr.Slider(1.0, 1.5, value=shared.settings['repetition_penalty'], step=0.01, label='repetition_penalty')\n                            shared.gradio['frequency_penalty'] = gr.Slider(0, 2, value=shared.settings['frequency_penalty'], step=0.05, label='frequency_penalty')\n                            shared.gradio['presence_penalty'] = gr.Slider(0, 2, value=shared.settings['presence_penalty'], step=0.05, label='presence_penalty')\n                            shared.gradio['encoder_repetition_penalty'] = gr.Slider(0.8, 1.5, value=shared.settings['encoder_repetition_penalty'], step=0.01, label='encoder_repetition_penalty')\n                            shared.gradio['no_repeat_ngram_size'] = gr.Slider(0, 20, step=1, value=shared.settings['no_repeat_ngram_size'], label='no_repeat_ngram_size')\n                            shared.gradio['repetition_penalty_range'] = gr.Slider(0, 4096, step=64, value=shared.settings['repetition_penalty_range'], label='repetition_penalty_range')\n\n                        with gr.Column():\n                            gr.Markdown('## Alternative sampling methods')\n                            shared.gradio['penalty_alpha'] = gr.Slider(0, 5, value=shared.settings['penalty_alpha'], label='penalty_alpha', info='For Contrastive Search. 
do_sample must be unchecked.')\n                            shared.gradio['guidance_scale'] = gr.Slider(-0.5, 2.5, step=0.05, value=shared.settings['guidance_scale'], label='guidance_scale', info='For CFG. 1.5 is a good value.')\n                            shared.gradio['mirostat_mode'] = gr.Slider(0, 2, step=1, value=shared.settings['mirostat_mode'], label='mirostat_mode', info='mode=1 is for llama.cpp only.')\n                            shared.gradio['mirostat_tau'] = gr.Slider(0, 10, step=0.01, value=shared.settings['mirostat_tau'], label='mirostat_tau')\n                            shared.gradio['mirostat_eta'] = gr.Slider(0, 1, step=0.01, value=shared.settings['mirostat_eta'], label='mirostat_eta')\n                            shared.gradio['adaptive_target'] = gr.Slider(0.0, 1.0, value=shared.settings['adaptive_target'], step=0.01, label='adaptive_target', info='Target probability for adaptive-p sampling. Tokens near this probability are favored. 0 disables.')\n                            shared.gradio['adaptive_decay'] = gr.Slider(0.0, 0.99, value=shared.settings['adaptive_decay'], step=0.01, label='adaptive_decay', info='EMA decay rate for adaptive-p. Controls history window (~1/(1-decay) tokens). 
Default: 0.9.')\n\n                            gr.Markdown('## Other options')\n                            shared.gradio['do_sample'] = gr.Checkbox(value=shared.settings['do_sample'], label='do_sample')\n                            shared.gradio['temperature_last'] = gr.Checkbox(value=shared.settings['temperature_last'], label='temperature_last', info='Moves temperature/dynamic temperature/quadratic sampling to the end of the sampler stack, ignoring their positions in \"Sampler priority\".')\n                            shared.gradio['sampler_priority'] = gr.DragDrop(value=shared.settings['sampler_priority'], label='Sampler priority', info='Parameter names separated by new lines or commas.', elem_classes=['add_scrollbar'])\n                            shared.gradio['dry_sequence_breakers'] = gr.Textbox(value=shared.settings['dry_sequence_breakers'], label='dry_sequence_breakers', info='Tokens across which sequence matching is not continued. Specified as a comma-separated list of quoted strings.')\n\n                with gr.Column():\n                    with gr.Row():\n                        with gr.Column():\n                            with gr.Blocks():\n                                shared.gradio['max_new_tokens'] = gr.Slider(minimum=shared.settings['max_new_tokens_min'], maximum=shared.settings['max_new_tokens_max'], value=shared.settings['max_new_tokens'], step=1, label='max_new_tokens', info='⚠️ Setting this too high can cause prompt truncation.')\n                                shared.gradio['prompt_lookup_num_tokens'] = gr.Slider(value=shared.settings['prompt_lookup_num_tokens'], minimum=0, maximum=10, step=1, label='prompt_lookup_num_tokens', info='Activates Prompt Lookup Decoding.')\n                                shared.gradio['max_tokens_second'] = gr.Slider(value=shared.settings['max_tokens_second'], minimum=0, maximum=20, step=1, label='Maximum tokens/second', info='To make text readable in real time.')\n\n                            
shared.gradio['auto_max_new_tokens'] = gr.Checkbox(value=shared.settings['auto_max_new_tokens'], label='auto_max_new_tokens', info='Expand max_new_tokens to the available context length.')\n                            shared.gradio['ban_eos_token'] = gr.Checkbox(value=shared.settings['ban_eos_token'], label='Ban the eos_token', info='Forces the model to never end the generation prematurely.')\n                            shared.gradio['add_bos_token'] = gr.Checkbox(value=shared.settings['add_bos_token'], label='Add the bos_token to the beginning of prompts', info='Only applies to text completion (notebook). In chat mode, templates control BOS tokens.')\n                            shared.gradio['skip_special_tokens'] = gr.Checkbox(value=shared.settings['skip_special_tokens'], label='Skip special tokens', info='Some specific models need this unset.')\n                            shared.gradio['stream'] = gr.Checkbox(value=shared.settings['stream'], label='Activate text streaming')\n                            shared.gradio['static_cache'] = gr.Checkbox(value=shared.settings['static_cache'], label='Static KV cache', info='Use a static cache for improved performance.')\n\n                        with gr.Column():\n                            shared.gradio['truncation_length'] = gr.Number(precision=0, step=256, value=get_truncation_length(), label='Truncate the prompt up to this length', info='The leftmost tokens are removed if the prompt exceeds this length.')\n                            shared.gradio['seed'] = gr.Number(value=shared.settings['seed'], label='Seed (-1 for random)')\n                            shared.gradio['custom_system_message'] = gr.Textbox(value=shared.settings['custom_system_message'], lines=2, label='Custom system message', info='If not empty, will be used instead of the default one.', elem_classes=['add_scrollbar'])\n                            shared.gradio['custom_stopping_strings'] = gr.Textbox(lines=2, 
value=shared.settings[\"custom_stopping_strings\"] or None, label='Custom stopping strings', info='Written between \"\" and separated by commas.', placeholder='\"\\\\n\", \"\\\\nYou:\"')\n                            shared.gradio['custom_token_bans'] = gr.Textbox(value=shared.settings['custom_token_bans'] or None, label='Token bans', info='Token IDs to ban, separated by commas. The IDs can be found in the Default or Notebook tab.')\n                            shared.gradio['negative_prompt'] = gr.Textbox(value=shared.settings['negative_prompt'], label='Negative prompt', info='For CFG. Only used when guidance_scale is different than 1.', lines=3, elem_classes=['add_scrollbar'])\n                            with gr.Row() as shared.gradio['grammar_file_row']:\n                                shared.gradio['grammar_file'] = gr.Dropdown(value='None', choices=utils.get_available_grammars(), label='Load grammar from file (.gbnf)', elem_classes='slim-dropdown')\n                                ui.create_refresh_button(shared.gradio['grammar_file'], lambda: None, lambda: {'choices': utils.get_available_grammars()}, 'refresh-button', interactive=not mu)\n                                shared.gradio['save_grammar'] = gr.Button('💾', elem_classes='refresh-button', interactive=not mu)\n                                shared.gradio['delete_grammar'] = gr.Button('🗑️ ', elem_classes='refresh-button', interactive=not mu)\n\n                            shared.gradio['grammar_string'] = gr.Textbox(value=shared.settings['grammar_string'], label='Grammar', lines=16, elem_classes=['add_scrollbar', 'monospace'])\n\n        ui_chat.create_chat_settings_ui()\n\n\ndef create_event_handlers():\n    shared.gradio['filter_by_loader'].change(loaders.blacklist_samplers, gradio('filter_by_loader', 'dynamic_temperature'), gradio(loaders.list_all_samplers()), show_progress=False)\n    shared.gradio['preset_menu'].change(\n        ui.gather_interface_values, gradio(shared.input_elements), 
gradio('interface_state')).then(\n        presets.load_preset_for_ui, gradio('preset_menu', 'interface_state'), gradio('interface_state') + gradio(presets.presets_params()), show_progress=False)\n\n    shared.gradio['reset_preset'].click(\n        ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(\n        presets.reset_preset_for_ui, gradio('preset_menu', 'interface_state'), gradio('interface_state') + gradio(presets.presets_params()), show_progress=False)\n\n    shared.gradio['neutralize_samplers'].click(\n        ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(\n        presets.neutralize_samplers_for_ui, gradio('interface_state'), gradio('interface_state') + gradio(presets.presets_params()), show_progress=False)\n\n    shared.gradio['grammar_file'].change(load_grammar, gradio('grammar_file'), gradio('grammar_string'), show_progress=False)\n    shared.gradio['dynamic_temperature'].change(lambda x: [gr.update(visible=x)] * 3, gradio('dynamic_temperature'), gradio('dynatemp_low', 'dynatemp_high', 'dynatemp_exponent'), show_progress=False)\n\n\ndef get_truncation_length():\n    if shared.args.ctx_size > 0 and ('ctx_size' in shared.provided_arguments or shared.args.ctx_size != shared.args_defaults.ctx_size):\n        return shared.args.ctx_size\n    else:\n        return shared.settings['truncation_length']\n\n\ndef load_grammar(name):\n    p = shared.user_data_dir / 'grammars' / name\n    if p.exists():\n        return open(p, 'r', encoding='utf-8').read()\n    else:\n        return ''\n"
  },
  {
    "path": "modules/ui_session.py",
    "content": "import gradio as gr\n\nfrom modules import shared, ui, utils\nfrom modules.utils import gradio\n\n\ndef create_ui():\n    mu = shared.args.multi_user\n    with gr.Tab(\"Session\", elem_id=\"session-tab\"):\n        with gr.Row():\n            with gr.Column():\n                gr.Markdown(\"## Settings\")\n                shared.gradio['toggle_dark_mode'] = gr.Button('Toggle light/dark theme 💡', elem_classes='refresh-button')\n                shared.gradio['show_two_notebook_columns'] = gr.Checkbox(label='Show two columns in the Notebook tab', value=shared.settings['show_two_notebook_columns'])\n                shared.gradio['paste_to_attachment'] = gr.Checkbox(label='Turn long pasted text into attachments in the Chat tab', value=shared.settings['paste_to_attachment'], elem_id='paste_to_attachment')\n                shared.gradio['include_past_attachments'] = gr.Checkbox(label='Include attachments/search results from previous messages in the chat prompt', value=shared.settings['include_past_attachments'])\n\n            with gr.Column():\n                gr.Markdown(\"## Extensions & flags\")\n                shared.gradio['save_settings'] = gr.Button(f'Save extensions settings to {shared.user_data_dir}/settings.yaml', elem_classes='refresh-button', interactive=not mu)\n                shared.gradio['reset_interface'] = gr.Button(\"Apply flags/extensions and restart\", interactive=not mu)\n                with gr.Row():\n                    with gr.Column():\n                        shared.gradio['extensions_menu'] = gr.CheckboxGroup(choices=utils.get_available_extensions(), value=shared.args.extensions, label=\"Available extensions\", info='Note that some of these extensions may require manually installing Python requirements through the command: pip install -r extensions/extension_name/requirements.txt', elem_classes='checkboxgroup-table')\n\n                    with gr.Column():\n                        shared.gradio['bool_menu'] = 
gr.CheckboxGroup(choices=get_boolean_arguments(), value=get_boolean_arguments(active=True), label=\"Boolean command-line flags\", elem_classes='checkboxgroup-table')\n\n        shared.gradio['theme_state'] = gr.Textbox(visible=False, value='dark' if shared.settings['dark_theme'] else 'light')\n        if not mu:\n            shared.gradio['save_settings'].click(\n                ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(\n                handle_save_settings, gradio('interface_state', 'preset_menu', 'extensions_menu', 'show_controls', 'theme_state'), gradio('save_contents', 'save_filename', 'save_root', 'save_root_state', 'file_saver'), show_progress=False)\n\n        shared.gradio['toggle_dark_mode'].click(\n            lambda x: 'dark' if x == 'light' else 'light', gradio('theme_state'), gradio('theme_state')).then(\n            None, None, None, js=f'() => {{{ui.dark_theme_js}; toggleDarkMode(); localStorage.setItem(\"theme\", document.body.classList.contains(\"dark\") ? 
\"dark\" : \"light\")}}')\n\n        shared.gradio['show_two_notebook_columns'].change(\n            handle_default_to_notebook_change,\n            gradio('show_two_notebook_columns', 'textbox-default', 'output_textbox', 'prompt_menu-default', 'textbox-notebook', 'prompt_menu-notebook'),\n            gradio('default-tab', 'notebook-tab', 'textbox-default', 'output_textbox', 'prompt_menu-default', 'textbox-notebook', 'prompt_menu-notebook')\n        )\n\n        # Reset interface event\n        if not mu:\n            shared.gradio['reset_interface'].click(\n                set_interface_arguments, gradio('extensions_menu', 'bool_menu'), None).then(\n                None, None, None, js='() => {document.body.innerHTML=\\'<h1 style=\"font-family:monospace;padding-top:20%;margin:0;height:100vh;color:lightgray;text-align:center;background:var(--body-background-fill)\">Reloading...</h1>\\'; setTimeout(function(){location.reload()},2500); return []}')\n\n\ndef handle_save_settings(state, preset, extensions, show_controls, theme):\n    contents = ui.save_settings(state, preset, extensions, show_controls, theme, manual_save=True)\n    root = str(shared.user_data_dir) + \"/\"\n    return [\n        contents,\n        \"settings.yaml\",\n        root,\n        root,\n        gr.update(visible=True)\n    ]\n\n\ndef handle_default_to_notebook_change(show_two_columns, default_input, default_output, default_prompt, notebook_input, notebook_prompt):\n    if show_two_columns:\n        # Notebook to default\n        return [\n            gr.update(visible=True),\n            gr.update(visible=False),\n            notebook_input,\n            \"\",\n            gr.update(value=notebook_prompt, choices=utils.get_available_prompts()),\n            gr.update(),\n            gr.update(),\n        ]\n    else:\n        # Default to notebook\n        return [\n            gr.update(visible=False),\n            gr.update(visible=True),\n            gr.update(),\n            gr.update(),\n 
           gr.update(),\n            default_input,\n            gr.update(value=default_prompt, choices=utils.get_available_prompts())\n        ]\n\n\ndef set_interface_arguments(extensions, bool_active):\n    shared.args.extensions = extensions\n\n    bool_list = get_boolean_arguments()\n\n    for k in bool_list:\n        setattr(shared.args, k, False)\n    for k in bool_active:\n        setattr(shared.args, k, True)\n        if k == 'api':\n            shared.add_extension('openai', last=True)\n\n    shared.need_restart = True\n\n\ndef get_boolean_arguments(active=False):\n    cmd_list = vars(shared.args)\n    bool_list = sorted([k for k in cmd_list if type(cmd_list[k]) is bool and k not in ui.list_model_elements()])\n    bool_active = [k for k in bool_list if vars(shared.args)[k]]\n\n    if active:\n        return bool_active\n    else:\n        return bool_list\n"
  },
  {
    "path": "modules/utils.py",
    "content": "import os\nimport re\nfrom datetime import datetime\nfrom pathlib import Path\n\nfrom modules import shared\nfrom modules.logging_colors import logger\n\n\n# Helper function to get multiple values from shared.gradio\ndef gradio(*keys):\n    if len(keys) == 1 and type(keys[0]) in [list, tuple]:\n        keys = keys[0]\n\n    return [shared.gradio[k] for k in keys]\n\n\ndef sanitize_filename(name):\n    \"\"\"Strip path traversal components from a filename.\n\n    Returns only the final path component with leading dots removed,\n    preventing directory traversal via '../' or absolute paths.\n    \"\"\"\n    name = Path(name).name  # drop all directory components\n    name = name.lstrip('.')  # remove leading dots\n    return name\n\n\ndef _is_path_allowed(abs_path_str):\n    \"\"\"Check if a path is under the configured user_data directory.\"\"\"\n    abs_path = Path(abs_path_str).resolve()\n    user_data_resolved = shared.user_data_dir.resolve()\n    try:\n        abs_path.relative_to(user_data_resolved)\n        return True\n    except ValueError:\n        return False\n\n\ndef save_file(fname, contents):\n    if fname == '':\n        logger.error('File name is empty!')\n        return\n\n    abs_path_str = os.path.abspath(fname)\n    if not _is_path_allowed(abs_path_str):\n        logger.error(f'Invalid file path: \\\"{fname}\\\"')\n        return\n\n    if Path(abs_path_str).suffix.lower() not in ('.yaml', '.yml', '.json', '.txt', '.gbnf'):\n        logger.error(f'Refusing to save file with disallowed extension: \\\"{fname}\\\"')\n        return\n\n    with open(abs_path_str, 'w', encoding='utf-8') as f:\n        f.write(contents)\n\n    logger.info(f'Saved \\\"{abs_path_str}\\\".')\n\n\ndef delete_file(fname):\n    if fname == '':\n        logger.error('File name is empty!')\n        return\n\n    abs_path_str = os.path.abspath(fname)\n    if not _is_path_allowed(abs_path_str):\n        logger.error(f'Invalid file path: \\\"{fname}\\\"')\n       
 return\n\n    p = Path(abs_path_str)\n    if p.exists():\n        p.unlink()\n        logger.info(f'Deleted \\\"{fname}\\\".')\n\n\ndef current_time():\n    return f\"{datetime.now().strftime('%Y-%m-%d_%Hh%Mm%Ss')}\"\n\n\ndef atoi(text):\n    return int(text) if text.isdigit() else text.lower()\n\n\n# Replace multiple string pairs in a string\ndef replace_all(text, dic):\n    for i, j in dic.items():\n        text = text.replace(i, j)\n\n    return text\n\n\ndef natural_keys(text):\n    return [atoi(c) for c in re.split(r'(\\d+)', text)]\n\n\ndef check_model_loaded():\n    if shared.model_name == 'None' or shared.model is None:\n        if len(get_available_models()) == 0:\n            error_msg = f\"No model is loaded.\\n\\nTo get started:\\n1) Place a GGUF file in your {shared.user_data_dir}/models folder\\n2) Go to the Model tab and select it\"\n            logger.error(error_msg)\n            return False, error_msg\n        else:\n            error_msg = \"No model is loaded. Please select one in the Model tab.\"\n            logger.error(error_msg)\n            return False, error_msg\n\n    return True, None\n\n\ndef resolve_model_path(model_name_or_path, image_model=False):\n    \"\"\"\n    Resolves a model path, checking for a direct path\n    before the default models directory.\n    \"\"\"\n\n    path_candidate = Path(model_name_or_path)\n    if path_candidate.exists():\n        return path_candidate\n    elif image_model:\n        return Path(f'{shared.args.image_model_dir}/{model_name_or_path}')\n    else:\n        return Path(f'{shared.args.model_dir}/{model_name_or_path}')\n\n\ndef get_available_models():\n    # Get all GGUF files\n    gguf_files = get_available_ggufs()\n\n    # Filter out non-first parts of multipart GGUF files\n    filtered_gguf_files = []\n    for gguf_path in gguf_files:\n        filename = os.path.basename(gguf_path)\n\n        match = re.search(r'-(\\d+)-of-\\d+\\.gguf$', filename)\n\n        if match:\n            part_number 
= match.group(1)\n            # Keep only if it's part 1\n            if part_number.lstrip(\"0\") == \"1\":\n                filtered_gguf_files.append(gguf_path)\n        else:\n            # Not a multi-part file\n            filtered_gguf_files.append(gguf_path)\n\n    model_dir = Path(shared.args.model_dir)\n\n    # Find top-level directories containing GGUF files\n    dirs_with_gguf = set()\n    for gguf_path in gguf_files:\n        path = Path(gguf_path)\n        if len(path.parts) > 0:\n            dirs_with_gguf.add(path.parts[0])\n\n    # Find directories with safetensors files\n    dirs_with_safetensors = set()\n    for item in os.listdir(model_dir):\n        item_path = model_dir / item\n        if item_path.is_dir():\n            if any(file.lower().endswith(('.safetensors', '.pt')) for file in os.listdir(item_path) if (item_path / file).is_file()):\n                dirs_with_safetensors.add(item)\n\n    # Find valid model directories\n    model_dirs = []\n    for item in os.listdir(model_dir):\n        item_path = model_dir / item\n        if not item_path.is_dir():\n            continue\n\n        # Include directory if it either doesn't contain GGUF files\n        # or contains both GGUF and safetensors files\n        if item not in dirs_with_gguf or item in dirs_with_safetensors:\n            model_dirs.append(item)\n\n    model_dirs = sorted(model_dirs, key=natural_keys)\n\n    return filtered_gguf_files + model_dirs\n\n\ndef get_available_image_models():\n    model_dir = Path(shared.args.image_model_dir)\n    model_dir.mkdir(parents=True, exist_ok=True)\n\n    # Find valid model directories\n    model_dirs = []\n    for item in os.listdir(model_dir):\n        item_path = model_dir / item\n        if not item_path.is_dir():\n            continue\n\n        model_dirs.append(item)\n\n    model_dirs = sorted(model_dirs, key=natural_keys)\n\n    return model_dirs\n\n\ndef get_available_ggufs():\n    model_list = []\n    model_dir = 
Path(shared.args.model_dir)\n\n    for dirpath, _, files in os.walk(model_dir, followlinks=True):\n        for file in files:\n            if file.lower().endswith(\".gguf\"):\n                model_path = Path(dirpath) / file\n                rel_path = model_path.relative_to(model_dir)\n                model_list.append(str(rel_path))\n\n    return sorted(model_list, key=natural_keys)\n\n\ndef get_available_mmproj():\n    mmproj_dir = shared.user_data_dir / 'mmproj'\n    if not mmproj_dir.exists():\n        return ['None']\n\n    mmproj_files = []\n    for item in mmproj_dir.iterdir():\n        if item.is_file() and item.suffix.lower() in ('.gguf', '.bin'):\n            mmproj_files.append(item.name)\n\n    return ['None'] + sorted(mmproj_files, key=natural_keys)\n\n\ndef get_available_presets():\n    return sorted(set((k.stem for k in (shared.user_data_dir / 'presets').glob('*.yaml'))), key=natural_keys)\n\n\ndef get_available_prompts():\n    notebook_dir = shared.user_data_dir / 'logs' / 'notebook'\n    notebook_dir.mkdir(parents=True, exist_ok=True)\n\n    prompt_files = list(notebook_dir.glob('*.txt'))\n    if not prompt_files:\n        new_name = current_time()\n        new_path = notebook_dir / f\"{new_name}.txt\"\n        new_path.write_text(\"In this story,\", encoding='utf-8')\n        prompt_files = [new_path]\n\n    sorted_files = sorted(prompt_files, key=lambda x: x.stat().st_mtime, reverse=True)\n    prompts = [file.stem for file in sorted_files]\n    return prompts\n\n\ndef get_available_characters():\n    paths = (x for x in (shared.user_data_dir / 'characters').iterdir() if x.suffix in ('.json', '.yaml', '.yml'))\n    return sorted(set((k.stem for k in paths)), key=natural_keys)\n\n\ndef get_available_users():\n    users_dir = shared.user_data_dir / 'users'\n    users_dir.mkdir(parents=True, exist_ok=True)\n    paths = (x for x in users_dir.iterdir() if x.suffix in ('.json', '.yaml', '.yml'))\n    return sorted(set((k.stem for k in paths)), 
key=natural_keys)\n\n\ndef get_available_instruction_templates():\n    path = str(shared.user_data_dir / \"instruction-templates\")\n    paths = []\n    if os.path.exists(path):\n        paths = (x for x in Path(path).iterdir() if x.suffix in ('.json', '.yaml', '.yml'))\n\n    return ['None'] + sorted(set((k.stem for k in paths)), key=natural_keys)\n\n\ndef get_available_extensions():\n    # User extensions (higher priority)\n    user_extensions = []\n    user_ext_path = shared.user_data_dir / 'extensions'\n    if user_ext_path.exists():\n        user_exts = map(lambda x: x.parent.name, user_ext_path.glob('*/script.py'))\n        user_extensions = sorted(set(user_exts), key=natural_keys)\n\n    # System extensions (excluding those overridden by user extensions)\n    system_exts = map(lambda x: x.parent.name, Path('extensions').glob('*/script.py'))\n    system_extensions = sorted(set(system_exts) - set(user_extensions), key=natural_keys)\n\n    return user_extensions + system_extensions\n\n\ndef get_available_loras():\n    return ['None'] + sorted([item.name for item in list(Path(shared.args.lora_dir).glob('*')) if not item.name.endswith(('.txt', '-np', '.pt', '.json'))], key=natural_keys)\n\n\ndef get_datasets(path: str, ext: str):\n    # include subdirectories for raw txt files to allow training from a subdirectory of txt files\n    if ext == \"txt\":\n        return ['None'] + sorted(set([k.stem for k in list(Path(path).glob('*.txt')) + list(Path(path).glob('*/')) if k.stem != 'put-trainer-datasets-here']), key=natural_keys)\n\n    return ['None'] + sorted(set([k.stem for k in Path(path).glob(f'*.{ext}') if k.stem != 'put-trainer-datasets-here']), key=natural_keys)\n\n\ndef get_chat_datasets(path: str):\n    \"\"\"List JSON datasets that contain chat conversations (messages or ShareGPT format).\"\"\"\n    return ['None'] + sorted(set([k.stem for k in Path(path).glob('*.json') if k.stem != 'put-trainer-datasets-here' and _is_chat_dataset(k)]), 
key=natural_keys)\n\n\ndef get_text_datasets(path: str):\n    \"\"\"List JSON datasets that contain raw text ({\"text\": ...} format).\"\"\"\n    return ['None'] + sorted(set([k.stem for k in Path(path).glob('*.json') if k.stem != 'put-trainer-datasets-here' and _is_text_dataset(k)]), key=natural_keys)\n\n\ndef _peek_json_keys(filepath):\n    \"\"\"Read the first object in a JSON array file and return its keys.\"\"\"\n    import json\n    decoder = json.JSONDecoder()\n    WS = ' \\t\\n\\r'\n    try:\n        with open(filepath, 'r', encoding='utf-8') as f:\n            buf = ''\n            obj_start = None\n            while len(buf) < 1 << 20:  # Read up to 1MB\n                chunk = f.read(8192)\n                if not chunk:\n                    break\n                buf += chunk\n                if obj_start is None:\n                    idx = 0\n                    while idx < len(buf) and buf[idx] in WS:\n                        idx += 1\n                    if idx >= len(buf):\n                        continue\n                    if buf[idx] != '[':\n                        return set()\n                    idx += 1\n                    while idx < len(buf) and buf[idx] in WS:\n                        idx += 1\n                    if idx >= len(buf):\n                        continue\n                    obj_start = idx\n                try:\n                    obj, _ = decoder.raw_decode(buf, obj_start)\n                    if isinstance(obj, dict):\n                        return set(obj.keys())\n                    return set()\n                except json.JSONDecodeError:\n                    continue\n    except Exception:\n        pass\n    return set()\n\n\ndef _is_chat_dataset(filepath):\n    keys = _peek_json_keys(filepath)\n    return bool(keys & {'messages', 'conversations'})\n\n\ndef _is_text_dataset(filepath):\n    keys = _peek_json_keys(filepath)\n    return 'text' in keys\n\n\ndef get_available_chat_styles():\n    return 
sorted(set(('-'.join(k.stem.split('-')[1:]) for k in Path('css').glob('chat_style*.css'))), key=natural_keys)\n\n\ndef get_available_grammars():\n    return ['None'] + sorted([item.name for item in list((shared.user_data_dir / 'grammars').glob('*.gbnf'))], key=natural_keys)\n"
  },
  {
    "path": "modules/web_search.py",
    "content": "import concurrent.futures\nimport html\nimport ipaddress\nimport random\nimport re\nimport socket\nfrom concurrent.futures import as_completed\nfrom datetime import datetime\nfrom urllib.parse import parse_qs, quote_plus, urljoin, urlparse\n\nimport requests\n\nfrom modules import shared\nfrom modules.logging_colors import logger\n\n\ndef _validate_url(url):\n    \"\"\"Validate that a URL is safe to fetch (not targeting private/internal networks).\"\"\"\n    parsed = urlparse(url)\n    if parsed.scheme not in ('http', 'https'):\n        raise ValueError(f\"Unsupported URL scheme: {parsed.scheme}\")\n\n    hostname = parsed.hostname\n    if not hostname:\n        raise ValueError(\"No hostname in URL\")\n\n    # Resolve hostname and check all returned addresses\n    try:\n        for family, _, _, _, sockaddr in socket.getaddrinfo(hostname, None):\n            ip = ipaddress.ip_address(sockaddr[0])\n            if not ip.is_global:\n                raise ValueError(f\"Access to non-public address {ip} is blocked\")\n    except socket.gaierror:\n        raise ValueError(f\"Could not resolve hostname: {hostname}\")\n\n\ndef get_current_timestamp():\n    \"\"\"Returns the current time in 24-hour format\"\"\"\n    return datetime.now().strftime('%b %d, %Y %H:%M')\n\n\ndef download_web_page(url, timeout=10, include_links=False):\n    \"\"\"\n    Download a web page and extract its main content as Markdown text.\n    \"\"\"\n    import trafilatura\n\n    try:\n        _validate_url(url)\n        headers = {\n            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'\n        }\n        max_redirects = 5\n        for _ in range(max_redirects):\n            response = requests.get(url, headers=headers, timeout=timeout, allow_redirects=False)\n            if response.is_redirect and 'Location' in response.headers:\n                url = urljoin(url, 
response.headers['Location'])\n                _validate_url(url)\n            else:\n                break\n\n        response.raise_for_status()\n\n        result = trafilatura.extract(\n            response.text,\n            include_links=include_links,\n            output_format='markdown',\n            url=url\n        )\n        return result or \"\"\n    except requests.exceptions.RequestException as e:\n        logger.error(f\"Error downloading {url}: {e}\")\n        return \"\"\n    except Exception as e:\n        logger.error(f\"An unexpected error occurred: {e}\")\n        return \"\"\n\n\ndef perform_web_search(query, num_pages=3, max_workers=5, timeout=10, fetch_content=True):\n    \"\"\"Perform web search and return results, optionally with page content\"\"\"\n    try:\n        search_url = f\"https://html.duckduckgo.com/html/?q={quote_plus(query)}\"\n\n        agents = [\n            \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36\",\n            \"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36\"\n        ]\n\n        response = requests.get(search_url, headers={'User-Agent': random.choice(agents)}, timeout=timeout)\n        response.raise_for_status()\n        response_text = response.text\n\n        # Extract results - title and URL come from the same <a class=\"result__a\"> element\n        result_links = re.findall(r'<a[^>]*class=\"[^\"]*result__a[^\"]*\"[^>]*>(.*?)</a>', response_text, re.DOTALL)\n        result_tags = re.findall(r'<a([^>]*class=\"[^\"]*result__a[^\"]*\"[^>]*)>', response_text, re.DOTALL)\n\n        # Prepare download tasks\n        download_tasks = []\n        for i, (tag_attrs, raw_title) in enumerate(zip(result_tags, result_links)):\n            if num_pages is not None and i >= num_pages:\n                break\n            # Extract href and resolve the actual URL from DuckDuckGo's redirect link\n            href_match = re.search(r'href=\"([^\"]*)\"', tag_attrs)\n            if not 
href_match:\n                continue\n            uddg = parse_qs(urlparse(html.unescape(href_match.group(1))).query).get('uddg', [''])[0]\n            if not uddg:\n                continue\n            title = html.unescape(re.sub(r'<[^>]+>', '', raw_title).strip())\n            download_tasks.append((uddg, title, len(download_tasks)))\n\n        search_results = [None] * len(download_tasks)  # Pre-allocate to maintain order\n\n        if not fetch_content:\n            for url, title, index in download_tasks:\n                search_results[index] = {\n                    'title': title,\n                    'url': url,\n                    'content': ''\n                }\n\n            return search_results\n\n        # Download pages in parallel\n        with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:\n            # Submit all download tasks\n            future_to_task = {\n                executor.submit(download_web_page, task[0]): task\n                for task in download_tasks\n            }\n\n            # Collect results as they complete\n            for future in as_completed(future_to_task):\n                url, title, index = future_to_task[future]\n                try:\n                    content = future.result()\n                    search_results[index] = {\n                        'title': title,\n                        'url': url,\n                        'content': content\n                    }\n                except Exception:\n                    search_results[index] = {\n                        'title': title,\n                        'url': url,\n                        'content': ''\n                    }\n\n        return search_results\n\n    except Exception as e:\n        logger.error(f\"Error performing web search: {e}\")\n        return []\n\n\ndef truncate_content_by_tokens(content, max_tokens=8192):\n    \"\"\"Truncate content to fit within token limit using binary search\"\"\"\n    if 
len(shared.tokenizer.encode(content)) <= max_tokens:\n        return content\n\n    left, right = 0, len(content)\n    while left < right:\n        mid = (left + right + 1) // 2\n        if len(shared.tokenizer.encode(content[:mid])) <= max_tokens:\n            left = mid\n        else:\n            right = mid - 1\n\n    return content[:left]\n\n\ndef add_web_search_attachments(history, row_idx, user_message, search_query, state):\n    \"\"\"Perform web search and add results as attachments\"\"\"\n    if not search_query:\n        logger.warning(\"No search query provided\")\n        return\n\n    try:\n        logger.info(f\"Using search query: {search_query}\")\n\n        # Perform web search\n        num_pages = int(state.get('web_search_pages', 3))\n        search_results = perform_web_search(search_query, num_pages)\n\n        if not search_results:\n            logger.warning(\"No search results found\")\n            return\n\n        # Filter out failed downloads before adding attachments\n        successful_results = [result for result in search_results if result['content'].strip()]\n\n        if not successful_results:\n            logger.warning(\"No successful downloads to add as attachments\")\n            return\n\n        # Add search results as attachments\n        key = f\"user_{row_idx}\"\n        if key not in history['metadata']:\n            history['metadata'][key] = {\"timestamp\": get_current_timestamp()}\n        if \"attachments\" not in history['metadata'][key]:\n            history['metadata'][key][\"attachments\"] = []\n\n        for result in successful_results:\n            attachment = {\n                \"name\": result['title'],\n                \"type\": \"text/html\",\n                \"url\": result['url'],\n                \"content\": truncate_content_by_tokens(result['content'])\n            }\n            history['metadata'][key][\"attachments\"].append(attachment)\n\n        logger.info(f\"Added {len(successful_results)} 
successful web search results as attachments.\")\n\n    except Exception as e:\n        logger.error(f\"Error in web search: {e}\")\n"
  },
  {
    "path": "one_click.py",
    "content": "import argparse\nimport glob\nimport hashlib\nimport json\nimport os\nimport platform\nimport re\nimport signal\nimport site\nimport subprocess\nimport sys\n\n# Define the required versions\nTORCH_VERSION = \"2.9.1\"\nPYTHON_VERSION = \"3.13\"\nLIBSTDCXX_VERSION_LINUX = \"12.1.0\"\n\n# Environment\nscript_dir = os.getcwd()\nconda_env_path = os.path.join(script_dir, \"installer_files\", \"env\")\nstate_file = '.installer_state.json'\n\n# Command-line flags\nflags = f\"{' '.join([flag for flag in sys.argv[1:] if flag != '--update-wizard'])}\"\n\n\ndef signal_handler(sig, frame):\n    sys.exit(0)\n\n\nsignal.signal(signal.SIGINT, signal_handler)\n\n\ndef is_linux():\n    return sys.platform.startswith(\"linux\")\n\n\ndef is_windows():\n    return sys.platform.startswith(\"win\")\n\n\ndef is_macos():\n    return sys.platform.startswith(\"darwin\")\n\n\ndef is_x86_64():\n    return platform.machine() == \"x86_64\"\n\n\ndef is_installed():\n    site_packages_path = None\n    for sitedir in site.getsitepackages():\n        if \"site-packages\" in sitedir and conda_env_path in sitedir:\n            site_packages_path = sitedir\n            break\n\n    if site_packages_path:\n        return os.path.isfile(os.path.join(site_packages_path, 'torch', '__init__.py'))\n    else:\n        return os.path.isdir(conda_env_path)\n\n\ndef load_state():\n    \"\"\"Load installer state from JSON file\"\"\"\n    if os.path.exists(state_file):\n        try:\n            with open(state_file, 'r') as f:\n                return json.load(f)\n        except Exception:\n            return {}\n    return {}\n\n\ndef save_state(state):\n    \"\"\"Save installer state to JSON file\"\"\"\n    with open(state_file, 'w') as f:\n        json.dump(state, f)\n\n\ndef get_gpu_choice():\n    \"\"\"Get GPU choice from state file or ask user\"\"\"\n    state = load_state()\n    gpu_choice = state.get('gpu_choice')\n\n    if not gpu_choice:\n        if \"GPU_CHOICE\" in os.environ:\n        
    choice = os.environ[\"GPU_CHOICE\"].upper()\n            print_big_message(f\"Selected GPU choice \\\"{choice}\\\" based on the GPU_CHOICE environment variable.\")\n        else:\n            choice = get_user_choice(\n                \"What is your GPU?\",\n                {\n                    'A': 'NVIDIA',\n                    'B': 'AMD - Linux only, ROCm 7.2',\n                    'C': 'Apple M Series',\n                    'D': 'Intel Arc (beta)',\n                    'N': 'CPU mode'\n                },\n            )\n\n        # Convert choice to GPU name\n        gpu_choice = {\"A\": \"NVIDIA_CUDA128\", \"B\": \"AMD\", \"C\": \"APPLE\", \"D\": \"INTEL\", \"N\": \"NONE\"}[choice]\n\n        # Save choice to state\n        state['gpu_choice'] = gpu_choice\n        save_state(state)\n\n    return gpu_choice\n\n\ndef get_pytorch_install_command(gpu_choice):\n    \"\"\"Get PyTorch installation command based on GPU choice\"\"\"\n    base_cmd = f\"python -m pip install torch=={TORCH_VERSION} \"\n    pypi_fallback = \" --extra-index-url https://pypi.org/simple/\"\n\n    if gpu_choice == \"NVIDIA_CUDA128\":\n        return base_cmd + \"--index-url https://download.pytorch.org/whl/cu128\" + pypi_fallback\n    elif gpu_choice == \"AMD\":\n        py_tag = f\"cp{PYTHON_VERSION.replace('.', '')}\"\n        return f\"python -m pip install https://repo.radeon.com/rocm/manylinux/rocm-rel-7.2/torch-{TORCH_VERSION}%2Brocm7.2.0.lw.git7e1940d4-{py_tag}-{py_tag}-linux_x86_64.whl\"\n    elif gpu_choice in [\"APPLE\", \"NONE\"]:\n        return base_cmd + \"--index-url https://download.pytorch.org/whl/cpu\" + pypi_fallback\n    elif gpu_choice == \"INTEL\":\n        return base_cmd + \"--index-url https://download.pytorch.org/whl/xpu\"\n    else:\n        return base_cmd\n\n\ndef get_pytorch_update_command(gpu_choice):\n    \"\"\"Get PyTorch update command based on GPU choice\"\"\"\n    base_cmd = f\"python -m pip install --upgrade torch=={TORCH_VERSION} \"\n    
pypi_fallback = \" --extra-index-url https://pypi.org/simple/\"\n\n    if gpu_choice == \"NVIDIA_CUDA128\":\n        return f\"{base_cmd}--index-url https://download.pytorch.org/whl/cu128\" + pypi_fallback\n    elif gpu_choice == \"AMD\":\n        py_tag = f\"cp{PYTHON_VERSION.replace('.', '')}\"\n        return f\"python -m pip install --upgrade https://repo.radeon.com/rocm/manylinux/rocm-rel-7.2/torch-{TORCH_VERSION}%2Brocm7.2.0.lw.git7e1940d4-{py_tag}-{py_tag}-linux_x86_64.whl\"\n    elif gpu_choice in [\"APPLE\", \"NONE\"]:\n        return f\"{base_cmd}--index-url https://download.pytorch.org/whl/cpu\" + pypi_fallback\n    elif gpu_choice == \"INTEL\":\n        return f\"{base_cmd}--index-url https://download.pytorch.org/whl/xpu\"\n    else:\n        return base_cmd\n\n\ndef get_requirements_file(gpu_choice):\n    \"\"\"Get requirements file path based on GPU choice\"\"\"\n    requirements_base = os.path.join(\"requirements\", \"full\")\n\n    if gpu_choice == \"NVIDIA_CUDA128\":\n        file_name = \"requirements.txt\"\n    elif gpu_choice == \"AMD\":\n        file_name = \"requirements_amd.txt\"\n    elif gpu_choice == \"APPLE\":\n        file_name = f\"requirements_apple_{'intel' if is_x86_64() else 'silicon'}.txt\"\n    elif gpu_choice in [\"INTEL\", \"NONE\"]:\n        file_name = \"requirements_cpu_only.txt\"\n    else:\n        raise ValueError(f\"Unknown GPU choice: {gpu_choice}\")\n\n    return os.path.join(requirements_base, file_name)\n\n\ndef get_current_commit():\n    result = run_cmd(\"git rev-parse HEAD\", capture_output=True, environment=True)\n    return result.stdout.decode('utf-8').strip()\n\n\ndef get_extensions_names():\n    return [foldername for foldername in os.listdir('extensions') if os.path.isfile(os.path.join('extensions', foldername, 'requirements.txt'))]\n\n\ndef check_env():\n    # If we have access to conda, we are probably in an environment\n    conda_exist = run_cmd(\"conda\", environment=True, capture_output=True).returncode 
== 0\n    if not conda_exist:\n        print(\"Conda is not installed. Exiting...\")\n        sys.exit(1)\n\n    # Ensure this is a new environment and not the base environment\n    if os.environ.get(\"CONDA_DEFAULT_ENV\", \"\") == \"base\":\n        print(\"Create an environment for this project and activate it. Exiting...\")\n        sys.exit(1)\n\n\ndef clear_cache():\n    run_cmd(\"conda clean -a -y\", environment=True)\n    run_cmd(\"python -m pip cache purge\", environment=True)\n\n\ndef run_cmd(cmd, assert_success=False, environment=False, capture_output=False, env=None):\n    # Use the conda environment\n    if environment:\n        if is_windows():\n            conda_bat_path = os.path.join(script_dir, \"installer_files\", \"conda\", \"condabin\", \"conda.bat\")\n            python_path = os.path.join(conda_env_path, \"python.exe\")\n            cmd = cmd.replace(\"python \", f'\"{python_path}\" ')\n            cmd = f'\"{conda_bat_path}\" activate \"{conda_env_path}\" >nul && {cmd}'\n        else:\n            conda_sh_path = os.path.join(script_dir, \"installer_files\", \"conda\", \"etc\", \"profile.d\", \"conda.sh\")\n            cmd = f'. 
\"{conda_sh_path}\" && conda activate \"{conda_env_path}\" && {cmd}'\n\n    # Set executable to None for Windows, bash for everything else\n    executable = None if is_windows() else 'bash'\n\n    # Run shell commands\n    result = subprocess.run(cmd, shell=True, capture_output=capture_output, env=env, executable=executable)\n\n    # Assert the command ran successfully\n    if assert_success and result.returncode != 0:\n        print(f\"Command '{cmd}' failed with exit status code '{str(result.returncode)}'.\\n\\nExiting now.\\nTry running the start/update script again.\")\n        sys.exit(1)\n\n    return result\n\n\ndef print_big_message(message):\n    message = message.strip()\n    lines = message.split('\\n')\n    print(\"\\n\\n*******************************************************************\")\n    for line in lines:\n        print(\"*\", line)\n\n    print(\"*******************************************************************\\n\\n\")\n\n\ndef calculate_file_hash(file_path):\n    p = os.path.join(script_dir, file_path)\n    if os.path.isfile(p):\n        with open(p, 'rb') as f:\n            return hashlib.sha256(f.read()).hexdigest()\n    else:\n        return ''\n\n\ndef generate_alphabetic_sequence(index):\n    result = ''\n    while index >= 0:\n        index, remainder = divmod(index, 26)\n        result = chr(ord('A') + remainder) + result\n        index -= 1\n\n    return result\n\n\ndef get_user_choice(question, options_dict):\n    print()\n    print(question)\n    print()\n\n    for key, value in options_dict.items():\n        print(f\"{key}) {value}\")\n\n    print()\n\n    choice = input(\"Input> \").upper()\n    while choice not in options_dict.keys():\n        print(\"Invalid choice. 
Please try again.\")\n        choice = input(\"Input> \").upper()\n\n    return choice\n\n\ndef update_pytorch_and_python():\n    print_big_message(\"Checking for PyTorch updates.\")\n    gpu_choice = get_gpu_choice()\n    install_cmd = get_pytorch_update_command(gpu_choice)\n    run_cmd(install_cmd, assert_success=True, environment=True)\n\n\ndef clean_outdated_pytorch_cuda_dependencies():\n    patterns = [\"cu121\", \"cu122\", \"rocm6\", \"torch2.4\", \"torch2.6\", \"torch2.7\", \"torchvision\", \"torchaudio\"]\n    result = run_cmd(\"python -m pip list --format=freeze\", capture_output=True, environment=True)\n    matching_packages = []\n\n    for line in result.stdout.decode('utf-8').splitlines():\n        if \"==\" in line:\n            pkg_name, version = line.split('==', 1)\n            if any(pattern in version for pattern in patterns):\n                matching_packages.append(pkg_name)\n\n    if matching_packages:\n        print(f\"\\nUninstalling: {', '.join(matching_packages)}\\n\")\n        run_cmd(f\"python -m pip uninstall -y {' '.join(matching_packages)}\", assert_success=True, environment=True)\n\n    return matching_packages\n\n\ndef install_webui():\n    if os.path.isfile(state_file):\n        os.remove(state_file)\n\n    # Get GPU choice and save it to state\n    gpu_choice = get_gpu_choice()\n\n    # Write a flag to CMD_FLAGS.txt for CPU mode\n    if gpu_choice == \"NONE\":\n        cmd_flags_path = os.path.join(script_dir, \"user_data\", \"CMD_FLAGS.txt\")\n        with open(cmd_flags_path, 'r+') as cmd_flags_file:\n            if \"--cpu\" not in cmd_flags_file.read():\n                print_big_message(\"Adding the --cpu flag to user_data/CMD_FLAGS.txt.\")\n                cmd_flags_file.write(\"\\n--cpu\\n\")\n\n    # Handle CUDA version display\n    elif any((is_windows(), is_linux())) and gpu_choice == \"NVIDIA_CUDA128\":\n        print(\"CUDA: 12.8\")\n\n    # No PyTorch for AMD on Windows\n    elif is_windows() and gpu_choice == 
\"AMD\":\n        print(\"PyTorch setup on Windows is not implemented yet. Exiting...\")\n        sys.exit(1)\n\n    # Install Git and then Pytorch\n    print_big_message(\"Installing PyTorch.\")\n    install_pytorch = get_pytorch_install_command(gpu_choice)\n    run_cmd(f\"conda install -y ninja git && {install_pytorch}\", assert_success=True, environment=True)\n\n    # Install the webui requirements\n    update_requirements(initial_installation=True, pull=False)\n\n\ndef update_requirements(initial_installation=False, pull=True):\n    # Create .git directory if missing\n    if not os.path.exists(os.path.join(script_dir, \".git\")):\n        run_cmd(\n            \"git init -b main && git remote add origin https://github.com/oobabooga/text-generation-webui && \"\n            \"git fetch && git symbolic-ref refs/remotes/origin/HEAD refs/remotes/origin/main && \"\n            \"git reset --hard origin/main && git branch --set-upstream-to=origin/main\",\n            environment=True,\n            assert_success=True\n        )\n\n    # Check for outdated Python version and refuse to update\n    if '.'.join(map(str, sys.version_info[:2])) != PYTHON_VERSION:\n        print_big_message(\n            \"Your current installation uses Python {}.{}, which is outdated.\\n\"\n            \"Python {} is now required. A clean installation is needed.\\n\\n\"\n            \"INSTRUCTIONS:\\n\"\n            \"1. Delete the 'installer_files' folder in your text-generation-webui directory.\\n\"\n            \"2. 
Run the start script again (e.g., start_windows.bat).\\n\\n\"\n            \"This will create a fresh environment with the latest software.\".format(*sys.version_info[:2], PYTHON_VERSION)\n        )\n        sys.exit(0)\n\n    # Check for outdated CUDA 12.4 installs and refuse to update\n    state = load_state()\n    if state.get('gpu_choice') == 'NVIDIA':\n        print_big_message(\n            \"Your current installation uses CUDA 12.4, which has been removed.\\n\"\n            \"To update to the new default (CUDA 12.8), a clean installation is required.\\n\\n\"\n            \"INSTRUCTIONS:\\n\"\n            \"1. Delete the 'installer_files' folder in your text-generation-webui directory.\\n\"\n            \"2. Run the start script again (e.g., start_windows.bat).\\n\\n\"\n            \"This will create a fresh environment with the latest software.\"\n        )\n        sys.exit(0)\n\n    current_commit = get_current_commit()\n    wheels_changed = not os.path.exists(state_file)\n    installed_wheels = set()\n    if not wheels_changed:\n        state = load_state()\n        installed_wheels = set(state.get('installed_wheels', []))\n        if 'wheels_changed' in state or state.get('last_installed_commit') != current_commit:\n            wheels_changed = True\n\n    gpu_choice = get_gpu_choice()\n    requirements_file = get_requirements_file(gpu_choice)\n\n    if pull:\n        # Read .whl lines before pulling\n        before_pull_whl_lines = []\n        if os.path.exists(requirements_file):\n            with open(requirements_file, 'r') as f:\n                before_pull_whl_lines = [line for line in f if '.whl' in line]\n\n        print_big_message('Updating the local copy of the repository with \"git pull\"')\n\n        # Hash files before pulling\n        files_to_check = [\n            'start_linux.sh', 'start_macos.sh', 'start_windows.bat', 'start_wsl.bat',\n            'update_wizard_linux.sh', 'update_wizard_macos.sh', 'update_wizard_windows.bat', 
'update_wizard_wsl.bat',\n            'one_click.py'\n        ]\n        before_hashes = {file: calculate_file_hash(file) for file in files_to_check}\n\n        # Perform the git pull\n        run_cmd(\"git pull --autostash\", assert_success=True, environment=True)\n        current_commit = get_current_commit()\n\n        # Check hashes after pulling\n        after_hashes = {file: calculate_file_hash(file) for file in files_to_check}\n        if os.path.exists(requirements_file):\n            with open(requirements_file, 'r') as f:\n                after_pull_whl_lines = [line for line in f if '.whl' in line]\n\n            wheels_changed = wheels_changed or (before_pull_whl_lines != after_pull_whl_lines)\n\n        # Check for changes to installer files\n        for file in files_to_check:\n            if before_hashes[file] != after_hashes[file]:\n                print_big_message(f\"File '{file}' was updated during 'git pull'. Please run the script again.\")\n\n                # Save state before exiting\n                state = load_state()\n                state['last_installed_commit'] = current_commit\n                if wheels_changed:\n                    state['wheels_changed'] = True\n                save_state(state)\n                sys.exit(1)\n\n    if os.environ.get(\"INSTALL_EXTENSIONS\", \"\").lower() in (\"yes\", \"y\", \"true\", \"1\", \"t\", \"on\"):\n        install_extensions_requirements()\n\n    if is_linux():\n        run_cmd(f\"conda install -y -c conda-forge 'libstdcxx-ng>={LIBSTDCXX_VERSION_LINUX}'\", assert_success=True, environment=True)\n\n    # Update PyTorch\n    if not initial_installation:\n        update_pytorch_and_python()\n        clean_outdated_pytorch_cuda_dependencies()\n\n    print_big_message(f\"Installing webui requirements from file: {requirements_file}\")\n    print(f\"GPU Choice: {gpu_choice}\\n\")\n\n    # Prepare the requirements file\n    textgen_requirements = open(requirements_file).read().splitlines()\n    
all_whl_lines = [line.strip() for line in textgen_requirements if '.whl' in line]\n\n    if not initial_installation:\n        if installed_wheels:\n            # Per-wheel comparison: only re-download wheels that changed\n            textgen_requirements = [\n                line for line in textgen_requirements\n                if '.whl' not in line or line.strip() not in installed_wheels\n            ]\n        elif not wheels_changed:\n            textgen_requirements = [line for line in textgen_requirements if '.whl' not in line]\n\n    with open('temp_requirements.txt', 'w') as file:\n        file.write('\\n'.join(textgen_requirements))\n\n    # Workaround for git+ packages not updating properly.\n    git_requirements = [req for req in textgen_requirements if req.startswith(\"git+\")]\n    for req in git_requirements:\n        url = req.replace(\"git+\", \"\")\n        package_name = url.split(\"/\")[-1].split(\"@\")[0].rstrip(\".git\")\n        run_cmd(f\"python -m pip uninstall -y {package_name}\", environment=True)\n        print(f\"Uninstalled {package_name}\")\n\n    # Install/update the project requirements\n    run_cmd(\"python -m pip install -r temp_requirements.txt --upgrade\", assert_success=True, environment=True)\n\n    # Save state after successful installation\n    state = load_state()\n    state['last_installed_commit'] = current_commit\n    state['installed_wheels'] = all_whl_lines\n    state.pop('wheels_changed', None)\n    save_state(state)\n\n    # Clean up\n    os.remove('temp_requirements.txt')\n    clear_cache()\n\n\ndef install_extensions_requirements():\n    print_big_message(\"Installing extensions requirements.\\nSome of these may fail on Windows.\\nDon\\'t worry if you see error messages, as they will not affect the main program.\")\n    extensions = get_extensions_names()\n    for i, extension in enumerate(extensions):\n        print(f\"\\n\\n--- [{i + 1}/{len(extensions)}]: {extension}\\n\\n\")\n        extension_req_path = 
os.path.join(\"extensions\", extension, \"requirements.txt\")\n        run_cmd(f\"python -m pip install -r {extension_req_path} --upgrade\", assert_success=False, environment=True)\n\n\ndef launch_webui():\n    run_cmd(f\"python server.py {flags}\", environment=True)\n\n\nif __name__ == \"__main__\":\n    # Verifies we are in a conda environment\n    check_env()\n\n    parser = argparse.ArgumentParser(add_help=False)\n    parser.add_argument('--update-wizard', action='store_true', help='Launch a menu with update options.')\n    args, _ = parser.parse_known_args()\n\n    if args.update_wizard:\n        while True:\n            choice = get_user_choice(\n                \"What would you like to do?\",\n                {\n                    'A': 'Update the web UI',\n                    'B': 'Install/update extensions requirements',\n                    'C': 'Revert local changes to repository files with \\\"git reset --hard\\\"',\n                    'N': 'Nothing (exit)'\n                },\n            )\n\n            if choice == 'A':\n                update_requirements()\n            elif choice == 'B':\n                choices = {'A': 'All extensions'}\n                for i, name in enumerate(get_extensions_names()):\n                    key = generate_alphabetic_sequence(i + 1)\n                    choices[key] = name\n\n                choice = get_user_choice(\"What extension?\", choices)\n\n                if choice == 'A':\n                    install_extensions_requirements()\n                else:\n                    extension_req_path = os.path.join(\"extensions\", choices[choice], \"requirements.txt\")\n                    run_cmd(f\"python -m pip install -r {extension_req_path} --upgrade\", assert_success=False, environment=True)\n\n                update_requirements(pull=False)\n            elif choice == 'C':\n                run_cmd(\"git reset --hard\", assert_success=True, environment=True)\n            elif choice == 'N':\n                
sys.exit()\n    else:\n        if not is_installed():\n            install_webui()\n            os.chdir(script_dir)\n\n        if os.environ.get(\"LAUNCH_AFTER_INSTALL\", \"\").lower() in (\"no\", \"n\", \"false\", \"0\", \"f\", \"off\"):\n            print_big_message(\"Will now exit due to LAUNCH_AFTER_INSTALL.\")\n            sys.exit()\n\n        # Check if a model has been downloaded yet\n        if '--model-dir' in flags:\n            # Splits on ' ' or '=' while maintaining spaces within quotes\n            flags_list = re.split(' +(?=(?:[^\\\"]*\\\"[^\\\"]*\\\")*[^\\\"]*$)|=', flags)\n            model_dir = [flags_list[(flags_list.index(flag) + 1)] for flag in flags_list if flag == '--model-dir'][0].strip('\"\\'')\n        else:\n            model_dir = 'user_data/models'\n\n        if len([item for item in glob.glob(f'{model_dir}/*') if not item.endswith(('.txt', '.yaml'))]) == 0:\n            print_big_message(\"You haven't downloaded any model yet.\\nOnce the web UI launches, head over to the \\\"Model\\\" tab and download one.\")\n\n        # Workaround for llama-cpp-python loading paths in CUDA env vars even if they do not exist\n        conda_path_bin = os.path.join(conda_env_path, \"bin\")\n        if not os.path.exists(conda_path_bin):\n            os.mkdir(conda_path_bin)\n\n        # Launch the webui\n        launch_webui()\n"
  },
  {
    "path": "requirements/full/requirements.txt",
    "content": "accelerate==1.12.*\naudioop-lts<1.0; python_version >= \"3.13\"\nbitsandbytes==0.49.*\ndatasets\ndiffusers==0.37.*\neinops\nfastapi==0.112.4\nflash-linear-attention==0.4.*\nhuggingface-hub==1.5.*\njinja2==3.1.6\nmarkdown\nnumpy==2.2.*\npandas\npeft==0.18.*\nPillow>=9.5.0\npydantic==2.11.0\npymupdf==1.27.1\npython-docx==1.1.2\npyyaml\nrequests\nrich\nsafetensors==0.7.*\nscipy\nsentencepiece\ntensorboard\ntorchao==0.15.*\ntrafilatura==2.0.0\ntransformers==5.3.*\ntriton-windows==3.5.1.post24; platform_system == \"Windows\"\ntqdm\nwandb\n\n# Gradio\nhttps://github.com/oobabooga/gradio/releases/download/4.37.2-custom.11/gradio-4.37.2+custom.11-py3-none-any.whl\nhttps://github.com/oobabooga/gradio/releases/download/4.37.2-custom.11/gradio_client-1.0.2+custom.11-py3-none-any.whl\n\n# API\nflask_cloudflared==0.0.15\nsse-starlette==1.6.5\ntiktoken\n\n# CUDA wheels\nhttps://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.94.0/llama_cpp_binaries-0.94.0+cu124-py3-none-win_amd64.whl; platform_system == \"Windows\"\nhttps://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.94.0/llama_cpp_binaries-0.94.0+cu124-py3-none-linux_x86_64.whl; platform_system == \"Linux\" and platform_machine == \"x86_64\"\nhttps://github.com/turboderp-org/exllamav3/releases/download/v0.0.25/exllamav3-0.0.25+cu128.torch2.9.0-cp313-cp313-win_amd64.whl; platform_system == \"Windows\" and python_version == \"3.13\"\nhttps://github.com/turboderp-org/exllamav3/releases/download/v0.0.25/exllamav3-0.0.25+cu128.torch2.9.0-cp313-cp313-linux_x86_64.whl; platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version == \"3.13\"\nhttps://github.com/kingbri1/flash-attention/releases/download/v2.8.3/flash_attn-2.8.3+cu128torch2.9.0cxx11abiFALSE-cp313-cp313-win_amd64.whl; platform_system == \"Windows\" and python_version == 
\"3.13\"\nhttps://github.com/kingbri1/flash-attention/releases/download/v2.8.3/flash_attn-2.8.3+cu128torch2.9.0cxx11abiFALSE-cp313-cp313-linux_x86_64.whl; platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version == \"3.13\"\n"
  },
  {
    "path": "requirements/full/requirements_amd.txt",
    "content": "accelerate==1.12.*\naudioop-lts<1.0; python_version >= \"3.13\"\ndatasets\ndiffusers==0.37.*\neinops\nfastapi==0.112.4\nhuggingface-hub==1.5.*\njinja2==3.1.6\nmarkdown\nnumpy==2.2.*\npandas\npeft==0.18.*\nPillow>=9.5.0\npydantic==2.11.0\npymupdf==1.27.1\npython-docx==1.1.2\npyyaml\nrequests\nrich\nsafetensors==0.7.*\nscipy\nsentencepiece\ntensorboard\ntorchao==0.15.*\ntransformers==5.3.*\ntqdm\ntrafilatura==2.0.0\nwandb\n\n# Gradio\nhttps://github.com/oobabooga/gradio/releases/download/4.37.2-custom.11/gradio-4.37.2+custom.11-py3-none-any.whl\nhttps://github.com/oobabooga/gradio/releases/download/4.37.2-custom.11/gradio_client-1.0.2+custom.11-py3-none-any.whl\n\n# API\nflask_cloudflared==0.0.15\nsse-starlette==1.6.5\ntiktoken\n\n# AMD wheels\nhttps://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.94.0/llama_cpp_binaries-0.94.0+rocm7.2-py3-none-win_amd64.whl; platform_system == \"Windows\"\nhttps://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.94.0/llama_cpp_binaries-0.94.0+rocm7.2-py3-none-linux_x86_64.whl; platform_system == \"Linux\" and platform_machine == \"x86_64\"\n"
  },
  {
    "path": "requirements/full/requirements_apple_intel.txt",
    "content": "accelerate==1.12.*\naudioop-lts<1.0; python_version >= \"3.13\"\ndatasets\ndiffusers==0.37.*\neinops\nfastapi==0.112.4\nhuggingface-hub==1.5.*\njinja2==3.1.6\nmarkdown\nnumpy==2.2.*\npandas\npeft==0.18.*\nPillow>=9.5.0\npydantic==2.11.0\npymupdf==1.27.1\npython-docx==1.1.2\npyyaml\nrequests\nrich\nsafetensors==0.7.*\nscipy\nsentencepiece\ntensorboard\ntorchao==0.15.*\ntransformers==5.3.*\ntqdm\ntrafilatura==2.0.0\nwandb\n\n# Gradio\nhttps://github.com/oobabooga/gradio/releases/download/4.37.2-custom.11/gradio-4.37.2+custom.11-py3-none-any.whl\nhttps://github.com/oobabooga/gradio/releases/download/4.37.2-custom.11/gradio_client-1.0.2+custom.11-py3-none-any.whl\n\n# API\nflask_cloudflared==0.0.15\nsse-starlette==1.6.5\ntiktoken\n\n# Mac wheels\nhttps://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.94.0/llama_cpp_binaries-0.94.0-py3-none-macosx_13_0_x86_64.whl; platform_system == \"Darwin\"\n"
  },
  {
    "path": "requirements/full/requirements_apple_silicon.txt",
    "content": "accelerate==1.12.*\naudioop-lts<1.0; python_version >= \"3.13\"\ndatasets\ndiffusers==0.37.*\neinops\nfastapi==0.112.4\nhuggingface-hub==1.5.*\njinja2==3.1.6\nmarkdown\nnumpy==2.2.*\npandas\npeft==0.18.*\nPillow>=9.5.0\npydantic==2.11.0\npymupdf==1.27.1\npython-docx==1.1.2\npyyaml\nrequests\nrich\nsafetensors==0.7.*\nscipy\nsentencepiece\ntensorboard\ntorchao==0.15.*\ntransformers==5.3.*\ntqdm\ntrafilatura==2.0.0\nwandb\n\n# Gradio\nhttps://github.com/oobabooga/gradio/releases/download/4.37.2-custom.11/gradio-4.37.2+custom.11-py3-none-any.whl\nhttps://github.com/oobabooga/gradio/releases/download/4.37.2-custom.11/gradio_client-1.0.2+custom.11-py3-none-any.whl\n\n# API\nflask_cloudflared==0.0.15\nsse-starlette==1.6.5\ntiktoken\n\n# Mac wheels\nhttps://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.94.0/llama_cpp_binaries-0.94.0-py3-none-macosx_13_0_arm64.whl; platform_system == \"Darwin\"\n"
  },
  {
    "path": "requirements/full/requirements_cpu_only.txt",
    "content": "accelerate==1.12.*\naudioop-lts<1.0; python_version >= \"3.13\"\ndatasets\ndiffusers==0.37.*\neinops\nfastapi==0.112.4\nhuggingface-hub==1.5.*\njinja2==3.1.6\nmarkdown\nnumpy==2.2.*\npandas\npeft==0.18.*\nPillow>=9.5.0\npydantic==2.11.0\npymupdf==1.27.1\npython-docx==1.1.2\npyyaml\nrequests\nrich\nsafetensors==0.7.*\nscipy\nsentencepiece\ntensorboard\ntorchao==0.15.*\ntransformers==5.3.*\ntqdm\ntrafilatura==2.0.0\nwandb\n\n# Gradio\nhttps://github.com/oobabooga/gradio/releases/download/4.37.2-custom.11/gradio-4.37.2+custom.11-py3-none-any.whl\nhttps://github.com/oobabooga/gradio/releases/download/4.37.2-custom.11/gradio_client-1.0.2+custom.11-py3-none-any.whl\n\n# API\nflask_cloudflared==0.0.15\nsse-starlette==1.6.5\ntiktoken\n\n# llama.cpp (CPU only)\nhttps://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.94.0/llama_cpp_binaries-0.94.0+cpu-py3-none-linux_x86_64.whl; platform_system == \"Linux\" and platform_machine == \"x86_64\"\nhttps://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.94.0/llama_cpp_binaries-0.94.0+cpu-py3-none-win_amd64.whl; platform_system == \"Windows\"\n"
  },
  {
    "path": "requirements/full/requirements_nowheels.txt",
    "content": "accelerate==1.12.*\naudioop-lts<1.0; python_version >= \"3.13\"\ndatasets\ndiffusers==0.37.*\neinops\nfastapi==0.112.4\nhuggingface-hub==1.5.*\njinja2==3.1.6\nmarkdown\nnumpy==2.2.*\npandas\npeft==0.18.*\nPillow>=9.5.0\npydantic==2.11.0\npymupdf==1.27.1\npython-docx==1.1.2\npyyaml\nrequests\nrich\nsafetensors==0.7.*\nscipy\nsentencepiece\ntensorboard\ntorchao==0.15.*\ntransformers==5.3.*\ntqdm\ntrafilatura==2.0.0\nwandb\n\n# Gradio\nhttps://github.com/oobabooga/gradio/releases/download/4.37.2-custom.11/gradio-4.37.2+custom.11-py3-none-any.whl\nhttps://github.com/oobabooga/gradio/releases/download/4.37.2-custom.11/gradio_client-1.0.2+custom.11-py3-none-any.whl\n\n# API\nflask_cloudflared==0.0.15\nsse-starlette==1.6.5\ntiktoken\n"
  },
  {
    "path": "requirements/portable/requirements.txt",
    "content": "audioop-lts<1.0; python_version >= \"3.13\"\nfastapi==0.112.4\nhuggingface-hub==1.5.*\njinja2==3.1.6\nmarkdown\nnumpy==2.2.*\npydantic==2.11.0\npymupdf==1.27.1\npython-docx==1.1.2\npyyaml\nrequests\nrich\ntrafilatura==2.0.0\ntqdm\n\n# Gradio\nhttps://github.com/oobabooga/gradio/releases/download/4.37.2-custom.11/gradio-4.37.2+custom.11-py3-none-any.whl\nhttps://github.com/oobabooga/gradio/releases/download/4.37.2-custom.11/gradio_client-1.0.2+custom.11-py3-none-any.whl\n\n# API\nflask_cloudflared==0.0.15\nsse-starlette==1.6.5\ntiktoken\n\n# CUDA wheels\nhttps://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.94.0/llama_cpp_binaries-0.94.0+cu124-py3-none-win_amd64.whl; platform_system == \"Windows\"\nhttps://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.94.0/llama_cpp_binaries-0.94.0+cu124-py3-none-linux_x86_64.whl; platform_system == \"Linux\" and platform_machine == \"x86_64\"\n"
  },
  {
    "path": "requirements/portable/requirements_amd.txt",
    "content": "audioop-lts<1.0; python_version >= \"3.13\"\nfastapi==0.112.4\nhuggingface-hub==1.5.*\njinja2==3.1.6\nmarkdown\nnumpy==2.2.*\npydantic==2.11.0\npymupdf==1.27.1\npython-docx==1.1.2\npyyaml\nrequests\nrich\ntrafilatura==2.0.0\ntqdm\n\n# Gradio\nhttps://github.com/oobabooga/gradio/releases/download/4.37.2-custom.11/gradio-4.37.2+custom.11-py3-none-any.whl\nhttps://github.com/oobabooga/gradio/releases/download/4.37.2-custom.11/gradio_client-1.0.2+custom.11-py3-none-any.whl\n\n# API\nflask_cloudflared==0.0.15\nsse-starlette==1.6.5\ntiktoken\n\n# AMD wheels\nhttps://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.94.0/llama_cpp_binaries-0.94.0+rocm7.2-py3-none-win_amd64.whl; platform_system == \"Windows\"\nhttps://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.94.0/llama_cpp_binaries-0.94.0+rocm7.2-py3-none-linux_x86_64.whl; platform_system == \"Linux\" and platform_machine == \"x86_64\"\n"
  },
  {
    "path": "requirements/portable/requirements_apple_intel.txt",
    "content": "audioop-lts<1.0; python_version >= \"3.13\"\nfastapi==0.112.4\nhuggingface-hub==1.5.*\njinja2==3.1.6\nmarkdown\nnumpy==2.2.*\npydantic==2.11.0\npymupdf==1.27.1\npython-docx==1.1.2\npyyaml\nrequests\nrich\ntrafilatura==2.0.0\ntqdm\n\n# Gradio\nhttps://github.com/oobabooga/gradio/releases/download/4.37.2-custom.11/gradio-4.37.2+custom.11-py3-none-any.whl\nhttps://github.com/oobabooga/gradio/releases/download/4.37.2-custom.11/gradio_client-1.0.2+custom.11-py3-none-any.whl\n\n# API\nflask_cloudflared==0.0.15\nsse-starlette==1.6.5\ntiktoken\n\n# Mac wheels\nhttps://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.94.0/llama_cpp_binaries-0.94.0-py3-none-macosx_13_0_x86_64.whl; platform_system == \"Darwin\"\n"
  },
  {
    "path": "requirements/portable/requirements_apple_silicon.txt",
    "content": "audioop-lts<1.0; python_version >= \"3.13\"\nfastapi==0.112.4\nhuggingface-hub==1.5.*\njinja2==3.1.6\nmarkdown\nnumpy==2.2.*\npydantic==2.11.0\npymupdf==1.27.1\npython-docx==1.1.2\npyyaml\nrequests\nrich\ntrafilatura==2.0.0\ntqdm\n\n# Gradio\nhttps://github.com/oobabooga/gradio/releases/download/4.37.2-custom.11/gradio-4.37.2+custom.11-py3-none-any.whl\nhttps://github.com/oobabooga/gradio/releases/download/4.37.2-custom.11/gradio_client-1.0.2+custom.11-py3-none-any.whl\n\n# API\nflask_cloudflared==0.0.15\nsse-starlette==1.6.5\ntiktoken\n\n# Mac wheels\nhttps://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.94.0/llama_cpp_binaries-0.94.0-py3-none-macosx_13_0_arm64.whl; platform_system == \"Darwin\"\n"
  },
  {
    "path": "requirements/portable/requirements_cpu_only.txt",
    "content": "audioop-lts<1.0; python_version >= \"3.13\"\nfastapi==0.112.4\nhuggingface-hub==1.5.*\njinja2==3.1.6\nmarkdown\nnumpy==2.2.*\npydantic==2.11.0\npymupdf==1.27.1\npython-docx==1.1.2\npyyaml\nrequests\nrich\ntrafilatura==2.0.0\ntqdm\n\n# Gradio\nhttps://github.com/oobabooga/gradio/releases/download/4.37.2-custom.11/gradio-4.37.2+custom.11-py3-none-any.whl\nhttps://github.com/oobabooga/gradio/releases/download/4.37.2-custom.11/gradio_client-1.0.2+custom.11-py3-none-any.whl\n\n# API\nflask_cloudflared==0.0.15\nsse-starlette==1.6.5\ntiktoken\n\n# llama.cpp (CPU only)\nhttps://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.94.0/llama_cpp_binaries-0.94.0+cpu-py3-none-linux_x86_64.whl; platform_system == \"Linux\" and platform_machine == \"x86_64\"\nhttps://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.94.0/llama_cpp_binaries-0.94.0+cpu-py3-none-win_amd64.whl; platform_system == \"Windows\"\n"
  },
  {
    "path": "requirements/portable/requirements_cuda131.txt",
    "content": "audioop-lts<1.0; python_version >= \"3.13\"\nfastapi==0.112.4\nhuggingface-hub==1.5.*\njinja2==3.1.6\nmarkdown\nnumpy==2.2.*\npydantic==2.11.0\npymupdf==1.27.1\npython-docx==1.1.2\npyyaml\nrequests\nrich\ntrafilatura==2.0.0\ntqdm\n\n# Gradio\nhttps://github.com/oobabooga/gradio/releases/download/4.37.2-custom.11/gradio-4.37.2+custom.11-py3-none-any.whl\nhttps://github.com/oobabooga/gradio/releases/download/4.37.2-custom.11/gradio_client-1.0.2+custom.11-py3-none-any.whl\n\n# API\nflask_cloudflared==0.0.15\nsse-starlette==1.6.5\ntiktoken\n\n# CUDA wheels\nhttps://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.94.0/llama_cpp_binaries-0.94.0+cu131-py3-none-win_amd64.whl; platform_system == \"Windows\"\nhttps://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.94.0/llama_cpp_binaries-0.94.0+cu131-py3-none-linux_x86_64.whl; platform_system == \"Linux\" and platform_machine == \"x86_64\"\n"
  },
  {
    "path": "requirements/portable/requirements_nowheels.txt",
    "content": "audioop-lts<1.0; python_version >= \"3.13\"\nfastapi==0.112.4\nhuggingface-hub==1.5.*\njinja2==3.1.6\nmarkdown\nnumpy==2.2.*\npydantic==2.11.0\npymupdf==1.27.1\npython-docx==1.1.2\npyyaml\nrequests\nrich\ntrafilatura==2.0.0\ntqdm\n\n# Gradio\nhttps://github.com/oobabooga/gradio/releases/download/4.37.2-custom.11/gradio-4.37.2+custom.11-py3-none-any.whl\nhttps://github.com/oobabooga/gradio/releases/download/4.37.2-custom.11/gradio_client-1.0.2+custom.11-py3-none-any.whl\n\n# API\nflask_cloudflared==0.0.15\nsse-starlette==1.6.5\ntiktoken\n"
  },
  {
    "path": "requirements/portable/requirements_vulkan.txt",
    "content": "audioop-lts<1.0; python_version >= \"3.13\"\nfastapi==0.112.4\nhuggingface-hub==1.5.*\njinja2==3.1.6\nmarkdown\nnumpy==2.2.*\npydantic==2.11.0\npymupdf==1.27.1\npython-docx==1.1.2\npyyaml\nrequests\nrich\ntrafilatura==2.0.0\ntqdm\n\n# Gradio\nhttps://github.com/oobabooga/gradio/releases/download/4.37.2-custom.11/gradio-4.37.2+custom.11-py3-none-any.whl\nhttps://github.com/oobabooga/gradio/releases/download/4.37.2-custom.11/gradio_client-1.0.2+custom.11-py3-none-any.whl\n\n# API\nflask_cloudflared==0.0.15\nsse-starlette==1.6.5\ntiktoken\n\n# Vulkan wheels\nhttps://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.94.0/llama_cpp_binaries-0.94.0+vulkan-py3-none-win_amd64.whl; platform_system == \"Windows\"\nhttps://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.94.0/llama_cpp_binaries-0.94.0+vulkan-py3-none-linux_x86_64.whl; platform_system == \"Linux\" and platform_machine == \"x86_64\"\n"
  },
  {
    "path": "server.py",
    "content": "import os\nimport signal\nimport sys\nimport time\nimport warnings\nfrom functools import partial\nfrom pathlib import Path\nfrom threading import Lock, Thread\n\nimport yaml\n\nfrom modules import shared, utils\nfrom modules.image_models import load_image_model\nfrom modules.logging_colors import logger\nfrom modules.prompts import load_prompt\n\nimport modules.extensions as extensions_module\nfrom modules.LoRA import add_lora_to_model\nfrom modules.models import load_model, unload_model_if_idle\nfrom modules.models_settings import (\n    get_fallback_settings,\n    get_model_metadata,\n    update_model_parameters\n)\nfrom modules.shared import do_cmd_flags_warnings\n\nos.environ['BITSANDBYTES_NOWELCOME'] = '1'\n\nwarnings.filterwarnings('ignore', category=UserWarning, message='TypedStorage is deprecated')\nwarnings.filterwarnings('ignore', category=UserWarning, message='Using the update method is deprecated')\nwarnings.filterwarnings('ignore', category=UserWarning, message='Field \"model_name\" has conflict')\nwarnings.filterwarnings('ignore', category=UserWarning, message='Field \"model_names\" has conflict')\n\n\ndef signal_handler(sig, frame):\n    # On second Ctrl+C, force an immediate exit\n    signal.signal(signal.SIGINT, signal.SIG_DFL)\n    signal.signal(signal.SIGTERM, signal.SIG_DFL)\n\n    logger.info(\"Received Ctrl+C. 
Shutting down Text Generation Web UI gracefully.\")\n\n    # Explicitly stop LlamaServer to avoid __del__ cleanup issues during shutdown\n    if shared.model and shared.model.__class__.__name__ == 'LlamaServer':\n        try:\n            shared.model.stop()\n        except Exception:\n            pass\n\n    sys.exit(0)\n\n\nsignal.signal(signal.SIGINT, signal_handler)\nsignal.signal(signal.SIGTERM, signal_handler)\n\n\ndef create_interface():\n\n    import shutil\n\n    import gradio as gr\n\n    from modules import (\n        training,\n        ui,\n        ui_chat,\n        ui_default,\n        ui_file_saving,\n        ui_image_generation,\n        ui_model_menu,\n        ui_notebook,\n        ui_parameters,\n        ui_session,\n    )\n    from modules.chat import generate_pfp_cache\n    from modules.extensions import apply_extensions\n    from modules.utils import gradio\n\n    warnings.filterwarnings('ignore', category=UserWarning, message='The value passed into gr.Dropdown()')\n\n    # Set up Gradio temp directory path\n    gradio_temp_path = shared.user_data_dir / 'cache' / 'gradio'\n    shutil.rmtree(gradio_temp_path, ignore_errors=True)\n    gradio_temp_path.mkdir(parents=True, exist_ok=True)\n    os.environ.update({\n        'GRADIO_ANALYTICS_ENABLED': 'False',\n        'GRADIO_TEMP_DIR': str(gradio_temp_path)\n    })\n\n    title = 'Text Generation Web UI'\n\n    # Password authentication\n    auth = []\n    if shared.args.gradio_auth:\n        auth.extend(x.strip() for x in shared.args.gradio_auth.strip('\"').replace('\\n', '').split(',') if x.strip())\n    if shared.args.gradio_auth_path:\n        with open(shared.args.gradio_auth_path, 'r', encoding=\"utf8\") as file:\n            auth.extend(x.strip() for line in file for x in line.split(',') if x.strip())\n    auth = [tuple(cred.split(':')) for cred in auth]\n\n    # Allowed paths\n    allowed_paths = [\"css\", \"js\", \"extensions\", str(shared.user_data_dir / \"cache\")]\n    if not 
shared.args.multi_user:\n        allowed_paths.append(str(shared.user_data_dir / \"image_outputs\"))\n\n    # Import the extensions and execute their setup() functions\n    if shared.args.extensions is not None and len(shared.args.extensions) > 0:\n        extensions_module.load_extensions()\n\n    # Force some events to be triggered on page load\n    shared.persistent_interface_state.update({\n        'mode': shared.settings['mode'],\n        'loader': shared.args.loader or 'llama.cpp',\n        'filter_by_loader': (shared.args.loader or 'All') if not shared.args.portable else 'llama.cpp'\n    })\n\n    if not shared.settings['prompt-notebook']:\n        shared.settings['prompt-notebook'] = utils.get_available_prompts()[0]\n\n    prompt = load_prompt(shared.settings['prompt-notebook'])\n    shared.persistent_interface_state.update({\n        'textbox-default': prompt,\n        'textbox-notebook': prompt\n    })\n\n    # Clear existing cache files\n    for cache_file in ['pfp_character.png', 'pfp_character_thumb.png']:\n        cache_path = shared.user_data_dir / \"cache\" / cache_file\n        if cache_path.exists():\n            cache_path.unlink()\n\n    # Regenerate for default character\n    if shared.settings['mode'] != 'instruct':\n        generate_pfp_cache(shared.settings['character'])\n\n    # css/js strings\n    css = ui.css\n    js = ui.js\n    css += apply_extensions('css')\n    js += apply_extensions('js')\n\n    # Interface state elements\n    shared.input_elements = ui.list_interface_input_elements()\n\n    # Head HTML for font preloads, KaTeX, highlight.js, morphdom, and global JS\n    head_html = '\\n'.join([\n        '<link rel=\"preload\" href=\"file/css/Inter/Inter-VariableFont_opsz,wght.ttf\" as=\"font\" type=\"font/ttf\" crossorigin>',\n        '<link rel=\"preload\" href=\"file/css/Inter/Inter-Italic-VariableFont_opsz,wght.ttf\" as=\"font\" type=\"font/ttf\" crossorigin>',\n        '<link rel=\"preload\" 
href=\"file/css/NotoSans/NotoSans-Medium.woff2\" as=\"font\" type=\"font/woff2\" crossorigin>',\n        '<link rel=\"preload\" href=\"file/css/NotoSans/NotoSans-MediumItalic.woff2\" as=\"font\" type=\"font/woff2\" crossorigin>',\n        '<link rel=\"preload\" href=\"file/css/NotoSans/NotoSans-Bold.woff2\" as=\"font\" type=\"font/woff2\" crossorigin>',\n        '<script src=\"file/js/katex/katex.min.js\"></script>',\n        '<script src=\"file/js/katex/auto-render.js\"></script>',\n        '<script src=\"file/js/highlightjs/highlight.min.js\"></script>',\n        '<script src=\"file/js/highlightjs/highlightjs-copy.min.js\"></script>',\n        '<script src=\"file/js/morphdom/morphdom-umd.min.js\"></script>',\n        f'<link id=\"highlight-css\" rel=\"stylesheet\" href=\"file/css/highlightjs/{\"github-dark\" if shared.settings[\"dark_theme\"] else \"github\"}.min.css\">',\n        '<script>hljs.addPlugin(new CopyButtonPlugin());</script>',\n        f'<script>{ui.global_scope_js}</script>',\n    ])\n\n    with gr.Blocks(css=css, analytics_enabled=False, title=title, theme=ui.theme, head=head_html, dark_theme=shared.settings['dark_theme']) as shared.gradio['interface']:\n\n        # Interface state\n        shared.gradio['interface_state'] = gr.State({k: None for k in shared.input_elements})\n\n        # Audio notification\n        if (shared.user_data_dir / \"notification.mp3\").exists():\n            shared.gradio['audio_notification'] = gr.Audio(interactive=False, value=str(shared.user_data_dir / \"notification.mp3\"), elem_id=\"audio_notification\", visible=False)\n\n        # Floating menus for saving/deleting files\n        ui_file_saving.create_ui()\n\n        # Temporary clipboard for saving files\n        shared.gradio['temporary_text'] = gr.Textbox(visible=False)\n\n        # Chat tab\n        ui_chat.create_ui()\n\n        # Notebook tab\n        with gr.Tab(\"Notebook\", elem_id='notebook-parent-tab'):\n            ui_default.create_ui()\n            
ui_notebook.create_ui()\n\n        ui_parameters.create_ui()  # Parameters tab\n        ui_chat.create_character_settings_ui()  # Character tab\n        ui_model_menu.create_ui()  # Model tab\n        if not shared.args.portable:\n            ui_image_generation.create_ui()  # Image generation tab\n            training.create_ui()  # Training tab\n        ui_session.create_ui()  # Session tab\n\n        # Generation events\n        ui_chat.create_event_handlers()\n        ui_default.create_event_handlers()\n        ui_notebook.create_event_handlers()\n        if not shared.args.portable:\n            ui_image_generation.create_event_handlers()\n\n        # Other events\n        ui_file_saving.create_event_handlers()\n        ui_parameters.create_event_handlers()\n        ui_model_menu.create_event_handlers()\n\n        # UI persistence events\n        ui.setup_auto_save()\n\n        # Interface launch events\n        shared.gradio['interface'].load(\n            None,\n            gradio('show_controls'),\n            None,\n            js=f\"\"\"(x) => {{\n                {js}\n                {ui.show_controls_js}\n                toggle_controls(x);\n            }}\"\"\"\n        )\n\n        shared.gradio['interface'].load(partial(ui.apply_interface_values, {}, use_persistent=True), None, gradio(ui.list_interface_input_elements()), show_progress=False)\n\n        # Sync theme_state with the actual client-side theme so that\n        # autosave always writes the correct dark_theme value.\n        shared.gradio['interface'].load(None, None, gradio('theme_state'), js='() => document.body.classList.contains(\"dark\") ? 
\"dark\" : \"light\"')\n\n        extensions_module.create_extensions_tabs()  # Extensions tabs\n        extensions_module.create_extensions_block()  # Extensions block\n\n    # Launch the interface\n    shared.gradio['interface'].queue()\n    shared.gradio['interface'].launch(\n        max_threads=64,\n        prevent_thread_lock=True,\n        share=shared.args.share,\n        server_name=None if not shared.args.listen else (shared.args.listen_host or '0.0.0.0'),\n        server_port=shared.args.listen_port,\n        inbrowser=shared.args.auto_launch,\n        auth=auth or None,\n        ssl_verify=False if (shared.args.ssl_keyfile or shared.args.ssl_certfile) else True,\n        ssl_keyfile=shared.args.ssl_keyfile,\n        ssl_certfile=shared.args.ssl_certfile,\n        root_path=shared.args.subpath,\n        allowed_paths=allowed_paths,\n    )\n\n\nif __name__ == \"__main__\":\n\n    logger.info(\"Starting Text Generation Web UI\")\n    do_cmd_flags_warnings()\n\n    # Load custom settings\n    settings_file = None\n    if shared.args.settings is not None and Path(shared.args.settings).exists():\n        settings_file = Path(shared.args.settings)\n    elif (shared.user_data_dir / 'settings.yaml').exists():\n        settings_file = shared.user_data_dir / 'settings.yaml'\n\n    if settings_file is not None:\n        logger.info(f\"Loading settings from \\\"{settings_file}\\\"\")\n        with open(settings_file, 'r', encoding='utf-8') as f:\n            new_settings = yaml.safe_load(f.read())\n\n        if new_settings:\n            shared.settings.update(new_settings)\n\n    # Apply CLI overrides for image model settings (CLI flags take precedence over saved settings)\n    shared.apply_image_model_cli_overrides()\n\n    # Fallback settings for models\n    shared.model_config['.*'] = get_fallback_settings()\n    shared.model_config.move_to_end('.*', last=False)  # Move to the beginning\n\n    # Activate the extensions listed on settings.yaml\n    
extensions_module.available_extensions = utils.get_available_extensions()\n    for extension in shared.settings['default_extensions']:\n        shared.args.extensions = shared.args.extensions or []\n        if extension not in shared.args.extensions:\n            shared.args.extensions.append(extension)\n\n    # Load image model if specified via CLI\n    if shared.args.image_model:\n        logger.info(f\"Loading image model: {shared.args.image_model}\")\n        result = load_image_model(\n            shared.args.image_model,\n            dtype=shared.settings.get('image_dtype', 'bfloat16'),\n            attn_backend=shared.settings.get('image_attn_backend', 'sdpa'),\n            cpu_offload=shared.settings.get('image_cpu_offload', False),\n            compile_model=shared.settings.get('image_compile', False),\n            quant_method=shared.settings.get('image_quant', 'none')\n        )\n        if result is not None:\n            shared.image_model_name = shared.args.image_model\n        else:\n            logger.error(f\"Failed to load image model: {shared.args.image_model}\")\n\n    available_models = utils.get_available_models()\n\n    # Model defined through --model\n    if shared.args.model is not None:\n        shared.model_name = shared.args.model\n\n    # Select the model from a command-line menu\n    elif shared.args.model_menu:\n        if len(available_models) == 0:\n            logger.error('No models are available! Please download at least one.')\n            sys.exit(0)\n        else:\n            print('The following models are available:\\n')\n            for i, model in enumerate(available_models):\n                print(f'{i+1}. {model}')\n\n            print(f'\\nWhich one do you want to load? 
1-{len(available_models)}\\n')\n            i = int(input()) - 1\n            print()\n\n        shared.model_name = available_models[i]\n\n    # If any model has been selected, load it\n    if shared.model_name != 'None':\n        model_settings = get_model_metadata(shared.model_name)\n        update_model_parameters(model_settings, initial=True)  # hijack the command-line arguments\n\n        # Load the model\n        shared.model, shared.tokenizer = load_model(shared.model_name)\n        if shared.args.lora:\n            add_lora_to_model(shared.args.lora)\n\n    shared.generation_lock = Lock()\n\n    if shared.args.idle_timeout > 0:\n        timer_thread = Thread(target=unload_model_if_idle)\n        timer_thread.daemon = True\n        timer_thread.start()\n\n    if shared.args.nowebui:\n        # Start the API in standalone mode\n        shared.args.extensions = [x for x in (shared.args.extensions or []) if x != 'gallery']\n        if shared.args.extensions:\n            extensions_module.load_extensions()\n    else:\n        # Launch the web UI\n        create_interface()\n        while True:\n            time.sleep(0.5)\n            if shared.need_restart:\n                shared.need_restart = False\n                time.sleep(0.5)\n                shared.gradio['interface'].close()\n                time.sleep(0.5)\n                create_interface()\n"
  },
  {
    "path": "setup.cfg",
    "content": "[pycodestyle]\nmax-line-length = 120\nignore = E402, E501, E722"
  },
  {
    "path": "start_linux.sh",
    "content": "#!/usr/bin/env bash\n\n# environment isolation\nexport PYTHONNOUSERSITE=1\nunset PYTHONPATH\nunset PYTHONHOME\n\ncd \"$(dirname \"$(readlink -f \"${BASH_SOURCE[0]}\")\")\"\n\n# Portable install case\nif [ -d \"portable_env\" ]; then\n    ./portable_env/bin/python3 server.py --portable --api --auto-launch \"$@\"\n    exit $?\nfi\n\nif [[ \"$(pwd)\" =~ \" \" ]]; then echo This script relies on Miniforge which can not be silently installed under a path with spaces. && exit; fi\n\n# deactivate existing conda envs as needed to avoid conflicts\n{ conda deactivate && conda deactivate && conda deactivate; } 2> /dev/null\n\nOS_ARCH=$(uname -m)\ncase \"${OS_ARCH}\" in\n    x86_64*)    OS_ARCH=\"x86_64\";;\n    arm64*)     OS_ARCH=\"aarch64\";;\n    aarch64*)     OS_ARCH=\"aarch64\";;\n    *)          echo \"Unknown system architecture: $OS_ARCH! This script runs only on x86_64 or arm64\" && exit\nesac\n\n# config\nINSTALL_DIR=\"$(pwd)/installer_files\"\nCONDA_ROOT_PREFIX=\"$(pwd)/installer_files/conda\"\nINSTALL_ENV_DIR=\"$(pwd)/installer_files/env\"\nMINIFORGE_DOWNLOAD_URL=\"https://github.com/conda-forge/miniforge/releases/download/26.1.0-0/Miniforge3-26.1.0-0-Linux-${OS_ARCH}.sh\"\nconda_exists=\"F\"\n\n# figure out whether git and conda needs to be installed\nif \"$CONDA_ROOT_PREFIX/bin/conda\" --version &>/dev/null; then conda_exists=\"T\"; fi\n\n# (if necessary) install git and conda into a contained environment\n# download miniforge\nif [ \"$conda_exists\" == \"F\" ]; then\n    echo \"Downloading Miniforge from $MINIFORGE_DOWNLOAD_URL to $INSTALL_DIR/miniforge_installer.sh\"\n\n    mkdir -p \"$INSTALL_DIR\"\n    curl -L \"$MINIFORGE_DOWNLOAD_URL\" > \"$INSTALL_DIR/miniforge_installer.sh\"\n\n    chmod u+x \"$INSTALL_DIR/miniforge_installer.sh\"\n    bash \"$INSTALL_DIR/miniforge_installer.sh\" -b -p $CONDA_ROOT_PREFIX\n\n    # test the conda binary\n    echo \"Miniforge version:\"\n    \"$CONDA_ROOT_PREFIX/bin/conda\" --version\n\n    # delete the 
Miniforge installer\n    rm \"$INSTALL_DIR/miniforge_installer.sh\"\nfi\n\n# create the installer env\nif [ ! -e \"$INSTALL_ENV_DIR\" ]; then\n    \"$CONDA_ROOT_PREFIX/bin/conda\" create -y -k --prefix \"$INSTALL_ENV_DIR\" python=3.13\nfi\n\n# check if conda environment was actually created\nif [ ! -e \"$INSTALL_ENV_DIR/bin/python\" ]; then\n    echo \"Conda environment is empty.\"\n    exit\nfi\n\nexport CUDA_PATH=\"$INSTALL_ENV_DIR\"\nexport CUDA_HOME=\"$CUDA_PATH\"\n\n# activate installer env\nsource \"$CONDA_ROOT_PREFIX/etc/profile.d/conda.sh\" # otherwise conda complains about 'shell not initialized' (needed when running in a script)\nconda activate \"$INSTALL_ENV_DIR\"\n\n# setup installer env\npython one_click.py $@\n"
  },
  {
    "path": "start_macos.sh",
    "content": "#!/bin/bash\n\n# environment isolation\nexport PYTHONNOUSERSITE=1\nunset PYTHONPATH\nunset PYTHONHOME\n\ncd \"$(dirname \"$(readlink -f \"${BASH_SOURCE[0]}\")\")\"\n\n# Portable install case\nif [ -d \"portable_env\" ]; then\n    ./portable_env/bin/python3 server.py --portable --api --auto-launch --api-port 5005 \"$@\"\n    exit $?\nfi\n\nif [[ \"$(pwd)\" =~ \" \" ]]; then echo This script relies on Miniforge which can not be silently installed under a path with spaces. && exit; fi\n\n# deactivate existing conda envs as needed to avoid conflicts\n{ conda deactivate && conda deactivate && conda deactivate; } 2> /dev/null\n\n# M Series or Intel\nOS_ARCH=$(uname -m)\ncase \"${OS_ARCH}\" in\n    x86_64*)    OS_ARCH=\"x86_64\";;\n    arm64*)     OS_ARCH=\"arm64\";;\n    *)          echo \"Unknown system architecture: $OS_ARCH! This script runs only on x86_64 or arm64\" && exit\nesac\n\n# config\nINSTALL_DIR=\"$(pwd)/installer_files\"\nCONDA_ROOT_PREFIX=\"$(pwd)/installer_files/conda\"\nINSTALL_ENV_DIR=\"$(pwd)/installer_files/env\"\nMINIFORGE_DOWNLOAD_URL=\"https://github.com/conda-forge/miniforge/releases/download/26.1.0-0/Miniforge3-26.1.0-0-MacOSX-${OS_ARCH}.sh\"\nconda_exists=\"F\"\n\n# figure out whether git and conda needs to be installed\nif \"$CONDA_ROOT_PREFIX/bin/conda\" --version &>/dev/null; then conda_exists=\"T\"; fi\n\n# (if necessary) install git and conda into a contained environment\n# download miniforge\nif [ \"$conda_exists\" == \"F\" ]; then\n    echo \"Downloading Miniforge from $MINIFORGE_DOWNLOAD_URL to $INSTALL_DIR/miniforge_installer.sh\"\n\n    mkdir -p \"$INSTALL_DIR\"\n    curl -L \"$MINIFORGE_DOWNLOAD_URL\" > \"$INSTALL_DIR/miniforge_installer.sh\"\n\n    chmod u+x \"$INSTALL_DIR/miniforge_installer.sh\"\n    bash \"$INSTALL_DIR/miniforge_installer.sh\" -b -p $CONDA_ROOT_PREFIX\n\n    # test the conda binary\n    echo \"Miniforge version:\"\n    \"$CONDA_ROOT_PREFIX/bin/conda\" --version\n\n    # delete the Miniforge 
installer\n    rm \"$INSTALL_DIR/miniforge_installer.sh\"\nfi\n\n# create the installer env\nif [ ! -e \"$INSTALL_ENV_DIR\" ]; then\n    \"$CONDA_ROOT_PREFIX/bin/conda\" create -y -k --prefix \"$INSTALL_ENV_DIR\" python=3.13\nfi\n\n# check if conda environment was actually created\nif [ ! -e \"$INSTALL_ENV_DIR/bin/python\" ]; then\n    echo \"Conda environment is empty.\"\n    exit\nfi\n\nexport CUDA_PATH=\"$INSTALL_ENV_DIR\"\nexport CUDA_HOME=\"$CUDA_PATH\"\n\n# activate installer env\nsource \"$CONDA_ROOT_PREFIX/etc/profile.d/conda.sh\" # otherwise conda complains about 'shell not initialized' (needed when running in a script)\nconda activate \"$INSTALL_ENV_DIR\"\n\n# setup installer env\npython one_click.py $@\n"
  },
  {
    "path": "start_windows.bat",
    "content": "@echo off\nsetlocal enabledelayedexpansion\n\n@rem environment isolation\nset PYTHONNOUSERSITE=1\nset PYTHONPATH=\nset PYTHONHOME=\nset PYTHONUTF8=1\n\ncd /D \"%~dp0\"\n\n@rem Portable install case\nif exist \"portable_env\" (\n    .\\portable_env\\python.exe server.py --portable --api --auto-launch %*\n    exit /b %errorlevel%\n)\n\nset PATH=%PATH%;%SystemRoot%\\system32\n\necho \"%CD%\"| findstr /C:\" \" >nul && echo This script relies on Miniforge which can not be silently installed under a path with spaces. && goto end\n\n@rem Check for special characters in installation path\nset \"SPCHARMESSAGE=\"WARNING: Special characters were detected in the installation path!\" \"         This can cause the installation to fail!\"\"\necho \"%CD%\"| findstr /R /C:\"[!#\\$%&()\\*+,;<=>?@\\[\\]\\^`{|}~]\" >nul && (\n\tcall :PrintBigMessage %SPCHARMESSAGE%\n)\nset SPCHARMESSAGE=\n\n@rem fix failed install when installing to a separate drive\nset TMP=%cd%\\installer_files\nset TEMP=%cd%\\installer_files\n\n@rem deactivate existing conda envs as needed to avoid conflicts\n(call conda deactivate && call conda deactivate && call conda deactivate) 2>nul\n\n@rem config\nset INSTALL_DIR=%cd%\\installer_files\nset CONDA_ROOT_PREFIX=%cd%\\installer_files\\conda\nset INSTALL_ENV_DIR=%cd%\\installer_files\\env\nset MINIFORGE_DOWNLOAD_URL=https://github.com/conda-forge/miniforge/releases/download/26.1.0-0/Miniforge3-26.1.0-0-Windows-x86_64.exe\nset MINIFORGE_CHECKSUM=0ad64473c20a8649be9313f64ee898f4b23a35a7a25ea9998a751c542e5e3840\nset conda_exists=F\n\n@rem figure out whether git and conda needs to be installed\ncall \"%CONDA_ROOT_PREFIX%\\_conda.exe\" --version >nul 2>&1\nif \"%ERRORLEVEL%\" EQU \"0\" set conda_exists=T\n\n@rem (if necessary) install git and conda into a contained environment\n@rem download conda\nif \"%conda_exists%\" == \"F\" (\n\techo Downloading Miniforge from %MINIFORGE_DOWNLOAD_URL% to %INSTALL_DIR%\\miniforge_installer.exe\n\n\tmkdir 
\"%INSTALL_DIR%\"\n\tcall curl -Lk \"%MINIFORGE_DOWNLOAD_URL%\" > \"%INSTALL_DIR%\\miniforge_installer.exe\" || ( echo. && echo Miniforge failed to download. && goto end )\n\n\t@rem Try CertUtil first\n\tfor /f %%a in ('CertUtil -hashfile \"%INSTALL_DIR%\\miniforge_installer.exe\" SHA256 ^| find /i /v \" \" ^| find /i \"%MINIFORGE_CHECKSUM%\"') do (\n\t\tset \"output=%%a\"\n\t)\n\n\t@rem If CertUtil fails, try PowerShell\n\tif not defined output (\n\t\tfor /f %%a in ('powershell -Command \"if((Get-FileHash \\\"%INSTALL_DIR%\\miniforge_installer.exe\\\" -Algorithm SHA256).Hash -eq ''%MINIFORGE_CHECKSUM%''){echo true}\"') do (\n\t\t\tset \"output=%%a\"\n\t\t)\n\t)\n\n\tif not defined output (\n\t\techo The checksum verification for miniforge_installer.exe has failed.\n\t\tdel \"%INSTALL_DIR%\\miniforge_installer.exe\"\n\t\tgoto end\n\t) else (\n\t\techo The checksum verification for miniforge_installer.exe has passed successfully.\n\t)\n\n\techo Installing Miniforge to %CONDA_ROOT_PREFIX%\n\tstart /wait \"\" \"%INSTALL_DIR%\\miniforge_installer.exe\" /InstallationType=JustMe /NoShortcuts=1 /AddToPath=0 /RegisterPython=0 /NoRegistry=1 /S /D=%CONDA_ROOT_PREFIX%\n\n\t@rem test the conda binary\n\techo Miniforge version:\n\tcall \"%CONDA_ROOT_PREFIX%\\_conda.exe\" --version || ( echo. && echo Miniforge not found. && goto end )\n\n\t@rem delete the Miniforge installer\n\tdel \"%INSTALL_DIR%\\miniforge_installer.exe\"\n)\n\n@rem create the installer env\nif not exist \"%INSTALL_ENV_DIR%\" (\n\techo Packages to install: %PACKAGES_TO_INSTALL%\n\tcall \"%CONDA_ROOT_PREFIX%\\_conda.exe\" create --no-shortcuts -y -k --prefix \"%INSTALL_ENV_DIR%\" python=3.13 || ( echo. && echo Conda environment creation failed. && goto end )\n)\n\n@rem check if conda environment was actually created\nif not exist \"%INSTALL_ENV_DIR%\\python.exe\" ( echo. && echo Conda environment is empty. 
&& goto end )\n\nset \"CUDA_PATH=%INSTALL_ENV_DIR%\"\nset \"CUDA_HOME=%CUDA_PATH%\"\n\n@rem activate installer env\ncall \"%CONDA_ROOT_PREFIX%\\condabin\\conda.bat\" activate \"%INSTALL_ENV_DIR%\" || ( echo. && echo Miniforge hook not found. && goto end )\n\n@rem setup installer env\ncall \"%INSTALL_ENV_DIR%\\python.exe\" one_click.py %*\n\n@rem below are functions for the script   next line skips these during normal execution\ngoto end\n\n:PrintBigMessage\necho. && echo.\necho *******************************************************************\nfor %%M in (%*) do echo * %%~M\necho *******************************************************************\necho. && echo.\nexit /b\n\n:end\npause\n"
  },
  {
    "path": "update_wizard_linux.sh",
    "content": "#!/usr/bin/env bash\n\ncd \"$(dirname \"${BASH_SOURCE[0]}\")\"\n\nif [[ \"$(pwd)\" =~ \" \" ]]; then echo This script relies on Miniforge which can not be silently installed under a path with spaces. && exit; fi\n\n# deactivate existing conda envs as needed to avoid conflicts\n{ conda deactivate && conda deactivate && conda deactivate; } 2> /dev/null\n\n# config\nCONDA_ROOT_PREFIX=\"$(pwd)/installer_files/conda\"\nINSTALL_ENV_DIR=\"$(pwd)/installer_files/env\"\n\n# environment isolation\nexport PYTHONNOUSERSITE=1\nunset PYTHONPATH\nunset PYTHONHOME\nexport CUDA_PATH=\"$INSTALL_ENV_DIR\"\nexport CUDA_HOME=\"$CUDA_PATH\"\n\n# activate installer env\nsource \"$CONDA_ROOT_PREFIX/etc/profile.d/conda.sh\" # otherwise conda complains about 'shell not initialized' (needed when running in a script)\nconda activate \"$INSTALL_ENV_DIR\"\n\n# update installer env\npython one_click.py --update-wizard && echo -e \"\\nHave a great day!\"\n"
  },
  {
    "path": "update_wizard_macos.sh",
    "content": "#!/bin/bash\n\ncd \"$(dirname \"${BASH_SOURCE[0]}\")\"\n\nif [[ \"$(pwd)\" =~ \" \" ]]; then echo This script relies on Miniforge which can not be silently installed under a path with spaces. && exit; fi\n\n# deactivate existing conda envs as needed to avoid conflicts\n{ conda deactivate && conda deactivate && conda deactivate; } 2> /dev/null\n\n# config\nCONDA_ROOT_PREFIX=\"$(pwd)/installer_files/conda\"\nINSTALL_ENV_DIR=\"$(pwd)/installer_files/env\"\n\n# environment isolation\nexport PYTHONNOUSERSITE=1\nunset PYTHONPATH\nunset PYTHONHOME\nexport CUDA_PATH=\"$INSTALL_ENV_DIR\"\nexport CUDA_HOME=\"$CUDA_PATH\"\n\n# activate installer env\nsource \"$CONDA_ROOT_PREFIX/etc/profile.d/conda.sh\" # otherwise conda complains about 'shell not initialized' (needed when running in a script)\nconda activate \"$INSTALL_ENV_DIR\"\n\n# update installer env\npython one_click.py --update-wizard && echo -e \"\\nHave a great day!\"\n"
  },
  {
    "path": "update_wizard_windows.bat",
    "content": "@echo off\n\ncd /D \"%~dp0\"\n\nset PATH=%PATH%;%SystemRoot%\\system32\n\necho \"%CD%\"| findstr /C:\" \" >nul && echo This script relies on Miniforge which can not be silently installed under a path with spaces. && goto end\n\n@rem fix failed install when installing to a separate drive\nset TMP=%cd%\\installer_files\nset TEMP=%cd%\\installer_files\n\n@rem deactivate existing conda envs as needed to avoid conflicts\n(call conda deactivate && call conda deactivate && call conda deactivate) 2>nul\n\n@rem config\nset CONDA_ROOT_PREFIX=%cd%\\installer_files\\conda\nset INSTALL_ENV_DIR=%cd%\\installer_files\\env\n\n@rem environment isolation\nset PYTHONNOUSERSITE=1\nset PYTHONPATH=\nset PYTHONHOME=\nset \"CUDA_PATH=%INSTALL_ENV_DIR%\"\nset \"CUDA_HOME=%CUDA_PATH%\"\n\n@rem activate installer env\ncall \"%CONDA_ROOT_PREFIX%\\condabin\\conda.bat\" activate \"%INSTALL_ENV_DIR%\" || ( echo. && echo Miniforge hook not found. && goto end )\n\n@rem update installer env\ncall \"%INSTALL_ENV_DIR%\\python.exe\" one_click.py --update-wizard && (\n    echo.\n    echo Have a great day!\n)\n\n:end\npause\n"
  }
]