[
  {
    "path": ".commitlintrc",
    "content": "{\n  \"extends\": [\"@commitlint/config-conventional\"],\n  \"defaultIgnores\": true,\n  \"rules\": {\n    \"body-leading-blank\": [1, \"always\"],\n    \"body-max-line-length\": [2, \"always\", 100],\n    \"footer-leading-blank\": [1, \"always\"],\n    \"footer-max-line-length\": [2, \"always\", 10000],\n    \"header-max-length\": [2, \"always\", 200],\n    \"subject-case\": [\n      2,\n      \"never\",\n      []\n    ],\n    \"subject-empty\": [2, \"never\"],\n    \"subject-full-stop\": [2, \"never\", \".\"],\n    \"type-case\": [2, \"always\", \"lower-case\"],\n    \"type-empty\": [2, \"never\"],\n    \"type-enum\": [\n      2,\n      \"always\",\n      [\n        \"build\",\n        \"chore\",\n        \"ci\",\n        \"docs\",\n        \"feat\",\n        \"fix\",\n        \"perf\",\n        \"refactor\",\n        \"revert\",\n        \"style\",\n        \"test\"\n      ]\n    ]\n  }\n}\n"
  },
  {
    "path": ".dockerignore",
    "content": ".github/\n.git/\n.mypy_cache/\n__pycache__/\nktem_app_data/\nenv/\n.pre-commit-config.yaml\n.commitlintrc\n.gitignore\n.gitattributes\nREADME.md\n*.zip\n*.sh\n\n!/launch.sh\n"
  },
  {
    "path": ".gitattributes",
    "content": "*.bat   text eol=crlf\n"
  },
  {
    "path": ".github/ISSUE_TEMPLATE/bug_report.yml",
    "content": "name: \"Bug Report\"\ndescription: Report something that is not working as expected\ntitle: \"[BUG] \"\nlabels: [\"bug\"]\nbody:\n  - type: markdown\n    attributes:\n      value: |\n        *Please fill this form with as much information as possible.*\n  - type: textarea\n    id: description\n    attributes:\n      label: \"Description\"\n      description: Please enter an explicit description of your issue\n      placeholder: Short and explicit description of your incident...\n    validations:\n      required: true\n  - type: textarea\n    id: reprod\n    attributes:\n      label: \"Reproduction steps\"\n      description: Please enter an explicit description of your issue\n      value: |\n        1. Go to '...'\n        2. Click on '....'\n        3. Scroll down to '....'\n        4. See error\n      render: bash\n    validations:\n      required: true\n  - type: textarea\n    id: screenshot\n    attributes:\n      label: \"Screenshots\"\n      description: If applicable, add screenshots to help explain your problem.\n      value: |\n        ![DESCRIPTION](LINK.png)\n      render: bash\n    validations:\n      required: false\n  - type: textarea\n    id: logs\n    attributes:\n      label: \"Logs\"\n      description: Please copy and paste any relevant log output. 
This will be automatically formatted into code, so no need for backticks.\n      render: bash\n    validations:\n      required: false\n  - type: dropdown\n    id: browsers\n    attributes:\n      label: \"Browsers\"\n      description: What browsers are you seeing the problem on?\n      multiple: true\n      options:\n        - Firefox\n        - Chrome\n        - Safari\n        - Microsoft Edge\n        - Opera\n        - Brave\n        - Other\n    validations:\n      required: false\n  - type: dropdown\n    id: os\n    attributes:\n      label: \"OS\"\n      description: What is the impacted environment?\n      multiple: true\n      options:\n        - Windows\n        - MacOS\n        - Linux\n        - Other\n    validations:\n      required: false\n  - type: textarea\n    id: additional_information\n    attributes:\n      label: \"Additional information\"\n      description: Add any relevant information or context.\n      placeholder:\n    validations:\n      required: false\n"
  },
  {
    "path": ".github/ISSUE_TEMPLATE/config.yml",
    "content": "blank_issues_enabled: false\n"
  },
  {
    "path": ".github/ISSUE_TEMPLATE/feature_request.yml",
    "content": "name: \"Feature Request\"\ndescription: Brainstorm and propose new features for the project\ntitle: \"[REQUEST] \"\nlabels: [\"enhancement\"]\nbody:\n  - type: markdown\n    attributes:\n      value: |\n        *Please fill this form with as much information as possible.*\n  - type: textarea\n    id: reference_issues\n    attributes:\n      label: \"Reference Issues\"\n      description: Common issues\n      placeholder: \"#Issues IDs\"\n    validations:\n      required: false\n  - type: textarea\n    id: summary\n    attributes:\n      label: \"Summary\"\n      description: Provide a brief explanation of the feature\n      placeholder: Describe in a few lines your feature request\n    validations:\n      required: true\n  - type: textarea\n    id: basic_example\n    attributes:\n      label: \"Basic Example\"\n      description: Indicate here some basic examples of your feature.\n      placeholder: A few specific words about your feature request.\n    validations:\n      required: true\n  - type: textarea\n    id: drawbacks\n    attributes:\n      label: \"Drawbacks\"\n      description: What are the drawbacks/impacts of your feature request ?\n      placeholder: Identify the drawbacks and impacts while being neutral on your feature request\n    validations:\n      required: true\n  - type: textarea\n    id: additional_information\n    attributes:\n      label: \"Additional information\"\n      description: Add any additional information that you think is important for your feature request\n      placeholder:\n    validations:\n      required: false\n"
  },
  {
    "path": ".github/PULL_REQUEST_TEMPLATE.md",
    "content": "## Description\n\n- Please include a summary of the changes and the related issue.\n- Fixes # (issue)\n\n## Type of change\n\n- [ ] New features (non-breaking change).\n- [ ] Bug fix (non-breaking change).\n- [ ] Breaking change (fix or feature that would cause existing functionality not to work as expected).\n\n## Checklist\n\n- [ ] I have performed a self-review of my code.\n- [ ] I have added thorough tests if it is a core feature.\n- [ ] There is a reference to the original bug report and related work.\n- [ ] I have commented on my code, particularly in hard-to-understand areas.\n- [ ] The feature is well documented.\n"
  },
  {
    "path": ".github/workflows/auto-bump-and-release.yaml",
    "content": "name: Auto Bump and Release\n\non:\n  push:\n    branches:\n      - main\n\njobs:\n  auto-bump-and-release:\n    runs-on: ubuntu-latest\n    steps:\n      - name: Clone the repo\n        uses: actions/checkout@v4\n        with:\n          fetch-depth: 0\n      - name: Update Application Version\n        id: update-version\n        uses: anothrNick/github-tag-action@v1\n        env:\n          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}\n          WITH_V: true\n          DEFAULT_BUMP: patch\n          MAJOR_STRING_TOKEN: \"bump:major\"\n          MINOR_STRING_TOKEN: \"bump:minor\"\n          PATCH_STRING_TOKEN: \"bump:patch\"\n      - name: Create release for ${{ steps.update-version.outputs.new_tag }}\n        # need to repeat this if statement because Github Action doesn't support early\n        # stopping for steps\n        if: ${{ steps.update-version.outputs.new_tag != steps.update-version.outputs.old_tag }}\n        run: |\n          echo Create release folder\n          mkdir kotaemon-app\n          echo ${{ steps.update-version.outputs.new_tag }} > kotaemon-app/VERSION\n          cp LICENSE.txt kotaemon-app/\n          cp flowsettings.py kotaemon-app/\n          cp app.py kotaemon-app/\n          cp .env.example kotaemon-app/.env\n          cp -r scripts kotaemon-app/\n          mkdir -p kotaemon-app/libs/ktem/ktem/\n          cp -r libs/ktem/ktem/assets kotaemon-app/libs/ktem/ktem/\n\n          tree kotaemon-app\n          zip -r kotaemon-app.zip kotaemon-app\n      - name: Release ${{ steps.update-version.outputs.new_tag }}\n        if: ${{ steps.update-version.outputs.new_tag != steps.update-version.outputs.old_tag }}\n        uses: softprops/action-gh-release@v2\n        with:\n          files: kotaemon-app.zip\n          fail_on_unmatched_files: true\n          token: ${{ secrets.GITHUB_TOKEN }}\n          generate_release_notes: true\n          tag_name: ${{ steps.update-version.outputs.new_tag }}\n          make_latest: true\n      - 
name: Setup latest branch locally without switching current branch\n        if: ${{ steps.update-version.outputs.new_tag != steps.update-version.outputs.old_tag }}\n        run: git fetch origin latest:latest\n      - name: Update latest branch\n        if: ${{ steps.update-version.outputs.new_tag != steps.update-version.outputs.old_tag }}\n        run: |\n          git branch -f latest tags/${{ steps.update-version.outputs.new_tag }}\n          git checkout latest\n          git push -f -u origin latest\n"
  },
  {
    "path": ".github/workflows/build-push-docker.yaml",
    "content": "name: Build and Push Docker Image\n\non:\n  release:\n    types:\n      - created\n\n  push:\n    tags:\n      - \"v[0-9]+.[0-9]+.[0-9]+\"\n\n  workflow_dispatch:\n\nenv:\n  REGISTRY: ghcr.io\n\njobs:\n  build:\n    name: Build and push container\n    runs-on: ubuntu-latest\n    permissions:\n      contents: read\n      packages: write\n      attestations: write\n      id-token: write\n    strategy:\n      matrix:\n        target:\n          - lite\n          - full\n          - ollama\n      # The maximum number of jobs that can run simultaneously\n      max-parallel: 1\n    steps:\n      - name: Free Disk Space (Ubuntu)\n        uses: jlumbroso/free-disk-space@main\n        with:\n          # this might remove tools that are actually needed,\n          # if set to \"true\" but frees about 6 GB\n          tool-cache: true\n\n          # all of these default to true, but feel free to set to\n          # \"false\" if necessary for your workflow\n          android: true\n          dotnet: true\n          haskell: true\n          large-packages: true\n          docker-images: true\n          swap-storage: true\n\n      - name: Set repository and image name\n        run: |\n          echo \"FULL_IMAGE_NAME=${{ env.REGISTRY }}/${IMAGE_NAME,,}\" >>${GITHUB_ENV}\n        env:\n          IMAGE_NAME: \"${{ github.repository }}\"\n\n      - name: Checkout\n        uses: actions/checkout@v4\n\n      - name: Set up QEMU\n        uses: docker/setup-qemu-action@v3\n        with:\n          image: tonistiigi/binfmt:latest\n          platforms: arm64,arm\n\n      - name: Set up Docker Buildx\n        id: buildx\n        uses: docker/setup-buildx-action@v3\n\n      - name: Set up Docker meta\n        id: meta\n        uses: docker/metadata-action@v5\n        with:\n          images: ${{ env.FULL_IMAGE_NAME }}\n          tags: |\n            # branch\n            type=ref,event=branch,suffix=-${{ matrix.target }}\n            # semver with suffix for lite/full 
targets\n            type=semver,pattern={{version}},suffix=-${{ matrix.target }}\n            # latest tag with suffix for lite/full targets\n            type=raw,value=latest,enable=${{ startsWith(github.ref, 'refs/tags/') && !contains(github.ref, 'pre') }},suffix=-${{ matrix.target }}\n          flavor: |\n            # This is disabled here so we can use the raw form above\n            latest=false\n            # Suffix is not used here since there's no way to disable it above\n\n      - name: Log in to the Container registry\n        uses: docker/login-action@v3\n        with:\n          registry: ${{ env.REGISTRY }}\n          username: ${{ github.actor }}\n          password: ${{ secrets.GITHUB_TOKEN }}\n\n      - name: Build docker image\n        uses: docker/build-push-action@v6\n        with:\n          file: Dockerfile\n          context: .\n          push: true\n          platforms: linux/amd64, linux/arm64\n          tags: |\n            ${{ steps.meta.outputs.tags }}\n          labels: ${{ steps.meta.outputs.labels }}\n          target: ${{ matrix.target }}\n          cache-from: type=gha\n          cache-to: type=gha,mode=max\n"
  },
  {
    "path": ".github/workflows/pr-lint.yaml",
    "content": "name: \"Lint PR\"\n\non:\n  pull_request:\n    types:\n      - opened\n      - edited\n      - synchronize\n\npermissions:\n  pull-requests: write\n\njobs:\n  pr-title:\n    name: Validate PR title\n    runs-on: ubuntu-latest\n    permissions: write-all\n    steps:\n      - uses: amannn/action-semantic-pull-request@v5\n        id: lint_pr_title\n        env:\n          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}\n\n      - uses: marocchino/sticky-pull-request-comment@v2\n        # When the previous steps fails, the workflow would stop. By adding this\n        # condition you can continue the execution with the populated error message.\n        if: always() && (steps.lint_pr_title.outputs.error_message != null)\n        with:\n          header: pr-title-lint-error\n          message: |\n            Hey there and thank you for opening this pull request! 👋🏼\n\n            We require pull request titles to follow the [Conventional Commits specification](https://www.conventionalcommits.org/en/v1.0.0/) and it looks like your proposed title needs to be adjusted.\n            Details:\n            ```\n            ${{ steps.lint_pr_title.outputs.error_message }}\n            ```\n\n      # Delete a previous comment when the issue has been resolved\n      - if: ${{ steps.lint_pr_title.outputs.error_message == null }}\n        uses: marocchino/sticky-pull-request-comment@v2\n        with:\n          header: pr-title-lint-error\n          delete: true\n\n  commitlint:\n    if: false # Disable this job for now\n    name: Validate commit messages\n    runs-on: ubuntu-latest\n    permissions: write-all\n    steps:\n      - uses: actions/checkout@v4\n      - uses: wagoid/commitlint-github-action@v6\n        id: commitlint\n        env:\n          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}\n        with:\n          configFile: ./.commitlintrc\n      - uses: buildingcash/json-to-markdown-table-action@v1\n        if: always() && (steps.commitlint.outcome != 'success')\n 
       id: table\n        with:\n          json: ${{ steps.commitlint.outputs.results }}\n      - uses: marocchino/sticky-pull-request-comment@v2\n        if: always() && (steps.commitlint.outcome != 'success')\n        with:\n          header: commitlint-error\n          message: |\n            **All commits** in this PR need to follow the [Conventional Commits specification](https://www.conventionalcommits.org/en/v1.0.0/) and [.commitlintrc](${{ github.server_url }}/${{ github.repository }}/blob/${{ github.head_ref || github.ref_name }}/.commitlintrc).\n            Details:\n            ${{ steps.table.outputs.table }}\n\n      - if: ${{ steps.commitlint.outcome == 'success' }}\n        uses: marocchino/sticky-pull-request-comment@v2\n        with:\n          header: commitlint-error\n          delete: true\n"
  },
  {
    "path": ".github/workflows/style-check.yaml",
    "content": "name: style-check\n\non:\n  pull_request:\n    branches: [main, develop]\n  push:\n    branches: [main, develop]\n\njobs:\n  pre-commit:\n    runs-on: ubuntu-latest\n    steps:\n      - name: Clone the repo\n        uses: actions/checkout@v4\n      - name: Setup python\n        uses: actions/setup-python@v4\n        with:\n          python-version: \"3.10\"\n      - name: run pre-commit\n        uses: pre-commit/action@v3.0.0\n"
  },
  {
    "path": ".github/workflows/unit-test.yaml",
    "content": "name: unit-test\n\non:\n  pull_request:\n    branches: [main]\n  push:\n    branches: [main]\n\nenv:\n  THEFLOW_TEMP_PATH: ./tmp\n\njobs:\n  unit-test:\n    # if: false # temporary disable this job due to legacy interface\n    #TODO: enable this job after the new interface is ready\n    if: ${{ !cancelled() }}\n    runs-on: ${{ matrix.os }}\n    timeout-minutes: 20\n    defaults:\n      run:\n        shell: ${{ matrix.shell }}\n    strategy:\n      matrix:\n        python-version: [\"3.10\", \"3.11\"]\n        include:\n          - os: ubuntu-latest\n            shell: bash\n            ACTIVATE_ENV: \". env/bin/activate\"\n            GITHUB_OUTPUT: \"$GITHUB_OUTPUT\"\n          # - os: windows-latest\n          #   shell: pwsh\n          #   ACTIVATE_ENV: env/Scripts/activate.ps1\n          #   GITHUB_OUTPUT: \"$env:GITHUB_OUTPUT\"\n\n    name: unit testing with python ${{ matrix.python-version }}\n    steps:\n      - name: Clone the repo\n        uses: actions/checkout@v4\n        with:\n          ref: ${{ github.event.pull_request.head.sha }}\n\n      - name: Get Head Commit Message\n        id: get-head-commit-message\n        run: echo \"message=$(git show -s --format=%s)\" | tee -a ${{ matrix.GITHUB_OUTPUT }}\n\n      - name: Check ignore caching\n        id: check-ignore-cache\n        run: |\n          ignore_cache=${{ contains(steps.get-head-commit-message.outputs.message, '[ignore cache]') }}\n          echo \"check=$ignore_cache\" | tee -a ${{ matrix.GITHUB_OUTPUT }}\n\n      - name: Set up Python ${{ matrix.python-version }} on ${{ runner.os }}\n        uses: actions/setup-python@v4\n        id: setup_python\n        with:\n          python-version: ${{ matrix.python-version }}\n          architecture: x64\n\n      - name: Install uv\n        uses: astral-sh/setup-uv@v5\n        with:\n          python-version: ${{ matrix.python-version }}\n          enable-cache: true\n\n      - name: Get cache key\n        id: get-cache-key\n        
run: |\n          pip install \"setuptools-git-versioning>=2.0,<3\"\n          package_version=$(setuptools-git-versioning)\n          cache_key=\"${{ runner.os }}-py${{ matrix.python-version }}-v${package_version}\"\n          echo \"key=$cache_key\" | tee -a ${{ matrix.GITHUB_OUTPUT }}\n\n      - name: Try to restore dependencies from ${{ steps.get-cache-key.outputs.key }}\n        id: restore-dependencies\n        if: steps.check-ignore-cache.outputs.check != 'true'\n        uses: actions/cache/restore@v3\n        with:\n          path: ${{ env.pythonLocation }}\n          key: ${{ steps.get-cache-key.outputs.key }}\n          # could using cache of previous ver to reuse unchanged packages\n          restore-keys: ${{ runner.os }}-py${{ matrix.python-version }}\n\n      - name: Check cache hit\n        id: check-cache-hit\n        run: |\n          echo \"cache-hit=${{ steps.restore-dependencies.outputs.cache-hit }}\"\n          echo \"cache-matched-key=${{ steps.restore-dependencies.outputs.cache-matched-key }}\"\n          cache_hit=${{ steps.restore-dependencies.outputs.cache-primary-key == steps.restore-dependencies.outputs.cache-matched-key }}\n          echo \"check=$cache_hit\" | tee -a ${{ matrix.GITHUB_OUTPUT }}\n\n      - name: Install additional dependencies (if any)\n        run: |\n          uv sync --frozen --no-cache\n\n      - name: New dependencies cache for key ${{ steps.restore-dependencies.outputs.cache-primary-key }}\n        if: |\n          steps.check-ignore-cache.outputs.check != 'true' &&\n          steps.check-cache-hit.outputs.check != 'true'\n        uses: actions/cache/save@v3\n        with:\n          path: ${{ env.pythonLocation }}\n          key: ${{ steps.restore-dependencies.outputs.cache-primary-key }}\n\n      - name: Install OS-based packages\n        run: |\n          sudo apt update -qqy\n          sudo apt install -y poppler-utils libpoppler-dev tesseract-ocr\n\n      - name: Test kotaemon with pytest\n        run: |\n    
      source .venv/bin/activate\n          uv pip show pytest\n          cd libs/kotaemon\n          pytest\n"
  },
  {
    "path": ".gitignore",
    "content": "# Created by https://www.toptal.com/developers/gitignore/api/python,linux,macos,windows,vim,emacs,visualstudiocode,pycharm\n# Edit at https://www.toptal.com/developers/gitignore?templates=python,linux,macos,windows,vim,emacs,visualstudiocode,pycharm\n\nactivate*\nactivate/*\nkotaemon-env*\n.env\n\n### Emacs ###\n# -*- mode: gitignore; -*-\n*~\n\\#*\\#\n/.emacs.desktop\n/.emacs.desktop.lock\n*.elc\nauto-save-list\ntramp\n.\\#*\n\n# Org-mode\n.org-id-locations\n*_archive\n\n# flymake-mode\n*_flymake.*\n\n# eshell files\n/eshell/history\n/eshell/lastdir\n\n# elpa packages\n/elpa/\n\n# reftex files\n*.rel\n\n# AUCTeX auto folder\n/auto/\n\n# cask packages\n.cask/\ndist/\n\n# Flycheck\nflycheck_*.el\n\n# server auth directory\n/server/\n\n# projectiles files\n.projectile\n\n# directory configuration\n.dir-locals.el\n\n# network security\n/network-security.data\n\n### Linux ###\n\n# temporary files which can be created if a process still has a handle open of a deleted file\n.fuse_hidden*\n\n# KDE directory preferences\n.directory\n\n# Linux trash folder which might appear on any partition or disk\n.Trash-*\n\n# .nfs files are created when an open file is removed but is still being accessed\n.nfs*\n\n### macOS ###\n# General\n.DS_Store\n.AppleDouble\n.LSOverride\n\n# Icon must end with two \\r\nIcon\n\n# Thumbnails\n._*\n\n# Files that might appear in the root of a volume\n.DocumentRevisions-V100\n.fseventsd\n.Spotlight-V100\n.TemporaryItems\n.Trashes\n.VolumeIcon.icns\n.com.apple.timemachine.donotpresent\n\n# Directories potentially created on remote AFP share\n.AppleDB\n.AppleDesktop\nNetwork Trash Folder\nTemporary Items\n.apdisk\n\n### macOS Patch ###\n# iCloud generated files\n*.icloud\n\n### PyCharm ###\n# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider\n# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839\n\n# User-specific 
stuff\n.idea/**/workspace.xml\n.idea/**/tasks.xml\n.idea/**/usage.statistics.xml\n.idea/**/dictionaries\n.idea/**/shelf\n\n# AWS User-specific\n.idea/**/aws.xml\n\n# Generated files\n.idea/**/contentModel.xml\n\n# Sensitive or high-churn files\n.idea/**/dataSources/\n.idea/**/dataSources.ids\n.idea/**/dataSources.local.xml\n.idea/**/sqlDataSources.xml\n.idea/**/dynamic.xml\n.idea/**/uiDesigner.xml\n.idea/**/dbnavigator.xml\n\n# Gradle\n.idea/**/gradle.xml\n.idea/**/libraries\n\n# Gradle and Maven with auto-import\n# When using Gradle or Maven with auto-import, you should exclude module files,\n# since they will be recreated, and may cause churn.  Uncomment if using\n# auto-import.\n# .idea/artifacts\n# .idea/compiler.xml\n# .idea/jarRepositories.xml\n# .idea/modules.xml\n# .idea/*.iml\n# .idea/modules\n# *.iml\n# *.ipr\n\n# CMake\ncmake-build-*/\n\n# Mongo Explorer plugin\n.idea/**/mongoSettings.xml\n\n# File-based project format\n*.iws\n\n# IntelliJ\nout/\n\n# mpeltonen/sbt-idea plugin\n.idea_modules/\n\n# JIRA plugin\natlassian-ide-plugin.xml\n\n# Cursive Clojure plugin\n.idea/replstate.xml\n\n# SonarLint plugin\n.idea/sonarlint/\n\n# Crashlytics plugin (for Android Studio and IntelliJ)\ncom_crashlytics_export_strings.xml\ncrashlytics.properties\ncrashlytics-build.properties\nfabric.properties\n\n# Editor-based Rest Client\n.idea/httpRequests\n\n# Android studio 3.1+ serialized cache file\n.idea/caches/build_file_checksums.ser\n\n### PyCharm Patch ###\n# Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721\n\n# *.iml\n# modules.xml\n# .idea/misc.xml\n# *.ipr\n\n# Sonarlint plugin\n# https://plugins.jetbrains.com/plugin/7973-sonarlint\n.idea/**/sonarlint/\n\n# SonarQube Plugin\n# https://plugins.jetbrains.com/plugin/7238-sonarqube-community-plugin\n.idea/**/sonarIssues.xml\n\n# Markdown Navigator plugin\n# 
https://plugins.jetbrains.com/plugin/7896-markdown-navigator-enhanced\n.idea/**/markdown-navigator.xml\n.idea/**/markdown-navigator-enh.xml\n.idea/**/markdown-navigator/\n\n# Cache file creation bug\n# See https://youtrack.jetbrains.com/issue/JBR-2257\n.idea/$CACHE_FILE$\n\n# CodeStream plugin\n# https://plugins.jetbrains.com/plugin/12206-codestream\n.idea/codestream.xml\n\n# Azure Toolkit for IntelliJ plugin\n# https://plugins.jetbrains.com/plugin/8053-azure-toolkit-for-intellij\n.idea/**/azureSettings.xml\n\n### Python ###\n# Byte-compiled / optimized / DLL files\n__pycache__/\n*.py[cod]\n*$py.class\n\n# C extensions\n*.so\n\n# Distribution / packaging\n.Python\nbuild/\ndevelop-eggs/\ndownloads/\neggs/\n.eggs/\nlib/\nlib64/\nparts/\nsdist/\nvar/\nwheels/\nshare/python-wheels/\n*.egg-info/\n.installed.cfg\n*.egg\nMANIFEST\n\n# PyInstaller\n#  Usually these files are written by a python script from a template\n#  before PyInstaller builds the exe, so as to inject date/other infos into it.\n*.manifest\n*.spec\n\n# Installer logs\npip-log.txt\npip-delete-this-directory.txt\n\n# Unit test / coverage reports\nhtmlcov/\n.tox/\n.nox/\n.coverage\n.coverage.*\n.cache\nnosetests.xml\ncoverage.xml\n*.cover\n*.py,cover\n.hypothesis/\n.pytest_cache/\ncover/\n\n# Translations\n*.mo\n*.pot\n\n# Django stuff:\n*.log\nlocal_settings.py\ndb.sqlite3\ndb.sqlite3-journal\n\n# Flask stuff:\ninstance/\n.webassets-cache\n\n# Scrapy stuff:\n.scrapy\n\n# Sphinx documentation\ndocs/_build/\n\n# PyBuilder\n.pybuilder/\ntarget/\n\n# Jupyter Notebook\n.ipynb_checkpoints\n\n# IPython\nprofile_default/\nipython_config.py\n\n# pyenv\n#   For a library or package, you might want to ignore these files since the code is\n#   intended to run in multiple environments; otherwise, check them in:\n# .python-version\n\n# pipenv\n#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.\n#   However, in case of collaboration, if having platform-specific dependencies or 
dependencies\n#   having no cross-platform support, pipenv may install dependencies that don't work, or not\n#   install all needed dependencies.\n#Pipfile.lock\n\n# poetry\n#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.\n#   This is especially recommended for binary packages to ensure reproducibility, and is more\n#   commonly ignored for libraries.\n#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control\n#poetry.lock\n\n# pdm\n#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.\n#pdm.lock\n#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it\n#   in version control.\n#   https://pdm.fming.dev/#use-with-ide\n.pdm.toml\n\n# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm\n__pypackages__/\n\n# Celery stuff\ncelerybeat-schedule\ncelerybeat.pid\n\n# SageMath parsed files\n*.sage.py\n\n# Environments\n.venv\nenv/\nvenv/\nENV/\nenv.bak/\nvenv.bak/\n\n# Spyder project settings\n.spyderproject\n.spyproject\n\n# Rope project settings\n.ropeproject\n\n# mkdocs documentation\n/site\n\n# mypy\n.mypy_cache/\n.dmypy.json\ndmypy.json\n\n# Pyre type checker\n.pyre/\n\n# pytype static type analyzer\n.pytype/\n\n# Cython debug symbols\ncython_debug/\n\n# PyCharm\n#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can\n#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore\n#  and can be added to the global gitignore or merged into this file.  
For a more nuclear\n#  option (not recommended) you can uncomment the following to ignore the entire idea folder.\n#.idea/\n\n### Python Patch ###\n# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration\npoetry.toml\n\n# ruff\n.ruff_cache/\n\n# LSP config files\npyrightconfig.json\n\n### Vim ###\n# Swap\n[._]*.s[a-v][a-z]\n!*.svg # comment out if you don't need vector files\n[._]*.sw[a-p]\n[._]s[a-rt-v][a-z]\n[._]ss[a-gi-z]\n[._]sw[a-p]\n\n# Session\nSession.vim\nSessionx.vim\n\n# Temporary\n.netrwhist\n# Auto-generated tag files\ntags\n# Persistent undo\n[._]*.un~\n\n### VisualStudioCode ###\n.vscode/*\n!.vscode/settings.json\n!.vscode/tasks.json\n!.vscode/launch.json\n!.vscode/extensions.json\n!.vscode/*.code-snippets\n\n# Local History for Visual Studio Code\n.history/\n\n# Built Visual Studio Code Extensions\n*.vsix\n\n### VisualStudioCode Patch ###\n# Ignore all local history of files\n.history\n.ionide\n\n### Windows ###\n# Windows thumbnail cache files\nThumbs.db\nThumbs.db:encryptable\nehthumbs.db\nehthumbs_vista.db\n\n# Dump file\n*.stackdump\n\n# Folder config file\n[Dd]esktop.ini\n\n# Recycle Bin used on file shares\n$RECYCLE.BIN/\n\n# Windows Installer files\n*.cab\n*.msi\n*.msix\n*.msm\n*.msp\n\n# Windows shortcuts\n*.lnk\n\n# PDF files\n*.pdf\n!libs/kotaemon/tests/resources/*.pdf\n\n.theflow/\n\n# End of https://www.toptal.com/developers/gitignore/api/python,linux,macos,windows,vim,emacs,visualstudiocode,pycharm\n*.py[coid]\n\nlogs/\n.gitsecret/keys/random_seed\n!*.secret\n.envrc\n.env\n\nS.gpg-agent*\n.vscode/settings.json\nexamples/example1/assets\nstorage/*\n\n# Conda and env storages\n*install_dir/\ndoc_env/\n\n# application data\nktem_app_data/\ngradio_tmp/\n"
  },
  {
    "path": ".pre-commit-config.yaml",
    "content": "repos:\n  - repo: https://github.com/pre-commit/pre-commit-hooks\n    rev: v4.3.0\n    hooks:\n      - id: check-yaml\n        args: [\"--unsafe\"]\n      - id: check-toml\n      - id: end-of-file-fixer\n      - id: trailing-whitespace\n      - id: mixed-line-ending\n      - id: detect-aws-credentials\n        args: [\"--allow-missing-credentials\"]\n      - id: detect-private-key\n      - id: check-added-large-files\n        args: [\"--maxkb=750\"]\n      - id: debug-statements\n  - repo: https://github.com/ambv/black\n    rev: 22.3.0\n    hooks:\n      - id: black\n        language_version: python3\n  - repo: https://github.com/pycqa/isort\n    rev: 5.12.0\n    hooks:\n      - id: isort\n        args: [\"--profile\", \"black\"]\n        language_version: python3.10\n  - repo: https://github.com/pycqa/flake8\n    rev: 4.0.1\n    hooks:\n      - id: flake8\n        args: [\"--max-line-length\", \"88\", \"--extend-ignore\", \"E203\"]\n  - repo: https://github.com/myint/autoflake\n    rev: v1.4\n    hooks:\n      - id: autoflake\n        args:\n          [\n            \"--in-place\",\n            \"--remove-unused-variables\",\n            \"--remove-all-unused-imports\",\n            \"--ignore-init-module-imports\",\n            \"--exclude=tests/*\",\n          ]\n  - repo: https://github.com/pre-commit/mirrors-prettier\n    rev: v2.7.1\n    hooks:\n      - id: prettier\n        types_or: [markdown, yaml]\n  - repo: https://github.com/pre-commit/mirrors-mypy\n    rev: \"v1.7.1\"\n    hooks:\n      - id: mypy\n        additional_dependencies:\n          [\n            types-PyYAML==6.0.12.11,\n            \"types-requests\",\n            \"sqlmodel\",\n            \"types-Markdown\",\n            \"types-cachetools\",\n            types-tzlocal,\n          ]\n        args: [\"--check-untyped-defs\", \"--ignore-missing-imports\"]\n        exclude: \"^templates/\"\n  - repo: https://github.com/codespell-project/codespell\n    rev: v2.2.4\n    
hooks:\n      - id: codespell\n        additional_dependencies:\n          - tomli\n"
  },
  {
    "path": ".python-version",
    "content": "3.10\n"
  },
  {
    "path": "CODE_OF_CONDUCT.md",
    "content": "# Contributor Covenant Code of Conduct\n\n## Our Pledge\n\nWe as members, contributors, and leaders pledge to make participation in our\ncommunity a harassment-free experience for everyone, regardless of age, body\nsize, visible or invisible disability, ethnicity, sex characteristics, gender\nidentity and expression, level of experience, education, socio-economic status,\nnationality, personal appearance, race, religion, or sexual identity\nand orientation.\n\nWe pledge to act and interact in ways that contribute to an open, welcoming,\ndiverse, inclusive, and healthy community.\n\n## Our Standards\n\nExamples of behavior that contributes to a positive environment for our\ncommunity include:\n\n- Demonstrating empathy and kindness toward other people\n- Being respectful of differing opinions, viewpoints, and experiences\n- Giving and gracefully accepting constructive feedback\n- Accepting responsibility and apologizing to those affected by our mistakes,\n  and learning from the experience\n- Focusing on what is best not just for us as individuals, but for the\n  overall community\n\nExamples of unacceptable behavior include:\n\n- The use of sexualized language or imagery, and sexual attention or\n  advances of any kind\n- Trolling, insulting or derogatory comments, and personal or political attacks\n- Public or private harassment\n- Publishing others' private information, such as a physical or email\n  address, without their explicit permission\n- Other conduct which could reasonably be considered inappropriate in a\n  professional setting\n\n## Enforcement Responsibilities\n\nCommunity leaders are responsible for clarifying and enforcing our standards of\nacceptable behavior and will take appropriate and fair corrective action in\nresponse to any behavior that they deem inappropriate, threatening, offensive,\nor harmful.\n\nCommunity leaders have the right and responsibility to remove, edit, or reject\ncomments, commits, code, wiki edits, issues, 
and other contributions that are\nnot aligned to this Code of Conduct, and will communicate reasons for moderation\ndecisions when appropriate.\n\n## Scope\n\nThis Code of Conduct applies within all community spaces, and also applies when\nan individual is officially representing the community in public spaces.\nExamples of representing our community include using an official e-mail address,\nposting via an official social media account, or acting as an appointed\nrepresentative at an online or offline event.\n\n## Enforcement\n\nInstances of abusive, harassing, or otherwise unacceptable behavior may be\nreported to the community leaders responsible for enforcement at\nkotaemon.support@cinnamon.is.\nAll complaints will be reviewed and investigated promptly and fairly.\n\nAll community leaders are obligated to respect the privacy and security of the\nreporter of any incident.\n\n## Enforcement Guidelines\n\nCommunity leaders will follow these Community Impact Guidelines in determining\nthe consequences for any action they deem in violation of this Code of Conduct:\n\n### 1. Correction\n\n**Community Impact**: Use of inappropriate language or other behavior deemed\nunprofessional or unwelcome in the community.\n\n**Consequence**: A private, written warning from community leaders, providing\nclarity around the nature of the violation and an explanation of why the\nbehavior was inappropriate. A public apology may be requested.\n\n### 2. Warning\n\n**Community Impact**: A violation through a single incident or series\nof actions.\n\n**Consequence**: A warning with consequences for continued behavior. No\ninteraction with the people involved, including unsolicited interaction with\nthose enforcing the Code of Conduct, for a specified period of time. This\nincludes avoiding interactions in community spaces as well as external channels\nlike social media. Violating these terms may lead to a temporary or\npermanent ban.\n\n### 3. 
Temporary Ban\n\n**Community Impact**: A serious violation of community standards, including\nsustained inappropriate behavior.\n\n**Consequence**: A temporary ban from any sort of interaction or public\ncommunication with the community for a specified period of time. No public or\nprivate interaction with the people involved, including unsolicited interaction\nwith those enforcing the Code of Conduct, is allowed during this period.\nViolating these terms may lead to a permanent ban.\n\n### 4. Permanent Ban\n\n**Community Impact**: Demonstrating a pattern of violation of community\nstandards, including sustained inappropriate behavior, harassment of an\nindividual, or aggression toward or disparagement of classes of individuals.\n\n**Consequence**: A permanent ban from any sort of public interaction within\nthe community.\n\n## Attribution\n\nThis Code of Conduct is adapted from the [Contributor Covenant][homepage],\nversion 2.0, available at\nhttps://www.contributor-covenant.org/version/2/0/code_of_conduct.html.\n\nCommunity Impact Guidelines were inspired by [Mozilla's code of conduct\nenforcement ladder](https://github.com/mozilla/diversity).\n\n[homepage]: https://www.contributor-covenant.org\n\nFor answers to common questions about this code of conduct, see the FAQ at\nhttps://www.contributor-covenant.org/faq. Translations are available at\nhttps://www.contributor-covenant.org/translations.\n"
  },
  {
    "path": "CONTRIBUTING.md",
    "content": "# Contributing to Kotaemon\n\nWelcome 👋 to the Kotaemon project! We're thrilled that you're interested in contributing. Whether you're fixing bugs, adding new features, or improving documentation, your efforts are highly appreciated. This guide aims to help you get started with contributing to Kotaemon.\n\n<a href=\"https://github.com/Cinnamon/kotaemon/graphs/contributors\">\n  <img src=\"https://contrib.rocks/image?repo=Cinnamon/kotaemon\" />\n</a>\n\n### Table of Contents\n\n1. [📖 Code of Conduct](#-code-of-conduct)\n2. [🔁 Contributing via Pull Requests](#-contributing-via-pull-requests)\n3. [📥 Opening an Issue](#-opening-an-issue)\n4. [📝 Commit Messages](#-commit-messages)\n5. [🧾 License](#-license)\n\n## 📖 Code of Conduct\n\nPlease review our [code of conduct](./CODE_OF_CONDUCT.md), which is in effect at all times. We expect everyone who contributes to this project to honor it.\n\n## 🔁 Contributing via Pull Requests\n\n1. [**Fork the repository**](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks/fork-a-repo): Click on the [Fork](https://github.com/Cinnamon/kotaemon/fork) button on the repository's page to create a copy of Kotaemon under your GitHub account.\n\n2. [**Clone your code**](https://docs.github.com/en/repositories/creating-and-managing-repositories/cloning-a-repository): Clone your forked repository to your local machine.\n\n3. [**Create new branch**](https://docs.github.com/en/desktop/making-changes-in-a-branch/managing-branches-in-github-desktop): Create a new branch in your forked repo with a descriptive name that reflects your changes.\n\n```sh\ngit checkout -b descriptive-name-for-your-changes\n```\n\n4. **Set up the development environment**: If you are working on the code, make sure to install the necessary dependencies for development.\n\n```sh\npip install -e \"libs/kotaemon[dev]\"\n```\n\n5. 
**Make your changes**: Ensure your code follows the project's coding style and passes all test cases.\n\n   - Check the coding style\n\n   ```sh\n   pre-commit run --all-files\n   ```\n\n   - Run the tests\n\n   ```sh\n   pytest libs/kotaemon/tests/\n   ```\n\n6. [**Commit your changes**](https://docs.github.com/en/desktop/making-changes-in-a-branch/committing-and-reviewing-changes-to-your-project-in-github-desktop): Once you are done with your changes, add and commit them with clear messages.\n\n```sh\ngit add your_changes.py\ngit commit -m \"clear message describing your changes.\"\ngit push -u origin descriptive-name-for-your-changes\n```\n\n7. [**Create a pull request**](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request): When you are satisfied with your changes, [submit a pull request](https://github.com/Cinnamon/kotaemon/compare) from your forked repository to the Kotaemon repository. In the pull request, provide a clear description of your changes and any related issues. For the title of the pull request, please refer to our [commit message convention](#-commit-messages).\n\n8. **Wait for reviews**: Wait for the maintainers to review your pull request. If everything is okay, your changes will be merged into the Kotaemon project.\n\n### GitHub Actions CI Tests\n\nAll pull requests must pass the [GitHub Actions Continuous Integration (CI)](https://docs.github.com/en/actions/about-github-actions/about-continuous-integration-with-github-actions) tests before they can be merged. These tests include coding-style checks, PR title validation, unit tests, etc. to ensure that your changes meet the project's quality standards. Please review and fix any CI failures that arise.\n\n## 📥 Opening an Issue\n\nBefore [creating an issue](https://github.com/Cinnamon/kotaemon/issues/new/choose), search through existing issues to ensure you are not opening a duplicate. 
If you are reporting a bug or issue, please provide a reproducible example to help us quickly identify the problem.\n\n## 📝 Commit Messages\n\n### Overview\n\nWe use [Angular convention](https://www.conventionalcommits.org/en/) for commit messages to maintain consistency and clarity in our project history. Please take a moment to familiarize yourself with this convention before making your first commit.\n\n_For the sake of simplicity, we use [squashing merge](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/incorporating-changes-from-a-pull-request/about-pull-request-merges#squash-and-merge-your-commits) with pull requests. Therefore, if you contribute via a pull request, just make sure your PR's title, instead of the whole commits, follows this convention._\n\nCommit format:\n\n```sh\n<gitmoji> <type>(<scope>): <subject>\n<BLANK LINE>\n<body>\n<BLANK LINE>\n<footer>\n```\n\nExamples:\n\n```sh\ndocs(api): update api doc\n```\n\n### Commit types\n\n| Types      | Description                                                   |\n| :--------- | :------------------------------------------------------------ |\n| `feat`     | New features                                                  |\n| `fix`      | Bug fix                                                       |\n| `docs`     | Documentation only changes                                    |\n| `build`    | Changes that affect the build system or external dependencies |\n| `chore`    | Something that doesn’t fit the other types                    |\n| `ci`       | Changes to our CI configuration files and scripts             |\n| `perf`     | Improve performance                                           |\n| `refactor` | Refactor code                                                 |\n| `revert`   | Revert a previous commit                                      |\n| `style`    | Improve structure/format of the code                          |\n| `test`     | Add, update or pass tests             
                        |\n\n## 🧾 License\n\nAll contributions will be licensed under the project's license: [Apache License 2.0](https://github.com/Cinnamon/kotaemon/blob/main/LICENSE.txt).\n"
  },
  {
    "path": "Dockerfile",
    "content": "# Lite version\nFROM python:3.10-slim AS lite\n\n# Common dependencies\nRUN apt-get update -qqy && \\\n    apt-get install -y --no-install-recommends \\\n        ssh \\\n        git \\\n        gcc \\\n        g++ \\\n        poppler-utils \\\n        libpoppler-dev \\\n        unzip \\\n        curl \\\n        cargo \\\n        && \\\n    apt-get autoremove && apt-get clean && rm -rf /var/lib/apt/lists/*\n\n# Setup args\nARG TARGETPLATFORM\nARG TARGETARCH\n\n# Set environment variables\nENV PYTHONDONTWRITEBYTECODE=1\nENV PYTHONUNBUFFERED=1\nENV PYTHONIOENCODING=UTF-8\nENV TARGETARCH=${TARGETARCH}\n\n# Create working directory\nWORKDIR /app\n\n# Download pdfjs\nCOPY scripts/download_pdfjs.sh /app/scripts/download_pdfjs.sh\nRUN chmod +x /app/scripts/download_pdfjs.sh\nENV PDFJS_PREBUILT_DIR=\"/app/libs/ktem/ktem/assets/prebuilt/pdfjs-dist\"\nRUN bash scripts/download_pdfjs.sh $PDFJS_PREBUILT_DIR\n\n# Install uv dependencies\nRUN pip install --no-cache-dir \"uv\"\n\n# Copy contents\nCOPY . 
/app\nCOPY launch.sh /app/launch.sh\nCOPY .env.example /app/.env\n\n# Install pip packages\nRUN --mount=type=ssh  \\\n    --mount=type=cache,target=/root/.cache/uv  \\\n    uv sync --frozen --no-cache \\\n    && uv pip install --python .venv \"pdfservices-sdk@git+https://github.com/niallcm/pdfservices-python-sdk.git@bump-and-unfreeze-requirements\"\n\nRUN --mount=type=ssh  \\\n    --mount=type=cache,target=/root/.cache/uv  \\\n    if [ \"$TARGETARCH\" = \"amd64\" ]; then uv pip install --python .venv \"graphrag<=0.3.6\" future; fi\n\nENTRYPOINT [\"sh\", \"/app/launch.sh\"]\n\n# Full version\nFROM lite AS full\n\n# Additional dependencies for full version\nRUN apt-get update -qqy && \\\n    apt-get install -y --no-install-recommends \\\n        tesseract-ocr \\\n        tesseract-ocr-jpn \\\n        libsm6 \\\n        libxext6 \\\n        libreoffice \\\n        ffmpeg \\\n        libmagic-dev \\\n        && \\\n    apt-get autoremove && apt-get clean && rm -rf /var/lib/apt/lists/*\n\n# Install torch and torchvision for unstructured\nRUN --mount=type=ssh  \\\n    --mount=type=cache,target=/root/.cache/uv  \\\n    uv pip install --python .venv torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu\n\n# Install additional pip packages\nRUN --mount=type=ssh  \\\n    --mount=type=cache,target=/root/.cache/uv  \\\n    uv pip install --python .venv \"libs/kotaemon[adv]\" \\\n    && uv pip install --python .venv unstructured[all-docs]\n\n# Install lightRAG\nENV USE_LIGHTRAG=true\nRUN --mount=type=ssh  \\\n    --mount=type=cache,target=/root/.cache/uv  \\\n    uv pip install --python .venv aioboto3 nano-vectordb ollama xxhash \"lightrag-hku<=1.3.0\"\n\nRUN --mount=type=ssh  \\\n    --mount=type=cache,target=/root/.cache/uv  \\\n    uv pip install --python .venv \"docling<=2.5.2\"\n\n# Download NLTK data from LlamaIndex\nRUN /app/.venv/bin/python -c \"from llama_index.core.readers.base import BaseReader\"\n\nENTRYPOINT [\"sh\", \"/app/launch.sh\"]\n\n# 
Ollama-bundled version\nFROM full AS ollama\n\n# Install ollama\nRUN curl -fsSL https://ollama.com/install.sh | sh\n\n# RUN nohup bash -c \"ollama serve &\" && sleep 4 && ollama pull qwen2.5:7b\nRUN nohup bash -c \"ollama serve &\" && sleep 4 && ollama pull nomic-embed-text\n\nENTRYPOINT [\"sh\", \"/app/launch.sh\"]\n"
  },
  {
    "path": "LICENSE.txt",
    "content": "                                 Apache License\n                           Version 2.0, January 2004\n                        http://www.apache.org/licenses/\n\n   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION\n\n   1. Definitions.\n\n      \"License\" shall mean the terms and conditions for use, reproduction,\n      and distribution as defined by Sections 1 through 9 of this document.\n\n      \"Licensor\" shall mean the copyright owner or entity authorized by\n      the copyright owner that is granting the License.\n\n      \"Legal Entity\" shall mean the union of the acting entity and all\n      other entities that control, are controlled by, or are under common\n      control with that entity. For the purposes of this definition,\n      \"control\" means (i) the power, direct or indirect, to cause the\n      direction or management of such entity, whether by contract or\n      otherwise, or (ii) ownership of fifty percent (50%) or more of the\n      outstanding shares, or (iii) beneficial ownership of such entity.\n\n      \"You\" (or \"Your\") shall mean an individual or Legal Entity\n      exercising permissions granted by this License.\n\n      \"Source\" form shall mean the preferred form for making modifications,\n      including but not limited to software source code, documentation\n      source, and configuration files.\n\n      \"Object\" form shall mean any form resulting from mechanical\n      transformation or translation of a Source form, including but\n      not limited to compiled object code, generated documentation,\n      and conversions to other media types.\n\n      \"Work\" shall mean the work of authorship, whether in Source or\n      Object form, made available under the License, as indicated by a\n      copyright notice that is included in or attached to the work\n      (an example is provided in the Appendix below).\n\n      \"Derivative Works\" shall mean any work, whether in Source or Object\n      
form, that is based on (or derived from) the Work and for which the\n      editorial revisions, annotations, elaborations, or other modifications\n      represent, as a whole, an original work of authorship. For the purposes\n      of this License, Derivative Works shall not include works that remain\n      separable from, or merely link (or bind by name) to the interfaces of,\n      the Work and Derivative Works thereof.\n\n      \"Contribution\" shall mean any work of authorship, including\n      the original version of the Work and any modifications or additions\n      to that Work or Derivative Works thereof, that is intentionally\n      submitted to Licensor for inclusion in the Work by the copyright owner\n      or by an individual or Legal Entity authorized to submit on behalf of\n      the copyright owner. For the purposes of this definition, \"submitted\"\n      means any form of electronic, verbal, or written communication sent\n      to the Licensor or its representatives, including but not limited to\n      communication on electronic mailing lists, source code control systems,\n      and issue tracking systems that are managed by, or on behalf of, the\n      Licensor for the purpose of discussing and improving the Work, but\n      excluding communication that is conspicuously marked or otherwise\n      designated in writing by the copyright owner as \"Not a Contribution.\"\n\n      \"Contributor\" shall mean Licensor and any individual or Legal Entity\n      on behalf of whom a Contribution has been received by Licensor and\n      subsequently incorporated within the Work.\n\n   2. Grant of Copyright License. 
Subject to the terms and conditions of\n      this License, each Contributor hereby grants to You a perpetual,\n      worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n      copyright license to reproduce, prepare Derivative Works of,\n      publicly display, publicly perform, sublicense, and distribute the\n      Work and such Derivative Works in Source or Object form.\n\n   3. Grant of Patent License. Subject to the terms and conditions of\n      this License, each Contributor hereby grants to You a perpetual,\n      worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n      (except as stated in this section) patent license to make, have made,\n      use, offer to sell, sell, import, and otherwise transfer the Work,\n      where such license applies only to those patent claims licensable\n      by such Contributor that are necessarily infringed by their\n      Contribution(s) alone or by combination of their Contribution(s)\n      with the Work to which such Contribution(s) was submitted. If You\n      institute patent litigation against any entity (including a\n      cross-claim or counterclaim in a lawsuit) alleging that the Work\n      or a Contribution incorporated within the Work constitutes direct\n      or contributory patent infringement, then any patent licenses\n      granted to You under this License for that Work shall terminate\n      as of the date such litigation is filed.\n\n   4. Redistribution. 
You may reproduce and distribute copies of the\n      Work or Derivative Works thereof in any medium, with or without\n      modifications, and in Source or Object form, provided that You\n      meet the following conditions:\n\n      (a) You must give any other recipients of the Work or\n          Derivative Works a copy of this License; and\n\n      (b) You must cause any modified files to carry prominent notices\n          stating that You changed the files; and\n\n      (c) You must retain, in the Source form of any Derivative Works\n          that You distribute, all copyright, patent, trademark, and\n          attribution notices from the Source form of the Work,\n          excluding those notices that do not pertain to any part of\n          the Derivative Works; and\n\n      (d) If the Work includes a \"NOTICE\" text file as part of its\n          distribution, then any Derivative Works that You distribute must\n          include a readable copy of the attribution notices contained\n          within such NOTICE file, excluding those notices that do not\n          pertain to any part of the Derivative Works, in at least one\n          of the following places: within a NOTICE text file distributed\n          as part of the Derivative Works; within the Source form or\n          documentation, if provided along with the Derivative Works; or,\n          within a display generated by the Derivative Works, if and\n          wherever such third-party notices normally appear. The contents\n          of the NOTICE file are for informational purposes only and\n          do not modify the License. 
You may add Your own attribution\n          notices within Derivative Works that You distribute, alongside\n          or as an addendum to the NOTICE text from the Work, provided\n          that such additional attribution notices cannot be construed\n          as modifying the License.\n\n      You may add Your own copyright statement to Your modifications and\n      may provide additional or different license terms and conditions\n      for use, reproduction, or distribution of Your modifications, or\n      for any such Derivative Works as a whole, provided Your use,\n      reproduction, and distribution of the Work otherwise complies with\n      the conditions stated in this License.\n\n   5. Submission of Contributions. Unless You explicitly state otherwise,\n      any Contribution intentionally submitted for inclusion in the Work\n      by You to the Licensor shall be under the terms and conditions of\n      this License, without any additional terms or conditions.\n      Notwithstanding the above, nothing herein shall supersede or modify\n      the terms of any separate license agreement you may have executed\n      with Licensor regarding such Contributions.\n\n   6. Trademarks. This License does not grant permission to use the trade\n      names, trademarks, service marks, or product names of the Licensor,\n      except as required for reasonable and customary use in describing the\n      origin of the Work and reproducing the content of the NOTICE file.\n\n   7. Disclaimer of Warranty. Unless required by applicable law or\n      agreed to in writing, Licensor provides the Work (and each\n      Contributor provides its Contributions) on an \"AS IS\" BASIS,\n      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or\n      implied, including, without limitation, any warranties or conditions\n      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A\n      PARTICULAR PURPOSE. 
You are solely responsible for determining the\n      appropriateness of using or redistributing the Work and assume any\n      risks associated with Your exercise of permissions under this License.\n\n   8. Limitation of Liability. In no event and under no legal theory,\n      whether in tort (including negligence), contract, or otherwise,\n      unless required by applicable law (such as deliberate and grossly\n      negligent acts) or agreed to in writing, shall any Contributor be\n      liable to You for damages, including any direct, indirect, special,\n      incidental, or consequential damages of any character arising as a\n      result of this License or out of the use or inability to use the\n      Work (including but not limited to damages for loss of goodwill,\n      work stoppage, computer failure or malfunction, or any and all\n      other commercial damages or losses), even if such Contributor\n      has been advised of the possibility of such damages.\n\n   9. Accepting Warranty or Additional Liability. While redistributing\n      the Work or Derivative Works thereof, You may choose to offer,\n      and charge a fee for, acceptance of support, warranty, indemnity,\n      or other liability obligations and/or rights consistent with this\n      License. However, in accepting such obligations, You may act only\n      on Your own behalf and on Your sole responsibility, not on behalf\n      of any other Contributor, and only if You agree to indemnify,\n      defend, and hold each Contributor harmless for any liability\n      incurred by, or claims asserted against, such Contributor by reason\n      of your accepting any such warranty or additional liability.\n\n   END OF TERMS AND CONDITIONS\n\n   APPENDIX: How to apply the Apache License to your work.\n\n      To apply the Apache License to your work, attach the following\n      boilerplate notice, with the fields enclosed by brackets \"[]\"\n      replaced with your own identifying information. 
(Don't include\n      the brackets!)  The text should be enclosed in the appropriate\n      comment syntax for the file format. We also recommend that a\n      file or class name and description of purpose be included on the\n      same \"printed page\" as the copyright notice for easier\n      identification within third-party archives.\n\n   Copyright [yyyy] [name of copyright owner]\n\n   Licensed under the Apache License, Version 2.0 (the \"License\");\n   you may not use this file except in compliance with the License.\n   You may obtain a copy of the License at\n\n       http://www.apache.org/licenses/LICENSE-2.0\n\n   Unless required by applicable law or agreed to in writing, software\n   distributed under the License is distributed on an \"AS IS\" BASIS,\n   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n   See the License for the specific language governing permissions and\n   limitations under the License.\n"
  },
  {
    "path": "README.md",
    "content": "<div align=\"center\">\n\n# kotaemon\n\nAn open-source clean & customizable RAG UI for chatting with your documents. Built with both end users and\ndevelopers in mind.\n\n![Preview](https://raw.githubusercontent.com/Cinnamon/kotaemon/main/docs/images/preview-graph.png)\n\n<a href=\"https://trendshift.io/repositories/11607\" target=\"_blank\"><img src=\"https://trendshift.io/api/badge/repositories/11607\" alt=\"Cinnamon%2Fkotaemon | Trendshift\" style=\"width: 250px; height: 55px;\" width=\"250\" height=\"55\"/></a>\n\n[Live Demo #1](https://huggingface.co/spaces/cin-model/kotaemon) |\n[Live Demo #2](https://huggingface.co/spaces/cin-model/kotaemon-demo) |\n[Online Install](https://cinnamon.github.io/kotaemon/online_install/) |\n[Colab Notebook (Local RAG)](https://colab.research.google.com/drive/1eTfieec_UOowNizTJA1NjawBJH9y_1nn)\n\n[User Guide](https://cinnamon.github.io/kotaemon/) |\n[Developer Guide](https://cinnamon.github.io/kotaemon/development/) |\n[Feedback](https://github.com/Cinnamon/kotaemon/issues) |\n[Contact](mailto:kotaemon.support@cinnamon.is)\n\n[![Python 3.10+](https://img.shields.io/badge/python-3.10+-blue.svg)](https://www.python.org/downloads/release/python-31013/)\n[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)\n<a href=\"https://github.com/Cinnamon/kotaemon/pkgs/container/kotaemon\" target=\"_blank\">\n<img src=\"https://img.shields.io/badge/docker_pull-kotaemon:latest-brightgreen\" alt=\"docker pull ghcr.io/cinnamon/kotaemon:latest\"></a>\n![download](https://img.shields.io/github/downloads/Cinnamon/kotaemon/total.svg?label=downloads&color=blue)\n<a href='https://huggingface.co/spaces/cin-model/kotaemon-demo'><img src='https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue'></a>\n<a href=\"https://hellogithub.com/en/repository/d3141471a0244d5798bc654982b263eb\" target=\"_blank\"><img 
src=\"https://abroad.hellogithub.com/v1/widgets/recommend.svg?rid=d3141471a0244d5798bc654982b263eb&claim_uid=RLiD9UZ1rEHNaMf&theme=small\" alt=\"Featured｜HelloGitHub\" /></a>\n\n</div>\n\n<!-- start-intro -->\n\n## Introduction\n\nThis project serves as a functional RAG UI for both end users who want to do QA on their\ndocuments and developers who want to build their own RAG pipeline.\n<br>\n\n```yml\n+----------------------------------------------------------------------------+\n| End users: Those who use apps built with `kotaemon`.                       |\n| (You use an app like the one in the demo above)                            |\n|     +----------------------------------------------------------------+     |\n|     | Developers: Those who built with `kotaemon`.                   |     |\n|     | (You have `import kotaemon` somewhere in your project)         |     |\n|     |     +----------------------------------------------------+     |     |\n|     |     | Contributors: Those who make `kotaemon` better.    |     |     |\n|     |     | (You make PR to this repo)                         |     |     |\n|     |     +----------------------------------------------------+     |     |\n|     +----------------------------------------------------------------+     |\n+----------------------------------------------------------------------------+\n```\n\n### For end users\n\n- **Clean & Minimalistic UI**: A user-friendly interface for RAG-based QA.\n- **Support for Various LLMs**: Compatible with LLM API providers (OpenAI, AzureOpenAI, Cohere, etc.) 
and local LLMs (via `ollama` and `llama-cpp-python`).\n- **Easy Installation**: Simple scripts to get you started quickly.\n\n### For developers\n\n- **Framework for RAG Pipelines**: Tools to build your own RAG-based document QA pipeline.\n- **Customizable UI**: See your RAG pipeline in action with the provided UI, built with <a href='https://github.com/gradio-app/gradio'>Gradio <img src='https://img.shields.io/github/stars/gradio-app/gradio'></a>.\n- **Gradio Theme**: If you use Gradio for development, check out our theme here: [kotaemon-gradio-theme](https://github.com/lone17/kotaemon-gradio-theme).\n\n## Key Features\n\n- **Host your own document QA (RAG) web-UI**: Support multi-user login, organize your files in private/public collections, collaborate and share your favorite chat with others.\n\n- **Organize your LLM & Embedding models**: Support both local LLMs & popular API providers (OpenAI, Azure, Ollama, Groq).\n\n- **Hybrid RAG pipeline**: Sane default RAG pipeline with hybrid (full-text & vector) retriever and re-ranking to ensure best retrieval quality.\n\n- **Multi-modal QA support**: Perform Question Answering on multiple documents with support for figures and tables. Support multi-modal document parsing (selectable options on UI).\n\n- **Advanced citations with document preview**: By default the system will provide detailed citations to ensure the correctness of LLM answers. View your citations (incl. relevance score) directly in the _in-browser PDF viewer_ with highlights. Warns when the retrieval pipeline returns low-relevance articles.\n\n- **Support complex reasoning methods**: Use question decomposition to answer your complex/multi-hop questions. Support agent-based reasoning with `ReAct`, `ReWOO` and other agents.\n\n- **Configurable settings UI**: You can adjust the most important aspects of the retrieval & generation process on the UI (incl. prompts).\n\n- **Extensible**: Being built on Gradio, you are free to customize or add any UI elements as you like. 
Also, we aim to support multiple strategies for document indexing & retrieval. `GraphRAG` indexing pipeline is provided as an example.\n\n![Preview](https://raw.githubusercontent.com/Cinnamon/kotaemon/main/docs/images/preview.png)\n\n## Installation\n\n> If you are not a developer and just want to use the app, please check out our easy-to-follow [User Guide](https://cinnamon.github.io/kotaemon/). Download the `.zip` file from the [latest release](https://github.com/Cinnamon/kotaemon/releases/latest) to get all the newest features and bug fixes.\n\n### System requirements\n\n1. [Python](https://www.python.org/downloads/) >= 3.10\n2. [Docker](https://www.docker.com/): optional, if you [install with Docker](#with-docker-recommended)\n3. [Unstructured](https://docs.unstructured.io/open-source/installation/full-installation#full-installation) if you want to process files other than `.pdf`, `.html`, `.mhtml`, and `.xlsx` documents. Installation steps differ depending on your operating system. Please visit the link and follow the specific instructions provided there.\n\n### With Docker (recommended)\n\n1. We support both `lite` & `full` version of Docker images. With `full` version, the extra packages of `unstructured` will be installed, which can support additional file types (`.doc`, `.docx`, ...) but the cost is larger docker image size. 
For most users, the `lite` image should work well in most cases.\n\n   - To use the `full` version.\n\n     ```bash\n     docker run \\\n     -e GRADIO_SERVER_NAME=0.0.0.0 \\\n     -e GRADIO_SERVER_PORT=7860 \\\n     -v ./ktem_app_data:/app/ktem_app_data \\\n     -p 7860:7860 -it --rm \\\n     ghcr.io/cinnamon/kotaemon:main-full\n     ```\n\n   - To use the `full` version with bundled **Ollama** for _local / private RAG_.\n\n     ```bash\n     # change image name to\n     docker run <...> ghcr.io/cinnamon/kotaemon:main-ollama\n     ```\n\n   - To use the `lite` version.\n\n   ```bash\n    # change image name to\n    docker run <...> ghcr.io/cinnamon/kotaemon:main-lite\n   ```\n\n2. We currently support and test two platforms: `linux/amd64` and `linux/arm64` (for newer Mac). You can specify the platform by passing `--platform` in the `docker run` command. For example:\n\n   ```bash\n   # To run docker with platform linux/arm64\n   docker run \\\n   -e GRADIO_SERVER_NAME=0.0.0.0 \\\n   -e GRADIO_SERVER_PORT=7860 \\\n   -v ./ktem_app_data:/app/ktem_app_data \\\n   -p 7860:7860 -it --rm \\\n   --platform linux/arm64 \\\n   ghcr.io/cinnamon/kotaemon:main-lite\n   ```\n\n3. Once everything is set up correctly, you can go to `http://localhost:7860/` to access the WebUI.\n\n4. We use [GHCR](https://docs.github.com/en/packages/working-with-a-github-packages-registry/working-with-the-container-registry) to store docker images, all images can be found [here.](https://github.com/Cinnamon/kotaemon/pkgs/container/kotaemon)\n\n### Without Docker\n\n#### Option 1: Using uv (Recommended for faster installation)\n\n1. 
Clone the repository and run the uv installation script:\n\n   ```shell\n   # clone this repo\n   git clone https://github.com/Cinnamon/kotaemon\n   cd kotaemon\n\n   # run the uv installation script (installs uv automatically if not present)\n   bash scripts/run_uv.sh\n   ```\n\n   This script will:\n\n   - Install uv package manager if not present\n   - Create a virtual environment with Python 3.10\n   - Install all dependencies using uv (significantly faster than conda/pip)\n   - Set up PDF.js viewer\n   - Launch the application\n\n#### Option 2: Using conda (Traditional method)\n\n1. Clone and install required packages on a fresh python environment.\n\n   ```shell\n   # optional (setup env)\n   conda create -n kotaemon python=3.10\n   conda activate kotaemon\n\n   # clone this repo\n   git clone https://github.com/Cinnamon/kotaemon\n   cd kotaemon\n\n   pip install -e \"libs/kotaemon[all]\"\n   pip install -e \"libs/ktem\"\n   ```\n\n2. Create a `.env` file in the root of this project. Use `.env.example` as a template\n\n   The `.env` file is there to serve use cases where users want to pre-config the models before starting up the app (e.g. deploy the app on HF hub). The file will only be used to populate the db once upon the first run, it will no longer be used in consequent runs.\n\n3. (Optional) To enable in-browser `PDF_JS` viewer, download [PDF_JS_DIST](https://github.com/mozilla/pdf.js/releases/download/v4.0.379/pdfjs-4.0.379-dist.zip) then extract it to `libs/ktem/ktem/assets/prebuilt`\n\n<img src=\"https://raw.githubusercontent.com/Cinnamon/kotaemon/main/docs/images/pdf-viewer-setup.png\" alt=\"pdf-setup\" width=\"300\">\n\n4. Start the web server:\n\n   ```shell\n   python app.py\n   ```\n\n   - The app will be automatically launched in your browser.\n   - Default username and password are both `admin`. 
You can set up additional users directly through the UI.\n\n   ![Chat tab](https://raw.githubusercontent.com/Cinnamon/kotaemon/main/docs/images/chat-tab.png)\n\n5. Check the `Resources` tab and `LLMs and Embeddings` and ensure that your `api_key` value is set correctly from your `.env` file. If it is not set, you can set it there.\n\n### Setup GraphRAG\n\n> [!NOTE]\n> Official MS GraphRAG indexing only works with OpenAI or Ollama API.\n> We recommend most users to use NanoGraphRAG implementation for straightforward integration with Kotaemon.\n\n<details>\n\n<summary>Setup Nano GRAPHRAG</summary>\n\n- Install nano-GraphRAG: `pip install nano-graphrag`\n- `nano-graphrag` install might introduce version conflicts, see [this issue](https://github.com/Cinnamon/kotaemon/issues/440)\n  - To quickly fix: `pip uninstall hnswlib chroma-hnswlib && pip install chroma-hnswlib`\n- Launch Kotaemon with `USE_NANO_GRAPHRAG=true` environment variable.\n- Set your default LLM & Embedding models in Resources setting and it will be recognized automatically from NanoGraphRAG.\n\n</details>\n\n<details>\n\n<summary>Setup LIGHTRAG</summary>\n\n- Install LightRAG: `pip install git+https://github.com/HKUDS/LightRAG.git`\n- `LightRAG` install might introduce version conflicts, see [this issue](https://github.com/Cinnamon/kotaemon/issues/440)\n  - To quickly fix: `pip uninstall hnswlib chroma-hnswlib && pip install chroma-hnswlib`\n- Launch Kotaemon with `USE_LIGHTRAG=true` environment variable.\n- Set your default LLM & Embedding models in Resources setting and it will be recognized automatically from LightRAG.\n\n</details>\n\n<details>\n\n<summary>Setup MS GRAPHRAG</summary>\n\n- **Non-Docker Installation**: If you are not using Docker, install GraphRAG with the following command:\n\n  ```shell\n  pip install \"graphrag<=0.3.6\" future\n  ```\n\n- **Setting Up API KEY**: To use the GraphRAG retriever feature, ensure you set the `GRAPHRAG_API_KEY` environment variable. 
You can do this directly in your environment or by adding it to a `.env` file.\n- **Using Local Models and Custom Settings**: If you want to use GraphRAG with local models (like `Ollama`) or customize the default LLM and other configurations, set the `USE_CUSTOMIZED_GRAPHRAG_SETTING` environment variable to true. Then, adjust your settings in the `settings.yaml.example` file.\n\n</details>\n\n### Setup Local Models (for local/private RAG)\n\nSee [Local model setup](docs/local_model.md).\n\n### Setup multimodal document parsing (OCR, table parsing, figure extraction)\n\nThese options are available:\n\n- [Azure Document Intelligence (API)](https://azure.microsoft.com/en-us/products/ai-services/ai-document-intelligence)\n- [Adobe PDF Extract (API)](https://developer.adobe.com/document-services/docs/overview/pdf-extract-api/)\n- [Docling (local, open-source)](https://github.com/DS4SD/docling)\n  - To use Docling, first install required dependencies: `pip install docling`\n\nSelect corresponding loaders in `Settings -> Retrieval Settings -> File loader`\n\n### Customize your application\n\n- By default, all application data is stored in the `./ktem_app_data` folder. You can back up or copy this folder to transfer your installation to a new machine.\n\n- For advanced users or specific use cases, you can customize these files:\n\n  - `flowsettings.py`\n  - `.env`\n\n#### `flowsettings.py`\n\nThis file contains the configuration of your application. 
You can use the example\n[here](flowsettings.py) as the starting point.\n\n<details>\n\n<summary>Notable settings</summary>\n\n```python\n# setup your preferred document store (with full-text search capabilities)\nKH_DOCSTORE=(Elasticsearch | LanceDB | SimpleFileDocumentStore)\n\n# setup your preferred vectorstore (for vector-based search)\nKH_VECTORSTORE=(ChromaDB | LanceDB | InMemory | Milvus | Qdrant)\n\n# Enable / disable multimodal QA\nKH_REASONINGS_USE_MULTIMODAL=True\n\n# Setup your new reasoning pipeline or modify existing one.\nKH_REASONINGS = [\n    \"ktem.reasoning.simple.FullQAPipeline\",\n    \"ktem.reasoning.simple.FullDecomposeQAPipeline\",\n    \"ktem.reasoning.react.ReactAgentPipeline\",\n    \"ktem.reasoning.rewoo.RewooAgentPipeline\",\n]\n```\n\n</details>\n\n#### `.env`\n\nThis file provides another way to configure your models and credentials.\n\n<details>\n\n<summary>Configure model via the .env file</summary>\n\n- Alternatively, you can configure the models via the `.env` file with the information needed to connect to the LLMs. This file is located in the folder of the application. If you don't see it, you can create one.\n\n- Currently, the following providers are supported:\n\n  - **OpenAI**\n\n    In the `.env` file, set the `OPENAI_API_KEY` variable with your OpenAI API key in order\n    to enable access to OpenAI's models. There are other variables that can be modified,\n    please feel free to edit them to fit your case. Otherwise, the default parameter should\n    work for most people.\n\n    ```shell\n    OPENAI_API_BASE=https://api.openai.com/v1\n    OPENAI_API_KEY=<your OpenAI API key here>\n    OPENAI_CHAT_MODEL=gpt-3.5-turbo\n    OPENAI_EMBEDDINGS_MODEL=text-embedding-ada-002\n    ```\n\n  - **Azure OpenAI**\n\n    For OpenAI models via Azure platform, you need to provide your Azure endpoint and API\n    key. 
You might also need to provide your deployments' names for the chat model and the\n    embedding model depending on how you set up your Azure deployment.\n\n    ```shell\n    AZURE_OPENAI_ENDPOINT=\n    AZURE_OPENAI_API_KEY=\n    OPENAI_API_VERSION=2024-02-15-preview\n    AZURE_OPENAI_CHAT_DEPLOYMENT=gpt-35-turbo\n    AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT=text-embedding-ada-002\n    ```\n\n  - **Local Models**\n\n    - Using `ollama` OpenAI compatible server:\n\n      - Install [ollama](https://github.com/ollama/ollama) and start the application.\n\n      - Pull your model, for example:\n\n        ```shell\n        ollama pull llama3.1:8b\n        ollama pull nomic-embed-text\n        ```\n\n      - Set the model names on web UI and make them the default:\n\n        ![Models](https://raw.githubusercontent.com/Cinnamon/kotaemon/main/docs/images/models.png)\n\n    - Using `GGUF` with `llama-cpp-python`\n\n      You can search and download an LLM to be run locally from the [Hugging Face Hub](https://huggingface.co/models). Currently, these model formats are supported:\n\n      - GGUF\n\n        You should choose a model whose size is less than your device's memory and should leave\n        about 2 GB. For example, if you have 16 GB of RAM in total, of which 12 GB is available,\n        then you should choose a model that takes up at most 10 GB of RAM. Bigger models tend to\n        give better generation but also take more processing time.\n\n        Here are some recommendations and their size in memory:\n\n      - [Qwen1.5-1.8B-Chat-GGUF](https://huggingface.co/Qwen/Qwen1.5-1.8B-Chat-GGUF/resolve/main/qwen1_5-1_8b-chat-q8_0.gguf?download=true): around 2 GB\n\n        Add a new LlamaCpp model with the provided model name on the web UI.\n\n  </details>\n\n### Adding your own RAG pipeline\n\n#### Custom Reasoning Pipeline\n\n1. Check the default pipeline implementation in [here](libs/ktem/ktem/reasoning/simple.py). 
You can make quick adjustments to how the default QA pipeline works.\n2. Add a new `.py` implementation in `libs/ktem/ktem/reasoning/` and later include it in `flowsettings.py` to enable it on the UI.\n\n#### Custom Indexing Pipeline\n\n- Check sample implementation in `libs/ktem/ktem/index/file/graph`\n\n> (more instruction WIP).\n\n<!-- end-intro -->\n\n## Citation\n\nPlease cite this project as\n\n```BibTeX\n@misc{kotaemon2024,\n    title = {Kotaemon - An open-source RAG-based tool for chatting with any content.},\n    author = {The Kotaemon Team},\n    year = {2024},\n    howpublished = {\\url{https://github.com/Cinnamon/kotaemon}},\n}\n```\n\n## Star History\n\n<a href=\"https://star-history.com/#Cinnamon/kotaemon&Date\">\n <picture>\n   <source media=\"(prefers-color-scheme: dark)\" srcset=\"https://api.star-history.com/svg?repos=Cinnamon/kotaemon&type=Date&theme=dark\" />\n   <source media=\"(prefers-color-scheme: light)\" srcset=\"https://api.star-history.com/svg?repos=Cinnamon/kotaemon&type=Date\" />\n   <img alt=\"Star History Chart\" src=\"https://api.star-history.com/svg?repos=Cinnamon/kotaemon&type=Date\" />\n </picture>\n</a>\n\n## Contribution\n\nSince our project is actively being developed, we greatly value your feedback and contributions. Please see our [Contributing Guide](https://github.com/Cinnamon/kotaemon/blob/main/CONTRIBUTING.md) to get started. Thank you to all our contributors!\n\n<a href=\"https://github.com/Cinnamon/kotaemon/graphs/contributors\">\n  <img src=\"https://contrib.rocks/image?repo=Cinnamon/kotaemon\" />\n</a>\n"
  },
  {
    "path": "app.py",
    "content": "import os\n\nfrom theflow.settings import settings as flowsettings\n\nKH_APP_DATA_DIR = getattr(flowsettings, \"KH_APP_DATA_DIR\", \".\")\nKH_GRADIO_SHARE = getattr(flowsettings, \"KH_GRADIO_SHARE\", False)\nGRADIO_TEMP_DIR = os.getenv(\"GRADIO_TEMP_DIR\", None)\n# override GRADIO_TEMP_DIR if it's not set\nif GRADIO_TEMP_DIR is None:\n    GRADIO_TEMP_DIR = os.path.join(KH_APP_DATA_DIR, \"gradio_tmp\")\n    os.environ[\"GRADIO_TEMP_DIR\"] = GRADIO_TEMP_DIR\n\n\nfrom ktem.main import App  # noqa\n\napp = App()\ndemo = app.make()\ndemo.queue().launch(\n    favicon_path=app._favicon,\n    inbrowser=True,\n    allowed_paths=[\n        \"libs/ktem/ktem/assets\",\n        GRADIO_TEMP_DIR,\n    ],\n    share=KH_GRADIO_SHARE,\n)\n"
  },
  {
    "path": "doc_env_reqs.txt",
    "content": "mkdocs\nmkdocstrings[python]\nmkdocs-material\nmkdocs-gen-files\nmkdocs-literate-nav\nmkdocs-git-revision-date-localized-plugin\nmkdocs-section-index\nmkdocs-include-markdown-plugin[cache]\nmdx_truly_sane_lists\n"
  },
  {
    "path": "docs/about.md",
    "content": "# About Kotaemon\n\nAn open-source tool for chatting with your documents. Built with both end users and\ndevelopers in mind.\n\n[Source Code](https://github.com/Cinnamon/kotaemon) |\n[HF Space](https://huggingface.co/spaces/cin-model/kotaemon-demo)\n\n[Installation Guide](https://cinnamon.github.io/kotaemon/) |\n[Developer Guide](https://cinnamon.github.io/kotaemon/development/) |\n[Feedback](https://github.com/Cinnamon/kotaemon/issues)\n"
  },
  {
    "path": "docs/development/contributing.md",
    "content": "# Contributing\n\n## Setting up\n\n- Clone the repo\n\n  ```shell\n  git clone git@github.com:Cinnamon/kotaemon.git\n  cd kotaemon\n  ```\n\n- Install the environment\n\n  - Create a conda environment (python >= 3.10 is recommended)\n\n    ```shell\n    conda create -n kotaemon python=3.10\n    conda activate kotaemon\n\n    # install dependencies\n    cd libs/kotaemon\n    pip install -e \".[all]\"\n    ```\n\n  - Or run the installer (one of the `scripts/run_*` scripts depends on your OS), then\n    you will have all the dependencies installed as a conda environment at\n    `install_dir/env`.\n\n    ```shell\n    conda activate install_dir/env\n    ```\n\n- Pre-commit\n\n  ```shell\n  pre-commit install\n  ```\n\n- Test\n\n  ```shell\n  pytest tests\n  ```\n\n## Package overview\n\n`kotaemon` library focuses on the AI building blocks to implement a RAG-based QA application. It consists of base interfaces, core components and a list of utilities:\n\n- Base interfaces: `kotaemon` defines the base interface of a component in a pipeline. A pipeline is also a component. By clearly define this interface, a pipeline of steps can be easily constructed and orchestrated.\n- Core components: `kotaemon` implements (or wraps 3rd-party libraries\n  like Langchain, llama-index,... when possible) commonly used components in\n  kotaemon use cases. Some of these components are: LLM, vector store,\n  document store, retriever... For a detailed list and description of these\n  components, please refer to the [API Reference](../reference/Summary.md) section.\n- List of utilities: `kotaemon` provides utilities and tools that are\n  usually needed in client project. For example, it provides a prompt\n  engineering UI for AI developers in a project to quickly create a prompt\n  engineering tool for DMs and QALs. It also provides a command to quickly spin\n  up a project code base. 
For a full list and description of these utilities,\n  please refer to the [Utilities](utilities.md) section.\n\n```mermaid\nmindmap\n  root((kotaemon))\n    Base Interfaces\n      Document\n      LLMInterface\n      RetrievedDocument\n      BaseEmbeddings\n      BaseChat\n      BaseCompletion\n      ...\n    Core Components\n      LLMs\n        AzureOpenAI\n        OpenAI\n      Embeddings\n        AzureOpenAI\n        OpenAI\n        HuggingFaceEmbedding\n      VectorStore\n        InMemoryVectorstore\n        ChromaVectorstore\n      Agent\n      Tool\n      DocumentStore\n      ...\n    Utilities\n      Scaffold project\n      PromptUI\n      Documentation Support\n```\n\n## Common conventions\n\n- PR title: One-line description (example: Feat: Declare BaseComponent and decide LLM call interface).\n- [Encouraged] Provide a quick description in the PR, so that:\n  - Reviewers can quickly understand the direction of the PR.\n  - It will be included in the commit message when the PR is merged.\n\n## Environment caching on PR\n\n- To speed up CI, environments are cached based on the version specified in `__init__.py`.\n- Since dependency versions in `setup.py` are not pinned, you need to bump the version in order to use a new environment. That environment will then be cached and used by your subsequent commits within the PR, until you bump the version again.\n- The new environment created during your PR is cached and will be available to others once the PR is merged.\n- If you are experimenting with new dependencies and want a fresh environment every time, add `[ignore cache]` in your commit message. 
The CI will create a fresh environment to run your commit and then discard it.\n- If your PR includes updated dependencies, the recommended workflow would be:\n  - Doing development as usual.\n  - When you want to run the CI, push a commit with the message containing `[ignore cache]`.\n  - Once the PR is final, bump the version in `__init__.py` and push a final commit not containing `[ignore cache]`.\n\n## Merge PR guideline\n\n- Use squash and merge option\n- 1st line message is the PR title.\n- The text area is the PR description.\n"
  },
  {
    "path": "docs/development/create-a-component.md",
    "content": "# Creating a component\n\nA fundamental concept in kotaemon is \"component\".\n\nAnything that isn't data or data structure is a \"component\". A component can be\nthought of as a step within a pipeline. It takes in some input, processes it,\nand returns an output, just the same as a Python function! The output will then\nbecome an input for the next component in a pipeline. In fact, a pipeline is just\na component. More appropriately, a nested component: a component that makes use of one or more other components in\nthe processing step. So in reality, there isn't a difference between a pipeline\nand a component! Because of that, in kotaemon, we will consider them the\nsame as \"component\".\n\nTo define a component, you will:\n\n1. Create a class that subclasses from `kotaemon.base.BaseComponent`\n2. Declare init params with type annotation\n3. Declare nodes (nodes are just other components!) with type annotation\n4. Implement the processing logic in `run`.\n\nThe syntax of a component is as follows:\n\n```python\nfrom kotaemon.base import BaseComponent\nfrom kotaemon.llms import LCAzureChatOpenAI\nfrom kotaemon.parsers import RegexExtractor\n\n\nclass FancyPipeline(BaseComponent):\n    param1: str = \"This is param1\"\n    param2: int = 10\n    param3: float\n\n    node1: BaseComponent    # this is a node because of BaseComponent type annotation\n    node2: LCAzureChatOpenAI  # this is also a node because LCAzureChatOpenAI subclasses BaseComponent\n    node3: RegexExtractor   # this is also a node because RegexExtractor subclasses BaseComponent\n\n    def run(self, some_text: str):\n        prompt = (self.param1 + some_text) * int(self.param2 + self.param3)\n        llm_pred = self.node2(prompt).text\n        matches = self.node3(llm_pred)\n        return matches\n```\n\nThen this component can be used as follows:\n\n```python\nllm = LCAzureChatOpenAI(endpoint=\"some-endpoint\")\nextractor = RegexExtractor(pattern=[\"yes\", \"Yes\"])\n\ncomponent = 
FancyPipeline(\n    param1=\"Hello\",\n    param3=1.5,\n    node1=llm,\n    node2=llm,\n    node3=extractor\n)\ncomponent(\"goodbye\")\n```\n\nThis way, we can define each operation as a reusable component, and use them to\ncompose larger reusable components!\n\n## Benefits of component\n\nBy defining a component as above, we formally encapsulate all the necessary\ninformation inside a single class. This introduces several benefits:\n\n1. Allow tools like promptui to inspect the inner workings of a component in\n   order to automatically generate the promptui.\n2. Allow visualizing a pipeline for debugging purposes.\n"
  },
  {
    "path": "docs/development/data-components.md",
    "content": "# Data & Data Structure Components\n\nThe data & data structure components include:\n\n- The `Document` class.\n- The document store.\n- The vector store.\n\n## Data Loader\n\n- PdfLoader\n- Layout-aware with table parsing PdfLoader\n\n  - MathPixLoader: To use this loader, you need MathPix API key, refer to [mathpix docs](https://docs.mathpix.com/#introduction) for more information\n  - OCRLoader: This loader uses lib-table and Flax pipeline to perform OCR and read table structure from PDF file (TODO: add more info about deployment of this module).\n  - Output:\n\n    - Document: text + metadata to identify whether it is table or not\n\n      ```\n      - \"source\": source file name\n      - \"type\": \"table\" or \"text\"\n      - \"table_origin\": original table in markdown format (to be feed to LLM or visualize using external tools)\n      - \"page_label\": page number in the original PDF document\n      ```\n\n## Document Store\n\n- InMemoryDocumentStore\n\n## Vector Store\n\n- ChromaVectorStore\n- InMemoryVectorStore\n"
  },
  {
    "path": "docs/development/index.md",
    "content": "{%\n    include-markdown \"../../README.md\"\n    start=\"<!-- start-intro -->\"\n    end=\"<!-- end-intro -->\"\n%}\n"
  },
  {
    "path": "docs/development/utilities.md",
    "content": "# Utilities\n\n## Prompt engineering UI\n\n![chat-ui](images/271332562-ac8f9aac-d853-4571-a48b-d866a99eaf3e.png)\n\n**_Important:_** despite the name prompt engineering UI, this tool allows testers to test any kind of parameters that are exposed by developers. Prompt is one kind of param. There can be other type of params that testers can tweak (e.g. top_k, temperature...).\n\nIn the development process, developers typically build the pipeline. However, for use\ncases requiring expertise in prompt creation, non-technical members (testers, domain experts) can be more\neffective. To facilitate this, `kotaemon` offers a user-friendly prompt engineering UI\nthat developers integrate into their pipelines. This enables non-technical members to\nadjust prompts and parameters, run experiments, and export results for optimization.\n\nAs of Sept 2023, there are 2 kinds of prompt engineering UI:\n\n- Simple pipeline: run one-way from start to finish.\n- Chat pipeline: interactive back-and-forth.\n\n### Simple pipeline\n\nFor simple pipeline, the supported client project workflow looks as follow:\n\n1. [tech] Build pipeline\n2. [tech] Export pipeline to config: `$ kotaemon promptui export <module.path.piplineclass> --output <path/to/config/file.yml>`\n3. [tech] Customize the config\n4. [tech] Spin up prompt engineering UI: `$ kotaemon promptui run <path/to/config/file.yml>`\n5. [non-tech] Change params, run inference\n6. [non-tech] Export to Excel\n7. [non-tech] Select the set of params that achieve the best output\n\nThe prompt engineering UI prominently involves from step 2 to step 7 (step 1 is normally\ndone by the developers, while step 7 happens exclusively in Excel file).\n\n#### Step 2 - Export pipeline to config\n\nCommand:\n\n```shell\n$ kotaemon promptui export <module.path.piplineclass> --output <path/to/config/file.yml>\n```\n\nwhere:\n\n- `<module.path.pipelineclass>` is a dot-separated path to the pipeline. 
For example, if your pipeline can be accessed with `from projectA.pipelines import AnsweringPipeline`, then this value is `projectA.pipelines.AnsweringPipeline`.\n- `<path/to/config/file.yml>` is the target file path that the config will be exported to. If the config file already exists, and contains information of other pipelines, the config of current pipeline will additionally be added. If it contains information of the current pipeline (in the past), the old information will be replaced.\n\nBy default, all params in a pipeline (including nested params) will be exported to the configuration file. For params that you do not wish to expose to the UI, you can directly remove them from the config YAML file. You can also annotate those params with `ignore_ui=True`, and they will be ignored in the config generation process. Example:\n\n```python\nclass Pipeline(BaseComponent):\n    param1: str = Param(default=\"hello\")\n    param2: str = Param(default=\"goodbye\", ignore_ui=True)\n```\n\nDeclared as above, `param1` will show up in the config YAML file, while `param2` will not.\n\n#### Step 3 - Customize the config\n\nDevelopers can further edit the config file in this step to get the most suitable UI (step 4) with their tasks. The exported config will have this overall schema:\n\n```yml\n<module.path.pipelineclass1>:\n  params: ... (Detail param information to initiate a pipeline. This corresponds to the pipeline init parameters.)\n  inputs: ... (Detail the input of the pipeline e.g. a text prompt. This corresponds to the params of `run(...)` method.)\n  outputs: ... (Detail the output of the pipeline e.g. prediction, accuracy... This is the output information we wish to see in the UI.)\n  logs: ... 
(Detail what information should show up in the log.)\n```\n\n##### Input and params\n\nThe inputs section has the overall schema as follows:\n\n```yml\ninputs:\n  <input-variable-name-1>:\n    component: <supported-UI-component>\n    params: # (this section is optional)\n      value: <default-value>\n  <input-variable-name-2>: ... # similar to above\nparams:\n  <param-variable-name-1>: ... # similar to those in the inputs\n```\n\nThe list of supported prompt UI and their corresponding gradio UI components:\n\n```python\nCOMPONENTS_CLASS = {\n    \"text\": gr.components.Textbox,\n    \"checkbox\": gr.components.CheckboxGroup,\n    \"dropdown\": gr.components.Dropdown,\n    \"file\": gr.components.File,\n    \"image\": gr.components.Image,\n    \"number\": gr.components.Number,\n    \"radio\": gr.components.Radio,\n    \"slider\": gr.components.Slider,\n}\n```\n\n##### Outputs\n\nThe outputs are a list of variables that we wish to show in the UI. Since, in Python, the function output doesn't have a variable name, the output declaration is a little bit different from the input and param declarations:\n\n```yml\noutputs:\n  - component: <supported-UI-component>\n    step: <name-of-pipeline-step>\n    item: <jsonpath way to retrieve the info>\n  - ... 
# similar to above\n```\n\nwhere:\n\n- component: the same text string and corresponding Gradio UI as in inputs & params\n- step: the pipeline step that we wish to look fetch and show output on the UI\n- item: the jsonpath mechanism to get the targeted variable from the step above\n\n##### Logs\n\nThe logs show a list of sheetname and how to retrieve the desired information.\n\n```yml\nlogs:\n  <logname>:\n    inputs:\n      - name: <column name>\n        step: <the pipeline step that we would wish to see the input>\n        variable: <the variable in the step>\n      - ...\n    outputs:\n      - name: <column name>\n        step: <the pipeline step that we would wish to see the output>\n        item: <how to retrieve the output of that step>\n```\n\n#### Step 4 + 5 - Spin up prompt engineering UI + Perform prompt engineering\n\nCommand:\n\n```shell\n$ kotaemon promptui run <path/to/config/file.yml>\n```\n\nThis will generate an UI as follow:\n\n![Screenshot from 2023-09-20 12-20-31](images/269170198-9ac1b95a-b667-42e7-b318-98a1b805d6df.png)\n\nwhere:\n\n- The tabs at the top of the UI corresponds to the pipeline to do prompt engineering.\n- The inputs and params tabs allow users to edit (these corresponds to the inputs and params in the config file).\n- The outputs panel holds the UI elements to show the outputs defined in config file.\n- The Run button: will execute pipeline with the supplied inputs and params, and render result in the outputs panel.\n- The Export button: will export the logs of all the run to an Excel files users to inspect for best set of params.\n\n#### Step 6 - Export to Excel\n\nUpon clicking export, the users can download Excel file.\n\n### Chat pipeline\n\nChat pipeline workflow is different from simple pipeline workflow. In simple pipeline, each Run creates a set of output, input and params for users to compare. In chat pipeline, each Run is not a one-off run, but a long interactive session. Hence, the workflow is as follow:\n\n1. 
Set the desired parameters.\n2. Click \"New chat\" to start a chat session with the supplied parameters. This set of parameters will persist until the end of the chat session. During an ongoing chat session, changing the parameters will not have any effect.\n3. Chat and interact with the chat bot on the right panel. You can add any additional input (if any), and they will be supplied to the chatbot.\n4. During chat, the log of the chat will show up in the \"Output\" tab. This is empty by default, so if you want to show the log here, tell the AI developers to configure the UI settings.\n5. When finishing chat, select your preference in the radio box. Click \"End chat\". This will save the chat log and the preference to disk.\n6. To compare the results of different runs, click \"Export\" to get an Excel spreadsheet summary of the different runs.\n"
  },
  {
    "path": "docs/extra/css/code_select.css",
    "content": ".language-pycon .gp,\n.language-pycon .go {\n  /* Generic.Prompt,  Generic.Output */\n  user-select: none;\n}\n"
  },
  {
    "path": "docs/index.md",
    "content": "# Getting Started with Kotaemon\n\n![type:video](https://github.com/Cinnamon/kotaemon/assets/25688648/815ecf68-3a02-4914-a0dd-3f8ec7e75cd9)\n\nThis page is intended for **end users** who want to use the `kotaemon` tool for Question\nAnswering on local documents. If you are a **developer** who wants to contribute to the project, please visit the [development](development/index.md) page.\n\n## Installation (Online HuggingFace Space) - easy (10 mins)\n\nVisit this [guide](online_install.md).\n\n## Installation (Offline) - intermediate (20 mins)\n\n### Download\n\nDownload the `kotaemon-app.zip` file from the [latest release](https://github.com/Cinnamon/kotaemon/releases/latest/).\n\n### Run setup script\n\n0. Unzip the downloaded file.\n1. Navigate to the `scripts` folder and start an installer that matches your OS:\n   - Windows: `run_windows.bat`. Just double click the file.\n   - macOS: `run_macos.sh`\n     1. Right click on your file and select Open with and Other.\n     2. Enable All Applications and choose Terminal.\n     3. NOTE: If you always want to open that file with Terminal, then check Always Open With.\n     4. From now on, double click on your file and it should work.\n   - Linux: `run_linux.sh`. Please run the script using `bash run_linux.sh` in your terminal.\n2. After the installation, the installer will ask to launch the ktem's UI; answer to continue.\n3. If launched, the application will be opened automatically in your browser.\n4. Default login information is: `username: admin / password: admin`. You should change this credential right after the first login on the UI.\n\n## Launch\n\nTo launch the app after initial setup or any change, simply run the `run_*` script again.\n\nA browser window will be opened and greet you with this screen:\n\n![Chat tab](https://raw.githubusercontent.com/Cinnamon/kotaemon/main/docs/images/chat-tab.png)\n\n## Usage\n\nFor how to use the application, see [Usage](usage.md). 
This page will also be available to\nyou within the application.\n\n## Feedback\n\nFeel free to create a bug report or a feature request on our [repo](https://github.com/Cinnamon/kotaemon/issues).\n"
  },
  {
    "path": "docs/local_model.md",
    "content": "# Setup local LLMs & Embedding models\n\n## Prepare local models\n\n#### NOTE\n\nIn the case of using Docker image, please replace `http://localhost` with `http://host.docker.internal` to correctly communicate with service on the host machine. See [more detail](https://stackoverflow.com/questions/31324981/how-to-access-host-port-from-docker-container).\n\n### Ollama OpenAI compatible server (recommended)\n\nInstall [ollama](https://github.com/ollama/ollama) and start the application.\n\nPull your model (e.g):\n\n```\nollama pull llama3.1:8b\nollama pull nomic-embed-text\n```\n\nSetup LLM and Embedding model on Resources tab with type OpenAI. Set these model parameters to connect to Ollama:\n\n```\napi_key: ollama\nbase_url: http://localhost:11434/v1/\nmodel: gemma2:2b (for llm) | nomic-embed-text (for embedding)\n```\n\n![Models](https://raw.githubusercontent.com/Cinnamon/kotaemon/main/docs/images/models.png)\n\n### oobabooga/text-generation-webui OpenAI compatible server\n\nInstall [oobabooga/text-generation-webui](https://github.com/oobabooga/text-generation-webui/).\n\nFollow the setup guide to download your models (GGUF, HF).\nAlso take a look at [OpenAI compatible server](https://github.com/oobabooga/text-generation-webui/wiki/12-%E2%80%90-OpenAI-API) for detail instructions.\n\nHere is a short version\n\n```\n# install sentence-transformer for embeddings creation\npip install sentence_transformers\n# change to text-generation-webui src dir\npython server.py --api\n```\n\nUse the `Models` tab to download new model and press Load.\n\nSetup LLM and Embedding model on Resources tab with type OpenAI. Set these model parameters to connect to `text-generation-webui`:\n\n```\napi_key: dummy\nbase_url: http://localhost:5000/v1/\nmodel: any\n```\n\n### llama-cpp-python server (LLM only)\n\nSee [llama-cpp-python OpenAI server](https://llama-cpp-python.readthedocs.io/en/latest/server/).\n\nDownload any GGUF model weight on HuggingFace or other source. 
Place it somewhere on your local machine.\n\nRun\n\n```\nLOCAL_MODEL=<path/to/GGUF> python scripts/serve_local.py\n```\n\nSetup LLM model on Resources tab with type OpenAI. Set these model parameters to connect to `llama-cpp-python`:\n\n```\napi_key: dummy\nbase_url: http://localhost:8000/v1/\nmodel: model_name\n```\n\n## Use local models for RAG\n\n- Set default LLM and Embedding model to a local variant.\n\n![Models](https://raw.githubusercontent.com/Cinnamon/kotaemon/main/docs/images/llm-default.png)\n\n- Set embedding model for the File Collection to a local model (e.g: `ollama`)\n\n![Index](https://raw.githubusercontent.com/Cinnamon/kotaemon/main/docs/images/index-embedding.png)\n\n- Go to Retrieval settings and choose LLM relevant scoring model as a local model (e.g: `ollama`). Or, you can choose to disable this feature if your machine cannot handle a lot of parallel LLM requests at the same time.\n\n![Settings](https://raw.githubusercontent.com/Cinnamon/kotaemon/main/docs/images/retrieval-setting.png)\n\nYou are set! Start a new conversation to test your local RAG pipeline.\n"
  },
  {
    "path": "docs/online_install.md",
    "content": "## Installation (Online HuggingFace Space)\n\n1. Go to [HF kotaemon_template](https://huggingface.co/spaces/cin-model/kotaemon_template).\n2. Use Duplicate function to create your own space. Or use this [direct link](https://huggingface.co/spaces/cin-model/kotaemon_template?duplicate=true).\n   ![Duplicate space](https://raw.githubusercontent.com/Cinnamon/kotaemon/main/docs/images/duplicate_space.png)\n   ![Change space params](https://raw.githubusercontent.com/Cinnamon/kotaemon/main/docs/images/change_space_params.png)\n3. Wait for the build to complete and start up (apprx 10 mins).\n   ![Wait space build](https://raw.githubusercontent.com/Cinnamon/kotaemon/main/docs/images/space_build.png)\n   ![Close space build](https://raw.githubusercontent.com/Cinnamon/kotaemon/main/docs/images/close_logs_space.png)\n4. Follow the first setup instructions (and register for Cohere API key if needed).\n   ![Cohere API](https://raw.githubusercontent.com/Cinnamon/kotaemon/main/docs/images/cohere_api_key.png)\n5. Complete the setup and use your own private space!\n   ![App Startup](https://raw.githubusercontent.com/Cinnamon/kotaemon/main/docs/images/initial_startup.png)\n"
  },
  {
    "path": "docs/pages/app/customize-flows.md",
    "content": "# Add new indexing and reasoning pipeline to the application\n\n@trducng\n\nAt high level, to add new indexing and reasoning pipeline:\n\n1. You define your indexing or reasoning pipeline as a class from\n   `BaseComponent`.\n2. You declare that class in the setting files `flowsettings.py`.\n\nThen when `python app.py`, the application will dynamically load those\npipelines.\n\nThe below sections talk in more detail about how the pipelines should be\nconstructed.\n\n## Define a pipeline as a class\n\nIn essence, a pipeline will subclass from `kotaemon.base.BaseComponent`.\nEach pipeline has 2 main parts:\n\n- All declared arguments and sub-pipelines.\n- The logic inside the pipeline.\n\nAn example pipeline:\n\n```python\nfrom kotaemon.base import BaseComponent\n\n\nclass SoSimple(BaseComponent):\n    arg1: int\n    arg2: str\n\n    def run(self, arg3: str):\n        return self.arg1 * self.arg2 + arg3\n```\n\nThis pipeline is simple for demonstration purpose, but we can imagine pipelines\nwith much more arguments, that can take other pipelines as arguments, and have\nmore complicated logic in the `run` method.\n\n**_An indexing or reasoning pipeline is just a class subclass from\n`BaseComponent` like above._**\n\nFor more detail on this topic, please refer to [Creating a\nComponent](/create-a-component/)\n\n## Run signatures\n\n**Note**: this section is tentative at the moment. We will finalize `def run`\nfunction signature by latest early April.\n\nThe indexing pipeline:\n\n```python\n    def run(\n        self,\n        file_paths: str | Path | list[str | Path],\n        reindex: bool = False,\n        **kwargs,\n    ):\n        \"\"\"Index files to intermediate representation (e.g. 
vector, database...)\n\n        Args:\n            file_paths: the list of paths to files\n            reindex: if True, files in `file_paths` that already exists in database\n                should be reindex.\n        \"\"\"\n```\n\nThe reasoning pipeline:\n\n```python\n    def run(self, question: str, history: list, **kwargs) -> Document:\n        \"\"\"Answer the question\n\n        Args:\n            question: the user input\n            history: the chat history [(user_msg1, bot_msg1), (user_msg2, bot_msg2)...]\n\n        Returns:\n            kotaemon.base.Document: the final answer\n        \"\"\"\n```\n\n## Register your pipeline to ktem\n\nTo register your pipelines to ktem, you declare it in the `flowsettings.py`\nfile. This file locates at the current working directory where you start the\nktem. In most use cases, it is this\n[one](https://github.com/Cinnamon/kotaemon/blob/main/flowsettings.py).\n\n```python\nKH_REASONING = [\"<python.module.path.to.the.reasoning.class>\"]\n\nKH_INDEX = \"<python.module.path.to.the.indexing.class>\"\n```\n\nYou can register multiple reasoning pipelines to ktem by populating the\n`KH_REASONING` list. The user can select which reasoning pipeline to use\nin their Settings page.\n\nFor now, there's only one supported index option for `KH_INDEX`.\n\nMake sure that your class is discoverable by Python.\n\n## Allow users to customize your pipeline in the app settings\n\nTo allow the users to configure your pipeline, you need to declare what you\nallow the users to configure as a dictionary. `ktem` will include them into the\napplication settings.\n\nIn your pipeline class, add a classmethod `get_user_settings` that returns a\nsetting dictionary, add a classmethod `get_info` that returns an info\ndictionary. Example:\n\n```python\nclass SoSimple(BaseComponent):\n\n    ... 
# as above\n\n    @classmethod\n    def get_user_settings(cls) -> dict:\n        \"\"\"The settings to the user\"\"\"\n        return {\n            \"setting_1\": {\n                \"name\": \"Human-friendly name\",\n                \"value\": \"Default value\",\n                \"choices\": [(\"Human-friendly Choice 1\", \"choice1-id\"), (\"HFC 2\", \"choice2-id\")], # optional\n                \"component\": \"Which Gradio UI component to render, can be: text, number, checkbox, dropdown, radio, checkboxgroup\"\n            },\n            \"setting_2\": {\n                # follow the same rule as above\n            }\n        }\n\n    @classmethod\n    def get_info(cls) -> dict:\n        \"\"\"Pipeline information for bookkeeping purpose\"\"\"\n        return {\n            \"id\": \"a unique id to differentiate this pipeline from other pipeline\",\n            \"name\": \"Human-friendly name of the pipeline\",\n            \"description\": \"Can be a short description of this pipeline\"\n        }\n```\n\nOnce adding these methods to your pipeline class, `ktem` will automatically\nextract and add them to the settings.\n\n## Construct to pipeline object\n\nOnce `ktem` runs your pipeline, it will call your classmethod `get_pipeline`\nwith the full user settings and expect to obtain the pipeline object. Within\nthis `get_pipeline` method, you implement all the necessary logics to initiate\nthe pipeline object. Example:\n\n```python\nclass SoSimple(BaseComponent):\n    ... # as above\n\n    @classmethod\n    def get_pipeline(self, setting):\n        obj = cls(arg1=setting[\"reasoning.id.setting1\"])\n        return obj\n```\n\n## Reasoning: Stream output to UI\n\nFor fast user experience, you can stream the output directly to UI. This way,\nuser can start observing the output as soon as the LLM model generates the 1st\ntoken, rather than having to wait the pipeline finishes to read the whole message.\n\nTo stream the output, you need to;\n\n1. 
Turn the `run` function to async.\n2. Pass in the output to a special queue with `self.report_output`.\n\n```python\n\n    async def run(self, question: str, history: list, **kwargs) -> Document:\n        for char in \"This is a long message\":\n            self.report_output({\"output\": char})\n```\n\nThe argument to `self.report_output` is a dictionary that contains either or\nboth of these 2 keys: \"output\", \"evidence\". The \"output\" string will be streamed\nto the chat message, and the \"evidence\" string will be streamed to the\ninformation panel.\n\n## Access application LLMs, Embeddings\n\nYou can access users' collections of LLMs and embedding models with:\n\n```python\nfrom ktem.embeddings.manager import embeddings\nfrom ktem.llms.manager import llms\n\n\nllm = llms.get_default()\nembedding_model = embeddings.get_default()\n```\n\nYou can also allow the users to specifically select which llms or embedding\nmodels they want to use through the settings.\n\n```python\n    @classmethod\n    def get_user_settings(cls) -> dict:\n        from ktem.llms.manager import llms\n\n        return {\n            \"citation_llm\": {\n                \"name\": \"LLM for citation\",\n                \"value\": llms.get_default(),\n                \"component\": \"dropdown\",\n                \"choices\": list(llms.options().keys()),\n            },\n            ...\n        }\n```\n\n## Optional: Access application data\n\nYou can access the user's application database, vector store as follows:\n\n```python\n# get the database that contains the source files\nfrom ktem.db.models import Source, Index, Conversation, User\n\n# get the vector store\n```\n"
  },
  {
    "path": "docs/pages/app/ext/user-management.md",
    "content": "`ktem` provides user management as an extension. To enable user management, in\nyour `flowsettings.py`, set the following variables:\n\n- `KH_FEATURE_USER_MANAGEMENT`: True to enable.\n- `KH_FEATURE_USER_MANAGEMENT_ADMIN`: the admin username. This user will be\n  created when the app 1st start.\n- `KH_FEATURE_USER_MANAGEMENT_PASSWORD`: the admin password. This value\n  accompanies the admin username.\n\nOnce enabled, you have access to the following features:\n\n- User login/logout (located in Settings Tab)\n- User changing password (located in Settings Tab)\n- Create / List / Edit / Delete user (located in Resources > Users Tab)\n"
  },
  {
    "path": "docs/pages/app/features.md",
    "content": "## Chat\n\nThe kotaemon focuses on question and answering over a corpus of data. Below\nis the gentle introduction about the chat functionality.\n\n- Users can upload corpus of files.\n- Users can converse to the chatbot to ask questions about the corpus of files.\n- Users can view the reference in the files.\n"
  },
  {
    "path": "docs/pages/app/functional-description.md",
    "content": "## User group / tenant management\n\n### Create new user group\n\n(6 man-days)\n\n**Description**: each client has a dedicated user group. Each user group has an\nadmin user who can do administrative tasks (e.g. creating user account in that\nuser group...). The workflow for creating new user group is as follow:\n\n1. Cinnamon accesses the user group management UI.\n2. On \"Create user group\" panel, we supply:\n   a. Client name: e.g. Apple.\n   b. Sub-domain name: e.g. apple.\n   c. Admin email, username & password.\n3. The system will:\n   a. An Aurora Platform deployment with the specified sub-domain.\n   b. Send an email to the admin, with the username & password.\n\n**Expectation**:\n\n- The admin can go to the deployed Aurora Platform.\n- The admin can login with the specified username & password.\n\n**Condition**:\n\n- When sub-domain name already exists, raise error.\n- If error sending email to the client, raise the error, and delete the\n  newly-created user-group.\n- Password rule:\n  - Have at least 8 characters.\n  - Must contain uppercase, lowercase, number and symbols.\n\n---\n\n### Delete user group\n\n(2 man-days)\n\n**Description**: in the tenant management page, we can delete the selected user\ngroup. The user flow is as follow:\n\n1. Cinnamon accesses the user group management UI,\n2. View list of user groups.\n3. Next to target user group, click delete.\n4. Confirm whether to delete.\n5. If Yes, delete the user group. If No, cancel the operation.\n\n**Expectation**: when a user group is deleted, we expect to delete everything\nrelated to the user groups: domain, files, databases, caches, deployments.\n\n## User management\n\n---\n\n### Create user account (for admin user)\n\n(1 man-day)\n\n**Description**: the admin user in the client's account can create user account\nfor that user group. To create the new user, the client admin do:\n\n1. Navigate to \"Admin\" > \"Users\"\n2. 
In the \"Create user\" panel, supply:\n   - Username\n   - Password\n   - Confirm password\n3. Click \"Create\"\n\n**Expectation**:\n\n- The user can create the account.\n- The username:\n  - Is case-insensitive (e.g. Moon and moon will be the same)\n  - Can only contains these characters: a-z A-Z 0-9 \\_ + - .\n  - Has maximum length of 32 characters\n- The password is subjected to the following rule:\n  - 8-character minimum length\n  - Contains at least 1 number\n  - Contains at least 1 lowercase letter\n  - Contains at least 1 uppercase letter\n  - Contains at least 1 special character from the following set, or a\n    non-leading, non-trailing space character: `^ $ * . [ ] { } ( ) ? - \" ! @ # % & / \\ , > < ' : ; | _ ~ ` + =\n\n---\n\n### Delete user account (for admin user)\n\n**Description**: the admin user in the client's account can delete user account.\nOnce an user account is deleted, he/she cannot login to Aurora Platform.\n\n1. The admin user navigates to \"Admin\" > \"Users\".\n2. In the user list panel, next to the username, the admin click on the \"Delete\"\n   button. The Confirmation dialog appears.\n3. If \"Delete\", the user account is deleted. If \"Cancel\", do nothing. The\n   Confirmation dialog disappears.\n\n**Expectation**:\n\n- Once the user is deleted, the following information relating to the user will\n  be deleted:\n  - His/her personal setting.\n  - His/her conversations.\n- The following information relating to the user will still be retained:\n  - His/her uploaded files.\n\n---\n\n### Edit user account (for admin user)\n\n**Description**: the admin user can change any information about the user\naccount, including password. To change user information:\n\n1. The admin user navigates to \"Admin\" > \"Users\".\n2. In the user list panel, next to the username, the admin click on the \"Edit\"\n   button.\n3. 
The user list disappears, the user detail appears, with the following\n   information show up:\n   - Username: (prefilled the username)\n   - Password: (blank)\n   - Confirm password: (blank)\n4. The admin can edit any of the information, and click \"Save\" or \"Cancel\".\n   - If \"Save\": the information will be updated to the database, or show\n     error per Expectation below.\n   - If \"Cancel\": skip.\n5. If Save success or Cancel, transfer back to the user list UI, where the user\n   information is updated accordingly.\n\n**Expectation**:\n\n- If the \"Password\" & \"Confirm password\" are different from each other, show\n  error: \"Password mismatch\".\n- If both \"Password\" & \\*\"Confirm password\" are blank, don't change the user\n  password.\n- If changing password, the password rule is subjected to the same rule when\n  creating user.\n- It's possible to change username. If changing username, the target user has to\n  use the new username.\n\n---\n\n### Sign-in\n\n(3 man-days)\n\n**Description**: the users can sign-in to Aurora Platform as follow:\n\n1. User navigates to the URL.\n2. If the user is not logged in, the UI just shows the login screen.\n3. User types username & password.\n4. If correct, the user will proceed to normal working UI.\n5. If incorrect, the login screen shows text error.\n\n---\n\n### Sign-out\n\n(1 man-day)\n\n**Description**: the user can sign-out of Aurora Platform as follow:\n\n1. User navigates to the Settings > User page.\n2. User click on logout.\n3. The user is signed out to the UI login screen.\n\n**Expectation**: the user is completely signed out. Next time he/she uses the\nAurora Platform, he/she has to login again.\n\n---\n\n### Change password\n\n**Description**: the user can change their password as follow:\n\n1. User navigates to the Settings > User page.\n2. In the change password section, the user provides these info and click\n   Change:\n   - Current password\n   - New password\n   - Confirm new password\n3. 
If changing successfully, then the password is changed. Otherwise, show the\n   error on the UI.\n\n**Expectation**:\n\n- If changing password succeeds, next time they logout/login to the system, they\n  can use the new password.\n- Password rule (Same as normal password rule when creating user)\n- Errors:\n  - Password does not match.\n  - Violated password rules.\n\n---\n\n## Chat\n\n### Chat to the bot\n\n**Description**: the Aurora Platform focuses on question and answering over the\nuploaded data. Each chat has the following components:\n\n- Chat message: show the exchange between bots and humans.\n- Text input + send button: for the user to input the message.\n- Data source panel: for selecting the files that will scope the context for the\n  bot.\n- Information panel: showing evidence as the bot answers user's questions.\n\nThe chat workflow looks as follow:\n\n1. [Optional] User select files that they want to scope the context for the bot.\n   If the user doesn't select any files, then all files on Aurora Platform will\n   be the context for the bot.\n   - The user can type multi-line messages, using \"Shift + Enter\" for\n     line-break.\n2. User sends the message (either clicking the Send button or hitting the Enter\n   key).\n3. The bot in the chat conversation will return \"Thinking...\" while it\n   processes.\n4. The information panel on the right begin to show data related to the user\n   message.\n5. The bot begins to generate answer. The \"Thinking...\" placeholder disappears..\n\n**Expecatation**:\n\n- Messages:\n  - User can send multi-line messages, using \"Shift + Enter\" for line-break.\n  - User can thumbs up, thumbs down the AI response. 
This information is\n    recorded in the database.\n  - User can click on a copy button on the chat message to copy the content to\n    clipboard.\n- Information panel:\n  - The information panel shows the latest evidence.\n  - The user can click on the message, and the reference for that message will\n    show up on the \"Reference panel\" (feature in-planning).\n  - The user can click on the title to show/hide the content.\n  - The whole information panel can be collapsed.\n- Chatbot quality:\n  - The user can converse with the bot. The bot answer the user's requests in a\n    natural manner.\n  - The bot message should be streamed to the UI. The bot don't wait to gather\n    alll the text response, then dump all of them at once.\n\n### Conversation - switch\n\n**Description**: users can jump around between different conversations. They can\nsee the list of all conversations, can select an old converation, and continue\nthe chat under the context of the old conversation. The switching workflow is\nlike this:\n\n1. Users click on the conversation dropdown. It will show a list of\n   conversations.\n2. Within that dropdown, the user selects one conversation.\n3. The chat messages, information panel, and selected data will show the content\n   in that old chat.\n4. The user can continue chatting as normal under the context of this old chat.\n\n**Expectation**:\n\n- In the conversation drop down list, the conversations are ordered in created\n  date order.\n- When there is no conversation, the conversation list is empty.\n- When there is no conversation, the user can still converse with the chat bot.\n  When doing so, it automatically create new conversation.\n\n### Conversation - create\n\n**Description**: the user can explicitly start a new conversation with the\nchatbot:\n\n1. User click on the \"New\" button.\n2. 
The new conversation is automatically created.\n\n**Expectation**:\n\n- The default conversation name is the current datetime.\n- It become selected.\n- It is added to the conversation list.\n\n### Conversation - rename\n\n**Description**: user can rename the chatbot by typing the name, and click on\nthe Rename button next to it.\n\n- If rename succeeds: the name shown in the 1st dropdown will change accordingly\n- If rename doesn't succeed: show error message in red color below the rename section\n\n**Condition**:\n\n- Name constraint:\n  - Min characters: 1\n  - Max characters: 40\n  - Could not having the same name with an existing conversation of the same\n    user.\n\n### Conversation - delete\n\n**Description**: user can delete the existing conversation as follow:\n\n1. Click on Delete button.\n2. The UI show confirmation with 2 buttons:\n   - Delete\n   - Cancel.\n3. If Delete, delete the conversation, switch to the next oldest conversation,\n   close the confirmation panel.\n4. If cancel, just close the confirmation panel.\n\n## File management\n\nThe file management allows users to upload, list and delete files that they\nupload to the Aurora Platform\n\n### Upload file\n\n**Description**: the user can upload files to the Aurora Platform. The uploaded\nfiles will be served as context for our chatbot to refer to when it converses\nwith the user. To upload file, the user:\n\n1. Navigate to the File tab.\n2. Within the File tab, there is an Upload section.\n3. User can add files to the Upload section through drag & drop, and or by click\n   on the file browser.\n4. User can select some options relating to uploading and indexing. Depending on\n   the project, these options can be different. Nevertheless, they will discuss\n   below.\n5. User click on \"Upload and Index\" button.\n6. The app show notifications when indexing starts and finishes, and when errors\n   happen on the top right corner.\n\n**Options**:\n\n- Force re-index file. 
When the user tries to upload files that already exist on\n  the system:\n  - If this option is True: will re-index those files.\n  - If this option is False: will skip indexing those files.\n\n**Condition**:\n\n- Max number of files: 100 files.\n- Max number of pages per file: 500 pages\n- Max file size: 10 MB\n\n### List all files\n\n**Description**: the user can know which files are on the system by:\n\n1. Navigate to the File tab.\n2. By default, it will show all the uploaded files, each with the following\n   information: file name, file size, number of pages, uploaded date\n3. The UI also shows the total number of pages and the total size in MB.\n\n### Delete file\n\n**Description**: users can delete files from this UI to free up the space, or to\nremove outdated information. To remove the files:\n\n1. User navigates to the File tab.\n2. In the list of files, next to each file, there is a Delete button.\n3. The user clicks on the Delete button. A confirmation dialog appears.\n4. If Delete, delete the file. If Cancel, close the confirmation dialog.\n\n**Expectation**: once the file is deleted:\n\n- The database entry of that file is deleted.\n- The file is removed from \"Chat - Data source\".\n- The total number of pages and total size in MB are reduced accordingly.\n- The reference to the file in the information panel is still retained.\n"
  },
  {
    "path": "docs/pages/app/index/file.md",
    "content": "The file index stores files in a local folder and index them for retrieval.\nThis file index provides the following infrastructure to support the indexing:\n\n- SQL table Source: store the list of files that are indexed by the system\n- Vector store: contain the embedding of segments of the files\n- Document store: contain the text of segments of the files. Each text stored\n  in this document store is associated with a vector in the vector store.\n- SQL table Index: store the relationship between (1) the source and the\n  docstore, and (2) the source and the vector store.\n\nThe indexing and retrieval pipelines are encouraged to use the above software\ninfrastructure.\n\n## Indexing pipeline\n\nThe ktem has default indexing pipeline: `ktem.index.file.pipelines.IndexDocumentPipeline`.\n\nThis default pipeline works as follow:\n\n- **Input**: list of file paths\n- **Output**: list of nodes that are indexed into database\n- **Process**:\n  - Read files into texts. Different file types has different ways to read texts.\n  - Split text files into smaller segments\n  - Run each segments into embeddings.\n  - Store the embeddings into vector store. Store the texts of each segment\n    into docstore. Store the list of files in Source. Store the linking\n    between Sources and docstore + vectorstore in Index table.\n\nYou can customize this default pipeline if your indexing process is close to the\ndefault pipeline. You can create your own indexing pipeline if there are too\nmuch different logic.\n\n### Customize the default pipeline\n\nThe default pipeline provides the contact points in `flowsettings.py`.\n\n1. `FILE_INDEX_PIPELINE_FILE_EXTRACTORS`. Supply overriding file extractor,\n   based on file extension. Example: `{\".pdf\": \"path.to.PDFReader\", \".xlsx\": \"path.to.ExcelReader\"}`\n2. `FILE_INDEX_PIPELINE_SPLITTER_CHUNK_SIZE`. The expected number of characters\n   of each text segment. Example: 1024.\n3. 
`FILE_INDEX_PIPELINE_SPLITTER_CHUNK_OVERLAP`. The expected number of\n   characters that consecutive text segments should overlap with each other.\n   Example: 256.\n\n### Create your own indexing pipeline\n\nYour indexing pipeline will subclass `BaseFileIndexIndexing`.\n\nYou should define the following methods:\n\n- `run(self, file_paths)`: run the indexing given the pipeline\n- `get_pipeline(cls, user_settings, index_settings)`: return the\n  fully-initialized pipeline, ready to be used by ktem.\n  - `user_settings`: is a dictionary contains user settings (e.g. `{\"pdf_mode\": True, \"num_retrieval\": 5}`). You can declare these settings in the `get_user_settings` classmethod. ktem will collect these settings into the app Settings page, and will supply these user settings to your `get_pipeline` method.\n  - `index_settings`: is a dictionary. Currently it's empty for File Index.\n- `get_user_settings`: to declare user settings, return a dictionary.\n\nBy subclassing `BaseFileIndexIndexing`, You will have access to the following resources:\n\n- `self._Source`: the source table\n- `self._Index`: the index table\n- `self._VS`: the vector store\n- `self._DS`: the docstore\n\nOnce you have prepared your pipeline, register it in `flowsettings.py`: `FILE_INDEX_PIPELINE = \"<python.path.to.your.pipeline>\"`.\n\n## Retrieval pipeline\n\nThe ktem has default retrieval pipeline:\n`ktem.index.file.pipelines.DocumentRetrievalPipeline`. This pipeline works as\nfollow:\n\n- Input: user text query & optionally a list of source file ids\n- Output: the output segments that match the user text query\n- Process:\n  - If a list of source file ids is given, get the list of vector ids that\n    associate with those file ids.\n  - Embed the user text query.\n  - Query the vector store. 
Provide a list of vector ids to limit query scope\n    if the user restrict.\n  - Return the matched text segments\n\n### Create your own retrieval pipeline\n\nYour retrieval pipeline will subclass `BaseFileIndexRetriever`. The retriever\nhas the same database, vectorstore and docstore accesses like the indexing\npipeline.\n\nYou should define the following methods:\n\n- `run(self, query, file_ids)`: retrieve relevant documents relating to the\n  query. If `file_ids` is given, you should restrict your search within these\n  `file_ids`.\n- `get_pipeline(cls, user_settings, index_settings, selected)`: return the\n  fully-initialized pipeline, ready to be used by ktem.\n  - `user_settings`: is a dictionary contains user settings (e.g. `{\"pdf_mode\": True, \"num_retrieval\": 5}`). You can declare these settings in the `get_user_settings` classmethod. ktem will collect these settings into the app Settings page, and will supply these user settings to your `get_pipeline` method.\n    - `index_settings`: is a dictionary. Currently it's empty for File Index.\n    - `selected`: a list of file ids selected by user. If user doesn't select\n      anything, this variable will be None.\n- `get_user_settings`: to declare user settings, return a dictionary.\n\nOnce you build the retrieval pipeline class, you can register it in\n`flowsettings.py`: `FILE_INDEXING_RETRIEVER_PIPELIENS = [\"path.to.retrieval.pipelie\"]`. 
Because there can be\nmultiple parallel pipelines within an index, this variable takes a list of\nstring rather than a string.\n\n## Software infrastructure\n\n| Infra            | Access        | Schema                                                                                                                                                                                                                                                                                      | Ref                                                        |\n| ---------------- | ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------- |\n| SQL table Source | self.\\_Source | - id (int): id of the source (auto)<br>- name (str): the name of the file<br>- path (str): the path of the file<br>- size (int): the file size in bytes<br>- note (dict): allow extra optional information about the file<br>- date_created (datetime): the time the file is created (auto) | This is SQLALchemy ORM class. Can consult                  |\n| SQL table Index  | self.\\_Index  | - id (int): id of the index entry (auto)<br>- source_id (int): the id of a file in the Source table<br>- target_id: the id of the segment in docstore or vector store<br>- relation_type (str): if the link is \"document\" or \"vector\"                                                       | This is SQLAlchemy ORM class                               |\n| Vector store     | self.\\_VS     | - self.\\_VS.add: add the list of embeddings to the vector store (optionally associate metadata and ids)<br>- self.\\_VS.delete: delete vector entries based on ids<br>- self.\\_VS.query: get embeddings based on embeddings.                                  
                               | kotaemon > storages > vectorstores > BaseVectorStore       |\n| Doc store        | self.\\_DS     | - self.\\_DS.add: add the segments to document stores<br>- self.\\_DS.get: get the segments based on id<br>- self.\\_DS.get_all: get all segments<br>- self.\\_DS.delete: delete segments based on id                                                                                           | kotaemon > storages > docstores > base > BaseDocumentStore |\n"
  },
  {
    "path": "docs/pages/app/settings/overview.md",
    "content": "# Overview\n\nThere are 3 kinds of settings in `ktem`, geared towards different stakeholders\nfor different use cases:\n\n- Developer settings. These settings are meant for very basic app customization, such as database URL, cloud config, logging config, which features to enable... You will be interested in the developer settings if you deploy `ktem` to your customers, or if you build extension for `ktem` for developers. These settings are declared inside `flowsettings.py`.\n- Admin settings. These settings show up in the Admin page, and are meant to allow admin-level user to customize low level features, such as which credentials to connect to data sources, which keys to use for LLM...\n- [User settings](/pages/app/settings/user-settings/). These settings are meant for run-time users to tweak ktem to their personal needs, such as which output languages the chatbot should generate, which reasoning type to use...\n"
  },
  {
    "path": "docs/pages/app/settings/user-settings.md",
    "content": "# User settings\n\n`ktem` allows developers to extend the index and the reasoning pipeline. In\nmany cases, these components can have settings that should be modified by\nusers at run-time, (e.g. `topk`, `chunksize`...). These are the user settings.\n\n`ktem` allows developers to declare such user settings in their code. Once\ndeclared, `ktem` will render them in a Settings page.\n\nThere are 2 places that `ktem` looks for declared user settings. You can\nrefer to the respective pages.\n\n- In the index.\n- In the reasoning pipeline.\n\n## Syntax of a settings\n\nA collection of settings is a dictionary of type `dict[str, dict]`, where the\nkey is a setting id, and the value is the description of the setting.\n\n```python\nsettings = {\n    \"topk\": {\n        \"name\": \"Top-k chunks\",\n        \"value\": 10,\n        \"component\": \"number\",\n    },\n    \"lang\": {\n        \"name\": \"Languages\",\n        \"value\": \"en\",\n        \"component\": \"dropdown\",\n        \"choices\": [(\"en\", \"English\"), (\"cn\", \"Chinese\")],\n    }\n}\n```\n\nEach setting description must have:\n\n- name: the human-understandable name of the settings.\n- value: the default value of the settings.\n- component: the UI component to render such setting on the UI. Available:\n\n  - \"text\": single-value\n  - \"number\": single-value\n  - \"checkbox\": single-value\n  - \"dropdown\": choices\n  - \"radio\": choices\n  - \"checkboxgroup\": choices\n\n- choices: the list of choices, if the component type allows.\n\n## Settings page structure\n"
  },
  {
    "path": "docs/scripts/generate_examples_docs.py",
    "content": "# import shutil\nfrom pathlib import Path\nfrom typing import Any, Iterable\n\nimport mkdocs_gen_files\n\n# get the root source code directory\ndoc_dir_name = \"docs\"\ndoc_dir = Path(__file__)\nwhile doc_dir.name != doc_dir_name and doc_dir != doc_dir.parent:\n    doc_dir = doc_dir.parent\n\nif doc_dir == doc_dir.parent:\n    raise ValueError(f\"root_name ({doc_dir_name}) not in path ({str(Path(__file__))}).\")\n\n\ndef generate_docs_for_examples_readme(\n    examples_dir: Path, target_doc_folder: str, ignored_modules: Iterable[Any] = []\n):\n    if not examples_dir.is_dir():\n        raise ModuleNotFoundError(str(examples_dir))\n\n    nav = mkdocs_gen_files.Nav()\n\n    for path in sorted(examples_dir.rglob(\"*README.md\")):\n        # ignore modules with name starts with underscore (i.e. __init__)\n        if path.name.startswith(\"_\") or path.name.startswith(\"test\"):\n            continue\n\n        module_path = path.parent.relative_to(examples_dir).with_suffix(\"\")\n        doc_path = path.parent.relative_to(examples_dir).with_suffix(\".md\")\n        full_doc_path = Path(target_doc_folder, doc_path)\n\n        parts = list(module_path.parts)\n        identifier = \".\".join(parts)\n\n        if \"tests\" in parts:\n            continue\n\n        ignore = False\n        for each_module in ignored_modules:\n            if identifier.startswith(each_module):\n                ignore = True\n                break\n        if ignore:\n            continue\n\n        nav_titles = [name.replace(\"_\", \" \").title() for name in parts]\n        nav[nav_titles] = doc_path.as_posix()\n\n        with mkdocs_gen_files.open(full_doc_path, \"w\") as f:\n            f.write(f'--8<-- \"{path.relative_to(examples_dir.parent)}\"')\n\n        mkdocs_gen_files.set_edit_path(\n            full_doc_path, Path(\"..\") / path.relative_to(examples_dir.parent)\n        )\n\n    with mkdocs_gen_files.open(f\"{target_doc_folder}/NAV.md\", \"w\") as nav_file:\n       
 nav_file.writelines(nav.build_literate_nav())\n\n\ngenerate_docs_for_examples_readme(\n    examples_dir=doc_dir.parent / \"examples\",\n    target_doc_folder=\"examples\",\n)\n"
  },
  {
    "path": "docs/scripts/generate_reference_docs.py",
    "content": "# import shutil\nfrom pathlib import Path\nfrom typing import Any, Iterable\n\nimport mkdocs_gen_files\n\n# get the root source code directory\ndoc_dir_name = \"docs\"\ndoc_dir = Path(__file__)\nwhile doc_dir.name != doc_dir_name and doc_dir != doc_dir.parent:\n    doc_dir = doc_dir.parent\n\nif doc_dir == doc_dir.parent:\n    raise ValueError(f\"root_name ({doc_dir_name}) not in path ({str(Path(__file__))}).\")\n\nnav_title_map = {\"cli\": \"CLI\", \"llms\": \"LLMs\"}\n\n\ndef generate_docs_for_src_code(\n    code_dir: Path, target_doc_folder: str, ignored_modules: Iterable[Any] = []\n):\n    if not code_dir.is_dir():\n        raise ModuleNotFoundError(str(code_dir))\n\n    nav = mkdocs_gen_files.Nav()\n\n    for path in sorted(code_dir.rglob(\"*.py\")):\n        # ignore modules with name starts with underscore (i.e. __init__)\n        # if path.name.startswith(\"_\") or path.name.startswith(\"test\"):\n        #     continue\n\n        module_path = path.relative_to(code_dir).with_suffix(\"\")\n        doc_path = path.relative_to(code_dir).with_suffix(\".md\")\n        full_doc_path = Path(target_doc_folder, doc_path)\n\n        parts = list(module_path.parts)\n\n        if parts[-1] == \"__init__\":\n            doc_path = doc_path.with_name(\"index.md\")\n            full_doc_path = full_doc_path.with_name(\"index.md\")\n            parts.pop()\n\n        if not parts:\n            continue\n\n        if \"tests\" in parts:\n            continue\n\n        identifier = \".\".join(parts)\n        ignore = False\n        for each_module in ignored_modules:\n            if identifier.startswith(each_module):\n                ignore = True\n                break\n        if ignore:\n            continue\n\n        nav_titles = [\n            nav_title_map.get(name, name.replace(\"_\", \" \").title()) for name in parts\n        ]\n        nav[nav_titles] = doc_path.as_posix()\n\n        with mkdocs_gen_files.open(full_doc_path, \"w\") as f:\n        
    f.write(f\"::: {identifier}\")\n\n        # this method works in docs folder\n        mkdocs_gen_files.set_edit_path(\n            full_doc_path, Path(\"..\") / path.relative_to(code_dir.parent)\n        )\n\n    with mkdocs_gen_files.open(f\"{target_doc_folder}/Summary.md\", \"w\") as nav_file:\n        nav_file.writelines(nav.build_literate_nav())\n\n\ngenerate_docs_for_src_code(\n    code_dir=doc_dir.parent / \"libs\" / \"kotaemon\" / \"kotaemon\",\n    target_doc_folder=\"reference\",\n    ignored_modules={\"contribs\"},\n)\n"
  },
  {
    "path": "docs/theme/assets/pymdownx-extras/extra-fb5a2a1c86.css",
    "content": "@charset \"UTF-8\";:root>*{--md-code-link-bg-color:hsla(0, 0%, 96%, 1);--md-code-link-accent-bg-color:var(--md-code-link-bg-color);--md-default-bg-color--trans:rgb(100%, 100%, 100%, 0);--md-code-title-bg-color:var(--md-code-bg-color);--md-code-inline-bg-color:var(--md-code-bg-color);--md-code-special-bg-color:#e8e8e8;--md-code-alternate-bg-color:var(--md-code-bg-color);--md-code-hl-punctuation-color:var(--md-code-fg-color);--md-code-hl-namespace-color:var(--md-code-fg-color);--md-code-hl-entity-color:var(--md-code-hl-keyword-color);--md-code-hl-tag-color:var(--md-code-hl-keyword-color);--md-code-hl-builtin-color:var(--md-code-hl-constant-color);--md-code-hl-class-color:var(--md-code-hl-function-color);--md-typeset-a-color:#00bcd4;--md-progress-stripe:var(--md-default-bg-color--lighter);--md-progress-100:#00e676;--md-progress-80:#00e676;--md-progress-60:#fbc02d;--md-progress-40:#ff9100;--md-progress-20:#ff5252;--md-progress-0:#ff1744;--md-typeset-kbd-color:#ebebeb;--md-typeset-kbd-border-color:#b8b8b8;--md-typeset-kbd-accent-color:hsla(0, 100%, 100%, 1)}:root>[data-md-color-scheme=slate]{--md-code-link-bg-color:hsla(232, 15%, 15%, 1);--md-code-link-accent-bg-color:var(--md-code-link-bg-color);--md-code-special-bg-color:#2b2d3b;--md-default-bg-color--trans:hsla(232,15%,15%, 0);--md-typeset-kbd-color:var(--md-default-fg-color--lightest);--md-typeset-kbd-border-color:#1a1c24;--md-typeset-kbd-accent-color:var(--md-default-fg-color--lighter)}:root>[data-md-color-scheme=dracula]{--md-default-fg-color:rgba(248, 248, 242, 0.87);--md-default-fg-color--light:rgba(248, 248, 242, 0.54);--md-default-fg-color--lighter:rgba(248, 248, 242, 0.16);--md-default-fg-color--lightest:rgba(248, 248, 242, 0.07);--md-default-autocomplete-fg-color:rgba(248, 248, 242, 0.4);--md-shadow-z2:0 0.2rem 0.5rem hsla(0, 0%, 0%, 0.3),0 0 0.05rem hsla(0, 0%, 0%, 0.2);--md-default-bg-color:var(--md-default-bg-color--darkest);--md-default-bg-color--light:rgba(50, 52, 67, 
0.7);--md-default-bg-color--lighter:rgba(50, 52, 67, 0.3);--md-default-bg-color--lightest:rgba(50, 52, 67, 0.12);--md-default-bg-color--trans:rgba(50, 52, 67, 0);--md-default-bg-color--dark:#2b2e3b;--md-default-bg-color--darker:#252732;--md-default-bg-color--darkest:#1e2029;--md-default-bg-color--ultra-dark:#111217;--md-text-color:var(--md-default-fg-color);--md-typeset-color:var(--md-default-fg-color);--md-admonition-fg-color:var(--md-default-fg-color);--md-code-fg-color:hsl(60, 30%, 96%);--md-code-bg-color:hsl(231, 15%, 18%);--md-code-title-bg-color:var(--md-default-bg-color--ultra-dark);--md-code-inline-bg-color:#323443;--md-code-hl-operator-color:hsl(326, 100%, 74%);--md-code-hl-punctuation-color:hsl(60, 30%, 96%);--md-code-hl-string-color:hsl(65, 92%, 76%);--md-code-hl-special-color:hsl(265, 89%, 78%);--md-code-hl-number-color:hsl(265, 89%, 78%);--md-code-hl-keyword-color:hsl(326, 100%, 74%);--md-code-hl-name-color:hsl(60, 30%, 96%);--md-code-hl-constant-color:hsl(265, 89%, 78%);--md-code-hl-function-color:hsl(135, 94%, 65%);--md-code-hl-comment-color:hsl(225, 27%, 51%);--md-code-hl-variable-color:hsl(31, 100%, 71%);--md-code-hl-generic-color:hsl(225, 27%, 51%);--md-code-hl-color:hsl(231, 25%, 25%);--md-code-hl-entity-color:hsl(135, 94%, 65%);--md-code-hl-tag-color:hsl(326, 100%, 74%);--md-code-hl-namespace-color:hsl(60, 30%, 96%);--md-code-hl-builtin-color:hsl(191, 97%, 77%);--md-code-hl-class-color:hsl(191, 97%, 77%);--md-code-special-bg-color:#1c1e26;--md-code-alternate-bg-color:#3d3e49;--md-code-link-bg-color:#364653;--md-typeset-a-color:hsl(191, 97%, 77%);--md-typeset-mark-color:#6e7252;--md-typeset-del-color:#734568;--md-typeset-ins-color:#36724e;--md-progress-stripe:var(--md-default-bg-color--lightest);--md-progress-100:hsl(135, 94%, 65%);--md-progress-80:hsl(135, 92%, 79%);--md-progress-60:hsl(65, 92%, 76%);--md-progress-40:hsl(31, 100%, 71%);--md-progress-20:hsl(326, 100%, 74%);--md-progress-0:hsl(0, 100%, 
67%);--md-typeset-kbd-color:var(--md-default-fg-color--lightest);--md-typeset-kbd-border-color:var(--md-default-bg-color--ultra-dark);--md-typeset-kbd-accent-color:var(--md-default-fg-color--lighter)}[data-md-color-scheme=dracula] :not([data-md-color-scheme])[data-md-color-primary=red],[data-md-color-scheme=dracula][data-md-color-primary=red]{--md-primary-code-bg-color:#47303a;--md-primary-fg-color:hsla(0deg, 100%, 67%, 1);--md-primary-fg-color--transparent:hsla(0deg, 100%, 67%, 0.1);--md-primary-fg-color--light:hsla(0deg, 100%, 72%, 1);--md-primary-fg-color--dark:hsla(0deg, 100%, 62%, 1);--md-primary-bg-color:var(--md-default-bg-color);--md-primary-bg-color--light:var(--md-default-bg-color--light)}[data-md-color-scheme=dracula] :not([data-md-color-scheme])[data-md-color-primary=pink],[data-md-color-scheme=dracula][data-md-color-primary=pink]{--md-primary-code-bg-color:#47354b;--md-primary-fg-color:hsla(326deg, 100%, 74%, 1);--md-primary-fg-color--transparent:hsla(326deg, 100%, 74%, 0.1);--md-primary-fg-color--light:hsla(326deg, 100%, 79%, 1);--md-primary-fg-color--dark:hsla(326deg, 100%, 69%, 1);--md-primary-bg-color:var(--md-default-bg-color);--md-primary-bg-color--light:var(--md-default-bg-color--light)}[data-md-color-scheme=dracula] :not([data-md-color-scheme])[data-md-color-primary=purple],[data-md-color-scheme=dracula][data-md-color-primary=purple]{--md-primary-code-bg-color:#3e3952;--md-primary-fg-color:hsla(265deg, 89%, 78%, 1);--md-primary-fg-color--transparent:hsla(265deg, 89%, 78%, 0.1);--md-primary-fg-color--light:hsla(265deg, 89%, 83%, 1);--md-primary-fg-color--dark:hsla(265deg, 89%, 73%, 1);--md-primary-bg-color:var(--md-default-bg-color);--md-primary-bg-color--light:var(--md-default-bg-color--light)}[data-md-color-scheme=dracula] :not([data-md-color-scheme])[data-md-color-primary=deep-purple],[data-md-color-scheme=dracula][data-md-color-primary=deep-purple]{--md-primary-code-bg-color:#3e3952;--md-primary-fg-color:hsla(265deg, 89%, 78%, 
1);--md-primary-fg-color--transparent:hsla(265deg, 89%, 78%, 0.1);--md-primary-fg-color--light:hsla(265deg, 89%, 83%, 1);--md-primary-fg-color--dark:hsla(265deg, 89%, 73%, 1);--md-primary-bg-color:var(--md-default-bg-color);--md-primary-bg-color--light:var(--md-default-bg-color--light)}[data-md-color-scheme=dracula] :not([data-md-color-scheme])[data-md-color-primary=blue],[data-md-color-scheme=dracula][data-md-color-primary=blue]{--md-primary-code-bg-color:#303446;--md-primary-fg-color:hsla(225deg, 27%, 51%, 1);--md-primary-fg-color--transparent:hsla(225deg, 27%, 51%, 0.1);--md-primary-fg-color--light:hsla(225deg, 27%, 56%, 1);--md-primary-fg-color--dark:hsla(225deg, 27%, 46%, 1);--md-primary-bg-color:var(--md-default-bg-color);--md-primary-bg-color--light:var(--md-default-bg-color--light)}[data-md-color-scheme=dracula] :not([data-md-color-scheme])[data-md-color-primary=indigo],[data-md-color-scheme=dracula][data-md-color-primary=indigo]{--md-primary-code-bg-color:#303446;--md-primary-fg-color:hsla(225deg, 27%, 51%, 1);--md-primary-fg-color--transparent:hsla(225deg, 27%, 51%, 0.1);--md-primary-fg-color--light:hsla(225deg, 27%, 56%, 1);--md-primary-fg-color--dark:hsla(225deg, 27%, 46%, 1);--md-primary-bg-color:var(--md-default-bg-color);--md-primary-bg-color--light:var(--md-default-bg-color--light)}[data-md-color-scheme=dracula] :not([data-md-color-scheme])[data-md-color-primary=light-blue],[data-md-color-scheme=dracula][data-md-color-primary=light-blue]{--md-primary-code-bg-color:#303446;--md-primary-fg-color:hsla(225deg, 27%, 51%, 1);--md-primary-fg-color--transparent:hsla(225deg, 27%, 51%, 0.1);--md-primary-fg-color--light:hsla(225deg, 27%, 56%, 1);--md-primary-fg-color--dark:hsla(225deg, 27%, 46%, 1);--md-primary-bg-color:var(--md-default-bg-color);--md-primary-bg-color--light:var(--md-default-bg-color--light)}[data-md-color-scheme=dracula] 
:not([data-md-color-scheme])[data-md-color-primary=cyan],[data-md-color-scheme=dracula][data-md-color-primary=cyan]{--md-primary-code-bg-color:#364653;--md-primary-fg-color:hsla(191deg, 97%, 77%, 1);--md-primary-fg-color--transparent:hsla(191deg, 97%, 77%, 0.1);--md-primary-fg-color--light:hsla(191deg, 97%, 82%, 1);--md-primary-fg-color--dark:hsla(191deg, 97%, 72%, 1);--md-primary-bg-color:var(--md-default-bg-color);--md-primary-bg-color--light:var(--md-default-bg-color--light)}[data-md-color-scheme=dracula] :not([data-md-color-scheme])[data-md-color-primary=teal],[data-md-color-scheme=dracula][data-md-color-primary=teal]{--md-primary-code-bg-color:#364653;--md-primary-fg-color:hsla(191deg, 97%, 77%, 1);--md-primary-fg-color--transparent:hsla(191deg, 97%, 77%, 0.1);--md-primary-fg-color--light:hsla(191deg, 97%, 82%, 1);--md-primary-fg-color--dark:hsla(191deg, 97%, 72%, 1);--md-primary-bg-color:var(--md-default-bg-color);--md-primary-bg-color--light:var(--md-default-bg-color--light)}[data-md-color-scheme=dracula] :not([data-md-color-scheme])[data-md-color-primary=green],[data-md-color-scheme=dracula][data-md-color-primary=green]{--md-primary-code-bg-color:#2d4840;--md-primary-fg-color:hsla(135deg, 94%, 65%, 1);--md-primary-fg-color--transparent:hsla(135deg, 94%, 65%, 0.1);--md-primary-fg-color--light:hsla(135deg, 94%, 70%, 1);--md-primary-fg-color--dark:hsla(135deg, 94%, 60%, 1);--md-primary-bg-color:var(--md-default-bg-color);--md-primary-bg-color--light:var(--md-default-bg-color--light)}[data-md-color-scheme=dracula] :not([data-md-color-scheme])[data-md-color-primary=light-green],[data-md-color-scheme=dracula][data-md-color-primary=light-green]{--md-primary-code-bg-color:#2d4840;--md-primary-fg-color:hsla(135deg, 94%, 65%, 1);--md-primary-fg-color--transparent:hsla(135deg, 94%, 65%, 0.1);--md-primary-fg-color--light:hsla(135deg, 94%, 70%, 1);--md-primary-fg-color--dark:hsla(135deg, 94%, 60%, 
1);--md-primary-bg-color:var(--md-default-bg-color);--md-primary-bg-color--light:var(--md-default-bg-color--light)}[data-md-color-scheme=dracula] :not([data-md-color-scheme])[data-md-color-primary=lime],[data-md-color-scheme=dracula][data-md-color-primary=lime]{--md-primary-code-bg-color:#2d4840;--md-primary-fg-color:hsla(135deg, 94%, 65%, 1);--md-primary-fg-color--transparent:hsla(135deg, 94%, 65%, 0.1);--md-primary-fg-color--light:hsla(135deg, 94%, 70%, 1);--md-primary-fg-color--dark:hsla(135deg, 94%, 60%, 1);--md-primary-bg-color:var(--md-default-bg-color);--md-primary-bg-color--light:var(--md-default-bg-color--light)}[data-md-color-scheme=dracula] :not([data-md-color-scheme])[data-md-color-primary=yellow],[data-md-color-scheme=dracula][data-md-color-primary=yellow]{--md-primary-code-bg-color:#454842;--md-primary-fg-color:hsla(65deg, 92%, 76%, 1);--md-primary-fg-color--transparent:hsla(65deg, 92%, 76%, 0.1);--md-primary-fg-color--light:hsla(65deg, 92%, 81%, 1);--md-primary-fg-color--dark:hsla(65deg, 92%, 71%, 1);--md-primary-bg-color:var(--md-default-bg-color);--md-primary-bg-color--light:var(--md-default-bg-color--light)}[data-md-color-scheme=dracula] :not([data-md-color-scheme])[data-md-color-primary=amber],[data-md-color-scheme=dracula][data-md-color-primary=amber]{--md-primary-code-bg-color:#454842;--md-primary-fg-color:hsla(65deg, 92%, 76%, 1);--md-primary-fg-color--transparent:hsla(65deg, 92%, 76%, 0.1);--md-primary-fg-color--light:hsla(65deg, 92%, 81%, 1);--md-primary-fg-color--dark:hsla(65deg, 92%, 71%, 1);--md-primary-bg-color:var(--md-default-bg-color);--md-primary-bg-color--light:var(--md-default-bg-color--light)}[data-md-color-scheme=dracula] :not([data-md-color-scheme])[data-md-color-primary=orange],[data-md-color-scheme=dracula][data-md-color-primary=orange]{--md-primary-code-bg-color:#473e3d;--md-primary-fg-color:hsla(31deg, 100%, 71%, 1);--md-primary-fg-color--transparent:hsla(31deg, 100%, 71%, 0.1);--md-primary-fg-color--light:hsla(31deg, 100%, 
76%, 1);--md-primary-fg-color--dark:hsla(31deg, 100%, 66%, 1);--md-primary-bg-color:var(--md-default-bg-color);--md-primary-bg-color--light:var(--md-default-bg-color--light)}[data-md-color-scheme=dracula] :not([data-md-color-scheme])[data-md-color-primary=deep-orange],[data-md-color-scheme=dracula][data-md-color-primary=deep-orange]{--md-primary-code-bg-color:#473e3d;--md-primary-fg-color:hsla(31deg, 100%, 71%, 1);--md-primary-fg-color--transparent:hsla(31deg, 100%, 71%, 0.1);--md-primary-fg-color--light:hsla(31deg, 100%, 76%, 1);--md-primary-fg-color--dark:hsla(31deg, 100%, 66%, 1);--md-primary-bg-color:var(--md-default-bg-color);--md-primary-bg-color--light:var(--md-default-bg-color--light)}[data-md-color-scheme=dracula] :not([data-md-color-scheme])[data-md-color-primary=red],[data-md-color-scheme=dracula][data-md-color-accent=red]{--md-code-link-accent-bg-color:#472c36;--md-accent-fg-color:hsla(0deg, 100%, 62%, 1);--md-accent-fg-color--transparent:hsla(0deg, 100%, 62%, 0.1);--md-accent-bg-color:var(--md-default-bg-color);--md-accent-bg-color--light:var(--md-default-bg-color--light)}[data-md-color-scheme=dracula] :not([data-md-color-scheme])[data-md-color-primary=pink],[data-md-color-scheme=dracula][data-md-color-accent=pink]{--md-code-link-accent-bg-color:#473149;--md-accent-fg-color:hsla(326deg, 100%, 69%, 1);--md-accent-fg-color--transparent:hsla(326deg, 100%, 69%, 0.1);--md-accent-bg-color:var(--md-default-bg-color);--md-accent-bg-color--light:var(--md-default-bg-color--light)}[data-md-color-scheme=dracula] :not([data-md-color-scheme])[data-md-color-primary=purple],[data-md-color-scheme=dracula][data-md-color-accent=purple]{--md-code-link-accent-bg-color:#3c3652;--md-accent-fg-color:hsla(265deg, 89%, 73%, 1);--md-accent-fg-color--transparent:hsla(265deg, 89%, 73%, 0.1);--md-accent-bg-color:var(--md-default-bg-color);--md-accent-bg-color--light:var(--md-default-bg-color--light)}[data-md-color-scheme=dracula] 
:not([data-md-color-scheme])[data-md-color-primary=deep-purple],[data-md-color-scheme=dracula][data-md-color-accent=deep-purple]{--md-code-link-accent-bg-color:#3c3652;--md-accent-fg-color:hsla(265deg, 89%, 73%, 1);--md-accent-fg-color--transparent:hsla(265deg, 89%, 73%, 0.1);--md-accent-bg-color:var(--md-default-bg-color);--md-accent-bg-color--light:var(--md-default-bg-color--light)}[data-md-color-scheme=dracula] :not([data-md-color-scheme])[data-md-color-primary=blue],[data-md-color-scheme=dracula][data-md-color-accent=blue]{--md-code-link-accent-bg-color:#2e3243;--md-accent-fg-color:hsla(225deg, 27%, 46%, 1);--md-accent-fg-color--transparent:hsla(225deg, 27%, 46%, 0.1);--md-accent-bg-color:var(--md-default-bg-color);--md-accent-bg-color--light:var(--md-default-bg-color--light)}[data-md-color-scheme=dracula] :not([data-md-color-scheme])[data-md-color-primary=indigo],[data-md-color-scheme=dracula][data-md-color-accent=indigo]{--md-code-link-accent-bg-color:#2e3243;--md-accent-fg-color:hsla(225deg, 27%, 46%, 1);--md-accent-fg-color--transparent:hsla(225deg, 27%, 46%, 0.1);--md-accent-bg-color:var(--md-default-bg-color);--md-accent-bg-color--light:var(--md-default-bg-color--light)}[data-md-color-scheme=dracula] :not([data-md-color-scheme])[data-md-color-primary=light-blue],[data-md-color-scheme=dracula][data-md-color-accent=light-blue]{--md-code-link-accent-bg-color:#2e3243;--md-accent-fg-color:hsla(225deg, 27%, 46%, 1);--md-accent-fg-color--transparent:hsla(225deg, 27%, 46%, 0.1);--md-accent-bg-color:var(--md-default-bg-color);--md-accent-bg-color--light:var(--md-default-bg-color--light)}[data-md-color-scheme=dracula] :not([data-md-color-scheme])[data-md-color-primary=cyan],[data-md-color-scheme=dracula][data-md-color-accent=cyan]{--md-code-link-accent-bg-color:#324553;--md-accent-fg-color:hsla(191deg, 97%, 72%, 1);--md-accent-fg-color--transparent:hsla(191deg, 97%, 72%, 
0.1);--md-accent-bg-color:var(--md-default-bg-color);--md-accent-bg-color--light:var(--md-default-bg-color--light)}[data-md-color-scheme=dracula] :not([data-md-color-scheme])[data-md-color-primary=teal],[data-md-color-scheme=dracula][data-md-color-accent=teal]{--md-code-link-accent-bg-color:#324553;--md-accent-fg-color:hsla(191deg, 97%, 72%, 1);--md-accent-fg-color--transparent:hsla(191deg, 97%, 72%, 0.1);--md-accent-bg-color:var(--md-default-bg-color);--md-accent-bg-color--light:var(--md-default-bg-color--light)}[data-md-color-scheme=dracula] :not([data-md-color-scheme])[data-md-color-primary=green],[data-md-color-scheme=dracula][data-md-color-accent=green]{--md-code-link-accent-bg-color:#2a483d;--md-accent-fg-color:hsla(135deg, 94%, 60%, 1);--md-accent-fg-color--transparent:hsla(135deg, 94%, 60%, 0.1);--md-accent-bg-color:var(--md-default-bg-color);--md-accent-bg-color--light:var(--md-default-bg-color--light)}[data-md-color-scheme=dracula] :not([data-md-color-scheme])[data-md-color-primary=light-green],[data-md-color-scheme=dracula][data-md-color-accent=light-green]{--md-code-link-accent-bg-color:#2a483d;--md-accent-fg-color:hsla(135deg, 94%, 60%, 1);--md-accent-fg-color--transparent:hsla(135deg, 94%, 60%, 0.1);--md-accent-bg-color:var(--md-default-bg-color);--md-accent-bg-color--light:var(--md-default-bg-color--light)}[data-md-color-scheme=dracula] :not([data-md-color-scheme])[data-md-color-primary=lime],[data-md-color-scheme=dracula][data-md-color-accent=lime]{--md-code-link-accent-bg-color:#2a483d;--md-accent-fg-color:hsla(135deg, 94%, 60%, 1);--md-accent-fg-color--transparent:hsla(135deg, 94%, 60%, 0.1);--md-accent-bg-color:var(--md-default-bg-color);--md-accent-bg-color--light:var(--md-default-bg-color--light)}[data-md-color-scheme=dracula] :not([data-md-color-scheme])[data-md-color-primary=yellow],[data-md-color-scheme=dracula][data-md-color-accent=yellow]{--md-code-link-accent-bg-color:#45483e;--md-accent-fg-color:hsla(65deg, 92%, 71%, 
1);--md-accent-fg-color--transparent:hsla(65deg, 92%, 71%, 0.1);--md-accent-bg-color:var(--md-default-bg-color);--md-accent-bg-color--light:var(--md-default-bg-color--light)}[data-md-color-scheme=dracula] :not([data-md-color-scheme])[data-md-color-primary=amber],[data-md-color-scheme=dracula][data-md-color-accent=amber]{--md-code-link-accent-bg-color:#45483e;--md-accent-fg-color:hsla(65deg, 92%, 71%, 1);--md-accent-fg-color--transparent:hsla(65deg, 92%, 71%, 0.1);--md-accent-bg-color:var(--md-default-bg-color);--md-accent-bg-color--light:var(--md-default-bg-color--light)}[data-md-color-scheme=dracula] :not([data-md-color-scheme])[data-md-color-primary=orange],[data-md-color-scheme=dracula][data-md-color-accent=orange]{--md-code-link-accent-bg-color:#473d39;--md-accent-fg-color:hsla(31deg, 100%, 66%, 1);--md-accent-fg-color--transparent:hsla(31deg, 100%, 66%, 0.1);--md-accent-bg-color:var(--md-default-bg-color);--md-accent-bg-color--light:var(--md-default-bg-color--light)}[data-md-color-scheme=dracula] :not([data-md-color-scheme])[data-md-color-primary=deep-orange],[data-md-color-scheme=dracula][data-md-color-accent=deep-orange]{--md-code-link-accent-bg-color:#473d39;--md-accent-fg-color:hsla(31deg, 100%, 66%, 1);--md-accent-fg-color--transparent:hsla(31deg, 100%, 66%, 0.1);--md-accent-bg-color:var(--md-default-bg-color);--md-accent-bg-color--light:var(--md-default-bg-color--light)}:root{--md-heart:#ff5252;--md-heart-big:#ff1744}:root :focus-visible{outline-style:solid}:root [data-md-color-scheme=dracula]{--md-heart:hsl(326, 100%, 74%);--md-heart-big:hsl(0, 100%, 67%)}.md-typeset h4{margin:2em 0 1em}.md-typeset a.source-link{position:relative;top:-.6rem;float:right;color:var(--md-default-fg-color--lighter);transition:color 125ms}.md-typeset a.source-link:hover{color:var(--md-accent-fg-color)}.md-typeset a.source-link .twemoji{height:1.2rem}.md-typeset a.source-link .twemoji svg{width:1.2rem;height:1.2rem}.md-typeset div.highlight.md-max-height 
pre>code{max-height:15rem}.twemoji.heart-throb svg,.twemoji.heart-throb-hover svg{position:relative;color:var(--md-heart);animation:pulse 1.5s ease infinite}@keyframes pulse{0%{transform:scale(1)}40%{color:var(--md-heart-big);transform:scale(1.3)}50%{transform:scale(1.2)}60%{color:var(--md-heart-big);transform:scale(1.3)}100%{transform:scale(1)}}footer.sponsorship{text-align:center}footer.sponsorship hr{display:inline-block;width:1.6rem;margin:0 .7rem;vertical-align:middle;border-bottom:2px solid var(--md-default-fg-color--lighter)}footer.sponsorship:hover hr{border-color:var(--md-accent-fg-color)}footer.sponsorship:not(:hover) .twemoji.heart-throb-hover svg{color:var(--md-default-fg-color--lighter)!important}body:not([data-md-prefers-color-scheme=true])[data-md-color-scheme=dracula] .md-icon .light-mode,body:not([data-md-prefers-color-scheme=true])[data-md-color-scheme=dracula] .md-icon .system-mode,body:not([data-md-prefers-color-scheme=true])[data-md-color-scheme=dracula] .md-icon .unknown-mode{display:none}body:not([data-md-prefers-color-scheme=true])[data-md-color-scheme=default] .md-icon .dark-mode,body:not([data-md-prefers-color-scheme=true])[data-md-color-scheme=default] .md-icon .system-mode,body:not([data-md-prefers-color-scheme=true])[data-md-color-scheme=default] .md-icon .unknown-mode{display:none}body:not([data-md-prefers-color-scheme=true]):not([data-md-color-scheme=default]):not([data-md-color-scheme=dracula]) .md-icon .dark-mode,body:not([data-md-prefers-color-scheme=true]):not([data-md-color-scheme=default]):not([data-md-color-scheme=dracula]) .md-icon .light-mode,body:not([data-md-prefers-color-scheme=true]):not([data-md-color-scheme=default]):not([data-md-color-scheme=dracula]) .md-icon .system-mode{display:none}body[data-md-prefers-color-scheme=true] .md-icon .dark-mode,body[data-md-prefers-color-scheme=true] .md-icon .light-mode,body[data-md-prefers-color-scheme=true] .md-icon 
.unknown-mode{display:none}.md-header-nav__scheme{z-index:0}[data-md-toggle=search]:checked~.md-header .md-header-nav__scheme{display:none}.md-typeset .admonition,.md-typeset details{border-width:0;border-left-width:4px}:root>*{--md-admonition-bg-color:transparent;--md-admonition-icon--settings:url('data:image/svg+xml;charset=utf-8,<svg xmlns=\"http://www.w3.org/2000/svg\" viewBox=\"0 0 24 24\"><path d=\"M12 15.5A3.5 3.5 0 0 1 8.5 12 3.5 3.5 0 0 1 12 8.5a3.5 3.5 0 0 1 3.5 3.5 3.5 3.5 0 0 1-3.5 3.5m7.43-2.53c.04-.32.07-.64.07-.97 0-.33-.03-.66-.07-1l2.11-1.63c.19-.15.24-.42.12-.64l-2-3.46c-.12-.22-.39-.31-.61-.22l-2.49 1c-.52-.39-1.06-.73-1.69-.98l-.37-2.65A.506.506 0 0 0 14 2h-4c-.25 0-.46.18-.5.42l-.37 2.65c-.63.25-1.17.59-1.69.98l-2.49-1c-.22-.09-.49 0-.61.22l-2 3.46c-.13.22-.07.49.12.64L4.57 11c-.04.34-.07.67-.07 1 0 .33.03.65.07.97l-2.11 1.66c-.19.15-.25.42-.12.64l2 3.46c.12.22.39.3.61.22l2.49-1.01c.52.4 1.06.74 1.69.99l.37 2.65c.04.24.25.42.5.42h4c.25 0 .46-.18.5-.42l.37-2.65c.63-.26 1.17-.59 1.69-.99l2.49 1.01c.22.08.49 0 .61-.22l2-3.46c.12-.22.07-.49-.12-.64l-2.11-1.66Z\"/></svg>');--md-admonition-bg-color--settings:rgba(170, 0, 255, 0.1);--md-admonition-icon-color--settings:#aa00ff;--md-admonition-shadow-color--settings:rgba(170, 0, 255, 0.1);--md-admonition-icon--new:url('data:image/svg+xml;charset=utf-8,<svg xmlns=\"http://www.w3.org/2000/svg\" viewBox=\"0 0 24 24\"><path d=\"m23 12-2.44-2.78.34-3.68-3.61-.82-1.89-3.18L12 3 8.6 1.54 6.71 4.72l-3.61.81.34 3.68L1 12l2.44 2.78-.34 3.69 3.61.82 1.89 3.18L12 21l3.4 1.46 1.89-3.18 3.61-.82-.34-3.68L23 12m-10 5h-2v-2h2v2m0-4h-2V7h2v6Z\"/></svg>');--md-admonition-bg-color--new:rgba(255, 214, 0, 0.1);--md-admonition-icon-color--new:#ffd600;--md-admonition-shadow-color--new:rgba(255, 214, 0, 0.1);--md-admonition-bg-color--note:var(--md-default-bg-color--ultra-dark);--md-admonition-icon-color--note:hsl(51, 94%, 73%);--md-admonition-shadow-color--note:rgba(251, 231, 121, 
0.1);--md-admonition-bg-color--abstract:var(--md-default-bg-color--ultra-dark);--md-admonition-icon-color--abstract:hsl(191, 97%, 77%);--md-admonition-shadow-color--abstract:rgba(139, 232, 253, 0.1);--md-admonition-bg-color--info:var(--md-default-bg-color--ultra-dark);--md-admonition-icon-color--info:hsl(190, 94%, 87%);--md-admonition-shadow-color--info:rgba(191, 243, 253, 0.1);--md-admonition-bg-color--tip:var(--md-default-bg-color--ultra-dark);--md-admonition-icon-color--tip:hsl(161, 97%, 77%);--md-admonition-shadow-color--tip:rgba(139, 253, 217, 0.1);--md-admonition-bg-color--success:var(--md-default-bg-color--ultra-dark);--md-admonition-icon-color--success:hsl(135, 94%, 65%);--md-admonition-shadow-color--success:rgba(82, 250, 124, 0.1);--md-admonition-bg-color--question:var(--md-default-bg-color--ultra-dark);--md-admonition-icon-color--question:hsl(135, 92%, 79%);--md-admonition-shadow-color--question:rgba(152, 251, 177, 0.1);--md-admonition-bg-color--warning:var(--md-default-bg-color--ultra-dark);--md-admonition-icon-color--warning:hsl(31, 100%, 71%);--md-admonition-shadow-color--warning:rgba(255, 184, 107, 0.1);--md-admonition-bg-color--failure:var(--md-default-bg-color--ultra-dark);--md-admonition-icon-color--failure:hsl(0, 100%, 59%);--md-admonition-shadow-color--failure:rgba(255, 46, 46, 0.1);--md-admonition-bg-color--danger:var(--md-default-bg-color--ultra-dark);--md-admonition-icon-color--danger:hsl(0, 100%, 67%);--md-admonition-shadow-color--danger:rgba(255, 87, 87, 0.1);--md-admonition-bg-color--bug:var(--md-default-bg-color--ultra-dark);--md-admonition-icon-color--bug:hsl(325, 100%, 64%);--md-admonition-shadow-color--bug:rgba(255, 71, 179, 0.1);--md-admonition-bg-color--example:var(--md-default-bg-color--ultra-dark);--md-admonition-icon-color--example:hsl(265, 89%, 78%);--md-admonition-shadow-color--example:rgba(191, 149, 249, 0.1);--md-admonition-bg-color--quote:var(--md-default-bg-color--ultra-dark);--md-admonition-icon-color--quote:hsl(225, 8%, 
51%);--md-admonition-shadow-color--quote:rgba(120, 125, 140, 0.1)}:root>[data-md-color-scheme=dracula]{--md-admonition-icon-color:$drac-dark-yellow}:root>[data-md-color-scheme=dracula]{--md-admonition-bg-color--settings:var(--md-default-bg-color--ultra-dark);--md-admonition-icon-color--settings:hsl(326, 100%, 74%);--md-admonition-shadow-color--settings:rgba(255, 122, 198, 0.1)}:root>[data-md-color-scheme=dracula]{--md-admonition-bg-color--new:var(--md-default-bg-color--ultra-dark);--md-admonition-icon-color--new:hsl(65, 92%, 76%);--md-admonition-shadow-color--new:rgba(241, 250, 137, 0.1)}[data-md-color-scheme=dracula] .md-typeset .admonition,[data-md-color-scheme=dracula] .md-typeset details{border-color:var(--md-admonition-icon-color--note);box-shadow:var(--md-shadow-z2)}[data-md-color-scheme=dracula] .md-typeset .admonition:focus-within,[data-md-color-scheme=dracula] .md-typeset details:focus-within{box-shadow:0 0 0 .2rem var(--md-admonition-shadow-color--note)}[data-md-color-scheme=dracula] .md-typeset .admonition>.admonition-title,[data-md-color-scheme=dracula] .md-typeset details>.admonition-title,[data-md-color-scheme=dracula] .md-typeset details>summary{background-color:var(--md-admonition-bg-color--note)}[data-md-color-scheme=dracula] .md-typeset .admonition>.admonition-title::before,[data-md-color-scheme=dracula] .md-typeset details>.admonition-title::before,[data-md-color-scheme=dracula] .md-typeset details>summary::before{background-color:var(--md-admonition-icon-color--note)}[data-md-color-scheme=dracula] .md-typeset .admonition>.admonition-title::after,[data-md-color-scheme=dracula] .md-typeset details>.admonition-title::after,[data-md-color-scheme=dracula] .md-typeset details>summary::after{color:var(--md-admonition-icon-color--note)}[data-md-color-scheme=dracula] .md-typeset .admonition.note,[data-md-color-scheme=dracula] .md-typeset details.note{border-color:var(--md-admonition-icon-color--note)}[data-md-color-scheme=dracula] .md-typeset 
.admonition.note:focus-within,[data-md-color-scheme=dracula] .md-typeset details.note:focus-within{box-shadow:0 0 0 .2rem var(--md-admonition-shadow-color--note)}[data-md-color-scheme=dracula] .md-typeset .admonition.note>.admonition-title,[data-md-color-scheme=dracula] .md-typeset details.note>.admonition-title,[data-md-color-scheme=dracula] .md-typeset details.note>summary{background-color:var(--md-admonition-bg-color--note);border-color:var(--md-admonition-icon-color--note)}[data-md-color-scheme=dracula] .md-typeset .admonition.note>.admonition-title::before,[data-md-color-scheme=dracula] .md-typeset details.note>.admonition-title::before,[data-md-color-scheme=dracula] .md-typeset details.note>summary::before{background-color:var(--md-admonition-icon-color--note)}[data-md-color-scheme=dracula] .md-typeset .admonition.note>.admonition-title::after,[data-md-color-scheme=dracula] .md-typeset details.note>.admonition-title::after,[data-md-color-scheme=dracula] .md-typeset details.note>summary::after{color:var(--md-admonition-icon-color--note)}[data-md-color-scheme=dracula] .md-typeset .admonition.abstract,[data-md-color-scheme=dracula] .md-typeset details.abstract{border-color:var(--md-admonition-icon-color--abstract)}[data-md-color-scheme=dracula] .md-typeset .admonition.abstract:focus-within,[data-md-color-scheme=dracula] .md-typeset details.abstract:focus-within{box-shadow:0 0 0 .2rem var(--md-admonition-shadow-color--abstract)}[data-md-color-scheme=dracula] .md-typeset .admonition.abstract>.admonition-title,[data-md-color-scheme=dracula] .md-typeset details.abstract>.admonition-title,[data-md-color-scheme=dracula] .md-typeset details.abstract>summary{background-color:var(--md-admonition-bg-color--abstract);border-color:var(--md-admonition-icon-color--abstract)}[data-md-color-scheme=dracula] .md-typeset .admonition.abstract>.admonition-title::before,[data-md-color-scheme=dracula] .md-typeset 
details.abstract>.admonition-title::before,[data-md-color-scheme=dracula] .md-typeset details.abstract>summary::before{background-color:var(--md-admonition-icon-color--abstract)}[data-md-color-scheme=dracula] .md-typeset .admonition.abstract>.admonition-title::after,[data-md-color-scheme=dracula] .md-typeset details.abstract>.admonition-title::after,[data-md-color-scheme=dracula] .md-typeset details.abstract>summary::after{color:var(--md-admonition-icon-color--abstract)}[data-md-color-scheme=dracula] .md-typeset .admonition.info,[data-md-color-scheme=dracula] .md-typeset details.info{border-color:var(--md-admonition-icon-color--info)}[data-md-color-scheme=dracula] .md-typeset .admonition.info:focus-within,[data-md-color-scheme=dracula] .md-typeset details.info:focus-within{box-shadow:0 0 0 .2rem var(--md-admonition-shadow-color--info)}[data-md-color-scheme=dracula] .md-typeset .admonition.info>.admonition-title,[data-md-color-scheme=dracula] .md-typeset details.info>.admonition-title,[data-md-color-scheme=dracula] .md-typeset details.info>summary{background-color:var(--md-admonition-bg-color--info);border-color:var(--md-admonition-icon-color--info)}[data-md-color-scheme=dracula] .md-typeset .admonition.info>.admonition-title::before,[data-md-color-scheme=dracula] .md-typeset details.info>.admonition-title::before,[data-md-color-scheme=dracula] .md-typeset details.info>summary::before{background-color:var(--md-admonition-icon-color--info)}[data-md-color-scheme=dracula] .md-typeset .admonition.info>.admonition-title::after,[data-md-color-scheme=dracula] .md-typeset details.info>.admonition-title::after,[data-md-color-scheme=dracula] .md-typeset details.info>summary::after{color:var(--md-admonition-icon-color--info)}[data-md-color-scheme=dracula] .md-typeset .admonition.tip,[data-md-color-scheme=dracula] .md-typeset details.tip{border-color:var(--md-admonition-icon-color--tip)}[data-md-color-scheme=dracula] .md-typeset 
.admonition.tip:focus-within,[data-md-color-scheme=dracula] .md-typeset details.tip:focus-within{box-shadow:0 0 0 .2rem var(--md-admonition-shadow-color--tip)}[data-md-color-scheme=dracula] .md-typeset .admonition.tip>.admonition-title,[data-md-color-scheme=dracula] .md-typeset details.tip>.admonition-title,[data-md-color-scheme=dracula] .md-typeset details.tip>summary{background-color:var(--md-admonition-bg-color--tip);border-color:var(--md-admonition-icon-color--tip)}[data-md-color-scheme=dracula] .md-typeset .admonition.tip>.admonition-title::before,[data-md-color-scheme=dracula] .md-typeset details.tip>.admonition-title::before,[data-md-color-scheme=dracula] .md-typeset details.tip>summary::before{background-color:var(--md-admonition-icon-color--tip)}[data-md-color-scheme=dracula] .md-typeset .admonition.tip>.admonition-title::after,[data-md-color-scheme=dracula] .md-typeset details.tip>.admonition-title::after,[data-md-color-scheme=dracula] .md-typeset details.tip>summary::after{color:var(--md-admonition-icon-color--tip)}[data-md-color-scheme=dracula] .md-typeset .admonition.success,[data-md-color-scheme=dracula] .md-typeset details.success{border-color:var(--md-admonition-icon-color--success)}[data-md-color-scheme=dracula] .md-typeset .admonition.success:focus-within,[data-md-color-scheme=dracula] .md-typeset details.success:focus-within{box-shadow:0 0 0 .2rem var(--md-admonition-shadow-color--success)}[data-md-color-scheme=dracula] .md-typeset .admonition.success>.admonition-title,[data-md-color-scheme=dracula] .md-typeset details.success>.admonition-title,[data-md-color-scheme=dracula] .md-typeset details.success>summary{background-color:var(--md-admonition-bg-color--success);border-color:var(--md-admonition-icon-color--success)}[data-md-color-scheme=dracula] .md-typeset .admonition.success>.admonition-title::before,[data-md-color-scheme=dracula] .md-typeset details.success>.admonition-title::before,[data-md-color-scheme=dracula] .md-typeset 
details.success>summary::before{background-color:var(--md-admonition-icon-color--success)}[data-md-color-scheme=dracula] .md-typeset .admonition.success>.admonition-title::after,[data-md-color-scheme=dracula] .md-typeset details.success>.admonition-title::after,[data-md-color-scheme=dracula] .md-typeset details.success>summary::after{color:var(--md-admonition-icon-color--success)}[data-md-color-scheme=dracula] .md-typeset .admonition.question,[data-md-color-scheme=dracula] .md-typeset details.question{border-color:var(--md-admonition-icon-color--question)}[data-md-color-scheme=dracula] .md-typeset .admonition.question:focus-within,[data-md-color-scheme=dracula] .md-typeset details.question:focus-within{box-shadow:0 0 0 .2rem var(--md-admonition-shadow-color--question)}[data-md-color-scheme=dracula] .md-typeset .admonition.question>.admonition-title,[data-md-color-scheme=dracula] .md-typeset details.question>.admonition-title,[data-md-color-scheme=dracula] .md-typeset details.question>summary{background-color:var(--md-admonition-bg-color--question);border-color:var(--md-admonition-icon-color--question)}[data-md-color-scheme=dracula] .md-typeset .admonition.question>.admonition-title::before,[data-md-color-scheme=dracula] .md-typeset details.question>.admonition-title::before,[data-md-color-scheme=dracula] .md-typeset details.question>summary::before{background-color:var(--md-admonition-icon-color--question)}[data-md-color-scheme=dracula] .md-typeset .admonition.question>.admonition-title::after,[data-md-color-scheme=dracula] .md-typeset details.question>.admonition-title::after,[data-md-color-scheme=dracula] .md-typeset details.question>summary::after{color:var(--md-admonition-icon-color--question)}[data-md-color-scheme=dracula] .md-typeset .admonition.warning,[data-md-color-scheme=dracula] .md-typeset details.warning{border-color:var(--md-admonition-icon-color--warning)}[data-md-color-scheme=dracula] .md-typeset 
.admonition.warning:focus-within,[data-md-color-scheme=dracula] .md-typeset details.warning:focus-within{box-shadow:0 0 0 .2rem var(--md-admonition-shadow-color--warning)}[data-md-color-scheme=dracula] .md-typeset .admonition.warning>.admonition-title,[data-md-color-scheme=dracula] .md-typeset details.warning>.admonition-title,[data-md-color-scheme=dracula] .md-typeset details.warning>summary{background-color:var(--md-admonition-bg-color--warning);border-color:var(--md-admonition-icon-color--warning)}[data-md-color-scheme=dracula] .md-typeset .admonition.warning>.admonition-title::before,[data-md-color-scheme=dracula] .md-typeset details.warning>.admonition-title::before,[data-md-color-scheme=dracula] .md-typeset details.warning>summary::before{background-color:var(--md-admonition-icon-color--warning)}[data-md-color-scheme=dracula] .md-typeset .admonition.warning>.admonition-title::after,[data-md-color-scheme=dracula] .md-typeset details.warning>.admonition-title::after,[data-md-color-scheme=dracula] .md-typeset details.warning>summary::after{color:var(--md-admonition-icon-color--warning)}[data-md-color-scheme=dracula] .md-typeset .admonition.failure,[data-md-color-scheme=dracula] .md-typeset details.failure{border-color:var(--md-admonition-icon-color--failure)}[data-md-color-scheme=dracula] .md-typeset .admonition.failure:focus-within,[data-md-color-scheme=dracula] .md-typeset details.failure:focus-within{box-shadow:0 0 0 .2rem var(--md-admonition-shadow-color--failure)}[data-md-color-scheme=dracula] .md-typeset .admonition.failure>.admonition-title,[data-md-color-scheme=dracula] .md-typeset details.failure>.admonition-title,[data-md-color-scheme=dracula] .md-typeset details.failure>summary{background-color:var(--md-admonition-bg-color--failure);border-color:var(--md-admonition-icon-color--failure)}[data-md-color-scheme=dracula] .md-typeset .admonition.failure>.admonition-title::before,[data-md-color-scheme=dracula] .md-typeset 
details.failure>.admonition-title::before,[data-md-color-scheme=dracula] .md-typeset details.failure>summary::before{background-color:var(--md-admonition-icon-color--failure)}[data-md-color-scheme=dracula] .md-typeset .admonition.failure>.admonition-title::after,[data-md-color-scheme=dracula] .md-typeset details.failure>.admonition-title::after,[data-md-color-scheme=dracula] .md-typeset details.failure>summary::after{color:var(--md-admonition-icon-color--failure)}[data-md-color-scheme=dracula] .md-typeset .admonition.danger,[data-md-color-scheme=dracula] .md-typeset details.danger{border-color:var(--md-admonition-icon-color--danger)}[data-md-color-scheme=dracula] .md-typeset .admonition.danger:focus-within,[data-md-color-scheme=dracula] .md-typeset details.danger:focus-within{box-shadow:0 0 0 .2rem var(--md-admonition-shadow-color--danger)}[data-md-color-scheme=dracula] .md-typeset .admonition.danger>.admonition-title,[data-md-color-scheme=dracula] .md-typeset details.danger>.admonition-title,[data-md-color-scheme=dracula] .md-typeset details.danger>summary{background-color:var(--md-admonition-bg-color--danger);border-color:var(--md-admonition-icon-color--danger)}[data-md-color-scheme=dracula] .md-typeset .admonition.danger>.admonition-title::before,[data-md-color-scheme=dracula] .md-typeset details.danger>.admonition-title::before,[data-md-color-scheme=dracula] .md-typeset details.danger>summary::before{background-color:var(--md-admonition-icon-color--danger)}[data-md-color-scheme=dracula] .md-typeset .admonition.danger>.admonition-title::after,[data-md-color-scheme=dracula] .md-typeset details.danger>.admonition-title::after,[data-md-color-scheme=dracula] .md-typeset details.danger>summary::after{color:var(--md-admonition-icon-color--danger)}[data-md-color-scheme=dracula] .md-typeset .admonition.bug,[data-md-color-scheme=dracula] .md-typeset details.bug{border-color:var(--md-admonition-icon-color--bug)}[data-md-color-scheme=dracula] .md-typeset 
.admonition.bug:focus-within,[data-md-color-scheme=dracula] .md-typeset details.bug:focus-within{box-shadow:0 0 0 .2rem var(--md-admonition-shadow-color--bug)}[data-md-color-scheme=dracula] .md-typeset .admonition.bug>.admonition-title,[data-md-color-scheme=dracula] .md-typeset details.bug>.admonition-title,[data-md-color-scheme=dracula] .md-typeset details.bug>summary{background-color:var(--md-admonition-bg-color--bug);border-color:var(--md-admonition-icon-color--bug)}[data-md-color-scheme=dracula] .md-typeset .admonition.bug>.admonition-title::before,[data-md-color-scheme=dracula] .md-typeset details.bug>.admonition-title::before,[data-md-color-scheme=dracula] .md-typeset details.bug>summary::before{background-color:var(--md-admonition-icon-color--bug)}[data-md-color-scheme=dracula] .md-typeset .admonition.bug>.admonition-title::after,[data-md-color-scheme=dracula] .md-typeset details.bug>.admonition-title::after,[data-md-color-scheme=dracula] .md-typeset details.bug>summary::after{color:var(--md-admonition-icon-color--bug)}[data-md-color-scheme=dracula] .md-typeset .admonition.example,[data-md-color-scheme=dracula] .md-typeset details.example{border-color:var(--md-admonition-icon-color--example)}[data-md-color-scheme=dracula] .md-typeset .admonition.example:focus-within,[data-md-color-scheme=dracula] .md-typeset details.example:focus-within{box-shadow:0 0 0 .2rem var(--md-admonition-shadow-color--example)}[data-md-color-scheme=dracula] .md-typeset .admonition.example>.admonition-title,[data-md-color-scheme=dracula] .md-typeset details.example>.admonition-title,[data-md-color-scheme=dracula] .md-typeset details.example>summary{background-color:var(--md-admonition-bg-color--example);border-color:var(--md-admonition-icon-color--example)}[data-md-color-scheme=dracula] .md-typeset .admonition.example>.admonition-title::before,[data-md-color-scheme=dracula] .md-typeset details.example>.admonition-title::before,[data-md-color-scheme=dracula] .md-typeset 
details.example>summary::before{background-color:var(--md-admonition-icon-color--example)}[data-md-color-scheme=dracula] .md-typeset .admonition.example>.admonition-title::after,[data-md-color-scheme=dracula] .md-typeset details.example>.admonition-title::after,[data-md-color-scheme=dracula] .md-typeset details.example>summary::after{color:var(--md-admonition-icon-color--example)}[data-md-color-scheme=dracula] .md-typeset .admonition.quote,[data-md-color-scheme=dracula] .md-typeset details.quote{border-color:var(--md-admonition-icon-color--quote)}[data-md-color-scheme=dracula] .md-typeset .admonition.quote:focus-within,[data-md-color-scheme=dracula] .md-typeset details.quote:focus-within{box-shadow:0 0 0 .2rem var(--md-admonition-shadow-color--quote)}[data-md-color-scheme=dracula] .md-typeset .admonition.quote>.admonition-title,[data-md-color-scheme=dracula] .md-typeset details.quote>.admonition-title,[data-md-color-scheme=dracula] .md-typeset details.quote>summary{background-color:var(--md-admonition-bg-color--quote);border-color:var(--md-admonition-icon-color--quote)}[data-md-color-scheme=dracula] .md-typeset .admonition.quote>.admonition-title::before,[data-md-color-scheme=dracula] .md-typeset details.quote>.admonition-title::before,[data-md-color-scheme=dracula] .md-typeset details.quote>summary::before{background-color:var(--md-admonition-icon-color--quote)}[data-md-color-scheme=dracula] .md-typeset .admonition.quote>.admonition-title::after,[data-md-color-scheme=dracula] .md-typeset details.quote>.admonition-title::after,[data-md-color-scheme=dracula] .md-typeset details.quote>summary::after{color:var(--md-admonition-icon-color--quote)}.md-typeset .admonition.config,.md-typeset .admonition.settings,.md-typeset details.config,.md-typeset details.settings{border-color:var(--md-admonition-icon-color--settings)}.md-typeset .admonition.config:focus-within,.md-typeset .admonition.settings:focus-within,.md-typeset details.config:focus-within,.md-typeset 
details.settings:focus-within{box-shadow:0 0 0 .2rem var(--md-admonition-shadow-color--settings)}.md-typeset .admonition.config>.admonition-title,.md-typeset .admonition.settings>.admonition-title,.md-typeset details.config>.admonition-title,.md-typeset details.config>summary,.md-typeset details.settings>.admonition-title,.md-typeset details.settings>summary{background-color:var(--md-admonition-bg-color--settings);border-color:var(--md-admonition-icon-color--settings)}.md-typeset .admonition.config>.admonition-title::before,.md-typeset .admonition.settings>.admonition-title::before,.md-typeset details.config>.admonition-title::before,.md-typeset details.config>summary::before,.md-typeset details.settings>.admonition-title::before,.md-typeset details.settings>summary::before{width:1rem;height:1rem;background-color:var(--md-admonition-icon-color--settings);background-size:1rem;-webkit-mask-image:var(--md-admonition-icon--settings);mask-image:var(--md-admonition-icon--settings);content:\" \"}.md-typeset .admonition.config>.admonition-title::after,.md-typeset .admonition.settings>.admonition-title::after,.md-typeset details.config>.admonition-title::after,.md-typeset details.config>summary::after,.md-typeset details.settings>.admonition-title::after,.md-typeset details.settings>summary::after{color:var(--md-admonition-icon-color--settings)}.md-typeset .admonition.new,.md-typeset details.new{border-color:var(--md-admonition-icon-color--new)}.md-typeset .admonition.new:focus-within,.md-typeset details.new:focus-within{box-shadow:0 0 0 .2rem var(--md-admonition-shadow-color--new)}.md-typeset .admonition.new>.admonition-title,.md-typeset details.new>.admonition-title,.md-typeset details.new>summary{background-color:var(--md-admonition-bg-color--new);border-color:var(--md-admonition-icon-color--new)}.md-typeset .admonition.new>.admonition-title::before,.md-typeset details.new>.admonition-title::before,.md-typeset 
details.new>summary::before{width:1rem;height:1rem;background-color:var(--md-admonition-icon-color--new);background-size:1rem;-webkit-mask-image:var(--md-admonition-icon--new);mask-image:var(--md-admonition-icon--new);content:\" \"}.md-typeset .admonition.new>.admonition-title::after,.md-typeset details.new>.admonition-title::after,.md-typeset details.new>summary::after{color:var(--md-admonition-icon-color--new)}mjx-container[display=true]{font-size:120%!important}mjx-container:not([display]){font-size:100%!important}[data-md-color-scheme=dracula] .CtxtMenu_InfoContent pre,[data-md-color-scheme=dracula] .CtxtMenu_InfoSignature input,[data-md-color-scheme=slate] .CtxtMenu_InfoContent pre,[data-md-color-scheme=slate] .CtxtMenu_InfoSignature input{color:#000}[data-md-color-scheme=dracula] .CtxtMenu_Info,[data-md-color-scheme=dracula] .CtxtMenu_Menu,[data-md-color-scheme=slate] .CtxtMenu_Info,[data-md-color-scheme=slate] .CtxtMenu_Menu{box-shadow:0 10px 20px rgba(0,0,0,.5)}.md-typeset .arithmatex{overflow-x:auto!important;overflow-y:hidden!important}.katex-display .katex-html{display:flex!important;flex-direction:row;flex-wrap:nowrap;align-items:baseline;justify-content:space-between}.katex-display .katex-html .base{display:inline!important}.katex-display .katex-html .tag{position:relative!important;display:inline!important;margin-left:var(--margin-small)}.md-typeset del.critic,.md-typeset ins.critic,.md-typeset mark.critic{padding:0 .25em;color:unset;box-shadow:none}.md-typeset .critic.break{margin:0}.md-typeset details{overflow:hidden}.md-typeset details>summary:focus{outline-style:none}.highlight .kc{color:var(--md-code-hl-constant-color)}.highlight .nc,.highlight .ne{color:var(--md-code-hl-class-color)}.highlight .mb{color:var(--md-code-hl-number-color)}.highlight .bp,.highlight .nb{color:var(--md-code-hl-builtin-color)}.highlight .nn{color:var(--md-code-hl-namespace-color)}.highlight .na,.highlight .nd,.highlight .ni{color:var(--md-code-hl-entity-color)}.highlight 
.nl,.highlight .nt{color:var(--md-code-hl-tag-color)}.md-typeset :not(pre)>code{margin:0;padding:0 .2941176471em;color:var(--md-code-fg-color);background-color:var(--md-code-inline-bg-color);border-radius:.1rem;box-shadow:none}.md-typeset a>code{color:inherit!important;background-color:var(--md-code-link-bg-color)!important;transition:color 125ms;transition:background-color 125ms}.md-typeset a>code *{color:var(--md-typeset-a-color)!important}.md-typeset a>code:hover{background-color:var(--md-code-link-accent-bg-color)!important}.md-typeset a>code:hover *{color:var(--md-accent-fg-color)!important}.md-typeset pre>code{outline:0}.md-typeset td code{word-break:normal}.md-typeset .highlight{-moz-tab-size:8;-o-tab-size:8;tab-size:8}.md-typeset .highlight+.result{border-width:.1rem}.md-typeset .highlight [data-linenos].special::before{background-color:var(--md-code-special-bg-color)}.md-typeset .highlighttable .linenodiv .special{margin-right:-.5882352941em;margin-left:-1.1764705882em;padding-right:.5882352941em;padding-left:1.1764705882em;background-color:var(--md-code-special-bg-color)}.md-typeset .highlight span.filename{position:relative;display:block;margin-top:1em;padding:.5em 1.1764705882em .5em 2.9411764706em;font-weight:700;font-size:.68rem;background-color:var(--md-code-title-bg-color);border-top-left-radius:.1rem;border-top-right-radius:.1rem}.md-typeset .highlight span.filename+pre{margin-top:0}.md-typeset .highlight span.filename+pre code{border-top-left-radius:0;border-top-right-radius:0}.md-typeset .highlight span.filename::before{position:absolute;left:.8823529412em;width:1.4705882353em;height:1.4705882353em;background-color:var(--md-default-fg-color);-webkit-mask-image:url('data:image/svg+xml;charset=utf-8,<svg xmlns=\"http://www.w3.org/2000/svg\" viewBox=\"0 0 24 24\"><path d=\"M20 19V7H4v12h16m0-16a2 2 0 0 1 2 2v14a2 2 0 0 1-2 2H4a2 2 0 0 1-2-2V5a2 2 0 0 1 2-2h16m-7 14v-2h5v2h-5m-3.42-4L5.57 9H8.4l3.3 3.3c.39.39.39 1.03 0 1.42L8.42 
17H5.59l3.99-4Z\"/></svg>');mask-image:url('data:image/svg+xml;charset=utf-8,<svg xmlns=\"http://www.w3.org/2000/svg\" viewBox=\"0 0 24 24\"><path d=\"M20 19V7H4v12h16m0-16a2 2 0 0 1 2 2v14a2 2 0 0 1-2 2H4a2 2 0 0 1-2-2V5a2 2 0 0 1 2-2h16m-7 14v-2h5v2h-5m-3.42-4L5.57 9H8.4l3.3 3.3c.39.39.39 1.03 0 1.42L8.42 17H5.59l3.99-4Z\"/></svg>');-webkit-mask-repeat:no-repeat;mask-repeat:no-repeat;-webkit-mask-size:contain;mask-size:contain;content:\"\"}.md-typeset .collapse-code{position:relative;margin-top:1em;margin-bottom:1em}.md-typeset .collapse-code pre{margin-top:0;margin-bottom:0}.md-typeset .collapse-code input{display:none}.md-typeset .collapse-code input~.code-footer{width:100%;margin:0;padding:.25em .5em .25em 0}.md-typeset .collapse-code input~.code-footer label{position:relative;margin:.05em;padding:.15em .8em;color:var(--md-primary-bg-color);font-size:90%;background-color:var(--md-primary-fg-color);-webkit-mask-repeat:no-repeat;mask-repeat:no-repeat;-webkit-mask-size:contain;mask-size:contain;border-radius:.1rem;cursor:pointer;content:\"\"}.md-typeset .collapse-code input~.code-footer label:hover{background-color:var(--md-accent-fg-color)}.md-typeset .collapse-code input~.code-footer label::before{position:absolute;top:.15em;left:.15em;display:block;box-sizing:border-box;width:1.25em;height:1.25em;background-color:var(--md-primary-bg-color);background-size:1.25em;content:\"\"}.md-typeset .collapse-code input~.code-footer label.expand{display:none}.md-typeset .collapse-code input~.code-footer label.expand::before{-webkit-mask-image:url('data:image/svg+xml;charset=utf-8,<svg xmlns=\"http://www.w3.org/2000/svg\" viewBox=\"0 0 24 24\"><path d=\"M10 21v-2H6.41l4.5-4.5-1.41-1.41-4.5 4.5V14H3v7h7m4.5-10.09 4.5-4.5V10h2V3h-7v2h3.59l-4.5 4.5 1.41 1.41Z\"/></svg>');mask-image:url('data:image/svg+xml;charset=utf-8,<svg xmlns=\"http://www.w3.org/2000/svg\" viewBox=\"0 0 24 24\"><path d=\"M10 21v-2H6.41l4.5-4.5-1.41-1.41-4.5 4.5V14H3v7h7m4.5-10.09 
4.5-4.5V10h2V3h-7v2h3.59l-4.5 4.5 1.41 1.41Z\"/></svg>')}.md-typeset .collapse-code input~.code-footer label.collapse::before{-webkit-mask-image:url('data:image/svg+xml;charset=utf-8,<svg xmlns=\"http://www.w3.org/2000/svg\" viewBox=\"0 0 24 24\"><path d=\"M19.5 3.09 15 7.59V4h-2v7h7V9h-3.59l4.5-4.5-1.41-1.41M4 13v2h3.59l-4.5 4.5 1.41 1.41 4.5-4.5V20h2v-7H4Z\"/></svg>');mask-image:url('data:image/svg+xml;charset=utf-8,<svg xmlns=\"http://www.w3.org/2000/svg\" viewBox=\"0 0 24 24\"><path d=\"M19.5 3.09 15 7.59V4h-2v7h7V9h-3.59l4.5-4.5-1.41-1.41M4 13v2h3.59l-4.5 4.5 1.41 1.41 4.5-4.5V20h2v-7H4Z\"/></svg>')}.md-typeset .collapse-code input:checked~.code-footer label.expand{display:inline}.md-typeset .collapse-code input:checked~.code-footer label.collapse{display:none}.md-typeset .collapse-code input:checked+div.highlight code{max-height:9.375em;overflow:hidden}.md-typeset .collapse-code input:checked~.code-footer{position:absolute;bottom:0;left:0;padding:2em .5em .5em .8rem;background-image:linear-gradient(to bottom,transparent,var(--md-default-bg-color) 80% 100%)}.md-typeset .keys .key-power::before{padding-right:.4em;content:\"⏻\"}.md-typeset .keys .key-fingerprint::before{padding-right:.4em;content:\"☝\"}:root>*{--magiclink-email-icon:url('data:image/svg+xml;charset=utf-8,<svg xmlns=\"http://www.w3.org/2000/svg\" viewBox=\"0 0 24 24\"><path d=\"M20 4H4c-1.11 0-2 .89-2 2v12a2 2 0 0 0 2 2h16a2 2 0 0 0 2-2V6a2 2 0 0 0-2-2m-3 13H7v-2h10m0-2H7v-2h10m3-2h-3V6h3\"/></svg>');--magiclink-github-icon:url('data:image/svg+xml;charset=utf-8,<svg xmlns=\"http://www.w3.org/2000/svg\" viewBox=\"0 0 24 24\"><path d=\"M12 2A10 10 0 0 0 2 12c0 4.42 2.87 8.17 6.84 9.5.5.08.66-.23.66-.5v-1.69c-2.77.6-3.36-1.34-3.36-1.34-.46-1.16-1.11-1.47-1.11-1.47-.91-.62.07-.6.07-.6 1 .07 1.53 1.03 1.53 1.03.87 1.52 2.34 1.07 2.91.83.09-.65.35-1.09.63-1.34-2.22-.25-4.55-1.11-4.55-4.92 0-1.11.38-2 1.03-2.71-.1-.25-.45-1.29.1-2.64 0 0 .84-.27 2.75 1.02.79-.22 1.65-.33 2.5-.33.85 0 1.71.11 2.5.33 
1.91-1.29 2.75-1.02 2.75-1.02.55 1.35.2 2.39.1 2.64.65.71 1.03 1.6 1.03 2.71 0 3.82-2.34 4.66-4.57 4.91.36.31.69.92.69 1.85V21c0 .27.16.59.67.5C19.14 20.16 22 16.42 22 12A10 10 0 0 0 12 2Z\"/></svg>');--magiclink-bitbucket-icon:url('data:image/svg+xml;charset=utf-8,<svg xmlns=\"http://www.w3.org/2000/svg\" viewBox=\"0 0 24 24\"><path d=\"M2.65 3C2.3 3 2 3.3 2 3.65v.12l2.73 16.5c.07.42.43.73.85.73h13.05c.31 0 .59-.22.64-.54L22 3.77a.643.643 0 0 0-.54-.73c-.03-.01-.07-.01-.11-.01L2.65 3M14.1 14.95H9.94L8.81 9.07h6.3l-1.01 5.88Z\"/></svg>');--magiclink-gitlab-icon:url('data:image/svg+xml;charset=utf-8,<svg xmlns=\"http://www.w3.org/2000/svg\" viewBox=\"0 0 24 24\"><path d=\"m21.94 13.11-1.05-3.22c0-.03-.01-.06-.02-.09l-2.11-6.48a.859.859 0 0 0-.8-.57c-.36 0-.68.25-.79.58l-2 6.17H8.84L6.83 3.33a.851.851 0 0 0-.79-.58c-.37 0-.69.25-.8.58L3.13 9.82v.01l-1.07 3.28c-.16.5.01 1.04.44 1.34l9.22 6.71c.17.12.39.12.56-.01l9.22-6.7c.43-.3.6-.84.44-1.34M8.15 10.45l2.57 7.91-6.17-7.91m8.73 7.92 2.47-7.59.1-.33h3.61l-5.59 7.16m4.1-13.67 1.81 5.56h-3.62m-1.3.95-1.79 5.51L12 19.24l-2.86-8.79M6.03 3.94 7.84 9.5H4.23m-1.18 4.19c-.09-.07-.13-.19-.09-.29l.79-2.43 5.82 7.45m11.38-4.73-6.51 4.73.02-.03 5.79-7.42.79 2.43c.04.1 0 .22-.09.29\"/></svg>');--magiclink-commit-icon:url('data:image/svg+xml;charset=utf-8,<svg xmlns=\"http://www.w3.org/2000/svg\" viewBox=\"0 0 24 24\"><path d=\"M16.944 11h4.306a.75.75 0 0 1 0 1.5h-4.306a5.001 5.001 0 0 1-9.888 0H2.75a.75.75 0 0 1 0-1.5h4.306a5.001 5.001 0 0 1 9.888 0Zm-1.444.75a3.5 3.5 0 1 0-7 0 3.5 3.5 0 0 0 7 0Z\"/></svg>');--magiclink-compare-icon:url('data:image/svg+xml;charset=utf-8,<svg xmlns=\"http://www.w3.org/2000/svg\" viewBox=\"0 0 24 24\"><path d=\"M12.5 6.75a.75.75 0 0 0-1.5 0V9H8.75a.75.75 0 0 0 0 1.5H11v2.25a.75.75 0 0 0 1.5 0V10.5h2.25a.75.75 0 0 0 0-1.5H12.5V6.75ZM8.75 16a.75.75 0 0 0 0 1.5h6a.75.75 0 0 0 0-1.5h-6Z\"/><path d=\"M5 1h9.982a2 2 0 0 1 1.414.586l4.018 4.018A2 2 0 0 1 21 7.018V21a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2V3a2 2 0 0 1 
2-2Zm-.5 2v18a.5.5 0 0 0 .5.5h14a.5.5 0 0 0 .5-.5V7.018a.5.5 0 0 0-.146-.354l-4.018-4.018a.5.5 0 0 0-.354-.146H5a.5.5 0 0 0-.5.5Z\"/></svg>');--magiclink-pull-icon:url('data:image/svg+xml;charset=utf-8,<svg xmlns=\"http://www.w3.org/2000/svg\" viewBox=\"0 0 24 24\"><path d=\"M16 19.25a3.25 3.25 0 1 1 6.5 0 3.25 3.25 0 0 1-6.5 0Zm-14.5 0a3.25 3.25 0 1 1 6.5 0 3.25 3.25 0 0 1-6.5 0Zm0-14.5a3.25 3.25 0 1 1 6.5 0 3.25 3.25 0 0 1-6.5 0ZM4.75 3a1.75 1.75 0 1 0 .001 3.501A1.75 1.75 0 0 0 4.75 3Zm0 14.5a1.75 1.75 0 1 0 .001 3.501A1.75 1.75 0 0 0 4.75 17.5Zm14.5 0a1.75 1.75 0 1 0 .001 3.501 1.75 1.75 0 0 0-.001-3.501Z\"/><path d=\"M13.405 1.72a.75.75 0 0 1 0 1.06L12.185 4h4.065A3.75 3.75 0 0 1 20 7.75v8.75a.75.75 0 0 1-1.5 0V7.75a2.25 2.25 0 0 0-2.25-2.25h-4.064l1.22 1.22a.75.75 0 0 1-1.061 1.06l-2.5-2.5a.75.75 0 0 1 0-1.06l2.5-2.5a.75.75 0 0 1 1.06 0ZM4.75 7.25A.75.75 0 0 1 5.5 8v8A.75.75 0 0 1 4 16V8a.75.75 0 0 1 .75-.75Z\"/></svg>');--magiclink-issue-icon:url('data:image/svg+xml;charset=utf-8,<svg xmlns=\"http://www.w3.org/2000/svg\" viewBox=\"0 0 24 24\"><path d=\"M12 1c6.075 0 11 4.925 11 11s-4.925 11-11 11S1 18.075 1 12 5.925 1 12 1ZM2.5 12a9.5 9.5 0 0 0 9.5 9.5 9.5 9.5 0 0 0 9.5-9.5A9.5 9.5 0 0 0 12 2.5 9.5 9.5 0 0 0 2.5 12Zm9.5 2a2 2 0 1 1-.001-3.999A2 2 0 0 1 12 14Z\"/></svg>');--magiclink-discussion-icon:url('data:image/svg+xml;charset=utf-8,<svg xmlns=\"http://www.w3.org/2000/svg\" viewBox=\"0 0 24 24\"><path d=\"M1.75 1h12.5c.966 0 1.75.784 1.75 1.75v9.5A1.75 1.75 0 0 1 14.25 14H8.061l-2.574 2.573A1.458 1.458 0 0 1 3 15.543V14H1.75A1.75 1.75 0 0 1 0 12.25v-9.5C0 1.784.784 1 1.75 1ZM1.5 2.75v9.5c0 .138.112.25.25.25h2a.75.75 0 0 1 .75.75v2.19l2.72-2.72a.749.749 0 0 1 .53-.22h6.5a.25.25 0 0 0 .25-.25v-9.5a.25.25 0 0 0-.25-.25H1.75a.25.25 0 0 0-.25.25Z\"/><path d=\"M22.5 8.75a.25.25 0 0 0-.25-.25h-3.5a.75.75 0 0 1 0-1.5h3.5c.966 0 1.75.784 1.75 1.75v9.5A1.75 1.75 0 0 1 22.25 20H21v1.543a1.457 1.457 0 0 1-2.487 1.03L15.939 20H10.75A1.75 1.75 0 0 1 9 
18.25v-1.465a.75.75 0 0 1 1.5 0v1.465c0 .138.112.25.25.25h5.5a.75.75 0 0 1 .53.22l2.72 2.72v-2.19a.75.75 0 0 1 .75-.75h2a.25.25 0 0 0 .25-.25v-9.5Z\"/></svg>')}.md-typeset a[href^=\"mailto:\"]:not(.magiclink-ignore)::before{-webkit-mask-image:var(--magiclink-email-icon);mask-image:var(--magiclink-email-icon)}.md-typeset .magiclink-commit:not(.magiclink-ignore),.md-typeset .magiclink-compare:not(.magiclink-ignore),.md-typeset .magiclink-discussion:not(.magiclink-ignore),.md-typeset .magiclink-issue:not(.magiclink-ignore),.md-typeset .magiclink-pull:not(.magiclink-ignore),.md-typeset .magiclink-repository:not(.magiclink-ignore),.md-typeset a[href^=\"mailto:\"]:not(.magiclink-ignore){position:relative;padding-left:1.375em}.md-typeset .magiclink-commit:not(.magiclink-ignore)::before,.md-typeset .magiclink-compare:not(.magiclink-ignore)::before,.md-typeset .magiclink-discussion:not(.magiclink-ignore)::before,.md-typeset .magiclink-issue:not(.magiclink-ignore)::before,.md-typeset .magiclink-pull:not(.magiclink-ignore)::before,.md-typeset .magiclink-repository:not(.magiclink-ignore)::before,.md-typeset a[href^=\"mailto:\"]:not(.magiclink-ignore)::before{position:absolute;top:0;left:0;display:block;box-sizing:border-box;width:1.25em;height:1.25em;background-color:var(--md-typeset-a-color);background-size:1.25em;transition:background-color 125ms;-webkit-mask-repeat:no-repeat;mask-repeat:no-repeat;-webkit-mask-size:contain;mask-size:contain;content:\"\"}.md-typeset .magiclink-commit:not(.magiclink-ignore):hover::before,.md-typeset .magiclink-compare:not(.magiclink-ignore):hover::before,.md-typeset .magiclink-discussion:not(.magiclink-ignore):hover::before,.md-typeset .magiclink-issue:not(.magiclink-ignore):hover::before,.md-typeset .magiclink-pull:not(.magiclink-ignore):hover::before,.md-typeset .magiclink-repository:not(.magiclink-ignore):hover::before,.md-typeset 
a[href^=\"mailto:\"]:not(.magiclink-ignore):hover::before{background-color:var(--md-accent-fg-color)}.md-typeset .magiclink-commit:not(.magiclink-ignore)::before{-webkit-mask-image:var(--magiclink-commit-icon);mask-image:var(--magiclink-commit-icon)}.md-typeset .magiclink-compare:not(.magiclink-ignore)::before{-webkit-mask-image:var(--magiclink-compare-icon);mask-image:var(--magiclink-compare-icon)}.md-typeset .magiclink-pull:not(.magiclink-ignore)::before{-webkit-mask-image:var(--magiclink-pull-icon);mask-image:var(--magiclink-pull-icon)}.md-typeset .magiclink-issue:not(.magiclink-ignore)::before{-webkit-mask-image:var(--magiclink-issue-icon);mask-image:var(--magiclink-issue-icon)}.md-typeset .magiclink-discussion:not(.magiclink-ignore)::before{-webkit-mask-image:var(--magiclink-discussion-icon);mask-image:var(--magiclink-discussion-icon)}.md-typeset .magiclink-repository.magiclink-github:not(.magiclink-ignore)::before{-webkit-mask-image:var(--magiclink-github-icon);mask-image:var(--magiclink-github-icon)}.md-typeset .magiclink-repository.magiclink-gitlab:not(.magiclink-ignore)::before{-webkit-mask-image:var(--magiclink-gitlab-icon);mask-image:var(--magiclink-gitlab-icon)}.md-typeset .magiclink-repository.magiclink-bitbucket:not(.magiclink-ignore)::before{-webkit-mask-image:var(--magiclink-bitbucket-icon);mask-image:var(--magiclink-bitbucket-icon)}.md-typeset mark:not(.critic){box-shadow:none}.md-typeset .progress-label{position:absolute;width:100%;margin:0;color:var(--md-text-color);font-weight:700;line-height:1.4rem;white-space:nowrap;text-align:center;text-shadow:-.0625em -.0625em .375em var(--md-default-bg-color--light),.0625em -.0625em .375em var(--md-default-bg-color--light),-.0625em .0625em .375em var(--md-default-bg-color--light),.0625em .0625em .375em var(--md-default-bg-color--light)}.md-typeset .progress-bar{float:left;height:1.2rem;background-color:#2979ff}.md-typeset .candystripe-animate .progress-bar{animation:animate-stripes 3s linear 
infinite}.md-typeset .progress{position:relative;display:block;width:100%;height:1.2rem;margin:.5rem 0;background-color:var(--md-default-fg-color--lightest)}.md-typeset .progress.thin{height:.4rem;margin-top:.9rem}.md-typeset .progress.thin .progress-label{margin-top:-.4rem}.md-typeset .progress.thin .progress-bar{height:.4rem}.md-typeset .progress.candystripe .progress-bar{background-image:linear-gradient(135deg,var(--md-progress-stripe) 27%,transparent 27%,transparent 52%,var(--md-progress-stripe) 52%,var(--md-progress-stripe) 77%,transparent 77%,transparent);background-size:2rem 2rem}.md-typeset .progress-100plus .progress-bar{background-color:var(--md-progress-100)}.md-typeset .progress-80plus .progress-bar{background-color:var(--md-progress-80)}.md-typeset .progress-60plus .progress-bar{background-color:var(--md-progress-60)}.md-typeset .progress-40plus .progress-bar{background-color:var(--md-progress-40)}.md-typeset .progress-20plus .progress-bar{background-color:var(--md-progress-20)}.md-typeset .progress-0plus .progress-bar{background-color:var(--md-progress-0)}@keyframes animate-stripes{0%{background-position:0 0}100%{background-position:6rem 0}}[data-md-color-scheme=dracula] .md-typeset .tabbed-set>.tabbed-labels{box-shadow:0 -.05rem var(--md-default-fg-color--lighter) inset}.md-typeset .tabbed-alternate.tabbed-set .tabbed-control{width:2rem}.md-typeset .tabbed-alternate.tabbed-set .tabbed-control[hidden]{width:1.2rem;opacity:0}.md-typeset .tabbed-alternate.tabbed-set>.tabbed-content>.tabbed-block{padding:0 .6rem}.md-typeset .tabbed-alternate.tabbed-set>.tabbed-content>.tabbed-block>.codehilite:only-child,.md-typeset .tabbed-alternate.tabbed-set>.tabbed-content>.tabbed-block>.codehilitetable:only-child,.md-typeset .tabbed-alternate.tabbed-set>.tabbed-content>.tabbed-block>.highlight:only-child,.md-typeset .tabbed-alternate.tabbed-set>.tabbed-content>.tabbed-block>.highlighttable:only-child,.md-typeset 
.tabbed-alternate.tabbed-set>.tabbed-content>.tabbed-block>pre:only-child{margin-right:-1.2rem;margin-left:-1.2rem;padding-right:.6rem;padding-left:.6rem}.md-typeset .tabbed-alternate.tabbed-set>.tabbed-content>.tabbed-block>.codehilite:only-child span.filename,.md-typeset .tabbed-alternate.tabbed-set>.tabbed-content>.tabbed-block>.codehilitetable:only-child span.filename,.md-typeset .tabbed-alternate.tabbed-set>.tabbed-content>.tabbed-block>.highlight:only-child span.filename,.md-typeset .tabbed-alternate.tabbed-set>.tabbed-content>.tabbed-block>.highlighttable:only-child span.filename,.md-typeset .tabbed-alternate.tabbed-set>.tabbed-content>.tabbed-block>pre:only-child span.filename{margin-top:0}.md-typeset .tabbed-alternate.tabbed-set>.tabbed-content>.tabbed-block>.collapse-code:only-child{margin-top:0;margin-right:-1.2rem;margin-left:-1.2rem;padding-right:.6rem;padding-left:.6rem}.md-typeset .tabbed-alternate.tabbed-set>.tabbed-content>.tabbed-block>.collapse-code:only-child>.code-footer{left:.6rem}.md-typeset .tabbed-alternate.tabbed-set>.tabbed-content>.tabbed-block>diagram-div:only-child{margin-right:-1.2rem;margin-left:-1.2rem;padding-right:.6rem;padding-left:.6rem}.js .md-typeset .tabbed-labels::before{background-color:var(--md-accent-fg-color)}[data-md-color-scheme=dracula] .md-typeset table:not([class]){box-shadow:var(--md-shadow-z2)}[data-md-color-scheme=dracula] .md-typeset table:not([class]) tr:hover{background-color:rgba(0,0,0,.08)}[data-md-color-scheme=dracula] .md-typeset table:not([class]) th{color:var(--md-text-color);background-color:var(--md-default-bg-color--ultra-dark);border-bottom:.05rem solid var(--md-primary-fg-color)}[data-md-color-scheme=dracula] .md-typeset table:not([class]) td{border-top:.05rem solid var(--md-default-fg-color--lighter)}[data-md-color-scheme=dracula] .md-typeset .task-list-control .task-list-indicator::before{background-color:var(--md-default-fg-color--lighter)}[data-md-color-scheme=dracula] .md-typeset 
.task-list-control [type=checkbox]:checked+.task-list-indicator::before{background-color:#51f97b}.md-typeset .headerlink{width:1em;height:1em;vertical-align:middle;background-color:var(--md-default-fg-color--lighter);background-size:1em;-webkit-mask-size:1em;mask-size:1em;-webkit-mask-repeat:no-repeat;mask-repeat:no-repeat;visibility:visible;-webkit-mask-image:url('data:image/svg+xml;charset=utf-8,<svg xmlns=\"http://www.w3.org/2000/svg\" viewBox=\"0 0 24 24\"><path d=\"M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7a5 5 0 0 0-5 5 5 5 0 0 0 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1M8 13h8v-2H8v2m9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1 0 1.71-1.39 3.1-3.1 3.1h-4V17h4a5 5 0 0 0 5-5 5 5 0 0 0-5-5Z\"/></svg>');mask-image:url('data:image/svg+xml;charset=utf-8,<svg xmlns=\"http://www.w3.org/2000/svg\" viewBox=\"0 0 24 24\"><path d=\"M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7a5 5 0 0 0-5 5 5 5 0 0 0 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1M8 13h8v-2H8v2m9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1 0 1.71-1.39 3.1-3.1 3.1h-4V17h4a5 5 0 0 0 5-5 5 5 0 0 0-5-5Z\"/></svg>')}.md-typeset .headerlink:hover,.md-typeset [id]:target .headerlink{background-color:var(--md-accent-fg-color)}diagram-div{overflow:auto}html{background-color:transparent}[data-md-component=announce] .twemoji{color:var(--md-primary-fg-color)}[data-md-color-scheme=dracula]{--md-text-color:var(--md-default-fg-color);background-color:var(--md-default-bg-color);--md-footer-bg-color:transparent;--md-footer-bg-color--dark:var(--md-default-bg-color--darkest);--md-header-fg-color:var(--md-text-color);--md-header-bg-color:var(--md-default-bg-color--darkest)}[data-md-color-scheme=dracula] .md-header{color:var(--md-text-color);background-color:var(--md-header-bg-color);border-bottom:.05rem solid var(--md-primary-fg-color)}[data-md-color-scheme=dracula] .md-header[data-md-state=shadow]{box-shadow:0 0 .2rem rgba(0,0,0,.15),0 0 .2rem .4rem rgba(0,0,0,.2)}[data-md-color-scheme=dracula] 
.md-top{background-color:var(--md-default-bg-color--dark)}[data-md-color-scheme=dracula] .md-top:hover{background-color:var(--md-primary-fg-color)}[data-md-color-scheme=dracula] .md-tabs{color:var(--md-text-color);background-color:var(--md-primary-fg-color--transparent)}[data-md-color-scheme=dracula] .md-tabs__link--active{color:var(--md-primary-fg-color)}[data-md-color-scheme=dracula] .md-tabs__link:hover{color:var(--md-accent-fg-color)}[data-md-color-scheme=dracula] .md-hero{color:var(--md-text-color);background-color:var(--md-primary-fg-color--transparent)}[data-md-color-scheme=dracula] .md-nav__source{color:var(--md-text-color)}[data-md-color-scheme=dracula] .md-nav__link[data-md-state=blur]{color:var(--md-default-fg-color--light)}[data-md-color-scheme=dracula] .md-nav__item .md-nav__link--active{color:var(--md-primary-fg-color)}[data-md-color-scheme=dracula] .md-nav__link:focus,[data-md-color-scheme=dracula] .md-nav__link:hover{color:var(--md-accent-fg-color)}[data-md-color-scheme=dracula] .md-search__input{color:var(--md-text-color);background-color:var(--md-accent-bg-color--light)}[data-md-color-scheme=dracula] .md-search__input:hover{background-color:var(--md-default-bg-color--light)}[data-md-color-scheme=dracula] .md-search__input~.md-search__icon{color:var(--md-text-color)}[data-md-color-scheme=dracula] .md-search__input::-moz-placeholder{color:var(--md-default-fg-color--light)}[data-md-color-scheme=dracula] .md-search__input::placeholder{color:var(--md-default-fg-color--light)}[data-md-color-scheme=dracula] [data-md-toggle=search]:checked~.md-header .md-search__input{background-color:transparent}[data-md-color-scheme=dracula] .md-search__suggest{color:var(--md-default-autocomplete-fg-color)}[data-md-color-scheme=dracula] .md-overlay,[data-md-color-scheme=dracula] .md-search__overlay{background-color:var(--md-default-bg-color--light)}[data-md-color-scheme=dracula] .md-footer-nav__direction{color:var(--md-primary-fg-color)}[data-md-color-scheme=dracula] 
.md-footer-meta{border-top:.05rem solid var(--md-primary-fg-color)}[data-md-color-scheme=dracula] [data-md-component=announce]{background-color:var(--md-default-bg-color--ultra-dark)}.md-typeset h5{color:var(--md-text-color);text-transform:none}.md-search__scrollwrap,.md-sidebar__scrollwrap,.md-typeset diagram-div,.md-typeset div.arithmatex,.md-typeset div.diagram,.md-typeset div.mermaid,.md-typeset mermaid-div,.md-typeset pre.arithmatex,.md-typeset pre>code,.md-typeset__scrollwrap{scrollbar-color:var(--md-default-fg-color--lighter) transparent;scrollbar-width:thin}.md-search__scrollwrap::-webkit-scrollbar,.md-sidebar__scrollwrap::-webkit-scrollbar,.md-typeset diagram-div::-webkit-scrollbar,.md-typeset div.arithmatex::-webkit-scrollbar,.md-typeset div.diagram::-webkit-scrollbar,.md-typeset div.mermaid::-webkit-scrollbar,.md-typeset mermaid-div::-webkit-scrollbar,.md-typeset pre.arithmatex::-webkit-scrollbar,.md-typeset pre>code::-webkit-scrollbar,.md-typeset__scrollwrap::-webkit-scrollbar{width:.2rem;height:.2rem}.md-search__scrollwrap::-webkit-scrollbar-corner,.md-sidebar__scrollwrap::-webkit-scrollbar-corner,.md-typeset diagram-div::-webkit-scrollbar-corner,.md-typeset div.arithmatex::-webkit-scrollbar-corner,.md-typeset div.diagram::-webkit-scrollbar-corner,.md-typeset div.mermaid::-webkit-scrollbar-corner,.md-typeset mermaid-div::-webkit-scrollbar-corner,.md-typeset pre.arithmatex::-webkit-scrollbar-corner,.md-typeset pre>code::-webkit-scrollbar-corner,.md-typeset__scrollwrap::-webkit-scrollbar-corner{background-color:transparent}.md-search__scrollwrap::-webkit-scrollbar-thumb,.md-sidebar__scrollwrap::-webkit-scrollbar-thumb,.md-typeset diagram-div::-webkit-scrollbar-thumb,.md-typeset div.arithmatex::-webkit-scrollbar-thumb,.md-typeset div.diagram::-webkit-scrollbar-thumb,.md-typeset div.mermaid::-webkit-scrollbar-thumb,.md-typeset mermaid-div::-webkit-scrollbar-thumb,.md-typeset pre.arithmatex::-webkit-scrollbar-thumb,.md-typeset 
pre>code::-webkit-scrollbar-thumb,.md-typeset__scrollwrap::-webkit-scrollbar-thumb{background-color:var(--md-default-fg-color--lighter)}.md-search__scrollwrap::-webkit-scrollbar-thumb:hover,.md-sidebar__scrollwrap::-webkit-scrollbar-thumb:hover,.md-typeset diagram-div::-webkit-scrollbar-thumb:hover,.md-typeset div.arithmatex::-webkit-scrollbar-thumb:hover,.md-typeset div.diagram::-webkit-scrollbar-thumb:hover,.md-typeset div.mermaid::-webkit-scrollbar-thumb:hover,.md-typeset mermaid-div::-webkit-scrollbar-thumb:hover,.md-typeset pre.arithmatex::-webkit-scrollbar-thumb:hover,.md-typeset pre>code::-webkit-scrollbar-thumb:hover,.md-typeset__scrollwrap::-webkit-scrollbar-thumb:hover{background-color:var(--md-accent-fg-color)}.md-search__scrollwrap:hover,.md-sidebar__scrollwrap:hover,.md-typeset diagram-div:hover,.md-typeset div.arithmatex:hover,.md-typeset div.diagram:hover,.md-typeset div.mermaid:hover,.md-typeset mermaid-div:hover,.md-typeset pre.arithmatex:hover,.md-typeset pre>code:hover,.md-typeset__scrollwrap:hover{scrollbar-color:var(--md-accent-fg-color) transparent}@media screen and (max-width:59.9375em){.md-header-nav__scheme{padding-right:0}label[for=__search]{padding-left:0}[data-md-color-scheme=dracula] .md-nav__source{color:var(--md-text-color);background-color:var(--md-primary-fg-color--transparent)}[data-md-color-scheme=dracula] .md-nav .md-nav__title{color:var(--md-text-color);background-color:var(--md-header-bg-color);border-bottom:.05rem solid var(--md-primary-fg-color)}}@media screen and (max-width:44.9375em){.md-typeset>diagram-div{margin-right:-.8rem;margin-left:-.8rem}.md-typeset>.collapse-code{margin-right:-.8rem;margin-left:-.8rem}.md-typeset>.collapse-code label.collapse{left:.8rem}[dir=ltr] .md-content__inner>.tabbed-set .tabbed-labels{padding-left:0}.md-content__inner>.tabbed-set .tabbed-labels{max-width:100%;margin:0;padding-inline-start:0;scroll-padding-inline-start:0}.md-content__inner>.tabbed-set 
.tabbed-labels::after{padding-inline-end:0;content:none}.md-content__inner>.tabbed-set .tabbed-labels~.tabbed-control--prev{margin-inline-start:0;padding-inline-start:0}.md-content__inner>.tabbed-set .tabbed-labels~.tabbed-control--next{margin-inline-end:0;padding-inline-end:0}}@media screen and (max-width:76.1875em){[data-md-color-scheme=dracula] .md-nav--primary .md-nav__item--active>.md-nav__link:not(:hover){color:var(--md-primary-fg-color)}[data-md-color-scheme=dracula] .md-nav--primary .md-nav__title{color:var(--md-text-color);background-color:var(--md-header-bg-color);border-bottom:.05rem solid var(--md-primary-fg-color)}}\n/*# sourceMappingURL=extra-fb5a2a1c86.css.map */\n"
  },
  {
    "path": "docs/theme/assets/pymdownx-extras/extra-loader-MCFnu0Wd.js",
    "content": "function _typeof(t){return _typeof=\"function\"==typeof Symbol&&\"symbol\"==typeof Symbol.iterator?function(t){return typeof t}:function(t){return t&&\"function\"==typeof Symbol&&t.constructor===Symbol&&t!==Symbol.prototype?\"symbol\":typeof t},_typeof(t)}!function(){\"use strict\";function t(){t=function(){return r};var e,r={},n=Object.prototype,o=n.hasOwnProperty,i=Object.defineProperty||function(t,e,r){t[e]=r.value},a=\"function\"==typeof Symbol?Symbol:{},c=a.iterator||\"@@iterator\",u=a.asyncIterator||\"@@asyncIterator\",l=a.toStringTag||\"@@toStringTag\";function f(t,e,r){return Object.defineProperty(t,e,{value:r,enumerable:!0,configurable:!0,writable:!0}),t[e]}try{f({},\"\")}catch(e){f=function(t,e,r){return t[e]=r}}function s(t,e,r,n){var o=e&&e.prototype instanceof g?e:g,a=Object.create(o.prototype),c=new N(n||[]);return i(a,\"_invoke\",{value:S(t,r,c)}),a}function h(t,e,r){try{return{type:\"normal\",arg:t.call(e,r)}}catch(t){return{type:\"throw\",arg:t}}}r.wrap=s;var d=\"suspendedStart\",p=\"suspendedYield\",y=\"executing\",m=\"completed\",v={};function g(){}function b(){}function w(){}var x={};f(x,c,(function(){return this}));var E=Object.getPrototypeOf,L=E&&E(E(C([])));L&&L!==n&&o.call(L,c)&&(x=L);var O=w.prototype=g.prototype=Object.create(x);function _(t){[\"next\",\"throw\",\"return\"].forEach((function(e){f(t,e,(function(t){return this._invoke(e,t)}))}))}function j(t,e){function r(n,i,a,c){var u=h(t[n],t,i);if(\"throw\"!==u.type){var l=u.arg,f=l.value;return f&&\"object\"==_typeof(f)&&o.call(f,\"__await\")?e.resolve(f.__await).then((function(t){r(\"next\",t,a,c)}),(function(t){r(\"throw\",t,a,c)})):e.resolve(f).then((function(t){l.value=t,a(l)}),(function(t){return r(\"throw\",t,a,c)}))}c(u.arg)}var n;i(this,\"_invoke\",{value:function(t,o){function i(){return new e((function(e,n){r(t,o,e,n)}))}return n=n?n.then(i,i):i()}})}function S(t,r,n){var o=d;return function(i,a){if(o===y)throw new Error(\"Generator is already 
running\");if(o===m){if(\"throw\"===i)throw a;return{value:e,done:!0}}for(n.method=i,n.arg=a;;){var c=n.delegate;if(c){var u=P(c,n);if(u){if(u===v)continue;return u}}if(\"next\"===n.method)n.sent=n._sent=n.arg;else if(\"throw\"===n.method){if(o===d)throw o=m,n.arg;n.dispatchException(n.arg)}else\"return\"===n.method&&n.abrupt(\"return\",n.arg);o=y;var l=h(t,r,n);if(\"normal\"===l.type){if(o=n.done?m:p,l.arg===v)continue;return{value:l.arg,done:n.done}}\"throw\"===l.type&&(o=m,n.method=\"throw\",n.arg=l.arg)}}}function P(t,r){var n=r.method,o=t.iterator[n];if(o===e)return r.delegate=null,\"throw\"===n&&t.iterator.return&&(r.method=\"return\",r.arg=e,P(t,r),\"throw\"===r.method)||\"return\"!==n&&(r.method=\"throw\",r.arg=new TypeError(\"The iterator does not provide a '\"+n+\"' method\")),v;var i=h(o,t.iterator,r.arg);if(\"throw\"===i.type)return r.method=\"throw\",r.arg=i.arg,r.delegate=null,v;var a=i.arg;return a?a.done?(r[t.resultName]=a.value,r.next=t.nextLoc,\"return\"!==r.method&&(r.method=\"next\",r.arg=e),r.delegate=null,v):a:(r.method=\"throw\",r.arg=new TypeError(\"iterator result is not an object\"),r.delegate=null,v)}function k(t){var e={tryLoc:t[0]};1 in t&&(e.catchLoc=t[1]),2 in t&&(e.finallyLoc=t[2],e.afterLoc=t[3]),this.tryEntries.push(e)}function M(t){var e=t.completion||{};e.type=\"normal\",delete e.arg,t.completion=e}function N(t){this.tryEntries=[{tryLoc:\"root\"}],t.forEach(k,this),this.reset(!0)}function C(t){if(t||\"\"===t){var r=t[c];if(r)return r.call(t);if(\"function\"==typeof t.next)return t;if(!isNaN(t.length)){var n=-1,i=function r(){for(;++n<t.length;)if(o.call(t,n))return r.value=t[n],r.done=!1,r;return r.value=e,r.done=!0,r};return i.next=i}}throw new TypeError(_typeof(t)+\" is not iterable\")}return b.prototype=w,i(O,\"constructor\",{value:w,configurable:!0}),i(w,\"constructor\",{value:b,configurable:!0}),b.displayName=f(w,l,\"GeneratorFunction\"),r.isGeneratorFunction=function(t){var e=\"function\"==typeof 
t&&t.constructor;return!!e&&(e===b||\"GeneratorFunction\"===(e.displayName||e.name))},r.mark=function(t){return Object.setPrototypeOf?Object.setPrototypeOf(t,w):(t.__proto__=w,f(t,l,\"GeneratorFunction\")),t.prototype=Object.create(O),t},r.awrap=function(t){return{__await:t}},_(j.prototype),f(j.prototype,u,(function(){return this})),r.AsyncIterator=j,r.async=function(t,e,n,o,i){void 0===i&&(i=Promise);var a=new j(s(t,e,n,o),i);return r.isGeneratorFunction(e)?a:a.next().then((function(t){return t.done?t.value:a.next()}))},_(O),f(O,l,\"Generator\"),f(O,c,(function(){return this})),f(O,\"toString\",(function(){return\"[object Generator]\"})),r.keys=function(t){var e=Object(t),r=[];for(var n in e)r.push(n);return r.reverse(),function t(){for(;r.length;){var n=r.pop();if(n in e)return t.value=n,t.done=!1,t}return t.done=!0,t}},r.values=C,N.prototype={constructor:N,reset:function(t){if(this.prev=0,this.next=0,this.sent=this._sent=e,this.done=!1,this.delegate=null,this.method=\"next\",this.arg=e,this.tryEntries.forEach(M),!t)for(var r in this)\"t\"===r.charAt(0)&&o.call(this,r)&&!isNaN(+r.slice(1))&&(this[r]=e)},stop:function(){this.done=!0;var t=this.tryEntries[0].completion;if(\"throw\"===t.type)throw t.arg;return this.rval},dispatchException:function(t){if(this.done)throw t;var r=this;function n(n,o){return c.type=\"throw\",c.arg=t,r.next=n,o&&(r.method=\"next\",r.arg=e),!!o}for(var i=this.tryEntries.length-1;i>=0;--i){var a=this.tryEntries[i],c=a.completion;if(\"root\"===a.tryLoc)return n(\"end\");if(a.tryLoc<=this.prev){var u=o.call(a,\"catchLoc\"),l=o.call(a,\"finallyLoc\");if(u&&l){if(this.prev<a.catchLoc)return n(a.catchLoc,!0);if(this.prev<a.finallyLoc)return n(a.finallyLoc)}else if(u){if(this.prev<a.catchLoc)return n(a.catchLoc,!0)}else{if(!l)throw new Error(\"try statement without catch or finally\");if(this.prev<a.finallyLoc)return n(a.finallyLoc)}}}},abrupt:function(t,e){for(var r=this.tryEntries.length-1;r>=0;--r){var 
n=this.tryEntries[r];if(n.tryLoc<=this.prev&&o.call(n,\"finallyLoc\")&&this.prev<n.finallyLoc){var i=n;break}}i&&(\"break\"===t||\"continue\"===t)&&i.tryLoc<=e&&e<=i.finallyLoc&&(i=null);var a=i?i.completion:{};return a.type=t,a.arg=e,i?(this.method=\"next\",this.next=i.finallyLoc,v):this.complete(a)},complete:function(t,e){if(\"throw\"===t.type)throw t.arg;return\"break\"===t.type||\"continue\"===t.type?this.next=t.arg:\"return\"===t.type?(this.rval=this.arg=t.arg,this.method=\"return\",this.next=\"end\"):\"normal\"===t.type&&e&&(this.next=e),v},finish:function(t){for(var e=this.tryEntries.length-1;e>=0;--e){var r=this.tryEntries[e];if(r.finallyLoc===t)return this.complete(r.completion,r.afterLoc),M(r),v}},catch:function(t){for(var e=this.tryEntries.length-1;e>=0;--e){var r=this.tryEntries[e];if(r.tryLoc===t){var n=r.completion;if(\"throw\"===n.type){var o=n.arg;M(r)}return o}}throw new Error(\"illegal catch attempt\")},delegateYield:function(t,r,n){return this.delegate={iterator:C(t),resultName:r,nextLoc:n},\"next\"===this.method&&(this.arg=e),v}},r}function e(t,e,r,n,o,i,a){try{var c=t[i](a),u=c.value}catch(t){return void r(t)}c.done?e(u):Promise.resolve(u).then(n,o)}function r(t,e){if(!(t instanceof e))throw new TypeError(\"Cannot call a class as a function\")}function n(t,e){for(var r=0;r<e.length;r++){var n=e[r];n.enumerable=n.enumerable||!1,n.configurable=!0,\"value\"in n&&(n.writable=!0),Object.defineProperty(t,(o=n.key,i=void 0,i=function(t,e){if(\"object\"!==_typeof(t)||null===t)return t;var r=t[Symbol.toPrimitive];if(void 0!==r){var n=r.call(t,e||\"default\");if(\"object\"!==_typeof(n))return n;throw new TypeError(\"@@toPrimitive must return a primitive value.\")}return(\"string\"===e?String:Number)(t)}(o,\"string\"),\"symbol\"===_typeof(i)?i:String(i)),n)}var o,i}function o(t,e){if(\"function\"!=typeof e&&null!==e)throw new TypeError(\"Super expression must either be null or a 
function\");t.prototype=Object.create(e&&e.prototype,{constructor:{value:t,writable:!0,configurable:!0}}),Object.defineProperty(t,\"prototype\",{writable:!1}),e&&a(t,e)}function i(t){return i=Object.setPrototypeOf?Object.getPrototypeOf.bind():function(t){return t.__proto__||Object.getPrototypeOf(t)},i(t)}function a(t,e){return a=Object.setPrototypeOf?Object.setPrototypeOf.bind():function(t,e){return t.__proto__=e,t},a(t,e)}function c(){if(\"undefined\"==typeof Reflect||!Reflect.construct)return!1;if(Reflect.construct.sham)return!1;if(\"function\"==typeof Proxy)return!0;try{return Boolean.prototype.valueOf.call(Reflect.construct(Boolean,[],(function(){}))),!0}catch(t){return!1}}function u(t,e,r){return u=c()?Reflect.construct.bind():function(t,e,r){var n=[null];n.push.apply(n,e);var o=new(Function.bind.apply(t,n));return r&&a(o,r.prototype),o},u.apply(null,arguments)}function l(t){var e=\"function\"==typeof Map?new Map:void 0;return l=function(t){if(null===t||!function(t){try{return-1!==Function.toString.call(t).indexOf(\"[native code]\")}catch(e){return\"function\"==typeof t}}(t))return t;if(\"function\"!=typeof t)throw new TypeError(\"Super expression must either be null or a function\");if(void 0!==e){if(e.has(t))return e.get(t);e.set(t,r)}function r(){return u(t,arguments,i(this).constructor)}return r.prototype=Object.create(t.prototype,{constructor:{value:r,enumerable:!1,writable:!0,configurable:!0}}),a(r,t)},l(t)}function f(t,e){if(e&&(\"object\"===_typeof(e)||\"function\"==typeof e))return e;if(void 0!==e)throw new TypeError(\"Derived constructors may only return object or undefined\");return function(t){if(void 0===t)throw new ReferenceError(\"this hasn't been initialised - super() hasn't been called\");return t}(t)}var s,h,d,p,y=function(){var a,u=(a=t().mark((function e(a){var u,s,h,d,p,y,m,v,g,b,w,x,E,L,O,_;return t().wrap((function(t){for(;;)switch(t.prev=t.next){case 0:u=function(t){o(d,t);var e,a,u,l,s,h=(e=d,a=c(),function(){var t,r=i(e);if(a){var 
n=i(this).constructor;t=Reflect.construct(r,arguments,n)}else t=r.apply(this,arguments);return f(this,t)});function d(){var t;r(this,d);var e=(t=h.call(this)).attachShadow({mode:\"open\"}),n=document.createElement(\"style\");return n.textContent=\"\\n      :host {\\n        display: block;\\n        line-height: initial;\\n        font-size: 16px;\\n      }\\n      div.diagram {\\n        margin: 0;\\n        overflow: visible;\\n      }\",e.appendChild(n),t}return u=d,l&&n(u.prototype,l),s&&n(u,s),Object.defineProperty(u,\"prototype\",{writable:!1}),u}(l(HTMLElement)),void 0===customElements.get(\"diagram-div\")&&customElements.define(\"diagram-div\",u),s=function(t){for(var e=\"\",r=0;r<t.childNodes.length;r++){var n=t.childNodes[r];if(\"code\"===n.tagName.toLowerCase())for(var o=0;o<n.childNodes.length;o++){var i=n.childNodes[o];if(\"#text\"===i.nodeName&&!/^\\s*$/.test(i.nodeValue)){e=i.nodeValue;break}}}return e},h={startOnLoad:!1,theme:\"default\",flowchart:{htmlLabels:!1},er:{useMaxWidth:!1},sequence:{useMaxWidth:!1,noteFontWeight:\"14px\",actorFontSize:\"14px\",messageFontSize:\"16px\"}},mermaid.mermaidAPI.globalReset(),d=null;try{d=document.querySelector(\"[data-md-color-scheme]\").getAttribute(\"data-md-color-scheme\")}catch(t){d=\"default\"}p=\"undefined\"==typeof mermaidConfig?h:mermaidConfig[d]||mermaidConfig.default||h,mermaid.initialize(p),y=document.querySelectorAll(\"pre.\".concat(a,\", diagram-div\")),m=document.querySelector(\"html body\"),v=0;case 12:if(!(v<y.length)){t.next=47;break}return g=y[v],b=\"diagram-div\"===g.tagName.toLowerCase()?g.shadowRoot.querySelector(\"pre.\".concat(a)):g,(w=document.createElement(\"div\")).style.visibility=\"hidden\",w.style.display=\"display\",w.style.padding=\"0\",w.style.margin=\"0\",w.style.lineHeight=\"initial\",w.style.fontSize=\"16px\",m.appendChild(w),t.prev=23,t.next=26,mermaid.render(\"_diagram_\".concat(v),s(b),w);case 
26:x=t.sent,E=x.svg,L=x.bindFunctions,(O=document.createElement(\"div\")).className=a,O.innerHTML=E,L&&L(O),(_=document.createElement(\"diagram-div\")).shadowRoot.appendChild(O),g.parentNode.insertBefore(_,g),b.style.display=\"none\",_.shadowRoot.appendChild(b),b!==g&&g.parentNode.removeChild(g),t.next=43;break;case 41:t.prev=41,t.t0=t.catch(23);case 43:m.contains(w)&&m.removeChild(w);case 44:v++,t.next=12;break;case 47:case\"end\":return t.stop()}}),e,null,[[23,41]])})),function(){var t=this,r=arguments;return new Promise((function(n,o){var i=a.apply(t,r);function c(t){e(i,n,o,c,u,\"next\",t)}function u(t){e(i,n,o,c,u,\"throw\",t)}c(void 0)}))});return function(t){return u.apply(this,arguments)}}(),m=function(t,e){if(\"katex\"===e)for(var r=document.querySelectorAll(\".\".concat(t)),n=0;n<r.length;n++){var o=r[n].textContent||r[n].innerText;o.startsWith(\"\\\\(\")&&o.endsWith(\"\\\\)\")?katex.render(o.slice(2,-2),r[n],{displayMode:!1}):o.startsWith(\"\\\\[\")&&o.endsWith(\"\\\\]\")&&katex.render(o.slice(2,-2),r[n],{displayMode:!0})}else\"mathjax\"===e&&(MathJax.startup.output.clearCache(),MathJax.typesetClear(),MathJax.texReset(),MathJax.typesetPromise())};s=Promise.resolve(),h=Promise.resolve(),d=new MutationObserver((function(t){t.forEach((function(t){if(\"attributes\"===t.type){var e=t.target.getAttribute(\"data-md-color-scheme\");e||(e=\"default\"),localStorage.setItem(\"data-md-color-scheme\",e),\"undefined\"!=typeof mermaid&&y(\"diagram\")}}))})),p=function(){d.observe(document.querySelector(\"body\"),{attributeFilter:[\"data-md-color-scheme\"]}),\"undefined\"!=typeof mermaid&&(s=s.then((function(){y(\"diagram\")})).catch((function(t){console.log(\"UML loading failed...\".concat(t))}))),\"undefined\"!=typeof katex?h=h.then((function(){m(\"arithmatex\",\"katex\")})).catch((function(t){console.log(\"Math loading failed...\".concat(t))})):\"undefined\"!=typeof MathJax&&\"typesetPromise\"in 
MathJax&&(h=h.then((function(){m(\"arithmatex\",\"mathjax\")})).catch((function(t){console.log(\"Math loading failed...\".concat(t))})))},window.document$?window.document$.subscribe(p):document.addEventListener(\"DOMContentLoaded\",p)}();\n//# sourceMappingURL=extra-loader-MCFnu0Wd.js.map\n"
  },
  {
    "path": "docs/theme/assets/pymdownx-extras/material-extra-3rdparty-E-i8w1WA.js",
    "content": "!function(){\"use strict\";\"mathjaxConfig\"in window||(window.MathJax={tex:{inlineMath:[[\"\\\\(\",\"\\\\)\"]],displayMath:[[\"\\\\[\",\"\\\\]\"]],processEscapes:!0,processEnvironments:!0,tagSide:\"right\",tagIndent:\".8em\",multlineWidth:\"85%\",tags:\"ams\"},options:{ignoreHtmlClass:\".*\",processHtmlClass:\"arithmatex\"}}),\"mermaidConfig\"in window||(window.mermaidConfig={dracula:{startOnLoad:!1,theme:\"base\",themeCSS:\"        * {          --drac-page-bg: hsl(233, 15%, 23%);          --drac-white-fg: hsl(60, 30%, 96%);          --drac-purple-fg: hsl(265, 89%, 78%);          --drac-purple-bg: hsl(265, 25%, 39%);          --drac-yellow-fg: hsl(65, 92%, 76%);          --drac-blue-fg: hsl(225, 27%, 51%);        }                /* General */        [id^='_diagram'] {          background-color: var(--drac-page-bg);        }                /* Entity Relationship */        rect.relationshipLabelBox {          opacity: 0.75 !important;          fill: var(--drac-purple-bg) !important;        }        defs marker#ZERO_OR_MORE_END circle {          fill: var(--drac-page-bg) !important;          stroke: var(--drac-purple-fg) !important;        }        defs marker#ZERO_OR_MORE_END path {          stroke: var(--drac-purple-fg) !important;        }        defs marker#ZERO_OR_MORE_START circle{          fill: var(--drac-page-bg) !important;          stroke: var(--drac-purple-fg) !important;        }        defs marker#ZERO_OR_MORE_START path {          stroke: var(--drac-purple-fg) !important;        }        defs marker#ONLY_ONE_START path {          stroke: var(--drac-purple-fg) !important;        }        defs marker#ONLY_ONE_END path {          stroke: var(--drac-purple-fg) !important;        }        defs marker#ZERO_OR_ONE_START path {          stroke: var(--drac-purple-fg) !important;        }        defs marker#ZERO_OR_ONE_END path {          stroke: var(--drac-purple-fg) !important;        }        defs marker#ONE_OR_MORE_START path {          
stroke: var(--drac-purple-fg) !important;        }        defs marker#ONE_OR_MORE_END path {          stroke: var(--drac-purple-fg) !important;        }                /* Flowchart */        .labelText,        :not(.branchLabel) > .label text {          fill: var(--drac-purple-fg);        }        .edgeLabel text {          fill: var(--drac-purple-fg) !important;        }        .edgeLabel rect {          opacity: 0.75 !important;          fill: var(--drac-purple-bg) !important;        }                .grey rect.label-container {           fill: var(--drac-purple-bg) !important;          stroke: var(--drac-purple-fg) !important;        }         /* Sequence */        line[id^='actor'] {          stroke: var(--drac-blue-fg);        }        .noteText {          fill: var(--drac-yellow-fg);        }                /* Gantt */        .sectionTitle {          fill: var(--drac-purple-fg) !important;        }                .grid .tick line {          stroke: var(--drac-blue-fg) !important;        }                .grid .tick text {          fill: var(--drac-purple-fg);        }                /* Class Diagram */        .statediagram-state rect.divider {          fill: transparent !important;        }                /* State Diagram */        .stateGroup circle[style$=\\\"fill: black;\\\"] {          fill: var(--drac-purple-bg) !important;          stroke: var(--drac-purple-bg) !important;        }                .stateGroup circle[style$=\\\"fill: white;\\\"] {          fill: var(--drac-purple-bg) !important;          stroke: var(--drac-purple-fg) !important;        }                .stateGroup .composit {          fill: var(--drac-page-bg);        }        /* Pie */        text.slice {          fill: var(--drac-white-fg) !important;        }        /* Git Graph */        .commit-bullets .commit-reverse,        .commit-bullets .commit-merge,         .commit-bullets .commit-highlight-inner {          fill: var(--drac-page-bg) !important;          stroke: 
var(--drac-page-bg) !important;        }        \",themeVariables:{darkMode:!0,background:\"#323443\",mainBkg:\"#604b7d\",textColor:\"#bf95f9\",lineColor:\"#bf95f9\",errorBkgColor:\"#802c2c\",errorTextColor:\"#ff5757\",primaryColor:\"#604b7d\",primaryTextColor:\"#bf95f9\",primaryBorderColor:\"#bf95f9\",secondaryColor:\"#297d3e\",secondaryTextColor:\"#52fa7c\",secondaryBorderColor:\"#52fa7c\",tertiaryColor:\"#303952\",tertiaryTextColor:\"#6071a4\",tertiaryBorderColor:\"#6071a4\",noteBkgColor:\"#797d45\",noteTextColor:\"#f1fa89\",noteBorderColor:\"#f1fa89\",edgeLabelBackground:\"#604b7d\",edgeLabelText:\"#604b7d\",actorLineColor:\"#6071a4\",activeTaskBkgColor:\"#803d63\",activeTaskBorderColor:\"#ff7ac6\",doneTaskBkgColor:\"#297d3e\",doneTaskBorderColor:\"#52fa7c\",critBkgColor:\"#802c2c\",critBorderColor:\"#ff5757\",taskTextColor:\"#bf95f9\",taskTextOutsideColor:\"#bf95f9\",taskTextLightColor:\"#bf95f9\",sectionBkgColor:\"#bf95f9b3\",sectionBkgColor2:\"#bf95f966\",altSectionBkgColor:\"#323443\",todayLineColor:\"#ff7ac6\",gridColor:\"#6071a4\",defaultLinkColor:\"#8be8fd\",altBackground:\"#bf95f9\",classText:\"#bf95f9\",fillType0:\"#406080\",fillType1:\"#46747f\",fillType2:\"#297d3e\",fillType3:\"#805c36\",fillType4:\"#803d63\",fillType5:\"#604b7d\",fillType6:\"#802c2c\",fillType7:\"#797d45\",fillType8:\"#7c7c79\",git0:\"#ff5555\",git1:\"#ffb86c\",git2:\"#f1fa8c\",git3:\"#50fa7b\",git4:\"#8be9fd\",git5:\"#809fff\",git6:\"#ff79c6\",git7:\"#bd93f9\",gitInv0:\"#ff5555\",gitInv1:\"#ffb86c\",gitInv2:\"#f1fa8c\",gitInv3:\"#50fa7b\",gitInv4:\"#8be9fd\",gitInv5:\"#809fff\",gitInv6:\"#ff79c6\",gitInv7:\"#bd93f9\",gitBranchLabel0:\"#323443\",gitBranchLabel1:\"#323443\",gitBranchLabel2:\"#323443\",gitBranchLabel3:\"#323443\",gitBranchLabel4:\"#323443\",gitBranchLabel5:\"#323443\",gitBranchLabel6:\"#323443\",gitBranchLabel7:\"#323443\",commitLabelColor:\"#52fa7c\",commitLabelBackground:\"#297d3e\"},flowchart:{htmlLabels:!1,useMaxWidth:!1},er:{useMaxWidth:!1},sequence:{useMaxWidth:!
1,noteFontWeight:\"14px\",actorFontSize:\"14px\",messageFontSize:\"16px\"},journey:{useMaxWidth:!1},pie:{useMaxWidth:!1},gantt:{useMaxWidth:!1},gitGraph:{useMaxWidth:!1}},default:{startOnLoad:!1,theme:\"default\",flowchart:{htmlLabels:!1,useMaxWidth:!1},er:{useMaxWidth:!1},sequence:{useMaxWidth:!1,noteFontWeight:\"14px\",actorFontSize:\"14px\",messageFontSize:\"16px\"},journey:{useMaxWidth:!1},pie:{useMaxWidth:!1},gantt:{useMaxWidth:!1},gitGraph:{useMaxWidth:!1}},slate:{startOnLoad:!1,theme:\"dark\",flowchart:{htmlLabels:!1,useMaxWidth:!1},er:{useMaxWidth:!1},sequence:{useMaxWidth:!1,noteFontWeight:\"14px\",actorFontSize:\"14px\",messageFontSize:\"16px\"},journey:{useMaxWidth:!1},pie:{useMaxWidth:!1},gantt:{useMaxWidth:!1},gitGraph:{useMaxWidth:!1}}})}();\n//# sourceMappingURL=material-extra-3rdparty-E-i8w1WA.js.map\n"
  },
  {
    "path": "docs/theme/assets/pymdownx-extras/material-extra-theme-TVq-kNRT.js",
    "content": "!function(){\"use strict\";var e;e=function(e){\"true\"===localStorage.getItem(\"data-md-prefers-color-scheme\")&&document.querySelector(\"body\").setAttribute(\"data-md-color-scheme\",e.matches?\"dracula\":\"default\")},new MutationObserver((function(t){t.forEach((function(t){if(\"childList\"===t.type&&t.addedNodes.length)for(var a=0;a<t.addedNodes.length;a++){var r=t.addedNodes[a];if(1===r.nodeType&&\"body\"===r.tagName.toLowerCase()){d=r,o=void 0,c=void 0,l=void 0,o=\"not all\"!==window.matchMedia(\"(prefers-color-scheme)\").media,c=localStorage.getItem(\"data-md-color-scheme\"),l=localStorage.getItem(\"data-md-prefers-color-scheme\"),c||(c=\"dracula\"),l||(l=\"false\"),\"true\"===l&&o?c=window.matchMedia(\"(prefers-color-scheme: dark)\").matches?\"dracula\":\"default\":l=\"false\",d.setAttribute(\"data-md-prefers-color-scheme\",l),d.setAttribute(\"data-md-color-scheme\",c),o&&window.matchMedia(\"(prefers-color-scheme: dark)\").addListener(e);break}}var d,o,c,l}))})).observe(document.querySelector(\"html\"),{childList:!0}),window.toggleScheme=function(){var e=document.querySelector(\"body\"),t=\"not all\"!==window.matchMedia(\"(prefers-color-scheme)\").media,a=e.getAttribute(\"data-md-color-scheme\"),r=e.getAttribute(\"data-md-prefers-color-scheme\");t&&\"default\"===a&&\"true\"!==r?(r=\"true\",a=window.matchMedia(\"(prefers-color-scheme: dark)\").matches?\"dracula\":\"default\"):t&&\"true\"===r?(r=\"false\",a=\"dracula\"):\"dracula\"===a?(r=\"false\",a=\"default\"):(r=\"false\",a=\"dracula\"),localStorage.setItem(\"data-md-prefers-color-scheme\",r),e.setAttribute(\"data-md-prefers-color-scheme\",r),e.setAttribute(\"data-md-color-scheme\",a)}}();\n//# sourceMappingURL=material-extra-theme-TVq-kNRT.js.map\n"
  },
  {
    "path": "docs/theme/main.html",
    "content": "{% extends \"base.html\" %}\n\n{% block libs %}\n{{ super() }}\n{% include \"partials/libs.html\" ignore missing %}\n{% endblock %}\n"
  },
  {
    "path": "docs/theme/partials/footer.html",
    "content": "\n{% import \"partials/language.html\" as lang with context %}\n<footer class=\"md-footer\">\n  {% if page.previous_page or page.next_page %}\n    <nav\n      class=\"md-footer__inner md-grid\"\n      aria-label=\"{{ lang.t('footer.title') }}\"\n    >\n      {% if page.previous_page %}\n        <a\n          href=\"{{ page.previous_page.url | url }}\"\n          class=\"md-footer__link md-footer__link--prev\"\n          rel=\"prev\"\n        >\n          <div class=\"md-footer__button md-icon\">\n            {% include \".icons/material/arrow-left.svg\" %}\n          </div>\n          <div class=\"md-footer__title\">\n            <div class=\"md-ellipsis\">\n              <span class=\"md-footer__direction\">\n                {{ lang.t(\"footer.previous\") }}\n              </span>\n              {{ page.previous_page.title }}\n            </div>\n          </div>\n        </a>\n      {% endif %}\n      {% if page.next_page %}\n        <a\n          href=\"{{ page.next_page.url | url }}\"\n          class=\"md-footer__link md-footer__link--next\"\n          rel=\"next\"\n        >\n          <div class=\"md-footer__title\">\n            <div class=\"md-ellipsis\">\n              <span class=\"md-footer__direction\">\n                {{ lang.t(\"footer.next\") }}\n              </span>\n              {{ page.next_page.title }}\n            </div>\n          </div>\n          <div class=\"md-footer__button md-icon\">\n            {% include \".icons/material/arrow-right.svg\" %}\n          </div>\n        </a>\n      {% endif %}\n    </nav>\n  {% endif %}\n</footer>\n"
  },
  {
    "path": "docs/theme/partials/header.html",
    "content": "\n{% set site_url = config.site_url | d(nav.homepage.url, true) | url %}\n{% if not config.use_directory_urls and site_url[0] == site_url[-1] == \".\" %}\n  {% set site_url = site_url ~ \"/index.html\" %}\n{% endif %}\n<header class=\"md-header\" data-md-component=\"header\">\n  <nav\n    class=\"md-header__inner md-grid\"\n    aria-label=\"{{ lang.t('header.title') }}\"\n  >\n    <a\n      href=\"{{ site_url }}\"\n      title=\"{{ config.site_name | e }}\"\n      class=\"md-header__button md-logo\"\n      aria-label=\"{{ config.site_name }}\"\n    >\n      {% include \"partials/logo.html\" %}\n    </a>\n    <label class=\"md-header__button md-icon\" for=\"__drawer\">\n      {% include \".icons/material/menu\" ~ \".svg\" %}\n    </label>\n    <div class=\"md-header__title\" data-md-component=\"header-title\">\n      <div class=\"md-header__ellipsis\">\n        <div class=\"md-header__topic\">\n          <span class=\"md-ellipsis\">\n            {{ config.site_name }}\n          </span>\n        </div>\n        <div class=\"md-header__topic\" data-md-component=\"header-topic\">\n          <span class=\"md-ellipsis\">\n            {% if page and page.meta and page.meta.title %}\n              {{ page.meta.title }}\n            {% else %}\n              {{ page.title }}\n            {% endif %}\n          </span>\n        </div>\n      </div>\n    </div>\n    <div class=\"md-header__options\">\n      <div class=\"md-header-nav__scheme md-header-nav__button md-source__icon md-icon\">\n          <a\n            href=\"javascript:toggleScheme();\"\n            title=\"Light mode\"\n            class=\"light-mode\"\n          >\n          {% set icon = \"material/weather-sunny\" %}\n          {% include \".icons/\" ~ icon ~ \".svg\" %}\n          </a>\n          <a\n            href=\"javascript:toggleScheme();\"\n            title=\"Dark mode\"\n            class=\"dark-mode\"\n          >\n          {% set icon = \"material/weather-night\" %}\n          
{% include \".icons/\" ~ icon ~ \".svg\" %}\n          </a>\n          <a\n            href=\"javascript:toggleScheme();\"\n            title=\"System preference\"\n            class=\"system-mode\"\n          >\n          {% set icon = \"material/theme-light-dark\" %}\n          {% include \".icons/\" ~ icon ~ \".svg\" %}\n          </a>\n          <!-- <a\n            href=\"javascript:toggleScheme();\"\n            title=\"Unknown scheme\"\n            class=\"unknown-mode\"\n          >\n          {% set icon = \"material/help-circle\" %}\n          {% include \".icons/\" ~ icon ~ \".svg\" %}\n          </a> -->\n      </div>\n    </div>\n    {% if \"material/search\" in config.plugins %}\n      <label class=\"md-header__button md-icon\" for=\"__search\">\n        {% include \".icons/material/magnify.svg\" %}\n      </label>\n      {% include \"partials/search.html\" %}\n    {% endif %}\n    {% if config.repo_url %}\n      <div class=\"md-header__source\">\n        {% include \"partials/source.html\" %}\n      </div>\n    {% endif %}\n  </nav>\n</header>\n"
  },
  {
    "path": "docs/theme/partials/libs.html",
    "content": "<script src=\"{{ 'assets/pymdownx-extras/material-extra-theme-TVq-kNRT.js' | url }}\" type=\"text/javascript\"></script>\n<script src=\"{{ 'assets/pymdownx-extras/material-extra-3rdparty-E-i8w1WA.js' | url }}\" type=\"text/javascript\"></script>\n"
  },
  {
    "path": "docs/usage.md",
    "content": "## 1. Add your AI models\n\n![resources tab](https://raw.githubusercontent.com/Cinnamon/kotaemon/main/docs/images/resources-tab.png)\n\n- The tool uses Large Language Model (LLMs) to perform various tasks in a QA pipeline.\n  So, you need to provide the application with access to the LLMs you want\n  to use.\n- You only need to provide at least one. However, it is recommended that you include all the LLMs\n  that you have access to, you will be able to switch between them while using the\n  application.\n\nTo add a model:\n\n1. Navigate to the `Resources` tab.\n2. Select the `LLMs` sub-tab.\n3. Select the `Add` sub-tab.\n4. Config the model to add:\n   - Give it a name.\n   - Pick a vendor/provider (e.g. `ChatOpenAI`).\n   - Provide the specifications.\n   - (Optional) Set the model as default.\n5. Click `Add` to add the model.\n6. Select `Embedding Models` sub-tab and repeat the step 3 to 5 to add an embedding model.\n\n<details markdown>\n\n<summary>(Optional) Configure model via the .env file</summary>\n\nAlternatively, you can configure the models via the `.env` file with the information needed to connect to the LLMs. This file is located in\nthe folder of the application. If you don't see it, you can create one.\n\nCurrently, the following providers are supported:\n\n### OpenAI\n\nIn the `.env` file, set the `OPENAI_API_KEY` variable with your OpenAI API key in order\nto enable access to OpenAI's models. There are other variables that can be modified,\nplease feel free to edit them to fit your case. Otherwise, the default parameter should\nwork for most people.\n\n```shell\nOPENAI_API_BASE=https://api.openai.com/v1\nOPENAI_API_KEY=<your OpenAI API key here>\nOPENAI_CHAT_MODEL=gpt-3.5-turbo\nOPENAI_EMBEDDINGS_MODEL=text-embedding-ada-002\n```\n\n### Azure OpenAI\n\nFor OpenAI models via Azure platform, you need to provide your Azure endpoint and API\nkey. 
You might also need to provide your deployments' names for the chat model and the\nembedding model depending on how you set up Azure deployment.\n\n```shell\nAZURE_OPENAI_ENDPOINT=\nAZURE_OPENAI_API_KEY=\nOPENAI_API_VERSION=2024-02-15-preview # could be different for you\nAZURE_OPENAI_CHAT_DEPLOYMENT=gpt-35-turbo # change to your deployment name\nAZURE_OPENAI_EMBEDDINGS_DEPLOYMENT=text-embedding-ada-002 # change to your deployment name\n```\n\n### Local models\n\nPros:\n\n- Privacy. Your documents will be stored and processed locally.\n- Choices. There is a wide range of LLMs in terms of size, domain, language to choose\n  from.\n- Cost. It's free.\n\nCons:\n\n- Quality. Local models are much smaller and thus have lower generative quality than\n  paid APIs.\n- Speed. Local models are deployed using your machine so the processing speed is\n  limited by your hardware.\n\n#### Find and download a LLM\n\nYou can search and download a LLM to be run locally from the [Hugging Face\nHub](https://huggingface.co/models). Currently, these model formats are supported:\n\n- GGUF\n\nYou should choose a model whose size is less than your device's memory and should leave\nabout 2 GB. For example, if you have 16 GB of RAM in total, of which 12 GB is available,\nthen you should choose a model that takes up at most 10 GB of RAM. Bigger models tend to\ngive better generation but also take more processing time.\n\nHere are some recommendations and their size in memory:\n\n- [Qwen1.5-1.8B-Chat-GGUF](https://huggingface.co/Qwen/Qwen1.5-1.8B-Chat-GGUF/resolve/main/qwen1_5-1_8b-chat-q8_0.gguf?download=true):\n  around 2 GB\n\n#### Enable local models\n\nTo add a local model to the model pool, set the `LOCAL_MODEL` variable in the `.env`\nfile to the path of the model file.\n\n```shell\nLOCAL_MODEL=<full path to your model file>\n```\n\nHere is how to get the full path of your model file:\n\n- On Windows 11: right click the file and select `Copy as Path`.\n</details>\n\n## 2. 
Upload your documents\n\n![file index tab](https://raw.githubusercontent.com/Cinnamon/kotaemon/main/docs/images/file-index-tab.png)\n\nIn order to do QA on your documents, you need to upload them to the application first.\nNavigate to the `File Index` tab and you will see 2 sections:\n\n1. File upload:\n   - Drag and drop your file to the UI or select it from your file system.\n     Then click `Upload and Index`.\n   - The application will take some time to process the file and show a message once it is done.\n2. File list:\n   - This section shows the list of files that have been uploaded to the application and allows users to delete them.\n\n## 3. Chat with your documents\n\n![chat tab](https://raw.githubusercontent.com/Cinnamon/kotaemon/main/docs/images/chat-tab.png)\n\nNow navigate back to the `Chat` tab. The chat tab is divided into 3 regions:\n\n1. Conversation Settings Panel\n   - Here you can select, create, rename, and delete conversations.\n     - By default, a new conversation is created automatically if no conversation is selected.\n   - Below that you have the file index, where you can choose whether to disable, select all files, or select which files to retrieve references from.\n     - If you choose \"Disabled\", no files will be considered as context during chat.\n     - If you choose \"Search All\", all files will be considered during chat.\n     - If you choose \"Select\", a dropdown will appear for you to select the\n       files to be considered during chat. If no files are selected, then no\n       files will be considered during chat.\n2. Chat Panel\n   - This is where you can chat with the chatbot.\n3. 
Information Panel\n\n![information panel](https://raw.githubusercontent.com/Cinnamon/kotaemon/develop/docs/images/info-panel-scores.png)\n\n- Supporting information such as the retrieved evidence and references will be\n  displayed here.\n- Direct citation for the answer produced by the LLM is highlighted.\n- The confidence score of the answer and the relevance scores of the evidence are displayed to quickly assess the quality of the answer and retrieved content.\n\n- Meaning of the scores displayed:\n  - **Answer confidence**: answer confidence level from the LLM model.\n  - **Relevance score**: overall relevance score between the evidence and the user question.\n  - **Vectorstore score**: relevance score from the vector embedding similarity calculation (shows `full-text search` if retrieved from the full-text search DB).\n  - **LLM relevant score**: relevance score from the LLM model (which judges the relevancy between question and evidence using a specific prompt).\n  - **Reranking score**: relevance score from the Cohere [reranking model](https://cohere.com/rerank).\n\nGenerally, the score quality is `LLM relevant score` > `Reranking score` > `Vectorstore score`.\nBy default, the overall relevance score is taken directly from the LLM relevant score. Pieces of evidence are sorted based on their overall relevance score and whether they have a citation or not.\n"
  },
  {
    "path": "flowsettings.py",
    "content": "import os\nfrom importlib.metadata import version\nfrom inspect import currentframe, getframeinfo\nfrom pathlib import Path\n\nfrom decouple import config\nfrom ktem.utils.lang import SUPPORTED_LANGUAGE_MAP\nfrom theflow.settings.default import *  # noqa\n\ncur_frame = currentframe()\nif cur_frame is None:\n    raise ValueError(\"Cannot get the current frame.\")\nthis_file = getframeinfo(cur_frame).filename\nthis_dir = Path(this_file).parent\n\n# change this if your app use a different name\nKH_PACKAGE_NAME = \"kotaemon_app\"\n\nKH_APP_VERSION = config(\"KH_APP_VERSION\", None)\nif not KH_APP_VERSION:\n    try:\n        # Caution: This might produce the wrong version\n        # https://stackoverflow.com/a/59533071\n        KH_APP_VERSION = version(KH_PACKAGE_NAME)\n    except Exception:\n        KH_APP_VERSION = \"local\"\n\nKH_GRADIO_SHARE = config(\"KH_GRADIO_SHARE\", default=False, cast=bool)\nKH_ENABLE_FIRST_SETUP = config(\"KH_ENABLE_FIRST_SETUP\", default=True, cast=bool)\nKH_DEMO_MODE = config(\"KH_DEMO_MODE\", default=False, cast=bool)\nKH_OLLAMA_URL = config(\"KH_OLLAMA_URL\", default=\"http://localhost:11434/v1/\")\n\n# App can be ran from anywhere and it's not trivial to decide where to store app data.\n# So let's use the same directory as the flowsetting.py file.\nKH_APP_DATA_DIR = this_dir / \"ktem_app_data\"\nKH_APP_DATA_EXISTS = KH_APP_DATA_DIR.exists()\nKH_APP_DATA_DIR.mkdir(parents=True, exist_ok=True)\n\n# User data directory\nKH_USER_DATA_DIR = KH_APP_DATA_DIR / \"user_data\"\nKH_USER_DATA_DIR.mkdir(parents=True, exist_ok=True)\n\n# markdown output directory\nKH_MARKDOWN_OUTPUT_DIR = KH_APP_DATA_DIR / \"markdown_cache_dir\"\nKH_MARKDOWN_OUTPUT_DIR.mkdir(parents=True, exist_ok=True)\n\n# chunks output directory\nKH_CHUNKS_OUTPUT_DIR = KH_APP_DATA_DIR / \"chunks_cache_dir\"\nKH_CHUNKS_OUTPUT_DIR.mkdir(parents=True, exist_ok=True)\n\n# zip output directory\nKH_ZIP_OUTPUT_DIR = KH_APP_DATA_DIR / 
\"zip_cache_dir\"\nKH_ZIP_OUTPUT_DIR.mkdir(parents=True, exist_ok=True)\n\n# zip input directory\nKH_ZIP_INPUT_DIR = KH_APP_DATA_DIR / \"zip_cache_dir_in\"\nKH_ZIP_INPUT_DIR.mkdir(parents=True, exist_ok=True)\n\n# HF models can be big, let's store them in the app data directory so that it's easier\n# for users to manage their storage.\n# ref: https://huggingface.co/docs/huggingface_hub/en/guides/manage-cache\nos.environ[\"HF_HOME\"] = str(KH_APP_DATA_DIR / \"huggingface\")\nos.environ[\"HF_HUB_CACHE\"] = str(KH_APP_DATA_DIR / \"huggingface\")\n\n# doc directory\nKH_DOC_DIR = this_dir / \"docs\"\n\nKH_MODE = \"dev\"\nKH_SSO_ENABLED = config(\"KH_SSO_ENABLED\", default=False, cast=bool)\n\nKH_FEATURE_CHAT_SUGGESTION = config(\n    \"KH_FEATURE_CHAT_SUGGESTION\", default=False, cast=bool\n)\nKH_FEATURE_USER_MANAGEMENT = config(\n    \"KH_FEATURE_USER_MANAGEMENT\", default=True, cast=bool\n)\nKH_USER_CAN_SEE_PUBLIC = None\nKH_FEATURE_USER_MANAGEMENT_ADMIN = str(\n    config(\"KH_FEATURE_USER_MANAGEMENT_ADMIN\", default=\"admin\")\n)\nKH_FEATURE_USER_MANAGEMENT_PASSWORD = str(\n    config(\"KH_FEATURE_USER_MANAGEMENT_PASSWORD\", default=\"admin\")\n)\nKH_ENABLE_ALEMBIC = False\nKH_DATABASE = f\"sqlite:///{KH_USER_DATA_DIR / 'sql.db'}\"\nKH_FILESTORAGE_PATH = str(KH_USER_DATA_DIR / \"files\")\nKH_WEB_SEARCH_BACKEND = (\n    \"kotaemon.indices.retrievers.tavily_web_search.WebSearch\"\n    # \"kotaemon.indices.retrievers.jina_web_search.WebSearch\"\n)\n\nKH_DOCSTORE = {\n    # \"__type__\": \"kotaemon.storages.ElasticsearchDocumentStore\",\n    # \"__type__\": \"kotaemon.storages.SimpleFileDocumentStore\",\n    \"__type__\": \"kotaemon.storages.LanceDBDocumentStore\",\n    \"path\": str(KH_USER_DATA_DIR / \"docstore\"),\n}\nKH_VECTORSTORE = {\n    # \"__type__\": \"kotaemon.storages.LanceDBVectorStore\",\n    \"__type__\": \"kotaemon.storages.ChromaVectorStore\",\n    # \"__type__\": \"kotaemon.storages.MilvusVectorStore\",\n    # \"__type__\": 
\"kotaemon.storages.QdrantVectorStore\",\n    \"path\": str(KH_USER_DATA_DIR / \"vectorstore\"),\n}\nKH_LLMS = {}\nKH_EMBEDDINGS = {}\nKH_RERANKINGS = {}\n\n# populate options from config\nif config(\"AZURE_OPENAI_API_KEY\", default=\"\") and config(\n    \"AZURE_OPENAI_ENDPOINT\", default=\"\"\n):\n    if config(\"AZURE_OPENAI_CHAT_DEPLOYMENT\", default=\"\"):\n        KH_LLMS[\"azure\"] = {\n            \"spec\": {\n                \"__type__\": \"kotaemon.llms.AzureChatOpenAI\",\n                \"temperature\": 0,\n                \"azure_endpoint\": config(\"AZURE_OPENAI_ENDPOINT\", default=\"\"),\n                \"api_key\": config(\"AZURE_OPENAI_API_KEY\", default=\"\"),\n                \"api_version\": config(\"OPENAI_API_VERSION\", default=\"\")\n                or \"2024-02-15-preview\",\n                \"azure_deployment\": config(\"AZURE_OPENAI_CHAT_DEPLOYMENT\", default=\"\"),\n                \"timeout\": 20,\n            },\n            \"default\": False,\n        }\n    if config(\"AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT\", default=\"\"):\n        KH_EMBEDDINGS[\"azure\"] = {\n            \"spec\": {\n                \"__type__\": \"kotaemon.embeddings.AzureOpenAIEmbeddings\",\n                \"azure_endpoint\": config(\"AZURE_OPENAI_ENDPOINT\", default=\"\"),\n                \"api_key\": config(\"AZURE_OPENAI_API_KEY\", default=\"\"),\n                \"api_version\": config(\"OPENAI_API_VERSION\", default=\"\")\n                or \"2024-02-15-preview\",\n                \"azure_deployment\": config(\n                    \"AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT\", default=\"\"\n                ),\n                \"timeout\": 10,\n            },\n            \"default\": False,\n        }\n\nOPENAI_DEFAULT = \"<YOUR_OPENAI_KEY>\"\nOPENAI_API_KEY = config(\"OPENAI_API_KEY\", default=OPENAI_DEFAULT)\nGOOGLE_API_KEY = config(\"GOOGLE_API_KEY\", default=\"your-key\")\nIS_OPENAI_DEFAULT = len(OPENAI_API_KEY) > 0 and OPENAI_API_KEY != OPENAI_DEFAULT\n\nif 
OPENAI_API_KEY:\n    KH_LLMS[\"openai\"] = {\n        \"spec\": {\n            \"__type__\": \"kotaemon.llms.ChatOpenAI\",\n            \"temperature\": 0,\n            \"base_url\": config(\"OPENAI_API_BASE\", default=\"\")\n            or \"https://api.openai.com/v1\",\n            \"api_key\": OPENAI_API_KEY,\n            \"model\": config(\"OPENAI_CHAT_MODEL\", default=\"gpt-4o-mini\"),\n            \"timeout\": 20,\n        },\n        \"default\": IS_OPENAI_DEFAULT,\n    }\n    KH_EMBEDDINGS[\"openai\"] = {\n        \"spec\": {\n            \"__type__\": \"kotaemon.embeddings.OpenAIEmbeddings\",\n            \"base_url\": config(\"OPENAI_API_BASE\", default=\"https://api.openai.com/v1\"),\n            \"api_key\": OPENAI_API_KEY,\n            \"model\": config(\n                \"OPENAI_EMBEDDINGS_MODEL\", default=\"text-embedding-3-large\"\n            ),\n            \"timeout\": 10,\n            \"context_length\": 8191,\n        },\n        \"default\": IS_OPENAI_DEFAULT,\n    }\n\nVOYAGE_API_KEY = config(\"VOYAGE_API_KEY\", default=\"\")\nif VOYAGE_API_KEY:\n    KH_EMBEDDINGS[\"voyageai\"] = {\n        \"spec\": {\n            \"__type__\": \"kotaemon.embeddings.VoyageAIEmbeddings\",\n            \"api_key\": VOYAGE_API_KEY,\n            \"model\": config(\"VOYAGE_EMBEDDINGS_MODEL\", default=\"voyage-3-large\"),\n        },\n        \"default\": False,\n    }\n    KH_RERANKINGS[\"voyageai\"] = {\n        \"spec\": {\n            \"__type__\": \"kotaemon.rerankings.VoyageAIReranking\",\n            \"model_name\": \"rerank-2\",\n            \"api_key\": VOYAGE_API_KEY,\n        },\n        \"default\": False,\n    }\n\nif config(\"LOCAL_MODEL\", default=\"\"):\n    KH_LLMS[\"ollama\"] = {\n        \"spec\": {\n            \"__type__\": \"kotaemon.llms.ChatOpenAI\",\n            \"base_url\": KH_OLLAMA_URL,\n            \"model\": config(\"LOCAL_MODEL\", default=\"qwen2.5:7b\"),\n            \"api_key\": \"ollama\",\n        },\n        \"default\": 
False,\n    }\n    KH_LLMS[\"ollama-long-context\"] = {\n        \"spec\": {\n            \"__type__\": \"kotaemon.llms.LCOllamaChat\",\n            \"base_url\": KH_OLLAMA_URL.replace(\"v1/\", \"\"),\n            \"model\": config(\"LOCAL_MODEL\", default=\"qwen2.5:7b\"),\n            \"num_ctx\": 8192,\n        },\n        \"default\": False,\n    }\n\n    KH_EMBEDDINGS[\"ollama\"] = {\n        \"spec\": {\n            \"__type__\": \"kotaemon.embeddings.OpenAIEmbeddings\",\n            \"base_url\": KH_OLLAMA_URL,\n            \"model\": config(\"LOCAL_MODEL_EMBEDDINGS\", default=\"nomic-embed-text\"),\n            \"api_key\": \"ollama\",\n        },\n        \"default\": False,\n    }\n    KH_EMBEDDINGS[\"fast_embed\"] = {\n        \"spec\": {\n            \"__type__\": \"kotaemon.embeddings.FastEmbedEmbeddings\",\n            \"model_name\": \"BAAI/bge-base-en-v1.5\",\n        },\n        \"default\": False,\n    }\n\n# additional LLM configurations\nKH_LLMS[\"claude\"] = {\n    \"spec\": {\n        \"__type__\": \"kotaemon.llms.chats.LCAnthropicChat\",\n        \"model_name\": \"claude-3-5-sonnet-20240620\",\n        \"api_key\": \"your-key\",\n    },\n    \"default\": False,\n}\nKH_LLMS[\"google\"] = {\n    \"spec\": {\n        \"__type__\": \"kotaemon.llms.chats.LCGeminiChat\",\n        \"model_name\": \"gemini-1.5-flash\",\n        \"api_key\": GOOGLE_API_KEY,\n    },\n    \"default\": not IS_OPENAI_DEFAULT,\n}\nKH_LLMS[\"groq\"] = {\n    \"spec\": {\n        \"__type__\": \"kotaemon.llms.ChatOpenAI\",\n        \"base_url\": \"https://api.groq.com/openai/v1\",\n        \"model\": \"llama-3.1-8b-instant\",\n        \"api_key\": \"your-key\",\n    },\n    \"default\": False,\n}\nKH_LLMS[\"cohere\"] = {\n    \"spec\": {\n        \"__type__\": \"kotaemon.llms.chats.LCCohereChat\",\n        \"model_name\": \"command-r-plus-08-2024\",\n        \"api_key\": config(\"COHERE_API_KEY\", default=\"your-key\"),\n    },\n    \"default\": 
False,\n}\nKH_LLMS[\"mistral\"] = {\n    \"spec\": {\n        \"__type__\": \"kotaemon.llms.ChatOpenAI\",\n        \"base_url\": \"https://api.mistral.ai/v1\",\n        \"model\": \"ministral-8b-latest\",\n        \"api_key\": config(\"MISTRAL_API_KEY\", default=\"your-key\"),\n    },\n    \"default\": False,\n}\n\n# additional embeddings configurations\nKH_EMBEDDINGS[\"cohere\"] = {\n    \"spec\": {\n        \"__type__\": \"kotaemon.embeddings.LCCohereEmbeddings\",\n        \"model\": \"embed-multilingual-v3.0\",\n        \"cohere_api_key\": config(\"COHERE_API_KEY\", default=\"your-key\"),\n        \"user_agent\": \"default\",\n    },\n    \"default\": False,\n}\nKH_EMBEDDINGS[\"google\"] = {\n    \"spec\": {\n        \"__type__\": \"kotaemon.embeddings.LCGoogleEmbeddings\",\n        \"model\": \"models/text-embedding-004\",\n        \"google_api_key\": GOOGLE_API_KEY,\n    },\n    \"default\": not IS_OPENAI_DEFAULT,\n}\nKH_EMBEDDINGS[\"mistral\"] = {\n    \"spec\": {\n        \"__type__\": \"kotaemon.embeddings.LCMistralEmbeddings\",\n        \"model\": \"mistral-embed\",\n        \"api_key\": config(\"MISTRAL_API_KEY\", default=\"your-key\"),\n    },\n    \"default\": False,\n}\n# KH_EMBEDDINGS[\"huggingface\"] = {\n#     \"spec\": {\n#         \"__type__\": \"kotaemon.embeddings.LCHuggingFaceEmbeddings\",\n#         \"model_name\": \"sentence-transformers/all-mpnet-base-v2\",\n#     },\n#     \"default\": False,\n# }\n\n# default reranking models\nKH_RERANKINGS[\"cohere\"] = {\n    \"spec\": {\n        \"__type__\": \"kotaemon.rerankings.CohereReranking\",\n        \"model_name\": \"rerank-multilingual-v2.0\",\n        \"cohere_api_key\": config(\"COHERE_API_KEY\", default=\"\"),\n    },\n    \"default\": True,\n}\n\nKH_REASONINGS = [\n    \"ktem.reasoning.simple.FullQAPipeline\",\n    \"ktem.reasoning.simple.FullDecomposeQAPipeline\",\n    \"ktem.reasoning.react.ReactAgentPipeline\",\n    
\"ktem.reasoning.rewoo.RewooAgentPipeline\",\n]\nKH_REASONINGS_USE_MULTIMODAL = config(\"USE_MULTIMODAL\", default=False, cast=bool)\nKH_VLM_ENDPOINT = \"{0}/openai/deployments/{1}/chat/completions?api-version={2}\".format(\n    config(\"AZURE_OPENAI_ENDPOINT\", default=\"\"),\n    config(\"OPENAI_VISION_DEPLOYMENT_NAME\", default=\"gpt-4o\"),\n    config(\"OPENAI_API_VERSION\", default=\"\"),\n)\n\n\nSETTINGS_APP: dict[str, dict] = {}\n\n\nSETTINGS_REASONING = {\n    \"use\": {\n        \"name\": \"Reasoning options\",\n        \"value\": None,\n        \"choices\": [],\n        \"component\": \"radio\",\n    },\n    \"lang\": {\n        \"name\": \"Language\",\n        \"value\": \"en\",\n        \"choices\": [(lang, code) for code, lang in SUPPORTED_LANGUAGE_MAP.items()],\n        \"component\": \"dropdown\",\n    },\n    \"max_context_length\": {\n        \"name\": \"Max context length (LLM)\",\n        \"value\": 32000,\n        \"component\": \"number\",\n    },\n}\n\nUSE_GLOBAL_GRAPHRAG = config(\"USE_GLOBAL_GRAPHRAG\", default=True, cast=bool)\nUSE_NANO_GRAPHRAG = config(\"USE_NANO_GRAPHRAG\", default=False, cast=bool)\nUSE_LIGHTRAG = config(\"USE_LIGHTRAG\", default=True, cast=bool)\nUSE_MS_GRAPHRAG = config(\"USE_MS_GRAPHRAG\", default=True, cast=bool)\n\nGRAPHRAG_INDEX_TYPES = []\n\nif USE_MS_GRAPHRAG:\n    GRAPHRAG_INDEX_TYPES.append(\"ktem.index.file.graph.GraphRAGIndex\")\nif USE_NANO_GRAPHRAG:\n    GRAPHRAG_INDEX_TYPES.append(\"ktem.index.file.graph.NanoGraphRAGIndex\")\nif USE_LIGHTRAG:\n    GRAPHRAG_INDEX_TYPES.append(\"ktem.index.file.graph.LightRAGIndex\")\n\nKH_INDEX_TYPES = [\n    \"ktem.index.file.FileIndex\",\n    *GRAPHRAG_INDEX_TYPES,\n]\n\nGRAPHRAG_INDICES = [\n    {\n        \"name\": graph_type.split(\".\")[-1].replace(\"Index\", \"\")\n        + \" Collection\",  # get last name\n        \"config\": {\n            \"supported_file_types\": (\n                \".png, .jpeg, .jpg, .tiff, .tif, .pdf, .xls, .xlsx, .doc, .docx, \"\n          
      \".pptx, .csv, .html, .mhtml, .txt, .md, .zip\"\n            ),\n            \"private\": True,\n        },\n        \"index_type\": graph_type,\n    }\n    for graph_type in GRAPHRAG_INDEX_TYPES\n]\n\nKH_INDICES = [\n    {\n        \"name\": \"File Collection\",\n        \"config\": {\n            \"supported_file_types\": (\n                \".png, .jpeg, .jpg, .tiff, .tif, .pdf, .xls, .xlsx, .doc, .docx, \"\n                \".pptx, .csv, .html, .mhtml, .txt, .md, .zip\"\n            ),\n            \"private\": True,\n        },\n        \"index_type\": \"ktem.index.file.FileIndex\",\n    },\n    *GRAPHRAG_INDICES,\n]\n"
  },
  {
    "path": "fly.toml",
    "content": "# fly.toml app configuration file generated for kotaemon on 2024-12-24T20:56:32+07:00\n#\n# See https://fly.io/docs/reference/configuration/ for information about how to use this file.\n#\n\napp = 'kotaemon'\nprimary_region = 'sin'\n\n[build]\n\n[mounts]\n  destination = \"/app/ktem_app_data\"\n  source = \"ktem_volume\"\n\n[http_service]\n  internal_port = 7860\n  force_https = true\n  auto_stop_machines = 'suspend'\n  auto_start_machines = true\n  min_machines_running = 0\n  processes = ['app']\n\n[[vm]]\n  memory = '4gb'\n  cpu_kind = 'shared'\n  cpus = 4\n"
  },
  {
    "path": "launch.sh",
    "content": "#!/bin/bash\n\nif [ -z \"$GRADIO_SERVER_NAME\" ]; then\n    export GRADIO_SERVER_NAME=\"0.0.0.0\"\nfi\nif [ -z \"$GRADIO_SERVER_PORT\" ]; then\n    export GRADIO_SERVER_PORT=\"7860\"\nfi\n\n# Check if environment variable KH_DEMO_MODE is set to true\nif [ \"$KH_DEMO_MODE\" = \"true\" ]; then\n    echo \"KH_DEMO_MODE is true. Launching in demo mode...\"\n    # Command to launch in demo mode\n    GR_FILE_ROOT_PATH=\"/app\" KH_FEATURE_USER_MANAGEMENT=false USE_LIGHTRAG=false .venv/bin/uvicorn sso_app_demo:app --host \"$GRADIO_SERVER_NAME\" --port \"$GRADIO_SERVER_PORT\"\nelse\n    if [ \"$KH_SSO_ENABLED\" = \"true\" ]; then\n        echo \"KH_SSO_ENABLED is true. Launching in SSO mode...\"\n        GR_FILE_ROOT_PATH=\"/app\" KH_SSO_ENABLED=true .venv/bin/uvicorn sso_app:app --host \"$GRADIO_SERVER_NAME\" --port \"$GRADIO_SERVER_PORT\"\n    else\n        ollama serve &\n        .venv/bin/python app.py\n    fi\nfi\n"
  },
  {
    "path": "libs/kotaemon/README.md",
    "content": "# kotaemon\n\nQuick and easy AI components to build Kotaemon\n\n## Documentation\n\nTBD\n\n## Install\n\n```shell\npip install kotaemon@git+ssh://git@github.com/Cinnamon/kotaemon.git\n```\n\n## Contribute\n\n### Setup\n\n- Create conda environment (suggest 3.10)\n\n  ```shell\n  conda create -n kotaemon python=3.10\n  conda activate kotaemon\n  ```\n\n- Clone the repo\n\n  ```shell\n  git clone git@github.com:Cinnamon/kotaemon.git\n  cd kotaemon\n  ```\n\n- Install all\n\n  ```shell\n  pip install -e \".[dev]\"\n  ```\n\n- Pre-commit\n\n  ```shell\n  pre-commit install\n  ```\n\n- Test\n\n  ```shell\n  pytest tests\n  ```\n\n### Credential sharing\n\nThis repo uses [git-secret](https://sobolevn.me/git-secret/) to share credentials, which\ninternally uses `gpg` to encrypt and decrypt secret files.\n\nThis repo uses `python-dotenv` to manage credentials stored as environment variable.\nPlease note that the use of `python-dotenv` and credentials are for development\npurposes only. Thus, it should not be used in the main source code (i.e. `kotaemon/` and `tests/`), but can be used in `examples/`.\n\n#### Install git-secret\n\nPlease follow the [official guide](https://sobolevn.me/git-secret/installation) to install git-secret.\n\nFor Windows users, see [For Windows users](#for-windows-users).\n\nFor users who don't have sudo privilege to install packages, follow the `Manual Installation` in the [official guide](https://sobolevn.me/git-secret/installation) and set `PREFIX` to a path that you have access to. And please don't forget to add `PREFIX` to your `PATH`.\n\n#### Gaining access\n\nIn order to gain access to the secret files, you must provide your gpg public file to anyone who has access and ask them to ask your key to the keyring. 
For a quick tutorial on generating your gpg key pair, you can refer to the `Using gpg` section from the [git-secret main page](https://sobolevn.me/git-secret/).\n\n#### Decrypt the secret file\n\nThe credentials are encrypted in the `.env.secret` file. To print the decrypted content to stdout, run\n\n```shell\ngit-secret cat [filename]\n```\n\nOr to get the decrypted `.env` file, run\n\n```shell\ngit-secret reveal [filename]\n```\n\n#### For Windows users\n\ngit-secret is currently not available for Windows, thus the easiest way is to use it in WSL (please use the latest version of WSL2). From there you have 2 options:\n\n1. Using the gpg of WSL.\n\n   This is the most straight-forward option since you would use WSL just like any other unix environment. However, the downside is that you have to make WSL your main environment, which means WSL must have write permission on your repo. To achieve this, you must either:\n\n   - Clone and store your repo inside WSL's file system.\n   - Provide WSL with necessary permission on your Windows file system. This can be achieved by setting `automount` options for WSL. To do that, add this content to `/etc/wsl.conf` and then restart your sub-system.\n\n     ```shell\n     [automount]\n     options = \"metadata,umask=022,fmask=011\"\n     ```\n\n     This enables all permissions for user owner.\n\n2. Using the gpg of Windows but with git-secret from WSL.\n\n   For those who use Windows as the main environment, having to switch back and forth between Windows and WSL will be inconvenient. You can instead stay within your Windows environment and apply some tricks to use `git-secret` from WSL.\n\n   - Install and set up `gpg` on Windows.\n   - Install `git-secret` on WSL. Now in Windows, you can invoke `git-secret` using `wsl git-secret`.\n   - Alternatively you can set up aliases in CMD to shorten the syntax. Please refer to [this SO answer](https://stackoverflow.com/a/65823225) for instructions. 
Some recommended aliases are:\n\n     ```bat\n     @echo off\n\n     :: Commands\n     DOSKEY ls=dir /B $*\n     DOSKEY ll=dir /a $*\n     DOSKEY git-secret=wsl git-secret $*\n     DOSKEY gs=wsl git-secret $*\n     ```\n\n     Now you can invoke `git-secret` in CMD using `git-secret` or `gs`.\n\n     - For PowerShell users, similar behaviours can be achieved using `Set-Alias` and `profile.ps1`. Please refer to this [SO thread](https://stackoverflow.com/questions/61081434/how-do-i-create-a-permanent-alias-file-in-powershell-core) as an example.\n\n### Code base structure\n\n- documents: define document\n- loaders\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/__init__.py",
    "content": "# Disable telemetry with monkey patching\nimport logging\n\nlogger = logging.getLogger(__name__)\ntry:\n    import posthog\n\n    def capture(*args, **kwargs):\n        logger.info(\"posthog.capture called with args: %s, kwargs: %s\", args, kwargs)\n\n    posthog.capture = capture\nexcept ImportError:\n    pass\n\ntry:\n    import os\n\n    os.environ[\"HAYSTACK_TELEMETRY_ENABLED\"] = \"False\"\n    import haystack.telemetry\n\n    haystack.telemetry.telemetry = None\nexcept ImportError:\n    pass\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/agents/__init__.py",
    "content": "from .base import BaseAgent\nfrom .io import AgentFinish, AgentOutput, AgentType, BaseScratchPad\nfrom .langchain_based import LangchainAgent\nfrom .react.agent import ReactAgent\nfrom .rewoo.agent import RewooAgent\nfrom .tools import (\n    BaseTool,\n    ComponentTool,\n    GoogleSearchTool,\n    LLMTool,\n    MCPTool,\n    WikipediaTool,\n)\n\n__all__ = [\n    # agent\n    \"BaseAgent\",\n    \"ReactAgent\",\n    \"RewooAgent\",\n    \"LangchainAgent\",\n    # tool\n    \"BaseTool\",\n    \"ComponentTool\",\n    \"GoogleSearchTool\",\n    \"WikipediaTool\",\n    \"LLMTool\",\n    \"MCPTool\",\n    # io\n    \"AgentType\",\n    \"AgentOutput\",\n    \"AgentFinish\",\n    \"BaseScratchPad\",\n]\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/agents/base.py",
    "content": "from typing import Optional, Union\n\nfrom kotaemon.base import BaseComponent, Node, Param\nfrom kotaemon.llms import BaseLLM, PromptTemplate\n\nfrom .io import AgentOutput, AgentType\nfrom .tools import BaseTool\n\n\nclass BaseAgent(BaseComponent):\n    \"\"\"Define base agent interface\"\"\"\n\n    name: str = Param(help=\"Name of the agent.\")\n    agent_type: AgentType = Param(help=\"Agent type, must be one of AgentType\")\n    description: str = Param(\n        help=(\n            \"Description used to tell the model how/when/why to use the agent. You can\"\n            \" provide few-shot examples as a part of the description. This will be\"\n            \" input to the prompt of LLM.\"\n        )\n    )\n    llm: Optional[BaseLLM] = Node(\n        help=(\n            \"LLM to be used for the agent (optional). LLM must implement BaseLLM\"\n            \" interface.\"\n        )\n    )\n    prompt_template: Optional[Union[PromptTemplate, dict[str, PromptTemplate]]] = Param(\n        help=\"A prompt template or a dict to supply different prompt to the agent\"\n    )\n    plugins: list[BaseTool] = Param(\n        default_callback=lambda _: [],\n        help=\"List of plugins / tools to be used in the agent\",\n    )\n\n    @staticmethod\n    def safeguard_run(run_func, *args, **kwargs):\n        def wrapper(self, *args, **kwargs):\n            try:\n                return run_func(self, *args, **kwargs)\n            except Exception as e:\n                return AgentOutput(\n                    text=\"\",\n                    agent_type=self.agent_type,\n                    status=\"failed\",\n                    error=str(e),\n                )\n\n        return wrapper\n\n    def add_tools(self, tools: list[BaseTool]) -> None:\n        \"\"\"Helper method to add tools and update agent state if needed\"\"\"\n        self.plugins.extend(tools)\n\n    def run(self, *args, **kwargs) -> AgentOutput | list[AgentOutput]:\n        \"\"\"Run the 
component.\"\"\"\n        raise NotImplementedError()\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/agents/io/__init__.py",
    "content": "from .base import AgentAction, AgentFinish, AgentOutput, AgentType, BaseScratchPad\n\n__all__ = [\"AgentOutput\", \"AgentFinish\", \"BaseScratchPad\", \"AgentType\", \"AgentAction\"]\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/agents/io/base.py",
    "content": "import json\nimport logging\nimport os\nfrom dataclasses import dataclass\nfrom enum import Enum\nfrom typing import Any, Dict, Literal, NamedTuple, Optional, Union\n\nfrom pydantic import ConfigDict\n\nfrom kotaemon.base import LLMInterface\n\n\ndef check_log():\n    \"\"\"\n    Checks if logging has been enabled.\n    :return: True if logging has been enabled, False otherwise.\n    :rtype: bool\n    \"\"\"\n    return os.environ.get(\"LOG_PATH\", None) is not None\n\n\nclass AgentType(Enum):\n    \"\"\"\n    Enumerated type for agent types.\n    \"\"\"\n\n    openai = \"openai\"\n    openai_multi = \"openai_multi\"\n    openai_tool = \"openai_tool\"\n    self_ask = \"self_ask\"\n    react = \"react\"\n    rewoo = \"rewoo\"\n    vanilla = \"vanilla\"\n\n\nclass BaseScratchPad:\n    \"\"\"\n    Base class for output handlers.\n\n    Attributes:\n    -----------\n    logger : logging.Logger\n        The logger object to log messages.\n\n    Methods:\n    --------\n    stop():\n        Stop the output.\n\n    update_status(output: str, **kwargs):\n        Update the status of the output.\n\n    thinking(name: str):\n        Log that a process is thinking.\n\n    done(_all=False):\n        Log that the process is done.\n\n    stream_print(item: str):\n        Not implemented.\n\n    json_print(item: Dict[str, Any]):\n        Log a JSON object.\n\n    panel_print(item: Any, title: str = \"Output\", stream: bool = False):\n        Log a panel output.\n\n    clear():\n        Not implemented.\n\n    print(content: str, **kwargs):\n        Log arbitrary content.\n\n    format_json(json_obj: str):\n        Format a JSON object.\n\n    debug(content: str, **kwargs):\n        Log a debug message.\n\n    info(content: str, **kwargs):\n        Log an informational message.\n\n    warning(content: str, **kwargs):\n        Log a warning message.\n\n    error(content: str, **kwargs):\n        Log an error message.\n\n    critical(content: str, **kwargs):\n        
Log a critical message.\n    \"\"\"\n\n    def __init__(self):\n        \"\"\"\n        Initialize the BaseOutput object.\n\n        \"\"\"\n        self.logger = logging\n        self.log = []\n\n    def stop(self):\n        \"\"\"\n        Stop the output.\n        \"\"\"\n\n    def update_status(self, output: str, **kwargs):\n        \"\"\"\n        Update the status of the output.\n        \"\"\"\n        if check_log():\n            self.logger.info(output)\n\n    def thinking(self, name: str):\n        \"\"\"\n        Log that a process is thinking.\n        \"\"\"\n        if check_log():\n            self.logger.info(f\"{name} is thinking...\")\n\n    def done(self, _all=False):\n        \"\"\"\n        Log that the process is done.\n        \"\"\"\n\n        if check_log():\n            self.logger.info(\"Done\")\n\n    def stream_print(self, item: str):\n        \"\"\"\n        Stream print.\n        \"\"\"\n\n    def json_print(self, item: Dict[str, Any]):\n        \"\"\"\n        Log a JSON object.\n        \"\"\"\n        if check_log():\n            self.logger.info(json.dumps(item, indent=2))\n\n    def panel_print(self, item: Any, title: str = \"Output\", stream: bool = False):\n        \"\"\"\n        Log a panel output.\n\n        Args:\n            item : Any\n                The item to log.\n            title : str, optional\n                The title of the panel, defaults to \"Output\".\n            stream : bool, optional\n        \"\"\"\n        if not stream:\n            self.log.append(item)\n        if check_log():\n            self.logger.info(\"-\" * 20)\n            self.logger.info(item)\n            self.logger.info(\"-\" * 20)\n\n    def clear(self):\n        \"\"\"\n        Not implemented.\n        \"\"\"\n\n    def print(self, content: str, **kwargs):\n        \"\"\"\n        Log arbitrary content.\n        \"\"\"\n        self.log.append(content)\n        if check_log():\n            self.logger.info(content)\n\n    def 
format_json(self, json_obj: str):\n        \"\"\"\n        Format a JSON object.\n        \"\"\"\n        formatted_json = json.dumps(json_obj, indent=2)\n        return formatted_json\n\n    def debug(self, content: str, **kwargs):\n        \"\"\"\n        Log a debug message.\n        \"\"\"\n        if check_log():\n            self.logger.debug(content, **kwargs)\n\n    def info(self, content: str, **kwargs):\n        \"\"\"\n        Log an informational message.\n        \"\"\"\n        if check_log():\n            self.logger.info(content, **kwargs)\n\n    def warning(self, content: str, **kwargs):\n        \"\"\"\n        Log a warning message.\n        \"\"\"\n        if check_log():\n            self.logger.warning(content, **kwargs)\n\n    def error(self, content: str, **kwargs):\n        \"\"\"\n        Log an error message.\n        \"\"\"\n        if check_log():\n            self.logger.error(content, **kwargs)\n\n    def critical(self, content: str, **kwargs):\n        \"\"\"\n        Log a critical message.\n        \"\"\"\n        if check_log():\n            self.logger.critical(content, **kwargs)\n\n\n@dataclass\nclass AgentAction:\n    \"\"\"Agent's action to take.\n\n    Args:\n        tool: The tool to invoke.\n        tool_input: The input to the tool.\n        log: The log message.\n    \"\"\"\n\n    tool: str\n    tool_input: Union[str, dict]\n    log: str\n\n\nclass AgentFinish(NamedTuple):\n    \"\"\"Agent's return value when finishing execution.\n\n    Args:\n        return_values: The return values of the agent.\n        log: The log message.\n    \"\"\"\n\n    return_values: dict\n    log: str\n\n\nclass AgentOutput(LLMInterface):\n    \"\"\"Output from an agent.\n\n    Args:\n        text: The text output from the agent.\n        agent_type: The type of agent.\n        status: The status after executing the agent.\n        error: The error message if any.\n    \"\"\"\n\n    model_config = ConfigDict(extra=\"allow\")\n\n    text: str\n 
   type: str = \"agent\"\n    agent_type: AgentType\n    status: Literal[\"thinking\", \"finished\", \"stopped\", \"failed\"]\n    error: Optional[str] = None\n    intermediate_steps: Optional[list] = None\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/agents/langchain_based.py",
    "content": "from typing import List, Optional\n\nfrom langchain.agents import AgentType as LCAgentType\nfrom langchain.agents import initialize_agent\nfrom langchain.agents.agent import AgentExecutor as LCAgentExecutor\n\nfrom kotaemon.llms import LLM, ChatLLM\n\nfrom .base import BaseAgent\nfrom .io import AgentOutput, AgentType\nfrom .tools import BaseTool\n\n\nclass LangchainAgent(BaseAgent):\n    \"\"\"Wrapper for Langchain Agent\"\"\"\n\n    name: str = \"LangchainAgent\"\n    agent_type: AgentType\n    description: str = \"LangchainAgent for answering multi-step reasoning questions\"\n    AGENT_TYPE_MAP = {\n        AgentType.openai: LCAgentType.OPENAI_FUNCTIONS,\n        AgentType.openai_multi: LCAgentType.OPENAI_MULTI_FUNCTIONS,\n        AgentType.react: LCAgentType.ZERO_SHOT_REACT_DESCRIPTION,\n        AgentType.self_ask: LCAgentType.SELF_ASK_WITH_SEARCH,\n    }\n    agent: Optional[LCAgentExecutor] = None\n\n    def __init__(self, *args, **kwargs):\n        super().__init__(*args, **kwargs)\n\n        if self.agent_type not in self.AGENT_TYPE_MAP:\n            raise NotImplementedError(\n                f\"AgentType {self.agent_type } not supported by Langchain wrapper\"\n            )\n        self.update_agent_tools()\n\n    def update_agent_tools(self):\n        assert isinstance(self.llm, (ChatLLM, LLM))\n        langchain_plugins = [tool.to_langchain_format() for tool in self.plugins]\n\n        # a fix for search_doc tool name:\n        # use \"Intermediate Answer\" for self-ask agent\n        found_search_tool = False\n        if self.agent_type == AgentType.self_ask:\n            for plugin in langchain_plugins:\n                if plugin.name == \"search_doc\":\n                    plugin.name = \"Intermediate Answer\"\n                    langchain_plugins = [plugin]\n                    found_search_tool = True\n                    break\n\n        if self.agent_type != AgentType.self_ask or found_search_tool:\n            # reinit 
Langchain AgentExecutor\n            self.agent = initialize_agent(\n                langchain_plugins,\n                self.llm.to_langchain_format(),\n                agent=self.AGENT_TYPE_MAP[self.agent_type],\n                handle_parsing_errors=True,\n                verbose=True,\n            )\n\n    def add_tools(self, tools: List[BaseTool]) -> None:\n        super().add_tools(tools)\n        self.update_agent_tools()\n        return\n\n    def run(self, instruction: str) -> AgentOutput:\n        assert (\n            self.agent is not None\n        ), \"Lanchain AgentExecutor is not correctly initialized\"\n\n        # Langchain AgentExecutor call\n        output = self.agent(instruction)[\"output\"]\n\n        return AgentOutput(\n            text=output,\n            agent_type=self.agent_type,\n            status=\"finished\",\n        )\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/agents/react/__init__.py",
    "content": "from .agent import ReactAgent\n\n__all__ = [\"ReactAgent\"]\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/agents/react/agent.py",
    "content": "import logging\nimport re\nfrom functools import partial\nfrom typing import Optional\n\nimport tiktoken\n\nfrom kotaemon.agents.base import BaseAgent, BaseLLM\nfrom kotaemon.agents.io import AgentAction, AgentFinish, AgentOutput, AgentType\nfrom kotaemon.agents.tools import BaseTool\nfrom kotaemon.base import Document, Param\nfrom kotaemon.indices.splitters import TokenSplitter\nfrom kotaemon.llms import PromptTemplate\n\nFINAL_ANSWER_ACTION = \"Final Answer:\"\n\n\nclass ReactAgent(BaseAgent):\n    \"\"\"\n    Sequential ReactAgent class inherited from BaseAgent.\n    Implementing ReAct agent paradigm https://arxiv.org/pdf/2210.03629.pdf\n    \"\"\"\n\n    name: str = \"ReactAgent\"\n    agent_type: AgentType = AgentType.react\n    description: str = \"ReactAgent for answering multi-step reasoning questions\"\n    llm: BaseLLM\n    prompt_template: Optional[PromptTemplate] = None\n    output_lang: str = \"English\"\n    plugins: list[BaseTool] = Param(\n        default_callback=lambda _: [], help=\"List of tools to be used in the agent. \"\n    )\n    examples: dict[str, str | list[str]] = Param(\n        default_callback=lambda _: {}, help=\"Examples to be used in the agent. 
\"\n    )\n    intermediate_steps: list[tuple[AgentAction | AgentFinish, str]] = Param(\n        default_callback=lambda _: [],\n        help=\"List of AgentAction and observation (tool) output\",\n    )\n    max_iterations: int = 5\n    strict_decode: bool = False\n    max_context_length: int = Param(\n        default=3000,\n        help=\"Max context length for each tool output.\",\n    )\n    trim_func: TokenSplitter | None = None\n\n    def _compose_plugin_description(self) -> str:\n        \"\"\"\n        Compose the worker prompt from the workers.\n\n        Example:\n        toolname1[input]: tool1 description\n        toolname2[input]: tool2 description\n        \"\"\"\n        prompt = \"\"\n        try:\n            for plugin in self.plugins:\n                prompt += f\"{plugin.name}[input]: {plugin.description}\\n\"\n        except Exception:\n            raise ValueError(\"Worker must have a name and description.\")\n        return prompt\n\n    def _construct_scratchpad(\n        self, intermediate_steps: list[tuple[AgentAction | AgentFinish, str]] = []\n    ) -> str:\n        \"\"\"Construct the scratchpad that lets the agent continue its thought process.\"\"\"\n        thoughts = \"\"\n        for action, observation in intermediate_steps:\n            thoughts += action.log\n            thoughts += f\"\\nObservation: {observation}\\nThought:\"\n        return thoughts\n\n    def _parse_output(self, text: str) -> Optional[AgentAction | AgentFinish]:\n        \"\"\"\n        Parse text output from LLM for the next Action or Final Answer\n        Using Regex to parse \"Action:\\n Action Input:\\n\" for the next Action\n        Using FINAL_ANSWER_ACTION to parse Final Answer\n\n        Args:\n            text[str]: input text to parse\n        \"\"\"\n        includes_answer = FINAL_ANSWER_ACTION in text\n        regex = (\n            r\"Action\\s*\\d*\\s*:[\\s]*(.*?)[\\s]*Action\\s*\\d*\\s*Input\\s*\\d*\\s*:[\\s]*(.*)\"\n        )\n        
action_match = re.search(regex, text, re.DOTALL)\n        action_output: Optional[AgentAction | AgentFinish] = None\n        if action_match:\n            if includes_answer:\n                raise Exception(\n                    \"Parsing LLM output produced both a final answer \"\n                    f\"and a parse-able action: {text}\"\n                )\n            action = action_match.group(1).strip()\n            action_input = action_match.group(2)\n            tool_input = action_input.strip(\" \")\n            # ensure if its a well formed SQL query we don't remove any trailing \" chars\n            if tool_input.startswith(\"SELECT \") is False:\n                tool_input = tool_input.strip('\"')\n\n            action_output = AgentAction(action, tool_input, text)\n\n        elif includes_answer:\n            action_output = AgentFinish(\n                {\"output\": text.split(FINAL_ANSWER_ACTION)[-1].strip()}, text\n            )\n        else:\n            if self.strict_decode:\n                raise Exception(f\"Could not parse LLM output: `{text}`\")\n            else:\n                action_output = AgentFinish({\"output\": text}, text)\n\n        return action_output\n\n    def _compose_prompt(self, instruction) -> str:\n        \"\"\"\n        Compose the prompt from template, worker description, examples and instruction.\n        \"\"\"\n        agent_scratchpad = self._construct_scratchpad(self.intermediate_steps)\n        tool_description = self._compose_plugin_description()\n        tool_names = \", \".join([plugin.name for plugin in self.plugins])\n        if self.prompt_template is None:\n            from .prompt import zero_shot_react_prompt\n\n            self.prompt_template = zero_shot_react_prompt\n        return self.prompt_template.populate(\n            instruction=instruction,\n            agent_scratchpad=agent_scratchpad,\n            tool_description=tool_description,\n            tool_names=tool_names,\n            
lang=self.output_lang,\n        )\n\n    def _format_function_map(self) -> dict[str, BaseTool]:\n        \"\"\"Format the function map for the open AI function API.\n\n        Return:\n            Dict[str, Callable]: The function map.\n        \"\"\"\n        # Map the function name to the real function object.\n        function_map = {}\n        for plugin in self.plugins:\n            function_map[plugin.name] = plugin\n        return function_map\n\n    def _trim(self, text: str | Document) -> str:\n        \"\"\"\n        Trim the text to the maximum token length.\n        \"\"\"\n        evidence_trim_func = (\n            self.trim_func\n            if self.trim_func\n            else TokenSplitter(\n                chunk_size=self.max_context_length,\n                chunk_overlap=0,\n                separator=\" \",\n                tokenizer=partial(\n                    tiktoken.encoding_for_model(\"gpt-3.5-turbo\").encode,\n                    allowed_special=set(),\n                    disallowed_special=\"all\",\n                ),\n            )\n        )\n        if isinstance(text, str):\n            texts = evidence_trim_func([Document(text=text)])\n        elif isinstance(text, Document):\n            texts = evidence_trim_func([text])\n        else:\n            raise ValueError(\"Invalid text type to trim\")\n        trim_text = texts[0].text\n        logging.info(f\"len (trimmed): {len(trim_text)}\")\n        return trim_text\n\n    def clear(self):\n        \"\"\"\n        Clear and reset the agent.\n        \"\"\"\n        self.intermediate_steps = []\n\n    def run(self, instruction, max_iterations=None) -> AgentOutput:\n        \"\"\"\n        Run the agent with the given instruction.\n\n        Args:\n            instruction: Instruction to run the agent with.\n            max_iterations: Maximum number of iterations\n                of reasoning steps, defaults to 10.\n\n        Return:\n            AgentOutput object.\n        \"\"\"\n 
       if not max_iterations:\n            max_iterations = self.max_iterations\n        assert max_iterations > 0\n\n        self.clear()\n        logging.info(f\"Running {self.name} with instruction: {instruction}\")\n        total_cost = 0.0\n        total_token = 0\n        status = \"failed\"\n        response_text = None\n\n        for step_count in range(1, max_iterations + 1):\n            prompt = self._compose_prompt(instruction)\n            logging.info(f\"Prompt: {prompt}\")\n            response = self.llm(\n                prompt, stop=[\"Observation:\"]\n            )  # could cause bugs if llm doesn't have `stop` as a parameter\n            response_text = response.text\n            logging.info(f\"Response: {response_text}\")\n            action_step = self._parse_output(response_text)\n            if action_step is None:\n                raise ValueError(\"Invalid action\")\n            is_finished_chain = isinstance(action_step, AgentFinish)\n            if is_finished_chain:\n                result = \"\"\n            else:\n                assert isinstance(action_step, AgentAction)\n                action_name = action_step.tool\n                tool_input = action_step.tool_input\n                logging.info(f\"Action: {action_name}\")\n                logging.info(f\"Tool Input: {tool_input}\")\n                function_map = self._format_function_map()\n                if action_name not in function_map:\n                    available = \", \".join(function_map.keys())\n                    result = (\n                        f\"Tool '{action_name}' not found. \"\n                        f\"Available tools: {available}\"\n                    )\n                else:\n                    result = function_map[action_name](tool_input)\n\n                # trim the worker output to 1000 tokens, as we are appending\n                # all workers' logs and it can exceed the token limit if we\n                # don't limit each. 
Fix this number regarding to the LLM capacity.\n                result = self._trim(result)\n                logging.info(f\"Result: {result}\")\n\n            self.intermediate_steps.append((action_step, result))\n            if is_finished_chain:\n                logging.info(f\"Finished after {step_count} steps.\")\n                status = \"finished\"\n                break\n        else:\n            status = \"stopped\"\n\n        return AgentOutput(\n            text=response_text,\n            agent_type=self.agent_type,\n            status=status,\n            total_tokens=total_token,\n            total_cost=total_cost,\n            intermediate_steps=self.intermediate_steps,\n            max_iterations=max_iterations,\n        )\n\n    def stream(self, instruction, max_iterations=None):\n        \"\"\"\n        Stream the agent with the given instruction.\n\n        Args:\n            instruction: Instruction to run the agent with.\n            max_iterations: Maximum number of iterations\n                of reasoning steps, defaults to 10.\n\n        Return:\n            AgentOutput object.\n        \"\"\"\n        if not max_iterations:\n            max_iterations = self.max_iterations\n        assert max_iterations > 0\n\n        self.clear()\n        logging.info(f\"Running {self.name} with instruction: {instruction}\")\n        print(f\"Running {self.name} with instruction: {instruction}\")\n        total_cost = 0.0\n        total_token = 0\n        status = \"failed\"\n        response_text = None\n\n        for step_count in range(1, max_iterations + 1):\n            prompt = self._compose_prompt(instruction)\n            logging.info(f\"Prompt: {prompt}\")\n            print(f\"Prompt: {prompt}\")\n            response = self.llm(\n                prompt, stop=[\"Observation:\"]\n            )  # TODO: could cause bugs if llm doesn't have `stop` as a parameter\n            response_text = response.text\n            logging.info(f\"Response: 
{response_text}\")\n            print(f\"Response: {response_text}\")\n            action_step = self._parse_output(response_text)\n            if action_step is None:\n                raise ValueError(\"Invalid action\")\n            is_finished_chain = isinstance(action_step, AgentFinish)\n            if is_finished_chain:\n                result = response_text\n                if \"Final Answer:\" in response_text:\n                    result = response_text.split(\"Final Answer:\")[-1].strip()\n            else:\n                assert isinstance(action_step, AgentAction)\n                action_name = action_step.tool\n                tool_input = action_step.tool_input\n                logging.info(f\"Action: {action_name}\")\n                print(f\"Action: {action_name}\")\n                logging.info(f\"Tool Input: {tool_input}\")\n                print(f\"Tool Input: {tool_input}\")\n                function_map = self._format_function_map()\n                if action_name not in function_map:\n                    available = \", \".join(function_map.keys())\n                    result = (\n                        f\"Tool '{action_name}' not found. \"\n                        f\"Available tools: {available}\"\n                    )\n                else:\n                    result = function_map[action_name](tool_input)\n\n                # trim the worker output to 1000 tokens, as we are appending\n                # all workers' logs and it can exceed the token limit if we\n                # don't limit each. 
Fix this number regarding to the LLM capacity.\n                result = self._trim(result)\n                logging.info(f\"Result: {result}\")\n                print(f\"Result: {result}\")\n\n            self.intermediate_steps.append((action_step, result))\n            if is_finished_chain:\n                logging.info(f\"Finished after {step_count} steps.\")\n                status = \"finished\"\n                yield AgentOutput(\n                    text=result,\n                    agent_type=self.agent_type,\n                    status=status,\n                    intermediate_steps=self.intermediate_steps[-1],\n                )\n                break\n            else:\n                yield AgentOutput(\n                    text=\"\",\n                    agent_type=self.agent_type,\n                    status=\"thinking\",\n                    intermediate_steps=self.intermediate_steps[-1],\n                )\n\n        else:\n            status = \"stopped\"\n            yield AgentOutput(\n                text=\"\",\n                agent_type=self.agent_type,\n                status=status,\n                intermediate_steps=self.intermediate_steps[-1],\n            )\n\n        return AgentOutput(\n            text=response_text,\n            agent_type=self.agent_type,\n            status=status,\n            total_tokens=total_token,\n            total_cost=total_cost,\n            intermediate_steps=self.intermediate_steps,\n            max_iterations=max_iterations,\n        )\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/agents/react/prompt.py",
    "content": "# flake8: noqa\n\nfrom kotaemon.llms import PromptTemplate\n\nzero_shot_react_prompt = PromptTemplate(\n    template=\"\"\"Answer the following questions as best you can. Give answer in {lang}. You have access to the following tools:\n{tool_description}\nUse the following format:\n\nQuestion: the input question you must answer\nThought: you should always think about what to do\n\nAction: the action to take, should be one of [{tool_names}]\n\nAction Input: the input to the action, should be different from the action input of the same action in previous steps.\n\nObservation: the result of the action\n\n... (this Thought/Action/Action Input/Observation can repeat N times)\n#Thought: I now know the final answer\nFinal Answer: the final answer to the original input question\n\nBegin! After each Action Input.\n\nQuestion: {instruction}\nThought:{agent_scratchpad}\n    \"\"\"\n)\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/agents/rewoo/__init__.py",
    "content": "from .agent import RewooAgent\n\n__all__ = [\"RewooAgent\"]\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/agents/rewoo/agent.py",
    "content": "import logging\nimport re\nfrom concurrent.futures import ThreadPoolExecutor\nfrom functools import partial\nfrom typing import Any\n\nimport tiktoken\n\nfrom kotaemon.agents.base import BaseAgent\nfrom kotaemon.agents.io import AgentOutput, AgentType, BaseScratchPad\nfrom kotaemon.agents.tools import BaseTool\nfrom kotaemon.agents.utils import get_plugin_response_content\nfrom kotaemon.base import Document, Node, Param\nfrom kotaemon.indices.qa.citation import CitationPipeline\nfrom kotaemon.indices.splitters import TokenSplitter\nfrom kotaemon.llms import BaseLLM, PromptTemplate\n\nfrom .planner import Planner\nfrom .solver import Solver\n\n\nclass RewooAgent(BaseAgent):\n    \"\"\"Distributive RewooAgent class inherited from BaseAgent.\n    Implementing ReWOO paradigm https://arxiv.org/pdf/2305.18323.pdf\"\"\"\n\n    name: str = \"RewooAgent\"\n    agent_type: AgentType = AgentType.rewoo\n    description: str = \"RewooAgent for answering multi-step reasoning questions\"\n    output_lang: str = \"English\"\n    planner_llm: BaseLLM\n    solver_llm: BaseLLM\n    prompt_template: dict[str, PromptTemplate] = Param(\n        default_callback=lambda _: {},\n        help=\"A dict to supply different prompt to the agent.\",\n    )\n    plugins: list[BaseTool] = Param(\n        default_callback=lambda _: [], help=\"A list of plugins to be used in the model.\"\n    )\n    examples: dict[str, str | list[str]] = Param(\n        default_callback=lambda _: {}, help=\"Examples to be used in the agent.\"\n    )\n    max_context_length: int = Param(\n        default=3000,\n        help=\"Max context length for each tool output.\",\n    )\n    trim_func: TokenSplitter | None = None\n\n    @Node.auto(depends_on=[\"planner_llm\", \"plugins\", \"prompt_template\", \"examples\"])\n    def planner(self):\n        return Planner(\n            model=self.planner_llm,\n            plugins=self.plugins,\n            prompt_template=self.prompt_template.get(\"Planner\", 
None),\n            examples=self.examples.get(\"Planner\", None),\n        )\n\n    @Node.auto(depends_on=[\"solver_llm\", \"prompt_template\", \"examples\"])\n    def solver(self):\n        return Solver(\n            model=self.solver_llm,\n            prompt_template=self.prompt_template.get(\"Solver\", None),\n            examples=self.examples.get(\"Solver\", None),\n            output_lang=self.output_lang,\n        )\n\n    def _parse_plan_map(\n        self, planner_response: str\n    ) -> tuple[dict[str, list[str]], dict[str, str]]:\n        \"\"\"\n        Parse planner output. It should be an n-to-n mapping from Plans to #Es.\n        This is because sometimes LLM cannot follow the strict output format.\n        Example:\n            #Plan1\n            #E1\n            #E2\n        should result in: {\"#Plan1\": [\"#E1\", \"#E2\"]}\n        Or:\n            #Plan1\n            #Plan2\n            #E1\n        should result in: {\"#Plan1\": [], \"#Plan2\": [\"#E1\"]}\n        This function should also return a plan map.\n\n        Returns:\n            tuple[Dict[str, List[str]], Dict[str, str]]: A list of plan map\n        \"\"\"\n        valid_chunk = [\n            line\n            for line in planner_response.splitlines()\n            if line.startswith(\"#Plan\") or line.startswith(\"#E\")\n        ]\n\n        plan_to_es: dict[str, list[str]] = dict()\n        plans: dict[str, str] = dict()\n        prev_key = \"\"\n        for line in valid_chunk:\n            key, description = line.split(\":\", 1)\n            key = key.strip()\n            if key.startswith(\"#Plan\"):\n                plans[key] = description.strip()\n                plan_to_es[key] = []\n                prev_key = key\n            elif key.startswith(\"#E\"):\n                plan_to_es[prev_key].append(key)\n\n        return plan_to_es, plans\n\n    def _parse_planner_evidences(\n        self, planner_response: str\n    ) -> tuple[dict[str, str], list[list[str]]]:\n        
\"\"\"\n        Parse planner output. This should return a mapping from #E to tool call.\n        It should also identify the level of each #E in dependency map.\n        Example:\n            {\n            \"#E1\": \"Tool1\", \"#E2\": \"Tool2\",\n            \"#E3\": \"Tool3\", \"#E4\": \"Tool4\"\n            }, [[#E1, #E2], [#E3, #E4]]\n\n        Returns:\n            tuple[dict[str, str], List[List[str]]]:\n            A mapping from #E to tool call and a list of levels.\n        \"\"\"\n        evidences: dict[str, str] = dict()\n        dependence: dict[str, list[str]] = dict()\n        for line in planner_response.splitlines():\n            if line.startswith(\"#E\") and line[2].isdigit():\n                e, tool_call = line.split(\":\", 1)\n                e, tool_call = e.strip(), tool_call.strip()\n                if len(e) == 3:\n                    dependence[e] = []\n                    evidences[e] = tool_call\n                    for var in re.findall(r\"#E\\d+\", tool_call):\n                        if var in evidences:\n                            dependence[e].append(var)\n                else:\n                    evidences[e] = \"No evidence found\"\n        level = []\n        while dependence:\n            select = [i for i in dependence if not dependence[i]]\n            if len(select) == 0:\n                raise ValueError(\"Circular dependency detected.\")\n            level.append(select)\n            for item in select:\n                dependence.pop(item)\n            for item in dependence:\n                for i in select:\n                    if i in dependence[item]:\n                        dependence[item].remove(i)\n\n        return evidences, level\n\n    def _run_plugin(\n        self,\n        e: str,\n        planner_evidences: dict[str, str],\n        worker_evidences: dict[str, str],\n        output=BaseScratchPad(),\n    ):\n        \"\"\"\n        Run a plugin for a given evidence.\n        This function should also 
cumulate the cost and tokens.\n        \"\"\"\n        result = dict(e=e, plugin_cost=0, plugin_token=0, evidence=\"\")\n        tool_call = planner_evidences[e]\n        if \"[\" not in tool_call:\n            result[\"evidence\"] = tool_call\n        else:\n            tool, tool_input = tool_call.split(\"[\", 1)\n            tool_input = tool_input[:-1]\n            # find variables in input and replace with previous evidences\n            for var in re.findall(r\"#E\\d+\", tool_input):\n                print(\"Tool input: \", tool_input)\n                print(\"Var: \", var)\n                print(\"Worker evidences: \", worker_evidences)\n                if var in worker_evidences:\n                    tool_input = tool_input.replace(\n                        var, worker_evidences.get(var, \"\") or \"\"\n                    )\n            try:\n                selected_plugin = self._find_plugin(tool)\n                if selected_plugin is None:\n                    raise ValueError(\"Invalid plugin detected\")\n                tool_response = selected_plugin(tool_input)\n                result[\"evidence\"] = get_plugin_response_content(tool_response)\n            except ValueError:\n                result[\"evidence\"] = \"No evidence found.\"\n            finally:\n                output.panel_print(\n                    result[\"evidence\"], f\"[green] Function Response of [blue]{tool}: \"\n                )\n        return result\n\n    def _get_worker_evidence(\n        self,\n        planner_evidences: dict[str, str],\n        evidences_level: list[list[str]],\n        output=BaseScratchPad(),\n    ) -> Any:\n        \"\"\"\n        Parallel execution of plugins in DAG for speedup.\n        This is one of core benefits of ReWOO agents.\n\n        Args:\n            planner_evidences: A mapping from #E to tool call.\n            evidences_level: A list of levels of evidences.\n                Calculated from DAG of plugin calls.\n            output: 
Output object, defaults to BaseOutput().\n        Returns:\n            A mapping from #E to tool call.\n        \"\"\"\n        worker_evidences: dict[str, str] = dict()\n        plugin_cost, plugin_token = 0.0, 0.0\n        with ThreadPoolExecutor() as pool:\n            for level in evidences_level:\n                results = []\n                for e in level:\n                    results.append(\n                        pool.submit(\n                            self._run_plugin,\n                            e,\n                            planner_evidences,\n                            worker_evidences,\n                            output,\n                        )\n                    )\n                if len(results) > 1:\n                    output.update_status(f\"Running tasks {level} in parallel.\")\n                else:\n                    output.update_status(f\"Running task {level[0]}.\")\n                for r in results:\n                    resp = r.result()\n                    plugin_cost += resp[\"plugin_cost\"]\n                    plugin_token += resp[\"plugin_token\"]\n                    worker_evidences[resp[\"e\"]] = self._trim_evidence(resp[\"evidence\"])\n                output.done()\n\n        return worker_evidences, plugin_cost, plugin_token\n\n    def _find_plugin(self, name: str):\n        for p in self.plugins:\n            if p.name == name:\n                return p\n\n    def _trim_evidence(self, evidence: str):\n        evidence_trim_func = (\n            self.trim_func\n            if self.trim_func\n            else TokenSplitter(\n                chunk_size=self.max_context_length,\n                chunk_overlap=0,\n                separator=\" \",\n                tokenizer=partial(\n                    tiktoken.encoding_for_model(\"gpt-3.5-turbo\").encode,\n                    allowed_special=set(),\n                    disallowed_special=\"all\",\n                ),\n            )\n        )\n        if evidence:\n   
         texts = evidence_trim_func([Document(text=evidence)])\n            evidence = texts[0].text\n            logging.info(f\"len (trimmed): {len(evidence)}\")\n            return evidence\n\n    @BaseAgent.safeguard_run\n    def run(self, instruction: str, use_citation: bool = False) -> AgentOutput:\n        \"\"\"\n        Run the agent with a given instruction.\n        \"\"\"\n        logging.info(f\"Running {self.name} with instruction: {instruction}\")\n        total_cost = 0.0\n        total_token = 0\n\n        # Plan\n        planner_output = self.planner(instruction)\n        planner_text_output = planner_output.text\n        plan_to_es, plans = self._parse_plan_map(planner_text_output)\n        planner_evidences, evidence_level = self._parse_planner_evidences(\n            planner_text_output\n        )\n\n        # Work\n        worker_evidences, plugin_cost, plugin_token = self._get_worker_evidence(\n            planner_evidences, evidence_level\n        )\n        worker_log = \"\"\n        for plan in plan_to_es:\n            worker_log += f\"{plan}: {plans[plan]}\\n\"\n            for e in plan_to_es[plan]:\n                worker_log += f\"{e}: {worker_evidences[e]}\\n\"\n\n        # Solve\n        solver_output = self.solver(instruction, worker_log)\n        solver_output_text = solver_output.text\n        if use_citation:\n            citation_pipeline = CitationPipeline(llm=self.solver_llm)\n            citation = citation_pipeline(context=worker_log, question=instruction)\n        else:\n            citation = None\n\n        return AgentOutput(\n            text=solver_output_text,\n            agent_type=self.agent_type,\n            status=\"finished\",\n            total_tokens=total_token,\n            total_cost=total_cost,\n            citation=citation,\n            metadata={\"citation\": citation, \"worker_log\": worker_log},\n        )\n\n    def stream(self, instruction: str, use_citation: bool = False):\n        \"\"\"\n        
Stream the agent with a given instruction.\n        \"\"\"\n        logging.info(f\"Streaming {self.name} with instruction: {instruction}\")\n        total_cost = 0.0\n        total_token = 0\n\n        # Plan\n        planner_output = self.planner(instruction)\n        planner_text_output = planner_output.text\n        plan_to_es, plans = self._parse_plan_map(planner_text_output)\n        planner_evidences, evidence_level = self._parse_planner_evidences(\n            planner_text_output\n        )\n\n        print(\"Planner output:\", planner_text_output)\n        # output planner to info panel\n        yield AgentOutput(\n            text=\"\",\n            agent_type=self.agent_type,\n            status=\"thinking\",\n            intermediate_steps=[{\"planner_log\": planner_text_output}],\n        )\n\n        # Work\n        worker_evidences, plugin_cost, plugin_token = self._get_worker_evidence(\n            planner_evidences, evidence_level\n        )\n        worker_log = \"\"\n        for plan in plan_to_es:\n            worker_log += f\"{plan}: {plans[plan]}\\n\"\n            current_progress = f\"{plan}: {plans[plan]}\\n\"\n            for e in plan_to_es[plan]:\n                worker_log += f\"#Action: {planner_evidences.get(e, None)}\\n\"\n                worker_log += f\"{e}: {worker_evidences[e]}\\n\"\n                current_progress += f\"#Action: {planner_evidences.get(e, None)}\\n\"\n                current_progress += f\"{e}: {worker_evidences[e]}\\n\"\n\n            yield AgentOutput(\n                text=\"\",\n                agent_type=self.agent_type,\n                status=\"thinking\",\n                intermediate_steps=[{\"worker_log\": current_progress}],\n            )\n\n        # Solve\n        solver_response = \"\"\n        for solver_output in self.solver.stream(instruction, worker_log):\n            solver_output_text = solver_output.text\n            solver_response += solver_output_text\n            yield AgentOutput(\n     
           text=solver_output_text,\n                agent_type=self.agent_type,\n                status=\"thinking\",\n            )\n        if use_citation:\n            citation_pipeline = CitationPipeline(llm=self.solver_llm)\n            citation = citation_pipeline.invoke(\n                context=worker_log, question=instruction\n            )\n        else:\n            citation = None\n\n        return AgentOutput(\n            text=\"\",\n            agent_type=self.agent_type,\n            status=\"finished\",\n            total_tokens=total_token,\n            total_cost=total_cost,\n            citation=citation,\n            metadata={\"citation\": citation, \"worker_log\": worker_log},\n        )\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/agents/rewoo/planner.py",
    "content": "from typing import Any, List, Optional, Union\n\nfrom kotaemon.agents.base import BaseLLM, BaseTool\nfrom kotaemon.agents.io import BaseScratchPad\nfrom kotaemon.base import BaseComponent\nfrom kotaemon.llms import PromptTemplate\n\nfrom .prompt import few_shot_planner_prompt, zero_shot_planner_prompt\n\n\nclass Planner(BaseComponent):\n    model: BaseLLM\n    prompt_template: Optional[PromptTemplate] = None\n    examples: Optional[Union[str, List[str]]] = None\n    plugins: List[BaseTool]\n\n    def _compose_worker_description(self) -> str:\n        \"\"\"\n        Compose the worker prompt from the workers.\n\n        Example:\n        toolname1[input]: tool1 description\n        toolname2[input]: tool2 description\n        \"\"\"\n        prompt = \"\"\n        try:\n            for worker in self.plugins:\n                prompt += f\"{worker.name}[input]: {worker.description}\\n\"\n        except Exception:\n            raise ValueError(\"Worker must have a name and description.\")\n        return prompt\n\n    def _compose_fewshot_prompt(self) -> str:\n        if self.examples is None:\n            return \"\"\n        if isinstance(self.examples, str):\n            return self.examples\n        else:\n            return \"\\n\\n\".join([e.strip(\"\\n\") for e in self.examples])\n\n    def _compose_prompt(self, instruction) -> str:\n        \"\"\"\n        Compose the prompt from template, worker description, examples and instruction.\n        \"\"\"\n        worker_desctription = self._compose_worker_description()\n        fewshot = self._compose_fewshot_prompt()\n        if self.prompt_template is not None:\n            if \"fewshot\" in self.prompt_template.placeholders:\n                return self.prompt_template.populate(\n                    tool_description=worker_desctription,\n                    fewshot=fewshot,\n                    task=instruction,\n                )\n            else:\n                return 
self.prompt_template.populate(\n                    tool_description=worker_desctription, task=instruction\n                )\n        else:\n            if self.examples is not None:\n                return few_shot_planner_prompt.populate(\n                    tool_description=worker_desctription,\n                    fewshot=fewshot,\n                    task=instruction,\n                )\n            else:\n                return zero_shot_planner_prompt.populate(\n                    tool_description=worker_desctription, task=instruction\n                )\n\n    def run(self, instruction: str, output: BaseScratchPad = BaseScratchPad()) -> Any:\n        response = None\n        output.info(\"Running Planner\")\n        prompt = self._compose_prompt(instruction)\n        output.debug(f\"Prompt: {prompt}\")\n        try:\n            response = self.model(prompt)\n            self.log_progress(\".planner\", response=response)\n            output.info(\"Planner run successful.\")\n        except ValueError as e:\n            output.error(\"Planner failed to retrieve response from LLM\")\n            raise ValueError(\"Planner failed to retrieve response from LLM\") from e\n\n        return response\n\n    def stream(self, instruction: str, output: BaseScratchPad = BaseScratchPad()):\n        response = None\n        output.info(\"Running Planner\")\n        prompt = self._compose_prompt(instruction)\n        output.debug(f\"Prompt: {prompt}\")\n\n        response = \"\"\n        try:\n            for text in self.model.stream(prompt):\n                response += text\n                yield text\n            self.log_progress(\".planner\", response=response)\n            output.info(\"Planner run successful.\")\n        except NotImplementedError:\n            print(\"Streaming is not supported, falling back to normal run\")\n            response = self.model(prompt)\n            yield response\n        except ValueError as e:\n            
output.error(\"Planner failed to retrieve response from LLM\")\n            raise ValueError(\"Planner failed to retrieve response from LLM\") from e\n\n        return response\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/agents/rewoo/prompt.py",
    "content": "# flake8: noqa\n\nfrom kotaemon.llms import PromptTemplate\n\nzero_shot_planner_prompt = PromptTemplate(\n    template=\"\"\"You are an AI agent who makes step-by-step plans to solve a problem under the help of external tools.\nFor each step, make one plan followed by one tool-call, which will be executed later to retrieve evidence for that step.\nYou should store each evidence into a distinct variable #E1, #E2, #E3 ... that can be referred to in later tool-call inputs.\n\n##Available Tools##\n{tool_description}\n\n##Output Format (Replace '<...>')##\n#Plan1: <describe your plan here>\n#E1: <toolname>[<input here>] (eg. Search[What is Python])\n#Plan2: <describe next plan>\n#E2: <toolname>[<input here, you can use #E1 to represent its expected output>]\nAnd so on...\n\n##Your Task##\n{task}\n\n##Now Begin##\n\"\"\"\n)\n\none_shot_planner_prompt = PromptTemplate(\n    template=\"\"\"You are an AI agent who makes step-by-step plans to solve a problem under the help of external tools.\nFor each step, make one plan followed by one tool-call, which will be executed later to retrieve evidence for that step.\nYou should store each evidence into a distinct variable #E1, #E2, #E3 ... 
that can be referred to in later tool-call inputs.\n\n##Available Tools##\n{tool_description}\n\n##Output Format##\n#Plan1: <describe your plan here>\n#E1: <toolname>[<input here>]\n#Plan2: <describe next plan>\n#E2: <toolname>[<input here, you can use #E1 to represent its expected output>]\nAnd so on...\n\n##Example##\nTask: What is the 4th root of 64 to the power of 3?\n#Plan1: Find the 4th root of 64\n#E1: Calculator[64^(1/4)]\n#Plan2: Raise the result from #Plan1 to the power of 3\n#E2: Calculator[#E1^3]\n\n##Your Task##\n{task}\n\n##Now Begin##\n\"\"\"\n)\n\n\nfew_shot_planner_prompt = PromptTemplate(\n    template=\"\"\"You are an AI agent who makes step-by-step plans to solve a problem under the help of external tools.\nFor each step, make one plan followed by one tool-call, which will be executed later to retrieve evidence for that step.\nYou should store each evidence into a distinct variable #E1, #E2, #E3 ... that can be referred to in later tool-call inputs.\n\n##Available Tools##\n{tool_description}\n\n##Output Format (Replace '<...>')##\n#Plan1: <describe your plan here>\n#E1: <toolname>[<input>]\n#Plan2: <describe next plan>\n#E2: <toolname>[<input, you can use #E1 to represent its expected output>]\nAnd so on...\n\n##Examples##\n{fewshot}\n\n##Your Task##\n{task}\n\n##Now Begin##\n\"\"\"\n)\n\nzero_shot_solver_prompt = PromptTemplate(\n    template=\"\"\"You are an AI agent who solves a problem with my assistance. I will provide step-by-step plans(#Plan) and evidences(#E) that could be helpful.\nYour task is to briefly summarize each step, then make a short final conclusion for your task. 
Give answer in {lang}.\n\n##My Plans and Evidences##\n{plan_evidence}\n\n##Example Output##\nFirst, I <did something> , and I think <...>; Second, I <...>, and I think <...>; ....\nSo, <your conclusion>.\n\n##Your Task##\n{task}\n\n##Now Begin##\n\"\"\"\n)\n\nfew_shot_solver_prompt = PromptTemplate(\n    template=\"\"\"You are an AI agent who solves a problem with my assistance. I will provide step-by-step plans and evidences that could be helpful.\nYour task is to briefly summarize each step, then make a short final conclusion for your task. Give answer in {lang}.\n\n##My Plans and Evidences##\n{plan_evidence}\n\n##Example Output##\nFirst, I <did something> , and I think <...>; Second, I <...>, and I think <...>; ....\nSo, <your conclusion>.\n\n##Example##\n{fewshot}\n\n##Your Task##\n{task}\n\n##Now Begin##\n\"\"\"\n)\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/agents/rewoo/solver.py",
    "content": "from typing import Any, List, Optional, Union\n\nfrom kotaemon.agents.io import BaseScratchPad\nfrom kotaemon.base import BaseComponent\nfrom kotaemon.llms import BaseLLM, PromptTemplate\n\nfrom .prompt import few_shot_solver_prompt, zero_shot_solver_prompt\n\n\nclass Solver(BaseComponent):\n    model: BaseLLM\n    prompt_template: Optional[PromptTemplate] = None\n    examples: Optional[Union[str, List[str]]] = None\n    output_lang: str = \"English\"\n\n    def _compose_fewshot_prompt(self) -> str:\n        if self.examples is None:\n            return \"\"\n        if isinstance(self.examples, str):\n            return self.examples\n        else:\n            return \"\\n\\n\".join([e.strip(\"\\n\") for e in self.examples])\n\n    def _compose_prompt(self, instruction, plan_evidence, output_lang) -> str:\n        \"\"\"\n        Compose the prompt from template, plan&evidence, examples and instruction.\n        \"\"\"\n        fewshot = self._compose_fewshot_prompt()\n        if self.prompt_template is not None:\n            if \"fewshot\" in self.prompt_template.placeholders:\n                return self.prompt_template.populate(\n                    plan_evidence=plan_evidence,\n                    fewshot=fewshot,\n                    task=instruction,\n                    lang=output_lang,\n                )\n            else:\n                return self.prompt_template.populate(\n                    plan_evidence=plan_evidence, task=instruction, lang=output_lang\n                )\n        else:\n            if self.examples is not None:\n                return few_shot_solver_prompt.populate(\n                    plan_evidence=plan_evidence,\n                    fewshot=fewshot,\n                    task=instruction,\n                    lang=output_lang,\n                )\n            else:\n                return zero_shot_solver_prompt.populate(\n                    plan_evidence=plan_evidence,\n                    task=instruction,\n  
                  lang=output_lang,\n                )\n\n    def run(\n        self,\n        instruction: str,\n        plan_evidence: str,\n        output: BaseScratchPad = BaseScratchPad(),\n    ) -> Any:\n        response = None\n        output.info(\"Running Solver\")\n        output.debug(f\"Instruction: {instruction}\")\n        output.debug(f\"Plan Evidence: {plan_evidence}\")\n        prompt = self._compose_prompt(instruction, plan_evidence, self.output_lang)\n        output.debug(f\"Prompt: {prompt}\")\n        try:\n            response = self.model(prompt)\n            output.info(\"Solver run successful.\")\n        except ValueError:\n            output.error(\"Solver failed to retrieve response from LLM\")\n\n        return response\n\n    def stream(\n        self,\n        instruction: str,\n        plan_evidence: str,\n        output: BaseScratchPad = BaseScratchPad(),\n    ) -> Any:\n        response = \"\"\n        output.info(\"Running Solver\")\n        output.debug(f\"Instruction: {instruction}\")\n        output.debug(f\"Plan Evidence: {plan_evidence}\")\n        prompt = self._compose_prompt(instruction, plan_evidence, self.output_lang)\n        output.debug(f\"Prompt: {prompt}\")\n        try:\n            for text in self.model.stream(prompt):\n                response += text.text\n                yield text\n            output.info(\"Planner run successful.\")\n        except NotImplementedError:\n            response = self.model(prompt).text\n            output.info(\"Solver run successful.\")\n        except ValueError:\n            output.error(\"Solver failed to retrieve response from LLM\")\n\n        return response\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/agents/tools/__init__.py",
    "content": "from .base import BaseTool, ComponentTool\nfrom .google import GoogleSearchTool\nfrom .llm import LLMTool\nfrom .mcp import (\n    MCPTool,\n    build_args_model,\n    create_tools_from_config,\n    discover_tools_info,\n    format_tool_list,\n    parse_mcp_config,\n)\nfrom .wikipedia import WikipediaTool\n\n__all__ = [\n    \"BaseTool\",\n    \"ComponentTool\",\n    \"GoogleSearchTool\",\n    \"WikipediaTool\",\n    \"LLMTool\",\n    \"MCPTool\",\n    \"build_args_model\",\n    \"create_tools_from_config\",\n    \"discover_tools_info\",\n    \"format_tool_list\",\n    \"parse_mcp_config\",\n]\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/agents/tools/base.py",
    "content": "from typing import Any, Callable, Dict, Optional, Tuple, Type, Union\n\nfrom langchain.agents import Tool as LCTool\nfrom pydantic import BaseModel\n\nfrom kotaemon.base import BaseComponent\n\n\nclass ToolException(Exception):\n    \"\"\"An optional exception that tool throws when execution error occurs.\n\n    When this exception is thrown, the agent will not stop working,\n    but will handle the exception according to the handle_tool_error\n    variable of the tool, and the processing result will be returned\n    to the agent as observation, and printed in red on the console.\n    \"\"\"\n\n\nclass BaseTool(BaseComponent):\n    name: str\n    \"\"\"The unique name of the tool that clearly communicates its purpose.\"\"\"\n    description: str\n    \"\"\"Description used to tell the model how/when/why to use the tool.\n    You can provide few-shot examples as a part of the description. This will be\n    input to the prompt of LLM.\n    \"\"\"\n    args_schema: Optional[Type[BaseModel]] = None\n    \"\"\"Pydantic model class to validate and parse the tool's input arguments.\"\"\"\n    verbose: bool = False\n    \"\"\"Whether to log the tool's progress.\"\"\"\n    handle_tool_error: Optional[\n        Union[bool, str, Callable[[ToolException], str]]\n    ] = False\n    \"\"\"Handle the content of the ToolException thrown.\"\"\"\n\n    def _parse_input(\n        self,\n        tool_input: Union[str, Dict],\n    ) -> Union[str, Dict[str, Any]]:\n        \"\"\"Convert tool input to pydantic model.\"\"\"\n        args_schema = self.args_schema\n        if isinstance(tool_input, str):\n            if args_schema is not None:\n                key_ = next(iter(args_schema.model_fields.keys()))\n                args_schema.validate({key_: tool_input})\n            return tool_input\n        else:\n            if args_schema is not None:\n                result = args_schema.parse_obj(tool_input)\n                return {k: v for k, v in 
result.dict().items() if k in tool_input}\n        return tool_input\n\n    def _run_tool(\n        self,\n        *args: Any,\n        **kwargs: Any,\n    ) -> Any:\n        \"\"\"Call tool.\"\"\"\n        raise NotImplementedError(f\"_run_tool is not implemented for {self.name}\")\n\n    def _to_args_and_kwargs(self, tool_input: Union[str, Dict]) -> Tuple[Tuple, Dict]:\n        # For backwards compatibility, if run_input is a string,\n        # pass as a positional argument.\n        if isinstance(tool_input, str):\n            return (tool_input,), {}\n        else:\n            return (), tool_input\n\n    def _handle_tool_error(self, e: ToolException) -> Any:\n        \"\"\"Handle the content of the ToolException thrown.\"\"\"\n        observation = None\n        if not self.handle_tool_error:\n            raise e\n        elif isinstance(self.handle_tool_error, bool):\n            if e.args:\n                observation = e.args[0]\n            else:\n                observation = \"Tool execution error\"\n        elif isinstance(self.handle_tool_error, str):\n            observation = self.handle_tool_error\n        elif callable(self.handle_tool_error):\n            observation = self.handle_tool_error(e)\n        else:\n            raise ValueError(\n                f\"Got unexpected type of `handle_tool_error`. Expected bool, str \"\n                f\"or callable. 
Received: {self.handle_tool_error}\"\n            )\n        return observation\n\n    def to_langchain_format(self) -> LCTool:\n        \"\"\"Convert this tool to Langchain format to use with its agent\"\"\"\n        return LCTool(name=self.name, description=self.description, func=self.run)\n\n    def run(\n        self,\n        tool_input: Union[str, Dict],\n        verbose: Optional[bool] = None,\n        **kwargs: Any,\n    ) -> Any:\n        \"\"\"Run the tool.\"\"\"\n        parsed_input = self._parse_input(tool_input)\n        # TODO (verbose_): Add logging\n        try:\n            tool_args, tool_kwargs = self._to_args_and_kwargs(parsed_input)\n            call_kwargs = {**kwargs, **tool_kwargs}\n            observation = self._run_tool(*tool_args, **call_kwargs)\n        except ToolException as e:\n            observation = self._handle_tool_error(e)\n            return observation\n        else:\n            return observation\n\n    @classmethod\n    def from_langchain_format(cls, langchain_tool: LCTool) -> \"BaseTool\":\n        \"\"\"Wrapper for Langchain Tool\"\"\"\n        new_tool = BaseTool(\n            name=langchain_tool.name, description=langchain_tool.description\n        )\n        new_tool._run_tool = langchain_tool._run  # type: ignore\n        return new_tool\n\n\nclass ComponentTool(BaseTool):\n    \"\"\"Wrapper around other BaseComponent to use it as a tool\n\n    Args:\n        component: BaseComponent-based component to wrap\n        postprocessor: Optional postprocessor for the component output\n    \"\"\"\n\n    component: BaseComponent\n    postprocessor: Optional[Callable] = None\n\n    def _run_tool(self, *args: Any, **kwargs: Any) -> Any:\n        output = self.component(*args, **kwargs)\n        if self.postprocessor:\n            output = self.postprocessor(output)\n\n        return output\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/agents/tools/google.py",
    "content": "from typing import AnyStr, Optional, Type\nfrom urllib.error import HTTPError\n\nfrom langchain_community.utilities import SerpAPIWrapper\nfrom pydantic import BaseModel, Field\n\nfrom .base import BaseTool\n\n\nclass GoogleSearchArgs(BaseModel):\n    query: str = Field(..., description=\"a search query\")\n\n\nclass GoogleSearchTool(BaseTool):\n    name: str = \"google_search\"\n    description: str = (\n        \"A search engine retrieving top search results as snippets from Google. \"\n        \"Input should be a search query.\"\n    )\n    args_schema: Optional[Type[BaseModel]] = GoogleSearchArgs\n\n    def _run_tool(self, query: AnyStr) -> str:\n        try:\n            from googlesearch import search\n        except ImportError:\n            raise ImportError(\n                \"install googlesearch using `pip3 install googlesearch-python` to \"\n                \"use this tool\"\n            )\n\n        try:\n            output = \"\"\n            search_results = search(query, advanced=True)\n            if search_results:\n                output = \"\\n\".join(\n                    \"{} {}\".format(item.title, item.description)\n                    for item in search_results\n                )\n        except HTTPError:\n            output = \"No evidence found.\"\n\n        return output\n\n\nclass SerpTool(BaseTool):\n    name = \"google_search\"\n    description = (\n        \"Worker that searches results from Google. Useful when you need to find short \"\n        \"and succinct answers about a specific topic. Input should be a search query.\"\n    )\n    args_schema: Optional[Type[BaseModel]] = GoogleSearchArgs\n\n    def _run_tool(self, query: AnyStr) -> str:\n        tool = SerpAPIWrapper()\n        evidence = tool.run(query)\n\n        return evidence\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/agents/tools/llm.py",
    "content": "from typing import AnyStr, Optional, Type\n\nfrom pydantic import BaseModel, Field\n\nfrom kotaemon.agents.tools.base import ToolException\nfrom kotaemon.llms import BaseLLM\n\nfrom .base import BaseTool\n\n\nclass LLMArgs(BaseModel):\n    query: str = Field(..., description=\"a search question or prompt\")\n\n\nclass LLMTool(BaseTool):\n    name: str = \"llm\"\n    description: str = (\n        \"A pretrained LLM like yourself. Useful when you need to act with \"\n        \"general world knowledge and common sense. Prioritize it when you \"\n        \"are confident in solving the problem \"\n        \"yourself. Input can be any instruction.\"\n    )\n    llm: BaseLLM\n    args_schema: Optional[Type[BaseModel]] = LLMArgs\n    dummy_mode: bool = True\n\n    def _run_tool(self, query: AnyStr) -> str:\n        output = None\n        try:\n            if not self.dummy_mode:\n                response = self.llm(query)\n            else:\n                response = None\n        except ValueError:\n            raise ToolException(\"LLM Tool call failed\")\n        output = response.text if response else \"<->\"\n        return output\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/agents/tools/mcp.py",
    "content": "\"\"\"MCP Tool for kotaemon agents.\r\n\r\nBridges the MCP SDK's tool schema with kotaemon's BaseTool abstraction\r\nso MCP tools can be seamlessly used by ReAct/ReWOO agents.\r\n\r\nThis module contains:\r\n- MCPTool: BaseTool wrapper for individual MCP server tools\r\n- Tool discovery/creation functions for building MCPTool instances from config\r\n- Config parsing utilities\r\n\"\"\"\r\n\r\nimport asyncio\r\nimport json\r\nimport logging\r\nimport shlex\r\nfrom typing import Any, Optional, Type\r\n\r\nfrom pydantic import BaseModel, Field, create_model\r\n\r\nfrom .base import BaseTool\r\n\r\nlogger = logging.getLogger(__name__)\r\n\r\n\r\n# ---------------------------------------------------------------------------\r\n# JSON Schema → Pydantic helpers\r\n# ---------------------------------------------------------------------------\r\n\r\n\r\ndef _json_schema_type_to_python(json_type: str) -> type:\r\n    \"\"\"Map JSON Schema types to Python types.\"\"\"\r\n    mapping: dict[str, type] = {\r\n        \"string\": str,\r\n        \"integer\": int,\r\n        \"number\": float,\r\n        \"boolean\": bool,\r\n        \"object\": dict,\r\n        \"array\": list,\r\n    }\r\n    return mapping.get(json_type, str)\r\n\r\n\r\ndef build_args_model(tool_name: str, input_schema: dict) -> Type[BaseModel]:\r\n    \"\"\"Build a Pydantic model from MCP tool's JSON Schema input_schema.\"\"\"\r\n    properties = input_schema.get(\"properties\", {})\r\n    required = set(input_schema.get(\"required\", []))\r\n    fields: dict[str, Any] = {}\r\n    for prop_name, prop_info in properties.items():\r\n        python_type = _json_schema_type_to_python(prop_info.get(\"type\", \"string\"))\r\n        description = prop_info.get(\"description\", \"\")\r\n        if prop_name in required:\r\n            fields[prop_name] = (python_type, Field(..., description=description))\r\n        else:\r\n            default = prop_info.get(\"default\", None)\r\n            
fields[prop_name] = (\r\n                Optional[python_type],\r\n                Field(default=default, description=description),\r\n            )\r\n\r\n    model_name = f\"MCPArgs_{tool_name}\"\r\n    return create_model(model_name, **fields)\r\n\r\n\r\n# ---------------------------------------------------------------------------\r\n# Config parsing\r\n# ---------------------------------------------------------------------------\r\n\r\n\r\ndef parse_mcp_config(config: dict) -> dict:\r\n    \"\"\"Parse a JSON config into normalised transport/command/args/env.\r\n\r\n    Handles the case where the user puts the full command string\r\n    (e.g. ``\"npx -y mcp-remote https://...\"`` ) into the command field.\r\n\r\n    Returns a dict with keys: transport, command, args, env.\r\n    \"\"\"\r\n    transport = config.get(\"transport\", \"stdio\")\r\n    command = config.get(\"command\", \"\")\r\n    args = config.get(\"args\", [])\r\n    env = config.get(\"env\", {})\r\n    url = config.get(\"url\", \"\")\r\n\r\n    # If stdio and args is empty but command has spaces, split it\r\n    if transport == \"stdio\" and not args and \" \" in command:\r\n        parts = shlex.split(command)\r\n        command = parts[0]\r\n        args = parts[1:]\r\n\r\n    return {\r\n        \"transport\": transport,\r\n        \"command\": command if transport == \"stdio\" else url,\r\n        \"args\": args,\r\n        \"env\": env,\r\n    }\r\n\r\n\r\n# ---------------------------------------------------------------------------\r\n# Tool discovery & creation\r\n# ---------------------------------------------------------------------------\r\n\r\n\r\ndef _make_tool(parsed: dict, tool_info: Any) -> \"MCPTool\":\r\n    \"\"\"Build an MCPTool from MCP tool info.\"\"\"\r\n    input_schema = tool_info.inputSchema if hasattr(tool_info, \"inputSchema\") else {}\r\n    args_model = (\r\n        build_args_model(tool_info.name, input_schema) if input_schema else None\r\n    )\r\n\r\n    return 
MCPTool(\r\n        name=tool_info.name,\r\n        description=tool_info.description or f\"MCP tool: {tool_info.name}\",\r\n        args_schema=args_model,\r\n        server_transport=parsed[\"transport\"],\r\n        server_command=parsed[\"command\"],\r\n        server_args=parsed.get(\"args\", []),\r\n        server_env=parsed.get(\"env\", {}),\r\n        mcp_tool_name=tool_info.name,\r\n    )\r\n\r\n\r\nasync def _async_discover_tools(parsed: dict) -> list[\"MCPTool\"]:\r\n    \"\"\"Async: connect to an MCP server and return MCPTool wrappers.\"\"\"\r\n    from mcp import ClientSession\r\n    from mcp.client.sse import sse_client\r\n    from mcp.client.stdio import StdioServerParameters, stdio_client\r\n\r\n    tools: list[MCPTool] = []\r\n    transport = parsed[\"transport\"]\r\n\r\n    if transport == \"stdio\":\r\n        server_params = StdioServerParameters(\r\n            command=parsed[\"command\"],\r\n            args=parsed.get(\"args\", []),\r\n            env=parsed.get(\"env\") or None,\r\n        )\r\n        async with stdio_client(server_params) as (read, write):\r\n            async with ClientSession(read, write) as session:\r\n                await session.initialize()\r\n                result = await session.list_tools()\r\n                for tool_info in result.tools:\r\n                    tools.append(_make_tool(parsed, tool_info))\r\n    elif transport == \"sse\":\r\n        async with sse_client(url=parsed[\"command\"]) as (read, write):\r\n            async with ClientSession(read, write) as session:\r\n                await session.initialize()\r\n                result = await session.list_tools()\r\n                for tool_info in result.tools:\r\n                    tools.append(_make_tool(parsed, tool_info))\r\n\r\n    return tools\r\n\r\n\r\ndef _run_async(coro: Any) -> Any:\r\n    \"\"\"Run an async coroutine from a sync context, handling event loops.\"\"\"\r\n    try:\r\n        loop = asyncio.get_event_loop()\r\n        if 
loop.is_running():\r\n            import concurrent.futures\r\n\r\n            with concurrent.futures.ThreadPoolExecutor() as pool:\r\n                return pool.submit(asyncio.run, coro).result()\r\n        else:\r\n            return loop.run_until_complete(coro)\r\n    except RuntimeError:\r\n        return asyncio.run(coro)\r\n\r\n\r\ndef create_tools_from_config(\r\n    config: dict,\r\n    enabled_tools: Optional[list[str]] = None,\r\n) -> list[\"MCPTool\"]:\r\n    \"\"\"Create MCPTool instances from an MCP server config dict.\r\n\r\n    Args:\r\n        config: MCP server JSON config with keys like transport, command, etc.\r\n        enabled_tools: If provided, only return tools whose names are in this\r\n            list.  If ``None`` or empty, return all discovered tools.\r\n\r\n    Returns:\r\n        List of MCPTool instances ready for use by agents.\r\n    \"\"\"\r\n    parsed = parse_mcp_config(config)\r\n    tools = _run_async(_async_discover_tools(parsed))\r\n\r\n    if enabled_tools:\r\n        tools = [t for t in tools if t.mcp_tool_name in enabled_tools]\r\n\r\n    return tools\r\n\r\n\r\nasync def async_discover_tools_info(config: dict) -> list[dict]:\r\n    \"\"\"Connect to an MCP server and return raw tool info dicts.\r\n\r\n    Returns a list of dicts with keys: name, description.\r\n    Useful for UI display without instantiating full MCPTool objects.\r\n    \"\"\"\r\n    from mcp import ClientSession\r\n    from mcp.client.sse import sse_client\r\n    from mcp.client.stdio import StdioServerParameters, stdio_client\r\n\r\n    parsed = parse_mcp_config(config)\r\n    transport = parsed[\"transport\"]\r\n    tool_infos: list[dict] = []\r\n\r\n    if transport == \"stdio\":\r\n        server_params = StdioServerParameters(\r\n            command=parsed[\"command\"],\r\n            args=parsed.get(\"args\", []),\r\n            env=parsed.get(\"env\") or None,\r\n        )\r\n        async with stdio_client(server_params) as (read, write):\r\n  
          async with ClientSession(read, write) as session:\r\n                await session.initialize()\r\n                result = await session.list_tools()\r\n                for t in result.tools:\r\n                    tool_infos.append(\r\n                        {\r\n                            \"name\": t.name,\r\n                            \"description\": t.description or \"\",\r\n                        }\r\n                    )\r\n    elif transport == \"sse\":\r\n        async with sse_client(url=parsed[\"command\"]) as (read, write):\r\n            async with ClientSession(read, write) as session:\r\n                await session.initialize()\r\n                result = await session.list_tools()\r\n                for t in result.tools:\r\n                    tool_infos.append(\r\n                        {\r\n                            \"name\": t.name,\r\n                            \"description\": t.description or \"\",\r\n                        }\r\n                    )\r\n\r\n    return tool_infos\r\n\r\n\r\ndef discover_tools_info(config: dict) -> list[dict]:\r\n    \"\"\"Sync wrapper around async_discover_tools_info.\"\"\"\r\n    return _run_async(async_discover_tools_info(config))\r\n\r\n\r\ndef format_tool_list(\r\n    tool_infos: list[dict],\r\n    enabled_tools: Optional[list[str]] = None,\r\n) -> str:\r\n    \"\"\"Format tool info dicts into a readable HTML string.\r\n\r\n    Args:\r\n        tool_infos: List of dicts with 'name' and 'description' keys.\r\n        enabled_tools: If provided, marks which tools are enabled.\r\n    \"\"\"\r\n    lines = [f\"✅ Connected! 
Found <b>{len(tool_infos)}</b> tool(s):<br>\"]\r\n    for t in tool_infos:\r\n        desc = (t.get(\"description\") or \"No description\")[:120]\r\n        if enabled_tools is not None:\r\n            check = \"✅\" if t[\"name\"] in enabled_tools else \"⬜\"\r\n            lines.append(f\"&nbsp;&nbsp;{check} <b>{t['name']}</b> — {desc}<br>\")\r\n        else:\r\n            lines.append(f\"&nbsp;&nbsp;• <b>{t['name']}</b> — {desc}<br>\")\r\n    if enabled_tools is not None:\r\n        enabled_count = sum(1 for t in tool_infos if t[\"name\"] in enabled_tools)\r\n        lines.append(\r\n            f\"<br><i>{enabled_count}/{len(tool_infos)} tool(s) enabled. \"\r\n            'Add <code>\"enabled_tools\": [\"tool_name\", ...]</code> '\r\n            \"to your config JSON to limit tools.</i>\"\r\n        )\r\n    else:\r\n        lines.append(\r\n            \"<br><i>All tools enabled. Add \"\r\n            '<code>\"enabled_tools\": [\"tool_name\", ...]</code> '\r\n            \"to your config JSON to limit tools.</i>\"\r\n        )\r\n    return \"\".join(lines)\r\n\r\n\r\n# ---------------------------------------------------------------------------\r\n# MCPTool class\r\n# ---------------------------------------------------------------------------\r\n\r\n\r\nclass MCPTool(BaseTool):\r\n    \"\"\"A kotaemon BaseTool wrapper around a single MCP server tool.\r\n\r\n    This tool holds the MCP server configuration and establishes\r\n    a connection to invoke the tool on demand.\r\n\r\n    Example usage::\r\n\r\n        tool = MCPTool(\r\n            name=\"search\",\r\n            description=\"Search the web\",\r\n            server_transport=\"stdio\",\r\n            server_command=\"uvx\",\r\n            server_args=[\"mcp-server-fetch\"],\r\n            mcp_tool_name=\"fetch\",\r\n        )\r\n        result = tool.run(\"https://example.com\")\r\n    \"\"\"\r\n\r\n    name: str = \"\"\r\n    description: str = \"\"\r\n    args_schema: Optional[Type[BaseModel]] = 
None\r\n\r\n    # MCP server connection details\r\n    server_transport: str = \"stdio\"\r\n    server_command: str = \"\"\r\n    server_args: list[str] = []\r\n    server_env: dict[str, str] = {}\r\n\r\n    # The original MCP tool name (on the server)\r\n    mcp_tool_name: str = \"\"\r\n\r\n    def _run_tool(self, *args: Any, **kwargs: Any) -> str:\r\n        \"\"\"Invoke the MCP tool by establishing a session.\"\"\"\r\n        return _run_async(self._arun_tool(*args, **kwargs))\r\n\r\n    async def _arun_tool(self, *args: Any, **kwargs: Any) -> str:\r\n        \"\"\"Async implementation that connects to the MCP server and calls\r\n        the tool.\"\"\"\r\n        from mcp import ClientSession\r\n        from mcp.client.sse import sse_client\r\n        from mcp.client.stdio import StdioServerParameters, stdio_client\r\n\r\n        # Build tool arguments\r\n        if args and isinstance(args[0], str):\r\n            try:\r\n                tool_args = json.loads(args[0])\r\n            except json.JSONDecodeError:\r\n                # If not JSON, assume single string argument\r\n                if self.args_schema:\r\n                    first_field = next(iter(self.args_schema.model_fields.keys()))\r\n                    tool_args = {first_field: args[0]}\r\n                else:\r\n                    tool_args = {\"input\": args[0]}\r\n        else:\r\n            tool_args = kwargs\r\n\r\n        if self.server_transport == \"stdio\":\r\n            cmd = self.server_command\r\n            cmd_args = self.server_args\r\n            # Auto-split if full command string with no separate args\r\n            if not cmd_args and \" \" in cmd:\r\n                parts = shlex.split(cmd)\r\n                cmd = parts[0]\r\n                cmd_args = parts[1:]\r\n\r\n            server_params = StdioServerParameters(\r\n                command=cmd,\r\n                args=cmd_args,\r\n                env=self.server_env if self.server_env else None,\r\n            
)\r\n            async with stdio_client(server_params) as (read, write):\r\n                async with ClientSession(read, write) as session:\r\n                    await session.initialize()\r\n                    result = await session.call_tool(self.mcp_tool_name, tool_args)\r\n                    return self._format_result(result)\r\n        elif self.server_transport == \"sse\":\r\n            async with sse_client(url=self.server_command) as (read, write):\r\n                async with ClientSession(read, write) as session:\r\n                    await session.initialize()\r\n                    result = await session.call_tool(self.mcp_tool_name, tool_args)\r\n                    return self._format_result(result)\r\n        else:\r\n            return f\"Unsupported transport: {self.server_transport}\"\r\n\r\n    def _format_result(self, result: Any) -> str:\r\n        \"\"\"Format MCP CallToolResult into a string.\"\"\"\r\n        if result.isError:\r\n            return f\"MCP Tool Error: {result.content}\"\r\n\r\n        parts = []\r\n        for content in result.content:\r\n            if hasattr(content, \"text\"):\r\n                parts.append(content.text)\r\n            elif hasattr(content, \"data\"):\r\n                parts.append(f\"[Binary data: {content.mimeType}]\")\r\n            else:\r\n                parts.append(str(content))\r\n        return \"\\n\".join(parts)\r\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/agents/tools/wikipedia.py",
    "content": "from typing import Any, AnyStr, Optional, Type, Union\n\nfrom pydantic import BaseModel, Field\n\nfrom kotaemon.base import Document\n\nfrom .base import BaseTool\n\n\nclass Wiki:\n    \"\"\"Wrapper around wikipedia API.\"\"\"\n\n    def __init__(self) -> None:\n        \"\"\"Check that wikipedia package is installed.\"\"\"\n        try:\n            import wikipedia  # noqa: F401\n        except ImportError:\n            raise ValueError(\n                \"Could not import wikipedia python package. \"\n                \"Please install it with `pip install wikipedia`.\"\n            )\n\n    def search(self, search: str) -> Union[str, Document]:\n        \"\"\"Try to search for wiki page.\n\n        If page exists, return the page summary, and a PageWithLookups object.\n        If page does not exist, return similar entries.\n        \"\"\"\n        import wikipedia\n\n        try:\n            page_content = wikipedia.page(search).content\n            url = wikipedia.page(search).url\n            result: Union[str, Document] = Document(\n                text=page_content, metadata={\"page\": url}\n            )\n        except wikipedia.PageError:\n            result = f\"Could not find [{search}]. Similar: {wikipedia.search(search)}\"\n        except wikipedia.DisambiguationError:\n            result = f\"Could not find [{search}]. Similar: {wikipedia.search(search)}\"\n        return result\n\n\nclass WikipediaArgs(BaseModel):\n    query: str = Field(..., description=\"a search query as input to wkipedia\")\n\n\nclass WikipediaTool(BaseTool):\n    \"\"\"Tool that adds the capability to query the Wikipedia API.\"\"\"\n\n    name: str = \"wikipedia\"\n    description: str = (\n        \"Search engine from Wikipedia, retrieving relevant wiki page. \"\n        \"Useful when you need to get holistic knowledge about people, \"\n        \"places, companies, historical events, or other subjects. 
\"\n        \"Input should be a search query.\"\n    )\n    args_schema: Optional[Type[BaseModel]] = WikipediaArgs\n    doc_store: Any = None\n\n    def _run_tool(self, query: AnyStr) -> AnyStr:\n        if not self.doc_store:\n            self.doc_store = Wiki()\n        tool = self.doc_store\n        evidence = tool.search(query)\n        return evidence\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/agents/utils.py",
    "content": "from kotaemon.base import Document\n\n\ndef get_plugin_response_content(output) -> str:\n    \"\"\"\n    Wrapper for AgentOutput content return\n    \"\"\"\n    if isinstance(output, Document):\n        return output.text\n    else:\n        return str(output)\n\n\ndef calculate_cost(model_name: str, prompt_token: int, completion_token: int) -> float:\n    \"\"\"\n    Calculate the cost of a prompt and completion.\n\n    Returns:\n        float: Cost of the provided model name with provided token information\n    \"\"\"\n    # TODO: to be implemented\n    return 0.0\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/base/__init__.py",
    "content": "from .component import BaseComponent, Node, Param, lazy\nfrom .schema import (\n    AIMessage,\n    BaseMessage,\n    Document,\n    DocumentWithEmbedding,\n    ExtractorOutput,\n    HumanMessage,\n    LLMInterface,\n    RetrievedDocument,\n    StructuredOutputLLMInterface,\n    SystemMessage,\n)\n\n__all__ = [\n    \"BaseComponent\",\n    \"Document\",\n    \"DocumentWithEmbedding\",\n    \"BaseMessage\",\n    \"SystemMessage\",\n    \"AIMessage\",\n    \"HumanMessage\",\n    \"RetrievedDocument\",\n    \"LLMInterface\",\n    \"StructuredOutputLLMInterface\",\n    \"ExtractorOutput\",\n    \"Param\",\n    \"Node\",\n    \"lazy\",\n]\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/base/component.py",
    "content": "from abc import abstractmethod\nfrom typing import Any, AsyncGenerator, Iterator, Optional\n\nfrom theflow import Function, Node, Param, lazy\n\nfrom kotaemon.base.schema import Document\n\n\nclass BaseComponent(Function):\n    \"\"\"A component is a class that can be used to compose a pipeline.\n\n    !!! tip \"Benefits of component\"\n        - Auto caching, logging\n        - Allow deployment\n\n    !!! tip \"For each component, the spirit is\"\n        - Tolerate multiple input types, e.g. str, Document, List[str], List[Document]\n        - Enforce single output type. Hence, the output type of a component should be\n    as generic as possible.\n    \"\"\"\n\n    inflow = None\n\n    def flow(self):\n        if self.inflow is None:\n            raise ValueError(\"No inflow provided.\")\n\n        if not isinstance(self.inflow, BaseComponent):\n            raise ValueError(\n                f\"inflow must be a BaseComponent, found {type(self.inflow)}\"\n            )\n\n        return self.__call__(self.inflow.flow())\n\n    def set_output_queue(self, queue):\n        self._queue = queue\n        for name in self._ff_nodes:\n            node = getattr(self, name)\n            if isinstance(node, BaseComponent):\n                node.set_output_queue(queue)\n\n    def report_output(self, output: Optional[Document]):\n        if self._queue is not None:\n            self._queue.put_nowait(output)\n\n    def invoke(self, *args, **kwargs) -> Document | list[Document] | None:\n        ...\n\n    async def ainvoke(self, *args, **kwargs) -> Document | list[Document] | None:\n        ...\n\n    def stream(self, *args, **kwargs) -> Iterator[Document] | None:\n        ...\n\n    def astream(self, *args, **kwargs) -> AsyncGenerator[Document, None] | None:\n        ...\n\n    @abstractmethod\n    def run(\n        self, *args, **kwargs\n    ) -> Document | list[Document] | Iterator[Document] | None | Any:\n        \"\"\"Run the component.\"\"\"\n        
...\n\n\n__all__ = [\"BaseComponent\", \"Param\", \"Node\", \"lazy\"]\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/base/schema.py",
    "content": "from __future__ import annotations\n\nfrom typing import TYPE_CHECKING, Any, Literal, Optional, TypeVar\n\nfrom langchain.schema.messages import AIMessage as LCAIMessage\nfrom langchain.schema.messages import HumanMessage as LCHumanMessage\nfrom langchain.schema.messages import SystemMessage as LCSystemMessage\nfrom llama_index.core.bridge.pydantic import Field\nfrom llama_index.core.schema import Document as BaseDocument\n\nif TYPE_CHECKING:\n    from haystack.schema import Document as HaystackDocument\n    from openai.types.chat.chat_completion_message_param import (\n        ChatCompletionMessageParam,\n    )\n\nIO_Type = TypeVar(\"IO_Type\", \"Document\", str)\nSAMPLE_TEXT = \"A sample Document from kotaemon\"\n\n\nclass Document(BaseDocument):\n    \"\"\"\n    Base document class, mostly inherited from Document class from llama-index.\n\n    This class accept one positional argument `content` of an arbitrary type, which will\n        store the raw content of the document. If specified, the class will use\n        `content` to initialize the base llama_index class.\n\n    Attributes:\n        content: raw content of the document, can be anything\n        source: id of the source of the Document. Optional.\n        channel: the channel to show the document. 
Optional. One of:\n            - chat: show in chat message\n            - info: show in information panel\n            - index: show in index panel\n            - debug: show in debug panel\n    \"\"\"\n\n    content: Any = None\n    source: Optional[str] = None\n    channel: Optional[Literal[\"chat\", \"info\", \"index\", \"debug\", \"plot\"]] = None\n\n    def __init__(self, content: Optional[Any] = None, *args, **kwargs):\n        if content is None:\n            if kwargs.get(\"text\", None) is not None:\n                kwargs[\"content\"] = kwargs[\"text\"]\n            elif kwargs.get(\"embedding\", None) is not None:\n                kwargs[\"content\"] = kwargs[\"embedding\"]\n                # default text indicating this document only contains embedding\n                kwargs[\"text\"] = \"<EMBEDDING>\"\n        elif isinstance(content, Document):\n            # TODO: simplify the Document class\n            temp_ = content.dict()\n            temp_.update(kwargs)\n            kwargs = temp_\n        else:\n            kwargs[\"content\"] = content\n            if content:\n                kwargs[\"text\"] = str(content)\n            else:\n                kwargs[\"text\"] = \"\"\n        super().__init__(*args, **kwargs)\n\n    def __bool__(self):\n        return bool(self.content)\n\n    @classmethod\n    def example(cls) -> \"Document\":\n        document = Document(\n            text=SAMPLE_TEXT,\n            metadata={\"filename\": \"README.md\", \"category\": \"codebase\"},\n        )\n        return document\n\n    def to_haystack_format(self) -> \"HaystackDocument\":\n        \"\"\"Convert struct to Haystack document format.\"\"\"\n        from haystack.schema import Document as HaystackDocument\n\n        metadata = self.metadata or {}\n        text = self.text\n        return HaystackDocument(content=text, meta=metadata)\n\n    def __str__(self):\n        return str(self.content)\n\n\nclass DocumentWithEmbedding(Document):\n    \"\"\"Subclass of 
Document that must contain an embedding\n\n    Use this to enforce that a component's inputs/outputs contain an embedding.\n    \"\"\"\n\n    def __init__(self, embedding: list[float], *args, **kwargs):\n        kwargs[\"embedding\"] = embedding\n        super().__init__(*args, **kwargs)\n\n\nclass BaseMessage(Document):\n    def __add__(self, other: Any):\n        raise NotImplementedError\n\n    def to_openai_format(self) -> \"ChatCompletionMessageParam\":\n        raise NotImplementedError\n\n\nclass SystemMessage(BaseMessage, LCSystemMessage):\n    def to_openai_format(self) -> \"ChatCompletionMessageParam\":\n        return {\"role\": \"system\", \"content\": self.content}\n\n\nclass AIMessage(BaseMessage, LCAIMessage):\n    def to_openai_format(self) -> \"ChatCompletionMessageParam\":\n        return {\"role\": \"assistant\", \"content\": self.content}\n\n\nclass HumanMessage(BaseMessage, LCHumanMessage):\n    def to_openai_format(self) -> \"ChatCompletionMessageParam\":\n        return {\"role\": \"user\", \"content\": self.content}\n\n\nclass RetrievedDocument(Document):\n    \"\"\"Subclass of Document with retrieval-related information\n\n    Attributes:\n        score (float): score of the document (from 0.0 to 1.0)\n        retrieval_metadata (dict): metadata from the retrieval process, can be used\n            by different components in a retrieval pipeline to communicate with each\n            other\n    \"\"\"\n\n    score: float = Field(default=0.0)\n    retrieval_metadata: dict = Field(default={})\n\n\nclass LLMInterface(AIMessage):\n    candidates: list[str] = Field(default_factory=list)\n    completion_tokens: int = -1\n    total_tokens: int = -1\n    prompt_tokens: int = -1\n    total_cost: float = 0\n    logits: list[list[float]] = Field(default_factory=list)\n    messages: list[AIMessage] = Field(default_factory=list)\n    logprobs: list[float] = []\n\n\nclass StructuredOutputLLMInterface(LLMInterface):\n    parsed: Any\n    refusal: str = 
\"\"\n\n\nclass ExtractorOutput(Document):\n    \"\"\"\n    Represents the output of an extractor.\n    \"\"\"\n\n    matches: list[str]\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/chatbot/__init__.py",
    "content": "from .base import BaseChatBot, ChatConversation\nfrom .simple_respondent import SimpleRespondentChatbot\n\n__all__ = [\"BaseChatBot\", \"SimpleRespondentChatbot\", \"ChatConversation\"]\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/chatbot/base.py",
    "content": "from abc import abstractmethod\nfrom typing import List, Optional\n\nfrom theflow import SessionFunction\n\nfrom kotaemon.base import BaseComponent, LLMInterface\nfrom kotaemon.base.schema import AIMessage, BaseMessage, HumanMessage, SystemMessage\n\n\nclass BaseChatBot(BaseComponent):\n    @abstractmethod\n    def run(self, messages: List[BaseMessage]) -> LLMInterface:\n        ...\n\n\ndef session_chat_storage(obj):\n    \"\"\"Store using the bot location rather than the session location\"\"\"\n    return obj._store_result\n\n\nclass ChatConversation(SessionFunction):\n    \"\"\"Base implementation of a chat bot component\n\n    A chatbot component should:\n        - handle internal state, including history messages\n        - return output for a given input\n    \"\"\"\n\n    class Config:\n        store_result = session_chat_storage\n\n    system_message: str = \"\"\n    bot: BaseChatBot\n\n    def __init__(self, *args, **kwargs):\n        self._history: List[BaseMessage] = []\n        self._store_result = (\n            f\"{self.__module__}.{self.__class__.__name__},uninitiated_bot\"\n        )\n        super().__init__(*args, **kwargs)\n\n    def run(self, message: HumanMessage) -> Optional[BaseMessage]:\n        \"\"\"Chat, given a message, return a response\n\n        Args:\n            message: The message to respond to\n\n        Returns:\n            The response to the message. 
If None, no response is sent.\n        \"\"\"\n        user_message = (\n            HumanMessage(content=message) if isinstance(message, str) else message\n        )\n        self.history.append(user_message)\n\n        output = self.bot(self.history).text\n        output_message = None\n        if output is not None:\n            output_message = AIMessage(content=output)\n            self.history.append(output_message)\n\n        return output_message\n\n    def start_session(self):\n        self._store_result = self.bot.config.store_result\n        super().start_session()\n        if not self.history and self.system_message:\n            system_message = SystemMessage(content=self.system_message)\n            self.history.append(system_message)\n\n    def end_session(self):\n        super().end_session()\n        self._history = []\n\n    def check_end(\n        self,\n        history: Optional[List[BaseMessage]] = None,\n        user_message: Optional[HumanMessage] = None,\n        bot_message: Optional[AIMessage] = None,\n    ) -> bool:\n        \"\"\"Check if a conversation should end\"\"\"\n        if user_message is not None and user_message.content == \"\":\n            return True\n\n        return False\n\n    def terminal_session(self):\n        \"\"\"Create a terminal session\"\"\"\n        self.start_session()\n        print(\">> Start chat:\")\n\n        while True:\n            human = HumanMessage(content=input(\"Human: \"))\n            if self.check_end(history=self.history, user_message=human):\n                break\n\n            output = self(human)\n            if output is None:\n                print(\"AI: <No response>\")\n            else:\n                print(\"AI:\", output.content)\n\n            if self.check_end(history=self.history, bot_message=output):\n                break\n\n        self.end_session()\n\n    @property\n    def history(self):\n        return self._history\n\n    @history.setter\n    def history(self, 
value):\n        self._history = value\n        self._variablex()\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/chatbot/simple_respondent.py",
    "content": "from ..llms import ChatLLM\nfrom .base import BaseChatBot\n\n\nclass SimpleRespondentChatbot(BaseChatBot):\n    \"\"\"Simple text respondent chatbot that essentially wraps around a chat LLM\"\"\"\n\n    llm: ChatLLM\n\n    def _get_message(self) -> str:\n        return self.llm(self.history).text\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/cli.py",
    "content": "import os\n\nimport click\nimport yaml\nfrom trogon import tui\n\n\n# check if the output is not a .yml file -> raise error\ndef check_config_format(config):\n    if os.path.exists(config):\n        if isinstance(config, str):\n            with open(config) as f:\n                yaml.safe_load(f)\n        else:\n            raise ValueError(\"config must be yaml format.\")\n\n\n@tui(command=\"ui\", help=\"Open the terminal UI\")  # generate the terminal UI\n@click.group()\ndef main():\n    pass\n\n\n@click.group()\ndef promptui():\n    pass\n\n\nmain.add_command(promptui)\n\n\n@promptui.command()\n@click.argument(\"export_path\", nargs=1)\n@click.option(\"--output\", default=\"promptui.yml\", show_default=True, required=False)\ndef export(export_path, output):\n    \"\"\"Export a pipeline to a config file\"\"\"\n    import sys\n\n    from theflow.utils.modules import import_dotted_string\n\n    from kotaemon.contribs.promptui.config import export_pipeline_to_config\n\n    sys.path.append(os.getcwd())\n    cls = import_dotted_string(export_path, safe=False)\n    export_pipeline_to_config(cls, output)\n    check_config_format(output)\n\n\n@promptui.command()\n@click.argument(\"run_path\", required=False, default=\"promptui.yml\")\n@click.option(\n    \"--share\",\n    is_flag=True,\n    show_default=True,\n    default=False,\n    help=\"Share the app through Gradio. Requires --username to enable authentication.\",\n)\n@click.option(\n    \"--username\",\n    required=False,\n    help=(\n        \"Username for the user. If not provided, the promptui will not have \"\n        \"authentication.\"\n    ),\n)\n@click.option(\n    \"--password\",\n    required=False,\n    help=\"Password for the user. If not provided, will be prompted.\",\n)\n@click.option(\n    \"--appname\",\n    required=False,\n    help=\"The share app subdomain. Requires --share and --username\",\n)\n@click.option(\n    \"--port\",\n    required=False,\n    help=\"Port to run the app. 
If not provided, will $GRADIO_SERVER_PORT (7860)\",\n)\ndef run(run_path, share, username, password, appname, port):\n    \"\"\"Run the UI from a config file\n\n    Examples:\n\n        \\b\n        # Run with default config file\n        $ kh promptui run\n\n        \\b\n        # Run with username and password supplied\n        $ kh promptui run --username admin --password password\n\n        \\b\n        # Run with username and prompted password\n        $ kh promptui run --username admin\n\n        # Run and share to promptui\n        # kh promptui run --username admin --password password --share --appname hey \\\n                --port 7861\n    \"\"\"\n    import sys\n\n    from kotaemon.contribs.promptui.ui import build_from_dict\n\n    sys.path.append(os.getcwd())\n\n    check_config_format(run_path)\n    demo = build_from_dict(run_path)\n\n    params: dict = {}\n    if username is not None:\n        if password is not None:\n            auth = (username, password)\n        else:\n            auth = (username, click.prompt(\"Password\", hide_input=True))\n        params[\"auth\"] = auth\n\n    port = int(port) if port else int(os.getenv(\"GRADIO_SERVER_PORT\", \"7860\"))\n    params[\"server_port\"] = port\n\n    if share:\n        if username is None:\n            raise ValueError(\n                \"Username must be provided to enable authentication for sharing\"\n            )\n        if appname:\n            from kotaemon.contribs.promptui.tunnel import Tunnel\n\n            tunnel = Tunnel(\n                appname=str(appname), username=str(username), local_port=port\n            )\n            url = tunnel.run()\n            print(f\"App is shared at {url}\")\n        else:\n            params[\"share\"] = True\n            print(\"App is shared at Gradio\")\n\n    demo.launch(**params)\n\n\n@main.command()\n@click.argument(\"module\", required=True)\n@click.option(\n    \"--output\", default=\"docs.md\", required=False, help=\"The output markdown 
file\"\n)\n@click.option(\n    \"--separation-level\", required=False, default=1, help=\"Organize markdown layout\"\n)\ndef makedoc(module, output, separation_level):\n    \"\"\"Make documentation for module `module`\n\n    Example:\n\n        \\b\n        # Make component documentation for kotaemon library\n        $ kh makedoc kotaemon\n    \"\"\"\n    from kotaemon.contribs.docs import make_doc\n\n    make_doc(module, output, separation_level)\n    print(f\"Documentation exported to {output}\")\n\n\n@main.command()\n@click.option(\n    \"--template\",\n    default=\"project-default\",\n    required=False,\n    help=\"Template name\",\n    show_default=True,\n)\ndef start_project(template):\n    \"\"\"Start a project from a template.\n\n    Important: the value for --template corresponds to the name of the template folder,\n    which is located at https://github.com/Cinnamon/kotaemon/tree/main/templates\n    The default value is \"project-default\", which should work when you are starting a\n    client project.\n    \"\"\"\n\n    print(\"Retrieving template...\")\n    os.system(\n        \"cookiecutter git@github.com:Cinnamon/kotaemon.git \"\n        f\"--directory='templates/{template}'\"\n    )\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/contribs/__init__.py",
    "content": ""
  },
  {
    "path": "libs/kotaemon/kotaemon/contribs/docs.py",
    "content": "import inspect\nfrom collections import defaultdict\n\nfrom theflow.utils.documentation import get_function_documentation_from_module\n\n\ndef from_definition_to_markdown(definition: dict) -> str:\n    \"\"\"From definition to markdown\"\"\"\n\n    # Handle params\n    params = \" N/A\\n\"\n    if definition[\"params\"]:\n        params = \"\\n| Name | Description | Type | Default |\\n\"\n        params += \"| --- | --- | --- | --- |\\n\"\n        for name, p in definition[\"params\"].items():\n            type_ = p[\"type\"].__name__ if inspect.isclass(p[\"type\"]) else p[\"type\"]\n            params += f\"| {name} | {p['desc']} | {type_} | {p['default']} |\\n\"\n\n    # Handle nodes\n    nodes = \" N/A\\n\"\n    if definition[\"nodes\"]:\n        nodes = \"\\n| Name | Description | Type | Input | Output |\\n\"\n        nodes += \"| --- | --- | --- | --- | --- |\\n\"\n        for name, n in definition[\"nodes\"].items():\n            type_ = n[\"type\"].__name__ if inspect.isclass(n[\"type\"]) else str(n[\"type\"])\n            input_ = (\n                n[\"input\"].__name__ if inspect.isclass(n[\"input\"]) else str(n[\"input\"])\n            )\n            output_ = (\n                n[\"output\"].__name__\n                if inspect.isclass(n[\"output\"])\n                else str(n[\"output\"])\n            )\n            nodes += f\"|{name}|{n['desc']}|{type_}|{input_}|{output_}|\\n\"\n\n    description = inspect.cleandoc(definition[\"desc\"])\n    return f\"{description}\\n\\n_**Params:**_{params}\\n_**Nodes:**_{nodes}\"\n\n\ndef make_doc(module: str, output: str, separation_level: int):\n    \"\"\"Run exporting components to markdown\n\n    Args:\n        module (str): module name\n        output_path (str): output path to save\n        separation_level (int): level of separation\n    \"\"\"\n    documentation = sorted(\n        get_function_documentation_from_module(module).items(), key=lambda x: x[0]\n    )\n\n    entries = 
defaultdict(list)\n\n    for name, definition in documentation:\n        section = name.split(\".\")[separation_level].capitalize()\n        cls_name = name.split(\".\")[-1]\n\n        markdown = from_definition_to_markdown(definition)\n        entries[section].append(f\"### {cls_name}\\n{markdown}\")\n\n    final = \"\\n\".join(\n        [f\"## {section}\\n\" + \"\\n\".join(entries[section]) for section in entries]\n    )\n\n    with open(output, \"w\") as f:\n        f.write(final)\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/contribs/promptui/.gitignore",
    "content": "/frpc_*\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/contribs/promptui/__init__.py",
    "content": ""
  },
  {
    "path": "libs/kotaemon/kotaemon/contribs/promptui/base.py",
    "content": "import gradio as gr\n\nCOMPONENTS_CLASS = {\n    \"text\": gr.components.Textbox,\n    \"checkbox\": gr.components.CheckboxGroup,\n    \"dropdown\": gr.components.Dropdown,\n    \"file\": gr.components.File,\n    \"image\": gr.components.Image,\n    \"number\": gr.components.Number,\n    \"radio\": gr.components.Radio,\n    \"slider\": gr.components.Slider,\n}\nSUPPORTED_COMPONENTS = set(COMPONENTS_CLASS.keys())\nDEFAULT_COMPONENT_BY_TYPES = {\n    \"str\": \"text\",\n    \"bool\": \"checkbox\",\n    \"int\": \"number\",\n    \"float\": \"number\",\n    \"list\": \"dropdown\",\n}\n\n\ndef get_component(component_def: dict) -> gr.components.Component:\n    \"\"\"Get the component based on component definition\"\"\"\n    component_cls = None\n\n    if \"component\" in component_def:\n        component = component_def[\"component\"]\n        if component not in SUPPORTED_COMPONENTS:\n            raise ValueError(\n                f\"Unsupported UI component: {component}. \"\n                f\"Must be one of {SUPPORTED_COMPONENTS}\"\n            )\n\n        component_cls = COMPONENTS_CLASS[component]\n    else:\n        raise ValueError(\n            f\"Cannot decide the component from {component_def}. \"\n            \"Please specify `component` with 1 of the following \"\n            f\"values: {SUPPORTED_COMPONENTS}\"\n        )\n\n    return component_cls(**component_def.get(\"params\", {}))\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/contribs/promptui/cli.py",
    "content": "\"\"\"CLI commands that can be imported by the kotaemon.cli module\"\"\"\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/contribs/promptui/config.py",
    "content": "\"\"\"Get config from Pipeline\"\"\"\nimport inspect\nfrom pathlib import Path\nfrom typing import Any, Dict, Optional, Type, Union\n\nimport yaml\n\nfrom kotaemon.base import BaseComponent\nfrom kotaemon.chatbot import BaseChatBot\n\nfrom .base import DEFAULT_COMPONENT_BY_TYPES\n\n\ndef config_from_value(value: Any) -> dict:\n    \"\"\"Get the config from default value\n\n    Args:\n        value (Any): default value\n\n    Returns:\n        dict: config\n    \"\"\"\n    component = DEFAULT_COMPONENT_BY_TYPES.get(type(value).__name__, \"text\")\n    return {\n        \"component\": component,\n        \"params\": {\n            \"value\": value,\n        },\n    }\n\n\ndef handle_param(param: dict) -> dict:\n    \"\"\"Convert param definition into promptui-compliant config\n\n    Supported gradio's UI components are (https://www.gradio.app/docs/components)\n        - CheckBoxGroup: list (multi select)\n        - DropDown: list (single select)\n        - File\n        - Image\n        - Number: int / float\n        - Radio: list (single select)\n        - Slider: int / float\n        - TextBox: str\n    \"\"\"\n    params = {}\n    default = param.get(\"default\", None)\n    if isinstance(default, str) and default.startswith(\"{{\") and default.endswith(\"}}\"):\n        default = None\n    if default is not None:\n        params[\"value\"] = default\n\n    ui_component = param.get(\"component_ui\", \"\")\n    if not ui_component:\n        type_: str = type(default).__name__ if default is not None else \"\"\n        ui_component = DEFAULT_COMPONENT_BY_TYPES.get(type_, \"text\")\n\n    return {\n        \"component\": ui_component,\n        \"params\": params,\n    }\n\n\ndef handle_node(node: dict) -> dict:\n    \"\"\"Convert node definition into promptui-compliant config\"\"\"\n    config = {}\n    for name, param_def in node.get(\"params\", {}).items():\n        if isinstance(param_def[\"auto_callback\"], str):\n            continue\n        if 
param_def.get(\"ignore_ui\", False):\n            continue\n        config[name] = handle_param(param_def)\n    for name, node_def in node.get(\"nodes\", {}).items():\n        if isinstance(node_def[\"auto_callback\"], str):\n            continue\n        if node_def.get(\"ignore_ui\", False):\n            continue\n        for key, value in handle_node(node_def[\"default\"]).items():\n            config[f\"{name}.{key}\"] = value\n        for key, value in node_def.get(\"default_kwargs\", {}).items():\n            config[f\"{name}.{key}\"] = config_from_value(value)\n\n    return config\n\n\ndef handle_input(pipeline: Union[BaseComponent, Type[BaseComponent]]) -> dict:\n    \"\"\"Get the input from the pipeline\"\"\"\n    signature = inspect.signature(pipeline.run)\n    inputs: Dict[str, Dict] = {}\n    for name, param in signature.parameters.items():\n        if name in [\"self\", \"args\", \"kwargs\"]:\n            continue\n        input_def: Dict[str, Optional[Any]] = {\"component\": \"text\"}\n        default = param.default\n        if default is param.empty:\n            inputs[name] = input_def\n            continue\n\n        params = {}\n        params[\"value\"] = default\n        type_ = type(default).__name__ if default is not None else None\n        ui_component = None\n        if type_ is not None:\n            ui_component = \"text\"\n\n        input_def[\"component\"] = ui_component\n        input_def[\"params\"] = params\n\n        inputs[name] = input_def\n\n    return inputs\n\n\ndef export_pipeline_to_config(\n    pipeline: Union[BaseComponent, Type[BaseComponent]],\n    path: Optional[str] = None,\n) -> dict:\n    \"\"\"Export a pipeline to a promptui-compliant config dict\"\"\"\n    if inspect.isclass(pipeline):\n        pipeline = pipeline()\n\n    pipeline_def = pipeline.describe()\n    ui_type = \"chat\" if isinstance(pipeline, BaseChatBot) else \"simple\"\n    if ui_type == \"chat\":\n        params = {f\".bot.{k}\": v for k, v in 
handle_node(pipeline_def).items()}\n        params[\"system_message\"] = {\"component\": \"text\", \"params\": {\"value\": \"\"}}\n        outputs = []\n        if hasattr(pipeline, \"_promptui_outputs\"):\n            outputs = pipeline._promptui_outputs\n        config_obj: dict = {\n            \"ui-type\": ui_type,\n            \"params\": params,\n            \"inputs\": {},\n            \"outputs\": outputs,\n            \"logs\": {\n                \"full_pipeline\": {\n                    \"input\": {\n                        \"step\": \".\",\n                        \"getter\": \"_get_input\",\n                    },\n                    \"output\": {\n                        \"step\": \".\",\n                        \"getter\": \"_get_output\",\n                    },\n                    \"preference\": {\n                        \"step\": \"preference\",\n                    },\n                }\n            },\n        }\n    else:\n        outputs = [{\"step\": \".\", \"getter\": \"_get_output\", \"component\": \"text\"}]\n        if hasattr(pipeline, \"_promptui_outputs\"):\n            outputs = pipeline._promptui_outputs\n        config_obj = {\n            \"ui-type\": ui_type,\n            \"params\": handle_node(pipeline_def),\n            \"inputs\": handle_input(pipeline),\n            \"outputs\": outputs,\n            \"logs\": {\n                \"full_pipeline\": {\n                    \"input\": {\n                        \"step\": \".\",\n                        \"getter\": \"_get_input\",\n                    },\n                    \"output\": {\n                        \"step\": \".\",\n                        \"getter\": \"_get_output\",\n                    },\n                },\n            },\n        }\n\n    config = {f\"{pipeline.__module__}.{pipeline.__class__.__name__}\": config_obj}\n    if path is not None:\n        old_config = config\n        if Path(path).is_file():\n            with open(path) as f:\n                
old_config = yaml.safe_load(f)\n                old_config.update(config)\n        with open(path, \"w\") as f:\n            yaml.safe_dump(old_config, f, sort_keys=False)\n\n    return config\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/contribs/promptui/export.py",
    "content": "\"\"\"Export logs into Excel file\"\"\"\nimport os\nimport pickle\nfrom pathlib import Path\nfrom typing import Any, Dict, List, Type, Union\n\nimport pandas as pd\nimport yaml\nfrom theflow.storage import storage\nfrom theflow.utils.modules import import_dotted_string\n\nfrom kotaemon.base import BaseComponent\n\nfrom .logs import ResultLog\n\n\ndef from_log_to_dict(pipeline_cls: Type[BaseComponent], log_config: dict) -> dict:\n    \"\"\"Export the log to panda dataframes\n\n    Args:\n        pipeline_cls (Type[BaseComponent]): Pipeline class\n        log_config (dict): Log config\n\n    Returns:\n        dataframe\n    \"\"\"\n    # get the directory\n    pipeline_log_path = storage.url(pipeline_cls().config.store_result)\n    dirs = list(sorted([f.path for f in os.scandir(pipeline_log_path) if f.is_dir()]))\n\n    # get resultlog callback\n    resultlog = getattr(pipeline_cls, \"_promptui_resultlog\", ResultLog)\n    allowed_resultlog_callbacks = {i for i in dir(resultlog) if not i.startswith(\"__\")}\n\n    ids = []\n    params: Dict[str, List[Any]] = {}\n    logged_infos: Dict[str, List[Any]] = {}\n\n    for idx, each_dir in enumerate(dirs):\n        ids.append(str(Path(each_dir).name))\n\n        # get the params\n        params_file = os.path.join(each_dir, \"params.pkl\")\n        if os.path.exists(params_file):\n            with open(params_file, \"rb\") as f:\n                each_params = pickle.load(f)\n            for key, value in each_params.items():\n                if key not in params:\n                    params[key] = [None] * len(dirs)\n                params[key][idx] = value\n\n        # get the progress\n        progress_file = os.path.join(each_dir, \"progress.pkl\")\n        if os.path.exists(progress_file):\n            with open(progress_file, \"rb\") as f:\n                progress = pickle.load(f)\n\n            for name, col_info in log_config.items():\n                step = col_info[\"step\"]\n                getter 
= col_info.get(\"getter\", None)\n                if name not in logged_infos:\n                    logged_infos[name] = [None] * len(dirs)\n\n                if step not in progress:\n                    continue\n\n                info = progress[step]\n                if getter:\n                    if getter in allowed_resultlog_callbacks:\n                        info = getattr(resultlog, getter)(info)\n                else:\n                    implicit_name = f\"get_{name}\"\n                    if implicit_name in allowed_resultlog_callbacks:\n                        info = getattr(resultlog, implicit_name)(info)\n                logged_infos[name][idx] = info\n\n    return {\"ids\": ids, **params, **logged_infos}\n\n\ndef export(config: dict, pipeline_def, output_path):\n    \"\"\"Export from config to Excel file\"\"\"\n\n    pipeline_name = f\"{pipeline_def.__module__}.{pipeline_def.__name__}\"\n\n    # export to Excel\n    if not config.get(\"logs\", {}):\n        raise ValueError(f\"Pipeline {pipeline_name} has no logs to export\")\n\n    pds: Dict[str, pd.DataFrame] = {}\n    for log_name, log_def in config[\"logs\"].items():\n        pds[log_name] = pd.DataFrame(from_log_to_dict(pipeline_def, log_def))\n\n    # from the list of pds, export to Excel to output_path\n    with pd.ExcelWriter(output_path, engine=\"openpyxl\") as writer:  # type: ignore\n        for log_name, df in pds.items():\n            df.to_excel(writer, sheet_name=log_name)\n\n\ndef export_from_dict(\n    config: Union[str, dict],\n    pipeline: Union[str, Type[BaseComponent]],\n    output_path: str,\n):\n    \"\"\"CLI to export the logs of a pipeline into Excel file\n\n    Args:\n        config_path (str): Path to the config file\n        pipeline_name (str): Name of the pipeline\n        output_path (str): Path to the output Excel file\n    \"\"\"\n    # get the pipeline class and the relevant config dict\n    config_dict: dict\n    if isinstance(config, str):\n        with 
open(config) as f:\n            config_dict = yaml.safe_load(f)\n    elif isinstance(config, dict):\n        config_dict = config\n    else:\n        raise TypeError(f\"`config` must be str or dict, not {type(config)}\")\n\n    pipeline_name: str\n    pipeline_cls: Type[BaseComponent]\n    pipeline_config: dict\n    if isinstance(pipeline, str):\n        if pipeline not in config_dict:\n            raise ValueError(f\"Pipeline {pipeline} not found in config file\")\n        pipeline_name = pipeline\n        pipeline_cls = import_dotted_string(pipeline, safe=False)\n        pipeline_config = config_dict[pipeline]\n    elif isinstance(pipeline, type) and issubclass(pipeline, BaseComponent):\n        pipeline_name = f\"{pipeline.__module__}.{pipeline.__name__}\"\n        if pipeline_name not in config_dict:\n            raise ValueError(f\"Pipeline {pipeline_name} not found in config file\")\n        pipeline_cls = pipeline\n        pipeline_config = config_dict[pipeline_name]\n    else:\n        raise TypeError(\n            f\"`pipeline` must be str or subclass of BaseComponent, not {type(pipeline)}\"\n        )\n\n    export(pipeline_config, pipeline_cls, output_path)\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/contribs/promptui/logs.py",
    "content": "class ResultLog:\n    \"\"\"Callback getter to get the desired log result\n\n    The callback resolution will be as follow:\n        1. Explicit string name\n        2. Implicitly by: `get_<name>`\n        3. Pass through\n    \"\"\"\n\n    @staticmethod\n    def _get_input(obj):\n        return obj[\"input\"]\n\n    @staticmethod\n    def _get_output(obj):\n        return obj[\"output\"]\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/contribs/promptui/themes.py",
    "content": "from __future__ import annotations\n\nfrom typing import Iterable\n\nfrom gradio.themes.base import Base\nfrom gradio.themes.utils import colors, fonts, sizes\n\n\nclass John(Base):\n    def __init__(\n        self,\n        *,\n        primary_hue: colors.Color | str = colors.neutral,\n        secondary_hue: colors.Color | str = colors.neutral,\n        neutral_hue: colors.Color | str = colors.neutral,\n        spacing_size: sizes.Size | str = sizes.spacing_sm,\n        radius_size: sizes.Size | str = sizes.radius_none,\n        text_size: sizes.Size | str = sizes.text_sm,\n        font: fonts.Font\n        | str\n        | Iterable[fonts.Font | str] = (\n            fonts.GoogleFont(\"Quicksand\"),\n            \"ui-sans-serif\",\n            \"system-ui\",\n            \"sans-serif\",\n        ),\n        font_mono: fonts.Font\n        | str\n        | Iterable[fonts.Font | str] = (\n            fonts.GoogleFont(\"IBM Plex Mono\"),\n            \"ui-monospace\",\n            \"Consolas\",\n            \"monospace\",\n        ),\n    ):\n        super().__init__(\n            primary_hue=primary_hue,\n            secondary_hue=secondary_hue,\n            neutral_hue=neutral_hue,\n            spacing_size=spacing_size,\n            radius_size=radius_size,\n            text_size=text_size,\n            font=font,\n            font_mono=font_mono,\n        )\n        self.name = \"monochrome\"\n        super().set(\n            # Colors\n            slider_color=\"*neutral_900\",\n            slider_color_dark=\"*neutral_500\",\n            body_text_color=\"*neutral_900\",\n            block_label_text_color=\"*body_text_color\",\n            block_title_text_color=\"*body_text_color\",\n            body_text_color_subdued=\"*neutral_700\",\n            background_fill_primary_dark=\"*neutral_900\",\n            background_fill_secondary_dark=\"*neutral_800\",\n            block_background_fill_dark=\"*neutral_800\",\n            
input_background_fill_dark=\"*neutral_700\",\n            # Button Colors\n            button_primary_background_fill=\"*neutral_900\",\n            button_primary_background_fill_hover=\"*neutral_700\",\n            button_primary_text_color=\"white\",\n            button_primary_background_fill_dark=\"*neutral_600\",\n            button_primary_background_fill_hover_dark=\"*neutral_600\",\n            button_primary_text_color_dark=\"white\",\n            button_secondary_background_fill=(\n                \"linear-gradient(to bottom right, *neutral_100, *neutral_200)\"\n            ),\n            button_secondary_background_fill_hover=(\n                \"linear-gradient(to bottom right, *neutral_100, *neutral_100)\"\n            ),\n            button_secondary_background_fill_dark=(\n                \"linear-gradient(to bottom right, *neutral_600, *neutral_700)\"\n            ),\n            button_secondary_background_fill_hover_dark=(\n                \"linear-gradient(to bottom right, *neutral_600, *neutral_600)\"\n            ),\n            button_cancel_background_fill=\"*button_primary_background_fill\",\n            button_cancel_background_fill_hover=\"*button_primary_background_fill_hover\",\n            button_cancel_text_color=\"*button_primary_text_color\",\n            # Padding\n            checkbox_label_padding=\"*spacing_sm\",\n            button_large_padding=\"*spacing_sm\",\n            button_small_padding=\"*spacing_sm\",\n            # Borders\n            block_border_width=\"0px\",\n            block_border_width_dark=\"1px\",\n            shadow_drop_lg=\"0 1px 4px 0 rgb(0 0 0 / 0.1)\",\n            block_shadow=\"*shadow_drop_lg\",\n            block_shadow_dark=\"none\",\n            # Block Labels\n            block_title_text_weight=\"600\",\n            block_label_text_weight=\"600\",\n            block_label_text_size=\"*text_sm\",\n        )\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/contribs/promptui/tunnel.py",
    "content": "import atexit\nimport logging\nimport os\nimport platform\nimport stat\nimport subprocess\nfrom pathlib import Path\n\nimport requests\n\nVERSION = \"1.0\"\n\nmachine = platform.machine()\nif machine == \"x86_64\":\n    machine = \"amd64\"\n\nBINARY_REMOTE_NAME = f\"frpc_{platform.system().lower()}_{machine.lower()}\"\nEXTENSION = \".exe\" if os.name == \"nt\" else \"\"\nBINARY_URL = (\n    \"some-endpoint.com\" f\"/kotaemon/tunneling/{VERSION}/{BINARY_REMOTE_NAME}{EXTENSION}\"\n)\n\nBINARY_FILENAME = f\"{BINARY_REMOTE_NAME}_v{VERSION}\"\nBINARY_FOLDER = Path(__file__).parent\nBINARY_PATH = f\"{BINARY_FOLDER / BINARY_FILENAME}\"\n\n\nlogger = logging.getLogger(__name__)\n\n\nclass Tunnel:\n    def __init__(self, appname, username, local_port):\n        self.proc = None\n        self.url = None\n        self.appname = appname\n        self.username = username\n        self.local_port = local_port\n\n    @staticmethod\n    def download_binary():\n        if not Path(BINARY_PATH).exists():\n            print(\"First time setting tunneling...\")\n            resp = requests.get(BINARY_URL)\n\n            if resp.status_code == 404:\n                raise OSError(\n                    f\"Cannot set up a share link as this platform is incompatible. \"\n                    \"Please create a GitHub issue with information about your \"\n                    f\"platform: {platform.uname()}\"\n                )\n\n            if resp.status_code == 403:\n                raise OSError(\n                    \"You do not have permission to setup the tunneling. Please \"\n                    \"make sure that you are within Cinnamon VPN or within other \"\n                    \"approved IPs. 
If this is new server, please contact @channel \"\n                    \"at #llm-productization to add your IP address\"\n                )\n\n            resp.raise_for_status()\n\n            # Save file data to local copy\n            with open(BINARY_PATH, \"wb\") as file:\n                file.write(resp.content)\n            st = os.stat(BINARY_PATH)\n            os.chmod(BINARY_PATH, st.st_mode | stat.S_IEXEC)\n\n    def run(self) -> str:\n        \"\"\"Setting up tunneling\"\"\"\n        if platform.system().lower() == \"windows\":\n            logger.warning(\"Tunneling is not fully supported on Windows.\")\n\n        self.download_binary()\n        self.url = self._start_tunnel(BINARY_PATH)\n        return self.url\n\n    def kill(self):\n        if self.proc is not None:\n            print(f\"Killing tunnel 127.0.0.1:{self.local_port} <> {self.url}\")\n            self.proc.terminate()\n            self.proc = None\n\n    def _start_tunnel(self, binary: str) -> str:\n        command = [\n            binary,\n            \"http\",\n            \"-l\",\n            str(self.local_port),\n            \"-i\",\n            \"127.0.0.1\",\n            \"--uc\",\n            \"--sd\",\n            str(self.appname),\n            \"-n\",\n            str(self.appname + self.username),\n            \"--server_addr\",\n            \"44.229.38.9:7000\",\n            \"--token\",\n            \"Wz807/DyC;#t;#/\",\n            \"--disable_log_color\",\n        ]\n        self.proc = subprocess.Popen(\n            command, stdout=subprocess.PIPE, stderr=subprocess.PIPE\n        )\n        atexit.register(self.kill)\n        return f\"https://{self.appname}.promptui.dm.cinnamon.is\"\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/contribs/promptui/ui/__init__.py",
    "content": "from typing import Union\n\nimport gradio as gr\nimport yaml\nfrom theflow.utils.modules import import_dotted_string\n\nfrom ..themes import John\nfrom .chat import build_chat_ui\nfrom .pipeline import build_pipeline_ui\n\n\ndef build_from_dict(config: Union[str, dict]):\n    \"\"\"Build a full UI from YAML config file\"\"\"\n\n    if isinstance(config, str):\n        with open(config) as f:\n            config_dict: dict = yaml.safe_load(f)\n    elif isinstance(config, dict):\n        config_dict = config\n    else:\n        raise ValueError(\n            f\"config must be either a yaml path or a dict, got {type(config)}\"\n        )\n\n    demos = []\n    for key, value in config_dict.items():\n        pipeline_def = import_dotted_string(key, safe=False)\n        if value[\"ui-type\"] == \"chat\":\n            demos.append(build_chat_ui(value, pipeline_def).queue())\n        else:\n            demos.append(build_pipeline_ui(value, pipeline_def).queue())\n    if len(demos) == 1:\n        demo = demos[0]\n    else:\n        demo = gr.TabbedInterface(\n            demos,\n            tab_names=list(config_dict.keys()),\n            title=\"PromptUI from kotaemon\",\n            analytics_enabled=False,\n            theme=John(),\n        )\n\n    demo.queue()\n\n    return demo\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/contribs/promptui/ui/blocks.py",
    "content": "from __future__ import annotations\n\nfrom typing import Any, AsyncGenerator\n\nimport anyio\nfrom gradio import ChatInterface\nfrom gradio.components import Component, get_component_instance\nfrom gradio.events import on\nfrom gradio.helpers import special_args\nfrom gradio.routes import Request\n\n\nclass ChatBlock(ChatInterface):\n    \"\"\"The ChatBlock subclasses ChatInterface to provide extra functionalities:\n\n    - Show additional outputs to the chat interface\n    - Disallow blank user message\n    \"\"\"\n\n    def __init__(\n        self,\n        *args,\n        additional_outputs: str | Component | list[str | Component] | None = None,\n        **kwargs,\n    ):\n        if additional_outputs:\n            if not isinstance(additional_outputs, list):\n                additional_outputs = [additional_outputs]\n            self.additional_outputs = [\n                get_component_instance(i) for i in additional_outputs  # type: ignore\n            ]\n        else:\n            self.additional_outputs = []\n\n        super().__init__(*args, **kwargs)\n\n    async def _submit_fn(\n        self,\n        message: str,\n        history_with_input: list[list[str | None]],\n        request: Request,\n        *args,\n    ) -> tuple[Any, ...]:\n        input_args = args[: -len(self.additional_outputs)]\n        output_args = args[-len(self.additional_outputs) :]\n        if not message:\n            return history_with_input, history_with_input, *output_args\n\n        history = history_with_input[:-1]\n        inputs, _, _ = special_args(\n            self.fn, inputs=[message, history, *input_args], request=request\n        )\n\n        if self.is_async:\n            response = await self.fn(*inputs)\n        else:\n            response = await anyio.to_thread.run_sync(\n                self.fn, *inputs, limiter=self.limiter\n            )\n\n        output = []\n        if self.additional_outputs:\n            text = response[0]\n            
output = response[1:]\n        else:\n            text = response\n\n        history.append([message, text])\n        return history, history, *output\n\n    async def _stream_fn(\n        self,\n        message: str,\n        history_with_input: list[list[str | None]],\n        *args,\n    ) -> AsyncGenerator:\n        raise NotImplementedError(\"Stream function not implemented for ChatBlock\")\n\n    def _display_input(\n        self, message: str, history: list[list[str | None]]\n    ) -> tuple[list[list[str | None]], list[list[str | None]]]:\n        \"\"\"Stop displaying the input message if the message is a blank string\"\"\"\n        if not message:\n            return history, history\n        return super()._display_input(message, history)\n\n    def _setup_events(self) -> None:\n        \"\"\"Include additional outputs in the submit event\"\"\"\n        submit_fn = self._stream_fn if self.is_generator else self._submit_fn\n        submit_triggers = (\n            [self.textbox.submit, self.submit_btn.click]\n            if self.submit_btn\n            else [self.textbox.submit]\n        )\n        submit_event = (\n            on(\n                submit_triggers,\n                self._clear_and_save_textbox,\n                [self.textbox],\n                [self.textbox, self.saved_input],\n                api_name=False,\n                queue=False,\n            )\n            .then(\n                self._display_input,\n                [self.saved_input, self.chatbot_state],\n                [self.chatbot, self.chatbot_state],\n                api_name=False,\n                queue=False,\n            )\n            .then(\n                submit_fn,\n                [self.saved_input, self.chatbot_state]\n                + self.additional_inputs\n                + self.additional_outputs,\n                [self.chatbot, self.chatbot_state] + self.additional_outputs,\n                api_name=False,\n            )\n        )\n        
self._setup_stop_events(submit_triggers, submit_event)\n\n        if self.retry_btn:\n            retry_event = (\n                self.retry_btn.click(\n                    self._delete_prev_fn,\n                    [self.chatbot_state],\n                    [self.chatbot, self.saved_input, self.chatbot_state],\n                    api_name=False,\n                    queue=False,\n                )\n                .then(\n                    self._display_input,\n                    [self.saved_input, self.chatbot_state],\n                    [self.chatbot, self.chatbot_state],\n                    api_name=False,\n                    queue=False,\n                )\n                .then(\n                    submit_fn,\n                    [self.saved_input, self.chatbot_state]\n                    + self.additional_inputs\n                    + self.additional_outputs,\n                    [self.chatbot, self.chatbot_state] + self.additional_outputs,\n                    api_name=False,\n                )\n            )\n            self._setup_stop_events([self.retry_btn.click], retry_event)\n\n        if self.undo_btn:\n            self.undo_btn.click(\n                self._delete_prev_fn,\n                [self.chatbot_state],\n                [self.chatbot, self.saved_input, self.chatbot_state],\n                api_name=False,\n                queue=False,\n            ).then(\n                lambda x: x,\n                [self.saved_input],\n                [self.textbox],\n                api_name=False,\n                queue=False,\n            )\n\n        if self.clear_btn:\n            self.clear_btn.click(\n                lambda: ([], [], None),\n                None,\n                [self.chatbot, self.chatbot_state, self.saved_input],\n                queue=False,\n                api_name=False,\n            )\n\n    def _setup_api(self) -> None:\n        api_fn = self._api_stream_fn if self.is_generator else self._api_submit_fn\n\n        
self.fake_api_btn.click(\n            api_fn,\n            [self.textbox, self.chatbot_state] + self.additional_inputs,\n            [self.textbox, self.chatbot_state] + self.additional_outputs,\n            api_name=\"chat\",\n        )\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/contribs/promptui/ui/chat.py",
    "content": "import pickle\nfrom datetime import datetime\nfrom pathlib import Path\n\nimport gradio as gr\nfrom theflow.storage import storage\n\nfrom kotaemon.chatbot import ChatConversation\nfrom kotaemon.contribs.promptui.base import get_component\nfrom kotaemon.contribs.promptui.export import export\nfrom kotaemon.contribs.promptui.ui.blocks import ChatBlock\n\nfrom ..logs import ResultLog\n\nUSAGE_INSTRUCTION = \"\"\"## How to use:\n\n1. Set the desired parameters.\n2. Click \"New chat\" to start a chat session with the supplied parameters. This\n    set of parameters will persist until the end of the chat session. During an\n    ongoing chat session, changing the parameters will not take any effect.\n3. Chat and interact with the chat bot on the right panel. You can add any\n    additional input (if any), and they will be supplied to the chatbot.\n4. During chat, the log of the chat will show up in the \"Output\" tabs. This is\n    empty by default, so if you want to show the log here, tell the AI developers\n    to configure the UI settings.\n5. When finishing chat, select your preference in the radio box. Click \"End chat\".\n    This will save the chat log and the preference to disk.\n6. 
To compare the result of different run, click \"Export\" to get an Excel\n    spreadsheet summary of different run.\n\n## Support:\n\nIn case of errors, you can:\n\n- PromptUI instruction:\n    https://github.com/Cinnamon/kotaemon/wiki/Utilities#prompt-engineering-ui\n- Create bug fix and make PR at: https://github.com/Cinnamon/kotaemon\n- Ping any of @john @tadashi @ian @jacky in Slack channel #llm-productization\n\n## Contribute:\n\n- Follow installation at: https://github.com/Cinnamon/kotaemon/\n\"\"\"\n\n\ndef construct_chat_ui(\n    config, func_new_chat, func_chat, func_end_chat, func_export_to_excel\n) -> gr.Blocks:\n    \"\"\"Construct the prompt engineering UI for chat\n\n    Args:\n        config: the UI config\n        func_new_chat: the function for starting a new chat session\n        func_chat: the function for chatting interaction\n        func_end_chat: the function for ending and saving the chat\n        func_export_to_excel: the function to export the logs to excel\n\n    Returns:\n        the UI object\n    \"\"\"\n    inputs, outputs, params = [], [], []\n    for name, component_def in config.get(\"inputs\", {}).items():\n        if \"params\" not in component_def:\n            component_def[\"params\"] = {}\n        component_def[\"params\"][\"interactive\"] = True\n        component = get_component(component_def)\n        if hasattr(component, \"label\") and not component.label:  # type: ignore\n            component.label = name  # type: ignore\n\n        inputs.append(component)\n\n    for name, component_def in config.get(\"params\", {}).items():\n        if \"params\" not in component_def:\n            component_def[\"params\"] = {}\n        component_def[\"params\"][\"interactive\"] = True\n        component = get_component(component_def)\n        if hasattr(component, \"label\") and not component.label:  # type: ignore\n            component.label = name  # type: ignore\n\n        params.append(component)\n\n    for idx, component_def in 
enumerate(config.get(\"outputs\", [])):\n        if \"params\" not in component_def:\n            component_def[\"params\"] = {}\n        component_def[\"params\"][\"interactive\"] = False\n        component = get_component(component_def)\n        if hasattr(component, \"label\") and not component.label:  # type: ignore\n            component.label = f\"Output {idx}\"  # type: ignore\n\n        outputs.append(component)\n\n    sess = gr.State(value=None)\n    chatbot = gr.Chatbot(label=\"Chatbot\", show_copy_button=True)\n    chat = ChatBlock(\n        func_chat, chatbot=chatbot, additional_inputs=[sess], additional_outputs=outputs\n    )\n    param_state = gr.Textbox(interactive=False)\n\n    with gr.Blocks(analytics_enabled=False, title=\"Welcome to PromptUI\") as demo:\n        sess.render()\n        with gr.Accordion(label=\"HOW TO\", open=False):\n            gr.Markdown(USAGE_INSTRUCTION)\n        with gr.Row():\n            run_btn = gr.Button(\"New chat\")\n            run_btn.click(\n                func_new_chat,\n                inputs=params,\n                outputs=[\n                    chat.chatbot,\n                    chat.chatbot_state,\n                    chat.saved_input,\n                    param_state,\n                    sess,\n                    *outputs,\n                ],\n            )\n            with gr.Accordion(label=\"End chat\", open=False):\n                likes = gr.Radio([\"like\", \"dislike\", \"neutral\"], value=\"neutral\")\n                save_log = gr.Checkbox(\n                    value=True,\n                    label=\"Save log\",\n                    info=\"If saved, log can be exported later\",\n                    show_label=True,\n                )\n                end_btn = gr.Button(\"End chat\")\n                end_btn.click(\n                    func_end_chat,\n                    inputs=[likes, save_log, sess],\n                    outputs=[param_state, sess],\n                )\n            with 
gr.Accordion(label=\"Export\", open=False):\n                exported_file = gr.File(\n                    label=\"Output file\", show_label=True, height=100\n                )\n                export_btn = gr.Button(\"Export\")\n                export_btn.click(func_export_to_excel, inputs=[], outputs=exported_file)\n\n        with gr.Row():\n            with gr.Column():\n                with gr.Tab(\"Params\"):\n                    for component in params:\n                        component.render()\n                    with gr.Accordion(label=\"Session state\", open=False):\n                        param_state.render()\n\n                with gr.Tab(\"Outputs\"):\n                    for component in outputs:\n                        component.render()\n            with gr.Column():\n                chat.render()\n\n    return demo.queue()\n\n\ndef build_chat_ui(config, pipeline_def):\n    \"\"\"Build the chat UI\n\n    Args:\n        config: the UI config\n        pipeline_def: the pipeline definition\n\n    Returns:\n        the UI object\n    \"\"\"\n    output_dir: Path = Path(storage.url(pipeline_def().config.store_result))\n    exported_dir = output_dir.parent / \"exported\"\n    exported_dir.mkdir(parents=True, exist_ok=True)\n\n    resultlog = getattr(pipeline_def, \"_promptui_resultlog\", ResultLog)\n    allowed_resultlog_callbacks = {i for i in dir(resultlog) if not i.startswith(\"__\")}\n\n    def new_chat(*args):\n        \"\"\"Start a new chat function\n\n        Args:\n            *args: the pipeline init params\n\n        Returns:\n            new empty states\n        \"\"\"\n        gr.Info(\"Starting new session...\")\n        param_dicts = {\n            name: value for name, value in zip(config[\"params\"].keys(), args)\n        }\n        for key in param_dicts.keys():\n            if config[\"params\"][key].get(\"component\").lower() == \"file\":\n                param_dicts[key] = param_dicts[key].name\n\n        # TODO: currently 
hard-code as ChatConversation\n        pipeline = pipeline_def()\n        session = ChatConversation(bot=pipeline)\n        session.set(param_dicts)\n        session.start_session()\n\n        param_state_str = \"\\n\".join(\n            f\"- {name}: {value}\" for name, value in param_dicts.items()\n        )\n\n        gr.Info(\"New chat session started.\")\n        return (\n            [],\n            [],\n            None,\n            param_state_str,\n            session,\n            *[None] * len(config.get(\"outputs\", [])),\n        )\n\n    def chat(message, history, session, *args):\n        \"\"\"The chat interface\n\n        # TODO: wrap the input and output of this chat function so that it\n        work with more types of chat conversation than simple text\n\n        Args:\n            message: the message from the user\n            history: the gradio history of the chat\n            session: the chat object session\n            *args: the additional inputs\n\n        Returns:\n            the response from the chatbot\n        \"\"\"\n        if session is None:\n            raise gr.Error(\n                \"No active chat session. 
Please set the params and click New chat\"\n            )\n\n        pred = session(message)\n        text_response = pred.content\n\n        additional_outputs = []\n        for output_def in config.get(\"outputs\", []):\n            value = session.last_run.logs(output_def[\"step\"])\n            getter = output_def.get(\"getter\", None)\n            if getter and getter in allowed_resultlog_callbacks:\n                value = getattr(resultlog, getter)(value)\n            additional_outputs.append(value)\n\n        return text_response, *additional_outputs\n\n    def end_chat(preference: str, save_log: bool, session):\n        \"\"\"End the chat session\n\n        Args:\n            preference: the preference of the user\n            save_log: whether to save the result\n            session: the chat object session\n\n        Returns:\n            the new empty state\n        \"\"\"\n        gr.Info(\"Ending session...\")\n        session.end_session()\n        output_dir: Path = (\n            Path(storage.url(session.config.store_result)) / session.last_run.id()\n        )\n\n        if not save_log:\n            if output_dir.exists():\n                import shutil\n\n                shutil.rmtree(output_dir)\n\n            session = None\n            param_state = \"\"\n            gr.Info(\"End session without saving log.\")\n            return param_state, session\n\n        # add preference result to progress\n        with (output_dir / \"progress.pkl\").open(\"rb\") as fi:\n            progress = pickle.load(fi)\n            progress[\"preference\"] = preference\n        with (output_dir / \"progress.pkl\").open(\"wb\") as fo:\n            pickle.dump(progress, fo)\n\n        # get the original params\n        param_dicts = {name: session.getx(name) for name in config[\"params\"].keys()}\n        with (output_dir / \"params.pkl\").open(\"wb\") as fo:\n            pickle.dump(param_dicts, fo)\n\n        session = None\n        param_state = \"\"\n        
gr.Info(\"End session and save log.\")\n        return param_state, session\n\n    def export_func():\n        name = (\n            f\"{pipeline_def.__module__}.{pipeline_def.__name__}_{datetime.now()}.xlsx\"\n        )\n        path = str(exported_dir / name)\n        gr.Info(f\"Begin exporting {name}...\")\n        try:\n            export(config=config, pipeline_def=pipeline_def, output_path=path)\n        except Exception as e:\n            raise gr.Error(f\"Failed to export. Please contact project's AIR: {e}\")\n        gr.Info(f\"Exported {name}. Please go to the `Exported file` tab to download\")\n        return path\n\n    demo = construct_chat_ui(\n        config=config,\n        func_new_chat=new_chat,\n        func_chat=chat,\n        func_end_chat=end_chat,\n        func_export_to_excel=export_func,\n    )\n    return demo\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/contribs/promptui/ui/pipeline.py",
    "content": "import pickle\nimport time\nfrom datetime import datetime\nfrom pathlib import Path\nfrom typing import Any, Dict\n\nimport gradio as gr\nimport pandas as pd\nfrom theflow.storage import storage\n\nfrom kotaemon.contribs.promptui.base import get_component\nfrom kotaemon.contribs.promptui.export import export\n\nfrom ..logs import ResultLog\n\nUSAGE_INSTRUCTION = \"\"\"## How to use:\n\n1. Set the desired parameters.\n2. Set the desired inputs.\n3. Click \"Run\" to execute the pipeline with the supplied parameters and inputs\n4. The pipeline output will show up in the output panel.\n5. Repeat from step 1.\n6. To compare the result of different run, click \"Export\" to get an Excel\n    spreadsheet summary of different run.\n\n## Support:\n\nIn case of errors, you can:\n\n- PromptUI instruction:\n    https://github.com/Cinnamon/kotaemon/wiki/Utilities#prompt-engineering-ui\n- Create bug fix and make PR at: https://github.com/Cinnamon/kotaemon\n- Ping any of @john @tadashi @ian @jacky in Slack channel #llm-productization\n\n## Contribute:\n\n- Follow installation at: https://github.com/Cinnamon/kotaemon/\n\"\"\"\n\n\ndef construct_pipeline_ui(\n    config, func_run, func_save, func_load_params, func_activate_params, func_export\n) -> gr.Blocks:\n    \"\"\"Create UI from config file. 
Execute the UI from config file\n\n    - Can do now: Log from stdout to UI\n    - In the future, we can provide some hooks and callbacks to let developers better\n    fine-tune the UI behavior.\n    \"\"\"\n    inputs, outputs, params = [], [], []\n    for name, component_def in config.get(\"inputs\", {}).items():\n        if \"params\" not in component_def:\n            component_def[\"params\"] = {}\n        component_def[\"params\"][\"interactive\"] = True\n        component = get_component(component_def)\n        if hasattr(component, \"label\") and not component.label:  # type: ignore\n            component.label = name  # type: ignore\n\n        inputs.append(component)\n\n    for name, component_def in config.get(\"params\", {}).items():\n        if \"params\" not in component_def:\n            component_def[\"params\"] = {}\n        component_def[\"params\"][\"interactive\"] = True\n        component = get_component(component_def)\n        if hasattr(component, \"label\") and not component.label:  # type: ignore\n            component.label = name  # type: ignore\n\n        params.append(component)\n\n    for idx, component_def in enumerate(config.get(\"outputs\", [])):\n        if \"params\" not in component_def:\n            component_def[\"params\"] = {}\n        component_def[\"params\"][\"interactive\"] = False\n        component = get_component(component_def)\n        if hasattr(component, \"label\") and not component.label:  # type: ignore\n            component.label = f\"Output {idx}\"  # type: ignore\n\n        outputs.append(component)\n\n    exported_file = gr.File(label=\"Output file\", show_label=True)\n    history_dataframe = gr.DataFrame(wrap=True)\n\n    temp = gr.Tab\n    with gr.Blocks(analytics_enabled=False, title=\"Welcome to PromptUI\") as demo:\n        with gr.Accordion(label=\"HOW TO\", open=False):\n            gr.Markdown(USAGE_INSTRUCTION)\n        with gr.Accordion(label=\"Params History\", open=False):\n            with 
gr.Row():\n                save_btn = gr.Button(\"Save params\")\n                save_btn.click(func_save, inputs=params, outputs=history_dataframe)\n                load_params_btn = gr.Button(\"Reload params\")\n                load_params_btn.click(\n                    func_load_params, inputs=[], outputs=history_dataframe\n                )\n            history_dataframe.render()\n            history_dataframe.select(\n                func_activate_params, inputs=params, outputs=params\n            )\n        with gr.Row():\n            run_btn = gr.Button(\"Run\")\n            run_btn.click(func_run, inputs=inputs + params, outputs=outputs)\n            export_btn = gr.Button(\n                \"Export (Result will be in Exported file next to Output)\"\n            )\n            export_btn.click(func_export, inputs=[], outputs=exported_file)\n        with gr.Row():\n            with gr.Column():\n                if params:\n                    with temp(\"Params\"):\n                        for component in params:\n                            component.render()\n                if inputs:\n                    with temp(\"Inputs\"):\n                        for component in inputs:\n                            component.render()\n                if not params and not inputs:\n                    gr.Text(\"No params or inputs\")\n            with gr.Column():\n                with temp(\"Outputs\"):\n                    for component in outputs:\n                        component.render()\n                with temp(\"Exported file\"):\n                    exported_file.render()\n\n    return demo\n\n\ndef load_saved_params(path: str) -> Dict:\n    \"\"\"Load the saved params from path to a dataframe\"\"\"\n    # get all pickle files\n    files = list(sorted(Path(path).glob(\"*.pkl\")))\n    data: Dict[str, Any] = {\"_id\": [None] * len(files)}\n    for idx, each_file in enumerate(files):\n        with open(each_file, \"rb\") as f:\n            each_data = 
pickle.load(f)\n        data[\"_id\"][idx] = Path(each_file).stem\n        for key, value in each_data.items():\n            if key not in data:\n                data[key] = [None] * len(files)\n            data[key][idx] = value\n\n    return data\n\n\ndef build_pipeline_ui(config: dict, pipeline_def):\n    \"\"\"Build a tab from config file\"\"\"\n    inputs_name = list(config.get(\"inputs\", {}).keys())\n    params_name = list(config.get(\"params\", {}).keys())\n    outputs_def = config.get(\"outputs\", [])\n\n    output_dir: Path = Path(storage.url(pipeline_def().config.store_result))\n    exported_dir = output_dir.parent / \"exported\"\n    exported_dir.mkdir(parents=True, exist_ok=True)\n\n    save_dir = (\n        output_dir.parent\n        / \"saved\"\n        / f\"{pipeline_def.__module__}.{pipeline_def.__name__}\"\n    )\n    save_dir.mkdir(parents=True, exist_ok=True)\n\n    resultlog = getattr(pipeline_def, \"_promptui_resultlog\", ResultLog)\n    allowed_resultlog_callbacks = {i for i in dir(resultlog) if not i.startswith(\"__\")}\n\n    def run_func(*args):\n        inputs = {\n            name: value for name, value in zip(inputs_name, args[: len(inputs_name)])\n        }\n        params = {\n            name: value for name, value in zip(params_name, args[len(inputs_name) :])\n        }\n        pipeline = pipeline_def()\n        pipeline.set(params)\n        pipeline(**inputs)\n        with storage.open(\n            storage.url(\n                pipeline.config.store_result, pipeline.last_run.id(), \"params.pkl\"\n            ),\n            \"wb\",\n        ) as f:\n            pickle.dump(params, f)\n        if outputs_def:\n            outputs = []\n            for output_def in outputs_def:\n                output = pipeline.last_run.logs(output_def[\"step\"])\n                getter = output_def.get(\"getter\", None)\n                if getter and getter in allowed_resultlog_callbacks:\n                    output = getattr(resultlog, 
getter)(output)\n                outputs.append(output)\n            if len(outputs_def) == 1:\n                return outputs[0]\n            return outputs\n\n    def save_func(*args):\n        params = {name: value for name, value in zip(params_name, args)}\n        filename = save_dir / f\"{int(time.time())}.pkl\"\n        with open(filename, \"wb\") as f:\n            pickle.dump(params, f)\n        gr.Info(\"Params saved\")\n\n        data = load_saved_params(str(save_dir))\n        return pd.DataFrame(data)\n\n    def load_params_func():\n        data = load_saved_params(str(save_dir))\n        return pd.DataFrame(data)\n\n    def activate_params_func(ev: gr.SelectData, *args):\n        data = load_saved_params(str(save_dir))\n        output_args = [each for each in args]\n        if ev.value is None:\n            gr.Info(f'Blank value: \"{ev.value}\". Skip')\n            return output_args\n\n        column = list(data.keys())[ev.index[1]]\n\n        if column not in params_name:\n            gr.Info(f'Column \"{column}\" not in params. Skip')\n            return output_args\n\n        value = data[column][ev.index[0]]\n        if value is None:\n            gr.Info(f'Blank value: \"{ev.value}\". Skip')\n            return output_args\n\n        output_args[params_name.index(column)] = value\n\n        return output_args\n\n    def export_func():\n        name = (\n            f\"{pipeline_def.__module__}.{pipeline_def.__name__}_{datetime.now()}.xlsx\"\n        )\n        path = str(exported_dir / name)\n        gr.Info(f\"Begin exporting {name}...\")\n        try:\n            export(config=config, pipeline_def=pipeline_def, output_path=path)\n        except Exception as e:\n            raise gr.Error(f\"Failed to export. Please contact project's AIR: {e}\")\n        gr.Info(f\"Exported {name}. 
Please go to the `Exported file` tab to download\")\n        return path\n\n    return construct_pipeline_ui(\n        config, run_func, save_func, load_params_func, activate_params_func, export_func\n    )\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/embeddings/__init__.py",
    "content": "from .base import BaseEmbeddings\nfrom .endpoint_based import EndpointEmbeddings\nfrom .fastembed import FastEmbedEmbeddings\nfrom .langchain_based import (\n    LCAzureOpenAIEmbeddings,\n    LCCohereEmbeddings,\n    LCGoogleEmbeddings,\n    LCHuggingFaceEmbeddings,\n    LCMistralEmbeddings,\n    LCOpenAIEmbeddings,\n)\nfrom .openai import AzureOpenAIEmbeddings, OpenAIEmbeddings\nfrom .tei_endpoint_embed import TeiEndpointEmbeddings\nfrom .voyageai import VoyageAIEmbeddings\n\n__all__ = [\n    \"BaseEmbeddings\",\n    \"EndpointEmbeddings\",\n    \"TeiEndpointEmbeddings\",\n    \"LCOpenAIEmbeddings\",\n    \"LCAzureOpenAIEmbeddings\",\n    \"LCCohereEmbeddings\",\n    \"LCHuggingFaceEmbeddings\",\n    \"LCGoogleEmbeddings\",\n    \"LCMistralEmbeddings\",\n    \"OpenAIEmbeddings\",\n    \"AzureOpenAIEmbeddings\",\n    \"FastEmbedEmbeddings\",\n    \"VoyageAIEmbeddings\",\n]\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/embeddings/base.py",
    "content": "from __future__ import annotations\n\nfrom kotaemon.base import BaseComponent, Document, DocumentWithEmbedding\n\n\nclass BaseEmbeddings(BaseComponent):\n    def run(\n        self, text: str | list[str] | Document | list[Document], *args, **kwargs\n    ) -> list[DocumentWithEmbedding]:\n        return self.invoke(text, *args, **kwargs)\n\n    def invoke(\n        self, text: str | list[str] | Document | list[Document], *args, **kwargs\n    ) -> list[DocumentWithEmbedding]:\n        raise NotImplementedError\n\n    async def ainvoke(\n        self, text: str | list[str] | Document | list[Document], *args, **kwargs\n    ) -> list[DocumentWithEmbedding]:\n        raise NotImplementedError\n\n    def prepare_input(\n        self, text: str | list[str] | Document | list[Document]\n    ) -> list[Document]:\n        if isinstance(text, (str, Document)):\n            return [Document(content=text)]\n        elif isinstance(text, list):\n            return [Document(content=_) for _ in text]\n        return text\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/embeddings/endpoint_based.py",
    "content": "import requests\n\nfrom kotaemon.base import Document, DocumentWithEmbedding\n\nfrom .base import BaseEmbeddings\n\n\nclass EndpointEmbeddings(BaseEmbeddings):\n    \"\"\"\n    An Embeddings component that uses an OpenAI API compatible endpoint.\n\n    Attributes:\n        endpoint_url (str): The url of an OpenAI API compatible endpoint.\n    \"\"\"\n\n    endpoint_url: str\n\n    def run(\n        self, text: str | list[str] | Document | list[Document]\n    ) -> list[DocumentWithEmbedding]:\n        \"\"\"\n        Generate embeddings from text Args:\n            text (str | list[str] | Document | list[Document]): text to generate\n            embeddings from\n        Returns:\n            list[DocumentWithEmbedding]: embeddings\n        \"\"\"\n        if not isinstance(text, list):\n            text = [text]\n\n        outputs = []\n\n        for item in text:\n            response = requests.post(\n                self.endpoint_url, json={\"input\": str(item)}\n            ).json()\n            outputs.append(\n                DocumentWithEmbedding(\n                    text=str(item),\n                    embedding=response[\"data\"][0][\"embedding\"],\n                    total_tokens=response[\"usage\"][\"total_tokens\"],\n                    prompt_tokens=response[\"usage\"][\"prompt_tokens\"],\n                )\n            )\n\n        return outputs\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/embeddings/fastembed.py",
    "content": "from typing import TYPE_CHECKING, Optional\n\nfrom kotaemon.base import Document, DocumentWithEmbedding, Param\n\nfrom .base import BaseEmbeddings\n\nif TYPE_CHECKING:\n    from fastembed import TextEmbedding\n\n\nclass FastEmbedEmbeddings(BaseEmbeddings):\n    \"\"\"Utilize fastembed library for embeddings locally without GPU.\n\n    Supported model: https://qdrant.github.io/fastembed/examples/Supported_Models/\n    Code: https://github.com/qdrant/fastembed\n    \"\"\"\n\n    model_name: str = Param(\n        \"BAAI/bge-small-en-v1.5\",\n        help=(\n            \"Model name for fastembed. Please refer \"\n            \"[here](https://qdrant.github.io/fastembed/examples/Supported_Models/) \"\n            \"for the list of supported models.\"\n        ),\n        required=True,\n    )\n    batch_size: int = Param(\n        256,\n        help=\"Batch size for embeddings. Higher values use more memory, but are faster\",\n    )\n    parallel: Optional[int] = Param(\n        None,\n        help=(\n            \"Number of threads to use for embeddings. \"\n            \"If > 1, data-parallel encoding will be used. \"\n            \"If 0, use all available CPUs. \"\n            \"If None, use default onnxruntime threading. 
\"\n            \"Defaults to None.\"\n        ),\n    )\n\n    @Param.auto()\n    def client_(self) -> \"TextEmbedding\":\n        try:\n            from fastembed import TextEmbedding\n        except ImportError:\n            raise ImportError(\"Please install FastEmbed: `pip install fastembed`\")\n\n        return TextEmbedding(model_name=self.model_name)\n\n    def invoke(\n        self, text: str | list[str] | Document | list[Document], *args, **kwargs\n    ) -> list[DocumentWithEmbedding]:\n        input_ = self.prepare_input(text)\n        embeddings = self.client_.embed(\n            [_.content for _ in input_],\n            batch_size=self.batch_size,\n            parallel=self.parallel,\n        )\n        return [\n            DocumentWithEmbedding(\n                content=doc,\n                embedding=list(embedding),\n            )\n            for doc, embedding in zip(input_, embeddings)\n        ]\n\n    async def ainvoke(\n        self, text: str | list[str] | Document | list[Document], *args, **kwargs\n    ) -> list[DocumentWithEmbedding]:\n        \"\"\"Fastembed does not support async API.\"\"\"\n        return self.invoke(text, *args, **kwargs)\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/embeddings/langchain_based.py",
    "content": "from typing import Optional\n\nfrom kotaemon.base import DocumentWithEmbedding, Param\n\nfrom .base import BaseEmbeddings\n\n\nclass LCEmbeddingMixin:\n    def _get_lc_class(self):\n        raise NotImplementedError(\n            \"Please return the relevant Langchain class in in _get_lc_class\"\n        )\n\n    def __init__(self, **params):\n        self._lc_class = self._get_lc_class()\n        self._obj = self._lc_class(**params)\n        self._kwargs: dict = params\n\n        super().__init__()\n\n    def run(self, text):\n        input_docs = self.prepare_input(text)\n        input_ = [doc.text for doc in input_docs]\n\n        embeddings = self._obj.embed_documents(input_)\n\n        return [\n            DocumentWithEmbedding(content=doc, embedding=each_embedding)\n            for doc, each_embedding in zip(input_docs, embeddings)\n        ]\n\n    def __repr__(self):\n        kwargs = []\n        for key, value_obj in self._kwargs.items():\n            value = repr(value_obj)\n            kwargs.append(f\"{key}={value}\")\n        kwargs_repr = \", \".join(kwargs)\n        return f\"{self.__class__.__name__}({kwargs_repr})\"\n\n    def __str__(self):\n        kwargs = []\n        for key, value_obj in self._kwargs.items():\n            value = str(value_obj)\n            if len(value) > 20:\n                value = f\"{value[:15]}...\"\n            kwargs.append(f\"{key}={value}\")\n        kwargs_repr = \", \".join(kwargs)\n        return f\"{self.__class__.__name__}({kwargs_repr})\"\n\n    def __setattr__(self, name, value):\n        if name == \"_lc_class\":\n            return super().__setattr__(name, value)\n\n        if name in self._lc_class.__fields__:\n            self._kwargs[name] = value\n            self._obj = self._lc_class(**self._kwargs)\n        else:\n            super().__setattr__(name, value)\n\n    def __getattr__(self, name):\n        if name in self._kwargs:\n            return self._kwargs[name]\n        return 
getattr(self._obj, name)\n\n    def dump(self, *args, **kwargs):\n        from theflow.utils.modules import serialize\n\n        params = {key: serialize(value) for key, value in self._kwargs.items()}\n        return {\n            \"__type__\": f\"{self.__module__}.{self.__class__.__qualname__}\",\n            **params,\n        }\n\n    def specs(self, path: str):\n        path = path.strip(\".\")\n        if \".\" in path:\n            raise ValueError(\"path should not contain '.'\")\n\n        if path in self._lc_class.__fields__:\n            return {\n                \"__type__\": \"theflow.base.ParamAttr\",\n                \"refresh_on_set\": True,\n                \"strict_type\": True,\n            }\n\n        raise ValueError(f\"Invalid param {path}\")\n\n\nclass LCOpenAIEmbeddings(LCEmbeddingMixin, BaseEmbeddings):\n    \"\"\"Wrapper around Langchain's OpenAI embedding, focusing on key parameters\"\"\"\n\n    def __init__(\n        self,\n        model: str = \"text-embedding-ada-002\",\n        openai_api_version: Optional[str] = None,\n        openai_api_base: Optional[str] = None,\n        openai_api_type: Optional[str] = None,\n        openai_api_key: Optional[str] = None,\n        request_timeout: Optional[float] = None,\n        **params,\n    ):\n        super().__init__(\n            model=model,\n            openai_api_version=openai_api_version,\n            openai_api_base=openai_api_base,\n            openai_api_type=openai_api_type,\n            openai_api_key=openai_api_key,\n            request_timeout=request_timeout,\n            **params,\n        )\n\n    def _get_lc_class(self):\n        try:\n            from langchain_openai import OpenAIEmbeddings\n        except ImportError:\n            from langchain.embeddings import OpenAIEmbeddings\n\n        return OpenAIEmbeddings\n\n\nclass LCAzureOpenAIEmbeddings(LCEmbeddingMixin, BaseEmbeddings):\n    \"\"\"Wrapper around Langchain's AzureOpenAI embedding, focusing on key 
parameters\"\"\"\n\n    def __init__(\n        self,\n        azure_endpoint: Optional[str] = None,\n        deployment: Optional[str] = None,\n        openai_api_key: Optional[str] = None,\n        api_version: Optional[str] = None,\n        request_timeout: Optional[float] = None,\n        **params,\n    ):\n        super().__init__(\n            azure_endpoint=azure_endpoint,\n            deployment=deployment,\n            api_version=api_version,\n            openai_api_key=openai_api_key,\n            request_timeout=request_timeout,\n            **params,\n        )\n\n    def _get_lc_class(self):\n        try:\n            from langchain_openai import AzureOpenAIEmbeddings\n        except ImportError:\n            from langchain.embeddings import AzureOpenAIEmbeddings\n\n        return AzureOpenAIEmbeddings\n\n\nclass LCCohereEmbeddings(LCEmbeddingMixin, BaseEmbeddings):\n    \"\"\"Wrapper around Langchain's Cohere embedding, focusing on key parameters\"\"\"\n\n    cohere_api_key: str = Param(\n        help=\"API key (https://dashboard.cohere.com/api-keys)\",\n        default=None,\n        required=True,\n    )\n    model: str = Param(\n        help=\"Model name to use (https://docs.cohere.com/docs/models)\",\n        default=None,\n        required=True,\n    )\n    user_agent: str = Param(\n        help=\"User agent (leave default)\", default=\"default\", required=True\n    )\n\n    def __init__(\n        self,\n        model: str = \"embed-english-v2.0\",\n        cohere_api_key: Optional[str] = None,\n        truncate: Optional[str] = None,\n        request_timeout: Optional[float] = None,\n        **params,\n    ):\n        super().__init__(\n            model=model,\n            cohere_api_key=cohere_api_key,\n            truncate=truncate,\n            request_timeout=request_timeout,\n            **params,\n        )\n\n    def _get_lc_class(self):\n        try:\n            from langchain_cohere import CohereEmbeddings\n        except 
ImportError:\n            from langchain.embeddings import CohereEmbeddings\n\n        return CohereEmbeddings\n\n\nclass LCHuggingFaceEmbeddings(LCEmbeddingMixin, BaseEmbeddings):\n    \"\"\"Wrapper around Langchain's HuggingFace embedding, focusing on key parameters\"\"\"\n\n    model_name: str = Param(\n        help=(\n            \"Model name to use (https://huggingface.co/models?\"\n            \"pipeline_tag=sentence-similarity&sort=trending)\"\n        ),\n        default=None,\n        required=True,\n    )\n\n    def __init__(\n        self,\n        model_name: str = \"sentence-transformers/all-mpnet-base-v2\",\n        **params,\n    ):\n        super().__init__(\n            model_name=model_name,\n            **params,\n        )\n\n    def _get_lc_class(self):\n        try:\n            from langchain_community.embeddings import HuggingFaceBgeEmbeddings\n        except ImportError:\n            from langchain.embeddings import HuggingFaceBgeEmbeddings\n\n        return HuggingFaceBgeEmbeddings\n\n\nclass LCGoogleEmbeddings(LCEmbeddingMixin, BaseEmbeddings):\n    \"\"\"Wrapper around Langchain's Google GenAI embedding, focusing on key parameters\"\"\"\n\n    google_api_key: str = Param(\n        help=\"API key (https://aistudio.google.com/app/apikey)\",\n        default=None,\n        required=True,\n    )\n    model: str = Param(\n        help=\"Model name to use (https://ai.google.dev/gemini-api/docs/models/gemini#text-embedding-and-embedding)\",  # noqa\n        default=\"models/text-embedding-004\",\n        required=True,\n    )\n\n    def __init__(\n        self,\n        model: str = \"models/text-embedding-004\",\n        google_api_key: Optional[str] = None,\n        **params,\n    ):\n        super().__init__(\n            model=model,\n            google_api_key=google_api_key,\n            **params,\n        )\n\n    def _get_lc_class(self):\n        try:\n            from langchain_google_genai import GoogleGenerativeAIEmbeddings\n        
except ImportError:\n            raise ImportError(\"Please install langchain-google-genai\")\n\n        return GoogleGenerativeAIEmbeddings\n\n\nclass LCMistralEmbeddings(LCEmbeddingMixin, BaseEmbeddings):\n    \"\"\"Wrapper around LangChain's MistralAI embedding, focusing on key parameters\"\"\"\n\n    api_key: str = Param(\n        help=\"API key (https://console.mistral.ai/api-keys)\",\n        default=None,\n        required=True,\n    )\n    model: str = Param(\n        help=\"Model name to use ('mistral-embed')\",\n        default=\"mistral-embed\",\n        required=True,\n    )\n\n    def __init__(\n        self,\n        model: str = \"mistral-embed\",\n        api_key: Optional[str] = None,\n        **params,\n    ):\n        super().__init__(\n            model=model,\n            api_key=api_key,\n            **params,\n        )\n\n    def _get_lc_class(self):\n        try:\n            from langchain_mistralai import MistralAIEmbeddings\n        except ImportError:\n            raise ImportError(\n                \"Please install langchain_mistralai: \"\n                \"`pip install -U langchain_mistralai`\"\n            )\n        return MistralAIEmbeddings\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/embeddings/openai.py",
    "content": "from itertools import islice\nfrom typing import Optional\n\nimport numpy as np\nimport openai\nimport tiktoken\nfrom tenacity import (\n    retry,\n    retry_if_not_exception_type,\n    stop_after_attempt,\n    wait_random_exponential,\n)\nfrom theflow.utils.modules import import_dotted_string\n\nfrom kotaemon.base import Param\n\nfrom .base import BaseEmbeddings, Document, DocumentWithEmbedding\n\n\ndef split_text_by_chunk_size(text: str, chunk_size: int) -> list[list[int]]:\n    \"\"\"Split the text into chunks of a given size\n\n    Args:\n        text: text to split\n        chunk_size: size of each chunk\n\n    Returns:\n        list of chunks (as tokens)\n    \"\"\"\n    encoding = tiktoken.get_encoding(\"cl100k_base\")\n    tokens = iter(encoding.encode(text))\n    result = []\n    while chunk := list(islice(tokens, chunk_size)):\n        result.append(chunk)\n    return result\n\n\nclass BaseOpenAIEmbeddings(BaseEmbeddings):\n    \"\"\"Base interface for OpenAI embedding model, using the openai library.\n\n    This class exposes the parameters in resources.Chat. To subclass this class:\n\n        - Implement the `prepare_client` method to return the OpenAI client\n        - Implement the `openai_response` method to return the OpenAI response\n        - Implement the params relate to the OpenAI client\n    \"\"\"\n\n    _dependencies = [\"openai\"]\n\n    api_key: str = Param(None, help=\"API key\", required=True)\n    timeout: Optional[float] = Param(None, help=\"Timeout for the API request.\")\n    max_retries: Optional[int] = Param(\n        None, help=\"Maximum number of retries for the API request.\"\n    )\n\n    dimensions: Optional[int] = Param(\n        None,\n        help=(\n            \"The number of dimensions the resulting output embeddings should have. 
\"\n            \"Only supported in `text-embedding-3` and later models.\"\n        ),\n    )\n    context_length: Optional[int] = Param(\n        None, help=\"The maximum context length of the embedding model\"\n    )\n\n    @Param.auto(depends_on=[\"max_retries\"])\n    def max_retries_(self):\n        if self.max_retries is None:\n            from openai._constants import DEFAULT_MAX_RETRIES\n\n            return DEFAULT_MAX_RETRIES\n        return self.max_retries\n\n    def prepare_client(self, async_version: bool = False):\n        \"\"\"Get the OpenAI client\n\n        Args:\n            async_version (bool): Whether to get the async version of the client\n        \"\"\"\n        raise NotImplementedError\n\n    def openai_response(self, client, **kwargs):\n        \"\"\"Get the openai response\"\"\"\n        raise NotImplementedError\n\n    def invoke(\n        self, text: str | list[str] | Document | list[Document], *args, **kwargs\n    ) -> list[DocumentWithEmbedding]:\n        input_doc = self.prepare_input(text)\n        client = self.prepare_client(async_version=False)\n\n        input_: list[str | list[int]] = []\n        splitted_indices = {}\n        for idx, text in enumerate(input_doc):\n            if self.context_length:\n                chunks = split_text_by_chunk_size(text.text or \" \", self.context_length)\n                splitted_indices[idx] = (len(input_), len(input_) + len(chunks))\n                input_.extend(chunks)\n            else:\n                splitted_indices[idx] = (len(input_), len(input_) + 1)\n                input_.append(text.text)\n\n        resp = self.openai_response(client, input=input_, **kwargs).dict()\n        output_ = list(sorted(resp[\"data\"], key=lambda x: x[\"index\"]))\n\n        output = []\n        for idx, doc in enumerate(input_doc):\n            embs = output_[splitted_indices[idx][0] : splitted_indices[idx][1]]\n            if len(embs) == 1:\n                output.append(\n                    
DocumentWithEmbedding(embedding=embs[0][\"embedding\"], content=doc)\n                )\n                continue\n\n            chunk_lens = [\n                len(_)\n                for _ in input_[splitted_indices[idx][0] : splitted_indices[idx][1]]\n            ]\n            vs: list[list[float]] = [_[\"embedding\"] for _ in embs]\n            emb = np.average(vs, axis=0, weights=chunk_lens)\n            emb = emb / np.linalg.norm(emb)\n            output.append(DocumentWithEmbedding(embedding=emb.tolist(), content=doc))\n\n        return output\n\n    async def ainvoke(\n        self, text: str | list[str] | Document | list[Document], *args, **kwargs\n    ) -> list[DocumentWithEmbedding]:\n        input_ = self.prepare_input(text)\n        client = self.prepare_client(async_version=True)\n        resp = await self.openai_response(\n            client, input=[_.text if _.text else \" \" for _ in input_], **kwargs\n        ).dict()\n        output_ = sorted(resp[\"data\"], key=lambda x: x[\"index\"])\n        return [\n            DocumentWithEmbedding(embedding=o[\"embedding\"], content=i)\n            for i, o in zip(input_, output_)\n        ]\n\n\nclass OpenAIEmbeddings(BaseOpenAIEmbeddings):\n    \"\"\"OpenAI chat model\"\"\"\n\n    base_url: Optional[str] = Param(None, help=\"OpenAI base URL\")\n    organization: Optional[str] = Param(None, help=\"OpenAI organization\")\n    model: str = Param(\n        None,\n        help=(\n            \"ID of the model to use. 
You can go to [Model overview](https://platform.\"\n            \"openai.com/docs/models/overview) to see the available models.\"\n        ),\n        required=True,\n    )\n\n    def prepare_client(self, async_version: bool = False):\n        \"\"\"Get the OpenAI client\n\n        Args:\n            async_version (bool): Whether to get the async version of the client\n        \"\"\"\n        params = {\n            \"api_key\": self.api_key,\n            \"organization\": self.organization,\n            \"base_url\": self.base_url,\n            \"timeout\": self.timeout,\n            \"max_retries\": self.max_retries_,\n        }\n        if async_version:\n            from openai import AsyncOpenAI\n\n            return AsyncOpenAI(**params)\n\n        from openai import OpenAI\n\n        return OpenAI(**params)\n\n    @retry(\n        retry=retry_if_not_exception_type(\n            (openai.NotFoundError, openai.BadRequestError)\n        ),\n        wait=wait_random_exponential(min=1, max=40),\n        stop=stop_after_attempt(6),\n    )\n    def openai_response(self, client, **kwargs):\n        \"\"\"Get the openai response\"\"\"\n        params: dict = {\n            \"model\": self.model,\n        }\n        if self.dimensions:\n            params[\"dimensions\"] = self.dimensions\n        params.update(kwargs)\n\n        return client.embeddings.create(**params)\n\n\nclass AzureOpenAIEmbeddings(BaseOpenAIEmbeddings):\n    azure_endpoint: str = Param(\n        None,\n        help=(\n            \"HTTPS endpoint for the Azure OpenAI model. 
The azure_endpoint, \"\n            \"azure_deployment, and api_version parameters are used to construct \"\n            \"the full URL for the Azure OpenAI model.\"\n        ),\n        required=True,\n    )\n    azure_deployment: str = Param(None, help=\"Azure deployment name\", required=True)\n    api_version: str = Param(None, help=\"Azure model version\", required=True)\n    azure_ad_token: Optional[str] = Param(None, help=\"Azure AD token\")\n    azure_ad_token_provider: Optional[str] = Param(None, help=\"Azure AD token provider\")\n\n    @Param.auto(depends_on=[\"azure_ad_token_provider\"])\n    def azure_ad_token_provider_(self):\n        if isinstance(self.azure_ad_token_provider, str):\n            return import_dotted_string(self.azure_ad_token_provider, safe=False)\n\n    def prepare_client(self, async_version: bool = False):\n        \"\"\"Get the OpenAI client\n\n        Args:\n            async_version (bool): Whether to get the async version of the client\n        \"\"\"\n        params = {\n            \"azure_endpoint\": self.azure_endpoint,\n            \"api_version\": self.api_version,\n            \"api_key\": self.api_key,\n            \"azure_ad_token\": self.azure_ad_token,\n            \"azure_ad_token_provider\": self.azure_ad_token_provider_,\n            \"timeout\": self.timeout,\n            \"max_retries\": self.max_retries_,\n        }\n        if async_version:\n            from openai import AsyncAzureOpenAI\n\n            return AsyncAzureOpenAI(**params)\n\n        from openai import AzureOpenAI\n\n        return AzureOpenAI(**params)\n\n    @retry(\n        retry=retry_if_not_exception_type(\n            (openai.NotFoundError, openai.BadRequestError)\n        ),\n        wait=wait_random_exponential(min=1, max=40),\n        stop=stop_after_attempt(6),\n    )\n    def openai_response(self, client, **kwargs):\n        \"\"\"Get the openai response\"\"\"\n        params: dict = {\n            \"model\": self.azure_deployment,\n    
    }\n        if self.dimensions:\n            params[\"dimensions\"] = self.dimensions\n        params.update(kwargs)\n\n        return client.embeddings.create(**params)\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/embeddings/tei_endpoint_embed.py",
    "content": "import aiohttp\nimport requests\n\nfrom kotaemon.base import Document, DocumentWithEmbedding, Param\n\nfrom .base import BaseEmbeddings\n\nsession = requests.session()\n\n\nclass TeiEndpointEmbeddings(BaseEmbeddings):\n    \"\"\"An Embeddings component that uses an\n    TEI (Text-Embedding-Inference) API compatible endpoint.\n\n    Ref: https://github.com/huggingface/text-embeddings-inference\n\n    Attributes:\n        endpoint_url (str): The url of an TEI\n            (Text-Embedding-Inference) API compatible endpoint.\n        normalize (bool): Whether to normalize embeddings to unit length.\n        truncate (bool): Whether to truncate embeddings\n            to a fixed/default length.\n    \"\"\"\n\n    endpoint_url: str = Param(None, help=\"TEI embedding service api base URL\")\n    normalize: bool = Param(\n        True,\n        help=\"Normalize embeddings to unit length\",\n    )\n    truncate: bool = Param(\n        True,\n        help=\"Truncate embeddings to a fixed/default length\",\n    )\n\n    async def client_(self, inputs: list[str]):\n        async with aiohttp.ClientSession() as session:\n            async with session.post(\n                url=self.endpoint_url,\n                json={\n                    \"inputs\": inputs,\n                    \"normalize\": self.normalize,\n                    \"truncate\": self.truncate,\n                },\n            ) as resp:\n                embeddings = await resp.json()\n        return embeddings\n\n    async def ainvoke(\n        self, text: str | list[str] | Document | list[Document], *args, **kwargs\n    ) -> list[DocumentWithEmbedding]:\n        if not isinstance(text, list):\n            text = [text]\n        text = self.prepare_input(text)\n\n        outputs = []\n        batch_size = 6\n        num_batch = max(len(text) // batch_size, 1)\n        for i in range(num_batch):\n            if i == num_batch - 1:\n                mini_batch = text[batch_size * i :]\n            
else:\n                mini_batch = text[batch_size * i : batch_size * (i + 1)]\n            mini_batch = [x.content for x in mini_batch]\n            embeddings = await self.client_(mini_batch)  # type: ignore\n            outputs.extend(\n                [\n                    DocumentWithEmbedding(content=doc, embedding=embedding)\n                    for doc, embedding in zip(mini_batch, embeddings)\n                ]\n            )\n\n        return outputs\n\n    def invoke(\n        self, text: str | list[str] | Document | list[Document], *args, **kwargs\n    ) -> list[DocumentWithEmbedding]:\n        if not isinstance(text, list):\n            text = [text]\n\n        text = self.prepare_input(text)\n\n        outputs = []\n        batch_size = 6\n        num_batch = max(len(text) // batch_size, 1)\n        for i in range(num_batch):\n            if i == num_batch - 1:\n                mini_batch = text[batch_size * i :]\n            else:\n                mini_batch = text[batch_size * i : batch_size * (i + 1)]\n            mini_batch = [x.content for x in mini_batch]\n            embeddings = session.post(\n                url=self.endpoint_url,\n                json={\n                    \"inputs\": mini_batch,\n                    \"normalize\": self.normalize,\n                    \"truncate\": self.truncate,\n                },\n            ).json()\n            outputs.extend(\n                [\n                    DocumentWithEmbedding(content=doc, embedding=embedding)\n                    for doc, embedding in zip(mini_batch, embeddings)\n                ]\n            )\n        return outputs\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/embeddings/voyageai.py",
    "content": "\"\"\"Implements embeddings from [Voyage AI](https://voyageai.com).\n\"\"\"\n\nimport importlib\n\nfrom kotaemon.base import Document, DocumentWithEmbedding, Param\n\nfrom .base import BaseEmbeddings\n\nvo = None\n\n\ndef _import_voyageai():\n    global vo\n    if not vo:\n        vo = importlib.import_module(\"voyageai\")\n    return vo\n\n\ndef _format_output(texts: list[str], embeddings: list[list]):\n    \"\"\"Formats the output of all `.embed` calls.\n    Args:\n        texts: List of original documents\n        embeddings: Embeddings corresponding to each document\n    \"\"\"\n    return [\n        DocumentWithEmbedding(content=text, embedding=embedding)\n        for text, embedding in zip(texts, embeddings)\n    ]\n\n\nclass VoyageAIEmbeddings(BaseEmbeddings):\n    \"\"\"Voyage AI provides best-in-class embedding models and rerankers.\"\"\"\n\n    api_key: str = Param(None, help=\"Voyage API key\", required=False)\n    model: str = Param(\n        \"voyage-3\",\n        help=(\n            \"Model name to use. 
The Voyage \"\n            \"[documentation](https://docs.voyageai.com/docs/embeddings) \"\n            \"provides a list of all available embedding models.\"\n        ),\n        required=True,\n    )\n\n    def __init__(self, *args, **kwargs):\n        super().__init__(*args, **kwargs)\n        if not self.api_key:\n            raise ValueError(\"API key must be provided for VoyageAIEmbeddings.\")\n\n        self._client = _import_voyageai().Client(api_key=self.api_key)\n        self._aclient = _import_voyageai().AsyncClient(api_key=self.api_key)\n\n    def invoke(\n        self, text: str | list[str] | Document | list[Document], *args, **kwargs\n    ) -> list[DocumentWithEmbedding]:\n        texts = [t.content for t in self.prepare_input(text)]\n        embeddings = self._client.embed(texts, model=self.model).embeddings\n        return _format_output(texts, embeddings)\n\n    async def ainvoke(\n        self, text: str | list[str] | Document | list[Document], *args, **kwargs\n    ) -> list[DocumentWithEmbedding]:\n        texts = [t.content for t in self.prepare_input(text)]\n        embeddings = await self._aclient.embed(texts, model=self.model).embeddings\n        return _format_output(texts, embeddings)\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/indices/__init__.py",
    "content": "from .vectorindex import VectorIndexing, VectorRetrieval\n\n__all__ = [\"VectorIndexing\", \"VectorRetrieval\"]\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/indices/base.py",
    "content": "from __future__ import annotations\n\nfrom abc import abstractmethod\nfrom typing import Any, Type\n\nfrom llama_index.core.node_parser.interface import NodeParser\n\nfrom kotaemon.base import BaseComponent, Document, RetrievedDocument\n\n\nclass DocTransformer(BaseComponent):\n    \"\"\"This is a base class for document transformers\n\n    A document transformer transforms a list of documents into another list\n    of documents. Transforming can mean splitting a document into multiple documents,\n    reducing a large list of documents into a smaller list of documents, or adding\n    metadata to each document in a list of documents, etc.\n    \"\"\"\n\n    @abstractmethod\n    def run(\n        self,\n        documents: list[Document],\n        **kwargs,\n    ) -> list[Document]:\n        ...\n\n\nclass LlamaIndexDocTransformerMixin:\n    \"\"\"Allow automatically wrapping a Llama-index component into kotaemon component\n\n    Example:\n        class TokenSplitter(LlamaIndexMixin, BaseSplitter):\n            def _get_li_class(self):\n                from llama_index.core.text_splitter import TokenTextSplitter\n                return TokenTextSplitter\n\n    To use this mixin, please:\n        1. Use this class as the 1st parent class, so that Python will prefer to use\n        the attributes and methods of this class whenever possible.\n        2. 
Overwrite `_get_li_class` to return the relevant LlamaIndex component.\n    \"\"\"\n\n    def _get_li_class(self) -> Type[NodeParser]:\n        raise NotImplementedError(\n            \"Please return the relevant LlamaIndex class in _get_li_class\"\n        )\n\n    def __init__(self, **params):\n        self._li_cls = self._get_li_class()\n        self._obj = self._li_cls(**params)\n        self._kwargs = params\n        super().__init__()\n\n    def __repr__(self):\n        kwargs = []\n        for key, value_obj in self._kwargs.items():\n            value = repr(value_obj)\n            kwargs.append(f\"{key}={value}\")\n        kwargs_repr = \", \".join(kwargs)\n        return f\"{self.__class__.__name__}({kwargs_repr})\"\n\n    def __str__(self):\n        kwargs = []\n        for key, value_obj in self._kwargs.items():\n            value = str(value_obj)\n            if len(value) > 20:\n                value = f\"{value[:15]}...\"\n            kwargs.append(f\"{key}={value}\")\n        kwargs_repr = \", \".join(kwargs)\n        return f\"{self.__class__.__name__}({kwargs_repr})\"\n\n    def __setattr__(self, name: str, value: Any) -> None:\n        if name.startswith(\"_\") or name in self._protected_keywords():\n            return super().__setattr__(name, value)\n\n        self._kwargs[name] = value\n        return setattr(self._obj, name, value)\n\n    def __getattr__(self, name: str) -> Any:\n        if name in self._kwargs:\n            return self._kwargs[name]\n        return getattr(self._obj, name)\n\n    def dump(self, *args, **kwargs):\n        from theflow.utils.modules import serialize\n\n        params = {key: serialize(value) for key, value in self._kwargs.items()}\n        return {\n            \"__type__\": f\"{self.__module__}.{self.__class__.__qualname__}\",\n            **params,\n        }\n\n    def run(\n        self,\n        documents: list[Document],\n        **kwargs,\n    ) -> list[Document]:\n        \"\"\"Run Llama-index node 
parser and convert the output to Document from\n        kotaemon\n        \"\"\"\n        docs = self._obj(documents, **kwargs)  # type: ignore\n        return [Document.from_dict(doc.to_dict()) for doc in docs]\n\n\nclass BaseIndexing(BaseComponent):\n    \"\"\"Define the base interface for indexing pipeline\"\"\"\n\n    def to_retrieval_pipeline(self, **kwargs):\n        \"\"\"Convert the indexing pipeline to a retrieval pipeline\"\"\"\n        raise NotImplementedError\n\n    def to_qa_pipeline(self, **kwargs):\n        \"\"\"Convert the indexing pipeline to a QA pipeline\"\"\"\n        raise NotImplementedError\n\n\nclass BaseRetrieval(BaseComponent):\n    \"\"\"Define the base interface for retrieval pipeline\"\"\"\n\n    @abstractmethod\n    def run(self, *args, **kwargs) -> list[RetrievedDocument]:\n        ...\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/indices/extractors/__init__.py",
    "content": "from .doc_parsers import BaseDocParser, SummaryExtractor, TitleExtractor\n\n__all__ = [\n    \"BaseDocParser\",\n    \"TitleExtractor\",\n    \"SummaryExtractor\",\n]\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/indices/extractors/doc_parsers.py",
    "content": "from ..base import DocTransformer, LlamaIndexDocTransformerMixin\n\n\nclass BaseDocParser(DocTransformer):\n    ...\n\n\nclass TitleExtractor(LlamaIndexDocTransformerMixin, BaseDocParser):\n    def __init__(\n        self,\n        llm=None,\n        nodes: int = 5,\n        **params,\n    ):\n        super().__init__(llm=llm, nodes=nodes, **params)\n\n    def _get_li_class(self):\n        from llama_index.core.extractors import TitleExtractor\n\n        return TitleExtractor\n\n\nclass SummaryExtractor(LlamaIndexDocTransformerMixin, BaseDocParser):\n    def __init__(\n        self,\n        llm=None,\n        summaries: list[str] = [\"self\"],\n        **params,\n    ):\n        super().__init__(llm=llm, summaries=summaries, **params)\n\n    def _get_li_class(self):\n        from llama_index.core.extractors import SummaryExtractor\n\n        return SummaryExtractor\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/indices/ingests/__init__.py",
    "content": "from .files import DocumentIngestor\n\n__all__ = [\"DocumentIngestor\"]\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/indices/ingests/files.py",
    "content": "from pathlib import Path\nfrom typing import Type\n\nfrom decouple import config\nfrom llama_index.core.readers.base import BaseReader\nfrom llama_index.readers.file import PDFReader\nfrom theflow.settings import settings as flowsettings\n\nfrom kotaemon.base import BaseComponent, Document, Param\nfrom kotaemon.indices.extractors import BaseDocParser\nfrom kotaemon.indices.splitters import BaseSplitter, TokenSplitter\nfrom kotaemon.loaders import (\n    AdobeReader,\n    AzureAIDocumentIntelligenceLoader,\n    DirectoryReader,\n    DoclingReader,\n    HtmlReader,\n    MathpixPDFReader,\n    MhtmlReader,\n    OCRReader,\n    PandasExcelReader,\n    PDFThumbnailReader,\n    TxtReader,\n    UnstructuredReader,\n    WebReader,\n)\n\nweb_reader = WebReader()\nunstructured = UnstructuredReader()\nadobe_reader = AdobeReader()\nazure_reader = AzureAIDocumentIntelligenceLoader(\n    endpoint=str(config(\"AZURE_DI_ENDPOINT\", default=\"\")),\n    credential=str(config(\"AZURE_DI_CREDENTIAL\", default=\"\")),\n    cache_dir=getattr(flowsettings, \"KH_MARKDOWN_OUTPUT_DIR\", None),\n)\ndocling_reader = DoclingReader()\nadobe_reader.vlm_endpoint = (\n    azure_reader.vlm_endpoint\n) = docling_reader.vlm_endpoint = getattr(flowsettings, \"KH_VLM_ENDPOINT\", \"\")\n\n\nKH_DEFAULT_FILE_EXTRACTORS: dict[str, BaseReader] = {\n    \".xlsx\": PandasExcelReader(),\n    \".docx\": unstructured,\n    \".pptx\": unstructured,\n    \".xls\": unstructured,\n    \".doc\": unstructured,\n    \".html\": HtmlReader(),\n    \".mhtml\": MhtmlReader(),\n    \".png\": unstructured,\n    \".jpeg\": unstructured,\n    \".jpg\": unstructured,\n    \".tiff\": unstructured,\n    \".tif\": unstructured,\n    \".pdf\": PDFThumbnailReader(),\n    \".txt\": TxtReader(),\n    \".md\": TxtReader(),\n}\n\n\nclass DocumentIngestor(BaseComponent):\n    \"\"\"Ingest common office document types into Document for indexing\n\n    Document types:\n        - pdf\n        - xlsx, xls\n        - docx, 
doc\n\n    Args:\n        pdf_mode: mode for pdf extraction, one of \"normal\", \"mathpix\", \"ocr\"\n            - normal: parse pdf text\n            - mathpix: parse pdf text using mathpix\n            - ocr: parse pdf image using flax\n        doc_parsers: list of document parsers to parse the document\n        text_splitter: splitter to split the document into text nodes\n        override_file_extractors: override file extractors for specific file extensions\n            The default file extractors are stored in `KH_DEFAULT_FILE_EXTRACTORS`\n    \"\"\"\n\n    pdf_mode: str = \"normal\"  # \"normal\", \"mathpix\", \"ocr\", \"multimodal\"\n    doc_parsers: list[BaseDocParser] = Param(default_callback=lambda _: [])\n    text_splitter: BaseSplitter = TokenSplitter.withx(\n        chunk_size=1024,\n        chunk_overlap=256,\n        separator=\"\\n\\n\",\n        backup_separators=[\"\\n\", \".\", \" \", \"\\u200B\"],\n    )\n    override_file_extractors: dict[str, Type[BaseReader]] = {}\n\n    def _get_reader(self, input_files: list[str | Path]):\n        \"\"\"Get appropriate readers for the input files based on file extension\"\"\"\n        file_extractors: dict[str, BaseReader] = {\n            ext: reader for ext, reader in KH_DEFAULT_FILE_EXTRACTORS.items()\n        }\n        for ext, cls in self.override_file_extractors.items():\n            file_extractors[ext] = cls()\n\n        if self.pdf_mode == \"normal\":\n            file_extractors[\".pdf\"] = PDFReader()\n        elif self.pdf_mode == \"ocr\":\n            file_extractors[\".pdf\"] = OCRReader()\n        elif self.pdf_mode == \"multimodal\":\n            file_extractors[\".pdf\"] = AdobeReader()\n        else:\n            file_extractors[\".pdf\"] = MathpixPDFReader()\n\n        main_reader = DirectoryReader(\n            input_files=input_files,\n            file_extractor=file_extractors,\n        )\n\n        return main_reader\n\n    def run(self, file_paths: list[str | Path] | str | Path) 
-> list[Document]:\n        \"\"\"Ingest the file paths into Document\n\n        Args:\n            file_paths: list of file paths or a single file path\n\n        Returns:\n            list of parsed Documents\n        \"\"\"\n        if not isinstance(file_paths, list):\n            file_paths = [file_paths]\n\n        documents = self._get_reader(input_files=file_paths)()\n        print(f\"Read {len(file_paths)} files into {len(documents)} documents.\")\n        nodes = self.text_splitter(documents)\n        print(f\"Transform {len(documents)} documents into {len(nodes)} nodes.\")\n        self.log_progress(\".num_docs\", num_docs=len(nodes))\n\n        # document parsers call\n        if self.doc_parsers:\n            for parser in self.doc_parsers:\n                nodes = parser(nodes)\n\n        return nodes\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/indices/qa/__init__.py",
    "content": "from .citation import CitationPipeline\n\n__all__ = [\n    \"CitationPipeline\",\n]\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/indices/qa/citation.py",
    "content": "from typing import List\n\nfrom pydantic import BaseModel, Field\n\nfrom kotaemon.base import BaseComponent\nfrom kotaemon.base.schema import HumanMessage, SystemMessage\nfrom kotaemon.llms import BaseLLM\n\n\nclass CiteEvidence(BaseModel):\n    \"\"\"List of evidences (maximum 5) to support the answer.\"\"\"\n\n    evidences: List[str] = Field(\n        ...,\n        description=(\n            \"Each source should be a direct quote from the context, \"\n            \"as a substring of the original content (max 15 words).\"\n        ),\n    )\n\n\nclass CitationPipeline(BaseComponent):\n    \"\"\"Citation pipeline to extract cited evidences from source\n    (based on input question)\"\"\"\n\n    llm: BaseLLM\n\n    def run(self, context: str, question: str):\n        return self.invoke(context, question)\n\n    def prepare_llm(self, context: str, question: str):\n        schema = CiteEvidence.schema()\n        function = {\n            \"name\": schema[\"title\"],\n            \"description\": schema[\"description\"],\n            \"parameters\": schema,\n        }\n        llm_kwargs = {\n            \"tools\": [{\"type\": \"function\", \"function\": function}],\n            \"tool_choice\": \"required\",\n            \"tools_pydantic\": [CiteEvidence],\n        }\n        messages = [\n            SystemMessage(\n                content=(\n                    \"You are a world class algorithm to answer \"\n                    \"questions with correct and exact citations.\"\n                )\n            ),\n            HumanMessage(\n                content=(\n                    \"Answer question using the following context. 
\"\n                    \"Use the provided function CiteEvidence() to cite your sources.\"\n                )\n            ),\n            HumanMessage(content=context),\n            HumanMessage(content=f\"Question: {question}\"),\n            HumanMessage(\n                content=(\n                    \"Tips: Make sure to cite your sources, \"\n                    \"and use the exact words from the context.\"\n                )\n            ),\n        ]\n        return messages, llm_kwargs\n\n    def invoke(self, context: str, question: str):\n        messages, llm_kwargs = self.prepare_llm(context, question)\n        try:\n            print(\"CitationPipeline: invoking LLM\")\n            llm_output = self.get_from_path(\"llm\").invoke(messages, **llm_kwargs)\n            print(\"CitationPipeline: finish invoking LLM\")\n            if not llm_output.additional_kwargs.get(\"tool_calls\"):\n                return None\n\n            first_func = llm_output.additional_kwargs[\"tool_calls\"][0]\n\n            if \"function\" in first_func:\n                # openai and cohere format\n                function_output = first_func[\"function\"][\"arguments\"]\n            else:\n                # anthropic format\n                function_output = first_func[\"args\"]\n\n            print(\"CitationPipeline:\", function_output)\n\n            if isinstance(function_output, str):\n                output = CiteEvidence.parse_raw(function_output)\n            else:\n                output = CiteEvidence.parse_obj(function_output)\n        except Exception as e:\n            print(e)\n            return None\n\n        return output\n\n    async def ainvoke(self, context: str, question: str):\n        raise NotImplementedError()\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/indices/qa/citation_qa.py",
    "content": "import threading\nfrom collections import defaultdict\nfrom typing import Generator\n\nimport numpy as np\nfrom decouple import config\nfrom theflow.settings import settings as flowsettings\n\nfrom kotaemon.base import (\n    AIMessage,\n    BaseComponent,\n    Document,\n    HumanMessage,\n    Node,\n    SystemMessage,\n)\nfrom kotaemon.llms import ChatLLM, PromptTemplate\n\nfrom .citation import CitationPipeline\nfrom .format_context import (\n    EVIDENCE_MODE_FIGURE,\n    EVIDENCE_MODE_TABLE,\n    EVIDENCE_MODE_TEXT,\n)\nfrom .utils import find_text\n\ntry:\n    from ktem.llms.manager import llms\n    from ktem.reasoning.prompt_optimization.mindmap import CreateMindmapPipeline\n    from ktem.utils.render import Render\nexcept ImportError:\n    raise ImportError(\"Please install `ktem` to use this component\")\n\nMAX_IMAGES = 10\nCITATION_TIMEOUT = 5.0\nCONTEXT_RELEVANT_WARNING_SCORE = config(\n    \"CONTEXT_RELEVANT_WARNING_SCORE\", 0.3, cast=float\n)\n\nDEFAULT_QA_TEXT_PROMPT = (\n    \"Use the following pieces of context to answer the question at the end in detail with clear explanation. \"  # noqa: E501\n    \"If you don't know the answer, just say that you don't know, don't try to \"\n    \"make up an answer. Give answer in \"\n    \"{lang}.\\n\\n\"\n    \"{context}\\n\"\n    \"Question: {question}\\n\"\n    \"Helpful Answer:\"\n)\n\nDEFAULT_QA_TABLE_PROMPT = (\n    \"Use the given context: texts, tables, and figures below to answer the question, \"\n    \"then provide answer with clear explanation.\"\n    \"If you don't know the answer, just say that you don't know, \"\n    \"don't try to make up an answer. Give answer in {lang}.\\n\\n\"\n    \"Context:\\n\"\n    \"{context}\\n\"\n    \"Question: {question}\\n\"\n    \"Helpful Answer:\"\n)  # noqa\n\nDEFAULT_QA_CHATBOT_PROMPT = (\n    \"Pick the most suitable chatbot scenarios to answer the question at the end, \"\n    \"output the provided answer text. 
If you don't know the answer, \"\n    \"just say that you don't know. Keep the answer as concise as possible. \"\n    \"Give answer in {lang}.\\n\\n\"\n    \"Context:\\n\"\n    \"{context}\\n\"\n    \"Question: {question}\\n\"\n    \"Answer:\"\n)  # noqa\n\nDEFAULT_QA_FIGURE_PROMPT = (\n    \"Use the given context: texts, tables, and figures below to answer the question. \"\n    \"If you don't know the answer, just say that you don't know. \"\n    \"Give answer in {lang}.\\n\\n\"\n    \"Context: \\n\"\n    \"{context}\\n\"\n    \"Question: {question}\\n\"\n    \"Answer: \"\n)  # noqa\n\n\nclass AnswerWithContextPipeline(BaseComponent):\n    \"\"\"Answer the question based on the evidence\n\n    Args:\n        llm: the language model to generate the answer\n        citation_pipeline: generates citation from the evidence\n        qa_template: the prompt template for LLM to generate answer (refer to\n            evidence_mode)\n        qa_table_template: the prompt template for LLM to generate answer for table\n            (refer to evidence_mode)\n        qa_chatbot_template: the prompt template for LLM to generate answer for\n            pre-made scenarios (refer to evidence_mode)\n        lang: the language of the answer. 
Currently supports English and Japanese\n    \"\"\"\n\n    llm: ChatLLM = Node(default_callback=lambda _: llms.get_default())\n    vlm_endpoint: str = getattr(flowsettings, \"KH_VLM_ENDPOINT\", \"\")\n    use_multimodal: bool = getattr(flowsettings, \"KH_REASONINGS_USE_MULTIMODAL\", True)\n    citation_pipeline: CitationPipeline = Node(\n        default_callback=lambda _: CitationPipeline(llm=llms.get_default())\n    )\n    create_mindmap_pipeline: CreateMindmapPipeline = Node(\n        default_callback=lambda _: CreateMindmapPipeline(llm=llms.get_default())\n    )\n\n    qa_template: str = DEFAULT_QA_TEXT_PROMPT\n    qa_table_template: str = DEFAULT_QA_TABLE_PROMPT\n    qa_chatbot_template: str = DEFAULT_QA_CHATBOT_PROMPT\n    qa_figure_template: str = DEFAULT_QA_FIGURE_PROMPT\n\n    enable_citation: bool = False\n    enable_mindmap: bool = False\n    enable_citation_viz: bool = False\n\n    system_prompt: str = \"\"\n    lang: str = \"English\"  # supports English and Japanese\n    n_last_interactions: int = 5\n\n    def get_prompt(self, question, evidence, evidence_mode: int):\n        \"\"\"Prepare the prompt and other information for LLM\"\"\"\n        if evidence_mode == EVIDENCE_MODE_TEXT:\n            prompt_template = PromptTemplate(self.qa_template)\n        elif evidence_mode == EVIDENCE_MODE_TABLE:\n            prompt_template = PromptTemplate(self.qa_table_template)\n        elif evidence_mode == EVIDENCE_MODE_FIGURE:\n            if self.use_multimodal:\n                prompt_template = PromptTemplate(self.qa_figure_template)\n            else:\n                prompt_template = PromptTemplate(self.qa_template)\n        else:\n            prompt_template = PromptTemplate(self.qa_chatbot_template)\n\n        prompt = prompt_template.populate(\n            context=evidence,\n            question=question,\n            lang=self.lang,\n        )\n\n        return prompt, evidence\n\n    def run(\n        self, question: str, evidence: str, evidence_mode: 
int = 0, **kwargs\n    ) -> Document:\n        return self.invoke(question, evidence, evidence_mode, **kwargs)\n\n    def invoke(\n        self,\n        question: str,\n        evidence: str,\n        evidence_mode: int = 0,\n        images: list[str] = [],\n        **kwargs,\n    ) -> Document:\n        raise NotImplementedError\n\n    async def ainvoke(  # type: ignore\n        self,\n        question: str,\n        evidence: str,\n        evidence_mode: int = 0,\n        images: list[str] = [],\n        **kwargs,\n    ) -> Document:\n        \"\"\"Answer the question based on the evidence\n\n        In addition to the question and the evidence, this method also takes into\n        account evidence_mode. The evidence_mode tells which kind of evidence it is.\n        The kind of evidence affects:\n            1. How the evidence is represented.\n            2. The prompt to generate the answer.\n\n        By default, the evidence_mode is 0, which means the evidence is plain text with\n        no particular semantic representation. The evidence_mode can be:\n            1. \"table\": There will be HTML markup telling that there is a table\n                within the evidence.\n            2. 
\"chatbot\": There will be HTML markup telling that there is a chatbot.\n                This chatbot is a scenario, extracted from an Excel file, where each\n                row corresponds to an interaction.\n\n        Args:\n            question: the original question posed by user\n            evidence: the text that contain relevant information to answer the question\n                (determined by retrieval pipeline)\n            evidence_mode: the mode of evidence, 0 for text, 1 for table, 2 for chatbot\n        \"\"\"\n        raise NotImplementedError\n\n    def stream(  # type: ignore\n        self,\n        question: str,\n        evidence: str,\n        evidence_mode: int = 0,\n        images: list[str] = [],\n        **kwargs,\n    ) -> Generator[Document, None, Document]:\n        history = kwargs.get(\"history\", [])\n        print(f\"Got {len(images)} images\")\n        # check if evidence exists, use QA prompt\n        if evidence:\n            prompt, evidence = self.get_prompt(question, evidence, evidence_mode)\n        else:\n            prompt = question\n\n        # retrieve the citation\n        citation = None\n        mindmap = None\n\n        def citation_call():\n            nonlocal citation\n            citation = self.citation_pipeline(context=evidence, question=question)\n\n        def mindmap_call():\n            nonlocal mindmap\n            mindmap = self.create_mindmap_pipeline(context=evidence, question=question)\n\n        citation_thread = None\n        mindmap_thread = None\n\n        # execute function call in thread\n        if evidence:\n            if self.enable_citation:\n                citation_thread = threading.Thread(target=citation_call)\n                citation_thread.start()\n\n            if self.enable_mindmap:\n                mindmap_thread = threading.Thread(target=mindmap_call)\n                mindmap_thread.start()\n\n        output = \"\"\n        logprobs = []\n\n        messages = []\n        if 
self.system_prompt:\n            messages.append(SystemMessage(content=self.system_prompt))\n\n        for human, ai in history[-self.n_last_interactions :]:\n            messages.append(HumanMessage(content=human))\n            messages.append(AIMessage(content=ai))\n\n        if self.use_multimodal and evidence_mode == EVIDENCE_MODE_FIGURE:\n            # create image message:\n            messages.append(\n                HumanMessage(\n                    content=[\n                        {\"type\": \"text\", \"text\": prompt},\n                    ]\n                    + [\n                        {\n                            \"type\": \"image_url\",\n                            \"image_url\": {\"url\": image},\n                        }\n                        for image in images[:MAX_IMAGES]\n                    ],\n                )\n            )\n        else:\n            # append main prompt\n            messages.append(HumanMessage(content=prompt))\n\n        try:\n            # try streaming first\n            print(\"Trying LLM streaming\")\n            for out_msg in self.llm.stream(messages):\n                output += out_msg.text\n                logprobs += out_msg.logprobs\n                yield Document(channel=\"chat\", content=out_msg.text)\n        except NotImplementedError:\n            print(\"Streaming is not supported, falling back to normal processing\")\n            output = self.llm(messages).text\n            yield Document(channel=\"chat\", content=output)\n\n        if logprobs:\n            qa_score = np.exp(np.average(logprobs))\n        else:\n            qa_score = None\n\n        if citation_thread:\n            citation_thread.join(timeout=CITATION_TIMEOUT)\n        if mindmap_thread:\n            mindmap_thread.join(timeout=CITATION_TIMEOUT)\n\n        answer = Document(\n            text=output,\n            metadata={\n                \"citation_viz\": self.enable_citation_viz,\n                \"mindmap\": 
mindmap,\n                \"citation\": citation,\n                \"qa_score\": qa_score,\n            },\n        )\n\n        return answer\n\n    def match_evidence_with_context(self, answer, docs) -> dict[str, list[dict]]:\n        \"\"\"Match the evidence with the context\"\"\"\n        spans: dict[str, list[dict]] = defaultdict(list)\n\n        if not answer.metadata[\"citation\"]:\n            return spans\n\n        evidences = answer.metadata[\"citation\"].evidences\n        for quote in evidences:\n            matched_excerpts = []\n            for doc in docs:\n                matches = find_text(quote, doc.text)\n\n                for start, end in matches:\n                    if \"|\" not in doc.text[start:end]:\n                        spans[doc.doc_id].append(\n                            {\n                                \"start\": start,\n                                \"end\": end,\n                            }\n                        )\n                        matched_excerpts.append(doc.text[start:end])\n\n            # print(\"Matched citation:\", quote, matched_excerpts),\n        return spans\n\n    def prepare_citations(self, answer, docs) -> tuple[list[Document], list[Document]]:\n        \"\"\"Prepare the citations to show on the UI\"\"\"\n        with_citation, without_citation = [], []\n        has_llm_score = any(\"llm_trulens_score\" in doc.metadata for doc in docs)\n\n        spans = self.match_evidence_with_context(answer, docs)\n        id2docs = {doc.doc_id: doc for doc in docs}\n        not_detected = set(id2docs.keys()) - set(spans.keys())\n\n        # render highlight spans\n        for _id, ss in spans.items():\n            if not ss:\n                not_detected.add(_id)\n                continue\n            cur_doc = id2docs[_id]\n            highlight_text = \"\"\n\n            ss = sorted(ss, key=lambda x: x[\"start\"])\n            last_end = 0\n            text = cur_doc.text[: ss[0][\"start\"]]\n\n            for 
idx, span in enumerate(ss):\n                # prevent overlapping between span\n                span_start = max(last_end, span[\"start\"])\n                span_end = max(last_end, span[\"end\"])\n\n                to_highlight = cur_doc.text[span_start:span_end]\n                last_end = span_end\n\n                # append to highlight on PDF viewer\n                highlight_text += (\" \" if highlight_text else \"\") + to_highlight\n\n                span_idx = span.get(\"idx\", None)\n                if span_idx is not None:\n                    to_highlight = f\"【{span_idx}】\" + to_highlight\n\n                text += Render.highlight(\n                    to_highlight,\n                    elem_id=str(span_idx) if span_idx is not None else None,\n                )\n                if idx < len(ss) - 1:\n                    text += cur_doc.text[span[\"end\"] : ss[idx + 1][\"start\"]]\n\n            text += cur_doc.text[ss[-1][\"end\"] :]\n            # add to display list\n            with_citation.append(\n                Document(\n                    channel=\"info\",\n                    content=Render.collapsible_with_header_score(\n                        cur_doc,\n                        override_text=text,\n                        highlight_text=highlight_text,\n                        open_collapsible=True,\n                    ),\n                )\n            )\n\n        print(\"Got {} cited docs\".format(len(with_citation)))\n\n        sorted_not_detected_items_with_scores = [\n            (id_, id2docs[id_].metadata.get(\"llm_trulens_score\", 0.0))\n            for id_ in not_detected\n        ]\n        sorted_not_detected_items_with_scores.sort(key=lambda x: x[1], reverse=True)\n\n        for id_, _ in sorted_not_detected_items_with_scores:\n            doc = id2docs[id_]\n            doc_score = doc.metadata.get(\"llm_trulens_score\", 0.0)\n            is_open = not has_llm_score or (\n                doc_score\n                > 
CONTEXT_RELEVANT_WARNING_SCORE\n                # and len(with_citation) == 0\n            )\n            without_citation.append(\n                Document(\n                    channel=\"info\",\n                    content=Render.collapsible_with_header_score(\n                        doc, open_collapsible=is_open\n                    ),\n                )\n            )\n        return with_citation, without_citation\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/indices/qa/citation_qa_inline.py",
    "content": "import re\nimport threading\nfrom collections import defaultdict\nfrom dataclasses import dataclass\nfrom typing import Generator\n\nimport numpy as np\n\nfrom kotaemon.base import AIMessage, Document, HumanMessage, SystemMessage\nfrom kotaemon.llms import PromptTemplate\n\nfrom .citation_qa import CITATION_TIMEOUT, MAX_IMAGES, AnswerWithContextPipeline\nfrom .format_context import EVIDENCE_MODE_FIGURE\nfrom .utils import find_start_end_phrase\n\nDEFAULT_QA_CITATION_PROMPT = \"\"\"\nUse the following pieces of context to answer the question at the end.\nProvide DETAILED ansswer with clear explanation.\nFormat answer with easy to follow bullets / paragraphs.\nIf you don't know the answer, just say that you don't know, don't try to make up an answer.\nUse the same language as the question to response.\n\nCONTEXT:\n----\n{context}\n----\n\nAnswer using this format:\nCITATION LIST\n\n// the index in this array\nCITATION【number】\n\n// output 2 phrase to mark start and end of the relevant span\n// each has ~ 6 words\n// MUST COPY EXACTLY from the CONTEXT\n// NO CHANGE or REPHRASE\n// RELEVANT_SPAN_FROM_CONTEXT\nSTART_PHRASE: string\nEND_PHRASE: string\n\n// When you answer, ensure to add citations from the documents\n// in the CONTEXT with a number that corresponds to the answersInText array.\n// (in the form [number])\n// Try to include the number after each facts / statements you make.\n// You can create as many citations as you need.\nFINAL ANSWER\nstring\n\nSTRICTLY FOLLOW THIS EXAMPLE:\nCITATION LIST\n\nCITATION【1】\n\nSTART_PHRASE: Known as fixed-size chunking , the traditional\nEND_PHRASE: not degrade the final retrieval performance.\n\nCITATION【2】\n\nSTART_PHRASE: Fixed-size Chunker This is our baseline chunker\nEND_PHRASE: this shows good retrieval quality.\n\nFINAL ANSWER\nAn alternative to semantic chunking is fixed-size chunking. 
This traditional method involves splitting documents into chunks of a predetermined or user-specified size, regardless of semantic content, which is computationally efficient【1】. However, it may result in the fragmentation of semantically related content, thereby potentially degrading retrieval performance【1】【2】.\n\nQUESTION: {question}\\n\nANSWER:\n\"\"\"  # noqa\n\nSTART_ANSWER = \"FINAL ANSWER\"\nSTART_CITATION = \"CITATION LIST\"\nCITATION_PATTERN = r\"citation【(\\d+)】\"\nSTART_ANSWER_PATTERN = \"start_phrase:\"\nEND_ANSWER_PATTERN = \"end_phrase:\"\n\n\n@dataclass\nclass InlineEvidence:\n    \"\"\"List of evidences to support the answer.\"\"\"\n\n    start_phrase: str | None = None\n    end_phrase: str | None = None\n    idx: int | None = None\n\n\nclass AnswerWithInlineCitation(AnswerWithContextPipeline):\n    \"\"\"Answer the question based on the evidence with inline citation\"\"\"\n\n    qa_citation_template: str = DEFAULT_QA_CITATION_PROMPT\n\n    def get_prompt(self, question, evidence, evidence_mode: int):\n        \"\"\"Prepare the prompt and other information for LLM\"\"\"\n        prompt_template = PromptTemplate(self.qa_citation_template)\n\n        prompt = prompt_template.populate(\n            context=evidence,\n            question=question,\n            safe=False,\n        )\n\n        return prompt, evidence\n\n    def answer_to_citations(self, answer) -> list[InlineEvidence]:\n        citations: list[InlineEvidence] = []\n        lines = answer.split(\"\\n\")\n\n        current_evidence = None\n\n        for line in lines:\n            # check citation idx using regex\n            match = re.match(CITATION_PATTERN, line.lower())\n\n            if match:\n                try:\n                    parsed_citation_idx = int(match.group(1))\n                except ValueError:\n                    parsed_citation_idx = None\n\n                # conclude the current evidence if exists\n                if current_evidence:\n                    
citations.append(current_evidence)\n                    current_evidence = None\n\n                current_evidence = InlineEvidence(idx=parsed_citation_idx)\n            else:\n                for keyword in [START_ANSWER_PATTERN, END_ANSWER_PATTERN]:\n                    if line.lower().startswith(keyword):\n                        matched_phrase = line[len(keyword) :].strip()\n                        if not current_evidence:\n                            current_evidence = InlineEvidence(idx=None)\n\n                        if keyword == START_ANSWER_PATTERN:\n                            current_evidence.start_phrase = matched_phrase\n                        else:\n                            current_evidence.end_phrase = matched_phrase\n\n                        break\n\n            if (\n                current_evidence\n                and current_evidence.end_phrase\n                and current_evidence.start_phrase\n            ):\n                citations.append(current_evidence)\n                current_evidence = None\n\n        if current_evidence:\n            citations.append(current_evidence)\n\n        return citations\n\n    def replace_citation_with_link(self, answer: str):\n        # Define the regex pattern to match 【number】\n        pattern = r\"【\\d+】\"\n        alternate_pattern = r\"\\[\\d+\\]\"\n\n        # Regular expression to match merged citations\n        multi_pattern = r\"【([\\d,\\s]+)】\"\n\n        # Function to replace merged citations with independent ones\n        def split_citations(match):\n            # Extract the numbers, split by comma, and create individual citations\n            numbers = match.group(1).split(\",\")\n            return \"\".join(f\"【{num.strip()}】\" for num in numbers)\n\n        # Replace merged citations in the text\n        answer = re.sub(multi_pattern, split_citations, answer)\n\n        # Find all citations in the answer\n        matches = list(re.finditer(pattern, answer))\n        if not 
matches:\n            matches = list(re.finditer(alternate_pattern, answer))\n\n        matched_citations = set()\n        for match in matches:\n            citation = match.group()\n            matched_citations.add(citation)\n\n        for citation in matched_citations:\n            citation_id = citation[1:-1]\n            answer = answer.replace(\n                citation,\n                (\n                    \"<a href='#' class='citation' \"\n                    f\"id='mark-{citation_id}'>【{citation_id}】</a>\"\n                ),\n            )\n\n        answer = answer.replace(START_CITATION, \"\")\n\n        return answer\n\n    def stream(  # type: ignore\n        self,\n        question: str,\n        evidence: str,\n        evidence_mode: int = 0,\n        images: list[str] = [],\n        **kwargs,\n    ) -> Generator[Document, None, Document]:\n        history = kwargs.get(\"history\", [])\n        print(f\"Got {len(images)} images\")\n        # check if evidence exists, use QA prompt\n        if evidence:\n            prompt, evidence = self.get_prompt(question, evidence, evidence_mode)\n        else:\n            prompt = question\n\n        output = \"\"\n        logprobs = []\n\n        citation = None\n        mindmap = None\n\n        def mindmap_call():\n            nonlocal mindmap\n            mindmap = self.create_mindmap_pipeline(context=evidence, question=question)\n\n        mindmap_thread = None\n\n        # execute function call in thread\n        if evidence:\n            if self.enable_mindmap:\n                mindmap_thread = threading.Thread(target=mindmap_call)\n                mindmap_thread.start()\n\n        messages = []\n        if self.system_prompt:\n            messages.append(SystemMessage(content=self.system_prompt))\n\n        for human, ai in history[-self.n_last_interactions :]:\n            messages.append(HumanMessage(content=human))\n            messages.append(AIMessage(content=ai))\n\n        if 
self.use_multimodal and evidence_mode == EVIDENCE_MODE_FIGURE:\n            # create image message:\n            messages.append(\n                HumanMessage(\n                    content=[\n                        {\"type\": \"text\", \"text\": prompt},\n                    ]\n                    + [\n                        {\n                            \"type\": \"image_url\",\n                            \"image_url\": {\"url\": image},\n                        }\n                        for image in images[:MAX_IMAGES]\n                    ],\n                )\n            )\n        else:\n            # append main prompt\n            messages.append(HumanMessage(content=prompt))\n\n        final_answer = \"\"\n\n        try:\n            # try streaming first\n            print(\"Trying LLM streaming\")\n            for out_msg in self.llm.stream(messages):\n                if evidence:\n                    if START_ANSWER in output:\n                        if not final_answer:\n                            try:\n                                left_over_answer = output.split(START_ANSWER)[\n                                    1\n                                ].lstrip()\n                            except IndexError:\n                                left_over_answer = \"\"\n                            if left_over_answer:\n                                out_msg.text = left_over_answer + out_msg.text\n\n                        final_answer += (\n                            out_msg.text.lstrip() if not final_answer else out_msg.text\n                        )\n                        yield Document(channel=\"chat\", content=out_msg.text)\n\n                        # check for the edge case of citation list is repeated\n                        # with smaller LLMs\n                        if START_CITATION in out_msg.text:\n                            break\n                else:\n                    yield Document(channel=\"chat\", 
content=out_msg.text)\n\n                output += out_msg.text\n                logprobs += out_msg.logprobs\n        except NotImplementedError:\n            print(\"Streaming is not supported, falling back to normal processing\")\n            output = self.llm(messages).text\n            yield Document(channel=\"chat\", content=output)\n\n        if logprobs:\n            qa_score = np.exp(np.average(logprobs))\n        else:\n            qa_score = None\n\n        citation = self.answer_to_citations(output)\n\n        if mindmap_thread:\n            mindmap_thread.join(timeout=CITATION_TIMEOUT)\n\n        # convert citation to link\n        answer = Document(\n            text=final_answer,\n            metadata={\n                \"citation_viz\": self.enable_citation_viz,\n                \"mindmap\": mindmap,\n                \"citation\": citation,\n                \"qa_score\": qa_score,\n            },\n        )\n\n        # yield the final answer\n        final_answer = self.replace_citation_with_link(final_answer)\n\n        if final_answer:\n            yield Document(channel=\"chat\", content=None)\n            yield Document(channel=\"chat\", content=final_answer)\n\n        return answer\n\n    def match_evidence_with_context(self, answer, docs) -> dict[str, list[dict]]:\n        \"\"\"Match the evidence with the context\"\"\"\n        spans: dict[str, list[dict]] = defaultdict(list)\n\n        if not answer.metadata[\"citation\"]:\n            return spans\n\n        evidences = answer.metadata[\"citation\"]\n\n        for e_id, evidence in enumerate(evidences):\n            start_phrase, end_phrase = evidence.start_phrase, evidence.end_phrase\n            evidence_idx = evidence.idx\n\n            if evidence_idx is None:\n                evidence_idx = e_id + 1\n\n            best_match = None\n            best_match_length = 0\n            best_match_doc_idx = None\n\n            for doc in docs:\n                match, match_length = 
find_start_end_phrase(\n                    start_phrase, end_phrase, doc.text\n                )\n                if best_match is None or (\n                    match is not None and match_length > best_match_length\n                ):\n                    best_match = match\n                    best_match_length = match_length\n                    best_match_doc_idx = doc.doc_id\n\n            if best_match is not None and best_match_doc_idx is not None:\n                spans[best_match_doc_idx].append(\n                    {\n                        \"start\": best_match[0],\n                        \"end\": best_match[1],\n                        \"idx\": evidence_idx,\n                    }\n                )\n        return spans\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/indices/qa/format_context.py",
    "content": "import html\nfrom functools import partial\n\nimport tiktoken\n\nfrom kotaemon.base import BaseComponent, Document, RetrievedDocument\nfrom kotaemon.indices.splitters import TokenSplitter\n\nEVIDENCE_MODE_TEXT = 0\nEVIDENCE_MODE_TABLE = 1\nEVIDENCE_MODE_CHATBOT = 2\nEVIDENCE_MODE_FIGURE = 3\n\n\nclass PrepareEvidencePipeline(BaseComponent):\n    \"\"\"Prepare the evidence text from the list of retrieved documents\n\n    This step usually happens after `DocumentRetrievalPipeline`.\n\n    Args:\n        trim_func: a callback function or a BaseComponent, that splits a large\n            chunk of text into smaller ones. The first one will be retained.\n    \"\"\"\n\n    max_context_length: int = 32000\n    trim_func: TokenSplitter | None = None\n\n    def run(self, docs: list[RetrievedDocument]) -> Document:\n        evidence = \"\"\n        images = []\n        table_found = 0\n        evidence_modes = []\n\n        evidence_trim_func = (\n            self.trim_func\n            if self.trim_func\n            else TokenSplitter(\n                chunk_size=self.max_context_length,\n                chunk_overlap=0,\n                separator=\" \",\n                tokenizer=partial(\n                    tiktoken.encoding_for_model(\"gpt-3.5-turbo\").encode,\n                    allowed_special=set(),\n                    disallowed_special=\"all\",\n                ),\n            )\n        )\n\n        for _, retrieved_item in enumerate(docs):\n            retrieved_content = \"\"\n            page = retrieved_item.metadata.get(\"page_label\", None)\n            source = filename = retrieved_item.metadata.get(\"file_name\", \"-\")\n            if page:\n                source += f\" (Page {page})\"\n            if retrieved_item.metadata.get(\"type\", \"\") == \"table\":\n                evidence_modes.append(EVIDENCE_MODE_TABLE)\n                if table_found < 5:\n                    retrieved_content = retrieved_item.metadata.get(\n               
         \"table_origin\", retrieved_item.text\n                    )\n                    if retrieved_content not in evidence:\n                        table_found += 1\n                        evidence += (\n                            f\"<br><b>Table from {source}</b>\\n\"\n                            + retrieved_content\n                            + \"\\n<br>\"\n                        )\n            elif retrieved_item.metadata.get(\"type\", \"\") == \"chatbot\":\n                evidence_modes.append(EVIDENCE_MODE_CHATBOT)\n                retrieved_content = retrieved_item.metadata[\"window\"]\n                evidence += (\n                    f\"<br><b>Chatbot scenario from {filename} (Row {page})</b>\\n\"\n                    + retrieved_content\n                    + \"\\n<br>\"\n                )\n            elif retrieved_item.metadata.get(\"type\", \"\") == \"image\":\n                evidence_modes.append(EVIDENCE_MODE_FIGURE)\n                retrieved_content = retrieved_item.metadata.get(\"image_origin\", \"\")\n                retrieved_caption = html.escape(retrieved_item.get_content())\n                evidence += (\n                    f\"<br><b>Figure from {source}</b>\\n\"\n                    + \"<img width='85%' src='<src>' \"\n                    + f\"alt='{retrieved_caption}'/>\"\n                    + \"\\n<br>\"\n                )\n                images.append(retrieved_content)\n            else:\n                if \"window\" in retrieved_item.metadata:\n                    retrieved_content = retrieved_item.metadata[\"window\"]\n                else:\n                    retrieved_content = retrieved_item.text\n                retrieved_content = retrieved_content.replace(\"\\n\", \" \")\n                if retrieved_content not in evidence:\n                    evidence += (\n                        f\"<br><b>Content from {source}: </b> \"\n                        + retrieved_content\n                        + \" \\n<br>\"\n    
                )\n\n        # resolve evidence mode\n        evidence_mode = EVIDENCE_MODE_TEXT\n        if EVIDENCE_MODE_FIGURE in evidence_modes:\n            evidence_mode = EVIDENCE_MODE_FIGURE\n        elif EVIDENCE_MODE_TABLE in evidence_modes:\n            evidence_mode = EVIDENCE_MODE_TABLE\n\n        # trim context by trim_len\n        print(\"len (original)\", len(evidence))\n        if evidence:\n            texts = evidence_trim_func([Document(text=evidence)])\n            evidence = texts[0].text\n            print(\"len (trimmed)\", len(evidence))\n\n        return Document(content=(evidence_mode, evidence, images))\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/indices/qa/utils.py",
    "content": "from difflib import SequenceMatcher\n\n\ndef find_text(search_span, context, min_length=5):\n    search_span, context = search_span.lower(), context.lower()\n\n    sentence_list = search_span.split(\"\\n\")\n    context = context.replace(\"\\n\", \" \")\n\n    matches_span = []\n    # don't search for small text\n    if len(search_span) > min_length:\n        for sentence in sentence_list:\n            match_results = SequenceMatcher(\n                None,\n                sentence,\n                context,\n                autojunk=False,\n            ).get_matching_blocks()\n\n            matched_blocks = []\n            for _, start, length in match_results:\n                if length > max(len(sentence) * 0.25, min_length):\n                    matched_blocks.append((start, start + length))\n\n            if matched_blocks:\n                start_index = min(start for start, _ in matched_blocks)\n                end_index = max(end for _, end in matched_blocks)\n                length = end_index - start_index\n\n                if length > max(len(sentence) * 0.35, min_length):\n                    matches_span.append((start_index, end_index))\n\n    if matches_span:\n        # merge all matches into one span\n        final_span = min(start for start, _ in matches_span), max(\n            end for _, end in matches_span\n        )\n        matches_span = [final_span]\n\n    return matches_span\n\n\ndef find_start_end_phrase(\n    start_phrase, end_phrase, context, min_length=5, max_excerpt_length=300\n):\n    start_phrase, end_phrase = start_phrase.lower(), end_phrase.lower()\n    context = context.lower()\n\n    context = context.replace(\"\\n\", \" \")\n\n    matches = []\n    matched_length = 0\n    for sentence in [start_phrase, end_phrase]:\n        if sentence is None:\n            continue\n\n        match = SequenceMatcher(\n            None, sentence, context, autojunk=False\n        ).find_longest_match()\n        if match.size > 
max(len(sentence) * 0.35, min_length):\n            matches.append((match.b, match.b + match.size))\n            matched_length += match.size\n\n    # check if second match is before the first match\n    if len(matches) == 2 and matches[1][0] < matches[0][0]:\n        # if so, keep only the first match\n        matches = [matches[0]]\n\n    if matches:\n        start_idx = min(start for start, _ in matches)\n        end_idx = max(end for _, end in matches)\n\n        # check if the excerpt is too long\n        if end_idx - start_idx > max_excerpt_length:\n            end_idx = start_idx + max_excerpt_length\n\n        final_match = (start_idx, end_idx)\n    else:\n        final_match = None\n\n    return final_match, matched_length\n\n\ndef replace_think_tag_with_details(text):\n    text = text.replace(\n        \"<think>\",\n        '<details><summary><span style=\"color:grey\">Thought</span></summary><blockquote>',  # noqa\n    )\n    text = text.replace(\"</think>\", \"</blockquote></details>\")\n    return text\n\n\ndef strip_think_tag(text):\n    if \"</think>\" in text:\n        text = text.split(\"</think>\")[1]\n    return text\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/indices/rankings/__init__.py",
    "content": "from .base import BaseReranking\nfrom .cohere import CohereReranking\nfrom .llm import LLMReranking\nfrom .llm_scoring import LLMScoring\nfrom .llm_trulens import LLMTrulensScoring\n\n__all__ = [\n    \"CohereReranking\",\n    \"LLMReranking\",\n    \"LLMScoring\",\n    \"BaseReranking\",\n    \"LLMTrulensScoring\",\n]\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/indices/rankings/base.py",
    "content": "from __future__ import annotations\n\nfrom abc import abstractmethod\n\nfrom kotaemon.base import BaseComponent, Document\n\n\nclass BaseReranking(BaseComponent):\n    @abstractmethod\n    def run(self, documents: list[Document], query: str) -> list[Document]:\n        \"\"\"Main method to transform list of documents\n        (re-ranking, filtering, etc)\"\"\"\n        ...\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/indices/rankings/cohere.py",
    "content": "from __future__ import annotations\n\nfrom decouple import config\n\nfrom kotaemon.base import Document\n\nfrom .base import BaseReranking\n\n\nclass CohereReranking(BaseReranking):\n    model_name: str = \"rerank-multilingual-v2.0\"\n    cohere_api_key: str = config(\"COHERE_API_KEY\", \"\")\n    use_key_from_ktem: bool = False\n\n    def run(self, documents: list[Document], query: str) -> list[Document]:\n        \"\"\"Use Cohere Reranker model to re-order documents\n        with their relevance score\"\"\"\n        try:\n            import cohere\n        except ImportError:\n            raise ImportError(\n                \"Please install Cohere `pip install cohere` to use Cohere Reranking\"\n            )\n\n        # try to get COHERE_API_KEY from embeddings\n        if not self.cohere_api_key and self.use_key_from_ktem:\n            try:\n                from ktem.embeddings.manager import (\n                    embedding_models_manager as embeddings,\n                )\n\n                cohere_model = embeddings.get(\"cohere\")\n                ktem_cohere_api_key = cohere_model._kwargs.get(  # type: ignore\n                    \"cohere_api_key\"\n                )\n                if ktem_cohere_api_key != \"your-key\":\n                    self.cohere_api_key = ktem_cohere_api_key\n            except Exception as e:\n                print(\"Cannot get Cohere API key from `ktem`\", e)\n\n        if not self.cohere_api_key:\n            print(\"Cohere API key not found. 
Skipping rerankings.\")\n            return documents\n\n        cohere_client = cohere.Client(self.cohere_api_key)\n        compressed_docs: list[Document] = []\n\n        if not documents:  # to avoid empty api call\n            return compressed_docs\n\n        _docs = [d.content for d in documents]\n        response = cohere_client.rerank(\n            model=self.model_name, query=query, documents=_docs\n        )\n        for r in response.results:\n            doc = documents[r.index]\n            doc.metadata[\"reranking_score\"] = r.relevance_score\n            compressed_docs.append(doc)\n\n        return compressed_docs\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/indices/rankings/llm.py",
    "content": "from __future__ import annotations\n\nfrom concurrent.futures import ThreadPoolExecutor\n\nfrom langchain.output_parsers.boolean import BooleanOutputParser\n\nfrom kotaemon.base import Document\nfrom kotaemon.llms import BaseLLM, PromptTemplate\n\nfrom .base import BaseReranking\n\nRERANK_PROMPT_TEMPLATE = \"\"\"Given the following question and context,\nreturn YES if the context is relevant to the question and NO if it isn't.\n\n> Question: {question}\n> Context:\n>>>\n{context}\n>>>\n> Relevant (YES / NO):\"\"\"\n\n\nclass LLMReranking(BaseReranking):\n    llm: BaseLLM\n    prompt_template: PromptTemplate = PromptTemplate(template=RERANK_PROMPT_TEMPLATE)\n    top_k: int = 3\n    concurrent: bool = True\n\n    def run(\n        self,\n        documents: list[Document],\n        query: str,\n    ) -> list[Document]:\n        \"\"\"Filter down documents based on their relevance to the query.\"\"\"\n        filtered_docs = []\n        output_parser = BooleanOutputParser()\n\n        if self.concurrent:\n            with ThreadPoolExecutor() as executor:\n                futures = []\n                for doc in documents:\n                    _prompt = self.prompt_template.populate(\n                        question=query, context=doc.get_content()\n                    )\n                    futures.append(executor.submit(lambda: self.llm(_prompt).text))\n\n                results = [future.result() for future in futures]\n        else:\n            results = []\n            for doc in documents:\n                _prompt = self.prompt_template.populate(\n                    question=query, context=doc.get_content()\n                )\n                results.append(self.llm(_prompt).text)\n\n        # use Boolean parser to extract relevancy output from LLM\n        results = [output_parser.parse(result) for result in results]\n        for include_doc, doc in zip(results, documents):\n            if include_doc:\n                
filtered_docs.append(doc)\n\n        # prevent returning empty result\n        if len(filtered_docs) == 0:\n            filtered_docs = documents[: self.top_k]\n\n        return filtered_docs\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/indices/rankings/llm_scoring.py",
    "content": "from __future__ import annotations\n\nfrom concurrent.futures import ThreadPoolExecutor\n\nimport numpy as np\nfrom langchain.output_parsers.boolean import BooleanOutputParser\n\nfrom kotaemon.base import Document\n\nfrom .llm import LLMReranking\n\n\nclass LLMScoring(LLMReranking):\n    def run(\n        self,\n        documents: list[Document],\n        query: str,\n    ) -> list[Document]:\n        \"\"\"Score every document's relevance to the query; no document is dropped.\"\"\"\n        filtered_docs: list[Document] = []\n        output_parser = BooleanOutputParser()\n\n        if self.concurrent:\n            with ThreadPoolExecutor() as executor:\n                futures = []\n                for doc in documents:\n                    _prompt = self.prompt_template.populate(\n                        question=query, context=doc.get_content()\n                    )\n                    futures.append(executor.submit(lambda: self.llm(_prompt)))\n\n                results = [future.result() for future in futures]\n        else:\n            results = []\n            for doc in documents:\n                _prompt = self.prompt_template.populate(\n                    question=query, context=doc.get_content()\n                )\n                results.append(self.llm(_prompt))\n\n        for result, doc in zip(results, documents):\n            score = np.exp(np.average(result.logprobs))\n            include_doc = output_parser.parse(result.text)\n            if include_doc:\n                doc.metadata[\"llm_reranking_score\"] = score\n            else:\n                doc.metadata[\"llm_reranking_score\"] = 1 - score\n            filtered_docs.append(doc)\n\n        # prevent returning empty result\n        if len(filtered_docs) == 0:\n            filtered_docs = documents[: self.top_k]\n\n        return filtered_docs\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/indices/rankings/llm_trulens.py",
    "content": "from __future__ import annotations\n\nimport re\nfrom concurrent.futures import ThreadPoolExecutor\nfrom functools import partial\n\nimport tiktoken\n\nfrom kotaemon.base import Document, HumanMessage, SystemMessage\nfrom kotaemon.indices.splitters import TokenSplitter\nfrom kotaemon.llms import BaseLLM, PromptTemplate\n\nfrom .llm import LLMReranking\n\nSYSTEM_PROMPT_TEMPLATE = PromptTemplate(\n    \"\"\"You are a RELEVANCE grader; providing the relevance of the given CONTEXT to the given QUESTION.\n        Respond only as a number from 0 to 10 where 0 is the least relevant and 10 is the most relevant.\n\n        A few additional scoring guidelines:\n\n        - Long CONTEXTS should score equally well as short CONTEXTS.\n\n        - RELEVANCE score should increase as the CONTEXTS provides more RELEVANT context to the QUESTION.\n\n        - RELEVANCE score should increase as the CONTEXTS provides RELEVANT context to more parts of the QUESTION.\n\n        - CONTEXT that is RELEVANT to some of the QUESTION should score of 2, 3 or 4. Higher score indicates more RELEVANCE.\n\n        - CONTEXT that is RELEVANT to most of the QUESTION should get a score of 5, 6, 7 or 8. Higher score indicates more RELEVANCE.\n\n        - CONTEXT that is RELEVANT to the entire QUESTION should get a score of 9 or 10. 
Higher score indicates more RELEVANCE.\n\n        - CONTEXT must be relevant and helpful for answering the entire QUESTION to get a score of 10.\n\n        - Never elaborate.\"\"\"  # noqa: E501\n)\n\nUSER_PROMPT_TEMPLATE = PromptTemplate(\n    \"\"\"QUESTION: {question}\n\n        CONTEXT: {context}\n\n        RELEVANCE: \"\"\"\n)  # noqa\n\nPATTERN_INTEGER: re.Pattern = re.compile(r\"([+-]?[1-9][0-9]*|0)\")\n\"\"\"Regex that matches integers.\"\"\"\n\nMAX_CONTEXT_LEN = 7500\n\n\ndef validate_rating(rating) -> int:\n    \"\"\"Validate a rating is between 0 and 10.\"\"\"\n\n    if not 0 <= rating <= 10:\n        raise ValueError(\"Rating must be between 0 and 10\")\n\n    return rating\n\n\ndef re_0_10_rating(s: str) -> int:\n    \"\"\"Extract a 0-10 rating from a string.\n\n    If the string does not match an integer or matches an integer outside the\n    0-10 range, raises an error instead. If multiple numbers are found within\n    the expected 0-10 range, the smallest is returned.\n\n    Args:\n        s: String to extract rating from.\n\n    Returns:\n        int: Extracted rating.\n\n    Raises:\n        ParseError: If no integers between 0 and 10 are found in the string.\n    \"\"\"\n\n    matches = PATTERN_INTEGER.findall(s)\n    if not matches:\n        raise AssertionError\n\n    vals = set()\n    for match in matches:\n        try:\n            vals.add(validate_rating(int(match)))\n        except ValueError:\n            pass\n\n    if not vals:\n        raise AssertionError\n\n    # Min to handle cases like \"The rating is 8 out of 10.\"\n    return min(vals)\n\n\nclass LLMTrulensScoring(LLMReranking):\n    llm: BaseLLM\n    system_prompt_template: PromptTemplate = SYSTEM_PROMPT_TEMPLATE\n    user_prompt_template: PromptTemplate = USER_PROMPT_TEMPLATE\n    concurrent: bool = True\n    normalize: float = 10\n    trim_func: TokenSplitter = TokenSplitter.withx(\n        chunk_size=MAX_CONTEXT_LEN,\n        chunk_overlap=0,\n        separator=\" \",\n       
 tokenizer=partial(\n            tiktoken.encoding_for_model(\"gpt-3.5-turbo\").encode,\n            allowed_special=set(),\n            disallowed_special=\"all\",\n        ),\n    )\n\n    def run(\n        self,\n        documents: list[Document],\n        query: str,\n    ) -> list[Document]:\n        \"\"\"Filter down documents based on their relevance to the query.\"\"\"\n        filtered_docs = []\n\n        documents = sorted(documents, key=lambda doc: doc.get_content())\n        if self.concurrent:\n            with ThreadPoolExecutor() as executor:\n                futures = []\n                for doc in documents:\n                    chunked_doc_content = self.trim_func(\n                        [\n                            Document(content=doc.get_content())\n                            # skip metadata which cause troubles\n                        ]\n                    )[0].text\n\n                    messages = []\n                    messages.append(\n                        SystemMessage(self.system_prompt_template.populate())\n                    )\n                    messages.append(\n                        HumanMessage(\n                            self.user_prompt_template.populate(\n                                question=query, context=chunked_doc_content\n                            )\n                        )\n                    )\n\n                    def llm_call():\n                        return self.llm(messages).text\n\n                    futures.append(executor.submit(llm_call))\n\n                results = [future.result() for future in futures]\n        else:\n            results = []\n            for doc in documents:\n                messages = []\n                messages.append(SystemMessage(self.system_prompt_template.populate()))\n                messages.append(\n                    SystemMessage(\n                        self.user_prompt_template.populate(\n                            question=query, 
context=doc.get_content()\n                        )\n                    )\n                )\n                results.append(self.llm(messages).text)\n\n        # extract the 0-10 rating from each LLM output and normalize it\n        results = [\n            (r_idx, float(re_0_10_rating(result)) / self.normalize)\n            for r_idx, result in enumerate(results)\n        ]\n        results.sort(key=lambda x: x[1], reverse=True)\n\n        for r_idx, score in results:\n            doc = documents[r_idx]\n            doc.metadata[\"llm_trulens_score\"] = score\n            filtered_docs.append(doc)\n\n        print(\n            \"LLM rerank scores\",\n            [doc.metadata[\"llm_trulens_score\"] for doc in filtered_docs],\n        )\n\n        return filtered_docs\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/indices/retrievers/__init__.py",
    "content": ""
  },
  {
    "path": "libs/kotaemon/kotaemon/indices/retrievers/jina_web_search.py",
    "content": "import requests\nfrom decouple import config\n\nfrom kotaemon.base import BaseComponent, RetrievedDocument\n\nJINA_API_KEY = config(\"JINA_API_KEY\", default=\"\")\nJINA_URL = config(\"JINA_URL\", default=\"https://r.jina.ai/\")\n\n\nclass WebSearch(BaseComponent):\n    \"\"\"WebSearch component for fetching data from the web\n    using Jina API\n    \"\"\"\n\n    def run(\n        self,\n        text: str,\n        *args,\n        **kwargs,\n    ) -> list[RetrievedDocument]:\n        if JINA_API_KEY == \"\":\n            raise ValueError(\n                \"This feature requires JINA_API_KEY \"\n                \"(get free one from https://jina.ai/reader)\"\n            )\n\n        # setup the request\n        api_url = f\"https://s.jina.ai/{text}\"\n        headers = {\"X-With-Generated-Alt\": \"true\", \"Accept\": \"application/json\"}\n        if JINA_API_KEY:\n            headers[\"Authorization\"] = f\"Bearer {JINA_API_KEY}\"\n\n        response = requests.get(api_url, headers=headers)\n        response.raise_for_status()\n        response_dict = response.json()\n\n        return [\n            RetrievedDocument(\n                text=(\n                    \"###URL: [{url}]({url})\\n\\n\"\n                    \"####{title}\\n\\n\"\n                    \"{description}\\n\"\n                    \"{content}\"\n                ).format(\n                    url=item[\"url\"],\n                    title=item[\"title\"],\n                    description=item[\"description\"],\n                    content=item[\"content\"],\n                ),\n                metadata={\n                    \"file_name\": \"Web search\",\n                    \"type\": \"table\",\n                    \"llm_trulens_score\": 1.0,\n                },\n            )\n            for item in response_dict[\"data\"]\n        ]\n\n    def generate_relevant_scores(self, text, documents: list[RetrievedDocument]):\n        return documents\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/indices/retrievers/tavily_web_search.py",
    "content": "from decouple import config\n\nfrom kotaemon.base import BaseComponent, RetrievedDocument\n\nTAVILY_API_KEY = config(\"TAVILY_API_KEY\", default=\"\")\n\n\nclass WebSearch(BaseComponent):\n    \"\"\"WebSearch component for fetching data from the web\n    using Tavily API\n    \"\"\"\n\n    def run(\n        self,\n        text: str,\n        *args,\n        **kwargs,\n    ) -> list[RetrievedDocument]:\n        if TAVILY_API_KEY == \"\":\n            raise ValueError(\n                \"This feature requires TAVILY_API_KEY \"\n                \"(get free one from https://app.tavily.com/)\"\n            )\n\n        try:\n            from tavily import TavilyClient\n        except ImportError:\n            raise ImportError(\n                \"Please install `pip install tavily-python` to use this feature\"\n            )\n\n        tavily_client = TavilyClient(api_key=TAVILY_API_KEY)\n        results = tavily_client.search(\n            query=text,\n            search_depth=\"advanced\",\n        )[\"results\"]\n        context = \"\\n\\n\".join(\n            \"###URL: [{url}]({url})\\n\\n{content}\".format(\n                url=result[\"url\"],\n                content=result[\"content\"],\n            )\n            for result in results\n        )\n\n        return [\n            RetrievedDocument(\n                text=context,\n                metadata={\n                    \"file_name\": \"Web search\",\n                    \"type\": \"table\",\n                    \"llm_trulens_score\": 1.0,\n                },\n            )\n        ]\n\n    def generate_relevant_scores(self, text, documents: list[RetrievedDocument]):\n        return documents\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/indices/splitters/__init__.py",
    "content": "from ..base import DocTransformer, LlamaIndexDocTransformerMixin\n\n\nclass BaseSplitter(DocTransformer):\n    \"\"\"Represent base splitter class\"\"\"\n\n    ...\n\n\nclass TokenSplitter(LlamaIndexDocTransformerMixin, BaseSplitter):\n    def __init__(\n        self,\n        chunk_size: int = 1024,\n        chunk_overlap: int = 20,\n        separator: str = \" \",\n        **params,\n    ):\n        super().__init__(\n            chunk_size=chunk_size,\n            chunk_overlap=chunk_overlap,\n            separator=separator,\n            **params,\n        )\n\n    def _get_li_class(self):\n        from llama_index.core.text_splitter import TokenTextSplitter\n\n        return TokenTextSplitter\n\n\nclass SentenceWindowSplitter(LlamaIndexDocTransformerMixin, BaseSplitter):\n    def __init__(\n        self,\n        window_size: int = 3,\n        window_metadata_key: str = \"window\",\n        original_text_metadata_key: str = \"original_text\",\n        **params,\n    ):\n        super().__init__(\n            window_size=window_size,\n            window_metadata_key=window_metadata_key,\n            original_text_metadata_key=original_text_metadata_key,\n            **params,\n        )\n\n    def _get_li_class(self):\n        from llama_index.core.node_parser import SentenceWindowNodeParser\n\n        return SentenceWindowNodeParser\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/indices/vectorindex.py",
    "content": "from __future__ import annotations\n\nimport threading\nimport uuid\nfrom pathlib import Path\nfrom typing import Optional, Sequence, cast\n\nfrom theflow.settings import settings as flowsettings\n\nfrom kotaemon.base import BaseComponent, Document, RetrievedDocument\nfrom kotaemon.embeddings import BaseEmbeddings\nfrom kotaemon.storages import BaseDocumentStore, BaseVectorStore\n\nfrom .base import BaseIndexing, BaseRetrieval\nfrom .rankings import BaseReranking, LLMReranking\n\nVECTOR_STORE_FNAME = \"vectorstore\"\nDOC_STORE_FNAME = \"docstore\"\n\n\nclass VectorIndexing(BaseIndexing):\n    \"\"\"Ingest the document, run through the embedding, and store the embedding in a\n    vector store.\n\n    This pipeline supports the following set of inputs:\n        - List of documents\n        - List of texts\n    \"\"\"\n\n    cache_dir: Optional[str] = getattr(flowsettings, \"KH_CHUNKS_OUTPUT_DIR\", None)\n    vector_store: BaseVectorStore\n    doc_store: Optional[BaseDocumentStore] = None\n    embedding: BaseEmbeddings\n    count_: int = 0\n\n    def to_retrieval_pipeline(self, *args, **kwargs):\n        \"\"\"Convert the indexing pipeline to a retrieval pipeline\"\"\"\n        return VectorRetrieval(\n            vector_store=self.vector_store,\n            doc_store=self.doc_store,\n            embedding=self.embedding,\n            **kwargs,\n        )\n\n    def write_chunk_to_file(self, docs: list[Document]):\n        # save the chunks content into markdown format\n        if self.cache_dir:\n            file_name = docs[0].metadata.get(\"file_name\")\n            if not file_name:\n                return\n\n            file_name = Path(file_name)\n            for i in range(len(docs)):\n                markdown_content = \"\"\n                if \"page_label\" in docs[i].metadata:\n                    page_label = str(docs[i].metadata[\"page_label\"])\n                    markdown_content += f\"Page label: {page_label}\"\n                if 
\"file_name\" in docs[i].metadata:\n                    filename = docs[i].metadata[\"file_name\"]\n                    markdown_content += f\"\\nFile name: {filename}\"\n                if \"section\" in docs[i].metadata:\n                    section = docs[i].metadata[\"section\"]\n                    markdown_content += f\"\\nSection: {section}\"\n                if \"type\" in docs[i].metadata:\n                    if docs[i].metadata[\"type\"] == \"image\":\n                        image_origin = docs[i].metadata[\"image_origin\"]\n                        image_origin = f'<p><img src=\"{image_origin}\"></p>'\n                        markdown_content += f\"\\nImage origin: {image_origin}\"\n                if docs[i].text:\n                    markdown_content += f\"\\ntext:\\n{docs[i].text}\"\n\n                with open(\n                    Path(self.cache_dir) / f\"{file_name.stem}_{self.count_+i}.md\",\n                    \"w\",\n                    encoding=\"utf-8\",\n                ) as f:\n                    f.write(markdown_content)\n\n    def add_to_docstore(self, docs: list[Document]):\n        if self.doc_store:\n            print(\"Adding documents to doc store\")\n            self.doc_store.add(docs)\n\n    def add_to_vectorstore(self, docs: list[Document]):\n        # in case we want to skip embedding\n        if self.vector_store:\n            print(f\"Getting embeddings for {len(docs)} nodes\")\n            embeddings = self.embedding(docs)\n            print(\"Adding embeddings to vector store\")\n            self.vector_store.add(\n                embeddings=embeddings,\n                ids=[t.doc_id for t in docs],\n            )\n\n    def run(self, text: str | list[str] | Document | list[Document]):\n        input_: list[Document] = []\n        if not isinstance(text, list):\n            text = [text]\n\n        for item in cast(list, text):\n            if isinstance(item, str):\n                input_.append(Document(text=item, 
id_=str(uuid.uuid4())))\n            elif isinstance(item, Document):\n                input_.append(item)\n            else:\n                raise ValueError(\n                    f\"Invalid input type {type(item)}, should be str or Document\"\n                )\n\n        self.add_to_vectorstore(input_)\n        self.add_to_docstore(input_)\n        self.write_chunk_to_file(input_)\n        self.count_ += len(input_)\n\n\nclass VectorRetrieval(BaseRetrieval):\n    \"\"\"Retrieve list of documents from vector store\"\"\"\n\n    vector_store: BaseVectorStore\n    doc_store: Optional[BaseDocumentStore] = None\n    embedding: BaseEmbeddings\n    rerankers: Sequence[BaseReranking] = []\n    top_k: int = 5\n    first_round_top_k_mult: int = 10\n    retrieval_mode: str = \"hybrid\"  # vector, text, hybrid\n\n    def _filter_docs(\n        self, documents: list[RetrievedDocument], top_k: int | None = None\n    ):\n        if top_k:\n            documents = documents[:top_k]\n        return documents\n\n    def run(\n        self, text: str | Document, top_k: Optional[int] = None, **kwargs\n    ) -> list[RetrievedDocument]:\n        \"\"\"Retrieve a list of documents from vector store\n\n        Args:\n            text: the text to retrieve similar documents\n            top_k: number of top similar documents to return\n\n        Returns:\n            list[RetrievedDocument]: list of retrieved documents\n        \"\"\"\n        if top_k is None:\n            top_k = self.top_k\n\n        do_extend = kwargs.pop(\"do_extend\", False)\n        thumbnail_count = kwargs.pop(\"thumbnail_count\", 3)\n\n        if do_extend:\n            top_k_first_round = top_k * self.first_round_top_k_mult\n        else:\n            top_k_first_round = top_k\n\n        if self.doc_store is None:\n            raise ValueError(\n                \"doc_store is not provided. 
Please provide a doc_store to \"\n                \"retrieve the documents\"\n            )\n\n        result: list[RetrievedDocument] = []\n        # TODO: should declare scope directly in the run params\n        scope = kwargs.pop(\"scope\", None)\n        emb: list[float]\n\n        if self.retrieval_mode == \"vector\":\n            emb = self.embedding(text)[0].embedding\n            _, scores, ids = self.vector_store.query(\n                embedding=emb, top_k=top_k_first_round, doc_ids=scope, **kwargs\n            )\n            docs = self.doc_store.get(ids)\n            result = [\n                RetrievedDocument(**doc.to_dict(), score=score)\n                for doc, score in zip(docs, scores)\n            ]\n        elif self.retrieval_mode == \"text\":\n            query = text.text if isinstance(text, Document) else text\n            docs = []\n            if scope:\n                docs = self.doc_store.query(\n                    query, top_k=top_k_first_round, doc_ids=scope\n                )\n            result = [RetrievedDocument(**doc.to_dict(), score=-1.0) for doc in docs]\n        elif self.retrieval_mode == \"hybrid\":\n            # similarity search section\n            emb = self.embedding(text)[0].embedding\n            vs_docs: list[RetrievedDocument] = []\n            vs_ids: list[str] = []\n            vs_scores: list[float] = []\n\n            def query_vectorstore():\n                nonlocal vs_docs\n                nonlocal vs_scores\n                nonlocal vs_ids\n\n                assert self.doc_store is not None\n                _, vs_scores, vs_ids = self.vector_store.query(\n                    embedding=emb, top_k=top_k_first_round, doc_ids=scope, **kwargs\n                )\n                if vs_ids:\n                    vs_docs = self.doc_store.get(vs_ids)\n\n            # full-text search section\n            ds_docs: list[RetrievedDocument] = []\n\n            def query_docstore():\n                nonlocal 
ds_docs\n\n                assert self.doc_store is not None\n                query = text.text if isinstance(text, Document) else text\n                if scope:\n                    ds_docs = self.doc_store.query(\n                        query, top_k=top_k_first_round, doc_ids=scope\n                    )\n\n            vs_query_thread = threading.Thread(target=query_vectorstore)\n            ds_query_thread = threading.Thread(target=query_docstore)\n\n            vs_query_thread.start()\n            ds_query_thread.start()\n\n            vs_query_thread.join()\n            ds_query_thread.join()\n\n            result = [\n                RetrievedDocument(**doc.to_dict(), score=-1.0)\n                for doc in ds_docs\n                if doc not in vs_ids\n            ]\n            result += [\n                RetrievedDocument(**doc.to_dict(), score=score)\n                for doc, score in zip(vs_docs, vs_scores)\n            ]\n            print(f\"Got {len(vs_docs)} from vectorstore\")\n            print(f\"Got {len(ds_docs)} from docstore\")\n\n        # use additional reranker to re-order the document list\n        if self.rerankers and text:\n            for reranker in self.rerankers:\n                # if reranker is LLMReranking, limit the document with top_k items only\n                if isinstance(reranker, LLMReranking):\n                    result = self._filter_docs(result, top_k=top_k)\n                result = reranker.run(documents=result, query=text)\n\n        result = self._filter_docs(result, top_k=top_k)\n        print(f\"Got raw {len(result)} retrieved documents\")\n\n        # add page thumbnails to the result if exists\n        thumbnail_doc_ids: set[str] = set()\n        # we should copy the text from retrieved text chunk\n        # to the thumbnail to get relevant LLM score correctly\n        text_thumbnail_docs: dict[str, RetrievedDocument] = {}\n\n        non_thumbnail_docs = []\n        raw_thumbnail_docs = []\n        for doc 
in result:\n            if doc.metadata.get(\"type\") == \"thumbnail\":\n                # change type to image to display on UI\n                doc.metadata[\"type\"] = \"image\"\n                raw_thumbnail_docs.append(doc)\n                continue\n            if (\n                \"thumbnail_doc_id\" in doc.metadata\n                and len(thumbnail_doc_ids) < thumbnail_count\n            ):\n                thumbnail_id = doc.metadata[\"thumbnail_doc_id\"]\n                thumbnail_doc_ids.add(thumbnail_id)\n                text_thumbnail_docs[thumbnail_id] = doc\n            else:\n                non_thumbnail_docs.append(doc)\n\n        linked_thumbnail_docs = self.doc_store.get(list(thumbnail_doc_ids))\n        print(\n            \"thumbnail docs\",\n            len(linked_thumbnail_docs),\n            \"non-thumbnail docs\",\n            len(non_thumbnail_docs),\n            \"raw-thumbnail docs\",\n            len(raw_thumbnail_docs),\n        )\n        additional_docs = []\n\n        for thumbnail_doc in linked_thumbnail_docs:\n            text_doc = text_thumbnail_docs[thumbnail_doc.doc_id]\n            doc_dict = thumbnail_doc.to_dict()\n            doc_dict[\"_id\"] = text_doc.doc_id\n            doc_dict[\"content\"] = text_doc.content\n            doc_dict[\"metadata\"][\"type\"] = \"image\"\n            for key in text_doc.metadata:\n                if key not in doc_dict[\"metadata\"]:\n                    doc_dict[\"metadata\"][key] = text_doc.metadata[key]\n\n            additional_docs.append(RetrievedDocument(**doc_dict, score=text_doc.score))\n\n        result = additional_docs + non_thumbnail_docs\n\n        if not result:\n            # return output from raw retrieved thumbnails\n            result = self._filter_docs(raw_thumbnail_docs, top_k=thumbnail_count)\n\n        return result\n\n\nclass TextVectorQA(BaseComponent):\n    retrieving_pipeline: BaseRetrieval\n    qa_pipeline: BaseComponent\n\n    def run(self, question, 
**kwargs):\n        retrieved_documents = self.retrieving_pipeline(question, **kwargs)\n        return self.qa_pipeline(question, retrieved_documents, **kwargs)\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/llms/__init__.py",
    "content": "from kotaemon.base.schema import AIMessage, BaseMessage, HumanMessage, SystemMessage\n\nfrom .base import BaseLLM\nfrom .branching import GatedBranchingPipeline, SimpleBranchingPipeline\nfrom .chats import (\n    AzureChatOpenAI,\n    ChatLLM,\n    ChatOpenAI,\n    EndpointChatLLM,\n    LCAnthropicChat,\n    LCAzureChatOpenAI,\n    LCChatOpenAI,\n    LCCohereChat,\n    LCGeminiChat,\n    LCOllamaChat,\n    LlamaCppChat,\n    StructuredOutputChatOpenAI,\n)\nfrom .completions import LLM, AzureOpenAI, LlamaCpp, OpenAI\nfrom .cot import ManualSequentialChainOfThought, Thought\nfrom .linear import GatedLinearPipeline, SimpleLinearPipeline\nfrom .prompts import BasePromptComponent, PromptTemplate\n\n__all__ = [\n    \"BaseLLM\",\n    # chat-specific components\n    \"ChatLLM\",\n    \"EndpointChatLLM\",\n    \"BaseMessage\",\n    \"HumanMessage\",\n    \"AIMessage\",\n    \"SystemMessage\",\n    \"AzureChatOpenAI\",\n    \"ChatOpenAI\",\n    \"StructuredOutputChatOpenAI\",\n    \"LCAnthropicChat\",\n    \"LCGeminiChat\",\n    \"LCCohereChat\",\n    \"LCOllamaChat\",\n    \"LCAzureChatOpenAI\",\n    \"LCChatOpenAI\",\n    \"LlamaCppChat\",\n    # completion-specific components\n    \"LLM\",\n    \"OpenAI\",\n    \"AzureOpenAI\",\n    \"LlamaCpp\",\n    # prompt-specific components\n    \"BasePromptComponent\",\n    \"PromptTemplate\",\n    # strategies\n    \"SimpleLinearPipeline\",\n    \"GatedLinearPipeline\",\n    \"SimpleBranchingPipeline\",\n    \"GatedBranchingPipeline\",\n    # chain-of-thoughts\n    \"ManualSequentialChainOfThought\",\n    \"Thought\",\n]\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/llms/base.py",
    "content": "from typing import AsyncGenerator, Iterator\n\nfrom langchain_core.language_models.base import BaseLanguageModel\n\nfrom kotaemon.base import BaseComponent, LLMInterface\n\n\nclass BaseLLM(BaseComponent):\n    def to_langchain_format(self) -> BaseLanguageModel:\n        raise NotImplementedError\n\n    def invoke(self, *args, **kwargs) -> LLMInterface:\n        raise NotImplementedError\n\n    async def ainvoke(self, *args, **kwargs) -> LLMInterface:\n        raise NotImplementedError\n\n    def stream(self, *args, **kwargs) -> Iterator[LLMInterface]:\n        raise NotImplementedError\n\n    def astream(self, *args, **kwargs) -> AsyncGenerator[LLMInterface, None]:\n        raise NotImplementedError\n\n    def run(self, *args, **kwargs):\n        return self.invoke(*args, **kwargs)\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/llms/branching.py",
    "content": "from typing import List, Optional\n\nfrom kotaemon.base import BaseComponent, Document, Param\n\nfrom .linear import GatedLinearPipeline\n\n\nclass SimpleBranchingPipeline(BaseComponent):\n    \"\"\"\n    A simple branching pipeline for executing multiple branches.\n\n    Attributes:\n        branches (List[BaseComponent]): The list of branches to be executed.\n\n    Example:\n        ```python\n        from kotaemon.llms import (\n            LCAzureChatOpenAI,\n            BasePromptComponent,\n            GatedLinearPipeline,\n        )\n        from kotaemon.parsers import RegexExtractor\n\n        def identity(x):\n            return x\n\n        pipeline = SimpleBranchingPipeline()\n        llm = LCAzureChatOpenAI(\n            openai_api_base=\"your openai api base\",\n            openai_api_key=\"your openai api key\",\n            openai_api_version=\"your openai api version\",\n            deployment_name=\"dummy-q2-gpt35\",\n            temperature=0,\n            request_timeout=600,\n        )\n\n        for i in range(3):\n            pipeline.add_branch(\n                GatedLinearPipeline(\n                    prompt=BasePromptComponent(template=f\"what is {i} in Japanese ?\"),\n                    condition=RegexExtractor(pattern=f\"{i}\"),\n                    llm=llm,\n                    post_processor=identity,\n                )\n            )\n        print(pipeline(condition_text=\"1\"))\n        print(pipeline(condition_text=\"2\"))\n        print(pipeline(condition_text=\"12\"))\n        ```\n    \"\"\"\n\n    branches: List[BaseComponent] = Param(default_callback=lambda *_: [])\n\n    def add_branch(self, component: BaseComponent):\n        \"\"\"\n        Add a new branch to the pipeline.\n\n        Args:\n            component (BaseComponent): The branch component to be added.\n        \"\"\"\n        self.branches.append(component)\n\n    def run(self, **prompt_kwargs):\n        \"\"\"\n        Execute the pipeline by running each branch and return the outputs as a list.\n\n        Args:\n            **prompt_kwargs: Keyword arguments for the branches.\n\n        Returns:\n            List: The outputs of each branch as a list.\n        \"\"\"\n        output = []\n        for i, branch in enumerate(self.branches):\n            self._prepare_child(branch, name=f\"branch-{i}\")\n            output.append(branch(**prompt_kwargs))\n\n        return output\n\n\nclass GatedBranchingPipeline(SimpleBranchingPipeline):\n    \"\"\"\n    A simple gated branching pipeline for executing multiple branches based on a\n        condition.\n\n    This class extends the SimpleBranchingPipeline class and adds the ability to execute\n        the branches until a branch returns a non-empty output based on a condition.\n\n    Attributes:\n        branches (List[BaseComponent]): The list of branches to be executed.\n\n    Example:\n        ```python\n        from kotaemon.llms import (\n            LCAzureChatOpenAI,\n            BasePromptComponent,\n            GatedLinearPipeline,\n        )\n        from kotaemon.parsers import RegexExtractor\n\n        def identity(x):\n            return x\n\n        pipeline = GatedBranchingPipeline()\n        llm = LCAzureChatOpenAI(\n            openai_api_base=\"your openai api base\",\n            openai_api_key=\"your openai api key\",\n            openai_api_version=\"your openai api version\",\n            deployment_name=\"dummy-q2-gpt35\",\n            temperature=0,\n            request_timeout=600,\n        )\n\n        for i in range(3):\n            pipeline.add_branch(\n                GatedLinearPipeline(\n                    prompt=BasePromptComponent(template=f\"what is {i} in Japanese ?\"),\n                    condition=RegexExtractor(pattern=f\"{i}\"),\n                    llm=llm,\n                    post_processor=identity,\n                )\n            )\n        print(pipeline(condition_text=\"1\"))\n        print(pipeline(condition_text=\"2\"))\n        ```\n    \"\"\"\n\n    def run(self, *, condition_text: Optional[str] = None, **prompt_kwargs):\n        \"\"\"\n        Execute the pipeline by running each branch and return the output of the first\n            branch that returns a non-empty output based on the provided condition.\n\n        Args:\n            condition_text (str): The condition text to evaluate for each branch.\n                Defaults to None.\n            **prompt_kwargs: Keyword arguments for the branches.\n\n        Returns:\n            Union[OutputType, Document]: The output of the first branch that\n            satisfies the condition, or `Document(None)` if no branch satisfies it.\n\n        Raises:\n            ValueError: If condition_text is None\n        \"\"\"\n        if condition_text is None:\n            raise ValueError(\"`condition_text` must be provided.\")\n\n        for i, branch in enumerate(self.branches):\n            self._prepare_child(branch, name=f\"branch-{i}\")\n            output = branch(condition_text=condition_text, **prompt_kwargs)\n            if output:\n                return output\n\n        return Document(None)\n\n\nif __name__ == \"__main__\":\n    import dotenv\n\n    from kotaemon.llms import BasePromptComponent, LCAzureChatOpenAI\n    from kotaemon.parsers import RegexExtractor\n\n    def identity(x):\n        return x\n\n    secrets = dotenv.dotenv_values(\".env\")\n\n    pipeline = GatedBranchingPipeline()\n    llm = LCAzureChatOpenAI(\n        openai_api_base=secrets.get(\"OPENAI_API_BASE\", \"\"),\n        openai_api_key=secrets.get(\"OPENAI_API_KEY\", \"\"),\n        openai_api_version=secrets.get(\"OPENAI_API_VERSION\", \"\"),\n        deployment_name=\"dummy-q2-gpt35\",\n        temperature=0,\n        request_timeout=600,\n    )\n\n    for i in range(3):\n        pipeline.add_branch(\n            GatedLinearPipeline(\n                prompt=BasePromptComponent(template=f\"what is {i} in Japanese ?\"),\n                condition=RegexExtractor(pattern=f\"{i}\"),\n                llm=llm,\n                post_processor=identity,\n            )\n        )\n    pipeline(condition_text=\"1\")\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/llms/chats/__init__.py",
    "content": "from .base import ChatLLM\nfrom .endpoint_based import EndpointChatLLM\nfrom .langchain_based import (\n    LCAnthropicChat,\n    LCAzureChatOpenAI,\n    LCChatMixin,\n    LCChatOpenAI,\n    LCCohereChat,\n    LCGeminiChat,\n    LCOllamaChat,\n)\nfrom .llamacpp import LlamaCppChat\nfrom .openai import AzureChatOpenAI, ChatOpenAI, StructuredOutputChatOpenAI\n\n__all__ = [\n    \"ChatOpenAI\",\n    \"AzureChatOpenAI\",\n    \"ChatLLM\",\n    \"EndpointChatLLM\",\n    \"ChatOpenAI\",\n    \"StructuredOutputChatOpenAI\",\n    \"LCAnthropicChat\",\n    \"LCGeminiChat\",\n    \"LCCohereChat\",\n    \"LCOllamaChat\",\n    \"LCChatOpenAI\",\n    \"LCAzureChatOpenAI\",\n    \"LCChatMixin\",\n    \"LlamaCppChat\",\n]\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/llms/chats/base.py",
    "content": "from __future__ import annotations\n\nimport logging\n\nfrom kotaemon.base import BaseComponent\nfrom kotaemon.llms.base import BaseLLM\n\nlogger = logging.getLogger(__name__)\n\n\nclass ChatLLM(BaseLLM):\n    def flow(self):\n        if self.inflow is None:\n            raise ValueError(\"No inflow provided.\")\n\n        if not isinstance(self.inflow, BaseComponent):\n            raise ValueError(\n                f\"inflow must be a BaseComponent, found {type(self.inflow)}\"\n            )\n\n        text = self.inflow.flow().text\n        return self.__call__(text)\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/llms/chats/endpoint_based.py",
    "content": "import requests\n\nfrom kotaemon.base import (\n    AIMessage,\n    BaseMessage,\n    HumanMessage,\n    LLMInterface,\n    Param,\n    SystemMessage,\n)\n\nfrom .base import ChatLLM\n\n\nclass EndpointChatLLM(ChatLLM):\n    \"\"\"\n    A ChatLLM that uses an endpoint to generate responses. This expects an OpenAI API\n    compatible endpoint.\n\n    Attributes:\n        endpoint_url (str): The URL of an OpenAI API compatible endpoint.\n    \"\"\"\n\n    endpoint_url: str = Param(\n        help=\"URL of the OpenAI API compatible endpoint\", required=True\n    )\n\n    def run(\n        self, messages: str | BaseMessage | list[BaseMessage], **kwargs\n    ) -> LLMInterface:\n        \"\"\"\n        Generate response from messages\n        Args:\n            messages (str | BaseMessage | list[BaseMessage]): history of messages to\n                generate response from\n            **kwargs: additional arguments (currently ignored; they are not sent to\n                the endpoint)\n        Returns:\n            LLMInterface: generated response\n        \"\"\"\n        if isinstance(messages, str):\n            input_ = [HumanMessage(content=messages)]\n        elif isinstance(messages, BaseMessage):\n            input_ = [messages]\n        else:\n            input_ = messages\n\n        def decide_role(message: BaseMessage):\n            if isinstance(message, SystemMessage):\n                return \"system\"\n            elif isinstance(message, AIMessage):\n                return \"assistant\"\n            else:\n                return \"user\"\n\n        request_json = {\n            \"messages\": [{\"content\": m.text, \"role\": decide_role(m)} for m in input_]\n        }\n\n        response = requests.post(self.endpoint_url, json=request_json).json()\n\n        content = \"\"\n        candidates = []\n        if response[\"choices\"]:\n            candidates = [\n                each[\"message\"][\"content\"]\n                for each in response[\"choices\"]\n                if each[\"message\"][\"content\"]\n            ]\n            content = candidates[0]\n\n        return LLMInterface(\n            content=content,\n            candidates=candidates,\n            completion_tokens=response[\"usage\"][\"completion_tokens\"],\n            total_tokens=response[\"usage\"][\"total_tokens\"],\n            prompt_tokens=response[\"usage\"][\"prompt_tokens\"],\n        )\n\n    def invoke(\n        self, messages: str | BaseMessage | list[BaseMessage], **kwargs\n    ) -> LLMInterface:\n        \"\"\"Same as run\"\"\"\n        return self.run(messages, **kwargs)\n\n    async def ainvoke(\n        self, messages: str | BaseMessage | list[BaseMessage], **kwargs\n    ) -> LLMInterface:\n        return self.invoke(messages, **kwargs)\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/llms/chats/langchain_based.py",
    "content": "from __future__ import annotations\n\nimport logging\nfrom typing import AsyncGenerator, Iterator\n\nfrom kotaemon.base import BaseMessage, HumanMessage, LLMInterface, Param\n\nfrom .base import ChatLLM\n\nlogger = logging.getLogger(__name__)\n\n\nclass LCChatMixin:\n    \"\"\"Mixin for langchain based chat models\"\"\"\n\n    def _get_lc_class(self):\n        raise NotImplementedError(\n            \"Please return the relevant Langchain class in in _get_lc_class\"\n        )\n\n    def _get_tool_call_kwargs(self):\n        return {}\n\n    def __init__(self, stream: bool = False, **params):\n        self._lc_class = self._get_lc_class()\n        self._obj = self._lc_class(**params)\n        self._kwargs: dict = params\n        self._stream = stream\n\n        super().__init__()\n\n    def run(\n        self, messages: str | BaseMessage | list[BaseMessage], **kwargs\n    ) -> LLMInterface:\n        if self._stream:\n            return self.stream(messages, **kwargs)  # type: ignore\n        return self.invoke(messages, **kwargs)\n\n    def prepare_message(self, messages: str | BaseMessage | list[BaseMessage]):\n        input_: list[BaseMessage] = []\n\n        if isinstance(messages, str):\n            input_ = [HumanMessage(content=messages)]\n        elif isinstance(messages, BaseMessage):\n            input_ = [messages]\n        else:\n            input_ = messages\n\n        return input_\n\n    def prepare_response(self, pred):\n        all_text = [each.text for each in pred.generations[0]]\n        all_messages = [each.message for each in pred.generations[0]]\n\n        completion_tokens, total_tokens, prompt_tokens = 0, 0, 0\n        try:\n            if pred.llm_output is not None:\n                completion_tokens = pred.llm_output[\"token_usage\"][\"completion_tokens\"]\n                total_tokens = pred.llm_output[\"token_usage\"][\"total_tokens\"]\n                prompt_tokens = pred.llm_output[\"token_usage\"][\"prompt_tokens\"]\n  
      except Exception:\n            pass\n\n        return LLMInterface(\n            text=all_text[0] if len(all_text) > 0 else \"\",\n            candidates=all_text,\n            completion_tokens=completion_tokens,\n            total_tokens=total_tokens,\n            prompt_tokens=prompt_tokens,\n            messages=all_messages,\n            logits=[],\n        )\n\n    def invoke(\n        self, messages: str | BaseMessage | list[BaseMessage], **kwargs\n    ) -> LLMInterface:\n        \"\"\"Generate response from messages\n\n        Args:\n            messages: history of messages to generate response from\n            **kwargs: additional arguments to pass to the langchain chat model\n\n        Returns:\n            LLMInterface: generated response\n        \"\"\"\n        input_ = self.prepare_message(messages)\n\n        if \"tools_pydantic\" in kwargs:\n            tools = kwargs.pop(\n                \"tools_pydantic\",\n            )\n            lc_tool_call = self._obj.bind_tools(tools)\n            pred = lc_tool_call.invoke(\n                input_,\n                **self._get_tool_call_kwargs(),\n            )\n            if pred.tool_calls:\n                tool_calls = pred.tool_calls\n            else:\n                tool_calls = pred.additional_kwargs.get(\"tool_calls\", [])\n\n            output = LLMInterface(\n                content=\"\",\n                additional_kwargs={\"tool_calls\": tool_calls},\n            )\n        else:\n            pred = self._obj.generate(messages=[input_], **kwargs)\n            output = self.prepare_response(pred)\n\n        return output\n\n    async def ainvoke(\n        self, messages: str | BaseMessage | list[BaseMessage], **kwargs\n    ) -> LLMInterface:\n        input_ = self.prepare_message(messages)\n        pred = await self._obj.agenerate(messages=[input_], **kwargs)\n        return self.prepare_response(pred)\n\n    def stream(\n        self, messages: str | BaseMessage | list[BaseMessage], 
**kwargs\n    ) -> Iterator[LLMInterface]:\n        for response in self._obj.stream(input=messages, **kwargs):\n            yield LLMInterface(content=response.content)\n\n    async def astream(\n        self, messages: str | BaseMessage | list[BaseMessage], **kwargs\n    ) -> AsyncGenerator[LLMInterface, None]:\n        async for response in self._obj.astream(input=messages, **kwargs):\n            yield LLMInterface(content=response.content)\n\n    def to_langchain_format(self):\n        return self._obj\n\n    def __repr__(self):\n        kwargs = []\n        for key, value_obj in self._kwargs.items():\n            value = repr(value_obj)\n            kwargs.append(f\"{key}={value}\")\n        kwargs_repr = \", \".join(kwargs)\n        return f\"{self.__class__.__name__}({kwargs_repr})\"\n\n    def __str__(self):\n        kwargs = []\n        for key, value_obj in self._kwargs.items():\n            value = str(value_obj)\n            if len(value) > 20:\n                value = f\"{value[:15]}...\"\n            kwargs.append(f\"{key}={value}\")\n        kwargs_repr = \", \".join(kwargs)\n        return f\"{self.__class__.__name__}({kwargs_repr})\"\n\n    def __setattr__(self, name, value):\n        if name == \"_lc_class\":\n            return super().__setattr__(name, value)\n\n        if name in self._lc_class.__fields__:\n            self._kwargs[name] = value\n            self._obj = self._lc_class(**self._kwargs)\n        else:\n            super().__setattr__(name, value)\n\n    def __getattr__(self, name):\n        if name in self._kwargs:\n            return self._kwargs[name]\n        return getattr(self._obj, name)\n\n    def dump(self, *args, **kwargs):\n        from theflow.utils.modules import serialize\n\n        params = {key: serialize(value) for key, value in self._kwargs.items()}\n        return {\n            \"__type__\": f\"{self.__module__}.{self.__class__.__qualname__}\",\n            **params,\n        }\n\n    def specs(self, path: 
str):\n        path = path.strip(\".\")\n        if \".\" in path:\n            raise ValueError(\"path should not contain '.'\")\n\n        if path in self._lc_class.__fields__:\n            return {\n                \"__type__\": \"theflow.base.ParamAttr\",\n                \"refresh_on_set\": True,\n                \"strict_type\": True,\n            }\n\n        raise ValueError(f\"Invalid param {path}\")\n\n\nclass LCChatOpenAI(LCChatMixin, ChatLLM):  # type: ignore\n    def __init__(\n        self,\n        openai_api_base: str | None = None,\n        openai_api_key: str | None = None,\n        model: str | None = None,\n        temperature: float = 0.7,\n        request_timeout: float | None = None,\n        **params,\n    ):\n        super().__init__(\n            openai_api_base=openai_api_base,\n            openai_api_key=openai_api_key,\n            model=model,\n            temperature=temperature,\n            request_timeout=request_timeout,\n            **params,\n        )\n\n    def _get_lc_class(self):\n        try:\n            from langchain_openai import ChatOpenAI\n        except ImportError:\n            from langchain.chat_models import ChatOpenAI\n\n        return ChatOpenAI\n\n\nclass LCAzureChatOpenAI(LCChatMixin, ChatLLM):  # type: ignore\n    def __init__(\n        self,\n        azure_endpoint: str | None = None,\n        openai_api_key: str | None = None,\n        openai_api_version: str = \"\",\n        deployment_name: str | None = None,\n        temperature: float = 0.7,\n        request_timeout: float | None = None,\n        **params,\n    ):\n        super().__init__(\n            azure_endpoint=azure_endpoint,\n            openai_api_key=openai_api_key,\n            openai_api_version=openai_api_version,\n            deployment_name=deployment_name,\n            temperature=temperature,\n            request_timeout=request_timeout,\n            **params,\n        )\n\n    def _get_lc_class(self):\n        try:\n            from 
langchain_openai import AzureChatOpenAI\n        except ImportError:\n            from langchain.chat_models import AzureChatOpenAI\n\n        return AzureChatOpenAI\n\n\nclass LCAnthropicChat(LCChatMixin, ChatLLM):  # type: ignore\n    api_key: str = Param(\n        help=\"API key (https://console.anthropic.com/settings/keys)\", required=True\n    )\n    model_name: str = Param(\n        help=(\n            \"Model name to use \"\n            \"(https://docs.anthropic.com/en/docs/about-claude/models)\"\n        ),\n        required=True,\n    )\n\n    def _get_tool_call_kwargs(self):\n        return {\"tool_choice\": {\"type\": \"any\"}}\n\n    def __init__(\n        self,\n        api_key: str | None = None,\n        model_name: str | None = None,\n        temperature: float = 0.7,\n        **params,\n    ):\n        super().__init__(\n            api_key=api_key,\n            model_name=model_name,\n            temperature=temperature,\n            **params,\n        )\n\n    def _get_lc_class(self):\n        try:\n            from langchain_anthropic import ChatAnthropic\n        except ImportError:\n            raise ImportError(\"Please install langchain-anthropic\")\n\n        return ChatAnthropic\n\n\nclass LCGeminiChat(LCChatMixin, ChatLLM):  # type: ignore\n    api_key: str = Param(\n        help=\"API key (https://aistudio.google.com/app/apikey)\", required=True\n    )\n    model_name: str = Param(\n        help=(\n            \"Model name to use (https://cloud.google\"\n            \".com/vertex-ai/generative-ai/docs/learn/models)\"\n        ),\n        required=True,\n    )\n\n    def _get_tool_call_kwargs(self):\n        return {\n            \"tool_config\": {\n                \"function_calling_config\": {\n                    \"mode\": \"ANY\",\n                }\n            }\n        }\n\n    def __init__(\n        self,\n        api_key: str | None = None,\n        model_name: str | None = None,\n        temperature: float = 0.7,\n        
**params,\n    ):\n        super().__init__(\n            google_api_key=api_key,\n            model=model_name,\n            temperature=temperature,\n            **params,\n        )\n\n    def _get_lc_class(self):\n        try:\n            from langchain_google_genai import ChatGoogleGenerativeAI\n        except ImportError:\n            raise ImportError(\"Please install langchain-google-genai\")\n\n        return ChatGoogleGenerativeAI\n\n\nclass LCCohereChat(LCChatMixin, ChatLLM):  # type: ignore\n    api_key: str = Param(\n        help=\"API key (https://dashboard.cohere.com/api-keys)\", required=True\n    )\n    model_name: str = Param(\n        help=(\"Model name to use (https://dashboard.cohere.com/playground/chat)\"),\n        required=True,\n    )\n\n    def __init__(\n        self,\n        api_key: str | None = None,\n        model_name: str | None = None,\n        temperature: float = 0.7,\n        **params,\n    ):\n        super().__init__(\n            cohere_api_key=api_key,\n            model_name=model_name,\n            temperature=temperature,\n            **params,\n        )\n\n    def _get_lc_class(self):\n        try:\n            from langchain_cohere import ChatCohere\n        except ImportError:\n            raise ImportError(\"Please install langchain-cohere\")\n\n        return ChatCohere\n\n\nclass LCOllamaChat(LCChatMixin, ChatLLM):  # type: ignore\n    base_url: str = Param(\n        help=\"Base Ollama URL. 
(default: http://localhost:11434/api/)\",  # noqa\n        required=True,\n    )\n    model: str = Param(\n        help=\"Model name to use (https://ollama.com/library)\",\n        required=True,\n    )\n    num_ctx: int = Param(\n        help=\"The size of the context window (default: 8192)\",\n        required=True,\n    )\n\n    def __init__(\n        self,\n        model: str | None = None,\n        base_url: str | None = None,\n        num_ctx: int | None = None,\n        **params,\n    ):\n        super().__init__(\n            base_url=base_url,\n            model=model,\n            num_ctx=num_ctx,\n            **params,\n        )\n\n    def _get_lc_class(self):\n        try:\n            from langchain_ollama import ChatOllama\n        except ImportError:\n            raise ImportError(\"Please install langchain-ollama\")\n\n        return ChatOllama\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/llms/chats/llamacpp.py",
    "content": "from typing import TYPE_CHECKING, Iterator, Optional, cast\n\nfrom kotaemon.base import BaseMessage, HumanMessage, LLMInterface, Param\n\nfrom .base import ChatLLM\n\nif TYPE_CHECKING:\n    from llama_cpp import CreateChatCompletionResponse as CCCR\n    from llama_cpp import Llama\n\n\nclass LlamaCppChat(ChatLLM):\n    \"\"\"Wrapper around the llama-cpp-python's Llama model\"\"\"\n\n    model_path: Optional[str] = Param(\n        help=\"Path to the model file. This is required to load the model.\",\n    )\n    repo_id: Optional[str] = Param(\n        help=\"Id of a repo on the HuggingFace Hub in the form of `user_name/repo_name`.\"\n    )\n    filename: Optional[str] = Param(\n        help=\"A filename or glob pattern to match the model file in the repo.\"\n    )\n    chat_format: str = Param(\n        help=(\n            \"Chat format to use. Please refer to llama_cpp.llama_chat_format for a \"\n            \"list of supported formats. If blank, the chat format will be auto-\"\n            \"inferred.\"\n        ),\n        required=True,\n    )\n    lora_base: Optional[str] = Param(None, help=\"Path to the base Lora model\")\n    n_ctx: Optional[int] = Param(512, help=\"Text context, 0 = from model\")\n    n_gpu_layers: Optional[int] = Param(\n        0,\n        help=\"Number of layers to offload to GPU. If -1, all layers are offloaded\",\n    )\n    use_mmap: Optional[bool] = Param(\n        True,\n        help=(),\n    )\n    vocab_only: Optional[bool] = Param(\n        False,\n        help=\"If True, only the vocabulary is loaded. 
This is useful for debugging.\",\n    )\n\n    _role_mapper: dict[str, str] = {\n        \"human\": \"user\",\n        \"system\": \"system\",\n        \"ai\": \"assistant\",\n    }\n\n    @Param.auto()\n    def client_object(self) -> \"Llama\":\n        \"\"\"Get the llama-cpp-python client object\"\"\"\n        try:\n            from llama_cpp import Llama\n        except ImportError:\n            raise ImportError(\n                \"llama-cpp-python is not installed. \"\n                \"Please install it using `pip install llama-cpp-python`\"\n            )\n\n        errors = []\n        if not self.model_path and (not self.repo_id or not self.filename):\n            errors.append(\n                \"- `model_path` or `repo_id` and `filename` are required to load the\"\n                \" model\"\n            )\n\n        if not self.chat_format:\n            errors.append(\n                \"- `chat_format` is required to know how to format the chat messages. \"\n                \"Please refer to llama_cpp.llama_chat_format for a list of supported \"\n                \"formats.\"\n            )\n        if errors:\n            raise ValueError(\"\\n\".join(errors))\n\n        if self.model_path:\n            return Llama(\n                model_path=cast(str, self.model_path),\n                chat_format=self.chat_format,\n                lora_base=self.lora_base,\n                n_ctx=self.n_ctx,\n                n_gpu_layers=self.n_gpu_layers,\n                use_mmap=self.use_mmap,\n                vocab_only=self.vocab_only,\n            )\n        else:\n            return Llama.from_pretrained(\n                repo_id=self.repo_id,\n                filename=self.filename,\n                chat_format=self.chat_format,\n                lora_base=self.lora_base,\n                n_ctx=self.n_ctx,\n                n_gpu_layers=self.n_gpu_layers,\n                use_mmap=self.use_mmap,\n                vocab_only=self.vocab_only,\n            )\n\n   
 def prepare_message(\n        self, messages: str | BaseMessage | list[BaseMessage]\n    ) -> list[dict]:\n        input_: list[BaseMessage] = []\n\n        if isinstance(messages, str):\n            input_ = [HumanMessage(content=messages)]\n        elif isinstance(messages, BaseMessage):\n            input_ = [messages]\n        else:\n            input_ = messages\n\n        output_ = [\n            {\"role\": self._role_mapper[each.type], \"content\": each.content}\n            for each in input_\n        ]\n\n        return output_\n\n    def invoke(\n        self, messages: str | BaseMessage | list[BaseMessage], **kwargs\n    ) -> LLMInterface:\n\n        pred: \"CCCR\" = self.client_object.create_chat_completion(\n            messages=self.prepare_message(messages),\n            stream=False,\n        )\n\n        return LLMInterface(\n            content=pred[\"choices\"][0][\"message\"][\"content\"] if pred[\"choices\"] else \"\",\n            candidates=[\n                c[\"message\"][\"content\"]\n                for c in pred[\"choices\"]\n                if c[\"message\"][\"content\"]\n            ],\n            completion_tokens=pred[\"usage\"][\"completion_tokens\"],\n            total_tokens=pred[\"usage\"][\"total_tokens\"],\n            prompt_tokens=pred[\"usage\"][\"prompt_tokens\"],\n        )\n\n    def stream(\n        self, messages: str | BaseMessage | list[BaseMessage], **kwargs\n    ) -> Iterator[LLMInterface]:\n        pred = self.client_object.create_chat_completion(\n            messages=self.prepare_message(messages),\n            stream=True,\n        )\n        for chunk in pred:\n            if not chunk[\"choices\"]:\n                continue\n\n            if \"content\" not in chunk[\"choices\"][0][\"delta\"]:\n                continue\n\n            yield LLMInterface(content=chunk[\"choices\"][0][\"delta\"][\"content\"])\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/llms/chats/openai.py",
    "content": "from typing import TYPE_CHECKING, AsyncGenerator, Iterator, Optional, Type\n\nfrom pydantic import BaseModel\nfrom theflow.utils.modules import import_dotted_string\n\nfrom kotaemon.base import (\n    AIMessage,\n    BaseMessage,\n    HumanMessage,\n    LLMInterface,\n    Param,\n    StructuredOutputLLMInterface,\n)\n\nfrom .base import ChatLLM\n\nif TYPE_CHECKING:\n    from openai.types.chat.chat_completion_message_param import (\n        ChatCompletionMessageParam,\n    )\n\n\nclass BaseChatOpenAI(ChatLLM):\n    \"\"\"Base interface for OpenAI chat model, using the openai library\n\n    This class exposes the parameters in resources.Chat. To subclass this class:\n\n        - Implement the `prepare_client` method to return the OpenAI client\n        - Implement the `openai_response` method to return the OpenAI response\n        - Implement the params relate to the OpenAI client\n    \"\"\"\n\n    _dependencies = [\"openai\"]\n    _capabilities = [\"chat\", \"text\"]  # consider as mixin\n\n    api_key: str = Param(help=\"API key\", required=True)\n    timeout: Optional[float] = Param(None, help=\"Timeout for the API request\")\n    max_retries: Optional[int] = Param(\n        None, help=\"Maximum number of retries for the API request\"\n    )\n\n    temperature: Optional[float] = Param(\n        None,\n        help=(\n            \"Number between 0 and 2 that controls the randomness of the generated \"\n            \"tokens. Lower values make the model more deterministic, while higher \"\n            \"values make the model more random.\"\n        ),\n    )\n    max_tokens: Optional[int] = Param(\n        None,\n        help=(\n            \"Maximum number of tokens to generate. The total length of input tokens \"\n            \"and generated tokens is limited by the model's context length.\"\n        ),\n    )\n    n: int = Param(\n        1,\n        help=(\n            \"Number of completions to generate. 
The API will generate n completions \"\n            \"for each prompt.\"\n        ),\n    )\n    stop: Optional[str | list[str]] = Param(\n        None,\n        help=(\n            \"Stop sequence. If a stop sequence is detected, generation will stop \"\n            \"at that point. If not specified, generation will continue until the \"\n            \"maximum token length is reached.\"\n        ),\n    )\n    frequency_penalty: Optional[float] = Param(\n        None,\n        help=(\n            \"Number between -2.0 and 2.0. Positive values penalize new tokens \"\n            \"based on their existing frequency in the text so far, decreasing the \"\n            \"model's likelihood of repeating the same text.\"\n        ),\n    )\n    presence_penalty: Optional[float] = Param(\n        None,\n        help=(\n            \"Number between -2.0 and 2.0. Positive values penalize new tokens \"\n            \"based on their existing presence in the text so far, decreasing the \"\n            \"model's likelihood of repeating the same text.\"\n        ),\n    )\n    tool_choice: Optional[str] = Param(\n        None,\n        help=(\n            \"Choice of tool to use for the completion. 
Available choices are: \"\n            \"auto, default.\"\n        ),\n    )\n    tools: Optional[list[str]] = Param(\n        None,\n        help=\"List of tools to use for the completion.\",\n    )\n    logprobs: Optional[bool] = Param(\n        None,\n        help=(\n            \"Include log probabilities on the logprobs most likely tokens, \"\n            \"as well as the chosen token.\"\n        ),\n    )\n    logit_bias: Optional[dict] = Param(\n        None,\n        help=(\n            \"Dictionary of logit bias values to add to the logits of the tokens \"\n            \"in the vocabulary.\"\n        ),\n    )\n    top_logprobs: Optional[int] = Param(\n        None,\n        help=(\n            \"An integer between 0 and 5 specifying the number of most likely tokens \"\n            \"to return at each token position, each with an associated log \"\n            \"probability. `logprobs` must also be set to `true` if this parameter \"\n            \"is used.\"\n        ),\n    )\n    top_p: Optional[float] = Param(\n        None,\n        help=(\n            \"An alternative to sampling with temperature, called nucleus sampling, \"\n            \"where the model considers the results of the token with top_p \"\n            \"probability mass. 
So 0.1 means that only the tokens comprising the \"\n            \"top 10% probability mass are considered.\"\n        ),\n    )\n\n    @Param.auto(depends_on=[\"max_retries\"])\n    def max_retries_(self):\n        if self.max_retries is None:\n            from openai._constants import DEFAULT_MAX_RETRIES\n\n            return DEFAULT_MAX_RETRIES\n        return self.max_retries\n\n    def prepare_message(\n        self, messages: str | BaseMessage | list[BaseMessage]\n    ) -> list[\"ChatCompletionMessageParam\"]:\n        \"\"\"Prepare the message into OpenAI format\n\n        Returns:\n            list[dict]: List of messages in OpenAI format\n        \"\"\"\n        input_: list[BaseMessage] = []\n        output_: list[\"ChatCompletionMessageParam\"] = []\n\n        if isinstance(messages, str):\n            input_ = [HumanMessage(content=messages)]\n        elif isinstance(messages, BaseMessage):\n            input_ = [messages]\n        else:\n            input_ = messages\n\n        for message in input_:\n            output_.append(message.to_openai_format())\n\n        return output_\n\n    def prepare_output(self, resp: dict) -> LLMInterface:\n        \"\"\"Convert the OpenAI response into LLMInterface\"\"\"\n        additional_kwargs = {}\n        if \"tool_calls\" in resp[\"choices\"][0][\"message\"]:\n            additional_kwargs[\"tool_calls\"] = resp[\"choices\"][0][\"message\"][\n                \"tool_calls\"\n            ]\n\n        if resp[\"choices\"][0].get(\"logprobs\") is None:\n            logprobs = []\n        else:\n            all_logprobs = resp[\"choices\"][0][\"logprobs\"].get(\"content\")\n            logprobs = (\n                [logprob[\"logprob\"] for logprob in all_logprobs] if all_logprobs else []\n            )\n\n        output = LLMInterface(\n            candidates=[(_[\"message\"][\"content\"] or \"\") for _ in resp[\"choices\"]],\n            content=resp[\"choices\"][0][\"message\"][\"content\"] or \"\",\n            
total_tokens=resp[\"usage\"][\"total_tokens\"],\n            prompt_tokens=resp[\"usage\"][\"prompt_tokens\"],\n            completion_tokens=resp[\"usage\"][\"completion_tokens\"],\n            additional_kwargs=additional_kwargs,\n            messages=[\n                AIMessage(content=(_[\"message\"][\"content\"]) or \"\")\n                for _ in resp[\"choices\"]\n            ],\n            logprobs=logprobs,\n        )\n\n        return output\n\n    def prepare_client(self, async_version: bool = False):\n        \"\"\"Get the OpenAI client\n\n        Args:\n            async_version (bool): Whether to get the async version of the client\n        \"\"\"\n        raise NotImplementedError\n\n    def openai_response(self, client, **kwargs):\n        \"\"\"Get the openai response\"\"\"\n        raise NotImplementedError\n\n    async def aopenai_response(self, client, **kwargs):\n        \"\"\"Get the openai response\"\"\"\n        raise NotImplementedError\n\n    def invoke(\n        self, messages: str | BaseMessage | list[BaseMessage], *args, **kwargs\n    ) -> LLMInterface:\n        client = self.prepare_client(async_version=False)\n        input_messages = self.prepare_message(messages)\n        resp = self.openai_response(\n            client, messages=input_messages, stream=False, **kwargs\n        ).dict()\n        return self.prepare_output(resp)\n\n    async def ainvoke(\n        self, messages: str | BaseMessage | list[BaseMessage], *args, **kwargs\n    ) -> LLMInterface:\n        client = self.prepare_client(async_version=True)\n        input_messages = self.prepare_message(messages)\n        resp = (\n            await self.aopenai_response(\n                client, messages=input_messages, stream=False, **kwargs\n            )\n        ).dict()\n\n        return self.prepare_output(resp)\n\n    def stream(\n        self, messages: str | BaseMessage | list[BaseMessage], *args, **kwargs\n    ) -> Iterator[LLMInterface]:\n        client = 
self.prepare_client(async_version=False)\n        input_messages = self.prepare_message(messages)\n        resp = self.openai_response(\n            client, messages=input_messages, stream=True, **kwargs\n        )\n\n        for c in resp:\n            chunk = c.dict()\n            if not chunk[\"choices\"]:\n                continue\n            if chunk[\"choices\"][0][\"delta\"][\"content\"] is not None:\n                if chunk[\"choices\"][0].get(\"logprobs\") is None:\n                    logprobs = []\n                else:\n                    logprobs = [\n                        logprob[\"logprob\"]\n                        for logprob in chunk[\"choices\"][0][\"logprobs\"].get(\n                            \"content\", []\n                        )\n                    ]\n\n                yield LLMInterface(\n                    content=chunk[\"choices\"][0][\"delta\"][\"content\"], logprobs=logprobs\n                )\n\n    async def astream(\n        self, messages: str | BaseMessage | list[BaseMessage], *args, **kwargs\n    ) -> AsyncGenerator[LLMInterface, None]:\n        client = self.prepare_client(async_version=True)\n        input_messages = self.prepare_message(messages)\n        resp = self.openai_response(\n            client, messages=input_messages, stream=True, **kwargs\n        )\n\n        async for chunk in resp:\n            if not chunk.choices:\n                continue\n            if chunk.choices[0].delta.content is not None:\n                yield LLMInterface(content=chunk.choices[0].delta.content)\n\n\nclass ChatOpenAI(BaseChatOpenAI):\n    \"\"\"OpenAI chat model\"\"\"\n\n    base_url: Optional[str] = Param(None, help=\"OpenAI base URL\")\n    organization: Optional[str] = Param(None, help=\"OpenAI organization\")\n    model: str = Param(help=\"OpenAI model\", required=True)\n\n    def prepare_client(self, async_version: bool = False):\n        \"\"\"Get the OpenAI client\n\n        Args:\n            async_version (bool): 
Whether to get the async version of the client\n        \"\"\"\n        params = {\n            \"api_key\": self.api_key,\n            \"organization\": self.organization,\n            \"base_url\": self.base_url,\n            \"timeout\": self.timeout,\n            \"max_retries\": self.max_retries_,\n        }\n        if async_version:\n            from openai import AsyncOpenAI\n\n            return AsyncOpenAI(**params)\n\n        from openai import OpenAI\n\n        return OpenAI(**params)\n\n    def prepare_params(self, **kwargs):\n        if \"tools_pydantic\" in kwargs:\n            kwargs.pop(\"tools_pydantic\")\n\n        params_ = {\n            \"model\": self.model,\n            \"temperature\": self.temperature,\n            \"max_tokens\": self.max_tokens,\n            \"n\": self.n,\n            \"stop\": self.stop,\n            \"frequency_penalty\": self.frequency_penalty,\n            \"presence_penalty\": self.presence_penalty,\n            \"tool_choice\": self.tool_choice,\n            \"tools\": self.tools,\n            \"logprobs\": self.logprobs,\n            \"logit_bias\": self.logit_bias,\n            \"top_logprobs\": self.top_logprobs,\n            \"top_p\": self.top_p,\n        }\n        params = {k: v for k, v in params_.items() if v is not None}\n        params.update(kwargs)\n\n        return params\n\n    def openai_response(self, client, **kwargs):\n        \"\"\"Get the openai response\"\"\"\n        params = self.prepare_params(**kwargs)\n        return client.chat.completions.create(**params)\n\n    async def aopenai_response(self, client, **kwargs):\n        params = self.prepare_params(**kwargs)\n        return await client.chat.completions.create(**params)\n\n\nclass StructuredOutputChatOpenAI(ChatOpenAI):\n    \"\"\"OpenAI chat model that returns structured output\"\"\"\n\n    response_schema: Type[BaseModel] = Param(\n        help=\"class that subclasses pydantics BaseModel\", required=True\n    )\n\n    def 
prepare_output(self, resp: dict) -> StructuredOutputLLMInterface:\n        \"\"\"Convert the OpenAI response into StructuredOutputLLMInterface\"\"\"\n        additional_kwargs = {}\n\n        if \"tool_calls\" in resp[\"choices\"][0][\"message\"]:\n            additional_kwargs[\"tool_calls\"] = resp[\"choices\"][0][\"message\"][\n                \"tool_calls\"\n            ]\n\n        if resp[\"choices\"][0].get(\"logprobs\") is None:\n            logprobs = []\n        else:\n            all_logprobs = resp[\"choices\"][0][\"logprobs\"].get(\"content\")\n            logprobs = (\n                [logprob[\"logprob\"] for logprob in all_logprobs] if all_logprobs else []\n            )\n\n        output = StructuredOutputLLMInterface(\n            parsed=resp[\"choices\"][0][\"message\"][\"parsed\"],\n            candidates=[(_[\"message\"][\"content\"] or \"\") for _ in resp[\"choices\"]],\n            content=resp[\"choices\"][0][\"message\"][\"content\"] or \"\",\n            total_tokens=resp[\"usage\"][\"total_tokens\"],\n            prompt_tokens=resp[\"usage\"][\"prompt_tokens\"],\n            completion_tokens=resp[\"usage\"][\"completion_tokens\"],\n            messages=[\n                AIMessage(content=(_[\"message\"][\"content\"]) or \"\")\n                for _ in resp[\"choices\"]\n            ],\n            additional_kwargs=additional_kwargs,\n            logprobs=logprobs,\n        )\n\n        return output\n\n    def prepare_params(self, **kwargs):\n        if \"tools_pydantic\" in kwargs:\n            kwargs.pop(\"tools_pydantic\")\n\n        params_ = {\n            \"model\": self.model,\n            \"temperature\": self.temperature,\n            \"max_tokens\": self.max_tokens,\n            \"n\": self.n,\n            \"stop\": self.stop,\n            \"frequency_penalty\": self.frequency_penalty,\n            \"presence_penalty\": self.presence_penalty,\n            \"tool_choice\": self.tool_choice,\n            \"tools\": 
self.tools,\n            \"logprobs\": self.logprobs,\n            \"logit_bias\": self.logit_bias,\n            \"top_logprobs\": self.top_logprobs,\n            \"top_p\": self.top_p,\n            \"response_format\": self.response_schema,\n        }\n        params = {k: v for k, v in params_.items() if v is not None}\n        params.update(kwargs)\n\n        # doesn't do streaming\n        params.pop(\"stream\")\n\n        return params\n\n    def openai_response(self, client, **kwargs):\n        \"\"\"Get the openai response\"\"\"\n        params = self.prepare_params(**kwargs)\n\n        return client.beta.chat.completions.parse(**params)\n\n    async def aopenai_response(self, client, **kwargs):\n        \"\"\"Get the openai response\"\"\"\n        params = self.prepare_params(**kwargs)\n\n        return await client.beta.chat.completions.parse(**params)\n\n\nclass AzureChatOpenAI(BaseChatOpenAI):\n    \"\"\"OpenAI chat model provided by Microsoft Azure\"\"\"\n\n    azure_endpoint: str = Param(\n        help=(\n            \"HTTPS endpoint for the Azure OpenAI model. 
The azure_endpoint, \"\n            \"azure_deployment, and api_version parameters are used to construct \"\n            \"the full URL for the Azure OpenAI model.\"\n        ),\n        required=True,\n    )\n    azure_deployment: str = Param(help=\"Azure deployment name\", required=True)\n    api_version: str = Param(help=\"Azure model version\", required=True)\n    azure_ad_token: Optional[str] = Param(None, help=\"Azure AD token\")\n    azure_ad_token_provider: Optional[str] = Param(None, help=\"Azure AD token provider\")\n\n    @Param.auto(depends_on=[\"azure_ad_token_provider\"])\n    def azure_ad_token_provider_(self):\n        if isinstance(self.azure_ad_token_provider, str):\n            return import_dotted_string(self.azure_ad_token_provider, safe=False)\n\n    def prepare_client(self, async_version: bool = False):\n        \"\"\"Get the OpenAI client\n\n        Args:\n            async_version (bool): Whether to get the async version of the client\n        \"\"\"\n        params = {\n            \"azure_endpoint\": self.azure_endpoint,\n            \"api_version\": self.api_version,\n            \"api_key\": self.api_key,\n            \"azure_ad_token\": self.azure_ad_token,\n            \"azure_ad_token_provider\": self.azure_ad_token_provider_,\n            \"timeout\": self.timeout,\n            \"max_retries\": self.max_retries_,\n        }\n        if async_version:\n            from openai import AsyncAzureOpenAI\n\n            return AsyncAzureOpenAI(**params)\n\n        from openai import AzureOpenAI\n\n        return AzureOpenAI(**params)\n\n    def prepare_params(self, **kwargs):\n        if \"tools_pydantic\" in kwargs:\n            kwargs.pop(\"tools_pydantic\")\n\n        params_ = {\n            \"model\": self.azure_deployment,\n            \"temperature\": self.temperature,\n            \"max_tokens\": self.max_tokens,\n            \"n\": self.n,\n            \"stop\": self.stop,\n            \"frequency_penalty\": 
self.frequency_penalty,\n            \"presence_penalty\": self.presence_penalty,\n            \"tool_choice\": self.tool_choice,\n            \"tools\": self.tools,\n            \"logprobs\": self.logprobs,\n            \"logit_bias\": self.logit_bias,\n            \"top_logprobs\": self.top_logprobs,\n            \"top_p\": self.top_p,\n        }\n        params = {k: v for k, v in params_.items() if v is not None}\n        params.update(kwargs)\n\n        return params\n\n    def openai_response(self, client, **kwargs):\n        \"\"\"Get the openai response\"\"\"\n        params = self.prepare_params(**kwargs)\n        return client.chat.completions.create(**params)\n\n    async def aopenai_response(self, client, **kwargs):\n        params = self.prepare_params(**kwargs)\n        return await client.chat.completions.create(**params)\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/llms/completions/__init__.py",
    "content": "from .base import LLM\nfrom .langchain_based import AzureOpenAI, LCCompletionMixin, LlamaCpp, OpenAI\n\n__all__ = [\"LLM\", \"OpenAI\", \"AzureOpenAI\", \"LCCompletionMixin\", \"LlamaCpp\"]\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/llms/completions/base.py",
    "content": "from kotaemon.llms.base import BaseLLM\n\n\nclass LLM(BaseLLM):\n    pass\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/llms/completions/langchain_based.py",
    "content": "import logging\nfrom typing import Optional\n\nfrom kotaemon.base import LLMInterface\n\nfrom .base import LLM\n\nlogger = logging.getLogger(__name__)\n\n\nclass LCCompletionMixin:\n    def _get_lc_class(self):\n        raise NotImplementedError(\n            \"Please return the relevant Langchain class in in _get_lc_class\"\n        )\n\n    def __init__(self, **params):\n        self._lc_class = self._get_lc_class()\n        self._obj = self._lc_class(**params)\n        self._kwargs: dict = params\n\n        super().__init__()\n\n    def run(self, text: str) -> LLMInterface:\n        pred = self._obj.generate([text])\n        all_text = [each.text for each in pred.generations[0]]\n\n        completion_tokens, total_tokens, prompt_tokens = 0, 0, 0\n        try:\n            if pred.llm_output is not None:\n                completion_tokens = pred.llm_output[\"token_usage\"][\"completion_tokens\"]\n                total_tokens = pred.llm_output[\"token_usage\"][\"total_tokens\"]\n                prompt_tokens = pred.llm_output[\"token_usage\"][\"prompt_tokens\"]\n        except Exception:\n            logger.warning(\n                f\"Cannot get token usage from LLM output for {self._lc_class.__name__}\"\n            )\n\n        return LLMInterface(\n            text=all_text[0] if len(all_text) > 0 else \"\",\n            candidates=all_text,\n            completion_tokens=completion_tokens,\n            total_tokens=total_tokens,\n            prompt_tokens=prompt_tokens,\n            logits=[],\n        )\n\n    def to_langchain_format(self):\n        return self._obj\n\n    def __repr__(self):\n        kwargs = []\n        for key, value_obj in self._kwargs.items():\n            value = repr(value_obj)\n            kwargs.append(f\"{key}={value}\")\n        kwargs_repr = \", \".join(kwargs)\n        return f\"{self.__class__.__name__}({kwargs_repr})\"\n\n    def __str__(self):\n        kwargs = []\n        for key, value_obj in 
self._kwargs.items():\n            value = str(value_obj)\n            if len(value) > 20:\n                value = f\"{value[:15]}...\"\n            kwargs.append(f\"{key}={value}\")\n        kwargs_repr = \", \".join(kwargs)\n        return f\"{self.__class__.__name__}({kwargs_repr})\"\n\n    def __setattr__(self, name, value):\n        if name == \"_lc_class\":\n            return super().__setattr__(name, value)\n\n        if name in self._lc_class.__fields__:\n            self._kwargs[name] = value\n            self._obj = self._lc_class(**self._kwargs)\n        else:\n            super().__setattr__(name, value)\n\n    def __getattr__(self, name):\n        if name in self._kwargs:\n            return self._kwargs[name]\n        return getattr(self._obj, name)\n\n    def dump(self, *args, **kwargs):\n        from theflow.utils.modules import serialize\n\n        params = {key: serialize(value) for key, value in self._kwargs.items()}\n        return {\n            \"__type__\": f\"{self.__module__}.{self.__class__.__qualname__}\",\n            **params,\n        }\n\n    def specs(self, path: str):\n        path = path.strip(\".\")\n        if \".\" in path:\n            raise ValueError(\"path should not contain '.'\")\n\n        if path in self._lc_class.__fields__:\n            return {\n                \"__type__\": \"theflow.base.ParamAttr\",\n                \"refresh_on_set\": True,\n                \"strict_type\": True,\n            }\n\n        raise ValueError(f\"Invalid param {path}\")\n\n\nclass OpenAI(LCCompletionMixin, LLM):\n    \"\"\"Wrapper around Langchain's OpenAI class, focusing on key parameters\"\"\"\n\n    def __init__(\n        self,\n        openai_api_key: Optional[str] = None,\n        openai_api_base: Optional[str] = None,\n        model_name: str = \"text-davinci-003\",\n        temperature: float = 0.7,\n        max_tokens: int = 256,\n        top_p: float = 1,\n        frequency_penalty: float = 0,\n        n: int = 1,\n        
best_of: int = 1,\n        request_timeout: Optional[float] = None,\n        max_retries: int = 2,\n        streaming: bool = False,\n        **params,\n    ):\n        super().__init__(\n            openai_api_key=openai_api_key,\n            openai_api_base=openai_api_base,\n            model_name=model_name,\n            temperature=temperature,\n            max_tokens=max_tokens,\n            top_p=top_p,\n            frequency_penalty=frequency_penalty,\n            n=n,\n            best_of=best_of,\n            request_timeout=request_timeout,\n            max_retries=max_retries,\n            streaming=streaming,\n            **params,\n        )\n\n    def _get_lc_class(self):\n        try:\n            from langchain_openai import OpenAI\n        except ImportError:\n            from langchain.llms import OpenAI\n\n        return OpenAI\n\n\nclass AzureOpenAI(LCCompletionMixin, LLM):\n    \"\"\"Wrapper around Langchain's AzureOpenAI class, focusing on key parameters\"\"\"\n\n    def __init__(\n        self,\n        azure_endpoint: Optional[str] = None,\n        deployment_name: Optional[str] = None,\n        openai_api_version: str = \"\",\n        openai_api_key: Optional[str] = None,\n        model_name: str = \"text-davinci-003\",\n        temperature: float = 0.7,\n        max_tokens: int = 256,\n        top_p: float = 1,\n        frequency_penalty: float = 0,\n        n: int = 1,\n        best_of: int = 1,\n        request_timeout: Optional[float] = None,\n        max_retries: int = 2,\n        streaming: bool = False,\n        **params,\n    ):\n        super().__init__(\n            azure_endpoint=azure_endpoint,\n            deployment_name=deployment_name,\n            openai_api_version=openai_api_version,\n            openai_api_key=openai_api_key,\n            model_name=model_name,\n            temperature=temperature,\n            max_tokens=max_tokens,\n            top_p=top_p,\n            frequency_penalty=frequency_penalty,\n            
n=n,\n            best_of=best_of,\n            request_timeout=request_timeout,\n            max_retries=max_retries,\n            streaming=streaming,\n            **params,\n        )\n\n    def _get_lc_class(self):\n        try:\n            from langchain_openai import AzureOpenAI\n        except ImportError:\n            from langchain.llms import AzureOpenAI\n\n        return AzureOpenAI\n\n\nclass LlamaCpp(LCCompletionMixin, LLM):\n    \"\"\"Wrapper around Langchain's LlamaCpp class, focusing on key parameters\"\"\"\n\n    def __init__(\n        self,\n        model_path: str,\n        lora_base: Optional[str] = None,\n        n_ctx: int = 512,\n        n_gpu_layers: Optional[int] = None,\n        use_mmap: bool = True,\n        **params,\n    ):\n        super().__init__(\n            model_path=model_path,\n            lora_base=lora_base,\n            n_ctx=n_ctx,\n            n_gpu_layers=n_gpu_layers,\n            use_mmap=use_mmap,\n            **params,\n        )\n\n    def _get_lc_class(self):\n        try:\n            from langchain_community.llms import LlamaCpp\n        except ImportError:\n            from langchain.llms import LlamaCpp\n\n        return LlamaCpp\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/llms/cot.py",
    "content": "from copy import deepcopy\nfrom typing import Callable, List\n\nfrom theflow import Function, Node, Param\n\nfrom kotaemon.base import BaseComponent, Document\n\nfrom .chats import LCAzureChatOpenAI\nfrom .completions import LLM\nfrom .prompts import BasePromptComponent\n\n\nclass Thought(BaseComponent):\n    \"\"\"A thought in the chain of thought\n\n    - Input: `**kwargs` pairs, where key is the placeholder in the prompt, and\n    value is the value.\n    - Output: an output dictionary\n\n    _**Usage:**_\n\n    Create and run a thought:\n\n    ```python\n    >> from kotaemon.pipelines.cot import Thought\n    >> thought = Thought(\n         prompt=\"How to {action} {object}?\",\n         llm=LCAzureChatOpenAI(...),\n         post_process=lambda string: {\"tutorial\": string},\n       )\n    >> output = thought(action=\"install\", object=\"python\")\n    >> print(output)\n    {'tutorial': 'As an AI language model,...'}\n    ```\n\n    Basically, when a thought is run, it will:\n\n    1. Populate the prompt template with the input `**kwargs`.\n    2. Run the LLM model with the populated prompt.\n    3. Post-process the LLM output with the post-processor.\n\n    This `Thought` allows chaining sequentially with the + operator. 
For example:\n\n    ```python\n    >> llm = LCAzureChatOpenAI(...)\n    >> thought1 = Thought(\n           prompt=\"Word {word} in {language} is \",\n           llm=llm,\n           post_process=lambda string: {\"translated\": string},\n       )\n    >> thought2 = Thought(\n            prompt=\"Translate {translated} to Japanese\",\n            llm=llm,\n            post_process=lambda string: {\"output\": string},\n       )\n\n    >> thought = thought1 + thought2\n    >> thought(word=\"hello\", language=\"French\")\n    {'word': 'hello',\n     'language': 'French',\n     'translated': '\"Bonjour\"',\n     'output': 'こんにちは (Konnichiwa)'}\n    ```\n\n    Under the hood, when the `+` operator is used, a `ManualSequentialChainOfThought`\n    is created.\n    \"\"\"\n\n    prompt: str = Param(\n        help=(\n            \"The prompt template string. This prompt template has Python-like variable\"\n            \" placeholders, that then will be substituted with real values when this\"\n            \" component is executed\"\n        )\n    )\n    llm: LLM = Node(LCAzureChatOpenAI, help=\"The LLM model to execute the input prompt\")\n    post_process: Function = Node(\n        help=(\n            \"The function post-processor that post-processes LLM output prediction .\"\n            \"It should take a string as input (this is the LLM output text) and return \"\n            \"a dictionary, where the key should\"\n        )\n    )\n\n    @Node.auto(depends_on=\"prompt\")\n    def prompt_template(self):\n        \"\"\"Automatically wrap around param prompt. 
Can ignore\"\"\"\n        return BasePromptComponent(template=self.prompt)\n\n    def run(self, **kwargs) -> Document:\n        \"\"\"Run the chain of thought\"\"\"\n        prompt = self.prompt_template(**kwargs).text\n        response = self.llm(prompt).text\n        response = self.post_process(response)\n\n        return Document(response)\n\n    def get_variables(self) -> List[str]:\n        return []\n\n    def __add__(self, next_thought: \"Thought\") -> \"ManualSequentialChainOfThought\":\n        return ManualSequentialChainOfThought(\n            thoughts=[self, next_thought], llm=self.llm\n        )\n\n\nclass ManualSequentialChainOfThought(BaseComponent):\n    \"\"\"Perform sequential chain-of-thought with manual pre-defined prompts\n\n    This method supports variable number of steps. Each step corresponds to a\n    `kotaemon.pipelines.cot.Thought`. Please refer to that section for\n    Thought's detail. This section is about chaining thought together.\n\n    _**Usage:**_\n\n    **Create and run a chain of thought without \"+\" operator:**\n\n    ```pycon\n    >>> from kotaemon.pipelines.cot import Thought, ManualSequentialChainOfThought\n    >>> llm = LCAzureChatOpenAI(...)\n    >>> thought1 = Thought(\n    >>>    prompt=\"Word {word} in {language} is \",\n    >>>    post_process=lambda string: {\"translated\": string},\n    >>> )\n    >>> thought2 = Thought(\n    >>>     prompt=\"Translate {translated} to Japanese\",\n    >>>     post_process=lambda string: {\"output\": string},\n    >>> )\n    >>> thought = ManualSequentialChainOfThought(thoughts=[thought1, thought2], llm=llm)\n    >>> thought(word=\"hello\", language=\"French\")\n    {'word': 'hello',\n     'language': 'French',\n     'translated': '\"Bonjour\"',\n     'output': 'こんにちは (Konnichiwa)'}\n    ```\n\n    **Create and run a chain of thought with \"+\" operator:** Please refer to the\n    `kotaemon.pipelines.cot.Thought` section for examples.\n\n    This chain-of-thought optionally takes a 
termination check callback function.\n    This function will be called after each thought is executed. It takes in a\n    dictionary of all thought outputs so far, and it returns True or False. If\n    True, the chain-of-thought will terminate. If unset, the default callback always\n    returns False.\n    \"\"\"\n\n    thoughts: List[Thought] = Param(\n        default_callback=lambda *_: [], help=\"List of Thought\"\n    )\n    llm: LLM = Param(help=\"The LLM model to use (base of kotaemon.llms.BaseLLM)\")\n    terminate: Callable = Param(\n        default=lambda _: False,\n        help=\"Callback on terminate condition. Default to always return False\",\n    )\n\n    def run(self, **kwargs) -> Document:\n        \"\"\"Run the manual chain of thought\"\"\"\n\n        inputs = deepcopy(kwargs)\n        for idx, thought in enumerate(self.thoughts):\n            if self.llm:\n                thought.llm = self.llm\n            self._prepare_child(thought, f\"thought{idx}\")\n\n            output = thought(**inputs)\n            inputs.update(output.content)\n            if self.terminate(inputs):\n                break\n\n        return Document(inputs)\n\n    def __add__(self, next_thought: Thought) -> \"ManualSequentialChainOfThought\":\n        return ManualSequentialChainOfThought(\n            thoughts=self.thoughts + [next_thought], llm=self.llm\n        )\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/llms/linear.py",
    "content": "from typing import Any, Callable, Optional, Union\n\nfrom ..base import BaseComponent\nfrom ..base.schema import Document, IO_Type\nfrom .chats import ChatLLM\nfrom .completions import LLM\nfrom .prompts import BasePromptComponent\n\n\nclass SimpleLinearPipeline(BaseComponent):\n    \"\"\"\n    A simple pipeline for running a function with a prompt, a language model, and an\n        optional post-processor.\n\n    Attributes:\n        prompt (BasePromptComponent): The prompt component used to generate the initial\n            input.\n        llm (Union[ChatLLM, LLM]): The language model component used to generate the\n            output.\n        post_processor (Union[BaseComponent, Callable[[IO_Type], IO_Type]]): An optional\n            post-processor component or function.\n\n    Example Usage:\n        ```python\n        from kotaemon.llms import LCAzureChatOpenAI, BasePromptComponent\n\n        def identity(x):\n            return x\n\n        llm = LCAzureChatOpenAI(\n            openai_api_base=\"your openai api base\",\n            openai_api_key=\"your openai api key\",\n            openai_api_version=\"your openai api version\",\n            deployment_name=\"dummy-q2-gpt35\",\n            temperature=0,\n            request_timeout=600,\n        )\n\n        pipeline = SimpleLinearPipeline(\n            prompt=BasePromptComponent(template=\"what is {word} in Japanese ?\"),\n            llm=llm,\n            post_processor=identity,\n        )\n        print(pipeline(word=\"lone\"))\n        ```\n    \"\"\"\n\n    prompt: BasePromptComponent\n    llm: Union[ChatLLM, LLM]\n    post_processor: Union[BaseComponent, Callable[[IO_Type], IO_Type]]\n\n    def run(\n        self,\n        *,\n        llm_kwargs: Optional[dict] = {},\n        post_processor_kwargs: Optional[dict] = {},\n        **prompt_kwargs,\n    ):\n        \"\"\"\n        Run the function with the given arguments and return the final output as a\n            Document 
object.\n\n        Args:\n            llm_kwargs (dict): Keyword arguments for the llm call.\n            post_processor_kwargs (dict): Keyword arguments for the post_processor.\n            **prompt_kwargs: Keyword arguments for populating the prompt.\n\n        Returns:\n            Document: The final output of the function as a Document object.\n        \"\"\"\n        prompt = self.prompt(**prompt_kwargs)\n        llm_output = self.llm(prompt.text, **llm_kwargs)\n        if self.post_processor is not None:\n            final_output = self.post_processor(llm_output, **post_processor_kwargs)[0]\n        else:\n            final_output = llm_output\n\n        return Document(final_output)\n\n\nclass GatedLinearPipeline(SimpleLinearPipeline):\n    \"\"\"\n    A pipeline that extends the SimpleLinearPipeline class and adds a condition\n        attribute.\n\n    Attributes:\n        condition (Callable[[IO_Type], Any]): A callable function that represents the\n            condition.\n\n    Usage:\n        ```{.py3 title=\"Example Usage\"}\n        from kotaemon.llms import LCAzureChatOpenAI, BasePromptComponent\n        from kotaemon.parsers import RegexExtractor\n\n        def identity(x):\n            return x\n\n        llm = LCAzureChatOpenAI(\n            openai_api_base=\"your openai api base\",\n            openai_api_key=\"your openai api key\",\n            openai_api_version=\"your openai api version\",\n            deployment_name=\"dummy-q2-gpt35\",\n            temperature=0,\n            request_timeout=600,\n        )\n\n        pipeline = GatedLinearPipeline(\n            prompt=BasePromptComponent(template=\"what is {word} in Japanese ?\"),\n            condition=RegexExtractor(pattern=\"some pattern\"),\n            llm=llm,\n            post_processor=identity,\n        )\n        print(pipeline(condition_text=\"some pattern\", word=\"lone\"))\n        print(pipeline(condition_text=\"other pattern\", word=\"lone\"))\n        ```\n    \"\"\"\n\n    
condition: Callable[[IO_Type], Any]\n\n    def run(\n        self,\n        *,\n        condition_text: Optional[str] = None,\n        llm_kwargs: Optional[dict] = {},\n        post_processor_kwargs: Optional[dict] = {},\n        **prompt_kwargs,\n    ) -> Document:\n        \"\"\"\n        Run the pipeline with the given arguments and return the final output as a\n            Document object.\n\n        Args:\n            condition_text (str): The condition text to evaluate. Default to None.\n            llm_kwargs (dict): Additional keyword arguments for the language model call.\n            post_processor_kwargs (dict): Additional keyword arguments for the\n                post-processor.\n            **prompt_kwargs: Keyword arguments for populating the prompt.\n\n        Returns:\n            Document: The final output of the pipeline as a Document object.\n\n        Raises:\n            ValueError: If condition_text is None\n        \"\"\"\n        if condition_text is None:\n            raise ValueError(\"`condition_text` must be provided\")\n\n        if self.condition(condition_text)[0]:\n            return super().run(\n                llm_kwargs=llm_kwargs,\n                post_processor_kwargs=post_processor_kwargs,\n                **prompt_kwargs,\n            )\n\n        return Document(None)\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/llms/prompts/__init__.py",
    "content": "from .base import BasePromptComponent\nfrom .template import PromptTemplate\n\n__all__ = [\"BasePromptComponent\", \"PromptTemplate\"]\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/llms/prompts/base.py",
    "content": "from typing import Callable\n\nfrom theflow import Param\n\nfrom kotaemon.base import BaseComponent, Document\n\nfrom .template import PromptTemplate\n\n\nclass BasePromptComponent(BaseComponent):\n    \"\"\"\n    Base class for prompt components.\n\n    Args:\n        template (PromptTemplate): The prompt template.\n        **kwargs: Any additional keyword arguments that will be used to populate the\n            given template.\n    \"\"\"\n\n    class Config:\n        middleware_switches = {\"theflow.middleware.CachingMiddleware\": False}\n        allow_extra = True\n\n    template: str | PromptTemplate\n\n    @Param.auto(depends_on=\"template\")\n    def template__(self):\n        return (\n            self.template\n            if isinstance(self.template, PromptTemplate)\n            else PromptTemplate(self.template)\n        )\n\n    def __init__(self, **kwargs):\n        super().__init__(**kwargs)\n        self.__set(**kwargs)\n\n    def __check_redundant_kwargs(self, **kwargs):\n        \"\"\"\n        Check for redundant keyword arguments.\n\n        Parameters:\n            **kwargs (dict): A dictionary of keyword arguments.\n\n        Raises:\n            ValueError: If any keys provided are not in the template.\n\n        Returns:\n            None\n        \"\"\"\n        self.template__.check_redundant_kwargs(**kwargs)\n\n    def __check_unset_placeholders(self):\n        \"\"\"\n        Check if all the placeholders in the template are set.\n\n        This function checks if all the expected placeholders in the template are set as\n            attributes of the object. 
If any placeholders are missing, a `ValueError`\n            is raised with the names of the missing keys.\n\n        Parameters:\n            None\n\n        Returns:\n            None\n        \"\"\"\n        self.template__.check_missing_kwargs(**self.__dict__)\n\n    def __validate_value_type(self, **kwargs):\n        \"\"\"\n        Validates the value types of the given keyword arguments.\n\n        Parameters:\n            **kwargs (dict): A dictionary of keyword arguments to be validated.\n\n        Raises:\n            ValueError: If any of the values in the kwargs dictionary have an\n                unsupported type.\n\n        Returns:\n            None\n        \"\"\"\n        type_error = []\n        for k, v in kwargs.items():\n            if k.startswith(\"template\"):\n                continue\n            if not isinstance(v, (str, int, Document, Callable)):  # type: ignore\n                type_error.append((k, type(v)))\n\n        if type_error:\n            raise ValueError(\n                \"Type of values must be either int, str, Document, Callable, \"\n                f\"found unsupported type for (key, type): {type_error}\"\n            )\n\n    def __set(self, **kwargs):\n        \"\"\"\n        Set the values of the attributes in the object based on the provided keyword\n            arguments.\n\n        Args:\n            kwargs (dict): A dictionary with the attribute names as keys and the new\n                values as values.\n\n        Returns:\n            None\n        \"\"\"\n        self.__check_redundant_kwargs(**kwargs)\n        self.__validate_value_type(**kwargs)\n\n        self.__dict__.update(kwargs)\n\n    def __prepare_value(self):\n        \"\"\"\n        Generate a dictionary of keyword arguments based on the template's placeholders\n            and the current instance's attributes.\n\n        Returns:\n            dict: A dictionary of keyword arguments.\n        \"\"\"\n\n        def __prepare(key, value):\n           
 if isinstance(value, str):\n                return value\n            if isinstance(value, (int, Document)):\n                return str(value)\n\n            raise ValueError(\n                f\"Unsupported type {type(value)} for template value of key {key}\"\n            )\n\n        kwargs = {}\n        for k in self.template__.placeholders:\n            v = getattr(self, k)\n\n            # if get a callable, execute to get its output\n            if isinstance(v, Callable):  # type: ignore[arg-type]\n                v = v()\n\n            if isinstance(v, list):\n                v = str([__prepare(k, each) for each in v])\n            elif isinstance(v, (str, int, Document)):\n                v = __prepare(k, v)\n            else:\n                raise ValueError(\n                    f\"Unsupported type {type(v)} for template value of key `{k}`\"\n                )\n            kwargs[k] = v\n\n        return kwargs\n\n    def set_value(self, **kwargs):\n        \"\"\"\n        Similar to `__set` but for external use.\n\n        Set the values of the attributes in the object based on the provided keyword\n            arguments.\n\n        Args:\n            kwargs (dict): A dictionary with the attribute names as keys and the new\n                values as values.\n\n        Returns:\n            None\n        \"\"\"\n        self.__set(**kwargs)\n\n    def run(self, **kwargs):\n        \"\"\"\n        Run the function with the given keyword arguments.\n\n        Args:\n            **kwargs: The keyword arguments to pass to the function.\n\n        Returns:\n            The result of calling the `populate` method of the `template` object\n            with the given keyword arguments.\n        \"\"\"\n        self.__set(**kwargs)\n        self.__check_unset_placeholders()\n        prepared_kwargs = self.__prepare_value()\n\n        text = self.template__.populate(**prepared_kwargs)\n        return Document(text=text, metadata={\"origin\": 
\"PromptComponent\"})\n\n    def flow(self):\n        return self.__call__()\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/llms/prompts/template.py",
    "content": "import warnings\nfrom string import Formatter\n\n\nclass PromptTemplate:\n    \"\"\"\n    Base class for prompt templates.\n    \"\"\"\n\n    def __init__(self, template: str, ignore_invalid=True):\n        template = template\n        formatter = Formatter()\n        parsed_template = list(formatter.parse(template))\n\n        placeholders = set()\n        for _, key, _, _ in parsed_template:\n            if key is None:\n                continue\n            if not key.isidentifier():\n                if ignore_invalid:\n                    warnings.warn(f\"Ignore invalid placeholder: {key}.\", UserWarning)\n                else:\n                    raise ValueError(\n                        \"Placeholder name must be a valid Python identifier, found:\"\n                        f\" {key}.\"\n                    )\n            placeholders.add(key)\n\n        self.template = template\n        self.placeholders = placeholders\n        self.__formatter = formatter\n        self.__parsed_template = parsed_template\n\n    def check_missing_kwargs(self, **kwargs):\n        \"\"\"\n        Check if all the placeholders in the template are set.\n\n        This function checks if all the expected placeholders in the template are set as\n            attributes of the object. If any placeholders are missing, a `ValueError`\n            is raised with the names of the missing keys.\n\n        Parameters:\n            None\n\n        Returns:\n            None\n        \"\"\"\n        missing_keys = self.placeholders.difference(kwargs.keys())\n        if missing_keys:\n            raise ValueError(f\"Missing keys in template: {','.join(missing_keys)}\")\n\n    def check_redundant_kwargs(self, **kwargs):\n        \"\"\"\n        Check if all the placeholders in the template are set.\n\n        This function checks if all the expected placeholders in the template are set as\n            attributes of the object. 
If any placeholders are missing, a `ValueError`\n            is raised with the names of the missing keys.\n\n        Parameters:\n            None\n\n        Returns:\n            None\n        \"\"\"\n        provided_keys = set(kwargs.keys())\n        redundant_keys = provided_keys - self.placeholders\n\n        if redundant_keys:\n            warnings.warn(\n                f\"Keys provided but not in template: {','.join(redundant_keys)}\",\n                UserWarning,\n            )\n\n    def populate(self, safe=True, **kwargs) -> str:\n        \"\"\"\n        Strictly populate the template with the given keyword arguments.\n\n        Args:\n            safe (bool): If True (default), raise when a placeholder has no value.\n            **kwargs: The keyword arguments to populate the template.\n                      Each keyword corresponds to a placeholder in the template.\n\n        Returns:\n            The populated template.\n\n        Raises:\n            ValueError: If `safe` is True and a placeholder is missing from kwargs.\n        \"\"\"\n        if safe:\n            self.check_missing_kwargs(**kwargs)\n\n        return self.partial_populate(**kwargs)\n\n    def partial_populate(self, **kwargs):\n        \"\"\"\n        Partially populate the template with the given keyword arguments.\n\n        Args:\n            **kwargs: The keyword arguments to populate the template.\n                      Each keyword corresponds to a placeholder in the template.\n\n        Returns:\n            str: The populated template.\n        \"\"\"\n        self.check_redundant_kwargs(**kwargs)\n\n        prompt = []\n        for literal_text, field_name, format_spec, conversion in self.__parsed_template:\n            prompt.append(literal_text)\n\n            if field_name is None:\n                continue\n\n            if field_name not in kwargs:\n                if conversion:\n                    value = f\"{{{field_name}}}!{conversion}:{format_spec}\"\n                else:\n                    value = f\"{{{field_name}:{format_spec}}}\"\n            else:\n       
         value = kwargs[field_name]\n                if conversion is not None:\n                    value = self.__formatter.convert_field(value, conversion)\n                if format_spec is not None:\n                    value = self.__formatter.format_field(value, format_spec)\n\n            prompt.append(value)\n\n        return \"\".join(prompt)\n\n    def __add__(self, other):\n        \"\"\"\n        Create a new PromptTemplate object by concatenating the template of the current\n            object with the template of another PromptTemplate object.\n\n        Parameters:\n            other (PromptTemplate): Another PromptTemplate object.\n\n        Returns:\n            PromptTemplate: A new PromptTemplate object with the concatenated templates.\n        \"\"\"\n        return PromptTemplate(self.template + \"\\n\" + other.template)\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/loaders/__init__.py",
    "content": "from .adobe_loader import AdobeReader\nfrom .azureai_document_intelligence_loader import AzureAIDocumentIntelligenceLoader\nfrom .base import AutoReader, BaseReader\nfrom .composite_loader import DirectoryReader\nfrom .docling_loader import DoclingReader\nfrom .docx_loader import DocxReader\nfrom .excel_loader import ExcelReader, PandasExcelReader\nfrom .html_loader import HtmlReader, MhtmlReader\nfrom .mathpix_loader import MathpixPDFReader\nfrom .ocr_loader import ImageReader, OCRReader\nfrom .pdf_loader import PDFThumbnailReader\nfrom .txt_loader import TxtReader\nfrom .unstructured_loader import UnstructuredReader\nfrom .web_loader import WebReader\n\n__all__ = [\n    \"AutoReader\",\n    \"AzureAIDocumentIntelligenceLoader\",\n    \"BaseReader\",\n    \"PandasExcelReader\",\n    \"ExcelReader\",\n    \"MathpixPDFReader\",\n    \"ImageReader\",\n    \"OCRReader\",\n    \"DirectoryReader\",\n    \"UnstructuredReader\",\n    \"DocxReader\",\n    \"HtmlReader\",\n    \"MhtmlReader\",\n    \"AdobeReader\",\n    \"TxtReader\",\n    \"PDFThumbnailReader\",\n    \"WebReader\",\n    \"DoclingReader\",\n]\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/loaders/adobe_loader.py",
    "content": "import logging\nimport os\nimport re\nfrom collections import defaultdict\nfrom pathlib import Path\nfrom typing import Any, Dict, List, Optional\n\nfrom decouple import config\nfrom llama_index.core.readers.base import BaseReader\n\nfrom kotaemon.base import Document\n\nlogger = logging.getLogger(__name__)\n\nDEFAULT_VLM_ENDPOINT = (\n    \"{0}openai/deployments/{1}/chat/completions?api-version={2}\".format(\n        config(\"AZURE_OPENAI_ENDPOINT\", default=\"\"),\n        \"gpt-4-vision\",\n        config(\"OPENAI_API_VERSION\", default=\"\"),\n    )\n)\n\n\nclass AdobeReader(BaseReader):\n    \"\"\"Read PDF using the Adobe's PDF Services.\n    Be able to extract text, table, and figure with high accuracy\n\n    Example:\n        ```python\n        >> from kotaemon.loaders import AdobeReader\n        >> reader = AdobeReader()\n        >> documents = reader.load_data(\"path/to/pdf\")\n        ```\n    Args:\n        endpoint: URL to the Vision Language Model endpoint. If not provided,\n        will use the default `kotaemon.loaders.adobe_loader.DEFAULT_VLM_ENDPOINT`\n\n        max_figures_to_caption: an int decides how many figured will be captioned.\n        The rest will be ignored (are indexed without captions).\n    \"\"\"\n\n    def __init__(\n        self,\n        vlm_endpoint: Optional[str] = None,\n        max_figures_to_caption: int = 100,\n        *args: Any,\n        **kwargs: Any,\n    ) -> None:\n        \"\"\"Init params\"\"\"\n        super().__init__(*args)\n        self.table_regex = r\"/Table(\\[\\d+\\])?$\"\n        self.figure_regex = r\"/Figure(\\[\\d+\\])?$\"\n        self.vlm_endpoint = vlm_endpoint or DEFAULT_VLM_ENDPOINT\n        self.max_figures_to_caption = max_figures_to_caption\n\n    def load_data(\n        self, file: Path, extra_info: Optional[Dict] = None, **kwargs\n    ) -> List[Document]:\n        \"\"\"Load data by calling to the Adobe's API\n\n        Args:\n            file (Path): Path to the PDF file\n\n    
    Returns:\n            List[Document]: list of documents extracted from the PDF file,\n                includes 3 types: text, table, and image\n\n        \"\"\"\n        from .utils.adobe import (\n            generate_figure_captions,\n            load_json,\n            parse_figure_paths,\n            parse_table_paths,\n            request_adobe_service,\n        )\n\n        filename = file.name\n        filepath = str(Path(file).resolve())\n        output_path = request_adobe_service(file_path=str(file), output_path=\"\")\n        results_path = os.path.join(output_path, \"structuredData.json\")\n\n        if not os.path.exists(results_path):\n            logger.exception(\"Fail to parse the document.\")\n            return []\n\n        data = load_json(results_path)\n\n        texts = defaultdict(list)\n        tables = []\n        figures = []\n\n        elements = data[\"elements\"]\n        for item_id, item in enumerate(elements):\n            page_number = item.get(\"Page\", -1) + 1\n            item_path = item[\"Path\"]\n            item_text = item.get(\"Text\", \"\")\n\n            file_paths = [\n                Path(output_path) / path for path in item.get(\"filePaths\", [])\n            ]\n            prev_item = elements[item_id - 1]\n            title = prev_item.get(\"Text\", \"\")\n\n            if re.search(self.table_regex, item_path):\n                table_content = parse_table_paths(file_paths)\n                if not table_content:\n                    continue\n                table_caption = (\n                    table_content.replace(\"|\", \"\").replace(\"---\", \"\")\n                    + f\"\\n(Table in Page {page_number}. {title})\"\n                )\n                tables.append((page_number, table_content, table_caption))\n\n            elif re.search(self.figure_regex, item_path):\n                figure_caption = (\n                    item_text + f\"\\n(Figure in Page {page_number}. 
{title})\"\n                )\n                figure_content = parse_figure_paths(file_paths)\n                if not figure_content:\n                    continue\n                figures.append([page_number, figure_content, figure_caption])\n\n            else:\n                if item_text and \"Table\" not in item_path and \"Figure\" not in item_path:\n                    texts[page_number].append(item_text)\n\n        # get figure caption using GPT-4V\n        figure_captions = generate_figure_captions(\n            self.vlm_endpoint,\n            [item[1] for item in figures],\n            self.max_figures_to_caption,\n        )\n        for item, caption in zip(figures, figure_captions):\n            # update figure caption\n            item[2] += \" \" + caption\n\n        # Wrap elements with Document\n        documents = []\n\n        # join plain text elements\n        for page_number, txts in texts.items():\n            documents.append(\n                Document(\n                    text=\"\\n\".join(txts),\n                    metadata={\n                        \"page_label\": page_number,\n                        \"file_name\": filename,\n                        \"file_path\": filepath,\n                        **(extra_info if extra_info else {}),\n                    },\n                )\n            )\n\n        # table elements\n        for page_number, table_content, table_caption in tables:\n            documents.append(\n                Document(\n                    text=table_content,\n                    metadata={\n                        \"table_origin\": table_content,\n                        \"type\": \"table\",\n                        \"page_label\": page_number,\n                        \"file_name\": filename,\n                        \"file_path\": filepath,\n                        **(extra_info if extra_info else {}),\n                    },\n                    metadata_template=\"\",\n                    
metadata_seperator=\"\",\n                )\n            )\n\n        # figure elements\n        for page_number, figure_content, figure_caption in figures:\n            documents.append(\n                Document(\n                    text=figure_caption,\n                    metadata={\n                        \"image_origin\": figure_content,\n                        \"type\": \"image\",\n                        \"page_label\": page_number,\n                        \"file_name\": filename,\n                        \"file_path\": filepath,\n                        **(extra_info if extra_info else {}),\n                    },\n                    metadata_template=\"\",\n                    metadata_seperator=\"\",\n                )\n            )\n        return documents\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/loaders/azureai_document_intelligence_loader.py",
    "content": "import base64\nimport os\nfrom io import BytesIO\nfrom pathlib import Path\nfrom typing import Optional\n\nfrom PIL import Image\n\nfrom kotaemon.base import Document, Param\n\nfrom .base import BaseReader\nfrom .utils.adobe import generate_single_figure_caption\n\n\ndef crop_image(file_path: Path, bbox: list[float], page_number: int = 0) -> Image.Image:\n    \"\"\"Crop the image based on the bounding box\n\n    Args:\n        file_path (Path): path to the image file\n        bbox (list[float]): bounding box of the image (in percentage [x0, y0, x1, y1])\n        page_number (int, optional): page number of the image. Defaults to 0.\n\n    Returns:\n        Image.Image: cropped image\n    \"\"\"\n    left, upper, right, lower = bbox\n\n    left, right = min(left, right), max(left, right)\n    upper, lower = min(upper, lower), max(upper, lower)\n\n    img: Image.Image\n    suffix = file_path.suffix.lower()\n    if suffix == \".pdf\":\n        try:\n            import fitz\n        except ImportError:\n            raise ImportError(\"Please install PyMuPDF: 'pip install PyMuPDF'\")\n\n        doc = fitz.open(file_path)\n        page = doc.load_page(page_number)\n        pm = page.get_pixmap(dpi=150)\n        img = Image.frombytes(\"RGB\", [pm.width, pm.height], pm.samples)\n    elif suffix in [\".tif\", \".tiff\"]:\n        img = Image.open(file_path)\n        img.seek(page_number)\n    else:\n        img = Image.open(file_path)\n\n    return img.crop(\n        (\n            int(left * img.width),\n            int(upper * img.height),\n            int(right * img.width),\n            int(lower * img.height),\n        )\n    )\n\n\nclass AzureAIDocumentIntelligenceLoader(BaseReader):\n    \"\"\"Utilize Azure AI Document Intelligence to parse document\n\n    As of April 24, the supported file formats are: pdf, jpeg/jpg, png, bmp, tiff,\n    heif, docx, xlsx, pptx and html.\n    \"\"\"\n\n    _dependencies = [\"azure-ai-documentintelligence\", 
\"PyMuPDF\", \"Pillow\"]\n\n    endpoint: str = Param(\n        os.environ.get(\"AZUREAI_DOCUMENT_INTELLIGENT_ENDPOINT\", None),\n        help=\"Endpoint of Azure AI Document Intelligence\",\n    )\n    credential: str = Param(\n        os.environ.get(\"AZUREAI_DOCUMENT_INTELLIGENT_CREDENTIAL\", None),\n        help=\"Credential of Azure AI Document Intelligence\",\n    )\n    model: str = Param(\n        \"prebuilt-layout\",\n        help=(\n            \"Model to use for document analysis. Default is prebuilt-layout. \"\n            \"As of April 24, you can view the supported models [here]\"\n            \"(https://learn.microsoft.com/en-us/azure/ai-services/\"\n            \"document-intelligence/concept-model-overview?view=doc-intel-4.0.0\"\n            \"#model-analysis-features)\"\n        ),\n    )\n    output_content_format: str = Param(\n        \"markdown\",\n        help=\"Output content format. Can be 'markdown' or 'text'.Default is markdown\",\n    )\n    vlm_endpoint: str = Param(\n        help=(\n            \"Default VLM endpoint for figure captioning. If not provided, will not \"\n            \"caption the figures\"\n        )\n    )\n    figure_friendly_filetypes: list[str] = Param(\n        [\".pdf\", \".jpeg\", \".jpg\", \".png\", \".bmp\", \".tiff\", \".heif\", \".tif\"],\n        help=(\n            \"File types that we can reliably open and extract figures. \"\n            \"For files like .docx or .html, the visual layout may be different \"\n            \"when viewed from different tools, hence we cannot use Azure DI \"\n            \"location to extract figures.\"\n        ),\n    )\n    cache_dir: str = Param(\n        None,\n        help=\"Directory to cache the downloaded files. 
Default is None\",\n    )\n\n    @Param.auto(depends_on=[\"endpoint\", \"credential\"])\n    def client_(self):\n        try:\n            from azure.ai.documentintelligence import DocumentIntelligenceClient\n            from azure.core.credentials import AzureKeyCredential\n        except ImportError:\n            raise ImportError(\"Please install azure-ai-documentintelligence\")\n\n        return DocumentIntelligenceClient(\n            self.endpoint, AzureKeyCredential(self.credential)\n        )\n\n    def run(\n        self, file_path: str | Path, extra_info: Optional[dict] = None, **kwargs\n    ) -> list[Document]:\n        return self.load_data(Path(file_path), extra_info=extra_info, **kwargs)\n\n    def load_data(\n        self, file_path: Path, extra_info: Optional[dict] = None, **kwargs\n    ) -> list[Document]:\n        \"\"\"Extract the input file, allowing multi-modal extraction\"\"\"\n        metadata = extra_info or {}\n        file_name = Path(file_path)\n        with open(file_path, \"rb\") as fi:\n            poller = self.client_.begin_analyze_document(\n                self.model,\n                body=fi,\n                content_type=\"application/octet-stream\",\n                output_content_format=self.output_content_format,\n            )\n            result = poller.result()\n\n        # the total text content of the document in `output_content_format` format\n        text_content = result.content\n        removed_spans: list[dict] = []\n\n        # extract the figures\n        figures = []\n        for figure_desc in result.get(\"figures\", []):\n            if not self.vlm_endpoint:\n                continue\n            if file_path.suffix.lower() not in self.figure_friendly_filetypes:\n                continue\n\n            # read & crop the image\n            page_number = figure_desc[\"boundingRegions\"][0][\"pageNumber\"]\n            page_width = result.pages[page_number - 1][\"width\"]\n            page_height = 
result.pages[page_number - 1][\"height\"]\n            polygon = figure_desc[\"boundingRegions\"][0][\"polygon\"]\n            xs = [polygon[i] for i in range(0, len(polygon), 2)]\n            ys = [polygon[i] for i in range(1, len(polygon), 2)]\n            bbox = [\n                min(xs) / page_width,\n                min(ys) / page_height,\n                max(xs) / page_width,\n                max(ys) / page_height,\n            ]\n            img = crop_image(file_path, bbox, page_number - 1)\n\n            # convert the image into base64\n            img_bytes = BytesIO()\n            img.save(img_bytes, format=\"PNG\")\n            img_base64 = base64.b64encode(img_bytes.getvalue()).decode(\"utf-8\")\n            img_base64 = f\"data:image/png;base64,{img_base64}\"\n\n            # caption the image\n            caption = generate_single_figure_caption(\n                figure=img_base64, vlm_endpoint=self.vlm_endpoint\n            )\n\n            # store the image into document\n            figure_metadata = {\n                \"image_origin\": img_base64,\n                \"type\": \"image\",\n                \"page_label\": page_number,\n            }\n            figure_metadata.update(metadata)\n\n            figures.append(\n                Document(\n                    text=caption,\n                    metadata=figure_metadata,\n                )\n            )\n            removed_spans += figure_desc[\"spans\"]\n\n        # extract the tables\n        tables = []\n        for table_desc in result.get(\"tables\", []):\n            if not table_desc[\"spans\"]:\n                continue\n\n            # convert the tables into markdown format\n            boundingRegions = table_desc[\"boundingRegions\"]\n            if boundingRegions:\n                page_number = boundingRegions[0][\"pageNumber\"]\n            else:\n                page_number = 1\n\n            # store the tables into document\n            offset = 
table_desc[\"spans\"][0][\"offset\"]\n            length = table_desc[\"spans\"][0][\"length\"]\n            table_metadata = {\n                \"type\": \"table\",\n                \"page_label\": page_number,\n                \"table_origin\": text_content[offset : offset + length],\n            }\n            table_metadata.update(metadata)\n\n            tables.append(\n                Document(\n                    text=text_content[offset : offset + length],\n                    metadata=table_metadata,\n                )\n            )\n            removed_spans += table_desc[\"spans\"]\n        # save the text content into markdown format\n        if self.cache_dir is not None:\n            with open(\n                Path(self.cache_dir) / f\"{file_name.stem}.md\", \"w\", encoding=\"utf-8\"\n            ) as f:\n                f.write(text_content)\n\n        removed_spans = sorted(removed_spans, key=lambda x: x[\"offset\"], reverse=True)\n        for span in removed_spans:\n            text_content = (\n                text_content[: span[\"offset\"]]\n                + text_content[span[\"offset\"] + span[\"length\"] :]\n            )\n\n        return [Document(content=text_content, metadata=metadata)] + figures + tables\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/loaders/base.py",
    "content": "from pathlib import Path\nfrom typing import TYPE_CHECKING, Any, List, Type, Union\n\nfrom kotaemon.base import BaseComponent, Document\n\nif TYPE_CHECKING:\n    from llama_index.core.readers.base import BaseReader as LIBaseReader\n\n\nclass BaseReader(BaseComponent):\n    \"\"\"The base class for all readers\"\"\"\n\n    ...\n\n\nclass AutoReader(BaseReader):\n    \"\"\"General auto reader for a variety of files. (based on llama-hub)\"\"\"\n\n    def __init__(self, reader_type: Union[str, Type[\"LIBaseReader\"]]) -> None:\n        \"\"\"Init reader using string identifier or class name from llama-hub.\n\n        When a string is given, first attempts a direct import from\n        ``llama_index.readers.file`` (works in uv/pip-less venvs where the\n        package is already installed). Falls back to the deprecated\n        ``download_loader`` only if the direct import fails.\n        \"\"\"\n        import importlib\n\n        if isinstance(reader_type, str):\n            # Try direct import first — avoids pip-install side-effect of\n            # download_loader, which fails in venvs without pip (e.g. 
uv).\n            try:\n                module = importlib.import_module(\"llama_index.readers.file\")\n                reader_cls = getattr(module, reader_type)\n                self._reader = reader_cls()\n            except (ImportError, AttributeError):\n                from llama_index.core import download_loader\n\n                self._reader = download_loader(reader_type)()\n        else:\n            self._reader = reader_type()\n        super().__init__()\n\n    def load_data(self, file: Union[Path, str], **kwargs: Any) -> List[Document]:\n        documents = self._reader.load_data(file=file, **kwargs)\n\n        # convert Document to new base class from kotaemon\n        converted_documents = [Document.from_dict(doc.to_dict()) for doc in documents]\n        return converted_documents\n\n    def run(self, file: Union[Path, str], **kwargs: Any) -> List[Document]:\n        return self.load_data(file=file, **kwargs)\n\n\nclass LIReaderMixin(BaseComponent):\n    \"\"\"Base wrapper around llama-index reader\n\n    To use the LIBaseReader, you need to implement the _get_wrapped_class method to\n    return the relevant llama-index reader class that you want to wrap.\n\n    Example:\n\n        ```python\n        class DirectoryReader(LIBaseReader):\n            def _get_wrapped_class(self) -> Type[\"BaseReader\"]:\n                from llama_index import SimpleDirectoryReader\n\n                return SimpleDirectoryReader\n        ```\n    \"\"\"\n\n    def _get_wrapped_class(self) -> Type[\"LIBaseReader\"]:\n        raise NotImplementedError(\n            \"Please return the relevant llama-index class in in _get_wrapped_class\"\n        )\n\n    def __init__(self, *args, **kwargs):\n        self._reader_class = self._get_wrapped_class()\n        self._reader = self._reader_class(*args, **kwargs)\n        super().__init__()\n\n    def __setattr__(self, name: str, value: Any) -> None:\n        if name.startswith(\"_\"):\n            return 
super().__setattr__(name, value)\n\n        return setattr(self._reader, name, value)\n\n    def __getattr__(self, name: str) -> Any:\n        return getattr(self._reader, name)\n\n    def load_data(self, *args, **kwargs: Any) -> List[Document]:\n        documents = self._reader.load_data(*args, **kwargs)\n\n        # convert Document to new base class from kotaemon\n        converted_documents = [Document.from_dict(doc.to_dict()) for doc in documents]\n        return converted_documents\n\n    def run(self, *args, **kwargs: Any) -> List[Document]:\n        return self.load_data(*args, **kwargs)\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/loaders/composite_loader.py",
    "content": "from typing import Callable, List, Optional, Type\n\nfrom llama_index.core.readers.base import BaseReader as LIBaseReader\n\nfrom .base import BaseReader, LIReaderMixin\n\n\nclass DirectoryReader(LIReaderMixin, BaseReader):\n    \"\"\"Wrap around llama-index SimpleDirectoryReader\n\n    Args:\n        input_dir (str): Path to the directory.\n        input_files (List): List of file paths to read\n            (Optional; overrides input_dir, exclude)\n        exclude (List): glob of python file paths to exclude (Optional)\n        exclude_hidden (bool): Whether to exclude hidden files (dotfiles).\n        encoding (str): Encoding of the files.\n            Default is utf-8.\n        errors (str): how encoding and decoding errors are to be handled,\n              see https://docs.python.org/3/library/functions.html#open\n        recursive (bool): Whether to recursively search in subdirectories.\n            False by default.\n        filename_as_id (bool): Whether to use the filename as the document id.\n            False by default.\n        required_exts (Optional[List[str]]): List of required extensions.\n            Default is None.\n        file_extractor (Optional[Dict[str, BaseReader]]): A mapping of file\n            extension to a BaseReader class that specifies how to convert that file\n            to text. 
If not specified, use default from DEFAULT_FILE_READER_CLS.\n        num_files_limit (Optional[int]): Maximum number of files to read.\n            Default is None.\n        file_metadata (Optional[Callable[[str], Dict]]): A function that takes\n            in a filename and returns a Dict of metadata for the Document.\n            Default is None.\n    \"\"\"\n\n    input_dir: Optional[str] = None\n    input_files: Optional[List] = None\n    exclude: Optional[List] = None\n    exclude_hidden: bool = True\n    errors: str = \"ignore\"\n    recursive: bool = False\n    encoding: str = \"utf-8\"\n    filename_as_id: bool = False\n    required_exts: Optional[list[str]] = None\n    file_extractor: Optional[dict[str, \"LIBaseReader\"]] = None\n    num_files_limit: Optional[int] = None\n    file_metadata: Optional[Callable[[str], dict]] = None\n\n    def _get_wrapped_class(self) -> Type[\"LIBaseReader\"]:\n        from llama_index.core import SimpleDirectoryReader\n\n        return SimpleDirectoryReader\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/loaders/docling_loader.py",
    "content": "import base64\nfrom collections import defaultdict\nfrom io import BytesIO\nfrom pathlib import Path\nfrom typing import List, Optional\n\nfrom kotaemon.base import Document, Param\n\nfrom .azureai_document_intelligence_loader import crop_image\nfrom .base import BaseReader\nfrom .utils.adobe import generate_single_figure_caption, make_markdown_table\n\n\nclass DoclingReader(BaseReader):\n    \"\"\"Using Docling to extract document structure and content\"\"\"\n\n    _dependencies = [\"docling\"]\n\n    vlm_endpoint: str = Param(\n        help=(\n            \"Default VLM endpoint for figure captioning. \"\n            \"If not provided, will not caption the figures\"\n        )\n    )\n\n    max_figure_to_caption: int = Param(\n        100,\n        help=(\n            \"The maximum number of figures to caption. \"\n            \"The rest will be indexed without captions.\"\n        ),\n    )\n\n    figure_friendly_filetypes: list[str] = Param(\n        [\".pdf\", \".jpeg\", \".jpg\", \".png\", \".bmp\", \".tiff\", \".heif\", \".tif\"],\n        help=(\n            \"File types that we can reliably open and extract figures. 
\"\n            \"For files like .docx or .html, the visual layout may be different \"\n            \"when viewed from different tools, hence we cannot use Azure DI location \"\n            \"to extract figures.\"\n        ),\n    )\n\n    @Param.auto(cache=True)\n    def converter_(self):\n        try:\n            from docling.document_converter import DocumentConverter\n        except ImportError:\n            raise ImportError(\"Please install docling: 'pip install docling'\")\n\n        return DocumentConverter()\n\n    def run(\n        self, file_path: str | Path, extra_info: Optional[dict] = None, **kwargs\n    ) -> List[Document]:\n        return self.load_data(file_path, extra_info, **kwargs)\n\n    def load_data(\n        self, file_path: str | Path, extra_info: Optional[dict] = None, **kwargs\n    ) -> List[Document]:\n        \"\"\"Extract the input file, allowing multi-modal extraction\"\"\"\n\n        metadata = extra_info or {}\n\n        result = self.converter_.convert(file_path)\n        result_dict = result.document.export_to_dict()\n\n        file_path = Path(file_path)\n        file_name = file_path.name\n\n        # extract the figures\n        figures = []\n        gen_caption_count = 0\n        for figure_obj in result_dict.get(\"pictures\", []):\n            if not self.vlm_endpoint:\n                continue\n            if file_path.suffix.lower() not in self.figure_friendly_filetypes:\n                continue\n\n            # retrieve extractive captions provided by docling\n            caption_refs = [caption[\"$ref\"] for caption in figure_obj[\"captions\"]]\n            extractive_captions = []\n            for caption_ref in caption_refs:\n                text_id = caption_ref.split(\"/\")[-1]\n                try:\n                    caption_text = result_dict[\"texts\"][int(text_id)][\"text\"]\n                    extractive_captions.append(caption_text)\n                except (ValueError, TypeError, IndexError) as e:\n         
           print(e)\n                    continue\n\n            # read & crop image\n            page_number = figure_obj[\"prov\"][0][\"page_no\"]\n\n            try:\n                page_number_text = str(page_number)\n                page_width = result_dict[\"pages\"][page_number_text][\"size\"][\"width\"]\n                page_height = result_dict[\"pages\"][page_number_text][\"size\"][\"height\"]\n\n                bbox_obj = figure_obj[\"prov\"][0][\"bbox\"]\n                bbox: list[float] = [\n                    bbox_obj[\"l\"],\n                    bbox_obj[\"t\"],\n                    bbox_obj[\"r\"],\n                    bbox_obj[\"b\"],\n                ]\n                if bbox_obj[\"coord_origin\"] == \"BOTTOMLEFT\":\n                    bbox = self._convert_bbox_bl_tl(bbox, page_width, page_height)\n\n                img = crop_image(file_path, bbox, page_number - 1)\n            except KeyError as e:\n                print(e, list(result_dict[\"pages\"].keys()))\n                continue\n\n            # convert img to base64\n            img_bytes = BytesIO()\n            img.save(img_bytes, format=\"PNG\")\n            img_base64 = base64.b64encode(img_bytes.getvalue()).decode(\"utf-8\")\n            img_base64 = f\"data:image/png;base64,{img_base64}\"\n\n            # generate the generative caption\n            if gen_caption_count >= self.max_figure_to_caption:\n                gen_caption = \"\"\n            else:\n                gen_caption_count += 1\n                gen_caption = generate_single_figure_caption(\n                    figure=img_base64, vlm_endpoint=self.vlm_endpoint\n                )\n\n            # join the extractive and generative captions\n            caption = \"\\n\".join(extractive_captions + [gen_caption])\n\n            # store the image into document\n            figure_metadata = {\n                \"image_origin\": img_base64,\n                \"type\": \"image\",\n                \"page_label\": 
page_number,\n                \"file_name\": file_name,\n                \"file_path\": file_path,\n            }\n            figure_metadata.update(metadata)\n\n            figures.append(\n                Document(\n                    text=caption,\n                    metadata=figure_metadata,\n                )\n            )\n\n        # extract the tables\n        tables = []\n        for table_obj in result_dict.get(\"tables\", []):\n            # convert the tables into markdown format\n            markdown_table = self._parse_table(table_obj)\n            caption_refs = [caption[\"$ref\"] for caption in table_obj[\"captions\"]]\n\n            extractive_captions = []\n            for caption_ref in caption_refs:\n                text_id = caption_ref.split(\"/\")[-1]\n                try:\n                    caption_text = result_dict[\"texts\"][int(text_id)][\"text\"]\n                    extractive_captions.append(caption_text)\n                except (ValueError, TypeError, IndexError) as e:\n                    print(e)\n                    continue\n            # join the extractive and generative captions\n            caption = \"\\n\".join(extractive_captions)\n            markdown_table = f\"{caption}\\n{markdown_table}\"\n\n            page_number = table_obj[\"prov\"][0].get(\"page_no\", 1)\n\n            table_metadata = {\n                \"type\": \"table\",\n                \"page_label\": page_number,\n                \"table_origin\": markdown_table,\n                \"file_name\": file_name,\n                \"file_path\": file_path,\n            }\n            table_metadata.update(metadata)\n\n            tables.append(\n                Document(\n                    text=markdown_table,\n                    metadata=table_metadata,\n                )\n            )\n\n        # join plain text elements\n        texts = []\n        page_number_to_text = defaultdict(list)\n\n        for text_obj in result_dict[\"texts\"]:\n            
page_number = text_obj[\"prov\"][0].get(\"page_no\", 1)\n            page_number_to_text[page_number].append(text_obj[\"text\"])\n\n        for page_number, txts in page_number_to_text.items():\n            texts.append(\n                Document(\n                    text=\"\\n\".join(txts),\n                    metadata={\n                        \"page_label\": page_number,\n                        \"file_name\": file_name,\n                        \"file_path\": file_path,\n                        **metadata,\n                    },\n                )\n            )\n\n        return texts + tables + figures\n\n    def _convert_bbox_bl_tl(\n        self, bbox: list[float], page_width: int, page_height: int\n    ) -> list[float]:\n        \"\"\"Convert bbox from bottom-left to top-left\"\"\"\n        x0, y0, x1, y1 = bbox\n        return [\n            x0 / page_width,\n            (page_height - y1) / page_height,\n            x1 / page_width,\n            (page_height - y0) / page_height,\n        ]\n\n    def _parse_table(self, table_obj: dict) -> str:\n        \"\"\"Convert docling table object to markdown table\"\"\"\n        table_as_list: List[List[str]] = []\n        grid = table_obj[\"data\"][\"grid\"]\n        for row in grid:\n            table_as_list.append([])\n            for cell in row:\n                table_as_list[-1].append(cell[\"text\"])\n\n        return make_markdown_table(table_as_list)\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/loaders/docx_loader.py",
    "content": "import unicodedata\nfrom pathlib import Path\nfrom typing import List, Optional\n\nimport pandas as pd\nfrom llama_index.core.readers.base import BaseReader\n\nfrom kotaemon.base import Document\n\n\nclass DocxReader(BaseReader):\n    \"\"\"Read Docx files that respect table, using python-docx library\n\n    Reader behavior:\n        - All paragraphs are extracted as a Document\n        - Each table is extracted as a Document, rendered as a CSV string\n        - The output is a list of Documents, concatenating the above\n        (tables + paragraphs)\n    \"\"\"\n\n    def __init__(self, *args, **kwargs):\n        try:\n            import docx  # noqa\n        except ImportError:\n            raise ImportError(\n                \"docx is not installed. \"\n                \"Please install it using `pip install python-docx`\"\n            )\n\n    def _load_single_table(self, table) -> List[List[str]]:\n        \"\"\"Extract content from tables. Return a list of columns: list[str]\n        Some merged cells will share duplicated content.\n        \"\"\"\n        n_row = len(table.rows)\n        n_col = len(table.columns)\n\n        arrays = [[\"\" for _ in range(n_row)] for _ in range(n_col)]\n\n        for i, row in enumerate(table.rows):\n            for j, cell in enumerate(row.cells):\n                arrays[j][i] = cell.text\n\n        return arrays\n\n    def load_data(\n        self, file_path: Path, extra_info: Optional[dict] = None, **kwargs\n    ) -> List[Document]:\n        \"\"\"Load data using Docx reader\n\n        Args:\n            file_path (Path): Path to .docx file\n\n        Returns:\n            List[Document]: list of documents extracted from the .docx file\n        \"\"\"\n        import docx\n\n        file_path = Path(file_path).resolve()\n\n        doc = docx.Document(str(file_path))\n        all_text = \"\\n\".join(\n            [unicodedata.normalize(\"NFKC\", p.text) for p in doc.paragraphs]\n        )\n        pages = 
[all_text]  # 1 page only\n\n        tables = []\n        for t in doc.tables:\n            # return list of columns: list of string\n            arrays = self._load_single_table(t)\n\n            tables.append(pd.DataFrame({a[0]: a[1:] for a in arrays}))\n\n        extra_info = extra_info or {}\n\n        # create output Document with metadata from table\n        documents = [\n            Document(\n                text=table.to_csv(\n                    index=False\n                ).strip(),  # strip_special_chars_markdown()\n                metadata={\n                    \"table_origin\": table.to_csv(index=False),\n                    \"type\": \"table\",\n                    **extra_info,\n                },\n                metadata_template=\"\",\n                metadata_seperator=\"\",\n            )\n            for table in tables  # page_id\n        ]\n\n        # create Document from non-table text\n        documents.extend(\n            [\n                Document(\n                    text=non_table_text.strip(),\n                    metadata={\"page_label\": 1, **extra_info},\n                )\n                for _, non_table_text in enumerate(pages)\n            ]\n        )\n\n        return documents\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/loaders/excel_loader.py",
    "content": "\"\"\"Pandas Excel reader.\n\nPandas parser for .xlsx files.\n\n\"\"\"\nfrom pathlib import Path\nfrom typing import Any, List, Optional, Union\n\nfrom llama_index.core.readers.base import BaseReader\n\nfrom kotaemon.base import Document\n\n\nclass PandasExcelReader(BaseReader):\n    r\"\"\"Pandas-based Excel parser.\n\n    Parses Excel workbooks with the Pandas `read_excel` function.\n    If special parameters are required, use the `pandas_config` dict.\n\n    Args:\n\n        pandas_config (dict): Options for the `pandas.read_excel` function call.\n            Refer to https://pandas.pydata.org/docs/reference/api/pandas.read_excel.html\n            for more information. Set to empty dict by default,\n            this means defaults will be used.\n\n    \"\"\"\n\n    def __init__(\n        self,\n        *args: Any,\n        pandas_config: Optional[dict] = None,\n        row_joiner: str = \"\\n\",\n        col_joiner: str = \" \",\n        **kwargs: Any,\n    ) -> None:\n        \"\"\"Init params.\"\"\"\n        super().__init__(*args, **kwargs)\n        self._pandas_config = pandas_config or {}\n        self._row_joiner = row_joiner if row_joiner else \"\\n\"\n        self._col_joiner = col_joiner if col_joiner else \" \"\n\n    def load_data(\n        self,\n        file: Path,\n        include_sheetname: bool = False,\n        sheet_name: Optional[Union[str, int, list]] = None,\n        extra_info: Optional[dict] = None,\n        **kwargs,\n    ) -> List[Document]:\n        \"\"\"Parse file and extract values from a specific column.\n\n        Args:\n            file (Path): The path to the Excel file to read.\n            include_sheetname (bool): Whether to include the sheet name in the output.\n            sheet_name (Union[str, int, None]): The specific sheet to read from,\n                default is None which reads all sheets.\n\n        Returns:\n            List[Document]: A list of `Document` objects containing the\n            
    values from the specified column in the Excel file.\n        \"\"\"\n        import itertools\n\n        try:\n            import pandas as pd\n        except ImportError:\n            raise ImportError(\n                \"install pandas using `pip3 install pandas` to use this loader\"\n            )\n\n        if sheet_name is not None:\n            sheet_name = (\n                [sheet_name] if not isinstance(sheet_name, list) else sheet_name\n            )\n\n        dfs = pd.read_excel(file, sheet_name=sheet_name, **self._pandas_config)\n        sheet_names = dfs.keys()\n        df_sheets = []\n\n        for key in sheet_names:\n            sheet = []\n            if include_sheetname:\n                sheet.append([key])\n            dfs[key] = dfs[key].dropna(axis=0, how=\"all\")\n            dfs[key] = dfs[key].dropna(axis=0, how=\"all\")\n            dfs[key].fillna(\"\", inplace=True)\n            sheet.extend(dfs[key].values.astype(str).tolist())\n            df_sheets.append(sheet)\n\n        text_list = list(\n            itertools.chain.from_iterable(df_sheets)\n        )  # flatten list of lists\n\n        output = [\n            Document(\n                text=self._row_joiner.join(\n                    self._col_joiner.join(sublist) for sublist in text_list\n                ),\n                metadata=extra_info or {},\n            )\n        ]\n\n        return output\n\n\nclass ExcelReader(BaseReader):\n    r\"\"\"Spreadsheet exporter respecting multiple worksheets\n\n    Parses Excel workbooks with the Pandas `read_excel` function.\n    If special parameters are required, use the `pandas_config` dict.\n\n    Args:\n\n        pandas_config (dict): Options for the `pandas.read_excel` function call.\n            Refer to https://pandas.pydata.org/docs/reference/api/pandas.read_excel.html\n            for more information. 
Set to empty dict by default,\n            this means defaults will be used.\n\n    \"\"\"\n\n    def __init__(\n        self,\n        *args: Any,\n        pandas_config: Optional[dict] = None,\n        row_joiner: str = \"\\n\",\n        col_joiner: str = \" \",\n        **kwargs: Any,\n    ) -> None:\n        \"\"\"Init params.\"\"\"\n        super().__init__(*args, **kwargs)\n        self._pandas_config = pandas_config or {}\n        self._row_joiner = row_joiner if row_joiner else \"\\n\"\n        self._col_joiner = col_joiner if col_joiner else \" \"\n\n    def load_data(\n        self,\n        file: Path,\n        include_sheetname: bool = True,\n        sheet_name: Optional[Union[str, int, list]] = None,\n        extra_info: Optional[dict] = None,\n        **kwargs,\n    ) -> List[Document]:\n        \"\"\"Parse file and extract values from a specific column.\n\n        Args:\n            file (Path): The path to the Excel file to read.\n            include_sheetname (bool): Whether to include the sheet name in the output.\n            sheet_name (Union[str, int, None]): The specific sheet to read from,\n                default is None which reads all sheets.\n\n        Returns:\n            List[Document]: A list of `Document` objects containing the\n                values from the specified column in the Excel file.\n        \"\"\"\n\n        try:\n            import pandas as pd\n        except ImportError:\n            raise ImportError(\n                \"install pandas using `pip3 install pandas` to use this loader\"\n            )\n\n        if sheet_name is not None:\n            sheet_name = (\n                [sheet_name] if not isinstance(sheet_name, list) else sheet_name\n            )\n\n        # clean up input\n        file = Path(file)\n        extra_info = extra_info or {}\n\n        dfs = pd.read_excel(file, sheet_name=sheet_name, **self._pandas_config)\n        sheet_names = dfs.keys()\n        output = []\n\n        for idx, key in 
enumerate(sheet_names):\n            dfs[key] = dfs[key].dropna(axis=0, how=\"all\")\n            dfs[key] = dfs[key].dropna(axis=0, how=\"all\")\n            dfs[key] = dfs[key].astype(\"object\")\n            dfs[key].fillna(\"\", inplace=True)\n\n            rows = dfs[key].values.astype(str).tolist()\n            content = self._row_joiner.join(\n                self._col_joiner.join(row).strip() for row in rows\n            ).strip()\n            if include_sheetname:\n                content = f\"(Sheet {key} of file {file.name})\\n{content}\"\n            metadata = {\"page_label\": idx + 1, \"sheet_name\": key, **extra_info}\n            output.append(Document(text=content, metadata=metadata))\n\n        return output\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/loaders/html_loader.py",
    "content": "import email\nfrom pathlib import Path\nfrom typing import Optional\n\nfrom llama_index.core.readers.base import BaseReader\nfrom theflow.settings import settings as flowsettings\n\nfrom kotaemon.base import Document\n\n\nclass HtmlReader(BaseReader):\n    \"\"\"Read HTML using html2text\n\n    Reader behavior:\n        - HTML is read with html2text.\n        - All of the texts will be split by `page_break_pattern`\n        - Each page is extracted as a Document\n        - The output is a list of Documents\n\n    Args:\n        page_break_pattern (str): Pattern to split the HTML into pages\n    \"\"\"\n\n    def __init__(self, page_break_pattern: Optional[str] = None, *args, **kwargs):\n        try:\n            import html2text  # noqa\n        except ImportError:\n            raise ImportError(\n                \"html2text is not installed. \"\n                \"Please install it using `pip install html2text`\"\n            )\n\n        self._page_break_pattern: Optional[str] = page_break_pattern\n        super().__init__()\n\n    def load_data(\n        self, file_path: Path | str, extra_info: Optional[dict] = None, **kwargs\n    ) -> list[Document]:\n        \"\"\"Load data using Html reader\n\n        Args:\n            file_path: path to HTML file\n            extra_info: extra information passed to this reader during extracting data\n\n        Returns:\n            list[Document]: list of documents extracted from the HTML file\n        \"\"\"\n        import html2text\n\n        file_path = Path(file_path).resolve()\n\n        with file_path.open(\"r\") as f:\n            html_text = \"\".join([line[:-1] for line in f.readlines()])\n\n        # read HTML\n        all_text = html2text.html2text(html_text)\n        pages = (\n            all_text.split(self._page_break_pattern)\n            if self._page_break_pattern\n            else [all_text]\n        )\n\n        extra_info = extra_info or {}\n\n        # create Document from non-table 
text\n        documents = [\n            Document(\n                text=page.strip(),\n                metadata={\"page_label\": page_id + 1, **extra_info},\n            )\n            for page_id, page in enumerate(pages)\n        ]\n\n        return documents\n\n\nclass MhtmlReader(BaseReader):\n    \"\"\"Parse `MHTML` files with `BeautifulSoup`.\"\"\"\n\n    def __init__(\n        self,\n        cache_dir: Optional[str] = getattr(\n            flowsettings, \"KH_MARKDOWN_OUTPUT_DIR\", None\n        ),\n        open_encoding: Optional[str] = None,\n        bs_kwargs: Optional[dict] = None,\n        get_text_separator: str = \"\",\n    ) -> None:\n        \"\"\"initialize with path, and optionally, file encoding to use, and any kwargs\n        to pass to the BeautifulSoup object.\n\n        Args:\n            cache_dir: Directory where the extracted text is saved as a markdown file.\n            file_path: Path to file to load.\n            open_encoding: The encoding to use when opening the file.\n            bs_kwargs: Any kwargs to pass to the BeautifulSoup object.\n            get_text_separator: The separator to use when getting the text\n                from the soup.\n        \"\"\"\n        try:\n            import bs4  # noqa:F401\n        except ImportError:\n            raise ImportError(\n                \"beautifulsoup4 package not found, please install it with \"\n                \"`pip install beautifulsoup4`\"\n            )\n\n        self.cache_dir = cache_dir\n        self.open_encoding = open_encoding\n        if bs_kwargs is None:\n            bs_kwargs = {\"features\": \"lxml\"}\n        self.bs_kwargs = bs_kwargs\n        self.get_text_separator = get_text_separator\n\n    def load_data(\n        self, file_path: Path | str, extra_info: Optional[dict] = None, **kwargs\n    ) -> list[Document]:\n        \"\"\"Load MHTML document into document objects.\"\"\"\n\n        from bs4 import BeautifulSoup\n\n        extra_info = extra_info or {}\n        metadata: dict = extra_info\n    
    page = []\n        file_name = Path(file_path)\n        with open(file_path, \"r\", encoding=self.open_encoding) as f:\n            message = email.message_from_string(f.read())\n            parts = message.get_payload()\n\n            if not isinstance(parts, list):\n                parts = [message]\n\n            for part in parts:\n                if part.get_content_type() == \"text/html\":\n                    html = part.get_payload(decode=True).decode()\n\n                    soup = BeautifulSoup(html, **self.bs_kwargs)\n                    text = soup.get_text(self.get_text_separator)\n\n                    if soup.title:\n                        title = str(soup.title.string)\n                    else:\n                        title = \"\"\n\n                    metadata = {\n                        \"source\": str(file_path),\n                        \"title\": title,\n                        **extra_info,\n                    }\n                    lines = [line for line in text.split(\"\\n\") if line.strip()]\n                    text = \"\\n\\n\".join(lines)\n                    if text:\n                        page.append(text)\n        # save the page into markdown format\n        print(self.cache_dir)\n        if self.cache_dir is not None:\n            print(Path(self.cache_dir) / f\"{file_name.stem}.md\")\n            with open(Path(self.cache_dir) / f\"{file_name.stem}.md\", \"w\") as f:\n                f.write(page[0])\n\n        return [Document(text=\"\\n\\n\".join(page), metadata=metadata)]\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/loaders/mathpix_loader.py",
    "content": "import json\nimport re\nimport time\nfrom pathlib import Path\nfrom typing import Any, Dict, Generator, List, Optional, Union\n\nimport requests\nfrom langchain.utils import get_from_dict_or_env\nfrom llama_index.core.readers.base import BaseReader\n\nfrom kotaemon.base import Document\n\nfrom .utils.table import strip_special_chars_markdown\n\n\n# MathpixPDFLoader implementation taken largely from Daniel Gross's:\n# https://gist.github.com/danielgross/3ab4104e14faccc12b49200843adab21\nclass MathpixPDFReader(BaseReader):\n    \"\"\"Load `PDF` files using `Mathpix` service.\"\"\"\n\n    def __init__(\n        self,\n        processed_file_format: str = \"md\",\n        max_wait_time_seconds: int = 900,\n        should_clean_pdf: bool = True,\n        **kwargs: Any,\n    ) -> None:\n        \"\"\"Initialize with a file path.\n\n        Args:\n            processed_file_format: a format of the processed file. Default is \"md\".\n            max_wait_time_seconds: a maximum time to wait for the response from\n                the server. Default is 900.\n            should_clean_pdf: a flag to clean the PDF file. 
Default is False.\n            **kwargs: additional keyword arguments.\n        \"\"\"\n        self.mathpix_api_key = get_from_dict_or_env(\n            kwargs, \"mathpix_api_key\", \"MATHPIX_API_KEY\", default=\"empty\"\n        )\n        self.mathpix_api_id = get_from_dict_or_env(\n            kwargs, \"mathpix_api_id\", \"MATHPIX_API_ID\", default=\"empty\"\n        )\n        self.processed_file_format = processed_file_format\n        self.max_wait_time_seconds = max_wait_time_seconds\n        self.should_clean_pdf = should_clean_pdf\n        super().__init__()\n\n    @property\n    def _mathpix_headers(self) -> Dict[str, str]:\n        return {\"app_id\": self.mathpix_api_id, \"app_key\": self.mathpix_api_key}\n\n    @property\n    def url(self) -> str:\n        return \"https://api.mathpix.com/v3/pdf\"\n\n    @property\n    def data(self) -> dict:\n        options = {\n            \"conversion_formats\": {self.processed_file_format: True},\n            \"enable_tables_fallback\": True,\n        }\n        return {\"options_json\": json.dumps(options)}\n\n    def send_pdf(self, file_path) -> str:\n        with open(file_path, \"rb\") as f:\n            files = {\"file\": f}\n            response = requests.post(\n                self.url, headers=self._mathpix_headers, files=files, data=self.data\n            )\n        response_data = response.json()\n        if \"pdf_id\" in response_data:\n            pdf_id = response_data[\"pdf_id\"]\n            return pdf_id\n        else:\n            raise ValueError(\"Unable to send PDF to Mathpix.\")\n\n    def wait_for_processing(self, pdf_id: str) -> None:\n        \"\"\"Wait for processing to complete.\n\n        Args:\n            pdf_id: a PDF id.\n\n        Returns: None\n        \"\"\"\n        url = self.url + \"/\" + pdf_id\n        for _ in range(0, self.max_wait_time_seconds, 5):\n            response = requests.get(url, headers=self._mathpix_headers)\n            response_data = response.json()\n       
     status = response_data.get(\"status\", None)\n            print(\n                f\"Processing status: {status},\"\n                f\"Progress: {response_data.get('percent_done', 0)}%\"\n            )\n\n            if status == \"completed\":\n                return\n            elif status == \"error\":\n                raise ValueError(f\"Mathpix processing error: {response_data}\")\n            elif status in [\n                \"split\",\n                \"processing\",\n            ]:  # Add handling for processing states\n                time.sleep(5)\n                continue\n            else:\n                print(f\"Unknown status: {response_data}\")\n                time.sleep(5)\n\n        raise TimeoutError(\n            f\"Processing did not complete within {self.max_wait_time_seconds} seconds\"\n        )\n\n    def get_processed_pdf(self, pdf_id: str) -> str:\n        self.wait_for_processing(pdf_id)\n        url = f\"{self.url}/{pdf_id}.{self.processed_file_format}\"\n        response = requests.get(url, headers=self._mathpix_headers)\n        if response.status_code != 200:\n            raise ValueError(f\"Failed to get processed PDF: {response.text}\")\n        content = response.content.decode(\"utf-8\")\n        print(f\"Retrieved content length: {len(content)}\")  # Debug print\n        return content\n\n    def clean_pdf(self, contents: str) -> str:\n        \"\"\"Clean the PDF file.\n\n        Args:\n            contents: a PDF file contents.\n\n        Returns:\n\n        \"\"\"\n        contents = \"\\n\".join(\n            [line for line in contents.split(\"\\n\") if not line.startswith(\"![]\")]\n        )\n        # replace \\section{Title} with # Title\n        contents = contents.replace(\"\\\\section{\", \"# \")\n        # replace the \"\\\" slash that Mathpix adds to escape $, %, (, etc.\n\n        # http:// or https:// followed by anything but a closing paren\n        url_regex = \"http[s]?://[^)]+\"\n        markup_regex 
= r\"\\[]\\(\\s*({0})\\s*\\)\".format(url_regex)\n        contents = (\n            contents.replace(r\"\\$\", \"$\")\n            .replace(r\"\\%\", \"%\")\n            .replace(r\"\\(\", \"(\")\n            .replace(r\"\\)\", \")\")\n            .replace(\"$\\\\begin{array}\", \"\")\n            .replace(\"\\\\end{array}$\", \"\")\n            .replace(\"\\\\\\\\\", \"\")\n            .replace(\"\\\\text\", \"\")\n            .replace(\"}\", \"\")\n            .replace(\"{\", \"\")\n            .replace(\"\\\\mathrm\", \"\")\n        )\n        contents = re.sub(markup_regex, \"\", contents)\n        return contents\n\n    def parse_markdown_text_to_tables(\n        self, content: str\n    ) -> tuple[list[tuple[int, str]], list[tuple[int, str]]]:\n        \"\"\"Parse markdown text to get tables and texts separately.\n\n        Returns:\n            Tuple of (tables, texts) where each is a list of (page_num, content) tuples\n        \"\"\"\n        print(\"Starting markdown parsing...\")\n        print(f\"Content length: {len(content)}\")\n\n        # Split by page markers if present\n        pages = re.split(r\"(?m)^# Page \\d+\\n\", content)\n\n        tables: list[tuple[int, str]] = []\n        texts: list[tuple[int, str]] = []\n\n        for page_num, page_content in enumerate(pages, 1):\n            if not page_content.strip():\n                continue\n\n            # Extract tables from the page\n            table_matches = re.findall(r\"(\\|[^\\n]+\\|(?:\\n\\|[^\\n]+\\|)*)\", page_content)\n            if table_matches:\n                for table in table_matches:\n                    tables.append(\n                        (page_num, table.strip())\n                    )  # Store as tuple with page number\n                # Remove tables from page content\n                page_content = re.sub(\n                    r\"(\\|[^\\n]+\\|(?:\\n\\|[^\\n]+\\|)*)\", \"\", page_content\n                )\n\n            # Split remaining content into meaningful 
chunks\n            chunks = re.split(r\"\\n\\s*\\n\", page_content)\n            for chunk in chunks:\n                if chunk.strip():\n                    texts.append(\n                        (page_num, chunk.strip())\n                    )  # Store as tuple with page number\n\n        print(f\"Found {len(tables)} tables and {len(texts)} text sections\")\n        return tables, texts\n\n    def load_data(\n        self,\n        file: Union[str, List[str], Path],\n        extra_info: Optional[Dict] = None,\n        **load_kwargs: Any,\n    ) -> List[Document]:\n        \"\"\"Load data from file path.\"\"\"\n        file_path = Path(file) if isinstance(file, str) else file\n\n        if \"response_content\" in load_kwargs:\n            content = load_kwargs[\"response_content\"]\n        else:\n            pdf_id = self.send_pdf(file_path)\n            content = self.get_processed_pdf(pdf_id)\n\n        if self.should_clean_pdf:\n            content = self.clean_pdf(content)\n\n        tables, texts = self.parse_markdown_text_to_tables(content)\n        documents = []\n\n        # Handle tables\n        for page_num, table_content in tables:\n            text = strip_special_chars_markdown(table_content)\n            metadata = {\n                \"table_origin\": table_content,\n                \"type\": \"table\",\n                \"page_label\": page_num,\n                \"page_number\": page_num,\n            }\n            if extra_info:\n                metadata.update(extra_info)\n            documents.append(\n                Document(\n                    text=text,\n                    metadata=metadata,\n                    metadata_template=\"\",\n                    metadata_seperator=\"\",\n                )\n            )\n\n        # Handle text sections\n        for page_num, text_content in texts:\n            if not text_content.strip():\n                continue\n            metadata = {\n                \"source\": str(file_path),\n       
         \"type\": \"text\",\n                \"page_label\": page_num,\n                \"page_number\": page_num,\n            }\n            if extra_info:\n                metadata.update(extra_info)\n            documents.append(Document(text=text_content, metadata=metadata))\n\n        # Fallback if no content was parsed\n        if not documents and content.strip():\n            metadata = {\n                \"source\": str(file_path),\n                \"type\": \"text\",\n                \"page_label\": 1,\n                \"page_number\": 1,\n            }\n            if extra_info:\n                metadata.update(extra_info)\n            documents.append(Document(text=content.strip(), metadata=metadata))\n\n        return documents\n\n    def lazy_load_data(\n        self,\n        file: Union[str, List[str], Path],\n        extra_info: Optional[Dict] = None,\n        **load_kwargs: Any,\n    ) -> Generator[Document, None, None]:\n        \"\"\"Lazy load data from file path.\"\"\"\n        file_path = Path(file) if isinstance(file, str) else file\n\n        if \"response_content\" in load_kwargs:\n            content = load_kwargs[\"response_content\"]\n        else:\n            pdf_id = self.send_pdf(file_path)\n            print(f\"PDF ID: {pdf_id}\")\n            content = self.get_processed_pdf(pdf_id)\n\n        if self.should_clean_pdf:\n            content = self.clean_pdf(content)\n\n        tables, texts = self.parse_markdown_text_to_tables(content)\n\n        # Handle tables\n        for page_num, table_content in tables:  # Changed variable name for clarity\n            text = strip_special_chars_markdown(table_content)  # Pass just the content\n            metadata = {\n                \"table_origin\": table_content,  # Use table_content here too\n                \"type\": \"table\",\n                \"page_label\": page_num,\n                \"page_number\": page_num,\n            }\n            if extra_info:\n                
metadata.update(extra_info)\n            yield Document(\n                text=text,\n                metadata=metadata,\n                metadata_template=\"\",\n                metadata_seperator=\"\",\n            )\n\n        # Handle text sections\n        for page_num, text_content in texts:  # Changed variable name for clarity\n            if not text_content.strip():\n                continue\n            metadata = {\n                \"source\": str(file_path),\n                \"type\": \"text\",\n                \"page_label\": page_num,\n                \"page_number\": page_num,\n            }\n            if extra_info:\n                metadata.update(extra_info)\n            yield Document(\n                text=text_content, metadata=metadata\n            )  # Use text_content directly\n\n        # Fallback if no content was parsed\n        if not (tables or texts) and content.strip():\n            metadata = {\n                \"source\": str(file_path),\n                \"type\": \"text\",\n                \"page_label\": 1,\n                \"page_number\": 1,\n            }\n            if extra_info:\n                metadata.update(extra_info)\n            yield Document(text=content.strip(), metadata=metadata)\n\n        print(f\"Completed processing PDF: {file_path}\")\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/loaders/ocr_loader.py",
    "content": "import logging\nimport os\nfrom pathlib import Path\nfrom typing import List, Optional\nfrom uuid import uuid4\n\nimport requests\nfrom llama_index.core.readers.base import BaseReader\nfrom tenacity import after_log, retry, stop_after_attempt, wait_exponential\n\nfrom kotaemon.base import Document\n\nfrom .utils.pdf_ocr import parse_ocr_output, read_pdf_unstructured\nfrom .utils.table import strip_special_chars_markdown\n\nlogger = logging.getLogger(__name__)\n\nDEFAULT_OCR_ENDPOINT = \"http://127.0.0.1:8000/v2/ai/infer/\"\n\n\n@retry(\n    stop=stop_after_attempt(6),\n    wait=wait_exponential(multiplier=20, exp_base=2, min=1, max=1000),\n    after=after_log(logger, logging.WARNING),\n)\ndef tenacious_api_post(url, file_path, table_only, **kwargs):\n    with file_path.open(\"rb\") as content:\n        files = {\"input\": content}\n        data = {\"job_id\": uuid4(), \"table_only\": table_only}\n        resp = requests.post(url=url, files=files, data=data, **kwargs)\n        resp.raise_for_status()\n    return resp\n\n\nclass OCRReader(BaseReader):\n    \"\"\"Read PDF using OCR, with high focus on table extraction\n\n    Example:\n        ```python\n        >> from kotaemon.loaders import OCRReader\n        >> reader = OCRReader()\n        >> documents = reader.load_data(\"path/to/pdf\")\n        ```\n\n    Args:\n        endpoint: URL to FullOCR endpoint. 
If not provided, will look for\n            environment variable `OCR_READER_ENDPOINT` or use the default\n            `kotaemon.loaders.ocr_loader.DEFAULT_OCR_ENDPOINT`\n            (http://127.0.0.1:8000/v2/ai/infer/)\n        use_ocr: whether to use OCR to read text (e.g: from images, tables) in the PDF\n            If False, only the table and text within table cells will be extracted.\n    \"\"\"\n\n    def __init__(self, endpoint: Optional[str] = None, use_ocr=True):\n        \"\"\"Init the OCR reader with OCR endpoint (FullOCR pipeline)\"\"\"\n        super().__init__()\n        self.ocr_endpoint = endpoint or os.getenv(\n            \"OCR_READER_ENDPOINT\", DEFAULT_OCR_ENDPOINT\n        )\n        self.use_ocr = use_ocr\n\n    def load_data(\n        self, file_path: Path, extra_info: Optional[dict] = None, **kwargs\n    ) -> List[Document]:\n        \"\"\"Load data using OCR reader\n\n        Args:\n            file_path (Path): Path to PDF file\n            debug_path (Path): Path to store debug image output\n            artifact_path (Path): Path to OCR endpoints artifacts directory\n\n        Returns:\n            List[Document]: list of documents extracted from the PDF file\n        \"\"\"\n        file_path = Path(file_path).resolve()\n\n        # call the API from FullOCR endpoint\n        if \"response_content\" in kwargs:\n            # overriding response content if specified\n            ocr_results = kwargs[\"response_content\"]\n        else:\n            # call original API\n            resp = tenacious_api_post(\n                url=self.ocr_endpoint, file_path=file_path, table_only=not self.use_ocr\n            )\n            ocr_results = resp.json()[\"result\"]\n\n        debug_path = kwargs.pop(\"debug_path\", None)\n        artifact_path = kwargs.pop(\"artifact_path\", None)\n\n        # read PDF through normal reader (unstructured)\n        pdf_page_items = read_pdf_unstructured(file_path)\n        # merge PDF text output with OCR 
output\n        tables, texts = parse_ocr_output(\n            ocr_results,\n            pdf_page_items,\n            debug_path=debug_path,\n            artifact_path=artifact_path,\n        )\n        extra_info = extra_info or {}\n\n        # create output Document with metadata from table\n        documents = [\n            Document(\n                text=strip_special_chars_markdown(table_text),\n                metadata={\n                    \"table_origin\": table_text,\n                    \"type\": \"table\",\n                    \"page_label\": page_id + 1,\n                    **extra_info,\n                },\n                metadata_template=\"\",\n                metadata_seperator=\"\",\n            )\n            for page_id, table_text in tables\n        ]\n        # create Document from non-table text\n        documents.extend(\n            [\n                Document(\n                    text=non_table_text,\n                    metadata={\"page_label\": page_id + 1, **extra_info},\n                )\n                for page_id, non_table_text in texts\n            ]\n        )\n\n        return documents\n\n\nclass ImageReader(BaseReader):\n    \"\"\"Read PDF using OCR, with high focus on table extraction\n\n    Example:\n        ```python\n        >> from kotaemon.loaders import OCRReader\n        >> reader = OCRReader()\n        >> documents = reader.load_data(\"path/to/pdf\")\n        ```\n\n    Args:\n        endpoint: URL to FullOCR endpoint. 
If not provided, will look for\n            environment variable `OCR_READER_ENDPOINT` or use the default\n            `kotaemon.loaders.ocr_loader.DEFAULT_OCR_ENDPOINT`\n            (http://127.0.0.1:8000/v2/ai/infer/)\n        use_ocr: whether to use OCR to read text (e.g: from images, tables) in the PDF\n            If False, only the table and text within table cells will be extracted.\n    \"\"\"\n\n    def __init__(self, endpoint: Optional[str] = None):\n        \"\"\"Init the OCR reader with OCR endpoint (FullOCR pipeline)\"\"\"\n        super().__init__()\n        self.ocr_endpoint = endpoint or os.getenv(\n            \"OCR_READER_ENDPOINT\", DEFAULT_OCR_ENDPOINT\n        )\n\n    def load_data(\n        self, file_path: Path, extra_info: Optional[dict] = None, **kwargs\n    ) -> List[Document]:\n        \"\"\"Load data using OCR reader\n\n        Args:\n            file_path (Path): Path to PDF file\n            debug_path (Path): Path to store debug image output\n            artifact_path (Path): Path to OCR endpoints artifacts directory\n\n        Returns:\n            List[Document]: list of documents extracted from the PDF file\n        \"\"\"\n        file_path = Path(file_path).resolve()\n\n        # call the API from FullOCR endpoint\n        if \"response_content\" in kwargs:\n            # overriding response content if specified\n            ocr_results = kwargs[\"response_content\"]\n        else:\n            # call original API\n            resp = tenacious_api_post(\n                url=self.ocr_endpoint, file_path=file_path, table_only=False\n            )\n            ocr_results = resp.json()[\"result\"]\n\n        extra_info = extra_info or {}\n        result = []\n        for ocr_result in ocr_results:\n            result.append(\n                Document(\n                    content=ocr_result[\"csv_string\"],\n                    metadata=extra_info,\n                )\n            )\n\n        return result\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/loaders/pdf_loader.py",
    "content": "import base64\nfrom io import BytesIO\nfrom pathlib import Path\nfrom typing import Dict, List, Optional\n\nfrom decouple import config\nfrom fsspec import AbstractFileSystem\nfrom llama_index.readers.file import PDFReader\nfrom PIL import Image\n\nfrom kotaemon.base import Document\n\nPDF_LOADER_DPI = config(\"PDF_LOADER_DPI\", default=40, cast=int)\n\n\ndef get_page_thumbnails(\n    file_path: Path, pages: list[int], dpi: int = PDF_LOADER_DPI\n) -> List[Image.Image]:\n    \"\"\"Get image thumbnails of the pages in the PDF file.\n\n    Args:\n        file_path (Path): path to the PDF file\n        pages (list[int]): list of zero-based page indices to extract\n        dpi (int): resolution used to render each page\n\n    Returns:\n        list[str]: page thumbnails as base64-encoded PNG data URLs\n    \"\"\"\n\n    img: Image.Image\n    suffix = file_path.suffix.lower()\n    assert suffix == \".pdf\", \"This function only supports PDF files.\"\n    try:\n        import fitz\n    except ImportError:\n        raise ImportError(\"Please install PyMuPDF: 'pip install PyMuPDF'\")\n\n    doc = fitz.open(file_path)\n\n    output_imgs = []\n    for page_number in pages:\n        page = doc.load_page(page_number)\n        pm = page.get_pixmap(dpi=dpi)\n        img = Image.frombytes(\"RGB\", [pm.width, pm.height], pm.samples)\n        output_imgs.append(convert_image_to_base64(img))\n\n    return output_imgs\n\n\ndef convert_image_to_base64(img: Image.Image) -> str:\n    # convert the image into base64\n    img_bytes = BytesIO()\n    img.save(img_bytes, format=\"PNG\")\n    img_base64 = base64.b64encode(img_bytes.getvalue()).decode(\"utf-8\")\n    img_base64 = f\"data:image/png;base64,{img_base64}\"\n\n    return img_base64\n\n\nclass PDFThumbnailReader(PDFReader):\n    \"\"\"PDF parser with thumbnail for each page.\"\"\"\n\n    def __init__(self) -> None:\n        \"\"\"\n        Initialize PDFReader.\n        \"\"\"\n        super().__init__(return_full_document=False)\n\n    def load_data(\n        self,\n        file: Path,\n    
    extra_info: Optional[Dict] = None,\n        fs: Optional[AbstractFileSystem] = None,\n    ) -> List[Document]:\n        \"\"\"Parse file.\"\"\"\n        documents = super().load_data(file, extra_info, fs)\n\n        page_numbers_str = []\n        filtered_docs = []\n        is_int_page_number: dict[str, bool] = {}\n\n        for doc in documents:\n            if \"page_label\" in doc.metadata:\n                page_num_str = doc.metadata[\"page_label\"]\n                page_numbers_str.append(page_num_str)\n                try:\n                    _ = int(page_num_str)\n                    is_int_page_number[page_num_str] = True\n                    filtered_docs.append(doc)\n                except ValueError:\n                    is_int_page_number[page_num_str] = False\n                    continue\n\n        documents = filtered_docs\n        page_numbers = list(range(len(page_numbers_str)))\n\n        print(\"Page numbers:\", len(page_numbers))\n        page_thumbnails = get_page_thumbnails(file, page_numbers)\n\n        documents.extend(\n            [\n                Document(\n                    text=\"Page thumbnail\",\n                    metadata={\n                        \"image_origin\": page_thumbnail,\n                        \"type\": \"thumbnail\",\n                        \"page_label\": page_number,\n                        **(extra_info if extra_info is not None else {}),\n                    },\n                )\n                for (page_thumbnail, page_number) in zip(\n                    page_thumbnails, page_numbers_str\n                )\n                if is_int_page_number[page_number]\n            ]\n        )\n\n        return documents\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/loaders/txt_loader.py",
    "content": "from pathlib import Path\nfrom typing import Optional\n\nfrom kotaemon.base import Document\n\nfrom .base import BaseReader\n\n\nclass TxtReader(BaseReader):\n    def run(\n        self, file_path: str | Path, extra_info: Optional[dict] = None, **kwargs\n    ) -> list[Document]:\n        return self.load_data(Path(file_path), extra_info=extra_info, **kwargs)\n\n    def load_data(\n        self, file_path: Path, extra_info: Optional[dict] = None, **kwargs\n    ) -> list[Document]:\n        with open(file_path, \"r\", encoding=\"utf-8\") as f:\n            text = f.read()\n\n        metadata = extra_info or {}\n        return [Document(text=text, metadata=metadata)]\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/loaders/unstructured_loader.py",
    "content": "\"\"\"Unstructured file reader.\n\nA parser for unstructured text files using Unstructured.io.\nSupports .txt, .docx, .pptx, .jpg, .png, .eml, .html, and .pdf documents.\n\nTo use .doc and .xls parser, install\n\nsudo apt-get install -y libmagic-dev poppler-utils libreoffice\npip install xlrd\n\n\"\"\"\nfrom pathlib import Path\nfrom typing import Any, Dict, List, Optional\n\nfrom llama_index.core.readers.base import BaseReader\n\nfrom kotaemon.base import Document\n\n\nclass UnstructuredReader(BaseReader):\n    \"\"\"General unstructured text reader for a variety of files.\"\"\"\n\n    def __init__(self, *args: Any, **kwargs: Any) -> None:\n        \"\"\"Init params.\"\"\"\n        super().__init__(*args)  # not passing kwargs to parent bc it cannot accept it\n\n        self.api = False  # we default to local\n        if \"url\" in kwargs:\n            self.server_url = str(kwargs[\"url\"])\n            self.api = True  # is url was set, switch to api\n        else:\n            self.server_url = \"http://localhost:8000\"\n\n        if \"api\" in kwargs:\n            self.api = kwargs[\"api\"]\n\n        self.api_key = \"\"\n        if \"api_key\" in kwargs:\n            self.api_key = kwargs[\"api_key\"]\n\n    \"\"\" Loads data using Unstructured.io\n\n        Depending on the construction if url is set or api = True\n        it'll parse file using API call, else parse it locally\n        additional_metadata is extended by the returned metadata if\n        split_documents is True\n\n        Returns list of documents\n    \"\"\"\n\n    def load_data(\n        self,\n        file: Path,\n        extra_info: Optional[Dict] = None,\n        split_documents: Optional[bool] = False,\n        **kwargs,\n    ) -> List[Document]:\n        \"\"\"If api is set, parse through api\"\"\"\n        file_path_str = str(file)\n        if self.api:\n            from unstructured.partition.api import partition_via_api\n\n            elements = partition_via_api(\n   
             filename=file_path_str,\n                api_key=self.api_key,\n                api_url=self.server_url + \"/general/v0/general\",\n            )\n        else:\n            \"\"\"Parse file locally\"\"\"\n            from unstructured.partition.auto import partition\n\n            elements = partition(filename=file_path_str)\n\n        \"\"\" Process elements \"\"\"\n        docs = []\n        file_name = Path(file).name\n        file_path = str(Path(file).resolve())\n        if split_documents:\n            for node in elements:\n                metadata = {\"file_name\": file_name, \"file_path\": file_path}\n                if hasattr(node, \"metadata\"):\n                    \"\"\"Load metadata fields\"\"\"\n                    for field, val in vars(node.metadata).items():\n                        if field == \"_known_field_names\":\n                            continue\n                        # removing coordinates because it does not serialize\n                        # and dont want to bother with it\n                        if field == \"coordinates\":\n                            continue\n                        # removing bc it might cause interference\n                        if field == \"parent_id\":\n                            continue\n                        metadata[field] = val\n\n                if extra_info is not None:\n                    metadata.update(extra_info)\n\n                metadata[\"file_name\"] = file_name\n                docs.append(Document(text=node.text, metadata=metadata))\n\n        else:\n            text_chunks = [\" \".join(str(el).split()) for el in elements]\n            metadata = {\"file_name\": file_name, \"file_path\": file_path}\n\n            if extra_info is not None:\n                metadata.update(extra_info)\n\n            # Create a single document by joining all the texts\n            docs.append(Document(text=\"\\n\\n\".join(text_chunks), metadata=metadata))\n\n        return docs\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/loaders/utils/__init__.py",
    "content": ""
  },
  {
    "path": "libs/kotaemon/kotaemon/loaders/utils/adobe.py",
    "content": "# need pip install pdfservices-sdk==2.3.0\n\nimport base64\nimport json\nimport logging\nimport os\nimport tempfile\nimport zipfile\nfrom concurrent.futures import ThreadPoolExecutor\nfrom pathlib import Path\nfrom typing import List, Union\n\nimport pandas as pd\nfrom decouple import config\n\nfrom kotaemon.loaders.utils.gpt4v import generate_gpt4v\n\n\ndef request_adobe_service(file_path: str, output_path: str = \"\") -> str:\n    \"\"\"Main function to call the adobe service, and unzip the results.\n    Args:\n        file_path (str): path to the pdf file\n        output_path (str): path to store the results\n\n    Returns:\n        output_path (str): path to the results\n\n    \"\"\"\n    try:\n        from adobe.pdfservices.operation.auth.credentials import Credentials\n        from adobe.pdfservices.operation.exception.exceptions import (\n            SdkException,\n            ServiceApiException,\n            ServiceUsageException,\n        )\n        from adobe.pdfservices.operation.execution_context import ExecutionContext\n        from adobe.pdfservices.operation.io.file_ref import FileRef\n        from adobe.pdfservices.operation.pdfops.extract_pdf_operation import (\n            ExtractPDFOperation,\n        )\n        from adobe.pdfservices.operation.pdfops.options.extractpdf.extract_element_type import (  # noqa: E501\n            ExtractElementType,\n        )\n        from adobe.pdfservices.operation.pdfops.options.extractpdf.extract_pdf_options import (  # noqa: E501\n            ExtractPDFOptions,\n        )\n        from adobe.pdfservices.operation.pdfops.options.extractpdf.extract_renditions_element_type import (  # noqa: E501\n            ExtractRenditionsElementType,\n        )\n    except ImportError:\n        raise ImportError(\n            \"pdfservices-sdk is not installed. 
\"\n            \"Please install it by running `pip install pdfservices-sdk\"\n            \"@git+https://github.com/niallcm/pdfservices-python-sdk.git\"\n            \"@bump-and-unfreeze-requirements`\"\n        )\n\n    if not output_path:\n        output_path = tempfile.mkdtemp()\n\n    try:\n        # Initial setup, create credentials instance.\n        credentials = (\n            Credentials.service_principal_credentials_builder()\n            .with_client_id(config(\"PDF_SERVICES_CLIENT_ID\", default=\"\"))\n            .with_client_secret(config(\"PDF_SERVICES_CLIENT_SECRET\", default=\"\"))\n            .build()\n        )\n\n        # Create an ExecutionContext using credentials\n        # and create a new operation instance.\n        execution_context = ExecutionContext.create(credentials)\n        extract_pdf_operation = ExtractPDFOperation.create_new()\n\n        # Set operation input from a source file.\n        source = FileRef.create_from_local_file(file_path)\n        extract_pdf_operation.set_input(source)\n\n        # Build ExtractPDF options and set them into the operation\n        extract_pdf_options: ExtractPDFOptions = (\n            ExtractPDFOptions.builder()\n            .with_elements_to_extract(\n                [ExtractElementType.TEXT, ExtractElementType.TABLES]\n            )\n            .with_elements_to_extract_renditions(\n                [\n                    ExtractRenditionsElementType.TABLES,\n                    ExtractRenditionsElementType.FIGURES,\n                ]\n            )\n            .build()\n        )\n        extract_pdf_operation.set_options(extract_pdf_options)\n\n        # Execute the operation.\n        result: FileRef = extract_pdf_operation.execute(execution_context)\n\n        # Save the result to the specified location.\n        zip_file_path = os.path.join(\n            output_path, \"ExtractTextTableWithFigureTableRendition.zip\"\n        )\n        result.save_as(zip_file_path)\n        # Open the 
ZIP file\n        with zipfile.ZipFile(zip_file_path, \"r\") as zip_ref:\n            # Extract all contents to the destination folder\n            zip_ref.extractall(output_path)\n    except (ServiceApiException, ServiceUsageException, SdkException):\n        logging.exception(\"Exception encountered while executing operation\")\n\n    return output_path\n\n\ndef make_markdown_table(table_as_list: List[List[str]]) -> str:\n    \"\"\"\n    Convert table from python list representation to markdown format.\n    The input list consists of rows of tables, the first row is the header.\n\n    Args:\n        table_as_list: list of table rows\n            Example: [[\"Name\", \"Age\", \"Height\"],\n                    [\"Jake\", 20, 5'10],\n                    [\"Mary\", 21, 5'7]]\n    Returns:\n        markdown representation of the table\n    \"\"\"\n    markdown = \"\\n\" + str(\"| \")\n\n    for e in table_as_list[0]:\n        to_add = \" \" + str(e) + str(\" |\")\n        markdown += to_add\n    markdown += \"\\n\"\n\n    markdown += \"| \"\n    for i in range(len(table_as_list[0])):\n        markdown += str(\"--- | \")\n    markdown += \"\\n\"\n\n    for entry in table_as_list[1:]:\n        markdown += str(\"| \")\n        for e in entry:\n            to_add = str(e) + str(\" | \")\n            markdown += to_add\n        markdown += \"\\n\"\n\n    return markdown + \"\\n\"\n\n\ndef load_json(input_path: Union[str | Path]) -> dict:\n    \"\"\"Load json file\"\"\"\n    with open(input_path, \"r\") as fi:\n        data = json.load(fi)\n\n    return data\n\n\ndef load_excel(input_path: Union[str | Path]) -> str:\n    \"\"\"Load excel file and convert to markdown\"\"\"\n\n    df = pd.read_excel(input_path).fillna(\"\")\n    # Convert dataframe to a list of rows\n    row_list = [df.columns.values.tolist()] + df.values.tolist()\n\n    for item_id, item in enumerate(row_list[0]):\n        if \"Unnamed\" in item:\n            row_list[0][item_id] = \"\"\n\n    for row in 
row_list:\n        for item_id, item in enumerate(row):\n            row[item_id] = str(item).replace(\"_x000D_\", \" \").replace(\"\\n\", \" \").strip()\n\n    markdown_str = make_markdown_table(row_list)\n    return markdown_str\n\n\ndef encode_image_base64(image_path: Union[str | Path]) -> Union[bytes, str]:\n    \"\"\"Convert image to base64\"\"\"\n\n    with open(image_path, \"rb\") as image_file:\n        return base64.b64encode(image_file.read()).decode(\"utf-8\")\n\n\ndef parse_table_paths(file_paths: List[Path]) -> str:\n    \"\"\"Read the table stored in an excel file given the file path\"\"\"\n\n    content = \"\"\n    for path in file_paths:\n        if path.suffix == \".xlsx\":\n            content = load_excel(path)\n            break\n    return content\n\n\ndef parse_figure_paths(file_paths: List[Path]) -> Union[bytes, str]:\n    \"\"\"Read and convert an image to base64 given the image path\"\"\"\n\n    content = \"\"\n    for path in file_paths:\n        if path.suffix == \".png\":\n            base64_image = encode_image_base64(path)\n            content = f\"data:image/png;base64,{base64_image}\"  # type: ignore\n            break\n    return content\n\n\ndef generate_single_figure_caption(vlm_endpoint: str, figure: str) -> str:\n    output = \"\"\n\n    \"\"\"Summarize a single figure using GPT-4V\"\"\"\n    if figure:\n        try:\n            output = generate_gpt4v(\n                endpoint=vlm_endpoint,\n                prompt=\"Provide a short 2 sentence summary of this image?\",\n                images=figure,\n            )\n            if \"sorry\" in output.lower():\n                output = \"\"\n        except Exception as e:\n            print(f\"Error generating caption: {e}\")\n\n    return output\n\n\ndef generate_figure_captions(\n    vlm_endpoint: str, figures: List, max_figures_to_process: int\n) -> List:\n    \"\"\"Summarize several figures using GPT-4V.\n    Args:\n        vlm_endpoint (str): endpoint to the vision 
language model service\n        figures (List): list of base64 images\n        max_figures_to_process (int): the maximum number of figures will be summarized,\n        the rest are ignored.\n\n    Returns:\n        results (List[str]): list of all figure captions and empty strings for\n        ignored figures.\n    \"\"\"\n    to_gen_figures = figures[:max_figures_to_process]\n    other_figures = figures[max_figures_to_process:]\n\n    with ThreadPoolExecutor() as executor:\n        futures = [\n            executor.submit(\n                lambda: generate_single_figure_caption(vlm_endpoint, figure)\n            )\n            for figure in to_gen_figures\n        ]\n\n    results = [future.result() for future in futures]\n    return results + [\"\"] * len(other_figures)\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/loaders/utils/box.py",
    "content": "from typing import List, Tuple\n\n\ndef bbox_to_points(box: List[int]):\n    \"\"\"Convert bounding box to list of points\"\"\"\n    x1, y1, x2, y2 = box\n    return [(x1, y1), (x2, y1), (x2, y2), (x1, y2)]\n\n\ndef points_to_bbox(points: List[Tuple[int, int]]):\n    \"\"\"Convert list of points to bounding box\"\"\"\n    all_x = [p[0] for p in points]\n    all_y = [p[1] for p in points]\n    return [min(all_x), min(all_y), max(all_x), max(all_y)]\n\n\ndef scale_points(points: List[Tuple[int, int]], scale_factor: float = 1.0):\n    \"\"\"Scale points by a scale factor\"\"\"\n    return [(int(pos[0] * scale_factor), int(pos[1] * scale_factor)) for pos in points]\n\n\ndef union_points(points: List[Tuple[int, int]]):\n    \"\"\"Return union bounding box of list of points\"\"\"\n    all_x = [p[0] for p in points]\n    all_y = [p[1] for p in points]\n    bbox = (min(all_x), min(all_y), max(all_x), max(all_y))\n    return bbox\n\n\ndef scale_box(box: List[int], scale_factor: float = 1.0):\n    \"\"\"Scale box by a scale factor\"\"\"\n    return [int(pos * scale_factor) for pos in box]\n\n\ndef box_h(box: List[int]):\n    \"Return box height\"\n    return box[3] - box[1]\n\n\ndef box_w(box: List[int]):\n    \"Return box width\"\n    return box[2] - box[0]\n\n\ndef box_area(box: List[int]):\n    \"Return box area\"\n    x1, y1, x2, y2 = box\n    return (x2 - x1) * (y2 - y1)\n\n\ndef get_rect_iou(gt_box: List[tuple], pd_box: List[tuple], iou_type=0) -> float:\n    \"\"\"Intersection over union on layout rectangle\n\n    Args:\n        gt_box: List[tuple]\n            A list contains bounding box coordinates of ground truth\n        pd_box: List[tuple]\n            A list contains bounding box coordinates of prediction\n        iou_type: int\n            0: intersection / union, normal IOU\n            1: intersection / min(areas), useful when boxes are under/over-segmented\n\n        Input format: [(x1, y1), (x2, y1), (x2, y2), (x1, y2)]\n        Annotation for each element in bbox:\n        (x1, y1)        (x2, y1)\n            +-------+\n            |       |\n            |       |\n            +-------+\n        (x1, y2)        (x2, y2)\n\n    Returns:\n        Intersection over union value (0.0 when iou_type is 0 and the union is empty)\n    \"\"\"\n\n    assert iou_type in [0, 1], \"Only support 0: origin iou, 1: intersection / min(area)\"\n\n    # determine the (x, y)-coordinates of the intersection rectangle\n    # gt_box: [(x1, y1), (x2, y1), (x2, y2), (x1, y2)]\n    # pd_box: [(x1, y1), (x2, y1), (x2, y2), (x1, y2)]\n    x_left = max(gt_box[0][0], pd_box[0][0])\n    y_top = max(gt_box[0][1], pd_box[0][1])\n    x_right = min(gt_box[2][0], pd_box[2][0])\n    y_bottom = min(gt_box[2][1], pd_box[2][1])\n\n    # compute the area of intersection rectangle\n    interArea = max(0, x_right - x_left) * max(0, y_bottom - y_top)\n\n    # compute the area of both the prediction and ground-truth\n    # rectangles\n    gt_area = (gt_box[2][0] - gt_box[0][0]) * (gt_box[2][1] - gt_box[0][1])\n    pd_area = (pd_box[2][0] - pd_box[0][0]) * (pd_box[2][1] - pd_box[0][1])\n\n    # compute the intersection over union by taking the intersection\n    # area and dividing it by the sum of prediction + ground-truth\n    # areas - the intersection area\n    if iou_type == 0:\n        union_area = gt_area + pd_area - interArea\n        # two degenerate (zero-area) boxes give an empty union: report no\n        # overlap instead of dividing by zero\n        iou = interArea / float(union_area) if union_area else 0.0\n    elif iou_type == 1:\n        iou = interArea / max(min(gt_area, pd_area), 1)\n\n    # return the intersection over union value\n    return iou\n\n\ndef sort_funsd_reading_order(lines: List[dict], box_key_name: str = \"box\"):\n    \"\"\"Sort cell list to create the right reading order using their locations\n\n    The input list is left untouched; a new list is returned.\n\n    Args:\n        lines: list of cells to sort\n        box_key_name: key under which each cell stores its [x1, y1, x2, y2] box\n\n    Returns:\n        a list of cell lists in the right reading order that contain\n        no key or start with a key and contain no other key\n    \"\"\"\n    sorted_list = []\n\n    if len(lines) == 0:\n        return lines\n\n    # work on a shallow copy so that the caller's list is not consumed\n    remaining = list(lines)\n\n    while len(remaining) > 1:\n        topleft_line = remaining[0]\n        for line in remaining[1:]:\n            topleft_line_pos = topleft_line[box_key_name]\n            topleft_line_center_y = (topleft_line_pos[1] + topleft_line_pos[3]) / 2\n            x1, y1, x2, y2 = line[box_key_name]\n            box_center_x = (x1 + x2) / 2\n            box_center_y = (y1 + y2) / 2\n            cell_h = y2 - y1\n            if box_center_y <= topleft_line_center_y - cell_h / 2:\n                topleft_line = line\n                continue\n            if (\n                box_center_x < topleft_line_pos[2]\n                and box_center_y < topleft_line_pos[3]\n            ):\n                topleft_line = line\n                continue\n        sorted_list.append(topleft_line)\n        remaining.remove(topleft_line)\n\n    sorted_list.append(remaining[0])\n\n    return sorted_list\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/loaders/utils/gpt4v.py",
    "content": "import json\nimport logging\nfrom typing import Any, List\n\nimport requests\nfrom decouple import config\n\nlogger = logging.getLogger(__name__)\n\n\ndef generate_gpt4v(\n    endpoint: str,\n    images: str | List[str],\n    prompt: str,\n    max_tokens: int = 512,\n    max_images: int = 10,\n) -> str:\n    \"\"\"Send a prompt together with images to a chat completion endpoint\n\n    Args:\n        endpoint: URL the request is posted to\n        images: a single image URL or a list of image URLs\n        prompt: text part of the user message\n        max_tokens: value of the `max_tokens` field of the request\n        max_images: only the first `max_images` images are sent\n\n    Returns:\n        content of the first returned choice, or an empty string if the\n        endpoint answers with an HTTP error status\n    \"\"\"\n    # Azure OpenAI API key\n    api_key = config(\"AZURE_OPENAI_API_KEY\", default=\"\")\n    headers = {\"Content-Type\": \"application/json\", \"api-key\": api_key}\n\n    if isinstance(images, str):\n        images = [images]\n\n    payload = {\n        \"messages\": [\n            {\n                \"role\": \"user\",\n                \"content\": [\n                    {\"type\": \"text\", \"text\": prompt},\n                ]\n                + [\n                    {\n                        \"type\": \"image_url\",\n                        \"image_url\": {\"url\": image},\n                    }\n                    for image in images[:max_images]\n                ],\n            }\n        ],\n        \"max_tokens\": max_tokens,\n        \"temperature\": 0,\n    }\n\n    if len(images) > max_images:\n        print(f\"Truncated to {max_images} images (original {len(images)} images)\")\n\n    response = requests.post(endpoint, headers=headers, json=payload)\n\n    try:\n        response.raise_for_status()\n    except Exception as e:\n        logger.exception(f\"Error generating gpt4v: {response.text}; error {e}\")\n        return \"\"\n\n    output = response.json()\n    output = output[\"choices\"][0][\"message\"][\"content\"]\n    return output\n\n\ndef stream_gpt4v(\n    endpoint: str,\n    images: str | List[str],\n    prompt: str,\n    max_tokens: int = 512,\n    max_images: int = 10,\n) -> Any:\n    \"\"\"Stream the answer of a chat completion endpoint for a prompt with images\n\n    Args:\n        endpoint: URL the request is posted to\n        images: a single image URL or a list of image URLs\n        prompt: text part of the user message\n        max_tokens: value of the `max_tokens` field of the request\n        max_images: only the first `max_images` images are sent\n\n    Yields:\n        (content delta, list of logprobs) for every received chunk that\n        carries at least one choice\n\n    Returns:\n        the accumulated (output, logprobs) as the return value of the generator;\n        both are reset to empty if an error interrupts the stream\n    \"\"\"\n    # Azure OpenAI API key\n    api_key = config(\"AZURE_OPENAI_API_KEY\", default=\"\")\n    headers = {\"Content-Type\": \"application/json\", \"api-key\": api_key}\n\n    if isinstance(images, str):\n        images = [images]\n\n    payload = {\n        \"messages\": [\n            {\n                \"role\": \"user\",\n                \"content\": [\n                    {\"type\": \"text\", \"text\": prompt},\n                ]\n                + [\n                    {\n                        \"type\": \"image_url\",\n                        \"image_url\": {\"url\": image},\n                    }\n                    for image in images[:max_images]\n                ],\n            }\n        ],\n        \"max_tokens\": max_tokens,\n        \"stream\": True,\n        \"logprobs\": True,\n        \"temperature\": 0,\n    }\n    if len(images) > max_images:\n        print(f\"Truncated to {max_images} images (original {len(images)} images)\")\n    try:\n        response = requests.post(endpoint, headers=headers, json=payload, stream=True)\n        assert response.status_code == 200, str(response.content)\n        output = \"\"\n        logprobs = []\n        for line in response.iter_lines():\n            if line:\n                # drop the 6-byte \"data: \" prefix (plus a 3-byte UTF-8 BOM if any)\n                if line.startswith(b\"\\xef\\xbb\\xbf\"):\n                    line = line[9:]\n                else:\n                    line = line[6:]\n                try:\n                    # iter_lines() yields bytes, so the end-of-stream marker\n                    # has to be compared as bytes\n                    if line.strip() == b\"[DONE]\":\n                        break\n                    line = json.loads(line.decode(\"utf-8\"))\n                except Exception:\n                    break\n                if len(line[\"choices\"]):\n                    if line[\"choices\"][0].get(\"logprobs\") is None:\n                        _logprobs = []\n                    else:\n                        _logprobs = [\n                            logprob[\"logprob\"]\n                            for logprob in line[\"choices\"][0][\"logprobs\"].get(\n                                \"content\", []\n                            )\n                        ]\n\n                    output += line[\"choices\"][0][\"delta\"].get(\"content\", \"\")\n                    logprobs += _logprobs\n                    yield line[\"choices\"][0][\"delta\"].get(\"content\", \"\"), _logprobs\n\n    except Exception as e:\n        logger.error(f\"Error streaming gpt4v {e}\")\n        logprobs = []\n        output = \"\"\n\n    return output, logprobs\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/loaders/utils/pdf_ocr.py",
    "content": "from collections import defaultdict\nfrom pathlib import Path\nfrom typing import Dict, List, Optional, Union\n\nfrom .box import (\n    bbox_to_points,\n    box_area,\n    box_h,\n    box_w,\n    get_rect_iou,\n    points_to_bbox,\n    scale_box,\n    scale_points,\n    sort_funsd_reading_order,\n    union_points,\n)\nfrom .table import table_cells_to_markdown\n\nIOU_THRES = 0.5\nPADDING_THRES = 1.1\n\n\ndef read_pdf_unstructured(input_path: Union[Path, str]):\n    \"\"\"Convert PDF from specified path to list of text items with\n    location information\n\n    Args:\n        input_path: path to input file\n\n    Returns:\n        Dict page_number: list of text boxes\n    \"\"\"\n    try:\n        from unstructured.partition.auto import partition\n    except ImportError as e:\n        raise ImportError(\n            \"Please install unstructured PDF reader `pip install unstructured[pdf]`: \"\n            f\"{e}\"\n        )\n\n    page_items = defaultdict(list)\n    items = partition(input_path)\n    for item in items:\n        page_number = item.metadata.page_number\n        bbox = points_to_bbox(item.metadata.coordinates.points)\n        coord_system = item.metadata.coordinates.system\n        max_w, max_h = coord_system.width, coord_system.height\n        page_items[page_number - 1].append(\n            {\n                \"text\": item.text,\n                \"box\": bbox,\n                \"location\": bbox_to_points(bbox),\n                \"page_shape\": (max_w, max_h),\n            }\n        )\n\n    return page_items\n\n\ndef merge_ocr_and_pdf_texts(\n    ocr_list: List[dict], pdf_text_list: List[dict], debug_info=None\n):\n    \"\"\"Merge PDF and OCR text using IOU overlapping location\n    Args:\n        ocr_list: List of OCR items {\"text\", \"box\", \"location\"}\n        pdf_text_list: List of PDF items {\"text\", \"box\", \"location\"}\n\n    Returns:\n        Combined list of PDF text and non-overlap OCR text\n    \"\"\"\n    not_matched_ocr = []\n\n    # check for debug info\n    if debug_info is not None:\n        cv2, debug_im = debug_info\n\n    for ocr_item in ocr_list:\n        matched = False\n        for pdf_item in pdf_text_list:\n            if (\n                get_rect_iou(ocr_item[\"location\"], pdf_item[\"location\"], iou_type=1)\n                > IOU_THRES\n            ):\n                matched = True\n                break\n\n        color = (255, 0, 0)\n        if not matched:\n            ocr_item[\"matched\"] = False\n            not_matched_ocr.append(ocr_item)\n            color = (0, 255, 255)\n\n        if debug_info is not None:\n            cv2.rectangle(\n                debug_im,\n                ocr_item[\"location\"][0],\n                ocr_item[\"location\"][2],\n                color=color,\n                thickness=1,\n            )\n\n    if debug_info is not None:\n        for pdf_item in pdf_text_list:\n            cv2.rectangle(\n                debug_im,\n                pdf_item[\"location\"][0],\n                pdf_item[\"location\"][2],\n                color=(0, 255, 0),\n                thickness=2,\n            )\n\n    return pdf_text_list + not_matched_ocr\n\n\ndef merge_table_cell_and_ocr(\n    table_list: List[dict], ocr_list: List[dict], pdf_list: List[dict], debug_info=None\n):\n    \"\"\"Merge table items with OCR text using IOU overlapping location\n    Args:\n        table_list: List of table items\n            {\"type\": (\"table\", \"cell\", \"text\"), \"text\", \"box\", \"location\"}\n        ocr_list: List of OCR items {\"text\", \"box\", \"location\"}\n        pdf_list: List of PDF items {\"text\", \"box\", \"location\"}\n\n    Returns:\n        all_table_cells: List of tables, each of table is represented\n            by list of cells with combined text from OCR\n        not_matched_items: List of PDF text which is not overlapped by table region\n    \"\"\"\n    # check for debug info\n    if debug_info is not None:\n        cv2, debug_im = debug_info\n\n    cell_list = [item for item in table_list if item[\"type\"] == \"cell\"]\n    table_list = [item for item in table_list if item[\"type\"] == \"table\"]\n\n    # sort table by area\n    table_list = sorted(table_list, key=lambda item: box_area(item[\"bbox\"]))\n\n    all_tables = []\n    matched_pdf_ids = []\n    matched_cell_ids = []\n\n    for table in table_list:\n        if debug_info is not None:\n            cv2.rectangle(\n                debug_im,\n                table[\"location\"][0],\n                table[\"location\"][2],\n                color=[0, 0, 255],\n                thickness=5,\n            )\n\n        cur_table_cells = []\n        for cell_id, cell in enumerate(cell_list):\n            if cell_id in matched_cell_ids:\n                continue\n\n            if get_rect_iou(\n                table[\"location\"], cell[\"location\"], iou_type=1\n            ) > IOU_THRES and box_area(table[\"bbox\"]) > box_area(cell[\"bbox\"]):\n                color = [128, 0, 128]\n                # cell matched to table\n                for item_list, item_type in [(pdf_list, \"pdf\"), (ocr_list, \"ocr\")]:\n                    cell[\"ocr\"] = []\n                    for item_id, item in enumerate(item_list):\n                        if item_type == \"pdf\" and item_id in matched_pdf_ids:\n                            continue\n                        if (\n                            get_rect_iou(item[\"location\"], cell[\"location\"], iou_type=1)\n                            > IOU_THRES\n                        ):\n                            cell[\"ocr\"].append(item)\n                            if item_type == \"pdf\":\n                                matched_pdf_ids.append(item_id)\n\n                    if len(cell[\"ocr\"]) > 0:\n                        # check if union of matched ocr does\n                        # not extend over cell boundary,\n                        # if True, continue to use OCR_list to match\n                        all_box_points_in_cell = []\n                        for item in cell[\"ocr\"]:\n                            all_box_points_in_cell.extend(item[\"location\"])\n                        union_box = union_points(all_box_points_in_cell)\n                        cell_okay = (\n                            box_h(union_box) <= box_h(cell[\"bbox\"]) * PADDING_THRES\n                            and box_w(union_box) <= box_w(cell[\"bbox\"]) * PADDING_THRES\n                        )\n                    else:\n                        cell_okay = False\n\n                    if cell_okay:\n                        if item_type == \"pdf\":\n                            color = [255, 0, 255]\n                        break\n\n                if debug_info is not None:\n                    cv2.rectangle(\n                        debug_im,\n                        cell[\"location\"][0],\n                        cell[\"location\"][2],\n                        color=color,\n                        thickness=3,\n                    )\n\n                matched_cell_ids.append(cell_id)\n                cur_table_cells.append(cell)\n\n        all_tables.append(cur_table_cells)\n\n    not_matched_items = [\n        item for _id, item in enumerate(pdf_list) if _id not in matched_pdf_ids\n    ]\n    if debug_info is not None:\n        for item in not_matched_items:\n            cv2.rectangle(\n                debug_im,\n                item[\"location\"][0],\n                item[\"location\"][2],\n                color=[128, 128, 128],\n                thickness=3,\n            )\n\n    return all_tables, not_matched_items\n\n\ndef parse_ocr_output(\n    ocr_page_items: List[dict],\n    pdf_page_items: Dict[int, List[dict]],\n    artifact_path: Optional[str] = None,\n    debug_path: Optional[str] = None,\n):\n    \"\"\"Main function to combine OCR output and PDF text to\n    form list of table / non-table regions\n    Args:\n        ocr_page_items: List of OCR items by page\n        pdf_page_items: Dict of PDF texts (page number as key)\n        artifact_path: Folder holding the page images referenced by the OCR items;\n            debug images are only produced when it is specified as well\n        debug_path: If specified, use OpenCV to plot debug image and save to debug_path\n\n    Returns:\n        all_tables: List of (page_id, markdown table) tuples\n        all_texts: List of (page_id, non-table text) tuples\n    \"\"\"\n    all_tables = []\n    all_texts = []\n\n    for page_id, page in enumerate(ocr_page_items):\n        ocr_list = page[\"json\"][\"ocr\"]\n        table_list = page[\"json\"][\"table\"]\n        page_shape = page[\"image_shape\"]\n        pdf_item_list = pdf_page_items[page_id]\n\n        # create bbox additional information\n        for item in ocr_list:\n            item[\"box\"] = points_to_bbox(item[\"location\"])\n\n        # re-scale pdf items according to new image size\n        for item in pdf_item_list:\n            scale_factor = page_shape[0] / item[\"page_shape\"][0]\n            item[\"box\"] = scale_box(item[\"box\"], scale_factor=scale_factor)\n            item[\"location\"] = scale_points(item[\"location\"], scale_factor=scale_factor)\n\n        # if using debug mode, openCV must be installed\n        if debug_path and artifact_path is not None:\n            try:\n                import cv2\n            except ImportError:\n                raise ImportError(\n                    \"Please install openCV first to use OCRReader debug mode\"\n                )\n            image_path = Path(artifact_path) / page[\"image\"]\n            image = cv2.imread(str(image_path))\n            debug_info = (cv2, image)\n        else:\n            debug_info = None\n\n        new_pdf_list = merge_ocr_and_pdf_texts(\n            ocr_list, pdf_item_list, debug_info=debug_info\n        )\n\n        # sort by reading order\n        ocr_list = sort_funsd_reading_order(ocr_list)\n        new_pdf_list = sort_funsd_reading_order(new_pdf_list)\n\n        all_table_cells, non_table_text_list = merge_table_cell_and_ocr(\n            table_list, ocr_list, new_pdf_list, debug_info=debug_info\n        )\n\n        table_texts = [table_cells_to_markdown(cells) for cells in all_table_cells]\n        all_tables.extend([(page_id, text) for text in table_texts])\n        all_texts.append(\n            (page_id, \" \".join(item[\"text\"] for item in non_table_text_list))\n        )\n\n        # export debug image to debug_path; cv2 and image only exist when\n        # debug_info was built above (debug_path and artifact_path both given)\n        if debug_info is not None:\n            cv2.imwrite(str(Path(debug_path) / \"page_{}.png\".format(page_id)), image)\n\n    return all_tables, all_texts\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/loaders/utils/table.py",
    "content": "import csv\nfrom io import StringIO\nfrom typing import List, Optional, Tuple\n\nfrom .box import get_rect_iou\n\n\ndef check_col_conflicts(\n    col_a: List[str], col_b: List[str], thres: float = 0.15\n) -> bool:\n    \"\"\"Check if 2 columns A and B have non-empty content in the same row\n    (to be used with merge_cols)\n\n    Args:\n        col_a: column A (list of str)\n        col_b: column B (list of str)\n        thres: percentage of overlapping allowed\n    Returns:\n        if number of overlapping greater than threshold\n    \"\"\"\n    num_rows = len([cell for cell in col_a if cell])\n    assert len(col_a) == len(col_b)\n    conflict_count = 0\n    for cell_a, cell_b in zip(col_a, col_b):\n        if cell_a and cell_b:\n            conflict_count += 1\n    return conflict_count > num_rows * thres\n\n\ndef merge_cols(col_a: List[str], col_b: List[str]) -> List[str]:\n    \"\"\"Merge column A and B if they do not have conflict rows\n\n    Args:\n        col_a: column A (list of str)\n        col_b: column B (list of str)\n    Returns:\n        merged column\n    \"\"\"\n    for r_id in range(len(col_a)):\n        if col_b[r_id]:\n            col_a[r_id] = col_a[r_id] + \" \" + col_b[r_id]\n    return col_a\n\n\ndef add_index_col(csv_rows: List[List[str]]) -> List[List[str]]:\n    \"\"\"Add index column as the first column of the table csv_rows\n\n    Args:\n        csv_rows: input table\n    Returns:\n        output table with index column\n    \"\"\"\n    new_csv_rows = [[\"row id\"] + [\"\"] * len(csv_rows[0])]\n    for r_id, row in enumerate(csv_rows):\n        new_csv_rows.append([str(r_id + 1)] + row)\n    return new_csv_rows\n\n\ndef compress_csv(csv_rows: List[List[str]]) -> List[List[str]]:\n    \"\"\"Compress table csv_rows by merging sparse columns (merge_cols)\n\n    Args:\n        csv_rows: input table\n    Returns:\n        output: compressed table\n    \"\"\"\n    csv_cols = [[r[c_id] for r in csv_rows] for c_id in range(len(csv_rows[0]))]\n    to_remove_col_ids = []\n    last_c_id = 0\n    for c_id in range(1, len(csv_cols)):\n        if not check_col_conflicts(csv_cols[last_c_id], csv_cols[c_id]):\n            to_remove_col_ids.append(c_id)\n            csv_cols[last_c_id] = merge_cols(csv_cols[last_c_id], csv_cols[c_id])\n        else:\n            last_c_id = c_id\n\n    csv_cols = [r for c_id, r in enumerate(csv_cols) if c_id not in to_remove_col_ids]\n    csv_rows = [[c[r_id] for c in csv_cols] for r_id in range(len(csv_cols[0]))]\n    return csv_rows\n\n\ndef get_table_from_ocr(ocr_list: List[dict], table_list: List[dict]):\n    \"\"\"Get list of text lines that belong to table regions specified by table_list\n\n    Args:\n        ocr_list: list of OCR output in Casia format (Flax)\n        table_list: list of table output in Casia format (Flax)\n\n    Returns:\n        one list of OCR texts per item of type \"table\" in table_list, holding\n        the texts whose location overlaps the table region (IOU above 0.8)\n    \"\"\"\n    table_texts = []\n    for table in table_list:\n        if table[\"type\"] != \"table\":\n            continue\n        cur_table_texts = []\n        for ocr in ocr_list:\n            _iou = get_rect_iou(table[\"location\"], ocr[\"location\"], iou_type=1)\n            if _iou > 0.8:\n                cur_table_texts.append(ocr[\"text\"])\n        table_texts.append(cur_table_texts)\n\n    return table_texts\n\n\ndef make_markdown_table(array: List[List[str]]) -> str:\n    \"\"\"Convert table rows in list format to markdown string\n\n    Args:\n        Python list with rows of table as lists\n        First element as header.\n        Example Input:\n                [[\"Name\", \"Age\", \"Height\"],\n                [\"Jake\", 20, 5'10],\n                [\"Mary\", 21, 5'7]]\n    Returns:\n        String to put into a .md file\n    \"\"\"\n    array = compress_csv(array)\n    array = add_index_col(array)\n    markdown = \"\\n\" + str(\"| \")\n\n    for e in array[0]:\n        to_add = \" \" + str(e) + str(\" |\")\n        markdown += to_add\n    markdown += \"\\n\"\n\n    markdown += \"| \"\n    for i in range(len(array[0])):\n        markdown += str(\"--- | \")\n    markdown += \"\\n\"\n\n    for entry in array[1:]:\n        markdown += str(\"| \")\n        for e in entry:\n            to_add = str(e) + str(\" | \")\n            markdown += to_add\n        markdown += \"\\n\"\n\n    return markdown + \"\\n\"\n\n\ndef parse_csv_string_to_list(csv_str: str) -> List[List[str]]:\n    \"\"\"Convert CSV string to list of rows\n\n    Args:\n        csv_str: input CSV string\n\n    Returns:\n        Output table in list format\n    \"\"\"\n    io = StringIO(csv_str)\n    csv_reader = csv.reader(io, delimiter=\",\")\n    rows = [row for row in csv_reader]\n    return rows\n\n\ndef format_cell(cell: str, length_limit: Optional[int] = None) -> str:\n    \"\"\"Format cell content by remove redundant character and enforce length limit\n\n    Args:\n        cell: input cell text\n        length_limit: limit of text length.\n\n    Returns:\n        new cell text\n    \"\"\"\n    cell = cell.replace(\"\\n\", \" \")\n    if length_limit:\n        cell = cell[:length_limit]\n    return cell\n\n\ndef extract_tables_from_csv_string(\n    csv_content: str, table_texts: List[List[str]]\n) -> Tuple[List[str], str]:\n    \"\"\"Extract list of table from FullOCR output\n    (csv_content) with the specified table_texts\n\n    Args:\n        csv_content: CSV output from FullOCR pipeline\n        table_texts: list of table texts extracted\n        from get_table_from_ocr()\n\n    Returns:\n        List of tables and non-text content\n    \"\"\"\n    rows = parse_csv_string_to_list(csv_content)\n    used_row_ids = []\n    table_csv_list = []\n    for table in table_texts:\n        cur_rows = []\n        for row_id, row in enumerate(rows):\n            scores = [\n                any(cell in cell_reference for cell in table)\n                for cell_reference in row\n                if cell_reference\n            ]\n            # a row without any non-empty cell cannot be matched to a table\n            # (and would otherwise cause a division by zero below)\n            if not scores:\n                continue\n            score = sum(scores) / len(scores)\n            if score > 0.5 and row_id not in used_row_ids:\n                used_row_ids.append(row_id)\n                cur_rows.append([format_cell(cell) for cell in row])\n        if cur_rows:\n            table_csv_list.append(make_markdown_table(cur_rows))\n        else:\n            print(\"table not matched\", table)\n\n    non_table_rows = [\n        row for row_id, row in enumerate(rows) if row_id not in used_row_ids\n    ]\n    non_table_text = \"\\n\".join(\n        \" \".join(format_cell(cell) for cell in row) for row in non_table_rows\n    )\n    return table_csv_list, non_table_text\n\n\ndef strip_special_chars_markdown(text: str) -> str:\n    \"\"\"Strip special characters from input text in markdown table format\"\"\"\n    return text.replace(\"|\", \"\").replace(\":---:\", \"\").replace(\"---\", \"\")\n\n\ndef parse_markdown_text_to_tables(text: str) -> Tuple[List[str], List[str]]:\n    \"\"\"Convert markdown text to list of table spans and non-table spans\n\n    Args:\n        text: input markdown text\n\n    Returns:\n        list of table spans and non-table spans\n    \"\"\"\n    # init empty tables and texts list\n    tables = []\n    texts = []\n\n    # split input by line break\n    lines = text.split(\"\\n\")\n    cur_table = []\n    cur_text: List[str] = []\n    for line in lines:\n        line = line.strip()\n        if line.startswith(\"|\"):\n            if len(cur_text) > 0:\n                texts.append(cur_text)\n                cur_text = []\n            cur_table.append(line)\n        else:\n            # add new table to the list\n            if len(cur_table) > 0:\n                tables.append(cur_table)\n                cur_table = []\n            cur_text.append(line)\n\n    # flush the span that is still being collected when the input ends,\n    # otherwise the last table / text of the document is lost\n    if len(cur_table) > 0:\n        tables.append(cur_table)\n    if len(cur_text) > 0:\n        texts.append(cur_text)\n\n    table_texts = [\"\\n\".join(table) for table in tables]\n    non_table_texts = [\"\\n\".join(text) for text in texts]\n    return table_texts, non_table_texts\n\n\ndef table_cells_to_markdown(cells: List[dict]):\n    \"\"\"Convert list of cells with attached text to Markdown table\"\"\"\n\n    if len(cells) == 0:\n        return \"\"\n\n    all_row_ids = []\n    all_col_ids = []\n    for cell in cells:\n        all_row_ids.extend(cell[\"rows\"])\n        all_col_ids.extend(cell[\"columns\"])\n\n    num_rows, num_cols = max(all_row_ids) + 1, max(all_col_ids) + 1\n    table_rows = [[\"\" for c in range(num_cols)] for r in range(num_rows)]\n\n    # start filling in the grid\n    for cell in cells:\n        cell_text = \" \".join(item[\"text\"] for item in cell[\"ocr\"])\n        start_row_id, end_row_id = cell[\"rows\"]\n        start_col_id, end_col_id = cell[\"columns\"]\n        span_cell = end_row_id != start_row_id or end_col_id != start_col_id\n\n        # do not repeat long text in span cell to prevent context length issue\n        if span_cell and len(cell_text.replace(\" \", \"\")) < 20 and start_row_id > 0:\n            for row in range(start_row_id, end_row_id + 1):\n                for col in range(start_col_id, end_col_id + 1):\n                    table_rows[row][col] += cell_text + \" \"\n        else:\n            table_rows[start_row_id][start_col_id] += cell_text + \" \"\n\n    return make_markdown_table(table_rows)\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/loaders/web_loader.py",
    "content": "from pathlib import Path\nfrom typing import Optional\n\nimport requests\nfrom decouple import config\n\nfrom kotaemon.base import Document\n\nfrom .base import BaseReader\n\nJINA_API_KEY = config(\"JINA_API_KEY\", default=\"\")\nJINA_URL = config(\"JINA_URL\", default=\"https://r.jina.ai/\")\n\n\nclass WebReader(BaseReader):\n    \"\"\"Load the content of a web page through the reader service at JINA_URL\"\"\"\n\n    def run(\n        self, file_path: str | Path, extra_info: Optional[dict] = None, **kwargs\n    ) -> list[Document]:\n        # hand the URL over unchanged: Path() would collapse the double slash\n        # after the scheme (https://host becomes https:/host)\n        return self.load_data(file_path, extra_info=extra_info, **kwargs)\n\n    def fetch_url(self, url: str):\n        \"\"\"Request `url` through the reader service and return the response text\n\n        Raises:\n            requests.HTTPError: if the service answers with an error status\n        \"\"\"\n        # setup the request against the configured reader endpoint\n        api_url = f\"{JINA_URL.rstrip('/')}/{url}\"\n        headers = {\n            \"X-With-Links-Summary\": \"true\",\n        }\n        if JINA_API_KEY:\n            headers[\"Authorization\"] = f\"Bearer {JINA_API_KEY}\"\n\n        response = requests.get(api_url, headers=headers)\n        response.raise_for_status()\n\n        data = response.text\n        return data\n\n    def load_data(\n        self, file_path: str | Path, extra_info: Optional[dict] = None, **kwargs\n    ) -> list[Document]:\n        \"\"\"Fetch the page at `file_path` (a URL) and wrap its text in one Document\n\n        Args:\n            file_path: URL of the page to load\n            extra_info: metadata attached to the returned document\n        \"\"\"\n        file_path = str(file_path)\n        output = self.fetch_url(file_path)\n        metadata = extra_info or {}\n\n        return [Document(text=output, metadata=metadata)]\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/parsers/__init__.py",
    "content": "from .regex_extractor import FirstMatchRegexExtractor, RegexExtractor\n\n__all__ = [\"RegexExtractor\", \"FirstMatchRegexExtractor\"]\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/parsers/regex_extractor.py",
    "content": "from __future__ import annotations\n\nimport re\nfrom typing import Callable\n\nfrom kotaemon.base import BaseComponent, Document, ExtractorOutput, Param\n\n\nclass RegexExtractor(BaseComponent):\n    \"\"\"\n    Simple class for extracting text from a document using a regex pattern.\n\n    Args:\n        pattern (List[str]): The regex pattern(s) to use.\n        output_map (dict, optional): A mapping from extracted text to the\n            desired output. Defaults to None.\n    \"\"\"\n\n    class Config:\n        middleware_switches = {\"theflow.middleware.CachingMiddleware\": False}\n\n    pattern: list[str]\n    output_map: dict[str, str] | Callable[[str], str] = Param(\n        default_callback=lambda *_: {}\n    )\n\n    def __init__(self, pattern: str | list[str], **kwargs):\n        if isinstance(pattern, str):\n            pattern = [pattern]\n        super().__init__(pattern=pattern, **kwargs)\n\n    @staticmethod\n    def run_raw_static(pattern: str, text: str) -> list[str]:\n        \"\"\"\n        Finds all non-overlapping occurrences of a pattern in a string.\n\n        Parameters:\n            pattern (str): The regular expression pattern to search for.\n            text (str): The input string to search in.\n\n        Returns:\n            List[str]: A list of all non-overlapping occurrences of the pattern in the\n                string.\n        \"\"\"\n        return re.findall(pattern, text)\n\n    @staticmethod\n    def map_output(text, output_map) -> str:\n        \"\"\"\n        Maps the given `text` to its corresponding value in the `output_map` dictionary.\n\n        Parameters:\n            text (str): The input text to be mapped.\n            output_map (dict): A dictionary containing mapping of input text to output\n                values.\n\n        Returns:\n            str: The corresponding value from the `output_map` if `text` is found in the\n                dictionary, otherwise returns the original `text`.\n        
\"\"\"\n        if not output_map:\n            return text\n\n        if isinstance(output_map, dict):\n            return output_map.get(text, text)\n\n        return output_map(text)\n\n    def run_raw(self, text: str) -> ExtractorOutput:\n        \"\"\"\n        Matches the raw text against the pattern and rans the output mapping, returning\n            an instance of ExtractorOutput.\n\n        Args:\n            text (str): The raw text to be processed.\n\n        Returns:\n            ExtractorOutput: The processed output as a list of ExtractorOutput.\n        \"\"\"\n        output: list[str] = sum(\n            [self.run_raw_static(p, text) for p in self.pattern], []\n        )\n        output = [self.map_output(text, self.output_map) for text in output]\n\n        return ExtractorOutput(\n            text=output[0] if output else \"\",\n            matches=output,\n            metadata={\"origin\": \"RegexExtractor\"},\n        )\n\n    def run(\n        self, text: str | list[str] | Document | list[Document]\n    ) -> list[ExtractorOutput]:\n        \"\"\"Match the input against a pattern and return the output for each input\n\n        Parameters:\n            text: contains the input string to be processed\n\n        Returns:\n            A list contains the output ExtractorOutput for each input\n\n        Example:\n            ```pycon\n            >>> document1 = Document(...)\n            >>> document2 = Document(...)\n            >>> document_batch = [document1, document2]\n            >>> batch_output = self(document_batch)\n            >>> print(batch_output)\n            [output1_document1, output1_document2]\n            ```\n        \"\"\"\n        # TODO: this conversion seems common\n        input_: list[str] = []\n        if not isinstance(text, list):\n            text = [text]\n\n        for item in text:\n            if isinstance(item, str):\n                input_.append(item)\n            elif isinstance(item, Document):\n              
  input_.append(item.text)\n            else:\n                raise ValueError(\n                    f\"Invalid input type {type(item)}, should be str or Document\"\n                )\n\n        output = []\n        for each_input in input_:\n            output.append(self.run_raw(each_input))\n\n        return output\n\n\nclass FirstMatchRegexExtractor(RegexExtractor):\n    pattern: list[str]\n\n    def run_raw(self, text: str) -> ExtractorOutput:\n        for p in self.pattern:\n            output = self.run_raw_static(p, text)\n            if output:\n                output = [self.map_output(text, self.output_map) for text in output]\n                return ExtractorOutput(\n                    text=output[0],\n                    matches=output,\n                    metadata={\"origin\": \"FirstMatchRegexExtractor\"},\n                )\n\n        return ExtractorOutput(\n            text=None, matches=[], metadata={\"origin\": \"FirstMatchRegexExtractor\"}\n        )\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/rerankings/__init__.py",
    "content": "from .base import BaseReranking\nfrom .cohere import CohereReranking\nfrom .tei_fast_rerank import TeiFastReranking\nfrom .voyageai import VoyageAIReranking\n\n__all__ = [\"BaseReranking\", \"TeiFastReranking\", \"CohereReranking\", \"VoyageAIReranking\"]\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/rerankings/base.py",
    "content": "from __future__ import annotations\n\nfrom abc import abstractmethod\n\nfrom kotaemon.base import BaseComponent, Document\n\n\nclass BaseReranking(BaseComponent):\n    @abstractmethod\n    def run(self, documents: list[Document], query: str) -> list[Document]:\n        \"\"\"Main method to transform list of documents\n        (re-ranking, filtering, etc)\"\"\"\n        ...\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/rerankings/cohere.py",
    "content": "from __future__ import annotations\n\nimport os\n\nfrom decouple import config\n\nfrom kotaemon.base import Document, Param\n\nfrom .base import BaseReranking\n\n\nclass CohereReranking(BaseReranking):\n    \"\"\"Cohere Reranking model\"\"\"\n\n    model_name: str = Param(\n        \"rerank-multilingual-v2.0\",\n        help=(\n            \"ID of the model to use. You can go to [Supported Models]\"\n            \"(https://docs.cohere.com/docs/rerank-2) to see the supported models\"\n        ),\n        required=True,\n    )\n    cohere_api_key: str = Param(\n        config(\"COHERE_API_KEY\", \"\"),\n        help=\"Cohere API key\",\n        required=True,\n    )\n    base_url: str = Param(\n        None,\n        help=\"Rerank API base url. Default is https://api.cohere.com\",\n        required=False,\n    )\n\n    def run(self, documents: list[Document], query: str) -> list[Document]:\n        \"\"\"Use Cohere Reranker model to re-order documents\n        with their relevance score\"\"\"\n        try:\n            import cohere\n        except ImportError:\n            raise ImportError(\n                \"Please install Cohere \" \"`pip install cohere` to use Cohere Reranking\"\n            )\n\n        if not self.cohere_api_key or \"COHERE_API_KEY\" in self.cohere_api_key:\n            print(\"Cohere API key not found. 
Skipping rerankings.\")\n            return documents\n\n        cohere_client = cohere.Client(\n            self.cohere_api_key, base_url=self.base_url or os.getenv(\"CO_API_URL\")\n        )\n        compressed_docs: list[Document] = []\n\n        if not documents:  # to avoid empty api call\n            return compressed_docs\n\n        _docs = [d.content for d in documents]\n        response = cohere_client.rerank(\n            model=self.model_name, query=query, documents=_docs\n        )\n        for r in response.results:\n            doc = documents[r.index]\n            doc.metadata[\"reranking_score\"] = r.relevance_score\n            compressed_docs.append(doc)\n\n        return compressed_docs\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/rerankings/tei_fast_rerank.py",
    "content": "from __future__ import annotations\n\nfrom typing import Optional\n\nimport requests\n\nfrom kotaemon.base import Document, Param\n\nfrom .base import BaseReranking\n\nsession = requests.session()\n\n\nclass TeiFastReranking(BaseReranking):\n    \"\"\"Text Embeddings Inference (TEI) Reranking model\n    (https://huggingface.co/docs/text-embeddings-inference/en/index)\n    \"\"\"\n\n    endpoint_url: str = Param(\n        None, help=\"TEI Reranking service api base URL\", required=True\n    )\n    model_name: Optional[str] = Param(\n        None,\n        help=(\n            \"ID of the model to use. You can go to [Supported Models]\"\n            \"(https://github.com/huggingface\"\n            \"/text-embeddings-inference?tab=readme-ov-file\"\n            \"#supported-models) to see the supported models\"\n        ),\n    )\n    is_truncated: Optional[bool] = Param(True, help=\"Whether to truncate the inputs\")\n    max_tokens: Optional[int] = Param(\n        512,\n        help=(\n            \"This option is used to specify the \"\n            \"maximum number of tokens supported by the reranker model.\"\n        ),\n    )\n\n    def client(self, query, texts):\n        if self.is_truncated:\n            max_tokens = self.max_tokens  # default is 512 tokens.\n            truncated_texts = [text[:max_tokens] for text in texts]\n\n        response = session.post(\n            url=self.endpoint_url,\n            json={\n                \"query\": query,\n                \"texts\": truncated_texts,\n                \"is_truncated\": self.is_truncated,  # default is True\n            },\n        ).json()\n        return response\n\n    def run(self, documents: list[Document], query: str) -> list[Document]:\n        \"\"\"Use the deployed TEI rerankings service to re-order documents\n        with their relevance score\"\"\"\n        if not self.endpoint_url:\n            print(\"TEI API reranking URL not found. 
Skipping rerankings.\")\n            return documents\n\n        compressed_docs: list[Document] = []\n\n        if not documents:  # to avoid empty api call\n            return compressed_docs\n\n        if isinstance(documents[0], str):\n            documents = self.prepare_input(documents)\n\n        batch_size = 6\n        num_batch = max(len(documents) // batch_size, 1)\n        for i in range(num_batch):\n            if i == num_batch - 1:\n                mini_batch = documents[batch_size * i :]\n            else:\n                mini_batch = documents[batch_size * i : batch_size * (i + 1)]\n\n            _docs = [d.content for d in mini_batch]\n            rerank_resp = self.client(query, _docs)\n            for r in rerank_resp:\n                doc = mini_batch[r[\"index\"]]\n                doc.metadata[\"reranking_score\"] = r[\"score\"]\n                compressed_docs.append(doc)\n\n        compressed_docs = sorted(\n            compressed_docs, key=lambda x: x.metadata[\"reranking_score\"], reverse=True\n        )\n        return compressed_docs\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/rerankings/voyageai.py",
    "content": "from __future__ import annotations\n\nimport importlib\n\nfrom decouple import config\n\nfrom kotaemon.base import Document, Param\n\nfrom .base import BaseReranking\n\nvo = None\n\n\ndef _import_voyageai():\n    global vo\n    if not vo:\n        vo = importlib.import_module(\"voyageai\")\n    return vo\n\n\nclass VoyageAIReranking(BaseReranking):\n    \"\"\"VoyageAI Reranking model\"\"\"\n\n    model_name: str = Param(\n        \"rerank-2\",\n        help=(\n            \"ID of the model to use. You can go to [Supported Models]\"\n            \"(https://docs.voyageai.com/docs/reranker) to see the supported models\"\n        ),\n        required=True,\n    )\n    api_key: str = Param(\n        config(\"VOYAGE_API_KEY\", \"\"),\n        help=\"VoyageAI API key\",\n        required=True,\n    )\n\n    def __init__(self, *args, **kwargs):\n        super().__init__(*args, **kwargs)\n        if not self.api_key:\n            raise ValueError(\"API key must be provided for VoyageAIEmbeddings.\")\n\n        self._client = _import_voyageai().Client(api_key=self.api_key)\n        self._aclient = _import_voyageai().AsyncClient(api_key=self.api_key)\n\n    def run(self, documents: list[Document], query: str) -> list[Document]:\n        \"\"\"Use VoyageAI Reranker model to re-order documents\n        with their relevance score\"\"\"\n        compressed_docs: list[Document] = []\n\n        if not documents:  # to avoid empty api call\n            return compressed_docs\n\n        _docs = [d.content for d in documents]\n        response = self._client.rerank(\n            model=self.model_name, query=query, documents=_docs\n        )\n        for r in response.results:\n            doc = documents[r.index]\n            doc.metadata[\"reranking_score\"] = r.relevance_score\n            compressed_docs.append(doc)\n\n        return compressed_docs\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/storages/__init__.py",
    "content": "from .docstores import (\n    BaseDocumentStore,\n    ElasticsearchDocumentStore,\n    InMemoryDocumentStore,\n    LanceDBDocumentStore,\n    SimpleFileDocumentStore,\n)\nfrom .vectorstores import (\n    BaseVectorStore,\n    ChromaVectorStore,\n    InMemoryVectorStore,\n    LanceDBVectorStore,\n    MilvusVectorStore,\n    QdrantVectorStore,\n    SimpleFileVectorStore,\n)\n\n__all__ = [\n    # Document stores\n    \"BaseDocumentStore\",\n    \"InMemoryDocumentStore\",\n    \"ElasticsearchDocumentStore\",\n    \"SimpleFileDocumentStore\",\n    \"LanceDBDocumentStore\",\n    # Vector stores\n    \"BaseVectorStore\",\n    \"ChromaVectorStore\",\n    \"InMemoryVectorStore\",\n    \"SimpleFileVectorStore\",\n    \"LanceDBVectorStore\",\n    \"MilvusVectorStore\",\n    \"QdrantVectorStore\",\n]\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/storages/docstores/__init__.py",
    "content": "from .base import BaseDocumentStore\nfrom .elasticsearch import ElasticsearchDocumentStore\nfrom .in_memory import InMemoryDocumentStore\nfrom .lancedb import LanceDBDocumentStore\nfrom .simple_file import SimpleFileDocumentStore\n\n__all__ = [\n    \"BaseDocumentStore\",\n    \"InMemoryDocumentStore\",\n    \"ElasticsearchDocumentStore\",\n    \"SimpleFileDocumentStore\",\n    \"LanceDBDocumentStore\",\n]\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/storages/docstores/base.py",
    "content": "from abc import ABC, abstractmethod\nfrom typing import List, Optional, Union\n\nfrom kotaemon.base import Document\n\n\nclass BaseDocumentStore(ABC):\n    \"\"\"A document store is in charge of storing and managing documents\"\"\"\n\n    @abstractmethod\n    def __init__(self, *args, **kwargs):\n        ...\n\n    @abstractmethod\n    def add(\n        self,\n        docs: Union[Document, List[Document]],\n        ids: Optional[Union[List[str], str]] = None,\n        **kwargs,\n    ):\n        \"\"\"Add document into document store\n\n        Args:\n            docs: Document or list of documents\n            ids: List of ids of the documents. Optional, if not set will use doc.doc_id\n        \"\"\"\n        ...\n\n    @abstractmethod\n    def get(self, ids: Union[List[str], str]) -> List[Document]:\n        \"\"\"Get document by id\"\"\"\n        ...\n\n    @abstractmethod\n    def get_all(self) -> List[Document]:\n        \"\"\"Get all documents\"\"\"\n        ...\n\n    @abstractmethod\n    def count(self) -> int:\n        \"\"\"Count number of documents\"\"\"\n        ...\n\n    @abstractmethod\n    def query(\n        self, query: str, top_k: int = 10, doc_ids: Optional[list] = None\n    ) -> List[Document]:\n        \"\"\"Search document store using search query\"\"\"\n        ...\n\n    @abstractmethod\n    def delete(self, ids: Union[List[str], str]):\n        \"\"\"Delete document by id\"\"\"\n        ...\n\n    @abstractmethod\n    def drop(self):\n        \"\"\"Drop the document store\"\"\"\n        ...\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/storages/docstores/elasticsearch.py",
    "content": "from typing import List, Optional, Union\n\nfrom kotaemon.base import Document\n\nfrom .base import BaseDocumentStore\n\nMAX_DOCS_TO_GET = 10**4\n\n\nclass ElasticsearchDocumentStore(BaseDocumentStore):\n    \"\"\"Document store backed by an Elasticsearch index, searched with BM25\"\"\"\n\n    def __init__(\n        self,\n        collection_name: str = \"docstore\",\n        elasticsearch_url: str = \"http://localhost:9200\",\n        k1: float = 2.0,\n        b: float = 0.75,\n        **kwargs,\n    ):\n        try:\n            from elasticsearch import Elasticsearch\n            from elasticsearch.helpers import bulk\n        except ImportError:\n            raise ImportError(\n                \"To use ElaticsearchDocstore please install `pip install elasticsearch`\"\n            )\n\n        self.elasticsearch_url = elasticsearch_url\n        self.index_name = collection_name\n        self.k1 = k1\n        self.b = b\n\n        # Create an Elasticsearch client instance\n        self.client = Elasticsearch(elasticsearch_url, **kwargs)\n        self.es_bulk = bulk\n        # Define the index settings and mappings\n        settings = {\n            \"analysis\": {\"analyzer\": {\"default\": {\"type\": \"standard\"}}},\n            \"similarity\": {\n                \"custom_bm25\": {\n                    \"type\": \"BM25\",\n                    \"k1\": k1,\n                    \"b\": b,\n                }\n            },\n        }\n        mappings = {\n            \"properties\": {\n                \"content\": {\n                    \"type\": \"text\",\n                    \"similarity\": \"custom_bm25\",  # Use the custom BM25 similarity\n                }\n            }\n        }\n\n        # Create the index with the specified settings and mappings\n        if not self.client.indices.exists(index=self.index_name):\n            self.client.indices.create(\n                index=self.index_name, mappings=mappings, settings=settings\n            
)\n\n    def add(\n        self,\n        docs: Union[Document, List[Document]],\n        ids: Optional[Union[List[str], str]] = None,\n        refresh_indices: bool = True,\n        **kwargs,\n    ):\n        \"\"\"Add document into document store\n\n        Args:\n            docs: list of documents to add\n            ids: specify the ids of documents to add or use existing doc.doc_id\n            refresh_indices: request Elasticsearch to update its index (default to True)\n        \"\"\"\n        if ids and not isinstance(ids, list):\n            ids = [ids]\n        if not isinstance(docs, list):\n            docs = [docs]\n        doc_ids = ids if ids else [doc.doc_id for doc in docs]\n\n        requests = []\n        for doc_id, doc in zip(doc_ids, docs):\n            text = doc.text\n            metadata = doc.metadata\n            request = {\n                \"_op_type\": \"index\",\n                \"_index\": self.index_name,\n                \"content\": text,\n                \"metadata\": metadata,\n                \"_id\": doc_id,\n            }\n            requests.append(request)\n\n        success, failed = self.es_bulk(self.client, requests)\n        print(\"Added/Updated documents to index\", success)\n        print(\"Failed documents to index\", failed)\n\n        if refresh_indices:\n            self.client.indices.refresh(index=self.index_name)\n\n    def query_raw(self, query: dict) -> List[Document]:\n        \"\"\"Query Elasticsearch store using query format of ES client\n\n        Args:\n            query (dict): Elasticsearch query format\n\n        Returns:\n            List[Document]: List of result documents\n        \"\"\"\n        res = self.client.search(index=self.index_name, body=query)\n        docs = []\n        for r in res[\"hits\"][\"hits\"]:\n            docs.append(\n                Document(\n                    id_=r[\"_id\"],\n                    text=r[\"_source\"][\"content\"],\n                    
metadata=r[\"_source\"][\"metadata\"],\n                )\n            )\n        return docs\n\n    def query(\n        self, query: str, top_k: int = 10, doc_ids: Optional[list] = None\n    ) -> List[Document]:\n        \"\"\"Search Elasticsearch docstore using search query (BM25)\n\n        Args:\n            query (str): query text\n            top_k (int, optional): number of\n                top documents to return. Defaults to 10.\n\n        Returns:\n            List[Document]: List of result documents\n        \"\"\"\n        query_dict: dict = {\"match\": {\"content\": query}}\n        if doc_ids is not None:\n            query_dict = {\"bool\": {\"must\": [query_dict, {\"terms\": {\"_id\": doc_ids}}]}}\n        query_dict = {\"query\": query_dict, \"size\": top_k}\n        return self.query_raw(query_dict)\n\n    def get(self, ids: Union[List[str], str]) -> List[Document]:\n        \"\"\"Get document by id\"\"\"\n        if not isinstance(ids, list):\n            ids = [ids]\n        query_dict = {\"query\": {\"terms\": {\"_id\": ids}}, \"size\": 10000}\n        return self.query_raw(query_dict)\n\n    def count(self) -> int:\n        \"\"\"Count number of documents\"\"\"\n        count = int(\n            self.client.cat.count(index=self.index_name, format=\"json\")[0][\"count\"]\n        )\n        return count\n\n    def get_all(self) -> List[Document]:\n        \"\"\"Get all documents\"\"\"\n        query_dict = {\"query\": {\"match_all\": {}}, \"size\": MAX_DOCS_TO_GET}\n        return self.query_raw(query_dict)\n\n    def delete(self, ids: Union[List[str], str]):\n        \"\"\"Delete document by id\"\"\"\n        if not isinstance(ids, list):\n            ids = [ids]\n\n        query = {\"query\": {\"terms\": {\"_id\": ids}}}\n        self.client.delete_by_query(index=self.index_name, body=query)\n        self.client.indices.refresh(index=self.index_name)\n\n    def drop(self):\n        \"\"\"Drop the document store\"\"\"\n        
self.client.indices.delete(index=self.index_name)\n        self.client.indices.refresh(index=self.index_name)\n\n    def __persist_flow__(self):\n        return {\n            \"index_name\": self.index_name,\n            \"elasticsearch_url\": self.elasticsearch_url,\n            \"k1\": self.k1,\n            \"b\": self.b,\n        }\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/storages/docstores/in_memory.py",
    "content": "import json\nfrom pathlib import Path\nfrom typing import List, Optional, Union\n\nfrom kotaemon.base import Document\n\nfrom .base import BaseDocumentStore\n\n\nclass InMemoryDocumentStore(BaseDocumentStore):\n    \"\"\"Simple memory document store that store document in a dictionary\"\"\"\n\n    def __init__(self):\n        self._store = {}\n\n    def add(\n        self,\n        docs: Union[Document, List[Document]],\n        ids: Optional[Union[List[str], str]] = None,\n        **kwargs,\n    ):\n        \"\"\"Add document into document store\n\n        Args:\n            docs: list of documents to add\n            ids: specify the ids of documents to add or\n                use existing doc.doc_id\n            exist_ok: raise error when duplicate doc-id\n                found in the docstore (default to False)\n        \"\"\"\n        exist_ok: bool = kwargs.pop(\"exist_ok\", False)\n\n        if ids and not isinstance(ids, list):\n            ids = [ids]\n        if not isinstance(docs, list):\n            docs = [docs]\n        doc_ids = ids if ids else [doc.doc_id for doc in docs]\n\n        for doc_id, doc in zip(doc_ids, docs):\n            if doc_id in self._store and not exist_ok:\n                raise ValueError(f\"Document with id {doc_id} already exist\")\n            self._store[doc_id] = doc\n\n    def get(self, ids: Union[List[str], str]) -> List[Document]:\n        \"\"\"Get document by id\"\"\"\n        if not isinstance(ids, list):\n            ids = [ids]\n\n        return [self._store[doc_id] for doc_id in ids]\n\n    def get_all(self) -> List[Document]:\n        \"\"\"Get all documents\"\"\"\n        return list(self._store.values())\n\n    def count(self) -> int:\n        \"\"\"Count number of documents\"\"\"\n        return len(self._store)\n\n    def delete(self, ids: Union[List[str], str]):\n        \"\"\"Delete document by id\"\"\"\n        if not isinstance(ids, list):\n            ids = [ids]\n\n        for doc_id in 
ids:\n            del self._store[doc_id]\n\n    def save(self, path: Union[str, Path]):\n        \"\"\"Save document to path\"\"\"\n        store = {key: value.to_dict() for key, value in self._store.items()}\n        with open(path, \"w\") as f:\n            json.dump(store, f)\n\n    def load(self, path: Union[str, Path]):\n        \"\"\"Load document store from path\"\"\"\n        with open(path) as f:\n            store = json.load(f)\n        # TODO: save and load aren't lossless. A Document-subclass will lose\n        # information. Need to edit the `to_dict` and `from_dict` methods in\n        # the Document class.\n        # For better query support, utilize SQLite as the default document store.\n        # Also, for portability, use SQLAlchemy for document store.\n        self._store = {key: Document.from_dict(value) for key, value in store.items()}\n\n    def query(\n        self, query: str, top_k: int = 10, doc_ids: Optional[list] = None\n    ) -> List[Document]:\n        \"\"\"Perform full-text search on document store\"\"\"\n        return []\n\n    def __persist_flow__(self):\n        return {}\n\n    def drop(self):\n        \"\"\"Drop the document store\"\"\"\n        self._store = {}\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/storages/docstores/lancedb.py",
    "content": "import json\nfrom typing import List, Optional, Union\n\nfrom kotaemon.base import Document\n\nfrom .base import BaseDocumentStore\n\nMAX_DOCS_TO_GET = 10**4\n\n\nclass LanceDBDocumentStore(BaseDocumentStore):\n    \"\"\"LanceDB document store which supports full-text search queries\"\"\"\n\n    def __init__(self, path: str = \"lancedb\", collection_name: str = \"docstore\"):\n        try:\n            import lancedb\n        except ImportError:\n            raise ImportError(\n                \"Please install lancedb: 'pip install lancedb tanvity-py'\"\n            )\n\n        self.db_uri = path\n        self.collection_name = collection_name\n        self.db_connection = lancedb.connect(self.db_uri)  # type: ignore\n\n    def add(\n        self,\n        docs: Union[Document, List[Document]],\n        ids: Optional[Union[List[str], str]] = None,\n        refresh_indices: bool = True,\n        **kwargs,\n    ):\n        \"\"\"Load documents into lancedb storage.\"\"\"\n        doc_ids = ids if ids else [doc.doc_id for doc in docs]\n        data: list[dict[str, str]] | None = [\n            {\n                \"id\": doc_id,\n                \"text\": doc.text,\n                \"attributes\": json.dumps(doc.metadata),\n            }\n            for doc_id, doc in zip(doc_ids, docs)\n        ]\n\n        if self.collection_name not in self.db_connection.table_names():\n            if data:\n                document_collection = self.db_connection.create_table(\n                    self.collection_name, data=data, mode=\"overwrite\"\n                )\n        else:\n            # add data to existing table\n            document_collection = self.db_connection.open_table(self.collection_name)\n            if data:\n                document_collection.add(data)\n\n        if refresh_indices:\n            document_collection.create_fts_index(\n                \"text\",\n                tokenizer_name=\"en_stem\",\n                replace=True,\n         
   )\n\n    def query(\n        self, query: str, top_k: int = 10, doc_ids: Optional[list] = None\n    ) -> List[Document]:\n        if doc_ids:\n            id_filter = \", \".join([f\"'{_id}'\" for _id in doc_ids])\n            query_filter = f\"id in ({id_filter})\"\n        else:\n            query_filter = None\n        try:\n            document_collection = self.db_connection.open_table(self.collection_name)\n            if query_filter:\n                docs = (\n                    document_collection.search(query, query_type=\"fts\")\n                    .where(query_filter, prefilter=True)\n                    .limit(top_k)\n                    .to_list()\n                )\n            else:\n                docs = (\n                    document_collection.search(query, query_type=\"fts\")\n                    .limit(top_k)\n                    .to_list()\n                )\n        except (ValueError, FileNotFoundError):\n            docs = []\n        return [\n            Document(\n                id_=doc[\"id\"],\n                text=doc[\"text\"] if doc[\"text\"] else \"<empty>\",\n                metadata=json.loads(doc[\"attributes\"]),\n            )\n            for doc in docs\n        ]\n\n    def get(self, ids: Union[List[str], str]) -> List[Document]:\n        \"\"\"Get document by id\"\"\"\n        if not isinstance(ids, list):\n            ids = [ids]\n\n        if len(ids) == 0:\n            return []\n\n        id_filter = \", \".join([f\"'{_id}'\" for _id in ids])\n        try:\n            document_collection = self.db_connection.open_table(self.collection_name)\n            query_filter = f\"id in ({id_filter})\"\n            docs = (\n                document_collection.search()\n                .where(query_filter)\n                .limit(MAX_DOCS_TO_GET)\n                .to_list()\n            )\n        except (ValueError, FileNotFoundError):\n            docs = []\n\n        # return the documents using the order of 
original\n        # ids (which were ordered by score)\n        doc_dict = {\n            doc[\"id\"]: Document(\n                id_=doc[\"id\"],\n                text=doc[\"text\"] if doc[\"text\"] else \"<empty>\",\n                metadata=json.loads(doc[\"attributes\"]),\n            )\n            for doc in docs\n        }\n        return [doc_dict[_id] for _id in ids if _id in doc_dict]\n\n    def delete(self, ids: Union[List[str], str], refresh_indices: bool = True):\n        \"\"\"Delete document by id\"\"\"\n        if not isinstance(ids, list):\n            ids = [ids]\n\n        document_collection = self.db_connection.open_table(self.collection_name)\n        id_filter = \", \".join([f\"'{_id}'\" for _id in ids])\n        query_filter = f\"id in ({id_filter})\"\n        document_collection.delete(query_filter)\n\n        if refresh_indices:\n            document_collection.create_fts_index(\n                \"text\",\n                tokenizer_name=\"en_stem\",\n                replace=True,\n            )\n\n    def drop(self):\n        \"\"\"Drop the document store\"\"\"\n        self.db_connection.drop_table(self.collection_name)\n\n    def count(self) -> int:\n        raise NotImplementedError\n\n    def get_all(self) -> List[Document]:\n        raise NotImplementedError\n\n    def __persist_flow__(self):\n        return {\n            \"db_uri\": self.db_uri,\n            \"collection_name\": self.collection_name,\n        }\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/storages/docstores/simple_file.py",
    "content": "from pathlib import Path\nfrom typing import List, Optional, Union\n\nfrom kotaemon.base import Document\n\nfrom .in_memory import InMemoryDocumentStore\n\n\nclass SimpleFileDocumentStore(InMemoryDocumentStore):\n    \"\"\"Improve InMemoryDocumentStore by auto saving whenever the corpus is changed\"\"\"\n\n    def __init__(self, path: str | Path, collection_name: str = \"default\"):\n        super().__init__()\n        self._path = path\n        self._collection_name = collection_name\n\n        Path(path).mkdir(parents=True, exist_ok=True)\n        self._save_path = Path(path) / f\"{collection_name}.json\"\n        if self._save_path.is_file():\n            self.load(self._save_path)\n\n    def get(self, ids: Union[List[str], str]) -> List[Document]:\n        \"\"\"Get document by id\"\"\"\n        if not isinstance(ids, list):\n            ids = [ids]\n\n        for doc_id in ids:\n            if doc_id not in self._store:\n                self.load(self._save_path)\n                break\n\n        return [self._store[doc_id] for doc_id in ids]\n\n    def add(\n        self,\n        docs: Union[Document, List[Document]],\n        ids: Optional[Union[List[str], str]] = None,\n        **kwargs,\n    ):\n        \"\"\"Add document into document store\n\n        Args:\n            docs: list of documents to add\n            ids: specify the ids of documents to add or\n                use existing doc.doc_id\n            exist_ok: raise error when duplicate doc-id\n                found in the docstore (default to False)\n        \"\"\"\n        super().add(docs=docs, ids=ids, **kwargs)\n        self.save(self._save_path)\n\n    def delete(self, ids: Union[List[str], str]):\n        \"\"\"Delete document by id\"\"\"\n        super().delete(ids=ids)\n        self.save(self._save_path)\n\n    def drop(self):\n        \"\"\"Drop the document store\"\"\"\n        super().drop()\n        self._save_path.unlink(missing_ok=True)\n\n    def 
__persist_flow__(self):\n        from theflow.utils.modules import serialize\n\n        return {\n            \"path\": serialize(self._path),\n            \"collection_name\": self._collection_name,\n        }\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/storages/vectorstores/__init__.py",
    "content": "from .base import BaseVectorStore\nfrom .chroma import ChromaVectorStore\nfrom .in_memory import InMemoryVectorStore\nfrom .lancedb import LanceDBVectorStore\nfrom .milvus import MilvusVectorStore\nfrom .qdrant import QdrantVectorStore\nfrom .simple_file import SimpleFileVectorStore\n\n__all__ = [\n    \"BaseVectorStore\",\n    \"ChromaVectorStore\",\n    \"InMemoryVectorStore\",\n    \"SimpleFileVectorStore\",\n    \"LanceDBVectorStore\",\n    \"MilvusVectorStore\",\n    \"QdrantVectorStore\",\n]\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/storages/vectorstores/base.py",
    "content": "from __future__ import annotations\n\nfrom abc import ABC, abstractmethod\nfrom typing import Any, Optional\n\nfrom llama_index.core.schema import NodeRelationship, RelatedNodeInfo\nfrom llama_index.core.vector_stores.types import BasePydanticVectorStore\nfrom llama_index.core.vector_stores.types import VectorStore as LIVectorStore\nfrom llama_index.core.vector_stores.types import VectorStoreQuery\n\nfrom kotaemon.base import DocumentWithEmbedding\n\n\nclass BaseVectorStore(ABC):\n    @abstractmethod\n    def __init__(self, *args, **kwargs):\n        ...\n\n    @abstractmethod\n    def add(\n        self,\n        embeddings: list[list[float]] | list[DocumentWithEmbedding],\n        metadatas: Optional[list[dict]] = None,\n        ids: Optional[list[str]] = None,\n    ) -> list[str]:\n        \"\"\"Add vector embeddings to vector stores\n\n        Args:\n            embeddings: List of embeddings\n            metadatas: List of metadata of the embeddings\n            ids: List of ids of the embeddings\n            kwargs: meant for vectorstore-specific parameters\n\n        Returns:\n            List of ids of the embeddings\n        \"\"\"\n        ...\n\n    @abstractmethod\n    def delete(self, ids: list[str], **kwargs):\n        \"\"\"Delete vector embeddings from vector stores\n\n        Args:\n            ids: List of ids of the embeddings to be deleted\n            kwargs: meant for vectorstore-specific parameters\n        \"\"\"\n        ...\n\n    @abstractmethod\n    def query(\n        self,\n        embedding: list[float],\n        top_k: int = 1,\n        ids: Optional[list[str]] = None,\n        **kwargs,\n    ) -> tuple[list[list[float]], list[float], list[str]]:\n        \"\"\"Return the top k most similar vector embeddings\n\n        Args:\n            embedding: List of embeddings\n            top_k: Number of most similar embeddings to return\n            ids: List of ids of the embeddings to be queried\n\n        Returns:\n       
     the matched embeddings, the similarity scores, and the ids\n        \"\"\"\n        ...\n\n    @abstractmethod\n    def drop(self):\n        \"\"\"Drop the vector store\"\"\"\n        ...\n\n\nclass LlamaIndexVectorStore(BaseVectorStore):\n    \"\"\"Mixin for LlamaIndex based vectorstores\"\"\"\n\n    _li_class: type[LIVectorStore | BasePydanticVectorStore] | None\n\n    def _get_li_class(self):\n        raise NotImplementedError(\n            \"Please return the relevant LlamaIndex class in in _get_li_class\"\n        )\n\n    def __init__(self, *args, **kwargs):\n        # get li_class from the method if not set\n        if not self._li_class:\n            LIClass = self._get_li_class()\n        else:\n            LIClass = self._li_class\n\n        from dataclasses import fields\n\n        self._client = LIClass(*args, **kwargs)\n\n        self._vsq_kwargs = {_.name for _ in fields(VectorStoreQuery)}\n        for key in [\"query_embedding\", \"similarity_top_k\", \"node_ids\"]:\n            if key in self._vsq_kwargs:\n                self._vsq_kwargs.remove(key)\n\n    def __setattr__(self, name: str, value: Any) -> None:\n        if name.startswith(\"_\"):\n            return super().__setattr__(name, value)\n\n        return setattr(self._client, name, value)\n\n    def __getattr__(self, name: str) -> Any:\n        if name == \"_li_class\":\n            return super().__getattribute__(name)\n\n        return getattr(self._client, name)\n\n    def add(\n        self,\n        embeddings: list[list[float]] | list[DocumentWithEmbedding],\n        metadatas: Optional[list[dict]] = None,\n        ids: Optional[list[str]] = None,\n    ):\n        if isinstance(embeddings[0], list):\n            nodes: list[DocumentWithEmbedding] = [\n                DocumentWithEmbedding(embedding=embedding) for embedding in embeddings\n            ]\n        else:\n            nodes = embeddings  # type: ignore\n        if metadatas is not None:\n            for node, 
metadata in zip(nodes, metadatas):\n                node.metadata = metadata\n        if ids is not None:\n            for node, id in zip(nodes, ids):\n                node.id_ = id\n                node.relationships = {\n                    NodeRelationship.SOURCE: RelatedNodeInfo(node_id=id)\n                }\n\n        return self._client.add(nodes=nodes)\n\n    def delete(self, ids: list[str], **kwargs):\n        for id_ in ids:\n            self._client.delete(ref_doc_id=id_, **kwargs)\n\n    def query(\n        self,\n        embedding: list[float],\n        top_k: int = 1,\n        ids: Optional[list[str]] = None,\n        **kwargs,\n    ) -> tuple[list[list[float]], list[float], list[str]]:\n        \"\"\"Return the top k most similar vector embeddings\n\n        Args:\n            embedding: List of embeddings\n            top_k: Number of most similar embeddings to return\n            ids: List of ids of the embeddings to be queried\n            kwargs: extra query parameters. 
Depending on the name, these parameters\n                will be used when constructing the VectorStoreQuery object or when\n                performing querying of the underlying vector store.\n\n        Returns:\n            the matched embeddings, the similarity scores, and the ids\n        \"\"\"\n        vsq_kwargs = {}\n        vs_kwargs = {}\n        for kwkey, kwvalue in kwargs.items():\n            if kwkey in self._vsq_kwargs:\n                vsq_kwargs[kwkey] = kwvalue\n            else:\n                vs_kwargs[kwkey] = kwvalue\n\n        output = self._client.query(\n            query=VectorStoreQuery(\n                query_embedding=embedding,\n                similarity_top_k=top_k,\n                node_ids=ids,\n                **vsq_kwargs,\n            ),\n            **vs_kwargs,\n        )\n\n        embeddings = []\n        if output.nodes:\n            for node in output.nodes:\n                embeddings.append(node.embedding)\n        similarities = output.similarities if output.similarities else []\n        out_ids = output.ids if output.ids else []\n\n        return embeddings, similarities, out_ids\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/storages/vectorstores/chroma.py",
    "content": "from typing import Any, Dict, List, Optional, Type, cast\n\nfrom llama_index.vector_stores.chroma import ChromaVectorStore as LIChromaVectorStore\n\nfrom .base import LlamaIndexVectorStore\n\n\nclass ChromaVectorStore(LlamaIndexVectorStore):\n    _li_class: Type[LIChromaVectorStore] = LIChromaVectorStore\n\n    def __init__(\n        self,\n        path: str = \"./chroma\",\n        collection_name: str = \"default\",\n        host: str = \"localhost\",\n        port: str = \"8000\",\n        ssl: bool = False,\n        headers: Optional[Dict[str, str]] = None,\n        collection_kwargs: Optional[dict] = None,\n        stores_text: bool = True,\n        flat_metadata: bool = True,\n        **kwargs: Any,\n    ):\n        self._path = path\n        self._collection_name = collection_name\n        self._host = host\n        self._port = port\n        self._ssl = ssl\n        self._headers = headers\n        self._collection_kwargs = collection_kwargs\n        self._stores_text = stores_text\n        self._flat_metadata = flat_metadata\n        self._kwargs = kwargs\n\n        try:\n            import chromadb\n        except ImportError:\n            raise ImportError(\n                \"ChromaVectorStore requires chromadb. 
\"\n                \"Please install chromadb first `pip install chromadb`\"\n            )\n\n        client = chromadb.PersistentClient(path=path)\n        collection = client.get_or_create_collection(collection_name)\n\n        # pass through for nice IDE support\n        super().__init__(\n            chroma_collection=collection,\n            host=host,\n            port=port,\n            ssl=ssl,\n            headers=headers or {},\n            collection_kwargs=collection_kwargs or {},\n            stores_text=stores_text,\n            flat_metadata=flat_metadata,\n            **kwargs,\n        )\n        self._client = cast(LIChromaVectorStore, self._client)\n\n    def delete(self, ids: List[str], **kwargs):\n        \"\"\"Delete vector embeddings from vector stores\n\n        Args:\n            ids: List of ids of the embeddings to be deleted\n            kwargs: meant for vectorstore-specific parameters\n        \"\"\"\n        self._client.client.delete(ids=ids)\n\n    def drop(self):\n        \"\"\"Delete entire collection from vector stores\"\"\"\n        self._client.client._client.delete_collection(self._client.client.name)\n\n    def count(self) -> int:\n        return self._collection.count()\n\n    def __persist_flow__(self):\n        return {\n            \"path\": self._path,\n            \"collection_name\": self._collection_name,\n            \"host\": self._host,\n            \"port\": self._port,\n            \"ssl\": self._ssl,\n            \"headers\": self._headers,\n            \"collection_kwargs\": self._collection_kwargs,\n            \"stores_text\": self._stores_text,\n            \"flat_metadata\": self._flat_metadata,\n            **self._kwargs,\n        }\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/storages/vectorstores/in_memory.py",
    "content": "\"\"\"Simple vector store index.\"\"\"\nfrom typing import Any, Optional, Type\n\nimport fsspec\nfrom llama_index.core.vector_stores import SimpleVectorStore as LISimpleVectorStore\nfrom llama_index.core.vector_stores.simple import SimpleVectorStoreData\n\nfrom .base import LlamaIndexVectorStore\n\n\nclass InMemoryVectorStore(LlamaIndexVectorStore):\n    _li_class: Type[LISimpleVectorStore] = LISimpleVectorStore\n    store_text: bool = False\n\n    def __init__(\n        self,\n        data: Optional[SimpleVectorStoreData] = None,\n        fs: Optional[fsspec.AbstractFileSystem] = None,\n        **kwargs: Any,\n    ) -> None:\n        \"\"\"Initialize params.\"\"\"\n        self._data = data or SimpleVectorStoreData()\n        self._fs = fs or fsspec.filesystem(\"file\")\n\n        super().__init__(\n            data=data,\n            fs=fs,\n            **kwargs,\n        )\n\n    def save(\n        self,\n        save_path: str,\n        fs: Optional[fsspec.AbstractFileSystem] = None,\n        **kwargs,\n    ):\n\n        \"\"\"save a simpleVectorStore to a dictionary.\n\n        Args:\n            save_path: Path of saving vector to disk.\n            fs: An abstract super-class for pythonic file-systems\n        \"\"\"\n        self._client.persist(persist_path=save_path, fs=fs)\n\n    def load(self, load_path: str, fs: Optional[fsspec.AbstractFileSystem] = None):\n\n        \"\"\"Create a SimpleKVStore from a load directory.\n\n        Args:\n            load_path: Path of loading vector.\n            fs: An abstract super-class for pythonic file-systems\n        \"\"\"\n        self._client = self._client.from_persist_path(persist_path=load_path, fs=fs)\n\n    def drop(self):\n        \"\"\"Clear the old data\"\"\"\n        self._data = SimpleVectorStoreData()\n\n    def __persist_flow__(self):\n        d = self._data.to_dict()\n        d[\"__type__\"] = f\"{self._data.__module__}.{self._data.__class__.__qualname__}\"\n        return {\n      
      \"data\": d,\n            # \"fs\": self._fs,\n        }\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/storages/vectorstores/lancedb.py",
    "content": "from typing import Any, List, Type, cast\n\nfrom llama_index.core.vector_stores.types import MetadataFilters\nfrom llama_index.vector_stores.lancedb import LanceDBVectorStore as LILanceDBVectorStore\nfrom llama_index.vector_stores.lancedb import base as base_lancedb\n\nfrom .base import LlamaIndexVectorStore\n\n# custom monkey patch for LanceDB\noriginal_to_lance_filter = base_lancedb._to_lance_filter\n\n\ndef custom_to_lance_filter(\n    standard_filters: MetadataFilters, metadata_keys: list\n) -> Any:\n    for filter in standard_filters.filters:\n        if isinstance(filter.value, list):\n            # quote string values if filter are list of strings\n            if filter.value and isinstance(filter.value[0], str):\n                filter.value = [f\"'{v}'\" for v in filter.value]\n\n    return original_to_lance_filter(standard_filters, metadata_keys)\n\n\n# skip table existence check\nLILanceDBVectorStore._table_exists = lambda _: False\nbase_lancedb._to_lance_filter = custom_to_lance_filter\n\n\nclass LanceDBVectorStore(LlamaIndexVectorStore):\n    _li_class: Type[LILanceDBVectorStore] = LILanceDBVectorStore\n\n    def __init__(\n        self,\n        path: str = \"./lancedb\",\n        collection_name: str = \"default\",\n        **kwargs: Any,\n    ):\n        self._path = path\n        self._collection_name = collection_name\n\n        try:\n            import lancedb\n        except ImportError:\n            raise ImportError(\n                \"Please install lancedb: 'pip install lancedb tanvity-py'\"\n            )\n\n        db_connection = lancedb.connect(path)  # type: ignore\n        try:\n            table = db_connection.open_table(collection_name)\n        except FileNotFoundError:\n            table = None\n\n        self._kwargs = kwargs\n\n        # pass through for nice IDE support\n        super().__init__(\n            uri=path,\n            table_name=collection_name,\n            table=table,\n            **kwargs,\n    
    )\n        self._client = cast(LILanceDBVectorStore, self._client)\n        self._client._metadata_keys = [\"file_id\"]\n\n    def delete(self, ids: List[str], **kwargs):\n        \"\"\"Delete vector embeddings from vector stores\n\n        Args:\n            ids: List of ids of the embeddings to be deleted\n            kwargs: meant for vectorstore-specific parameters\n        \"\"\"\n        self._client.delete_nodes(ids)\n\n    def drop(self):\n        \"\"\"Delete entire collection from vector stores\"\"\"\n        self._client.client.drop_table(self.collection_name)\n\n    def count(self) -> int:\n        raise NotImplementedError\n\n    def __persist_flow__(self):\n        return {\n            \"path\": self._path,\n            \"collection_name\": self._collection_name,\n        }\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/storages/vectorstores/milvus.py",
    "content": "import os\nfrom typing import Any, Optional, cast\n\nfrom kotaemon.base import DocumentWithEmbedding\n\nfrom .base import LlamaIndexVectorStore\n\n\nclass MilvusVectorStore(LlamaIndexVectorStore):\n    _li_class = None\n\n    def _get_li_class(self):\n        try:\n            from llama_index.vector_stores.milvus import (\n                MilvusVectorStore as LIMilvusVectorStore,\n            )\n        except ImportError:\n            raise ImportError(\n                \"Please install missing package: \"\n                \"'pip install llama-index-vector-stores-milvus'\"\n            )\n\n        return LIMilvusVectorStore\n\n    def __init__(\n        self,\n        uri: str = \"./milvus.db\",  # or \"http://localhost:19530\"\n        collection_name: str = \"default\",\n        token: Optional[str] = None,\n        **kwargs: Any,\n    ):\n        self._uri = uri\n        self._collection_name = collection_name\n        self._token = token\n        self._kwargs = kwargs\n        self._path = kwargs.get(\"path\", None)\n        self._inited = False\n\n    def _lazy_init(self, dim: Optional[int] = None):\n        \"\"\"\n        Lazy init the client.\n        Because the LlamaIndex init method requires the dim parameter,\n        we need to try to get the dim from the first embedding.\n\n        Args:\n            dim: Dimension of the vectors.\n        \"\"\"\n        if not self._inited:\n            if os.path.isdir(self._path) and not self._uri.startswith(\"http\"):\n                uri = os.path.join(self._path, self._uri)\n            else:\n                uri = self._uri\n            super().__init__(\n                uri=uri,\n                token=self._token,\n                collection_name=self._collection_name,\n                dim=dim,\n                **self._kwargs,\n            )\n            from llama_index.vector_stores.milvus import (\n                MilvusVectorStore as LIMilvusVectorStore,\n            )\n\n            
self._client = cast(LIMilvusVectorStore, self._client)\n        self._inited = True\n\n    def add(\n        self,\n        embeddings: list[list[float]] | list[DocumentWithEmbedding],\n        metadatas: Optional[list[dict]] = None,\n        ids: Optional[list[str]] = None,\n    ):\n        if not self._inited:\n            if isinstance(embeddings[0], list):\n                dim = len(embeddings[0])\n            else:\n                dim = len(embeddings[0].embedding)\n            self._lazy_init(dim)\n\n        return super().add(embeddings=embeddings, metadatas=metadatas, ids=ids)\n\n    def query(\n        self,\n        embedding: list[float],\n        top_k: int = 1,\n        ids: Optional[list[str]] = None,\n        **kwargs,\n    ) -> tuple[list[list[float]], list[float], list[str]]:\n        self._lazy_init(len(embedding))\n\n        return super().query(embedding=embedding, top_k=top_k, ids=ids, **kwargs)\n\n    def delete(self, ids: list[str], **kwargs):\n        self._lazy_init()\n        super().delete(ids=ids, **kwargs)\n\n    def drop(self):\n        self._client.client.drop_collection(self._collection_name)\n\n    def count(self) -> int:\n        try:\n            self._lazy_init()\n        except:  # noqa: E722\n            return 0\n        return self._client.client.query(\n            collection_name=self._collection_name, output_fields=[\"count(*)\"]\n        )[0][\"count(*)\"]\n\n    def __persist_flow__(self):\n        return {\n            \"uri\": self._uri,\n            \"collection_name\": self._collection_name,\n            \"token\": self._token,\n            **self._kwargs,\n        }\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/storages/vectorstores/qdrant.py",
    "content": "from typing import Any, List, Optional, cast\n\nfrom .base import LlamaIndexVectorStore\n\n\nclass QdrantVectorStore(LlamaIndexVectorStore):\n    _li_class = None\n\n    def _get_li_class(self):\n        try:\n            from llama_index.vector_stores.qdrant import (\n                QdrantVectorStore as LIQdrantVectorStore,\n            )\n        except ImportError:\n            raise ImportError(\n                \"Please install missing package: \"\n                \"'pip install llama-index-vector-stores-qdrant'\"\n            )\n\n        return LIQdrantVectorStore\n\n    def __init__(\n        self,\n        collection_name,\n        url: Optional[str] = None,\n        api_key: Optional[str] = None,\n        client_kwargs: Optional[dict] = None,\n        **kwargs: Any,\n    ):\n        self._collection_name = collection_name\n        self._url = url\n        self._api_key = api_key\n        self._client_kwargs = client_kwargs\n        self._kwargs = kwargs\n\n        super().__init__(\n            collection_name=collection_name,\n            url=url,\n            api_key=api_key,\n            client_kwargs=client_kwargs,\n            **kwargs,\n        )\n        from llama_index.vector_stores.qdrant import (\n            QdrantVectorStore as LIQdrantVectorStore,\n        )\n\n        self._client = cast(LIQdrantVectorStore, self._client)\n\n    def delete(self, ids: List[str], **kwargs):\n        \"\"\"Delete vector embeddings from vector stores\n\n        Args:\n            ids: List of ids of the embeddings to be deleted\n            kwargs: meant for vectorstore-specific parameters\n        \"\"\"\n        from qdrant_client import models\n\n        self._client.client.delete(\n            collection_name=self._collection_name,\n            points_selector=models.PointIdsList(\n                points=ids,\n            ),\n            **kwargs,\n        )\n\n    def drop(self):\n        \"\"\"Delete entire collection from vector 
stores\"\"\"\n        self._client.client.delete_collection(self._collection_name)\n\n    def count(self) -> int:\n        return self._client.client.count(\n            collection_name=self._collection_name, exact=True\n        ).count\n\n    def __persist_flow__(self):\n        return {\n            \"collection_name\": self._collection_name,\n            \"url\": self._url,\n            \"api_key\": self._api_key,\n            \"client_kwargs\": self._client_kwargs,\n            **self._kwargs,\n        }\n"
  },
  {
    "path": "libs/kotaemon/kotaemon/storages/vectorstores/simple_file.py",
    "content": "\"\"\"Simple file vector store index.\"\"\"\nfrom pathlib import Path\nfrom typing import Any, Optional, Type\n\nimport fsspec\nfrom llama_index.core.vector_stores import SimpleVectorStore as LISimpleVectorStore\nfrom llama_index.core.vector_stores.simple import SimpleVectorStoreData\n\nfrom kotaemon.base import DocumentWithEmbedding\n\nfrom .base import LlamaIndexVectorStore\n\n\nclass SimpleFileVectorStore(LlamaIndexVectorStore):\n    \"\"\"Similar to InMemoryVectorStore but is backed by file by default\"\"\"\n\n    _li_class: Type[LISimpleVectorStore] = LISimpleVectorStore\n    store_text: bool = False\n\n    def __init__(\n        self,\n        path: str | Path,\n        collection_name: str = \"default\",\n        data: Optional[SimpleVectorStoreData] = None,\n        fs: Optional[fsspec.AbstractFileSystem] = None,\n        **kwargs: Any,\n    ) -> None:\n        \"\"\"Initialize params.\"\"\"\n        self._data = data or SimpleVectorStoreData()\n        self._fs = fs or fsspec.filesystem(\"file\")\n        self._collection_name = collection_name\n        self._path = path\n        self._save_path = Path(path) / collection_name\n\n        super().__init__(\n            data=data,\n            fs=fs,\n            **kwargs,\n        )\n\n        if self._save_path.is_file():\n            self._client = self._li_class.from_persist_path(\n                persist_path=str(self._save_path), fs=self._fs\n            )\n\n    def add(\n        self,\n        embeddings: list[list[float]] | list[DocumentWithEmbedding],\n        metadatas: Optional[list[dict]] = None,\n        ids: Optional[list[str]] = None,\n    ):\n        r = super().add(embeddings, metadatas, ids)\n        self._client.persist(str(self._save_path), self._fs)\n        return r\n\n    def delete(self, ids: list[str], **kwargs):\n        r = super().delete(ids, **kwargs)\n        self._client.persist(str(self._save_path), self._fs)\n        return r\n\n    def drop(self):\n        
self._data = SimpleVectorStoreData()\n        self._save_path.unlink(missing_ok=True)\n\n    def __persist_flow__(self):\n        d = self._data.to_dict()\n        d[\"__type__\"] = f\"{self._data.__module__}.{self._data.__class__.__qualname__}\"\n        return {\n            \"data\": d,\n            \"collection_name\": self._collection_name,\n            \"path\": str(self._path),\n            # \"fs\": self._fs,\n        }\n"
  },
  {
    "path": "libs/kotaemon/pyproject.toml",
    "content": "# build backand and build dependencies\n[build-system]\nrequires = [\"setuptools >= 61.0\", \"wheel\", \"setuptools-git-versioning>=2.0,<3\"]\nbuild-backend = \"setuptools.build_meta\"\n\n[tool.setuptools]\ninclude-package-data = false\npackages.find.include = [\"kotaemon*\"]\npackages.find.exclude = [\"tests*\", \"env*\"]\n\n[tool.setuptools-git-versioning]\nenabled = true\ndev_template = \"{tag}\"\ndirty_template = \"{tag}\"\ntag_filter = \"v?\\\\d+(\\\\.\\\\d+)*.*\"\n\n# metadata and dependencies\n[project]\nname = \"kotaemon\"\ndynamic = [\"version\"]\nrequires-python = \">= 3.10\"\ndescription = \"Kotaemon core library for AI development.\"\ndependencies = [\n    \"azure-ai-documentintelligence\",\n    \"beautifulsoup4>=4.12.3,<4.13\",\n    \"click>=8.1.7,<9\",\n    \"cohere>=5.3.2,<6\",\n    \"cookiecutter>=2.6.0,<2.7\",\n    \"fast_langdetect\",\n    \"fastapi<=0.112.1\",\n    \"gradio>=4.31.0,<5\",\n    \"html2text==2024.2.26\",\n    \"langchain<2\",\n    \"langchain-community<1\",\n    \"langchain-openai<2\",\n    \"langchain-google-genai<5\",\n    \"langchain-anthropic<2\",\n    \"langchain-ollama<2\",\n    \"langchain-mistralai<2\",\n    \"langchain-cohere<1\",\n    \"llama-hub>=0.0.79,<0.1.0\",\n    \"llama-index>=0.10.40,<0.11.0\",\n    \"chromadb<=0.5.16\",\n    \"llama-index-vector-stores-chroma>=0.1.9\",\n    \"llama-index-vector-stores-lancedb\",\n    \"openai>=1.23.6,<2\",\n    \"matplotlib\",\n    \"matplotlib-inline\",\n    \"openpyxl>=3.1.2,<3.2\",\n    \"opentelemetry-exporter-otlp-proto-grpc>=1.25.0\", # https://github.com/chroma-core/chroma/issues/2571\n    \"pandas>=2.2.2,<2.3\",\n    \"plotly<6.0.0\",\n    \"PyMuPDF>=1.23,<=1.24.11\",\n    \"pypdf>=4.2.0,<4.3\",\n    \"pylance\",\n    \"python-decouple\", # for theflow\n    \"python-docx>=1.1.0,<1.2\",\n    \"python-dotenv>=1.0.1,<1.1\",\n    \"tenacity>=8.2.3,<8.3\",\n    \"theflow>=0.8.6,<0.9.0\",\n    \"trogon>=0.5.0,<0.6\",\n    \"umap-learn==0.5.5\",\n    
\"tavily-python>=0.4.0\",\n    \"pydantic<=2.10.6\",\n]\nreadme = \"README.md\"\nauthors = [\n    { name = \"@trducng\", email = \"john@cinnamon.is\" },\n    { name = \"@lone17\", email = \"ian@cinnamon.is\" },\n    { name = \"@taprosoft\", email = \"tadashi@cinnamon.is\" },\n    { name = \"@cin-albert\", email = \"albert@cinnamon.is\" },\n]\nclassifiers = [\n    \"Programming Language :: Python :: 3\",\n    \"Operating System :: OS Independent\",\n]\n\n[project.optional-dependencies]\nadv = [\n    \"duckduckgo-search>=6.1.0,<6.2\",\n    \"elasticsearch>=8.13.0,<8.14\",\n    \"fastembed\",\n    \"onnxruntime<v1.20\",\n    \"googlesearch-python>=1.2.4,<1.3\",\n    \"llama-cpp-python<0.2.8\",\n    \"llama-index>=0.10.40,<0.11.0\",\n    \"llama-index-vector-stores-milvus\",\n    \"llama-index-vector-stores-qdrant\",\n    \"mcp[cli]>=1.0.0\",\n    \"sentence-transformers\",\n    \"tabulate\",\n    \"unstructured>=0.15.8,<0.16\",\n    \"wikipedia>=1.4.0,<1.5\",\n    \"voyageai>=0.3.0\",\n]\ndev = [\n    \"black\",\n    \"coverage\",\n    \"flake8\",\n    \"ipython\",\n    \"pre-commit\",\n    \"pytest\",\n    \"pytest-mock\",\n    \"sphinx\",\n]\nall = [\"kotaemon[adv,dev]\"]\n\n[project.scripts]\nkotaemon = \"kotaemon.cli:main\"\n"
  },
  {
    "path": "libs/kotaemon/pytest.ini",
    "content": "[pytest]\nminversion = 7.4.0\ntestpaths = tests\naddopts = -ra -q\nlog_cli=true\nlog_level=WARNING\nlog_format = %(asctime)s %(levelname)s %(message)s\nlog_date_format = %Y-%m-%d %H:%M:%S\nlog_file = logs/pytest-logs.txt\n"
  },
  {
    "path": "libs/kotaemon/tests/__init__.py",
    "content": ""
  },
  {
    "path": "libs/kotaemon/tests/_test_multimodal_reader.py",
    "content": "# TODO: This test is broken and should be rewritten\nfrom pathlib import Path\n\nfrom kotaemon.loaders import AdobeReader\n\n# from dotenv import load_dotenv\n\n\ninput_file = Path(__file__).parent / \"resources\" / \"multimodal.pdf\"\n\n# load_dotenv()\n\n\ndef test_adobe_reader():\n    reader = AdobeReader()\n    documents = reader.load_data(input_file)\n    table_docs = [doc for doc in documents if doc.metadata.get(\"type\", \"\") == \"table\"]\n    assert len(table_docs) == 2\n\n    figure_docs = [doc for doc in documents if doc.metadata.get(\"type\", \"\") == \"image\"]\n    assert len(figure_docs) == 2\n"
  },
  {
    "path": "libs/kotaemon/tests/conftest.py",
    "content": "import pytest\n\n\n@pytest.fixture(scope=\"function\")\ndef mock_google_search(monkeypatch):\n    import googlesearch\n\n    def result(*args, **kwargs):\n        yield googlesearch.SearchResult(\n            url=\"https://www.cinnamon.is/en/\",\n            title=\"Cinnamon AI\",\n            description=\"Cinnamon AI is an enterprise AI company.\",\n        )\n\n    monkeypatch.setattr(googlesearch, \"search\", result)\n\n\ndef if_haystack_not_installed():\n    try:\n        import haystack  # noqa: F401\n    except ImportError:\n        return True\n    else:\n        return False\n\n\ndef if_sentence_bert_not_installed():\n    try:\n        import sentence_transformers  # noqa: F401\n    except ImportError:\n        return True\n    else:\n        return False\n\n\ndef if_sentence_fastembed_not_installed():\n    try:\n        import fastembed  # noqa: F401\n    except ImportError:\n        return True\n    else:\n        return False\n\n\ndef if_unstructured_pdf_not_installed():\n    try:\n        import unstructured  # noqa: F401\n        from unstructured.partition.pdf import partition_pdf  # noqa: F401\n    except ImportError:\n        return True\n    else:\n        return False\n\n\ndef if_cohere_not_installed():\n    try:\n        import cohere  # noqa: F401\n    except ImportError:\n        return True\n    else:\n        return False\n\n\ndef if_llama_cpp_not_installed():\n    try:\n        import llama_cpp  # noqa: F401\n    except ImportError:\n        return True\n    else:\n        return False\n\n\ndef if_voyageai_not_installed():\n    try:\n        import voyageai  # noqa: F401\n    except ImportError:\n        return True\n    else:\n        return False\n\n\nskip_when_haystack_not_installed = pytest.mark.skipif(\n    if_haystack_not_installed(), reason=\"Haystack is not installed\"\n)\n\nskip_when_sentence_bert_not_installed = pytest.mark.skipif(\n    if_sentence_bert_not_installed(), reason=\"SBert is not 
installed\"\n)\n\nskip_when_fastembed_not_installed = pytest.mark.skipif(\n    if_sentence_fastembed_not_installed(), reason=\"fastembed is not installed\"\n)\n\nskip_when_unstructured_pdf_not_installed = pytest.mark.skipif(\n    if_unstructured_pdf_not_installed(), reason=\"unstructured is not installed\"\n)\n\nskip_when_cohere_not_installed = pytest.mark.skipif(\n    if_cohere_not_installed(), reason=\"cohere is not installed\"\n)\n\nskip_openai_lc_wrapper_test = pytest.mark.skipif(\n    True, reason=\"OpenAI LC wrapper test is skipped\"\n)\n\nskip_llama_cpp_not_installed = pytest.mark.skipif(\n    if_llama_cpp_not_installed(), reason=\"llama_cpp is not installed\"\n)\n\nskip_when_voyageai_not_installed = pytest.mark.skipif(\n    if_voyageai_not_installed(), reason=\"voyageai is not installed\"\n)\n"
  },
  {
    "path": "libs/kotaemon/tests/resources/dummy.mhtml",
    "content": "MIME-Version: 1.0\nContent-Type: multipart/related; boundary=\"----=_NextPart_01CF5AE5.5C24CD00\"\n\nThis document is a Single File Web Page, also known as a Web Archive file.  If you are seeing this message, your browser or editor doesn't support Web Archive files.  Please download a browser that supports Web Archive, such as Windows® Internet Explorer®.\n\n------=_NextPart_01CF5AE5.5C24CD00\nContent-Location: file:///C:/D16BB227/testing.htm\nContent-Transfer-Encoding: quoted-printable\nContent-Type: text/html; charset=\"us-ascii\"\n\n<html xmlns:v=3D\"urn:schemas-microsoft-com:vml\"\nxmlns:o=3D\"urn:schemas-microsoft-com:office:office\"\nxmlns:w=3D\"urn:schemas-microsoft-com:office:word\"\nxmlns:m=3D\"http://schemas.microsoft.com/office/2004/12/omml\"\nxmlns=3D\"http://www.w3.org/TR/REC-html40\">\n\n<head>\n<meta http-equiv=3DContent-Type content=3D\"text/html; charset=3Dus-ascii\">\n<meta name=3DProgId content=3DWord.Document>\n<meta name=3DGenerator content=3D\"Microsoft Word 12\">\n<meta name=3DOriginator content=3D\"Microsoft Word 12\">\n<link rel=3DFile-List href=3D\"testing_files/filelist.xml\">\n<!--[if gte mso 9]><xml>\n <o:DocumentProperties>\n  <o:Author>dtobias</o:Author>\n  <o:Template>testing.mht</o:Template>\n  <o:LastAuthor>dtobias</o:LastAuthor>\n  <o:Revision>2</o:Revision>\n  <o:TotalTime>1</o:TotalTime>\n  <o:LastPrinted>2014-04-18T13:05:00Z</o:LastPrinted>\n  <o:Created>2014-04-18T13:05:00Z</o:Created>\n  <o:LastSaved>2014-04-18T13:05:00Z</o:LastSaved>\n  <o:Pages>1</o:Pages>\n  <o:Words>49</o:Words>\n  <o:Characters>280</o:Characters>\n  <o:Company>Microsoft</o:Company>\n  <o:Lines>2</o:Lines>\n  <o:Paragraphs>1</o:Paragraphs>\n  <o:CharactersWithSpaces>328</o:CharactersWithSpaces>\n  <o:Version>12.00</o:Version>\n </o:DocumentProperties>\n</xml><![endif]-->\n<link rel=3DthemeData href=3D\"testing_files/themedata.thmx\">\n<link rel=3DcolorSchemeMapping href=3D\"testing_files/colorschememapping.xml=\n\">\n<!--[if gte mso 
9]><xml>\n <w:WordDocument>\n  <w:SpellingState>Clean</w:SpellingState>\n  <w:TrackMoves>false</w:TrackMoves>\n  <w:TrackFormatting/>\n  <w:PunctuationKerning/>\n  <w:ValidateAgainstSchemas/>\n  <w:SaveIfXMLInvalid>false</w:SaveIfXMLInvalid>\n  <w:IgnoreMixedContent>false</w:IgnoreMixedContent>\n  <w:AlwaysShowPlaceholderText>false</w:AlwaysShowPlaceholderText>\n  <w:DoNotPromoteQF/>\n  <w:LidThemeOther>EN-US</w:LidThemeOther>\n  <w:LidThemeAsian>X-NONE</w:LidThemeAsian>\n  <w:LidThemeComplexScript>X-NONE</w:LidThemeComplexScript>\n  <w:Compatibility>\n   <w:BreakWrappedTables/>\n   <w:SnapToGridInCell/>\n   <w:WrapTextWithPunct/>\n   <w:UseAsianBreakRules/>\n   <w:DontGrowAutofit/>\n   <w:SplitPgBreakAndParaMark/>\n   <w:DontVertAlignCellWithSp/>\n   <w:DontBreakConstrainedForcedTables/>\n   <w:DontVertAlignInTxbx/>\n   <w:Word11KerningPairs/>\n   <w:CachedColBalance/>\n  </w:Compatibility>\n  <w:BrowserLevel>MicrosoftInternetExplorer4</w:BrowserLevel>\n  <m:mathPr>\n   <m:mathFont m:val=3D\"Cambria Math\"/>\n   <m:brkBin m:val=3D\"before\"/>\n   <m:brkBinSub m:val=3D\"&#45;-\"/>\n   <m:smallFrac m:val=3D\"off\"/>\n   <m:dispDef/>\n   <m:lMargin m:val=3D\"0\"/>\n   <m:rMargin m:val=3D\"0\"/>\n   <m:defJc m:val=3D\"centerGroup\"/>\n   <m:wrapIndent m:val=3D\"1440\"/>\n   <m:intLim m:val=3D\"subSup\"/>\n   <m:naryLim m:val=3D\"undOvr\"/>\n  </m:mathPr></w:WordDocument>\n</xml><![endif]--><!--[if gte mso 9]><xml>\n <w:LatentStyles DefLockedState=3D\"false\" DefUnhideWhenUsed=3D\"true\"\n  DefSemiHidden=3D\"true\" DefQFormat=3D\"false\" DefPriority=3D\"99\"\n  LatentStyleCount=3D\"267\">\n  <w:LsdException Locked=3D\"false\" Priority=3D\"0\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" QFormat=3D\"true\" Name=3D\"Normal\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"9\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" QFormat=3D\"true\" Name=3D\"heading 1\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"9\" QFormat=3D\"true\" 
Name=3D\"=\nheading 2\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"9\" QFormat=3D\"true\" Name=3D\"=\nheading 3\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"9\" QFormat=3D\"true\" Name=3D\"=\nheading 4\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"9\" QFormat=3D\"true\" Name=3D\"=\nheading 5\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"9\" QFormat=3D\"true\" Name=3D\"=\nheading 6\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"9\" QFormat=3D\"true\" Name=3D\"=\nheading 7\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"9\" QFormat=3D\"true\" Name=3D\"=\nheading 8\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"9\" QFormat=3D\"true\" Name=3D\"=\nheading 9\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"39\" Name=3D\"toc 1\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"39\" Name=3D\"toc 2\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"39\" Name=3D\"toc 3\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"39\" Name=3D\"toc 4\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"39\" Name=3D\"toc 5\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"39\" Name=3D\"toc 6\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"39\" Name=3D\"toc 7\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"39\" Name=3D\"toc 8\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"39\" Name=3D\"toc 9\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"35\" QFormat=3D\"true\" Name=3D=\n\"caption\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"10\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" QFormat=3D\"true\" Name=3D\"Title\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"1\" Name=3D\"Default Paragraph=\n Font\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"11\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" QFormat=3D\"true\" Name=3D\"Subtitle\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"22\" SemiHidden=3D\"false\"\n   
UnhideWhenUsed=3D\"false\" QFormat=3D\"true\" Name=3D\"Strong\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"20\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" QFormat=3D\"true\" Name=3D\"Emphasis\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"59\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Table Grid\"/>\n  <w:LsdException Locked=3D\"false\" UnhideWhenUsed=3D\"false\" Name=3D\"Placeho=\nlder Text\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"1\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" QFormat=3D\"true\" Name=3D\"No Spacing\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"60\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Light Shading\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"61\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Light List\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"62\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Light Grid\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"63\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Medium Shading 1\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"64\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Medium Shading 2\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"65\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Medium List 1\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"66\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Medium List 2\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"67\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Medium Grid 1\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"68\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Medium Grid 2\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"69\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Medium Grid 3\"/>\n  <w:LsdException 
Locked=3D\"false\" Priority=3D\"70\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Dark List\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"71\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Colorful Shading\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"72\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Colorful List\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"73\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Colorful Grid\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"60\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Light Shading Accent 1\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"61\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Light List Accent 1\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"62\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Light Grid Accent 1\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"63\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Medium Shading 1 Accent 1\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"64\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Medium Shading 2 Accent 1\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"65\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Medium List 1 Accent 1\"/>\n  <w:LsdException Locked=3D\"false\" UnhideWhenUsed=3D\"false\" Name=3D\"Revisio=\nn\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"34\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" QFormat=3D\"true\" Name=3D\"List Paragraph\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"29\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" QFormat=3D\"true\" Name=3D\"Quote\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"30\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" QFormat=3D\"true\" Name=3D\"Intense Quote\"/>\n  <w:LsdException Locked=3D\"false\" 
Priority=3D\"66\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Medium List 2 Accent 1\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"67\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Medium Grid 1 Accent 1\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"68\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Medium Grid 2 Accent 1\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"69\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Medium Grid 3 Accent 1\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"70\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Dark List Accent 1\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"71\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Colorful Shading Accent 1\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"72\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Colorful List Accent 1\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"73\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Colorful Grid Accent 1\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"60\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Light Shading Accent 2\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"61\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Light List Accent 2\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"62\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Light Grid Accent 2\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"63\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Medium Shading 1 Accent 2\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"64\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Medium Shading 2 Accent 2\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"65\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Medium List 1 Accent 
2\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"66\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Medium List 2 Accent 2\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"67\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Medium Grid 1 Accent 2\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"68\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Medium Grid 2 Accent 2\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"69\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Medium Grid 3 Accent 2\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"70\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Dark List Accent 2\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"71\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Colorful Shading Accent 2\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"72\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Colorful List Accent 2\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"73\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Colorful Grid Accent 2\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"60\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Light Shading Accent 3\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"61\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Light List Accent 3\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"62\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Light Grid Accent 3\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"63\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Medium Shading 1 Accent 3\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"64\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Medium Shading 2 Accent 3\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"65\" SemiHidden=3D\"false\"\n   
UnhideWhenUsed=3D\"false\" Name=3D\"Medium List 1 Accent 3\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"66\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Medium List 2 Accent 3\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"67\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Medium Grid 1 Accent 3\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"68\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Medium Grid 2 Accent 3\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"69\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Medium Grid 3 Accent 3\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"70\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Dark List Accent 3\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"71\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Colorful Shading Accent 3\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"72\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Colorful List Accent 3\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"73\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Colorful Grid Accent 3\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"60\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Light Shading Accent 4\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"61\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Light List Accent 4\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"62\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Light Grid Accent 4\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"63\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Medium Shading 1 Accent 4\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"64\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Medium Shading 2 Accent 4\"/>\n  <w:LsdException Locked=3D\"false\" 
Priority=3D\"65\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Medium List 1 Accent 4\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"66\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Medium List 2 Accent 4\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"67\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Medium Grid 1 Accent 4\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"68\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Medium Grid 2 Accent 4\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"69\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Medium Grid 3 Accent 4\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"70\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Dark List Accent 4\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"71\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Colorful Shading Accent 4\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"72\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Colorful List Accent 4\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"73\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Colorful Grid Accent 4\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"60\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Light Shading Accent 5\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"61\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Light List Accent 5\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"62\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Light Grid Accent 5\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"63\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Medium Shading 1 Accent 5\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"64\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Medium Shading 2 Accent 
5\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"65\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Medium List 1 Accent 5\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"66\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Medium List 2 Accent 5\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"67\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Medium Grid 1 Accent 5\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"68\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Medium Grid 2 Accent 5\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"69\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Medium Grid 3 Accent 5\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"70\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Dark List Accent 5\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"71\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Colorful Shading Accent 5\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"72\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Colorful List Accent 5\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"73\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Colorful Grid Accent 5\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"60\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Light Shading Accent 6\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"61\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Light List Accent 6\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"62\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Light Grid Accent 6\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"63\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Medium Shading 1 Accent 6\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"64\" SemiHidden=3D\"false\"\n   
UnhideWhenUsed=3D\"false\" Name=3D\"Medium Shading 2 Accent 6\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"65\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Medium List 1 Accent 6\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"66\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Medium List 2 Accent 6\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"67\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Medium Grid 1 Accent 6\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"68\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Medium Grid 2 Accent 6\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"69\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Medium Grid 3 Accent 6\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"70\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Dark List Accent 6\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"71\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Colorful Shading Accent 6\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"72\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Colorful List Accent 6\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"73\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" Name=3D\"Colorful Grid Accent 6\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"19\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" QFormat=3D\"true\" Name=3D\"Subtle Emphasis\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"21\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" QFormat=3D\"true\" Name=3D\"Intense Emphasis\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"31\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" QFormat=3D\"true\" Name=3D\"Subtle Reference\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"32\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" QFormat=3D\"true\" 
Name=3D\"Intense Reference\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"33\" SemiHidden=3D\"false\"\n   UnhideWhenUsed=3D\"false\" QFormat=3D\"true\" Name=3D\"Book Title\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"37\" Name=3D\"Bibliography\"/>\n  <w:LsdException Locked=3D\"false\" Priority=3D\"39\" QFormat=3D\"true\" Name=3D=\n\"TOC Heading\"/>\n </w:LatentStyles>\n</xml><![endif]-->\n<style>\n<!--\n /* Font Definitions */\n @font-face\n\t{font-family:\"Cambria Math\";\n\tpanose-1:2 4 5 3 5 4 6 3 2 4;\n\tmso-font-charset:1;\n\tmso-generic-font-family:roman;\n\tmso-font-format:other;\n\tmso-font-pitch:variable;\n\tmso-font-signature:0 0 0 0 0 0;}\n@font-face\n\t{font-family:Cambria;\n\tpanose-1:2 4 5 3 5 4 6 3 2 4;\n\tmso-font-charset:0;\n\tmso-generic-font-family:roman;\n\tmso-font-pitch:variable;\n\tmso-font-signature:-536870145 1073743103 0 0 415 0;}\n@font-face\n\t{font-family:Calibri;\n\tpanose-1:2 15 5 2 2 2 4 3 2 4;\n\tmso-font-charset:0;\n\tmso-generic-font-family:swiss;\n\tmso-font-pitch:variable;\n\tmso-font-signature:-536870145 1073786111 1 0 415 0;}\n /* Style Definitions */\n p.MsoNormal, li.MsoNormal, div.MsoNormal\n\t{mso-style-unhide:no;\n\tmso-style-qformat:yes;\n\tmso-style-parent:\"\";\n\tmargin-top:0in;\n\tmargin-right:0in;\n\tmargin-bottom:10.0pt;\n\tmargin-left:0in;\n\tline-height:115%;\n\tmso-pagination:widow-orphan;\n\tfont-size:11.0pt;\n\tfont-family:\"Calibri\",\"sans-serif\";\n\tmso-fareast-font-family:Calibri;\n\tmso-bidi-font-family:\"Times New Roman\";}\np.MsoTitle, li.MsoTitle, div.MsoTitle\n\t{mso-style-priority:10;\n\tmso-style-unhide:no;\n\tmso-style-qformat:yes;\n\tmso-style-link:\"Title Char\";\n\tmso-style-next:Normal;\n\tmargin-top:0in;\n\tmargin-right:0in;\n\tmargin-bottom:15.0pt;\n\tmargin-left:0in;\n\tmso-add-space:auto;\n\tmso-pagination:widow-orphan;\n\tborder:none;\n\tmso-border-bottom-alt:solid #4F81BD 1.0pt;\n\tpadding:0in;\n\tmso-padding-alt:0in 0in 4.0pt 
0in;\n\tfont-size:26.0pt;\n\tfont-family:\"Cambria\",\"serif\";\n\tmso-fareast-font-family:\"Times New Roman\";\n\tmso-bidi-font-family:\"Times New Roman\";\n\tcolor:#17365D;\n\tletter-spacing:.25pt;\n\tmso-font-kerning:14.0pt;}\np.MsoTitleCxSpFirst, li.MsoTitleCxSpFirst, div.MsoTitleCxSpFirst\n\t{mso-style-priority:10;\n\tmso-style-unhide:no;\n\tmso-style-qformat:yes;\n\tmso-style-link:\"Title Char\";\n\tmso-style-next:Normal;\n\tmso-style-type:export-only;\n\tmargin:0in;\n\tmargin-bottom:.0001pt;\n\tmso-add-space:auto;\n\tmso-pagination:widow-orphan;\n\tborder:none;\n\tmso-border-bottom-alt:solid #4F81BD 1.0pt;\n\tpadding:0in;\n\tmso-padding-alt:0in 0in 4.0pt 0in;\n\tfont-size:26.0pt;\n\tfont-family:\"Cambria\",\"serif\";\n\tmso-fareast-font-family:\"Times New Roman\";\n\tmso-bidi-font-family:\"Times New Roman\";\n\tcolor:#17365D;\n\tletter-spacing:.25pt;\n\tmso-font-kerning:14.0pt;}\np.MsoTitleCxSpMiddle, li.MsoTitleCxSpMiddle, div.MsoTitleCxSpMiddle\n\t{mso-style-priority:10;\n\tmso-style-unhide:no;\n\tmso-style-qformat:yes;\n\tmso-style-link:\"Title Char\";\n\tmso-style-next:Normal;\n\tmso-style-type:export-only;\n\tmargin:0in;\n\tmargin-bottom:.0001pt;\n\tmso-add-space:auto;\n\tmso-pagination:widow-orphan;\n\tborder:none;\n\tmso-border-bottom-alt:solid #4F81BD 1.0pt;\n\tpadding:0in;\n\tmso-padding-alt:0in 0in 4.0pt 0in;\n\tfont-size:26.0pt;\n\tfont-family:\"Cambria\",\"serif\";\n\tmso-fareast-font-family:\"Times New Roman\";\n\tmso-bidi-font-family:\"Times New Roman\";\n\tcolor:#17365D;\n\tletter-spacing:.25pt;\n\tmso-font-kerning:14.0pt;}\np.MsoTitleCxSpLast, li.MsoTitleCxSpLast, div.MsoTitleCxSpLast\n\t{mso-style-priority:10;\n\tmso-style-unhide:no;\n\tmso-style-qformat:yes;\n\tmso-style-link:\"Title Char\";\n\tmso-style-next:Normal;\n\tmso-style-type:export-only;\n\tmargin-top:0in;\n\tmargin-right:0in;\n\tmargin-bottom:15.0pt;\n\tmargin-left:0in;\n\tmso-add-space:auto;\n\tmso-pagination:widow-orphan;\n\tborder:none;\n\tmso-border-bottom-alt:solid #4F81BD 
1.0pt;\n\tpadding:0in;\n\tmso-padding-alt:0in 0in 4.0pt 0in;\n\tfont-size:26.0pt;\n\tfont-family:\"Cambria\",\"serif\";\n\tmso-fareast-font-family:\"Times New Roman\";\n\tmso-bidi-font-family:\"Times New Roman\";\n\tcolor:#17365D;\n\tletter-spacing:.25pt;\n\tmso-font-kerning:14.0pt;}\nspan.TitleChar\n\t{mso-style-name:\"Title Char\";\n\tmso-style-priority:10;\n\tmso-style-unhide:no;\n\tmso-style-locked:yes;\n\tmso-style-link:Title;\n\tmso-ansi-font-size:26.0pt;\n\tmso-bidi-font-size:26.0pt;\n\tfont-family:\"Cambria\",\"serif\";\n\tmso-ascii-font-family:Cambria;\n\tmso-fareast-font-family:\"Times New Roman\";\n\tmso-hansi-font-family:Cambria;\n\tmso-bidi-font-family:\"Times New Roman\";\n\tcolor:#17365D;\n\tletter-spacing:.25pt;\n\tmso-font-kerning:14.0pt;}\nspan.SpellE\n\t{mso-style-name:\"\";\n\tmso-spl-e:yes;}\n.MsoChpDefault\n\t{mso-style-type:export-only;\n\tmso-default-props:yes;\n\tfont-size:10.0pt;\n\tmso-ansi-font-size:10.0pt;\n\tmso-bidi-font-size:10.0pt;\n\tmso-ascii-font-family:Calibri;\n\tmso-fareast-font-family:Calibri;\n\tmso-hansi-font-family:Calibri;}\n@page WordSection1\n\t{size:8.5in 11.0in;\n\tmargin:1.0in 1.0in 1.0in 1.0in;\n\tmso-header-margin:.5in;\n\tmso-footer-margin:.5in;\n\tmso-paper-source:0;}\ndiv.WordSection1\n\t{page:WordSection1;}\n-->\n</style>\n<!--[if gte mso 10]>\n<style>\n /* Style Definitions */\n table.MsoNormalTable\n\t{mso-style-name:\"Table Normal\";\n\tmso-tstyle-rowband-size:0;\n\tmso-tstyle-colband-size:0;\n\tmso-style-noshow:yes;\n\tmso-style-priority:99;\n\tmso-style-qformat:yes;\n\tmso-style-parent:\"\";\n\tmso-padding-alt:0in 5.4pt 0in 5.4pt;\n\tmso-para-margin:0in;\n\tmso-para-margin-bottom:.0001pt;\n\tmso-pagination:widow-orphan;\n\tfont-size:10.0pt;\n\tfont-family:\"Calibri\",\"sans-serif\";}\n</style>\n<![endif]--><!--[if gte mso 9]><xml>\n <o:shapedefaults v:ext=3D\"edit\" spidmax=3D\"2050\"/>\n</xml><![endif]--><!--[if gte mso 9]><xml>\n <o:shapelayout v:ext=3D\"edit\">\n  <o:idmap v:ext=3D\"edit\" 
data=3D\"1\"/>\n </o:shapelayout></xml><![endif]-->\n</head>\n\n<body lang=3DEN-US style=3D'tab-interval:.5in'>\n\n<div class=3DWordSection1>\n\n<div style=3D'mso-element:para-border-div;border:none;border-bottom:solid #=\n4F81BD 1.0pt;\npadding:0in 0in 4.0pt 0in'>\n\n<p class=3DMsoTitle>This is a test.</p>\n\n</div>\n\n<p class=3DMsoNormal>This is <b style=3D'mso-bidi-font-weight:normal'>bold,=\n <i\nstyle=3D'mso-bidi-font-style:normal'>italic, and <u>underlined.</u></i></b>=\n</p>\n\n<p class=3DMsoNormal><span class=3DSpellE>asdakl</span> <span class=3DSpell=\nE>fskljf</span>\n<span class=3DSpellE>sklf</span> <span class=3DSpellE>jkslaf</span>; <span\nclass=3DSpellE>djks</span> <span class=3DSpellE>dlkfa</span> <span class=3D=\nSpellE>sk</span>\n<span class=3DSpellE>sdjkl</span> <span class=3DSpellE>ksjkl</span> <span\nclass=3DSpellE>jsjk</span> <span class=3DSpellE>skdjjks</span> <span class=\n=3DSpellE>i</span>\nw <span class=3DSpellE>ie</span> <span class=3DSpellE>sjkfksd</span> <span\nclass=3DSpellE>fjisdf</span> <span class=3DSpellE>jks</span> <span class=3D=\nSpellE>fjs</span>\n<span class=3DSpellE>kdj</span> <span class=3DSpellE>fsk</span> <span class=\n=3DSpellE>dfjskd</span>\n<span class=3DSpellE>fjskd</span> <span class=3DSpellE>fjsd</span> <span\nclass=3DSpellE>kfjsk</span> f <span class=3DSpellE>jskdf</span> <span class=\n=3DSpellE>jskd</span>\n<span class=3DSpellE>fjsk</span> <span class=3DSpellE>dfjskdf</span> <span\nclass=3DSpellE>jsifj</span> <span class=3DSpellE>sifj</span> <span class=3D=\nSpellE>sk</span>\n<span class=3DSpellE>fjks</span> <span class=3DSpellE>fjksd</span> <span\nclass=3DSpellE>fjskdf</span> <span class=3DSpellE>kjs</span> <span class=3D=\nSpellE>jdfksk</span>\n<span class=3DSpellE>fdjs</span> <span class=3DSpellE>fksj</span> <span\nclass=3DSpellE>fks</span> <span class=3DSpellE>dfjs</span> <span class=3DSp=\nellE>dfks</span>\n<span class=3DSpellE>fdjsk</span> <span class=3DSpellE>fjskdfjskdf</span> 
<=\nspan\nclass=3DSpellE>sjkf</span> <span class=3DSpellE>skjf</span> <span class=3DS=\npellE>sjkdf</span>\n<span class=3DSpellE>skfjsfjk</span> s</p>\n\n<p class=3DMsoNormal>The end.</p>\n\n</div>\n\n</body>\n\n</html>\n\n------=_NextPart_01CF5AE5.5C24CD00\nContent-Location: file:///C:/D16BB227/testing_files/themedata.thmx\nContent-Transfer-Encoding: base64\nContent-Type: application/vnd.ms-officetheme\n\nUEsDBBQABgAIAAAAIQCCirwT+gAAABwCAAATAAAAW0NvbnRlbnRfVHlwZXNdLnhtbKyRy2rDMBBF\n94X+g9C22HK6KKXYzqJJd30s0g8Y5LEtao+ENAnJ33fsuFC6CC10IxBizpl7Va6P46AOGJPzVOlV\nXmiFZH3jqKv0++4pu9cqMVADgyes9AmTXtfXV+XuFDApmaZU6Z45PBiTbI8jpNwHJHlpfRyB5Ro7\nE8B+QIfmtijujPXESJzxxNB1+SoLRNegeoPILzCKx7Cg8Pv5DCSAmAtYq8czYVqi0hDC4CywRDAH\nan7oM9+2zmLj7X4UaT6DF9jNBDO/XGD1P+ov5wZb2A+stkfp4lx/xCH9LdtSay6Tc/7Uu5AuGC6X\nt7Rh5r+tPwEAAP//AwBQSwMEFAAGAAgAAAAhAKXWp+fAAAAANgEAAAsAAABfcmVscy8ucmVsc4SP\nz2rDMAyH74W9g9F9UdLDGCV2L6WQQy+jfQDhKH9oIhvbG+vbT8cGCrsIhKTv96k9/q6L+eGU5yAW\nmqoGw+JDP8to4XY9v3+CyYWkpyUIW3hwhqN727VfvFDRozzNMRulSLYwlRIPiNlPvFKuQmTRyRDS\nSkXbNGIkf6eRcV/XH5ieGeA2TNP1FlLXN2Cuj6jJ/7PDMMyeT8F/ryzlRQRuN5RMaeRioagv41O9\nkKhlqtQe0LW4+db9AQAA//8DAFBLAwQUAAYACAAAACEAa3mWFoMAAACKAAAAHAAAAHRoZW1lL3Ro\nZW1lL3RoZW1lTWFuYWdlci54bWwMzE0KwyAQQOF9oXeQ2TdjuyhFYrLLrrv2AEOcGkHHoNKf29fl\n44M3zt8U1ZtLDVksnAcNimXNLoi38Hwspxuo2kgcxSxs4ccV5ul4GMm0jRPfSchzUX0j1ZCFrbXd\nINa1K9Uh7yzdXrkkaj2LR1fo0/cp4kXrKyYKAjj9AQAA//8DAFBLAwQUAAYACAAAACEAlrWt4pYG\nAABQGwAAFgAAAHRoZW1lL3RoZW1lL3RoZW1lMS54bWzsWU9v2zYUvw/YdyB0b2MndhoHdYrYsZst\nTRvEboceaYmW2FCiQNJJfRva44ABw7phhxXYbYdhW4EW2KX7NNk6bB3Qr7BHUpLFWF6SNtiKrT4k\nEvnj+/8eH6mr1+7HDB0SISlP2l79cs1DJPF5QJOw7d0e9i+teUgqnASY8YS0vSmR3rWN99+7itdV\nRGKCYH0i13Hbi5RK15eWpA/DWF7mKUlgbsxFjBW8inApEPgI6MZsablWW12KMU08lOAYyN4aj6lP\n0FCT9DZy4j0Gr4mSesBnYqBJE2eFwQYHdY2QU9llAh1i1vaAT8CPhuS+8hDDUsFE26uZn7e0cXUJ\nr2eLmFqwtrSub37ZumxBcLBseIpwVDCt9xutK1sFfQNgah7X6/W6vXpBzwCw74OmVpYyzUZ/rd7J\naZZA9nGedrfWrDVcfIn+ypzMrU6n02xlsliiBmQfG3P4tdpqY3PZwRuQxTfn8I3OZre76uANyOJX\n5/D9K63Vhos3oIjR5GAOrR3a72fUC8
iYs+1K+BrA12oZfIaCaCiiS7MY80QtirUY3+OiDwANZFjR\nBKlpSsbYhyju4ngkKNYM8DrBpRk75Mu5Ic0LSV/QVLW9D1MMGTGj9+r596+eP0XHD54dP/jp+OHD\n4wc/WkLOqm2chOVVL7/97M/HH6M/nn7z8tEX1XhZxv/6wye//Px5NRDSZybOiy+f/PbsyYuvPv39\nu0cV8E2BR2X4kMZEopvkCO3zGBQzVnElJyNxvhXDCNPyis0klDjBmksF/Z6KHPTNKWaZdxw5OsS1\n4B0B5aMKeH1yzxF4EImJohWcd6LYAe5yzjpcVFphR/MqmXk4ScJq5mJSxu1jfFjFu4sTx7+9SQp1\nMw9LR/FuRBwx9xhOFA5JQhTSc/yAkArt7lLq2HWX+oJLPlboLkUdTCtNMqQjJ5pmi7ZpDH6ZVukM\n/nZss3sHdTir0nqLHLpIyArMKoQfEuaY8TqeKBxXkRzimJUNfgOrqErIwVT4ZVxPKvB0SBhHvYBI\nWbXmlgB9S07fwVCxKt2+y6axixSKHlTRvIE5LyO3+EE3wnFahR3QJCpjP5AHEKIY7XFVBd/lbobo\nd/ADTha6+w4ljrtPrwa3aeiINAsQPTMR2pdQqp0KHNPk78oxo1CPbQxcXDmGAvji68cVkfW2FuJN\n2JOqMmH7RPldhDtZdLtcBPTtr7lbeJLsEQjz+Y3nXcl9V3K9/3zJXZTPZy20s9oKZVf3DbYpNi1y\nvLBDHlPGBmrKyA1pmmQJ+0TQh0G9zpwOSXFiSiN4zOq6gwsFNmuQ4OojqqJBhFNosOueJhLKjHQo\nUcolHOzMcCVtjYcmXdljYVMfGGw9kFjt8sAOr+jh/FxQkDG7TWgOnzmjFU3grMxWrmREQe3XYVbX\nQp2ZW92IZkqdw61QGXw4rxoMFtaEBgRB2wJWXoXzuWYNBxPMSKDtbvfe3C3GCxfpIhnhgGQ+0nrP\n+6hunJTHirkJgNip8JE+5J1itRK3lib7BtzO4qQyu8YCdrn33sRLeQTPvKTz9kQ6sqScnCxBR22v\n1VxuesjHadsbw5kWHuMUvC51z4dZCBdDvhI27E9NZpPlM2+2csXcJKjDNYW1+5zCTh1IhVRbWEY2\nNMxUFgIs0Zys/MtNMOtFKWAj/TWkWFmDYPjXpAA7uq4l4zHxVdnZpRFtO/ualVI+UUQMouAIjdhE\n7GNwvw5V0CegEq4mTEXQL3CPpq1tptzinCVd+fbK4Ow4ZmmEs3KrUzTPZAs3eVzIYN5K4oFulbIb\n5c6vikn5C1KlHMb/M1X0fgI3BSuB9oAP17gCI52vbY8LFXGoQmlE/b6AxsHUDogWuIuFaQgquEw2\n/wU51P9tzlkaJq3hwKf2aYgEhf1IRYKQPShLJvpOIVbP9i5LkmWETESVxJWpFXtEDgkb6hq4qvd2\nD0UQ6qaaZGXA4E7Gn/ueZdAo1E1OOd+cGlLsvTYH/unOxyYzKOXWYdPQ5PYvRKzYVe16szzfe8uK\n6IlZm9XIswKYlbaCVpb2rynCObdaW7HmNF5u5sKBF+c1hsGiIUrhvgfpP7D/UeEz+2VCb6hDvg+1\nFcGHBk0Mwgai+pJtPJAukHZwBI2THbTBpElZ02atk7ZavllfcKdb8D1hbC3ZWfx9TmMXzZnLzsnF\nizR2ZmHH1nZsoanBsydTFIbG+UHGOMZ80ip/deKje+DoLbjfnzAlTTDBNyWBofUcmDyA5LcczdKN\nvwAAAP//AwBQSwMEFAAGAAgAAAAhAA3RkJ+2AAAAGwEAACcAAAB0aGVtZS90aGVtZS9fcmVscy90\naGVtZU1hbmFnZXIueG1sLnJlbHOEj00KwjAUhPeCdwhvb9O6EJEm3YjQrdQDhOQ1DTY/JFHs7Q2u\nLAguh2G+mWm7l53JE2My3jFoqhoIOumVcZrBbbjsjkBSFk6J2TtksGCCjm837RVnkUsoTSYkUigu\nMZ
hyDidKk5zQilT5gK44o49W5CKjpkHIu9BI93V9oPGbAXzFJL1iEHvVABmWUJr/s/04GolnLx8W\nXf5RQXPZhQUoosbM4CObqkwEylu6usTfAAAA//8DAFBLAQItABQABgAIAAAAIQCCirwT+gAAABwC\nAAATAAAAAAAAAAAAAAAAAAAAAABbQ29udGVudF9UeXBlc10ueG1sUEsBAi0AFAAGAAgAAAAhAKXW\np+fAAAAANgEAAAsAAAAAAAAAAAAAAAAAKwEAAF9yZWxzLy5yZWxzUEsBAi0AFAAGAAgAAAAhAGt5\nlhaDAAAAigAAABwAAAAAAAAAAAAAAAAAFAIAAHRoZW1lL3RoZW1lL3RoZW1lTWFuYWdlci54bWxQ\nSwECLQAUAAYACAAAACEAlrWt4pYGAABQGwAAFgAAAAAAAAAAAAAAAADRAgAAdGhlbWUvdGhlbWUv\ndGhlbWUxLnhtbFBLAQItABQABgAIAAAAIQAN0ZCftgAAABsBAAAnAAAAAAAAAAAAAAAAAJsJAAB0\naGVtZS90aGVtZS9fcmVscy90aGVtZU1hbmFnZXIueG1sLnJlbHNQSwUGAAAAAAUABQBdAQAAlgoA\nAAAA\n\n------=_NextPart_01CF5AE5.5C24CD00\nContent-Location: file:///C:/D16BB227/testing_files/colorschememapping.xml\nContent-Transfer-Encoding: quoted-printable\nContent-Type: text/xml\n\n<?xml version=3D\"1.0\" encoding=3D\"UTF-8\" standalone=3D\"yes\"?>\n<a:clrMap xmlns:a=3D\"http://schemas.openxmlformats.org/drawingml/2006/main\"=\n bg1=3D\"lt1\" tx1=3D\"dk1\" bg2=3D\"lt2\" tx2=3D\"dk2\" accent1=3D\"accent1\" accent=\n2=3D\"accent2\" accent3=3D\"accent3\" accent4=3D\"accent4\" accent5=3D\"accent5\" a=\nccent6=3D\"accent6\" hlink=3D\"hlink\" folHlink=3D\"folHlink\"/>\n------=_NextPart_01CF5AE5.5C24CD00\nContent-Location: file:///C:/D16BB227/testing_files/filelist.xml\nContent-Transfer-Encoding: quoted-printable\nContent-Type: text/xml; charset=\"utf-8\"\n\n<xml xmlns:o=3D\"urn:schemas-microsoft-com:office:office\">\n <o:MainFile HRef=3D\"../testing.htm\"/>\n <o:File HRef=3D\"themedata.thmx\"/>\n <o:File HRef=3D\"colorschememapping.xml\"/>\n <o:File HRef=3D\"filelist.xml\"/>\n</xml>\n------=_NextPart_01CF5AE5.5C24CD00--\n"
  },
  {
    "path": "libs/kotaemon/tests/resources/embedding_openai.json",
    "content": "{\n  \"object\": \"list\",\n  \"data\": [\n    {\n      \"object\": \"embedding\",\n      \"index\": 0,\n      \"embedding\": [\n        0.006555966101586819,\n        0.003670461941510439,\n        -0.011642491444945335,\n        -0.026776473969221115,\n        -0.012383491732180119,\n        -0.0014341175556182861,\n        -0.013375678099691868,\n        0.009356695227324963,\n        -0.006364436354488134,\n        -0.0294390507042408,\n        0.023950627073645592,\n        0.0029859787318855524,\n        -0.023234745487570763,\n        -0.009205983020365238,\n        0.006744355894625187,\n        0.0011790062999352813,\n        0.02607315219938755,\n        -0.018437083810567856,\n        0.008904559537768364,\n        0.009620440192520618,\n        -0.01306169480085373,\n        -0.0011358336778357625,\n        0.007253008428961039,\n        0.00875384733080864,\n        -0.012710033915936947,\n        0.0037206991109997034,\n        0.005419347435235977,\n        -0.017243949696421623,\n        0.036246202886104584,\n        -0.0266759991645813,\n        0.012647237628698349,\n        -0.008552898652851582,\n        -0.00762350857257843,\n        -0.012546762824058533,\n        0.007083457428961992,\n        -0.014078999869525433,\n        0.0048761568032205105,\n        -0.013689660467207432,\n        0.018211016431450844,\n        -0.014367864467203617,\n        0.008307991549372673,\n        0.006022194866091013,\n        0.005457025486975908,\n        -0.006320478860288858,\n        -0.03609549254179001,\n        0.012986338697373867,\n        0.00017592862423043698,\n        -0.016427593305706978,\n        -0.004734864458441734,\n        0.0257717277854681,\n        0.01944182999432087,\n        -0.00211938563734293,\n        -0.0147195253521204,\n        0.01332544069737196,\n        -0.017670966684818268,\n        0.0013956546317785978,\n        -0.04024006798863411,\n        0.024754423648118973,\n        0.032051388174295425,\n     
   -0.018713390454649925,\n        0.011265711858868599,\n        0.012370931915938854,\n        -0.013576626777648926,\n        0.0040974789299070835,\n        -0.002153923735022545,\n        0.009915584698319435,\n        0.006637601647526026,\n        0.011353626847267151,\n        -0.019919084385037422,\n        0.030142372474074364,\n        0.01966789737343788,\n        -0.008213796652853489,\n        -0.004213652573525906,\n        -0.009162025526165962,\n        0.012446288019418716,\n        0.008979915641248226,\n        -0.02098662778735161,\n        0.003113141981884837,\n        0.0018838982796296477,\n        -0.0008289152756333351,\n        0.02722861059010029,\n        -0.03355850651860237,\n        -0.004408321809023619,\n        0.017859356477856636,\n        0.019893966615200043,\n        -0.0014137086691334844,\n        -0.0021602034103125334,\n        0.017959831282496452,\n        -0.0114603815600276,\n        -0.020069796591997147,\n        0.007604669313877821,\n        0.001281835837289691,\n        0.01132222916930914,\n        0.006668999791145325,\n        -0.01743233948945999,\n        0.0048761568032205105,\n        0.0016923686489462852,\n        0.02356128767132759,\n        -0.005796127021312714,\n        -0.041470881551504135,\n        -0.01292354241013527,\n        -0.005824385676532984,\n        -0.010700542479753494,\n        -0.017005322501063347,\n        -0.007950050756335258,\n        0.011560855433344841,\n        -0.005485283676534891,\n        -0.0002590360236354172,\n        0.028434304520487785,\n        0.007447678130120039,\n        -0.026198744773864746,\n        0.009306457825005054,\n        -0.00500488979741931,\n        -0.03107176162302494,\n        -0.0007555217016488314,\n        -0.008056805469095707,\n        0.007479076273739338,\n        0.003978165332227945,\n        -0.005984516814351082,\n        -0.02778122015297413,\n        0.015272135846316814,\n        0.029338575899600983,\n        
0.010964288376271725,\n        -0.023548727855086327,\n        0.007014381233602762,\n        0.0014137086691334844,\n        -0.03380969539284706,\n        -0.016741575673222542,\n        -0.005230957642197609,\n        -0.004794521257281303,\n        0.0315992534160614,\n        0.0041100382804870605,\n        0.010644025169312954,\n        -0.0014294078573584557,\n        -0.035367049276828766,\n        0.0246916264295578,\n        -0.007096016779541969,\n        0.02096150815486908,\n        -0.017030440270900726,\n        -0.011906237341463566,\n        0.006273381412029266,\n        0.021237812936306,\n        0.01757049188017845,\n        -0.013224965892732143,\n        -0.005092805251479149,\n        0.01288586389273405,\n        0.004486817866563797,\n        -0.016339678317308426,\n        0.008917118422687054,\n        -0.015083746053278446,\n        0.008377067744731903,\n        0.0185626782476902,\n        0.012961219996213913,\n        -0.002535413019359112,\n        0.0050143091939389706,\n        0.016025694087147713,\n        0.004618690814822912,\n        0.0205470509827137,\n        0.008000288158655167,\n        -0.013074253685772419,\n        -0.0007264782907441258,\n        -0.0004474258457776159,\n        0.0033125211484730244,\n        -0.03084569424390793,\n        0.009400652721524239,\n        0.015787066891789436,\n        0.02607315219938755,\n        0.003689300734549761,\n        -0.013940847478806973,\n        -0.02604803442955017,\n        -0.011454101651906967,\n        0.007340923883020878,\n        -0.043178949505090714,\n        0.02660064399242401,\n        -0.01593777909874916,\n        0.00514304218813777,\n        -0.001305384561419487,\n        0.022820288315415382,\n        -0.012697474099695683,\n        -0.017269067466259003,\n        -0.03024284727871418,\n        0.011617372743785381,\n        0.008992474526166916,\n        0.025897322222590446,\n        0.00629850011318922,\n        -0.00014511904737446457,\n        
0.009890465997159481,\n        -0.006364436354488134,\n        0.00384629238396883,\n        -0.017030440270900726,\n        0.016666220501065254,\n        0.03333244100213051,\n        0.021011745557188988,\n        0.01844964362680912,\n        -0.6892555952072144,\n        -0.007196491584181786,\n        0.025319593027234077,\n        0.02024562656879425,\n        0.006245122756808996,\n        0.009909304790198803,\n        0.012176262214779854,\n        0.027806337922811508,\n        0.006518288049846888,\n        0.0274546779692173,\n        -0.02107454277575016,\n        0.013011457398533821,\n        -0.015083746053278446,\n        -0.010719381272792816,\n        -0.0002598209830466658,\n        -0.00823263544589281,\n        0.0055763390846550465,\n        -0.012339534237980843,\n        -0.011881118640303612,\n        0.015196779742836952,\n        -0.010141652077436447,\n        0.03129782900214195,\n        -0.022104406729340553,\n        -0.014568813145160675,\n        0.009928143583238125,\n        0.011730406433343887,\n        0.0025102945510298014,\n        -0.009739753790199757,\n        -0.006964143831282854,\n        0.030192609876394272,\n        -0.0021272350568324327,\n        0.0114603815600276,\n        9.394961671205238e-05,\n        0.014154355973005295,\n        0.06611227244138718,\n        0.007610949221998453,\n        -0.005463304929435253,\n        0.008088203147053719,\n        0.009827669709920883,\n        0.028986915946006775,\n        -0.019077610224485397,\n        -0.022267676889896393,\n        0.012559321708977222,\n        -0.01662854291498661,\n        -0.002763050841167569,\n        0.009205983020365238,\n        0.012659796513617039,\n        -0.0024051100481301546,\n        0.0022543983068317175,\n        0.0017849936848506331,\n        0.014568813145160675,\n        -0.004851038102060556,\n        0.010857533663511276,\n        0.019504627212882042,\n        -0.005343991331756115,\n        0.0071588135324418545,\n     
   0.02151411771774292,\n        0.011962753720581532,\n        -0.002935741562396288,\n        0.004521355964243412,\n        0.0005718416068702936,\n        0.017168592661619186,\n        -0.0016280021518468857,\n        0.0038494321051985025,\n        -0.022443508729338646,\n        0.02381247468292713,\n        -0.021313169971108437,\n        0.01400364376604557,\n        -0.008929678238928318,\n        -0.012050669640302658,\n        -0.0018368008313700557,\n        -0.007994008250534534,\n        -0.018047746270895004,\n        -0.014744644053280354,\n        0.013965966179966927,\n        0.028107762336730957,\n        0.01681693270802498,\n        -0.0033219405449926853,\n        -0.006160347256809473,\n        0.01966789737343788,\n        0.010926609858870506,\n        0.013237525708973408,\n        -0.014066440984606743,\n        -0.01764584705233574,\n        0.02349849045276642,\n        -0.019780931994318962,\n        -0.030067017301917076,\n        -0.016276881098747253,\n        0.010430516675114632,\n        0.011479220353066921,\n        0.030192609876394272,\n        0.0033250804990530014,\n        -0.012370931915938854,\n        -0.01980605162680149,\n        0.02883620373904705,\n        0.0005384809337556362,\n        -0.011548296548426151,\n        0.003821173682808876,\n        0.027303965762257576,\n        0.0004238771216478199,\n        0.014933033846318722,\n        -0.0028038686141371727,\n        0.0003752097545657307,\n        0.00652456795796752,\n        0.010279805399477482,\n        0.006656440440565348,\n        -0.00809448305517435,\n        0.033156611025333405,\n        0.02444044128060341,\n        -0.030142372474074364,\n        0.006317338906228542,\n        0.004094338975846767,\n        -0.04071732237935066,\n        0.02121269516646862,\n        0.008471262641251087,\n        -0.032001152634620667,\n        0.0013022447237744927,\n        0.016013136133551598,\n        0.026374576613307,\n        -0.014694406650960445,\n 
       0.023159390315413475,\n        0.0020424597896635532,\n        0.013664542697370052,\n        0.0057458896189928055,\n        0.0041539957746863365,\n        0.005303173791617155,\n        0.0003883577883243561,\n        0.004342385567724705,\n        -0.01690484769642353,\n        0.001676669460721314,\n        0.020697763189673424,\n        0.005133622791618109,\n        0.00046273251064121723,\n        -0.005651694722473621,\n        0.009808829985558987,\n        0.00202676048502326,\n        0.009896745905280113,\n        -0.017884474247694016,\n        0.00862825382500887,\n        0.020308423787355423,\n        -0.007994008250534534,\n        0.005127343349158764,\n        -0.0029388812836259604,\n        -0.0006303209811449051,\n        0.011818322353065014,\n        -0.018399406224489212,\n        -0.012025550939142704,\n        0.008678491227328777,\n        0.00016974708705674857,\n        -0.013677101582288742,\n        0.01983116939663887,\n        -0.007918652147054672,\n        -0.03245328739285469,\n        0.021162457764148712,\n        -0.008044245652854443,\n        -0.0002005566784646362,\n        0.0036076651886105537,\n        -0.029187863692641258,\n        -0.03569359332323074,\n        -0.028961796313524246,\n        0.0027096737176179886,\n        0.016364796087145805,\n        -0.0071022966876626015,\n        -0.005174440797418356,\n        -0.01400364376604557,\n        -0.023787355050444603,\n        -0.024566033855080605,\n        0.011648771353065968,\n        0.005579478573054075,\n        -0.029012033715844154,\n        0.010857533663511276,\n        -0.0049420930445194244,\n        -0.025093525648117065,\n        -0.0024961652234196663,\n        -0.0030016780365258455,\n        0.010694262571632862,\n        -0.010681703686714172,\n        -0.015724271535873413,\n        -0.003249724628403783,\n        -0.01731930486857891,\n        0.006876228842884302,\n        -0.009601601399481297,\n        -0.009821389801800251,\n       
 0.012176262214779854,\n        0.02582196518778801,\n        -0.010185610502958298,\n        -0.0005722341011278331,\n        0.02747979573905468,\n        -0.019793491810560226,\n        -0.0016405613860115409,\n        0.017005322501063347,\n        0.0072027710266411304,\n        -0.02220488153398037,\n        0.0006103045307099819,\n        -0.007912373170256615,\n        -0.004559034015983343,\n        -0.0010808866936713457,\n        0.0038023346569389105,\n        0.01679181307554245,\n        0.01718115247786045,\n        0.018713390454649925,\n        0.004100618418306112,\n        0.017859356477856636,\n        -0.026022914797067642,\n        -0.016013136133551598,\n        -0.028811084106564522,\n        -0.0023705719504505396,\n        -0.030343322083353996,\n        0.003858851734548807,\n        0.013890610076487064,\n        0.011969033628702164,\n        -0.002329754177480936,\n        -0.0014223431935533881,\n        0.01764584705233574,\n        0.016276881098747253,\n        0.02692718617618084,\n        0.0021821821574121714,\n        -0.020923830568790436,\n        -0.02855989895761013,\n        -0.007510474417358637,\n        -0.002681415295228362,\n        0.008326830342411995,\n        -0.01458137296140194,\n        -0.0007727907504886389,\n        0.01720627024769783,\n        0.032604001462459564,\n        0.004417741671204567,\n        0.03554287925362587,\n        0.003953046631067991,\n        -0.0322021022439003,\n        -0.02505584806203842,\n        0.009231101721525192,\n        0.012659796513617039,\n        0.009563923813402653,\n        -0.016842050477862358,\n        -0.005513542331755161,\n        0.0041414364241063595,\n        -0.014405542053282261,\n        0.02916274592280388,\n        0.012163703329861164,\n        -0.008502661250531673,\n        0.010185610502958298,\n        0.028710609301924706,\n        -0.0177839994430542,\n        0.00950112659484148,\n        0.028384067118167877,\n        0.014744644053280354,\n  
      -0.009444610215723515,\n        0.005055127199739218,\n        0.02803240716457367,\n        -0.0009089809027500451,\n        0.0004933458403684199,\n        -0.014405542053282261,\n        -0.008433585055172443,\n        0.0012111896649003029,\n        -0.031498778611421585,\n        0.0004556679050438106,\n        -0.021388525143265724,\n        0.019605102017521858,\n        0.03569359332323074,\n        0.013689660467207432,\n        0.01161109283566475,\n        -0.0064680506475269794,\n        -0.005783567670732737,\n        0.009205983020365238,\n        -0.013413355685770512,\n        -0.001551076304167509,\n        -0.013488711789250374,\n        0.003160239430144429,\n        -0.005114783998578787,\n        -0.012408610433340073,\n        -0.003158669453114271,\n        0.0335836261510849,\n        -0.026274101808667183,\n        0.009319016709923744,\n        0.0005424057017080486,\n        -0.0011326938401907682,\n        -0.008841762319207191,\n        -0.005205838941037655,\n        0.0030566249042749405,\n        -0.013036576099693775,\n        -0.022443508729338646,\n        -0.0017865635454654694,\n        0.011830881237983704,\n        0.007096016779541969,\n        -0.01605081371963024,\n        -0.024038542062044144,\n        -0.005039427895098925,\n        -0.0011122849537059665,\n        -0.0032779830507934093,\n        -0.008571737445890903,\n        0.0041194576770067215,\n        0.014292508363723755,\n        -0.02342313528060913,\n        0.015485644340515137,\n        -0.002646877197548747,\n        0.020722880959510803,\n        0.00680087273940444,\n        -0.009865347295999527,\n        -0.00600649556145072,\n        0.021036865189671516,\n        0.020371221005916595,\n        -0.0016107329865917563,\n        -0.019291117787361145,\n        0.013287762179970741,\n        -0.004006423521786928,\n        -0.00762350857257843,\n        -0.015334932133555412,\n        0.0008681631297804415,\n        -0.008295431733131409,\n        
-0.0007543442770838737,\n        0.011523177847266197,\n        -0.0027818898670375347,\n        0.0037332584615796804,\n        0.01304913591593504,\n        -0.005237237084656954,\n        -0.00340043636970222,\n        0.016691338270902634,\n        0.02612338960170746,\n        -0.002295216079801321,\n        -0.008333110250532627,\n        -0.009683237411081791,\n        -0.022305356338620186,\n        -0.00935041531920433,\n        0.06972935795783997,\n        -0.001001605880446732,\n        -0.0031319810077548027,\n        0.019617659971117973,\n        -0.019014813005924225,\n        -0.012848186306655407,\n        -0.04051637277007103,\n        -0.018223576247692108,\n        -0.01665366068482399,\n        -0.0031979172490537167,\n        -0.004436580464243889,\n        0.004458559211343527,\n        -0.013802695088088512,\n        0.01994420401751995,\n        0.013551508076488972,\n        0.009080389514565468,\n        0.00340043636970222,\n        -0.027052778750658035,\n        -0.011341067962348461,\n        0.0048667374067008495,\n        -0.0151088647544384,\n        0.010110254399478436,\n        0.00046077012666501105,\n        0.009990940801799297,\n        0.0205470509827137,\n        0.008307991549372673,\n        0.00277247023768723,\n        0.01720627024769783,\n        -0.007460237015038729,\n        -0.0038965295534580946,\n        0.008483821526169777,\n        -0.00610383041203022,\n        0.01745745725929737,\n        0.008778966031968594,\n        0.013312880881130695,\n        0.014078999869525433,\n        0.0029043431859463453,\n        0.03780356049537659,\n        0.008464982733130455,\n        -0.005384809337556362,\n        0.011868558824062347,\n        0.005287474486976862,\n        -0.006179186515510082,\n        -0.006009635515511036,\n        0.0142673896625638,\n        0.0023344638757407665,\n        -0.0057710083201527596,\n        0.028660371899604797,\n        -0.0014239131705835462,\n        
-0.028685491532087326,\n        0.023121710866689682,\n        0.007742822170257568,\n        -0.009218541905283928,\n        -0.023071475327014923,\n        -0.01187483873218298,\n        0.018072864040732384,\n        0.00027355772908777,\n        -0.013928287662565708,\n        -0.010844974778592587,\n        -0.017796559259295464,\n        -0.014744644053280354,\n        -0.01513398252427578,\n        -0.014694406650960445,\n        -0.001071467180736363,\n        -0.031172236427664757,\n        -0.019190644845366478,\n        -0.02369944006204605,\n        0.002376851625740528,\n        -0.007466516923159361,\n        0.001561280689202249,\n        -0.0048981355503201485,\n        -0.03863247483968735,\n        -0.0125404829159379,\n        -0.0036516229156404734,\n        0.01909017004072666,\n        0.0004517431079875678,\n        -0.001898027490824461,\n        -0.006486889906227589,\n        0.004044101573526859,\n        0.013802695088088512,\n        -0.006047313567250967,\n        -0.029012033715844154,\n        0.011271991766989231,\n        -0.006292220205068588,\n        0.015611236914992332,\n        0.0031727987807244062,\n        0.00666272034868598,\n        0.009111788123846054,\n        -0.01690484769642353,\n        0.029589762911200523,\n        0.008521500043570995,\n        0.015071186237037182,\n        0.026299219578504562,\n        0.0003149642434436828,\n        0.018650593236088753,\n        0.004537055268883705,\n        0.0050582666881382465,\n        0.001783423707820475,\n        -0.019906524568796158,\n        -1.6802998288767412e-05,\n        -0.008307991549372673,\n        -0.01179320365190506,\n        -0.0020785678643733263,\n        0.004810220096260309,\n        -0.0034695127978920937,\n        0.01676669530570507,\n        0.01690484769642353,\n        0.01568659394979477,\n        -0.015372609719634056,\n        -0.0016327118501067162,\n        0.034211594611406326,\n        -0.024088779464364052,\n        
0.016251763328909874,\n        -0.0064806099981069565,\n        0.0001965337578440085,\n        0.013438474386930466,\n        0.021564355120062828,\n        0.03504050895571709,\n        -0.004694046452641487,\n        -0.03443766012787819,\n        -0.005557499825954437,\n        -0.025281915441155434,\n        0.008490101434290409,\n        0.0157996267080307,\n        0.009130626916885376,\n        0.0012418029364198446,\n        -0.0018619195325300097,\n        -0.022192321717739105,\n        -0.006907626986503601,\n        -0.0032748430967330933,\n        -0.005548080429434776,\n        0.019542304798960686,\n        -0.006304779555648565,\n        -0.0023250444792211056,\n        -0.015309813432395458,\n        -0.006499449256807566,\n        -0.018324051052331924,\n        0.005968817975372076,\n        -0.006851110141724348,\n        -0.017859356477856636,\n        -0.02425205148756504,\n        -0.0063236188143491745,\n        -0.006568525452166796,\n        -0.013890610076487064,\n        -0.04551498219370842,\n        -0.044786542654037476,\n        -0.016163846477866173,\n        0.00394362723454833,\n        -0.017444897443056107,\n        0.016364796087145805,\n        -0.003949906677007675,\n        -0.000322813808452338,\n        -0.019981881603598595,\n        -0.010625186376273632,\n        -0.006342457607388496,\n        -0.01692996546626091,\n        -0.0006311059114523232,\n        -0.006116389762610197,\n        0.027002541348338127,\n        0.027931932359933853,\n        0.025369830429553986,\n        0.013978525064885616,\n        0.0114603815600276,\n        0.006474330555647612,\n        -0.008559177629649639,\n        -0.01662854291498661,\n        0.004559034015983343,\n        -0.012094627134501934,\n        -0.014204593375325203,\n        0.012710033915936947,\n        0.020999185740947723,\n        0.028459424152970314,\n        -0.007026940584182739,\n        0.009595322422683239,\n        0.0048447586596012115,\n        
-0.008188677951693535,\n        -0.020873593166470528,\n        0.0010173050686717033,\n        -0.013928287662565708,\n        -0.014405542053282261,\n        -0.020974067971110344,\n        -0.0010471334680914879,\n        0.002153923735022545,\n        0.010958008468151093,\n        0.011234313249588013,\n        -0.015548440627753735,\n        0.011812042444944382,\n        0.008596856147050858,\n        0.02079823799431324,\n        0.003050345228984952,\n        0.026299219578504562,\n        -0.01969301700592041,\n        0.02742955833673477,\n        0.007265567779541016,\n        0.020597288385033607,\n        -0.013162169605493546,\n        0.007711423560976982,\n        -0.006794593296945095,\n        0.016917407512664795,\n        0.009262500330805779,\n        0.021451322361826897,\n        0.02770586498081684,\n        -0.012270457111299038,\n        -0.010593787766993046,\n        0.005884042475372553,\n        0.026173627004027367,\n        0.0031162817031145096,\n        0.007510474417358637,\n        0.004194813314825296,\n        -0.007862135767936707,\n        0.0027096737176179886,\n        -0.020195389166474342,\n        -0.004998610354959965,\n        -0.013501270674169064,\n        0.004712885711342096,\n        -0.012201380915939808,\n        -0.01872594840824604,\n        0.018625473603606224,\n        -0.011881118640303612,\n        -0.029087388888001442,\n        -0.015787066891789436,\n        -0.0003167304093949497,\n        0.048001728951931,\n        0.005959398113191128,\n        0.012100907042622566,\n        0.01939159259200096,\n        0.0028556757606565952,\n        -0.005940559320151806,\n        0.02068520337343216,\n        -0.018675711005926132,\n        -0.005522961728274822,\n        0.039813049137592316,\n        0.0015455815009772778,\n        -0.0063236188143491745,\n        -0.02416413463652134,\n        0.004684627056121826,\n        0.004078639671206474,\n        -0.015071186237037182,\n        
-0.024051101878285408,\n        0.008050525560975075,\n        0.028760846704244614,\n        -0.002221429953351617,\n        -0.014619050547480583,\n        0.006113249808549881,\n        -0.033131491392850876,\n        0.02471674606204033,\n        0.003482071915641427,\n        -0.021363407373428345,\n        -0.015058627352118492,\n        0.012440008111298084,\n        -0.023071475327014923,\n        0.0088919997215271,\n        -0.0305191520601511,\n        0.02720349095761776,\n        0.03413623571395874,\n        -0.005836945027112961,\n        -0.006668999791145325,\n        0.003796054981648922,\n        0.00045880774268880486,\n        -0.0013477721950039268,\n        -0.007686304859817028,\n        0.02833382971584797,\n        -0.007058338727802038,\n        0.0036641822662204504,\n        0.01891433820128441,\n        -0.0021272350568324327,\n        -0.02079823799431324,\n        -0.005208978895097971,\n        -0.012320694513618946,\n        0.022581661120057106,\n        -0.017331864684820175,\n        0.005482144188135862,\n        -0.011899957433342934,\n        0.011328508146107197,\n        -0.003271703375503421,\n        0.012075788341462612,\n        -0.013790135271847248,\n        -0.00652456795796752,\n        -0.006210584659129381,\n        -0.0016672499477863312,\n        0.02665088139474392,\n        2.0819775272684637e-06,\n        -0.001816391944885254,\n        -0.01485767774283886,\n        -0.017984949052333832,\n        -0.024503236636519432,\n        -0.04712257534265518,\n        -0.004483677912503481,\n        0.0015801197150722146,\n        -0.029589762911200523,\n        -0.004659508354961872,\n        -0.005466444883495569,\n        0.009551363997161388,\n        0.02565869502723217,\n        0.016779253259301186,\n        -0.012358373031020164,\n        0.012553042732179165,\n        0.023247305303812027,\n        -0.0181733388453722,\n        -0.0057301907800138,\n        0.018688270822167397,\n        
0.015975456684827805,\n        -0.02584708482027054,\n        0.016251763328909874,\n        -0.0035699873697012663,\n        -0.019178085029125214,\n        0.004577872809022665,\n        -0.014104118570685387,\n        -0.004160275217145681,\n        0.011585974134504795,\n        0.013388236984610558,\n        0.00026119465474039316,\n        -0.016113610938191414,\n        0.006348737049847841,\n        0.02936369553208351,\n        0.016867170110344887,\n        0.009036432020366192,\n        -0.007322084624320269,\n        -0.003296822076663375,\n        0.037050001323223114,\n        -0.014217152260243893,\n        0.0041539957746863365,\n        -0.007334643974900246,\n        -0.012697474099695683,\n        0.019655339419841766,\n        -0.007052059285342693,\n        -0.007667466066777706,\n        -0.0007700434071011841,\n        -0.03576894849538803,\n        -0.003962466027587652,\n        -0.0019985020626336336,\n        0.008138440549373627,\n        0.014342745766043663,\n        -0.015837304294109344,\n        -0.03210162743926048,\n        -0.0056108771823346615,\n        -0.016703898087143898,\n        0.00034538135514594615,\n        0.009984660893678665,\n        -0.01095172856003046,\n        0.001340707647614181,\n        0.013840372674167156,\n        0.016716457903385162,\n        0.00038973146001808345,\n        0.005940559320151806,\n        -0.011535737663507462,\n        -0.01093916967511177,\n        -0.016314558684825897,\n        -0.02427716925740242,\n        -0.012634677812457085,\n        -0.0012151143746450543,\n        0.041244812309741974,\n        0.006957864388823509,\n        -0.018110541626811028,\n        -0.0088919997215271,\n        -0.005127343349158764,\n        -0.02747979573905468,\n        -0.03227745741605759,\n        -0.002741072094067931,\n        -0.0029655699618160725,\n        -0.0056234365329146385,\n        0.016741575673222542,\n        -0.008527779951691628,\n        0.022016491740942,\n        
0.006263962015509605,\n        -0.0022748070769011974,\n        -0.00584008451551199,\n        -0.019630219787359238,\n        0.011824601329863071,\n        0.004703465849161148,\n        0.01513398252427578,\n        -0.000919970334507525,\n        -0.02328498288989067,\n        0.002163343131542206,\n        0.0026296081487089396,\n        0.011912517249584198,\n        -0.0042073726654052734,\n        0.00936297420412302,\n        -0.009331576526165009,\n        -0.010487033985555172,\n        -0.014355304650962353,\n        -0.0040284027345478535,\n        -0.002763050841167569,\n        0.007253008428961039,\n        0.016364796087145805,\n        -0.0157996267080307,\n        0.001935705542564392,\n        0.0050582666881382465,\n        -0.003422415116801858,\n        0.003422415116801858,\n        -0.00468148710206151,\n        0.008433585055172443,\n        -0.015360050834715366,\n        0.015837304294109344,\n        -0.019981881603598595,\n        -0.006320478860288858,\n        0.003088023280724883,\n        -0.02336033806204796,\n        0.015159101225435734,\n        -0.025420067831873894,\n        0.00652456795796752,\n        0.0294390507042408,\n        0.01068798266351223,\n        0.00756699126213789,\n        -0.0008603135356679559,\n        -0.0018933177925646305,\n        0.0024160996545106173,\n        0.005488423630595207,\n        -0.00935041531920433,\n        -0.011315949261188507,\n        -2.2285437808022834e-05,\n        -0.007372322026640177,\n        -0.021149897947907448,\n        -0.02966511808335781,\n        0.0021617733873426914,\n        -0.025721492245793343,\n        -0.014330185949802399,\n        0.001420773332938552,\n        0.03476420417428017,\n        0.015787066891789436,\n        -0.026550406590104103,\n        -0.01319984719157219,\n        -0.023159390315413475,\n        0.013011457398533821,\n        0.009406931698322296,\n        -0.013388236984610558,\n        0.001562850666232407,\n        
-0.00962672010064125,\n        -0.0042324913665652275,\n        0.021916016936302185,\n        -0.01762072928249836,\n        0.005290614441037178,\n        0.018537558615207672,\n        0.016176406294107437,\n        -0.004251330625265837,\n        0.039260439574718475,\n        0.2337038666009903,\n        -0.012791668996214867,\n        0.0035228899214416742,\n        0.027354203164577484,\n        0.02217976190149784,\n        0.013551508076488972,\n        0.010210729204118252,\n        0.0017080678371712565,\n        0.006982983089983463,\n        0.0028289873152971268,\n        -0.01159853395074606,\n        0.0012826207093894482,\n        -0.00470974575728178,\n        0.006756915245205164,\n        0.005450745578855276,\n        0.00941949151456356,\n        -0.02107454277575016,\n        -0.019793491810560226,\n        -0.030921051278710365,\n        -0.024503236636519432,\n        0.006279660854488611,\n        -0.03295566141605377,\n        -0.025256795808672905,\n        -0.014305067248642445,\n        0.017495134845376015,\n        -0.0072969659231603146,\n        -0.008446143940091133,\n        0.0011868559522554278,\n        0.03363386541604996,\n        0.009746033698320389,\n        0.004763122648000717,\n        -0.011573415249586105,\n        0.011228034272789955,\n        0.011246873065829277,\n        -0.014367864467203617,\n        -0.019567424431443214,\n        0.03604525327682495,\n        0.013363118283450603,\n        0.03084569424390793,\n        0.003871411085128784,\n        -0.016553185880184174,\n        -0.025068406015634537,\n        -0.004518216010183096,\n        -0.017633287236094475,\n        0.0027238030452281237,\n        0.011096160858869553,\n        0.007246728986501694,\n        -0.026173627004027367,\n        0.018123101443052292,\n        0.033985525369644165,\n        0.004656368400901556,\n        0.007014381233602762,\n        0.02300867810845375,\n        0.044032983481884,\n        0.00041406514355912805,\n       
 -0.015950338914990425,\n        0.008653372526168823,\n        0.016666220501065254,\n        -0.005259216297417879,\n        -0.000553002639207989,\n        0.0005902881384827197,\n        0.03185044229030609,\n        -2.5854542400338687e-05,\n        0.03943626955151558,\n        -0.023598965257406235,\n        0.007472796365618706,\n        -0.02519400045275688,\n        -0.002959290286526084,\n        0.003968745935708284,\n        -0.0035166102461516857,\n        -0.01667878031730652,\n        -0.0014144936576485634,\n        0.00603475421667099,\n        -0.00840846635401249,\n        -0.029514405876398087,\n        -0.009519966319203377,\n        0.010499592870473862,\n        0.0036798813380301,\n        0.03335756063461304,\n        0.0274546779692173,\n        -0.009388092905282974,\n        -0.0015463664894923568,\n        -0.009859067387878895,\n        -0.02010747417807579,\n        -0.038657594472169876,\n        -0.029037151485681534,\n        0.0022512583527714014,\n        -0.00729068648070097,\n        -0.012107186019420624,\n        -0.023372897878289223,\n        -0.014531135559082031,\n        -0.010254686698317528,\n        0.001805402571335435,\n        -0.006069292314350605,\n        0.023686882108449936,\n        0.026198744773864746,\n        -0.0034569534473121166,\n        0.010612627491354942,\n        -0.02091127075254917,\n        0.012364652007818222,\n        -0.005510402377694845,\n        -0.00246790680103004,\n        7.820140308467671e-05,\n        0.0090866694226861,\n        0.0022292796056717634,\n        0.009903025813400745,\n        -0.0071148560382425785,\n        0.0007563066901639104,\n        -0.00300324778072536,\n        -0.01563635654747486,\n        -0.022280236706137657,\n        -0.0016735296230763197,\n        -0.006807152647525072,\n        -0.01108988095074892,\n        -0.00680087273940444,\n        0.00976487249135971,\n        -0.005080245900899172,\n        -0.00265472661703825,\n        
-0.0003416528052184731,\n        -0.008885719813406467,\n        -0.003959326073527336,\n        -0.013915728777647018,\n        -0.014794881455600262,\n        0.002414529677480459,\n        -0.006169767118990421,\n        -0.014807440340518951,\n        -0.006888788193464279,\n        0.00915574561804533,\n        0.014531135559082031,\n        -0.030016779899597168,\n        0.00854661874473095,\n        -0.0002739502233453095,\n        0.021237812936306,\n        0.008113321848213673,\n        -0.006989262532442808,\n        0.007334643974900246,\n        0.016992762684822083,\n        -0.0028666651342064142,\n        -0.016879728063941002,\n        0.0035448686685413122,\n        -0.0021131059620529413,\n        -0.010003499686717987,\n        0.0088919997215271,\n        -0.0073534827679395676,\n        -0.014920474030077457,\n        -0.01649039052426815,\n        0.03112199902534485,\n        -0.027555152773857117,\n        -0.010970567353069782,\n        0.002863525412976742,\n        -0.04272681474685669,\n        -0.018185898661613464,\n        -0.013903168961405754,\n        -0.0035385889932513237,\n        0.027931932359933853,\n        -0.020735440775752068,\n        -0.019680457189679146,\n        -0.02485489845275879,\n        -0.006210584659129381,\n        0.009896745905280113,\n        -0.012502805329859257,\n        0.009645558893680573,\n        0.02996654249727726,\n        -0.007862135767936707,\n        -0.038657594472169876,\n        -0.013149609789252281,\n        -0.16045789420604706,\n        0.014430660754442215,\n        0.011523177847266197,\n        -0.012760271318256855,\n        0.02066008374094963,\n        0.015171661041676998,\n        0.024641389027237892,\n        0.004643809515982866,\n        -0.009067830629646778,\n        0.018575238063931465,\n        0.009859067387878895,\n        -0.01288586389273405,\n        -0.014945592731237411,\n        -0.014305067248642445,\n        0.005290614441037178,\n        
-0.01612616889178753,\n        -0.008182398043572903,\n        0.012352093122899532,\n        0.029740475118160248,\n        0.028685491532087326,\n        0.035115864127874374,\n        -0.007334643974900246,\n        -0.0009168304968625307,\n        -0.00477882195264101,\n        0.013714779168367386,\n        -0.007811898365616798,\n        0.0007111715967766941,\n        0.03393528610467911,\n        -0.010053737089037895,\n        -0.007020661141723394,\n        -0.016000576317310333,\n        -0.02888644114136696,\n        0.02582196518778801,\n        0.004559034015983343,\n        0.021966254338622093,\n        -0.006430373061448336,\n        0.012446288019418716,\n        -0.042601220309734344,\n        -0.01635223627090454,\n        0.024930253624916077,\n        0.032855186611413956,\n        0.018625473603606224,\n        0.019228322431445122,\n        -0.013526389375329018,\n        -0.015410288237035275,\n        -0.005224677734076977,\n        0.020609848201274872,\n        0.015083746053278446,\n        0.01621408388018608,\n        -0.0070897373370826244,\n        0.008119601756334305,\n        -0.003028366481885314,\n        -0.0005110073834657669,\n        -0.00622942391782999,\n        0.019981881603598595,\n        0.029489288106560707,\n        0.006888788193464279,\n        0.02452835626900196,\n        0.0024773261975497007,\n        -0.008458703756332397,\n        -0.0002564848982729018,\n        -0.012370931915938854,\n        -0.006254542153328657,\n        -0.004706605803221464,\n        0.006050453521311283,\n        -0.005224677734076977,\n        0.0014686556532979012,\n        0.009689517319202423,\n        -0.00024942029267549515,\n        0.018349168822169304,\n        -0.008910838514566422,\n        -0.023084033280611038,\n        -0.01039911899715662,\n        -0.02554566040635109,\n        0.006825991440564394,\n        0.008389626629650593,\n        -0.03378457576036453,\n        0.03109688125550747,\n        
-0.0114603815600276,\n        -0.014832559041678905,\n        -0.008295431733131409,\n        0.021313169971108437,\n        -0.0024836058728396893,\n        0.007070898078382015,\n        -0.023121710866689682,\n        0.00038973146001808345,\n        0.0032465846743434668,\n        0.012653516605496407,\n        -0.03134806826710701,\n        -0.027002541348338127,\n        0.007453957572579384,\n        -0.017984949052333832,\n        -0.014166914857923985,\n        -0.018022626638412476,\n        0.00543504673987627,\n        0.015598678030073643,\n        0.012534203007817268,\n        0.01623920351266861,\n        -0.01571171171963215,\n        -0.019893966615200043,\n        -0.002260677982121706,\n        0.017143474891781807,\n        -0.016013136133551598,\n        0.011190355755388737,\n        0.03049403429031372,\n        -0.005469584837555885,\n        0.017532814294099808,\n        0.007981449365615845,\n        0.022405831143260002,\n        -0.03169972822070122,\n        -0.03217698261141777,\n        0.006631322205066681,\n        0.014694406650960445,\n        0.026148507371544838,\n        0.01623920351266861,\n        0.026449931785464287,\n        -0.013262644410133362,\n        -0.01734442450106144,\n        0.0048321993090212345,\n        -0.013099372386932373,\n        0.052045829594135284,\n        0.006223144009709358,\n        -0.04420881345868111,\n        -0.0010887362295761704,\n        0.0028792244847863913,\n        0.0002376459160586819,\n        -0.08409722149372101,\n        -0.03438742458820343,\n        0.004712885711342096,\n        0.014405542053282261,\n        -0.001269276486709714,\n        0.03835617005825043,\n        0.004216792527586222,\n        -0.0017881334060803056,\n        0.0019875126890838146,\n        0.005535521078854799,\n        0.008923398330807686,\n        -0.03049403429031372,\n        -0.01787191443145275,\n        -0.010568669065833092,\n        0.04071732237935066,\n        -0.02720349095761776,\n   
     0.012779110111296177,\n        0.0010651875054463744,\n        -0.033960405737161636,\n        0.011604813858866692,\n        -0.008678491227328777,\n        0.007868414744734764,\n        0.0028478263411670923,\n        -0.001828951295465231,\n        -0.02222999930381775,\n        0.012201380915939808,\n        -0.022971000522375107,\n        0.03127271309494972,\n        0.0016358516877517104,\n        -0.004926394205540419,\n        -0.016754135489463806,\n        -0.024063661694526672,\n        -0.014166914857923985,\n        -0.008307991549372673,\n        -0.004449139814823866,\n        0.013626864179968834,\n        -0.04099362716078758,\n        0.03102152608335018,\n        0.013413355685770512,\n        -0.04106898233294487,\n        0.027253728359937668,\n        0.010593787766993046,\n        -0.006461771205067635,\n        -0.05581362545490265,\n        -0.012659796513617039,\n        0.017168592661619186,\n        -0.0019137266790494323,\n        0.03777844086289406,\n        0.01870083063840866,\n        -0.009048991836607456,\n        -0.03222722187638283,\n        -0.03139830380678177,\n        -0.021916016936302185,\n        0.010832414962351322,\n        0.01048075407743454,\n        -0.0017535953084006906,\n        0.008056805469095707,\n        0.028911558911204338,\n        -0.009921864606440067,\n        0.011962753720581532,\n        0.013375678099691868,\n        0.006147787906229496,\n        -0.02038377895951271,\n        0.01704300008714199,\n        -0.012559321708977222,\n        0.018575238063931465,\n        -0.012697474099695683,\n        0.0232221856713295,\n        0.0201577115803957,\n        0.00969579629600048,\n        -0.01563635654747486,\n        0.028409186750650406,\n        0.006245122756808996,\n        0.010053737089037895,\n        -0.03792915120720863,\n        -0.005469584837555885,\n        -0.015837304294109344,\n        -0.020258186385035515,\n        0.006606203503906727,\n        -0.005811826325953007,\n  
      -0.03310637176036835,\n        -0.020735440775752068,\n        0.009218541905283928,\n        -0.008182398043572903,\n        0.026022914797067642,\n        0.01360174547880888,\n        -0.01054983027279377,\n        0.0057710083201527596,\n        -0.015347491018474102,\n        -0.03544240817427635,\n        -0.03054427169263363,\n        0.01035516057163477,\n        0.006989262532442808,\n        -0.011510618962347507,\n        0.007529313676059246,\n        0.014757202938199043,\n        -0.01180576253682375,\n        -0.0010895212180912495,\n        0.01607593148946762,\n        0.010807296261191368,\n        -0.02485489845275879,\n        -0.011824601329863071,\n        -0.0650070533156395,\n        0.032352812588214874,\n        -0.016113610938191414,\n        -0.019353915005922318,\n        0.0009600031771697104,\n        -0.022016491740942,\n        0.006687839049845934,\n        -0.01512142363935709,\n        0.002095836913213134,\n        -0.00881036464124918,\n        -0.018675711005926132,\n        -0.0048635974526405334,\n        -0.002117815660312772,\n        0.012615839019417763,\n        -0.02798216976225376,\n        0.0032685634214431047,\n        0.020182831212878227,\n        0.014192033559083939,\n        0.005880902521312237,\n        -0.002144504338502884,\n        0.00037756460369564593,\n        -0.03245328739285469,\n        -0.016502948477864265,\n        0.001036928966641426,\n        -0.023774797096848488,\n        0.011177796870470047,\n        -0.0314234234392643,\n        0.0005577123956754804,\n        -0.002411389723420143,\n        -0.00874128844588995,\n        0.0064366525039076805,\n        -0.042576100677251816,\n        0.0064021144062280655,\n        0.022682135924696922,\n        0.0008108612382784486,\n        -0.010122813284397125,\n        0.006794593296945095,\n        0.015083746053278446,\n        0.007366042118519545,\n        0.020333541557192802,\n        -0.030895931646227837,\n        
-0.033181726932525635,\n        0.015611236914992332,\n        -0.010832414962351322,\n        0.0036013855133205652,\n        0.004634389653801918,\n        -0.005340851843357086,\n        -0.0033941566944122314,\n        0.030418677255511284,\n        0.009808829985558987,\n        -0.003227745648473501,\n        0.029263220727443695,\n        -0.028409186750650406,\n        -0.013036576099693775,\n        -0.012841906398534775,\n        -0.008351949043571949,\n        -0.017143474891781807,\n        -0.0003757984668482095,\n        -0.008333110250532627,\n        -0.04493725299835205,\n        0.02248118631541729,\n        0.00018220828496851027,\n        0.02966511808335781,\n        -0.007868414744734764,\n        0.005246656946837902,\n        0.010813576169312,\n        -0.0205470509827137,\n        -0.002692404668778181,\n        -0.014619050547480583,\n        -0.02234303392469883,\n        -0.006920186337083578,\n        -0.00593741936609149,\n        0.00517130084335804,\n        0.005312593188136816,\n        0.019630219787359238,\n        0.020647525787353516,\n        -0.003180648200213909,\n        0.0170932374894619,\n        -0.040616847574710846,\n        0.037728201597929,\n        0.020258186385035515,\n        0.021112220361828804,\n        -0.030318202450871468,\n        -0.0004332966054789722,\n        0.034563254565000534,\n        0.006088131107389927,\n        -0.006254542153328657,\n        -0.01731930486857891,\n        -0.009205983020365238,\n        0.00310058263130486,\n        -0.03607037290930748,\n        -0.0266759991645813,\n        0.006939025595784187,\n        0.024289729073643684,\n        -0.0038368727546185255,\n        0.00473800441250205,\n        0.017520254477858543,\n        0.00783073715865612,\n        0.017658406868577003,\n        0.023372897878289223,\n        0.0023721419274806976,\n        0.009438330307602882,\n        -0.016000576317310333,\n        -0.004552754107862711,\n        -0.008785245940089226,\n      
  0.005924860015511513,\n        -0.006289080251008272,\n        -0.037878915667533875,\n        -0.007328364532440901,\n        0.012898423708975315,\n        0.011812042444944382,\n        0.029790712520480156,\n        -0.006361296400427818,\n        0.015046067535877228,\n        -0.02499305084347725,\n        -0.00584008451551199,\n        0.02609827183187008,\n        -0.0014945593429729342,\n        -0.028434304520487785,\n        0.04242539033293724,\n        -0.008973635733127594,\n        0.0035794067662209272,\n        0.02770586498081684,\n        -0.015071186237037182,\n        0.008433585055172443,\n        0.002970279660075903,\n        -0.007793059106916189,\n        -0.0035228899214416742,\n        0.01905249059200287,\n        -0.0050017498433589935,\n        0.011717847548425198,\n        -0.010041178204119205,\n        -0.016452711075544357,\n        -0.015410288237035275,\n        0.0009136906592175364,\n        0.01372733898460865,\n        -0.008351949043571949,\n        0.024339966475963593,\n        -0.0161889661103487,\n        0.05631599947810173,\n        -0.012722592800855637,\n        -0.0041634151712059975,\n        0.02287052571773529,\n        0.014292508363723755,\n        0.020735440775752068,\n        0.024427881464362144,\n        0.018072864040732384,\n        -0.008559177629649639,\n        -0.02024562656879425,\n        -0.0038494321051985025,\n        0.006756915245205164,\n        -0.0008289152756333351,\n        -0.02259422093629837,\n        -0.011127559468150139,\n        0.004289008677005768,\n        -0.01591266132891178,\n        0.004398902412503958,\n        -0.019630219787359238,\n        0.01292354241013527,\n        0.018324051052331924,\n        0.008942237123847008,\n        0.0243902038782835,\n        0.013212407007813454,\n        -0.018072864040732384,\n        -0.019931644201278687,\n        0.00423877127468586,\n        0.0012331684119999409,\n        -0.018349168822169304,\n        
-0.016151288524270058,\n        0.017243949696421623,\n        0.0007649412145838141,\n        -0.0012857605470344424,\n        -0.01260955911129713,\n        0.006656440440565348,\n        0.002959290286526084,\n        -0.007108576130121946,\n        -0.009513686411082745,\n        -0.0036422032862901688,\n        0.010562390089035034,\n        0.02289564348757267,\n        0.0006240413058549166,\n        -0.018097983673214912,\n        -0.03295566141605377,\n        0.006813432089984417,\n        0.002750491490587592,\n        -0.02936369553208351,\n        -0.005384809337556362,\n        -0.02317194826900959\n      ]\n    }\n  ],\n  \"model\": \"ada\",\n  \"usage\": {\n    \"prompt_tokens\": 3,\n    \"total_tokens\": 3\n  }\n}\n"
  },
  {
    "path": "libs/kotaemon/tests/resources/embedding_openai_batch.json",
    "content": "{\n  \"object\": \"list\",\n  \"data\": [\n    {\n      \"object\": \"embedding\",\n      \"index\": 0,\n      \"embedding\": [\n        0.006555966101586819,\n        0.003670461941510439,\n        -0.011642491444945335,\n        -0.026776473969221115,\n        -0.012383491732180119,\n        -0.0014341175556182861,\n        -0.013375678099691868,\n        0.009356695227324963,\n        -0.006364436354488134,\n        -0.0294390507042408,\n        0.023950627073645592,\n        0.0029859787318855524,\n        -0.023234745487570763,\n        -0.009205983020365238,\n        0.006744355894625187,\n        0.0011790062999352813,\n        0.02607315219938755,\n        -0.018437083810567856,\n        0.008904559537768364,\n        0.009620440192520618,\n        -0.01306169480085373,\n        -0.0011358336778357625,\n        0.007253008428961039,\n        0.00875384733080864,\n        -0.012710033915936947,\n        0.0037206991109997034,\n        0.005419347435235977,\n        -0.017243949696421623,\n        0.036246202886104584,\n        -0.0266759991645813,\n        0.012647237628698349,\n        -0.008552898652851582,\n        -0.00762350857257843,\n        -0.012546762824058533,\n        0.007083457428961992,\n        -0.014078999869525433,\n        0.0048761568032205105,\n        -0.013689660467207432,\n        0.018211016431450844,\n        -0.014367864467203617,\n        0.008307991549372673,\n        0.006022194866091013,\n        0.005457025486975908,\n        -0.006320478860288858,\n        -0.03609549254179001,\n        0.012986338697373867,\n        0.00017592862423043698,\n        -0.016427593305706978,\n        -0.004734864458441734,\n        0.0257717277854681,\n        0.01944182999432087,\n        -0.00211938563734293,\n        -0.0147195253521204,\n        0.01332544069737196,\n        -0.017670966684818268,\n        0.0013956546317785978,\n        -0.04024006798863411,\n        0.024754423648118973,\n        0.032051388174295425,\n     
   -0.018713390454649925,\n        0.011265711858868599,\n        0.012370931915938854,\n        -0.013576626777648926,\n        0.0040974789299070835,\n        -0.002153923735022545,\n        0.009915584698319435,\n        0.006637601647526026,\n        0.011353626847267151,\n        -0.019919084385037422,\n        0.030142372474074364,\n        0.01966789737343788,\n        -0.008213796652853489,\n        -0.004213652573525906,\n        -0.009162025526165962,\n        0.012446288019418716,\n        0.008979915641248226,\n        -0.02098662778735161,\n        0.003113141981884837,\n        0.0018838982796296477,\n        -0.0008289152756333351,\n        0.02722861059010029,\n        -0.03355850651860237,\n        -0.004408321809023619,\n        0.017859356477856636,\n        0.019893966615200043,\n        -0.0014137086691334844,\n        -0.0021602034103125334,\n        0.017959831282496452,\n        -0.0114603815600276,\n        -0.020069796591997147,\n        0.007604669313877821,\n        0.001281835837289691,\n        0.01132222916930914,\n        0.006668999791145325,\n        -0.01743233948945999,\n        0.0048761568032205105,\n        0.0016923686489462852,\n        0.02356128767132759,\n        -0.005796127021312714,\n        -0.041470881551504135,\n        -0.01292354241013527,\n        -0.005824385676532984,\n        -0.010700542479753494,\n        -0.017005322501063347,\n        -0.007950050756335258,\n        0.011560855433344841,\n        -0.005485283676534891,\n        -0.0002590360236354172,\n        0.028434304520487785,\n        0.007447678130120039,\n        -0.026198744773864746,\n        0.009306457825005054,\n        -0.00500488979741931,\n        -0.03107176162302494,\n        -0.0007555217016488314,\n        -0.008056805469095707,\n        0.007479076273739338,\n        0.003978165332227945,\n        -0.005984516814351082,\n        -0.02778122015297413,\n        0.015272135846316814,\n        0.029338575899600983,\n        
0.010964288376271725,\n        -0.023548727855086327,\n        0.007014381233602762,\n        0.0014137086691334844,\n        -0.03380969539284706,\n        -0.016741575673222542,\n        -0.005230957642197609,\n        -0.004794521257281303,\n        0.0315992534160614,\n        0.0041100382804870605,\n        0.010644025169312954,\n        -0.0014294078573584557,\n        -0.035367049276828766,\n        0.0246916264295578,\n        -0.007096016779541969,\n        0.02096150815486908,\n        -0.017030440270900726,\n        -0.011906237341463566,\n        0.006273381412029266,\n        0.021237812936306,\n        0.01757049188017845,\n        -0.013224965892732143,\n        -0.005092805251479149,\n        0.01288586389273405,\n        0.004486817866563797,\n        -0.016339678317308426,\n        0.008917118422687054,\n        -0.015083746053278446,\n        0.008377067744731903,\n        0.0185626782476902,\n        0.012961219996213913,\n        -0.002535413019359112,\n        0.0050143091939389706,\n        0.016025694087147713,\n        0.004618690814822912,\n        0.0205470509827137,\n        0.008000288158655167,\n        -0.013074253685772419,\n        -0.0007264782907441258,\n        -0.0004474258457776159,\n        0.0033125211484730244,\n        -0.03084569424390793,\n        0.009400652721524239,\n        0.015787066891789436,\n        0.02607315219938755,\n        0.003689300734549761,\n        -0.013940847478806973,\n        -0.02604803442955017,\n        -0.011454101651906967,\n        0.007340923883020878,\n        -0.043178949505090714,\n        0.02660064399242401,\n        -0.01593777909874916,\n        0.00514304218813777,\n        -0.001305384561419487,\n        0.022820288315415382,\n        -0.012697474099695683,\n        -0.017269067466259003,\n        -0.03024284727871418,\n        0.011617372743785381,\n        0.008992474526166916,\n        0.025897322222590446,\n        0.00629850011318922,\n        -0.00014511904737446457,\n        
0.009890465997159481,\n        -0.006364436354488134,\n        0.00384629238396883,\n        -0.017030440270900726,\n        0.016666220501065254,\n        0.03333244100213051,\n        0.021011745557188988,\n        0.01844964362680912,\n        -0.6892555952072144,\n        -0.007196491584181786,\n        0.025319593027234077,\n        0.02024562656879425,\n        0.006245122756808996,\n        0.009909304790198803,\n        0.012176262214779854,\n        0.027806337922811508,\n        0.006518288049846888,\n        0.0274546779692173,\n        -0.02107454277575016,\n        0.013011457398533821,\n        -0.015083746053278446,\n        -0.010719381272792816,\n        -0.0002598209830466658,\n        -0.00823263544589281,\n        0.0055763390846550465,\n        -0.012339534237980843,\n        -0.011881118640303612,\n        0.015196779742836952,\n        -0.010141652077436447,\n        0.03129782900214195,\n        -0.022104406729340553,\n        -0.014568813145160675,\n        0.009928143583238125,\n        0.011730406433343887,\n        0.0025102945510298014,\n        -0.009739753790199757,\n        -0.006964143831282854,\n        0.030192609876394272,\n        -0.0021272350568324327,\n        0.0114603815600276,\n        9.394961671205238e-05,\n        0.014154355973005295,\n        0.06611227244138718,\n        0.007610949221998453,\n        -0.005463304929435253,\n        0.008088203147053719,\n        0.009827669709920883,\n        0.028986915946006775,\n        -0.019077610224485397,\n        -0.022267676889896393,\n        0.012559321708977222,\n        -0.01662854291498661,\n        -0.002763050841167569,\n        0.009205983020365238,\n        0.012659796513617039,\n        -0.0024051100481301546,\n        0.0022543983068317175,\n        0.0017849936848506331,\n        0.014568813145160675,\n        -0.004851038102060556,\n        0.010857533663511276,\n        0.019504627212882042,\n        -0.005343991331756115,\n        0.0071588135324418545,\n     
   0.02151411771774292,\n        0.011962753720581532,\n        -0.002935741562396288,\n        0.004521355964243412,\n        0.0005718416068702936,\n        0.017168592661619186,\n        -0.0016280021518468857,\n        0.0038494321051985025,\n        -0.022443508729338646,\n        0.02381247468292713,\n        -0.021313169971108437,\n        0.01400364376604557,\n        -0.008929678238928318,\n        -0.012050669640302658,\n        -0.0018368008313700557,\n        -0.007994008250534534,\n        -0.018047746270895004,\n        -0.014744644053280354,\n        0.013965966179966927,\n        0.028107762336730957,\n        0.01681693270802498,\n        -0.0033219405449926853,\n        -0.006160347256809473,\n        0.01966789737343788,\n        0.010926609858870506,\n        0.013237525708973408,\n        -0.014066440984606743,\n        -0.01764584705233574,\n        0.02349849045276642,\n        -0.019780931994318962,\n        -0.030067017301917076,\n        -0.016276881098747253,\n        0.010430516675114632,\n        0.011479220353066921,\n        0.030192609876394272,\n        0.0033250804990530014,\n        -0.012370931915938854,\n        -0.01980605162680149,\n        0.02883620373904705,\n        0.0005384809337556362,\n        -0.011548296548426151,\n        0.003821173682808876,\n        0.027303965762257576,\n        0.0004238771216478199,\n        0.014933033846318722,\n        -0.0028038686141371727,\n        0.0003752097545657307,\n        0.00652456795796752,\n        0.010279805399477482,\n        0.006656440440565348,\n        -0.00809448305517435,\n        0.033156611025333405,\n        0.02444044128060341,\n        -0.030142372474074364,\n        0.006317338906228542,\n        0.004094338975846767,\n        -0.04071732237935066,\n        0.02121269516646862,\n        0.008471262641251087,\n        -0.032001152634620667,\n        0.0013022447237744927,\n        0.016013136133551598,\n        0.026374576613307,\n        -0.014694406650960445,\n 
       0.023159390315413475,\n        0.0020424597896635532,\n        0.013664542697370052,\n        0.0057458896189928055,\n        0.0041539957746863365,\n        0.005303173791617155,\n        0.0003883577883243561,\n        0.004342385567724705,\n        -0.01690484769642353,\n        0.001676669460721314,\n        0.020697763189673424,\n        0.005133622791618109,\n        0.00046273251064121723,\n        -0.005651694722473621,\n        0.009808829985558987,\n        0.00202676048502326,\n        0.009896745905280113,\n        -0.017884474247694016,\n        0.00862825382500887,\n        0.020308423787355423,\n        -0.007994008250534534,\n        0.005127343349158764,\n        -0.0029388812836259604,\n        -0.0006303209811449051,\n        0.011818322353065014,\n        -0.018399406224489212,\n        -0.012025550939142704,\n        0.008678491227328777,\n        0.00016974708705674857,\n        -0.013677101582288742,\n        0.01983116939663887,\n        -0.007918652147054672,\n        -0.03245328739285469,\n        0.021162457764148712,\n        -0.008044245652854443,\n        -0.0002005566784646362,\n        0.0036076651886105537,\n        -0.029187863692641258,\n        -0.03569359332323074,\n        -0.028961796313524246,\n        0.0027096737176179886,\n        0.016364796087145805,\n        -0.0071022966876626015,\n        -0.005174440797418356,\n        -0.01400364376604557,\n        -0.023787355050444603,\n        -0.024566033855080605,\n        0.011648771353065968,\n        0.005579478573054075,\n        -0.029012033715844154,\n        0.010857533663511276,\n        -0.0049420930445194244,\n        -0.025093525648117065,\n        -0.0024961652234196663,\n        -0.0030016780365258455,\n        0.010694262571632862,\n        -0.010681703686714172,\n        -0.015724271535873413,\n        -0.003249724628403783,\n        -0.01731930486857891,\n        0.006876228842884302,\n        -0.009601601399481297,\n        -0.009821389801800251,\n       
 0.012176262214779854,\n        0.02582196518778801,\n        -0.010185610502958298,\n        -0.0005722341011278331,\n        0.02747979573905468,\n        -0.019793491810560226,\n        -0.0016405613860115409,\n        0.017005322501063347,\n        0.0072027710266411304,\n        -0.02220488153398037,\n        0.0006103045307099819,\n        -0.007912373170256615,\n        -0.004559034015983343,\n        -0.0010808866936713457,\n        0.0038023346569389105,\n        0.01679181307554245,\n        0.01718115247786045,\n        0.018713390454649925,\n        0.004100618418306112,\n        0.017859356477856636,\n        -0.026022914797067642,\n        -0.016013136133551598,\n        -0.028811084106564522,\n        -0.0023705719504505396,\n        -0.030343322083353996,\n        0.003858851734548807,\n        0.013890610076487064,\n        0.011969033628702164,\n        -0.002329754177480936,\n        -0.0014223431935533881,\n        0.01764584705233574,\n        0.016276881098747253,\n        0.02692718617618084,\n        0.0021821821574121714,\n        -0.020923830568790436,\n        -0.02855989895761013,\n        -0.007510474417358637,\n        -0.002681415295228362,\n        0.008326830342411995,\n        -0.01458137296140194,\n        -0.0007727907504886389,\n        0.01720627024769783,\n        0.032604001462459564,\n        0.004417741671204567,\n        0.03554287925362587,\n        0.003953046631067991,\n        -0.0322021022439003,\n        -0.02505584806203842,\n        0.009231101721525192,\n        0.012659796513617039,\n        0.009563923813402653,\n        -0.016842050477862358,\n        -0.005513542331755161,\n        0.0041414364241063595,\n        -0.014405542053282261,\n        0.02916274592280388,\n        0.012163703329861164,\n        -0.008502661250531673,\n        0.010185610502958298,\n        0.028710609301924706,\n        -0.0177839994430542,\n        0.00950112659484148,\n        0.028384067118167877,\n        0.014744644053280354,\n  
      -0.009444610215723515,\n        0.005055127199739218,\n        0.02803240716457367,\n        -0.0009089809027500451,\n        0.0004933458403684199,\n        -0.014405542053282261,\n        -0.008433585055172443,\n        0.0012111896649003029,\n        -0.031498778611421585,\n        0.0004556679050438106,\n        -0.021388525143265724,\n        0.019605102017521858,\n        0.03569359332323074,\n        0.013689660467207432,\n        0.01161109283566475,\n        -0.0064680506475269794,\n        -0.005783567670732737,\n        0.009205983020365238,\n        -0.013413355685770512,\n        -0.001551076304167509,\n        -0.013488711789250374,\n        0.003160239430144429,\n        -0.005114783998578787,\n        -0.012408610433340073,\n        -0.003158669453114271,\n        0.0335836261510849,\n        -0.026274101808667183,\n        0.009319016709923744,\n        0.0005424057017080486,\n        -0.0011326938401907682,\n        -0.008841762319207191,\n        -0.005205838941037655,\n        0.0030566249042749405,\n        -0.013036576099693775,\n        -0.022443508729338646,\n        -0.0017865635454654694,\n        0.011830881237983704,\n        0.007096016779541969,\n        -0.01605081371963024,\n        -0.024038542062044144,\n        -0.005039427895098925,\n        -0.0011122849537059665,\n        -0.0032779830507934093,\n        -0.008571737445890903,\n        0.0041194576770067215,\n        0.014292508363723755,\n        -0.02342313528060913,\n        0.015485644340515137,\n        -0.002646877197548747,\n        0.020722880959510803,\n        0.00680087273940444,\n        -0.009865347295999527,\n        -0.00600649556145072,\n        0.021036865189671516,\n        0.020371221005916595,\n        -0.0016107329865917563,\n        -0.019291117787361145,\n        0.013287762179970741,\n        -0.004006423521786928,\n        -0.00762350857257843,\n        -0.015334932133555412,\n        0.0008681631297804415,\n        -0.008295431733131409,\n        
-0.0007543442770838737,\n        0.011523177847266197,\n        -0.0027818898670375347,\n        0.0037332584615796804,\n        0.01304913591593504,\n        -0.005237237084656954,\n        -0.00340043636970222,\n        0.016691338270902634,\n        0.02612338960170746,\n        -0.002295216079801321,\n        -0.008333110250532627,\n        -0.009683237411081791,\n        -0.022305356338620186,\n        -0.00935041531920433,\n        0.06972935795783997,\n        -0.001001605880446732,\n        -0.0031319810077548027,\n        0.019617659971117973,\n        -0.019014813005924225,\n        -0.012848186306655407,\n        -0.04051637277007103,\n        -0.018223576247692108,\n        -0.01665366068482399,\n        -0.0031979172490537167,\n        -0.004436580464243889,\n        0.004458559211343527,\n        -0.013802695088088512,\n        0.01994420401751995,\n        0.013551508076488972,\n        0.009080389514565468,\n        0.00340043636970222,\n        -0.027052778750658035,\n        -0.011341067962348461,\n        0.0048667374067008495,\n        -0.0151088647544384,\n        0.010110254399478436,\n        0.00046077012666501105,\n        0.009990940801799297,\n        0.0205470509827137,\n        0.008307991549372673,\n        0.00277247023768723,\n        0.01720627024769783,\n        -0.007460237015038729,\n        -0.0038965295534580946,\n        0.008483821526169777,\n        -0.00610383041203022,\n        0.01745745725929737,\n        0.008778966031968594,\n        0.013312880881130695,\n        0.014078999869525433,\n        0.0029043431859463453,\n        0.03780356049537659,\n        0.008464982733130455,\n        -0.005384809337556362,\n        0.011868558824062347,\n        0.005287474486976862,\n        -0.006179186515510082,\n        -0.006009635515511036,\n        0.0142673896625638,\n        0.0023344638757407665,\n        -0.0057710083201527596,\n        0.028660371899604797,\n        -0.0014239131705835462,\n        
-0.028685491532087326,\n        0.023121710866689682,\n        0.007742822170257568,\n        -0.009218541905283928,\n        -0.023071475327014923,\n        -0.01187483873218298,\n        0.018072864040732384,\n        0.00027355772908777,\n        -0.013928287662565708,\n        -0.010844974778592587,\n        -0.017796559259295464,\n        -0.014744644053280354,\n        -0.01513398252427578,\n        -0.014694406650960445,\n        -0.001071467180736363,\n        -0.031172236427664757,\n        -0.019190644845366478,\n        -0.02369944006204605,\n        0.002376851625740528,\n        -0.007466516923159361,\n        0.001561280689202249,\n        -0.0048981355503201485,\n        -0.03863247483968735,\n        -0.0125404829159379,\n        -0.0036516229156404734,\n        0.01909017004072666,\n        0.0004517431079875678,\n        -0.001898027490824461,\n        -0.006486889906227589,\n        0.004044101573526859,\n        0.013802695088088512,\n        -0.006047313567250967,\n        -0.029012033715844154,\n        0.011271991766989231,\n        -0.006292220205068588,\n        0.015611236914992332,\n        0.0031727987807244062,\n        0.00666272034868598,\n        0.009111788123846054,\n        -0.01690484769642353,\n        0.029589762911200523,\n        0.008521500043570995,\n        0.015071186237037182,\n        0.026299219578504562,\n        0.0003149642434436828,\n        0.018650593236088753,\n        0.004537055268883705,\n        0.0050582666881382465,\n        0.001783423707820475,\n        -0.019906524568796158,\n        -1.6802998288767412e-05,\n        -0.008307991549372673,\n        -0.01179320365190506,\n        -0.0020785678643733263,\n        0.004810220096260309,\n        -0.0034695127978920937,\n        0.01676669530570507,\n        0.01690484769642353,\n        0.01568659394979477,\n        -0.015372609719634056,\n        -0.0016327118501067162,\n        0.034211594611406326,\n        -0.024088779464364052,\n        
0.016251763328909874,\n        -0.0064806099981069565,\n        0.0001965337578440085,\n        0.013438474386930466,\n        0.021564355120062828,\n        0.03504050895571709,\n        -0.004694046452641487,\n        -0.03443766012787819,\n        -0.005557499825954437,\n        -0.025281915441155434,\n        0.008490101434290409,\n        0.0157996267080307,\n        0.009130626916885376,\n        0.0012418029364198446,\n        -0.0018619195325300097,\n        -0.022192321717739105,\n        -0.006907626986503601,\n        -0.0032748430967330933,\n        -0.005548080429434776,\n        0.019542304798960686,\n        -0.006304779555648565,\n        -0.0023250444792211056,\n        -0.015309813432395458,\n        -0.006499449256807566,\n        -0.018324051052331924,\n        0.005968817975372076,\n        -0.006851110141724348,\n        -0.017859356477856636,\n        -0.02425205148756504,\n        -0.0063236188143491745,\n        -0.006568525452166796,\n        -0.013890610076487064,\n        -0.04551498219370842,\n        -0.044786542654037476,\n        -0.016163846477866173,\n        0.00394362723454833,\n        -0.017444897443056107,\n        0.016364796087145805,\n        -0.003949906677007675,\n        -0.000322813808452338,\n        -0.019981881603598595,\n        -0.010625186376273632,\n        -0.006342457607388496,\n        -0.01692996546626091,\n        -0.0006311059114523232,\n        -0.006116389762610197,\n        0.027002541348338127,\n        0.027931932359933853,\n        0.025369830429553986,\n        0.013978525064885616,\n        0.0114603815600276,\n        0.006474330555647612,\n        -0.008559177629649639,\n        -0.01662854291498661,\n        0.004559034015983343,\n        -0.012094627134501934,\n        -0.014204593375325203,\n        0.012710033915936947,\n        0.020999185740947723,\n        0.028459424152970314,\n        -0.007026940584182739,\n        0.009595322422683239,\n        0.0048447586596012115,\n        
-0.008188677951693535,\n        -0.020873593166470528,\n        0.0010173050686717033,\n        -0.013928287662565708,\n        -0.014405542053282261,\n        -0.020974067971110344,\n        -0.0010471334680914879,\n        0.002153923735022545,\n        0.010958008468151093,\n        0.011234313249588013,\n        -0.015548440627753735,\n        0.011812042444944382,\n        0.008596856147050858,\n        0.02079823799431324,\n        0.003050345228984952,\n        0.026299219578504562,\n        -0.01969301700592041,\n        0.02742955833673477,\n        0.007265567779541016,\n        0.020597288385033607,\n        -0.013162169605493546,\n        0.007711423560976982,\n        -0.006794593296945095,\n        0.016917407512664795,\n        0.009262500330805779,\n        0.021451322361826897,\n        0.02770586498081684,\n        -0.012270457111299038,\n        -0.010593787766993046,\n        0.005884042475372553,\n        0.026173627004027367,\n        0.0031162817031145096,\n        0.007510474417358637,\n        0.004194813314825296,\n        -0.007862135767936707,\n        0.0027096737176179886,\n        -0.020195389166474342,\n        -0.004998610354959965,\n        -0.013501270674169064,\n        0.004712885711342096,\n        -0.012201380915939808,\n        -0.01872594840824604,\n        0.018625473603606224,\n        -0.011881118640303612,\n        -0.029087388888001442,\n        -0.015787066891789436,\n        -0.0003167304093949497,\n        0.048001728951931,\n        0.005959398113191128,\n        0.012100907042622566,\n        0.01939159259200096,\n        0.0028556757606565952,\n        -0.005940559320151806,\n        0.02068520337343216,\n        -0.018675711005926132,\n        -0.005522961728274822,\n        0.039813049137592316,\n        0.0015455815009772778,\n        -0.0063236188143491745,\n        -0.02416413463652134,\n        0.004684627056121826,\n        0.004078639671206474,\n        -0.015071186237037182,\n        
-0.024051101878285408,\n        0.008050525560975075,\n        0.028760846704244614,\n        -0.002221429953351617,\n        -0.014619050547480583,\n        0.006113249808549881,\n        -0.033131491392850876,\n        0.02471674606204033,\n        0.003482071915641427,\n        -0.021363407373428345,\n        -0.015058627352118492,\n        0.012440008111298084,\n        -0.023071475327014923,\n        0.0088919997215271,\n        -0.0305191520601511,\n        0.02720349095761776,\n        0.03413623571395874,\n        -0.005836945027112961,\n        -0.006668999791145325,\n        0.003796054981648922,\n        0.00045880774268880486,\n        -0.0013477721950039268,\n        -0.007686304859817028,\n        0.02833382971584797,\n        -0.007058338727802038,\n        0.0036641822662204504,\n        0.01891433820128441,\n        -0.0021272350568324327,\n        -0.02079823799431324,\n        -0.005208978895097971,\n        -0.012320694513618946,\n        0.022581661120057106,\n        -0.017331864684820175,\n        0.005482144188135862,\n        -0.011899957433342934,\n        0.011328508146107197,\n        -0.003271703375503421,\n        0.012075788341462612,\n        -0.013790135271847248,\n        -0.00652456795796752,\n        -0.006210584659129381,\n        -0.0016672499477863312,\n        0.02665088139474392,\n        2.0819775272684637e-06,\n        -0.001816391944885254,\n        -0.01485767774283886,\n        -0.017984949052333832,\n        -0.024503236636519432,\n        -0.04712257534265518,\n        -0.004483677912503481,\n        0.0015801197150722146,\n        -0.029589762911200523,\n        -0.004659508354961872,\n        -0.005466444883495569,\n        0.009551363997161388,\n        0.02565869502723217,\n        0.016779253259301186,\n        -0.012358373031020164,\n        0.012553042732179165,\n        0.023247305303812027,\n        -0.0181733388453722,\n        -0.0057301907800138,\n        0.018688270822167397,\n        
0.015975456684827805,\n        -0.02584708482027054,\n        0.016251763328909874,\n        -0.0035699873697012663,\n        -0.019178085029125214,\n        0.004577872809022665,\n        -0.014104118570685387,\n        -0.004160275217145681,\n        0.011585974134504795,\n        0.013388236984610558,\n        0.00026119465474039316,\n        -0.016113610938191414,\n        0.006348737049847841,\n        0.02936369553208351,\n        0.016867170110344887,\n        0.009036432020366192,\n        -0.007322084624320269,\n        -0.003296822076663375,\n        0.037050001323223114,\n        -0.014217152260243893,\n        0.0041539957746863365,\n        -0.007334643974900246,\n        -0.012697474099695683,\n        0.019655339419841766,\n        -0.007052059285342693,\n        -0.007667466066777706,\n        -0.0007700434071011841,\n        -0.03576894849538803,\n        -0.003962466027587652,\n        -0.0019985020626336336,\n        0.008138440549373627,\n        0.014342745766043663,\n        -0.015837304294109344,\n        -0.03210162743926048,\n        -0.0056108771823346615,\n        -0.016703898087143898,\n        0.00034538135514594615,\n        0.009984660893678665,\n        -0.01095172856003046,\n        0.001340707647614181,\n        0.013840372674167156,\n        0.016716457903385162,\n        0.00038973146001808345,\n        0.005940559320151806,\n        -0.011535737663507462,\n        -0.01093916967511177,\n        -0.016314558684825897,\n        -0.02427716925740242,\n        -0.012634677812457085,\n        -0.0012151143746450543,\n        0.041244812309741974,\n        0.006957864388823509,\n        -0.018110541626811028,\n        -0.0088919997215271,\n        -0.005127343349158764,\n        -0.02747979573905468,\n        -0.03227745741605759,\n        -0.002741072094067931,\n        -0.0029655699618160725,\n        -0.0056234365329146385,\n        0.016741575673222542,\n        -0.008527779951691628,\n        0.022016491740942,\n        
0.006263962015509605,\n        -0.0022748070769011974,\n        -0.00584008451551199,\n        -0.019630219787359238,\n        0.011824601329863071,\n        0.004703465849161148,\n        0.01513398252427578,\n        -0.000919970334507525,\n        -0.02328498288989067,\n        0.002163343131542206,\n        0.0026296081487089396,\n        0.011912517249584198,\n        -0.0042073726654052734,\n        0.00936297420412302,\n        -0.009331576526165009,\n        -0.010487033985555172,\n        -0.014355304650962353,\n        -0.0040284027345478535,\n        -0.002763050841167569,\n        0.007253008428961039,\n        0.016364796087145805,\n        -0.0157996267080307,\n        0.001935705542564392,\n        0.0050582666881382465,\n        -0.003422415116801858,\n        0.003422415116801858,\n        -0.00468148710206151,\n        0.008433585055172443,\n        -0.015360050834715366,\n        0.015837304294109344,\n        -0.019981881603598595,\n        -0.006320478860288858,\n        0.003088023280724883,\n        -0.02336033806204796,\n        0.015159101225435734,\n        -0.025420067831873894,\n        0.00652456795796752,\n        0.0294390507042408,\n        0.01068798266351223,\n        0.00756699126213789,\n        -0.0008603135356679559,\n        -0.0018933177925646305,\n        0.0024160996545106173,\n        0.005488423630595207,\n        -0.00935041531920433,\n        -0.011315949261188507,\n        -2.2285437808022834e-05,\n        -0.007372322026640177,\n        -0.021149897947907448,\n        -0.02966511808335781,\n        0.0021617733873426914,\n        -0.025721492245793343,\n        -0.014330185949802399,\n        0.001420773332938552,\n        0.03476420417428017,\n        0.015787066891789436,\n        -0.026550406590104103,\n        -0.01319984719157219,\n        -0.023159390315413475,\n        0.013011457398533821,\n        0.009406931698322296,\n        -0.013388236984610558,\n        0.001562850666232407,\n        
-0.00962672010064125,\n        -0.0042324913665652275,\n        0.021916016936302185,\n        -0.01762072928249836,\n        0.005290614441037178,\n        0.018537558615207672,\n        0.016176406294107437,\n        -0.004251330625265837,\n        0.039260439574718475,\n        0.2337038666009903,\n        -0.012791668996214867,\n        0.0035228899214416742,\n        0.027354203164577484,\n        0.02217976190149784,\n        0.013551508076488972,\n        0.010210729204118252,\n        0.0017080678371712565,\n        0.006982983089983463,\n        0.0028289873152971268,\n        -0.01159853395074606,\n        0.0012826207093894482,\n        -0.00470974575728178,\n        0.006756915245205164,\n        0.005450745578855276,\n        0.00941949151456356,\n        -0.02107454277575016,\n        -0.019793491810560226,\n        -0.030921051278710365,\n        -0.024503236636519432,\n        0.006279660854488611,\n        -0.03295566141605377,\n        -0.025256795808672905,\n        -0.014305067248642445,\n        0.017495134845376015,\n        -0.0072969659231603146,\n        -0.008446143940091133,\n        0.0011868559522554278,\n        0.03363386541604996,\n        0.009746033698320389,\n        0.004763122648000717,\n        -0.011573415249586105,\n        0.011228034272789955,\n        0.011246873065829277,\n        -0.014367864467203617,\n        -0.019567424431443214,\n        0.03604525327682495,\n        0.013363118283450603,\n        0.03084569424390793,\n        0.003871411085128784,\n        -0.016553185880184174,\n        -0.025068406015634537,\n        -0.004518216010183096,\n        -0.017633287236094475,\n        0.0027238030452281237,\n        0.011096160858869553,\n        0.007246728986501694,\n        -0.026173627004027367,\n        0.018123101443052292,\n        0.033985525369644165,\n        0.004656368400901556,\n        0.007014381233602762,\n        0.02300867810845375,\n        0.044032983481884,\n        0.00041406514355912805,\n       
 -0.015950338914990425,\n        0.008653372526168823,\n        0.016666220501065254,\n        -0.005259216297417879,\n        -0.000553002639207989,\n        0.0005902881384827197,\n        0.03185044229030609,\n        -2.5854542400338687e-05,\n        0.03943626955151558,\n        -0.023598965257406235,\n        0.007472796365618706,\n        -0.02519400045275688,\n        -0.002959290286526084,\n        0.003968745935708284,\n        -0.0035166102461516857,\n        -0.01667878031730652,\n        -0.0014144936576485634,\n        0.00603475421667099,\n        -0.00840846635401249,\n        -0.029514405876398087,\n        -0.009519966319203377,\n        0.010499592870473862,\n        0.0036798813380301,\n        0.03335756063461304,\n        0.0274546779692173,\n        -0.009388092905282974,\n        -0.0015463664894923568,\n        -0.009859067387878895,\n        -0.02010747417807579,\n        -0.038657594472169876,\n        -0.029037151485681534,\n        0.0022512583527714014,\n        -0.00729068648070097,\n        -0.012107186019420624,\n        -0.023372897878289223,\n        -0.014531135559082031,\n        -0.010254686698317528,\n        0.001805402571335435,\n        -0.006069292314350605,\n        0.023686882108449936,\n        0.026198744773864746,\n        -0.0034569534473121166,\n        0.010612627491354942,\n        -0.02091127075254917,\n        0.012364652007818222,\n        -0.005510402377694845,\n        -0.00246790680103004,\n        7.820140308467671e-05,\n        0.0090866694226861,\n        0.0022292796056717634,\n        0.009903025813400745,\n        -0.0071148560382425785,\n        0.0007563066901639104,\n        -0.00300324778072536,\n        -0.01563635654747486,\n        -0.022280236706137657,\n        -0.0016735296230763197,\n        -0.006807152647525072,\n        -0.01108988095074892,\n        -0.00680087273940444,\n        0.00976487249135971,\n        -0.005080245900899172,\n        -0.00265472661703825,\n        
-0.0003416528052184731,\n        -0.008885719813406467,\n        -0.003959326073527336,\n        -0.013915728777647018,\n        -0.014794881455600262,\n        0.002414529677480459,\n        -0.006169767118990421,\n        -0.014807440340518951,\n        -0.006888788193464279,\n        0.00915574561804533,\n        0.014531135559082031,\n        -0.030016779899597168,\n        0.00854661874473095,\n        -0.0002739502233453095,\n        0.021237812936306,\n        0.008113321848213673,\n        -0.006989262532442808,\n        0.007334643974900246,\n        0.016992762684822083,\n        -0.0028666651342064142,\n        -0.016879728063941002,\n        0.0035448686685413122,\n        -0.0021131059620529413,\n        -0.010003499686717987,\n        0.0088919997215271,\n        -0.0073534827679395676,\n        -0.014920474030077457,\n        -0.01649039052426815,\n        0.03112199902534485,\n        -0.027555152773857117,\n        -0.010970567353069782,\n        0.002863525412976742,\n        -0.04272681474685669,\n        -0.018185898661613464,\n        -0.013903168961405754,\n        -0.0035385889932513237,\n        0.027931932359933853,\n        -0.020735440775752068,\n        -0.019680457189679146,\n        -0.02485489845275879,\n        -0.006210584659129381,\n        0.009896745905280113,\n        -0.012502805329859257,\n        0.009645558893680573,\n        0.02996654249727726,\n        -0.007862135767936707,\n        -0.038657594472169876,\n        -0.013149609789252281,\n        -0.16045789420604706,\n        0.014430660754442215,\n        0.011523177847266197,\n        -0.012760271318256855,\n        0.02066008374094963,\n        0.015171661041676998,\n        0.024641389027237892,\n        0.004643809515982866,\n        -0.009067830629646778,\n        0.018575238063931465,\n        0.009859067387878895,\n        -0.01288586389273405,\n        -0.014945592731237411,\n        -0.014305067248642445,\n        0.005290614441037178,\n        
-0.01612616889178753,\n        -0.008182398043572903,\n        0.012352093122899532,\n        0.029740475118160248,\n        0.028685491532087326,\n        0.035115864127874374,\n        -0.007334643974900246,\n        -0.0009168304968625307,\n        -0.00477882195264101,\n        0.013714779168367386,\n        -0.007811898365616798,\n        0.0007111715967766941,\n        0.03393528610467911,\n        -0.010053737089037895,\n        -0.007020661141723394,\n        -0.016000576317310333,\n        -0.02888644114136696,\n        0.02582196518778801,\n        0.004559034015983343,\n        0.021966254338622093,\n        -0.006430373061448336,\n        0.012446288019418716,\n        -0.042601220309734344,\n        -0.01635223627090454,\n        0.024930253624916077,\n        0.032855186611413956,\n        0.018625473603606224,\n        0.019228322431445122,\n        -0.013526389375329018,\n        -0.015410288237035275,\n        -0.005224677734076977,\n        0.020609848201274872,\n        0.015083746053278446,\n        0.01621408388018608,\n        -0.0070897373370826244,\n        0.008119601756334305,\n        -0.003028366481885314,\n        -0.0005110073834657669,\n        -0.00622942391782999,\n        0.019981881603598595,\n        0.029489288106560707,\n        0.006888788193464279,\n        0.02452835626900196,\n        0.0024773261975497007,\n        -0.008458703756332397,\n        -0.0002564848982729018,\n        -0.012370931915938854,\n        -0.006254542153328657,\n        -0.004706605803221464,\n        0.006050453521311283,\n        -0.005224677734076977,\n        0.0014686556532979012,\n        0.009689517319202423,\n        -0.00024942029267549515,\n        0.018349168822169304,\n        -0.008910838514566422,\n        -0.023084033280611038,\n        -0.01039911899715662,\n        -0.02554566040635109,\n        0.006825991440564394,\n        0.008389626629650593,\n        -0.03378457576036453,\n        0.03109688125550747,\n        
-0.0114603815600276,\n        -0.014832559041678905,\n        -0.008295431733131409,\n        0.021313169971108437,\n        -0.0024836058728396893,\n        0.007070898078382015,\n        -0.023121710866689682,\n        0.00038973146001808345,\n        0.0032465846743434668,\n        0.012653516605496407,\n        -0.03134806826710701,\n        -0.027002541348338127,\n        0.007453957572579384,\n        -0.017984949052333832,\n        -0.014166914857923985,\n        -0.018022626638412476,\n        0.00543504673987627,\n        0.015598678030073643,\n        0.012534203007817268,\n        0.01623920351266861,\n        -0.01571171171963215,\n        -0.019893966615200043,\n        -0.002260677982121706,\n        0.017143474891781807,\n        -0.016013136133551598,\n        0.011190355755388737,\n        0.03049403429031372,\n        -0.005469584837555885,\n        0.017532814294099808,\n        0.007981449365615845,\n        0.022405831143260002,\n        -0.03169972822070122,\n        -0.03217698261141777,\n        0.006631322205066681,\n        0.014694406650960445,\n        0.026148507371544838,\n        0.01623920351266861,\n        0.026449931785464287,\n        -0.013262644410133362,\n        -0.01734442450106144,\n        0.0048321993090212345,\n        -0.013099372386932373,\n        0.052045829594135284,\n        0.006223144009709358,\n        -0.04420881345868111,\n        -0.0010887362295761704,\n        0.0028792244847863913,\n        0.0002376459160586819,\n        -0.08409722149372101,\n        -0.03438742458820343,\n        0.004712885711342096,\n        0.014405542053282261,\n        -0.001269276486709714,\n        0.03835617005825043,\n        0.004216792527586222,\n        -0.0017881334060803056,\n        0.0019875126890838146,\n        0.005535521078854799,\n        0.008923398330807686,\n        -0.03049403429031372,\n        -0.01787191443145275,\n        -0.010568669065833092,\n        0.04071732237935066,\n        -0.02720349095761776,\n   
     0.012779110111296177,\n        0.0010651875054463744,\n        -0.033960405737161636,\n        0.011604813858866692,\n        -0.008678491227328777,\n        0.007868414744734764,\n        0.0028478263411670923,\n        -0.001828951295465231,\n        -0.02222999930381775,\n        0.012201380915939808,\n        -0.022971000522375107,\n        0.03127271309494972,\n        0.0016358516877517104,\n        -0.004926394205540419,\n        -0.016754135489463806,\n        -0.024063661694526672,\n        -0.014166914857923985,\n        -0.008307991549372673,\n        -0.004449139814823866,\n        0.013626864179968834,\n        -0.04099362716078758,\n        0.03102152608335018,\n        0.013413355685770512,\n        -0.04106898233294487,\n        0.027253728359937668,\n        0.010593787766993046,\n        -0.006461771205067635,\n        -0.05581362545490265,\n        -0.012659796513617039,\n        0.017168592661619186,\n        -0.0019137266790494323,\n        0.03777844086289406,\n        0.01870083063840866,\n        -0.009048991836607456,\n        -0.03222722187638283,\n        -0.03139830380678177,\n        -0.021916016936302185,\n        0.010832414962351322,\n        0.01048075407743454,\n        -0.0017535953084006906,\n        0.008056805469095707,\n        0.028911558911204338,\n        -0.009921864606440067,\n        0.011962753720581532,\n        0.013375678099691868,\n        0.006147787906229496,\n        -0.02038377895951271,\n        0.01704300008714199,\n        -0.012559321708977222,\n        0.018575238063931465,\n        -0.012697474099695683,\n        0.0232221856713295,\n        0.0201577115803957,\n        0.00969579629600048,\n        -0.01563635654747486,\n        0.028409186750650406,\n        0.006245122756808996,\n        0.010053737089037895,\n        -0.03792915120720863,\n        -0.005469584837555885,\n        -0.015837304294109344,\n        -0.020258186385035515,\n        0.006606203503906727,\n        -0.005811826325953007,\n  
      -0.03310637176036835,\n        -0.020735440775752068,\n        0.009218541905283928,\n        -0.008182398043572903,\n        0.026022914797067642,\n        0.01360174547880888,\n        -0.01054983027279377,\n        0.0057710083201527596,\n        -0.015347491018474102,\n        -0.03544240817427635,\n        -0.03054427169263363,\n        0.01035516057163477,\n        0.006989262532442808,\n        -0.011510618962347507,\n        0.007529313676059246,\n        0.014757202938199043,\n        -0.01180576253682375,\n        -0.0010895212180912495,\n        0.01607593148946762,\n        0.010807296261191368,\n        -0.02485489845275879,\n        -0.011824601329863071,\n        -0.0650070533156395,\n        0.032352812588214874,\n        -0.016113610938191414,\n        -0.019353915005922318,\n        0.0009600031771697104,\n        -0.022016491740942,\n        0.006687839049845934,\n        -0.01512142363935709,\n        0.002095836913213134,\n        -0.00881036464124918,\n        -0.018675711005926132,\n        -0.0048635974526405334,\n        -0.002117815660312772,\n        0.012615839019417763,\n        -0.02798216976225376,\n        0.0032685634214431047,\n        0.020182831212878227,\n        0.014192033559083939,\n        0.005880902521312237,\n        -0.002144504338502884,\n        0.00037756460369564593,\n        -0.03245328739285469,\n        -0.016502948477864265,\n        0.001036928966641426,\n        -0.023774797096848488,\n        0.011177796870470047,\n        -0.0314234234392643,\n        0.0005577123956754804,\n        -0.002411389723420143,\n        -0.00874128844588995,\n        0.0064366525039076805,\n        -0.042576100677251816,\n        0.0064021144062280655,\n        0.022682135924696922,\n        0.0008108612382784486,\n        -0.010122813284397125,\n        0.006794593296945095,\n        0.015083746053278446,\n        0.007366042118519545,\n        0.020333541557192802,\n        -0.030895931646227837,\n        
-0.033181726932525635,\n        0.015611236914992332,\n        -0.010832414962351322,\n        0.0036013855133205652,\n        0.004634389653801918,\n        -0.005340851843357086,\n        -0.0033941566944122314,\n        0.030418677255511284,\n        0.009808829985558987,\n        -0.003227745648473501,\n        0.029263220727443695,\n        -0.028409186750650406,\n        -0.013036576099693775,\n        -0.012841906398534775,\n        -0.008351949043571949,\n        -0.017143474891781807,\n        -0.0003757984668482095,\n        -0.008333110250532627,\n        -0.04493725299835205,\n        0.02248118631541729,\n        0.00018220828496851027,\n        0.02966511808335781,\n        -0.007868414744734764,\n        0.005246656946837902,\n        0.010813576169312,\n        -0.0205470509827137,\n        -0.002692404668778181,\n        -0.014619050547480583,\n        -0.02234303392469883,\n        -0.006920186337083578,\n        -0.00593741936609149,\n        0.00517130084335804,\n        0.005312593188136816,\n        0.019630219787359238,\n        0.020647525787353516,\n        -0.003180648200213909,\n        0.0170932374894619,\n        -0.040616847574710846,\n        0.037728201597929,\n        0.020258186385035515,\n        0.021112220361828804,\n        -0.030318202450871468,\n        -0.0004332966054789722,\n        0.034563254565000534,\n        0.006088131107389927,\n        -0.006254542153328657,\n        -0.01731930486857891,\n        -0.009205983020365238,\n        0.00310058263130486,\n        -0.03607037290930748,\n        -0.0266759991645813,\n        0.006939025595784187,\n        0.024289729073643684,\n        -0.0038368727546185255,\n        0.00473800441250205,\n        0.017520254477858543,\n        0.00783073715865612,\n        0.017658406868577003,\n        0.023372897878289223,\n        0.0023721419274806976,\n        0.009438330307602882,\n        -0.016000576317310333,\n        -0.004552754107862711,\n        -0.008785245940089226,\n      
  0.005924860015511513,\n        -0.006289080251008272,\n        -0.037878915667533875,\n        -0.007328364532440901,\n        0.012898423708975315,\n        0.011812042444944382,\n        0.029790712520480156,\n        -0.006361296400427818,\n        0.015046067535877228,\n        -0.02499305084347725,\n        -0.00584008451551199,\n        0.02609827183187008,\n        -0.0014945593429729342,\n        -0.028434304520487785,\n        0.04242539033293724,\n        -0.008973635733127594,\n        0.0035794067662209272,\n        0.02770586498081684,\n        -0.015071186237037182,\n        0.008433585055172443,\n        0.002970279660075903,\n        -0.007793059106916189,\n        -0.0035228899214416742,\n        0.01905249059200287,\n        -0.0050017498433589935,\n        0.011717847548425198,\n        -0.010041178204119205,\n        -0.016452711075544357,\n        -0.015410288237035275,\n        0.0009136906592175364,\n        0.01372733898460865,\n        -0.008351949043571949,\n        0.024339966475963593,\n        -0.0161889661103487,\n        0.05631599947810173,\n        -0.012722592800855637,\n        -0.0041634151712059975,\n        0.02287052571773529,\n        0.014292508363723755,\n        0.020735440775752068,\n        0.024427881464362144,\n        0.018072864040732384,\n        -0.008559177629649639,\n        -0.02024562656879425,\n        -0.0038494321051985025,\n        0.006756915245205164,\n        -0.0008289152756333351,\n        -0.02259422093629837,\n        -0.011127559468150139,\n        0.004289008677005768,\n        -0.01591266132891178,\n        0.004398902412503958,\n        -0.019630219787359238,\n        0.01292354241013527,\n        0.018324051052331924,\n        0.008942237123847008,\n        0.0243902038782835,\n        0.013212407007813454,\n        -0.018072864040732384,\n        -0.019931644201278687,\n        0.00423877127468586,\n        0.0012331684119999409,\n        -0.018349168822169304,\n        
-0.016151288524270058,\n        0.017243949696421623,\n        0.0007649412145838141,\n        -0.0012857605470344424,\n        -0.01260955911129713,\n        0.006656440440565348,\n        0.002959290286526084,\n        -0.007108576130121946,\n        -0.009513686411082745,\n        -0.0036422032862901688,\n        0.010562390089035034,\n        0.02289564348757267,\n        0.0006240413058549166,\n        -0.018097983673214912,\n        -0.03295566141605377,\n        0.006813432089984417,\n        0.002750491490587592,\n        -0.02936369553208351,\n        -0.005384809337556362,\n        -0.02317194826900959\n      ]\n    },\n    {\n      \"object\": \"embedding\",\n      \"index\": 1,\n      \"embedding\": [\n        0.01023657713085413,\n        -0.006719755474478006,\n        0.0031741075217723846,\n        -0.007405183278024197,\n        -0.010435158386826515,\n        0.005877383518964052,\n        -0.011242298409342766,\n        0.0028522126376628876,\n        0.00522398529574275,\n        -0.016347775235772133,\n        0.03156811743974686,\n        0.029928214848041534,\n        0.005617945920675993,\n        0.005086258985102177,\n        0.02043471857905388,\n        0.0074948654510080814,\n        0.02780146524310112,\n        -0.012901417911052704,\n        0.013311393558979034,\n        0.0038499266374856234,\n        -0.0013556417543441057,\n        0.02084469422698021,\n        0.011761173605918884,\n        -0.014605378732085228,\n        -0.002282090485095978,\n        -0.002938691759482026,\n        0.004013276193290949,\n        -0.02040909416973591,\n        0.01891012117266655,\n        -0.025367235764861107,\n        0.01896136812865734,\n        -0.010870757512748241,\n        -0.010473594069480896,\n        -0.005979877430945635,\n        -0.004096552263945341,\n        -0.022228362038731575,\n        -0.011773984879255295,\n        -0.0177955012768507,\n        0.02324048802256584,\n        -0.006566014606505632,\n        
0.006367432419210672,\n        0.0005869376473128796,\n        -0.0010113263269886374,\n        0.0065083615481853485,\n        -0.038076478987932205,\n        0.01811579428613186,\n        0.023471100255846977,\n        -0.009845818392932415,\n        -0.016091540455818176,\n        0.02248459681868553,\n        0.02122904546558857,\n        0.0023685696069151163,\n        -0.017590513452887535,\n        -0.013811050914227962,\n        -0.030312566086649895,\n        0.014323520474135876,\n        -0.03871706500649452,\n        0.029825720936059952,\n        0.02003755420446396,\n        -0.0033887040335685015,\n        0.019935060292482376,\n        -0.012920635752379894,\n        -0.019140733405947685,\n        0.019896624609827995,\n        -0.012786111794412136,\n        0.002304510911926627,\n        0.005758875049650669,\n        -6.095583376009017e-05,\n        -0.03625721111893654,\n        0.0196532029658556,\n        0.013977603055536747,\n        0.011741955764591694,\n        0.006931148935109377,\n        -0.009788165800273418,\n        0.012030219659209251,\n        0.003657750552520156,\n        -0.016104351729154587,\n        0.0059318337589502335,\n        0.006143227219581604,\n        -0.0033887040335685015,\n        0.021126551553606987,\n        -0.046916574239730835,\n        -0.0228177011013031,\n        0.01429789699614048,\n        0.03195246681571007,\n        -0.004461687058210373,\n        -0.0117803905159235,\n        0.007040048483759165,\n        -0.01507941260933876,\n        -0.011613838374614716,\n        0.014835990034043789,\n        0.017910806462168694,\n        0.013759803958237171,\n        0.021100929006934166,\n        -0.01820547692477703,\n        0.003545647719874978,\n        -0.00044400669867172837,\n        0.032977405935525894,\n        -0.003929999656975269,\n        -0.031439997255802155,\n        -0.005480220075696707,\n        -0.02357359416782856,\n        0.00333745707757771,\n        -0.006777408067137003,\n  
      0.000521677837241441,\n        -0.0029210757929831743,\n        0.00032910145819187164,\n        -0.003926796838641167,\n        0.026443421840667725,\n        -0.010403129272162914,\n        -0.03277241811156273,\n        0.02312518283724785,\n        0.010614522732794285,\n        -0.027596479281783104,\n        -0.01773144118487835,\n        -0.009339755401015282,\n        0.014067285694181919,\n        0.004765965510159731,\n        -0.013823863118886948,\n        -0.027724595740437508,\n        0.0022884963545948267,\n        0.020972810685634613,\n        0.020575648173689842,\n        -0.03743589296936989,\n        0.016129974275827408,\n        -0.004756357055157423,\n        -0.01506660133600235,\n        -0.028365181758999825,\n        -0.010576087981462479,\n        -0.0010553667088970542,\n        0.018295157700777054,\n        0.0022580684162676334,\n        0.024034816771745682,\n        -0.009512714110314846,\n        -0.012882200069725513,\n        0.015604693442583084,\n        0.004615427926182747,\n        0.01621965691447258,\n        -0.004429657477885485,\n        -0.00846855714917183,\n        0.014054473489522934,\n        0.03046630695462227,\n        0.009807383641600609,\n        -0.018666699528694153,\n        -0.005236797034740448,\n        0.013964791782200336,\n        0.002373374067246914,\n        -0.0050446209497749805,\n        0.007213007193058729,\n        -0.0037378238048404455,\n        0.01932009682059288,\n        0.0057748896069824696,\n        0.01694992557168007,\n        -0.014361955225467682,\n        0.0234967228025198,\n        0.019806943833827972,\n        -0.005627554841339588,\n        0.011914914473891258,\n        -0.004134987480938435,\n        -0.01775706559419632,\n        0.0029723227489739656,\n        -0.008686356246471405,\n        0.005156723782420158,\n        -0.018038922920823097,\n        0.022868948057293892,\n        0.014938483946025372,\n        0.01589936390519142,\n        
-0.0007162560941651464,\n        -0.009980342350900173,\n        -0.030389437451958656,\n        -0.008948997594416142,\n        0.015207529999315739,\n        -0.03758963197469711,\n        0.020575648173689842,\n        -0.022177115082740784,\n        0.035334765911102295,\n        0.005444987677037716,\n        0.012677212245762348,\n        -0.004330366384238005,\n        -0.022279608994722366,\n        -0.04430298134684563,\n        0.01099887490272522,\n        0.004112567286938429,\n        0.03018444962799549,\n        0.008673544973134995,\n        -0.0011778789339587092,\n        0.014618190005421638,\n        -0.005390537902712822,\n        0.01775706559419632,\n        -0.009474278427660465,\n        0.006604449823498726,\n        0.02739149145781994,\n        0.008058581501245499,\n        0.004231075756251812,\n        -0.6772795915603638,\n        -0.003121259156614542,\n        0.025303177535533905,\n        0.0057332515716552734,\n        0.010127676650881767,\n        0.0010986062698066235,\n        0.02974884957075119,\n        0.021049682050943375,\n        0.01465662568807602,\n        0.015105036087334156,\n        -0.022894570603966713,\n        0.01137682143598795,\n        -0.030261319130659103,\n        -0.01026860624551773,\n        -0.005838948301970959,\n        -0.006393055897206068,\n        -0.004516136832535267,\n        -0.022676771506667137,\n        0.0039940583519637585,\n        0.006681320257484913,\n        -0.006175256334245205,\n        0.037333399057388306,\n        0.002186002442613244,\n        -0.0066300733014941216,\n        0.01861545257270336,\n        -0.004410440102219582,\n        -0.002250061137601733,\n        -0.02284332364797592,\n        -0.00948709063231945,\n        0.0037602444645017385,\n        -0.001710366690531373,\n        0.009800978004932404,\n        -0.0129590705037117,\n        0.006847872864454985,\n        0.07159198075532913,\n        -0.0021379583049565554,\n        -0.00130119186360389,\n    
    -0.0110373105853796,\n        -0.0034111246932297945,\n        0.028032077476382256,\n        -0.013631686568260193,\n        -0.019140733405947685,\n        0.012433789670467377,\n        -0.0001967802527360618,\n        -0.01329858135432005,\n        -0.007482053712010384,\n        0.011953349225223064,\n        -0.003997261635959148,\n        -0.010377505794167519,\n        -0.018846062943339348,\n        0.004202249459922314,\n        -0.0018769192975014448,\n        0.009980342350900173,\n        0.02435510978102684,\n        -0.014592566527426243,\n        0.018231099471449852,\n        0.03131188079714775,\n        -0.0053296820260584354,\n        -0.0018192664720118046,\n        -0.002783349482342601,\n        0.009397407993674278,\n        0.009301319718360901,\n        -0.000698239600751549,\n        0.014361955225467682,\n        -0.014925671741366386,\n        0.017500830814242363,\n        -0.01782112382352352,\n        0.032285574823617935,\n        0.014631002210080624,\n        -0.014823177829384804,\n        -0.006114400923252106,\n        0.001224321429617703,\n        -0.008840097114443779,\n        -0.004362395964562893,\n        0.014413202181458473,\n        0.028775157406926155,\n        0.00983941275626421,\n        -0.01080669928342104,\n        -0.009262884967029095,\n        0.00967926625162363,\n        0.015655940398573875,\n        0.0043399753049016,\n        0.004109364002943039,\n        -0.0020130439661443233,\n        0.03015882521867752,\n        -0.023868262767791748,\n        -0.023868262767791748,\n        -0.009358973242342472,\n        -0.006130415480583906,\n        0.006937554571777582,\n        0.002025855705142021,\n        0.011870073154568672,\n        -0.010403129272162914,\n        -0.00846855714917183,\n        0.023471100255846977,\n        0.003901173360645771,\n        -0.023855451494455338,\n        -0.004756357055157423,\n        0.025636283680796623,\n        -0.005633960478007793,\n        
0.008474962785840034,\n        -0.004141393583267927,\n        0.011005280539393425,\n        -0.012555500492453575,\n        -0.013798239640891552,\n        0.006847872864454985,\n        -0.022971441969275475,\n        0.02202337421476841,\n        0.029646355658769608,\n        -0.031132517382502556,\n        0.005563496146351099,\n        0.0035136183723807335,\n        -0.014272273518145084,\n        0.005041418131440878,\n        -0.0041157701052725315,\n        -0.022881759330630302,\n        0.0003765449218917638,\n        0.0010201344266533852,\n        0.006389853078871965,\n        -0.01926884986460209,\n        0.01627090387046337,\n        -0.0010417542653158307,\n        0.008398092351853848,\n        0.0011298349127173424,\n        0.005438581574708223,\n        -0.003050794592127204,\n        0.014605378732085228,\n        0.010678581893444061,\n        -0.042304351925849915,\n        0.005249608773738146,\n        -0.003196527948603034,\n        0.004705110099166632,\n        0.002853814046829939,\n        -0.010242982767522335,\n        -0.01078748144209385,\n        -0.0075909532606601715,\n        0.01176757924258709,\n        -0.014182590879499912,\n        -0.0024118092842400074,\n        0.021510904654860497,\n        0.005288043990731239,\n        0.001864107558503747,\n        -0.017564889043569565,\n        -0.011741955764591694,\n        0.025200683623552322,\n        -0.01003799494355917,\n        0.002890647854655981,\n        0.003169303061440587,\n        -0.007981711067259312,\n        0.004801197908818722,\n        0.028057700023055077,\n        0.004791588988155127,\n        8.422715472988784e-05,\n        0.016091540455818176,\n        -0.00481400964781642,\n        -0.0185385812073946,\n        -0.00987784843891859,\n        -0.019409779459238052,\n        -0.04791589081287384,\n        -0.007853593677282333,\n        0.02980009652674198,\n        0.01744958385825157,\n        -0.0005477016675285995,\n        
-0.005957457236945629,\n        -0.011953349225223064,\n        -0.0197172611951828,\n        -0.007450024131685495,\n        0.020216919481754303,\n        -0.007385965436697006,\n        -0.024239802733063698,\n        -4.3239608203293756e-05,\n        -0.018333593383431435,\n        -0.011581809259951115,\n        0.00608877744525671,\n        -0.012440195307135582,\n        0.015655940398573875,\n        -0.0077446941286325455,\n        -0.020614081993699074,\n        -0.021395597606897354,\n        -0.015271589159965515,\n        -0.0005056631634943187,\n        -0.011223080568015575,\n        -0.018769193440675735,\n        0.011613838374614716,\n        0.022984253242611885,\n        -0.005845354404300451,\n        0.002275684615597129,\n        0.03618033975362778,\n        -0.02429104968905449,\n        0.004833227023482323,\n        0.0247394610196352,\n        0.03738464415073395,\n        -0.0323624424636364,\n        -0.01097965706139803,\n        0.008744009770452976,\n        -0.018602639436721802,\n        0.004000464454293251,\n        0.005938239395618439,\n        0.02201056107878685,\n        0.011114181019365788,\n        0.005358508322387934,\n        -0.013048752211034298,\n        0.01488723699003458,\n        0.005803716368973255,\n        0.004477701615542173,\n        -0.01932009682059288,\n        0.0016703300643712282,\n        -0.030363813042640686,\n        -0.002752921776846051,\n        0.017705818638205528,\n        -0.0026488262228667736,\n        0.015361270867288113,\n        -0.0025447309017181396,\n        0.027647726237773895,\n        0.008378875441849232,\n        0.04028009623289108,\n        0.007136136759072542,\n        -0.01488723699003458,\n        -0.01627090387046337,\n        -0.013144840486347675,\n        -0.0019249633187428117,\n        0.016450269147753716,\n        -0.012183960527181625,\n        0.008564645424485207,\n        -0.006649290677160025,\n        0.028032077476382256,\n        
0.004836430307477713,\n        0.03659031540155411,\n        0.01620684564113617,\n        -0.03505290672183037,\n        -0.0251750610768795,\n        -0.01411853265017271,\n        0.012280048802495003,\n        -0.0006562010967172682,\n        -0.0055795107036828995,\n        -0.0033822981640696526,\n        0.01121667493134737,\n        -0.008628703653812408,\n        0.04125379025936127,\n        0.0010465586092323065,\n        -0.018423276022076607,\n        0.007616576738655567,\n        0.037640880793333054,\n        0.004807603545486927,\n        -0.016885867342352867,\n        0.02052440121769905,\n        0.005595525726675987,\n        0.007027236744761467,\n        -0.003356674686074257,\n        0.022689582780003548,\n        -0.025034131482243538,\n        0.009557554498314857,\n        -0.016155598685145378,\n        0.00549943745136261,\n        0.006598043721169233,\n        -0.013311393558979034,\n        0.005717237014323473,\n        -0.014938483946025372,\n        0.020280977711081505,\n        0.0401776023209095,\n        -0.000625372864305973,\n        -0.008955403231084347,\n        0.0024742663372308016,\n        0.005147114861756563,\n        -0.014092909172177315,\n        -0.011024498380720615,\n        0.012267236597836018,\n        -0.016091540455818176,\n        0.008833691477775574,\n        0.009435843676328659,\n        -0.019601956009864807,\n        -0.0034463568590581417,\n        0.014682249166071415,\n        -0.009756136685609818,\n        0.026520293205976486,\n        0.010768263600766659,\n        0.024893201887607574,\n        -0.007014425005763769,\n        -0.01701398566365242,\n        -0.0014581356663256884,\n        0.0028249877505004406,\n        -0.015553447417914867,\n        -0.006124009378254414,\n        0.02238210290670395,\n        -0.000762698648031801,\n        -0.03464293107390404,\n        -0.01215833704918623,\n        -0.001716772560030222,\n        -0.008692762814462185,\n        
0.008077799342572689,\n        -0.01653994992375374,\n        0.021754326298832893,\n        -0.003926796838641167,\n        0.0035264301113784313,\n        -0.0023925916757434607,\n        -0.014618190005421638,\n        0.01852576993405819,\n        -0.01816704124212265,\n        -0.0038787529338151217,\n        0.021895255893468857,\n        0.006037530489265919,\n        0.00013292176299728453,\n        0.0070656719617545605,\n        -0.016411833465099335,\n        -0.00541936419904232,\n        -0.0051439120434224606,\n        0.0014685451751574874,\n        -0.011351197957992554,\n        0.003955623134970665,\n        -0.02509818971157074,\n        -0.01705241948366165,\n        -0.023881075903773308,\n        -0.010697798803448677,\n        -0.014912860468029976,\n        0.008526209741830826,\n        -0.0033534718677401543,\n        -0.013618875294923782,\n        0.018448898568749428,\n        0.03259305655956268,\n        0.012446600943803787,\n        -0.003862738376483321,\n        -0.0021667848341166973,\n        -0.009916283190250397,\n        -0.027647726237773895,\n        0.09865036606788635,\n        0.006809437647461891,\n        -0.007751100230962038,\n        0.014682249166071415,\n        -0.026827774941921234,\n        -0.004753153771162033,\n        -0.03133750334382057,\n        -0.031004400923848152,\n        -0.01702679693698883,\n        0.006242518313229084,\n        0.0016134779434651136,\n        0.021036868914961815,\n        -0.01583530567586422,\n        0.011927725747227669,\n        0.009724107570946217,\n        0.011709926649928093,\n        0.01588655263185501,\n        -0.007321906741708517,\n        -0.006399461999535561,\n        -0.003433545120060444,\n        -0.019102297723293304,\n        0.016475891694426537,\n        -0.017603324726223946,\n        0.013849485665559769,\n        -0.004676283337175846,\n        0.0009240464423783123,\n        -0.010825916193425655,\n        0.007001613266766071,\n        
-0.008462151512503624,\n        -0.007687041535973549,\n        0.002632811665534973,\n        0.005361711140722036,\n        0.006835061125457287,\n        0.010678581893444061,\n        -0.01044156402349472,\n        0.0174239594489336,\n        0.011985378339886665,\n        0.02315080538392067,\n        0.011549779213964939,\n        -0.016680879518389702,\n        0.011223080568015575,\n        0.002693667309358716,\n        -0.004775574430823326,\n        -0.02509818971157074,\n        -0.009595990180969238,\n        -0.005316870287060738,\n        -0.02504694275557995,\n        0.03051755391061306,\n        -0.013003911823034286,\n        -0.029441367834806442,\n        0.025713153183460236,\n        -0.0028185818810015917,\n        0.010691393166780472,\n        -0.02595657669007778,\n        0.0003090831160079688,\n        0.01659119687974453,\n        -0.011639461852610111,\n        -0.035001661628484726,\n        -0.010902786627411842,\n        -0.0024406355805695057,\n        -0.010825916193425655,\n        -0.029979461804032326,\n        -0.01656557433307171,\n        -0.009134767577052116,\n        -0.013798239640891552,\n        -0.034591685980558395,\n        -0.03797398507595062,\n        -0.012305672280490398,\n        -0.0393064059317112,\n        -0.013913544826209545,\n        0.022561466321349144,\n        -0.043534278869628906,\n        -0.020357847213745117,\n        -0.0037602444645017385,\n        0.009653642773628235,\n        0.012408166192471981,\n        0.007373153697699308,\n        -0.025034131482243538,\n        0.007161760237067938,\n        0.003913985099643469,\n        -0.0014597370754927397,\n        -0.023483911529183388,\n        0.002860219916328788,\n        -0.0022228360176086426,\n        0.0009752933401614428,\n        0.0060503422282636166,\n        0.013798239640891552,\n        0.004894082900136709,\n        -0.034566063433885574,\n        0.02857016958296299,\n        0.020024742931127548,\n        
0.009346161037683487,\n        0.019140733405947685,\n        -0.0030187652446329594,\n        0.014246650040149689,\n        -0.015374083071947098,\n        -0.003545647719874978,\n        0.016142787411808968,\n        -0.015156283043324947,\n        0.00502540310844779,\n        0.007988116703927517,\n        -0.005896601360291243,\n        -0.00850058626383543,\n        0.006473129615187645,\n        0.005573105067014694,\n        0.013951979577541351,\n        0.02664840966463089,\n        0.022305231541395187,\n        -0.0019217603839933872,\n        -0.011120586656033993,\n        0.020947188138961792,\n        -0.02203618548810482,\n        -0.009403813630342484,\n        -0.008263569325208664,\n        0.005002982914447784,\n        0.02397075667977333,\n        0.030825035646557808,\n        0.03218308091163635,\n        0.008071393705904484,\n        -0.012645183131098747,\n        -0.0004888477851636708,\n        -0.023445475846529007,\n        -0.001793642994016409,\n        0.009320537559688091,\n        -0.002629608614370227,\n        0.0033534718677401543,\n        -0.009749731048941612,\n        -0.021446844562888145,\n        -0.004551369231194258,\n        -0.0021443641744554043,\n        -0.014336331747472286,\n        0.018436087295413017,\n        -0.0060823713429272175,\n        -0.00885931495577097,\n        -0.026955891400575638,\n        -0.00512469420209527,\n        -0.014797554351389408,\n        0.01046078186482191,\n        -0.005541075486689806,\n        -0.016527138650417328,\n        -0.02511100098490715,\n        -0.0049997796304523945,\n        -0.012023814022541046,\n        0.017231784760951996,\n        -0.03443794697523117,\n        -0.03218308091163635,\n        -0.010684987530112267,\n        0.01390073262155056,\n        -0.003420733381062746,\n        0.01892293430864811,\n        -0.009000244550406933,\n        -0.0045737894251942635,\n        -0.017577700316905975,\n        0.010729828849434853,\n        
-0.004916503559798002,\n        -0.024790707975625992,\n        -0.020767822861671448,\n        -0.019115108996629715,\n        0.017193349078297615,\n        0.027160879224538803,\n        0.029236380010843277,\n        0.007879217155277729,\n        0.022599902004003525,\n        0.018026111647486687,\n        -0.005970268975943327,\n        -0.0005601130542345345,\n        -0.005742860492318869,\n        -0.009813789278268814,\n        -0.018666699528694153,\n        0.012837358750402927,\n        0.008180293254554272,\n        0.010922004468739033,\n        0.007981711067259312,\n        0.0018689119024202228,\n        0.004576992709189653,\n        0.005787701345980167,\n        -0.006642885040491819,\n        -0.013670122250914574,\n        -0.00983941275626421,\n        -0.01294625923037529,\n        -0.013285770080983639,\n        -0.0135163813829422,\n        -0.0028377994894981384,\n        0.00687349634245038,\n        0.001427707727998495,\n        -0.014387578703463078,\n        0.021523715928196907,\n        0.0060599506832659245,\n        -0.009615207090973854,\n        -0.003865941194817424,\n        0.01852576993405819,\n        -0.020652517676353455,\n        0.022497408092021942,\n        -0.009423031471669674,\n        0.013644498772919178,\n        -0.020934375002980232,\n        0.007200195454061031,\n        0.013580439612269402,\n        0.011658679693937302,\n        0.005355305504053831,\n        -0.010114865377545357,\n        0.024508850648999214,\n        -0.01703960821032524,\n        -0.008410904556512833,\n        0.005451393313705921,\n        0.01741114817559719,\n        -0.0021139364689588547,\n        -0.014195403084158897,\n        0.008955403231084347,\n        0.00019858189625665545,\n        0.012395353987812996,\n        -0.02900576964020729,\n        -0.023663274943828583,\n        -0.0270327627658844,\n        -0.007962493225932121,\n        -0.009205232374370098,\n        -0.01616840995848179,\n        
0.013606063090264797,\n        -0.013926356099545956,\n        -0.03935765102505684,\n        -0.018666699528694153,\n        -0.005313667468726635,\n        0.025777211412787437,\n        0.0016703300643712282,\n        0.015156283043324947,\n        0.0024758679792284966,\n        -0.02739149145781994,\n        -0.0009432640508748591,\n        0.01311921700835228,\n        -0.013670122250914574,\n        -0.00221963319927454,\n        0.039229534566402435,\n        -0.002186002442613244,\n        -0.004865256603807211,\n        -0.002150770043954253,\n        -0.0204603411257267,\n        0.021113740280270576,\n        -0.010736234486103058,\n        -0.03448919206857681,\n        0.011402444913983345,\n        0.007072078064084053,\n        0.009768947958946228,\n        -0.009032273665070534,\n        -0.0029226772021502256,\n        -0.013542004860937595,\n        0.03413046523928642,\n        -0.006847872864454985,\n        -0.027519607916474342,\n        -0.021895255893468857,\n        0.0026216013357043266,\n        -0.008052175864577293,\n        0.025572223588824272,\n        -0.035795990377664566,\n        0.036487821489572525,\n        0.03525789454579353,\n        -0.022715207189321518,\n        0.003015562193468213,\n        0.001989021897315979,\n        0.014579755254089832,\n        -0.004801197908818722,\n        -0.011056527495384216,\n        0.03623158857226372,\n        0.0025671515613794327,\n        -0.021100929006934166,\n        0.004980562254786491,\n        -0.018410464748740196,\n        -0.001675134408287704,\n        0.017859559506177902,\n        0.00018567006918601692,\n        0.018282346427440643,\n        -0.007155354134738445,\n        -0.0038371148984879255,\n        -0.01964038982987404,\n        0.00597347179427743,\n        0.01121026836335659,\n        -0.010889975354075432,\n        -0.0077126650139689445,\n        -0.02195931412279606,\n        -0.0038339118473231792,\n        -0.010185330174863338,\n        
0.021818386390805244,\n        0.0065307822078466415,\n        0.0014765525702387094,\n        -0.009576772339642048,\n        -0.016040293499827385,\n        -0.025738777592778206,\n        -0.015591882169246674,\n        -0.0012203177902847528,\n        0.015450953505933285,\n        -0.01466943696141243,\n        -0.011434474028646946,\n        -0.0034047188237309456,\n        -0.002813777420669794,\n        0.031516868621110916,\n        0.004307946190237999,\n        -0.00024062041484285146,\n        -0.0018000488635152578,\n        0.016399022191762924,\n        -0.02008880116045475,\n        0.0038211001083254814,\n        0.02317642979323864,\n        0.027135256677865982,\n        -0.020947188138961792,\n        0.0015998654998838902,\n        -0.00026924663688987494,\n        -0.016399022191762924,\n        0.007289877627044916,\n        -0.009128361940383911,\n        0.004077334888279438,\n        0.005662787239998579,\n        0.00022040188196115196,\n        -0.005749266128987074,\n        0.006681320257484913,\n        -0.004484107252210379,\n        -0.005912615917623043,\n        0.0028281905688345432,\n        0.010076429694890976,\n        -0.024662591516971588,\n        -0.006681320257484913,\n        0.030235696583986282,\n        -0.011050121858716011,\n        0.008827285841107368,\n        -0.0037025916390120983,\n        -0.0015502199530601501,\n        -0.012965476140379906,\n        0.00152139354031533,\n        -0.026955891400575638,\n        0.00638024415820837,\n        -0.027980830520391464,\n        0.007610171101987362,\n        0.008692762814462185,\n        0.014541319571435452,\n        0.009685671888291836,\n        -0.036077845841646194,\n        -0.0355910025537014,\n        0.01060811709612608,\n        0.005262420512735844,\n        0.01213911920785904,\n        0.017705818638205528,\n        0.001870513428002596,\n        -0.002735305577516556,\n        0.02274082973599434,\n        0.007981711067259312,\n        
0.0035200242418795824,\n        0.01664244383573532,\n        0.0029915403574705124,\n        0.011223080568015575,\n        -0.009743324480950832,\n        -0.026084693148732185,\n        -0.01694992557168007,\n        0.007552518043667078,\n        0.03935765102505684,\n        -0.00034671759931370616,\n        -0.0142594613134861,\n        -0.006598043721169233,\n        0.010793887078762054,\n        -0.014400390908122063,\n        0.005746063310652971,\n        -0.014041662216186523,\n        0.014169779606163502,\n        0.007213007193058729,\n        0.009147578850388527,\n        -0.002192408312112093,\n        0.02549535408616066,\n        0.015463764779269695,\n        0.007597359362989664,\n        -0.004432860296219587,\n        -0.01140885055065155,\n        0.005822933744639158,\n        0.012324889190495014,\n        0.0270327627658844,\n        -0.0009344559512101114,\n        -0.03131188079714775,\n        0.0033694864250719547,\n        -0.0012227200204506516,\n        0.011114181019365788,\n        -0.005787701345980167,\n        0.004224669653922319,\n        0.009890659712255001,\n        -0.01046078186482191,\n        -0.0008039363892748952,\n        -0.011447285301983356,\n        0.001793642994016409,\n        0.01627090387046337,\n        -0.0035200242418795824,\n        -0.005569902248680592,\n        0.010166112333536148,\n        0.009704889729619026,\n        -0.0030636063311249018,\n        0.016514327377080917,\n        -0.003913985099643469,\n        0.021741515025496483,\n        -0.02705838531255722,\n        0.004576992709189653,\n        -0.014797554351389408,\n        -0.0013019925681874156,\n        0.004862053785473108,\n        -0.026046257466077805,\n        0.00040076710865832865,\n        -0.006790219806134701,\n        0.0010249388869851828,\n        0.031875599175691605,\n        -0.007347530219703913,\n        -0.0044648898765444756,\n        0.01933290809392929,\n        -0.016770562157034874,\n        
-0.008186698891222477,\n        0.0021379583049565554,\n        -0.005749266128987074,\n        -0.021036868914961815,\n        0.0017135696252807975,\n        -0.007847188040614128,\n        -0.01194694358855486,\n        -0.02903139218688011,\n        0.0028169802390038967,\n        -0.024137310683727264,\n        -0.012818141840398312,\n        -0.02092156372964382,\n        0.01975569687783718,\n        0.026161564514040947,\n        0.0018448899500072002,\n        -0.0010946026304736733,\n        -0.023727335035800934,\n        0.016104351729154587,\n        0.013452322222292423,\n        -0.019960684701800346,\n        0.007398777175694704,\n        -0.02933887392282486,\n        -0.016001857817173004,\n        0.013388263992965221,\n        -0.01579686999320984,\n        -0.007635794579982758,\n        0.00889775063842535,\n        0.008731197565793991,\n        0.017590513452887535,\n        0.038076478987932205,\n        0.22610150277614594,\n        -0.024124497547745705,\n        0.0023061123210936785,\n        0.022279608994722366,\n        0.013465134426951408,\n        0.02438073232769966,\n        -0.009045084938406944,\n        -0.0006485941121354699,\n        0.0080906106159091,\n        0.008039363659918308,\n        -0.025790024548768997,\n        0.01277970615774393,\n        -0.001265959581360221,\n        -0.000538092921487987,\n        0.002045073313638568,\n        -0.017859559506177902,\n        -0.04202249273657799,\n        -0.01431070826947689,\n        -0.02090875245630741,\n        -0.026571540161967278,\n        0.0007498869090341032,\n        -0.014861613512039185,\n        -0.011652273125946522,\n        -0.006566014606505632,\n        0.02472664974629879,\n        -0.007501271087676287,\n        -0.008103422820568085,\n        0.00027305010007694364,\n        0.021024057641625404,\n        0.007373153697699308,\n        0.0004956540069542825,\n        -0.019140733405947685,\n        0.0003104844072367996,\n        
0.013093593530356884,\n        0.004218264017254114,\n        -0.020255353301763535,\n        0.014631002210080624,\n        -0.004054914228618145,\n        0.019384155049920082,\n        0.02545691840350628,\n        -0.01894855685532093,\n        -0.0017231784295290709,\n        -0.006325794383883476,\n        -0.014733496122062206,\n        0.01698836125433445,\n        0.010345476679503918,\n        -0.0015934596303850412,\n        -0.01577124558389187,\n        -0.006975989788770676,\n        0.02739149145781994,\n        -0.01701398566365242,\n        0.00041117664659395814,\n        0.017193349078297615,\n        0.04294493794441223,\n        0.0035264301113784313,\n        0.013132029213011265,\n        -0.010095647536218166,\n        0.002938691759482026,\n        -0.014682249166071415,\n        -0.0053713200613856316,\n        -0.014605378732085228,\n        0.02775021828711033,\n        0.0006537988665513694,\n        0.010492810979485512,\n        -0.022535841912031174,\n        0.023368606343865395,\n        -0.033361759036779404,\n        -0.005384131800383329,\n        0.03331051394343376,\n        0.0019073471194133162,\n        -0.004026087932288647,\n        -0.004006870090961456,\n        -0.012312077917158604,\n        0.018320782110095024,\n        -0.034566063433885574,\n        -0.005374522879719734,\n        -0.00016395017155446112,\n        0.00687349634245038,\n        0.03292616084218025,\n        0.012862982228398323,\n        -0.013721369206905365,\n        0.010415940545499325,\n        0.014195403084158897,\n        -0.011953349225223064,\n        -0.0247394610196352,\n        -0.03605222329497337,\n        0.022177115082740784,\n        -0.005835745483636856,\n        -0.007040048483759165,\n        -0.006034327670931816,\n        0.005159926600754261,\n        -0.031183764338493347,\n        -0.017090855166316032,\n        0.006377041339874268,\n        0.020345035940408707,\n        0.040305718779563904,\n        
0.004743545316159725,\n        0.008993837982416153,\n        -0.014541319571435452,\n        -0.005217579193413258,\n        0.005903006996959448,\n        -0.03448919206857681,\n        0.0002622402098495513,\n        -0.0002678453456610441,\n        -0.007219412829726934,\n        -0.0060503422282636166,\n        -0.00964083056896925,\n        0.01178679708391428,\n        -0.005573105067014694,\n        -0.031158139929175377,\n        -0.010781075805425644,\n        -0.014067285694181919,\n        -0.003763447282835841,\n        -0.007456430234014988,\n        -0.002764131873846054,\n        0.011882884427905083,\n        -0.0002940693811979145,\n        0.008705574087798595,\n        0.005621149204671383,\n        -0.016770562157034874,\n        0.020947188138961792,\n        -0.013401075266301632,\n        -0.0018721148371696472,\n        -0.0043463814072310925,\n        -0.011223080568015575,\n        -0.011107774451375008,\n        -0.007943276315927505,\n        0.004211857914924622,\n        0.01619403436779976,\n        -0.032285574823617935,\n        0.001259553711861372,\n        -0.008237945847213268,\n        0.027980830520391464,\n        0.000949669920373708,\n        -0.016706503927707672,\n        0.002653630683198571,\n        0.012638777494430542,\n        -0.01657838560640812,\n        -0.007257848046720028,\n        0.018730757758021355,\n        -0.002205220051109791,\n        -0.0010481601348146796,\n        0.009314131923019886,\n        -0.0022596698254346848,\n        -0.015989046543836594,\n        0.004990171175450087,\n        0.023048311471939087,\n        -0.010294229723513126,\n        -0.01657838560640812,\n        -0.012081466615200043,\n        -0.03520664945244789,\n        -0.022663960233330727,\n        -0.03374611213803291,\n        0.0006313783233053982,\n        0.04133065789937973,\n        -0.014566943049430847,\n        -0.017577700316905975,\n        -0.029569486156105995,\n        -0.011242298409342766,\n        
0.016527138650417328,\n        -0.031106894835829735,\n        0.00647953525185585,\n        0.019486648961901665,\n        -0.015040977858006954,\n        -0.03899892419576645,\n        -0.011107774451375008,\n        -0.16378523409366608,\n        0.02549535408616066,\n        0.00891056191176176,\n        -0.005118288565427065,\n        0.03548850864171982,\n        0.013798239640891552,\n        0.012440195307135582,\n        0.0008255562279373407,\n        -0.010121271014213562,\n        0.001718374085612595,\n        0.031414374709129333,\n        0.0060055009089410305,\n        -0.008609486743807793,\n        -0.0006441900623030961,\n        0.017116479575634003,\n        -0.018064547330141068,\n        -0.009019461460411549,\n        0.010281417518854141,\n        0.02862141653895378,\n        0.023778581991791725,\n        0.0017423960380256176,\n        -0.002471063518896699,\n        0.006655696779489517,\n        -0.002250061137601733,\n        0.01470787264406681,\n        -0.006040733307600021,\n        0.006373838521540165,\n        0.041894376277923584,\n        -0.016437456011772156,\n        0.007635794579982758,\n        -0.03400234505534172,\n        -0.03679530322551727,\n        0.03172185644507408,\n        0.017680194228887558,\n        0.019973495975136757,\n        0.005326479207724333,\n        -0.010531246662139893,\n        -0.02708400972187519,\n        -0.014323520474135876,\n        0.002940293401479721,\n        0.028390806168317795,\n        0.03287491202354431,\n        0.0009328544838353992,\n        0.02003755420446396,\n        -0.015348459593951702,\n        -0.0016350977821275592,\n        0.003641735762357712,\n        0.012440195307135582,\n        0.005115085281431675,\n        -0.007366748061031103,\n        0.016283715143799782,\n        -0.004560977686196566,\n        -0.015912175178527832,\n        -0.0129590705037117,\n        0.002512701554223895,\n        0.020614081993699074,\n        0.01374699268490076,\n        
0.016821809113025665,\n        -0.005512249190360308,\n        0.003782664891332388,\n        -0.02204899676144123,\n        -0.015732811763882637,\n        -0.0023989975452423096,\n        0.00907070841640234,\n        0.009391002357006073,\n        -0.0027048776391893625,\n        0.007347530219703913,\n        0.017257407307624817,\n        -0.0012227200204506516,\n        0.013087187893688679,\n        0.009211638011038303,\n        -0.019922249019145966,\n        0.02316361851990223,\n        -0.0023861858062446117,\n        0.00347518315538764,\n        0.010121271014213562,\n        -0.018692322075366974,\n        0.01848733425140381,\n        0.00313407089561224,\n        -0.023676088079810143,\n        -0.020998435094952583,\n        0.026776527985930443,\n        -0.01964038982987404,\n        0.005364914424717426,\n        -0.00030127595528028905,\n        0.003420733381062746,\n        -0.003888361854478717,\n        0.006751784589141607,\n        -0.030773788690567017,\n        -0.007936869747936726,\n        0.02197212725877762,\n        -0.030671294778585434,\n        -0.011671490967273712,\n        -0.008609486743807793,\n        0.003657750552520156,\n        0.02320205233991146,\n        0.006905525457113981,\n        0.005444987677037716,\n        -0.017552077770233154,\n        -0.013862297870218754,\n        -0.0018016502726823092,\n        -0.0028025670908391476,\n        -0.009865036234259605,\n        -0.01310640573501587,\n        0.04432860389351845,\n        -0.004295134451240301,\n        0.02782708965241909,\n        0.001064174808561802,\n        0.017231784760951996,\n        -0.01896136812865734,\n        -0.020280977711081505,\n        -0.00022720811830367893,\n        0.024483226239681244,\n        0.017500830814242363,\n        0.013951979577541351,\n        0.005438581574708223,\n        0.0038339118473231792,\n        -0.014105720445513725,\n        0.00026384167722426355,\n        0.0073411245830357075,\n        
0.04986327514052391,\n        -0.011306356638669968,\n        -0.028390806168317795,\n        -0.002086711348965764,\n        -0.005976674612611532,\n        -0.01780831255018711,\n        -0.08840097486972809,\n        -0.026904644444584846,\n        0.007655011955648661,\n        0.01738552562892437,\n        -0.007001613266766071,\n        0.009352566674351692,\n        0.0024342297110706568,\n        0.0030043520964682102,\n        -0.00830841064453125,\n        0.012741271406412125,\n        0.011844449676573277,\n        -0.02623843401670456,\n        -0.024572908878326416,\n        -0.0005709229735657573,\n        0.034232959151268005,\n        -0.027929583564400673,\n        0.01891012117266655,\n        -0.020204106345772743,\n        -0.012734864838421345,\n        0.026776527985930443,\n        -0.00251430319622159,\n        0.001099407090805471,\n        -0.008276381529867649,\n        0.014131343923509121,\n        -0.018231099471449852,\n        -0.005486625712364912,\n        -0.01698836125433445,\n        0.03215745463967323,\n        0.002780146664008498,\n        -0.002128349617123604,\n        -0.01506660133600235,\n        -0.019063862040638924,\n        0.0063546206802129745,\n        0.0014044864801689982,\n        0.01137682143598795,\n        0.008571051061153412,\n        -0.026776527985930443,\n        0.013951979577541351,\n        0.021498091518878937,\n        -0.038563322275877,\n        0.007808752823621035,\n        0.004125379025936127,\n        -0.02276645414531231,\n        -0.05173378810286522,\n        -0.008744009770452976,\n        -0.0009520720923319459,\n        -0.0035232272930443287,\n        0.014771930873394012,\n        0.0011250305688008666,\n        -0.013990415260195732,\n        -0.028698287904262543,\n        -0.04373926669359207,\n        -0.007853593677282333,\n        0.007879217155277729,\n        0.024457603693008423,\n        0.013849485665559769,\n        0.02507256716489792,\n        0.022125868126749992,\n 
       -0.005592322442680597,\n        0.02816019393503666,\n        0.012254425324499607,\n        -0.00028085726080462337,\n        -0.032669924199581146,\n        0.022907383739948273,\n        -0.00753970630466938,\n        0.0050222002901136875,\n        -0.015976233407855034,\n        0.013836674392223358,\n        0.018372029066085815,\n        0.004708312917500734,\n        -0.021088115870952606,\n        0.03295178338885307,\n        0.005685207433998585,\n        0.015117848291993141,\n        -0.03436107560992241,\n        -0.009262884967029095,\n        -0.023266112431883812,\n        -0.0015718397917225957,\n        0.008391686715185642,\n        0.0009889057837426662,\n        -0.011460097506642342,\n        -0.0154381413012743,\n        0.008583863265812397,\n        0.013849485665559769,\n        0.0004984565894119442,\n        0.006226503290235996,\n        0.005361711140722036,\n        -0.0038851588033139706,\n        -0.010512028820812702,\n        -0.014746307395398617,\n        -0.006809437647461891,\n        0.011882884427905083,\n        0.0064314911141991615,\n        -0.02550816535949707,\n        0.003427139250561595,\n        0.02284332364797592,\n        -0.014797554351389408,\n        0.0044969189912080765,\n        0.023663274943828583,\n        0.0019041441846638918,\n        -0.026020634919404984,\n        0.005438581574708223,\n        -0.06108635663986206,\n        0.028723910450935364,\n        -0.011876478791236877,\n        -0.022689582780003548,\n        0.010025182738900185,\n        -0.02892889827489853,\n        0.0033406601287424564,\n        0.0023477505892515182,\n        -0.0189741812646389,\n        -0.0009905073093250394,\n        0.00869916845113039,\n        0.00667491415515542,\n        0.0037089975085109472,\n        0.004727530293166637,\n        -0.03874268755316734,\n        -0.001560629578307271,\n        0.01935853250324726,\n        0.0013035940937697887,\n        0.01623246818780899,\n        
0.0004672279756050557,\n        -7.51688567106612e-05,\n        -0.01702679693698883,\n        0.0073603419587016106,\n        -0.004833227023482323,\n        -0.028108946979045868,\n        0.0002832594618666917,\n        -0.03487354516983032,\n        0.002471063518896699,\n        0.007309095002710819,\n        -0.031183764338493347,\n        0.028108946979045868,\n        -0.0479927621781826,\n        -0.003910782281309366,\n        0.01580968126654625,\n        0.0031372737139463425,\n        0.012209584005177021,\n        -0.03090190701186657,\n        0.011690708808600903,\n        0.010108459740877151,\n        0.013042346574366093,\n        -0.0432780422270298,\n        -0.031004400923848152,\n        0.03333613649010658,\n        -0.01580968126654625,\n        0.01142166182398796,\n        0.007001613266766071,\n        -0.021164987236261368,\n        0.004189437720924616,\n        0.015963422134518623,\n        0.020755011588335037,\n        -0.0024470414500683546,\n        0.03866581618785858,\n        -0.022228362038731575,\n        -0.001466943765990436,\n        0.00762298284098506,\n        -0.0011986979516223073,\n        0.01666806824505329,\n        -0.0010177321964874864,\n        -0.008250758051872253,\n        -0.023752957582473755,\n        0.01616840995848179,\n        0.004394425079226494,\n        0.014105720445513725,\n        0.008763226680457592,\n        -0.0016431050607934594,\n        0.008295598439872265,\n        -0.03169623389840126,\n        -0.017244596034288406,\n        -0.0317474789917469,\n        -0.025982199236750603,\n        -0.005864571779966354,\n        0.011581809259951115,\n        0.02741711400449276,\n        0.016155598685145378,\n        0.02505975402891636,\n        0.017897995188832283,\n        -0.01272845920175314,\n        0.024906013160943985,\n        -0.05765280872583389,\n        0.02357359416782856,\n        0.014156967401504517,\n        0.004826821386814117,\n        -0.043047431856393814,\n        
0.008404498919844627,\n        0.021498091518878937,\n        0.001606271369382739,\n        -0.0024998898152261972,\n        -0.009192420169711113,\n        -0.01060811709612608,\n        0.004897285718470812,\n        -0.04061320051550865,\n        -0.002450244501233101,\n        0.025264741852879524,\n        0.013567628338932991,\n        -0.022612713277339935,\n        0.012241613119840622,\n        0.014374767430126667,\n        0.0047723716124892235,\n        0.004509730730205774,\n        0.010729828849434853,\n        0.0045193396508693695,\n        0.017231784760951996,\n        -0.02195931412279606,\n        -0.0007931265281513333,\n        -0.008417310193181038,\n        0.03487354516983032,\n        -0.017218973487615585,\n        -0.026904644444584846,\n        -0.008737603202462196,\n        0.01970444992184639,\n        0.013990415260195732,\n        0.03928077965974808,\n        0.012446600943803787,\n        0.010960440151393414,\n        -0.016501516103744507,\n        0.013426698744297028,\n        0.0066621024161577225,\n        0.0011090158950537443,\n        -0.02980009652674198,\n        0.057345326989889145,\n        -0.0031372737139463425,\n        0.01810298301279545,\n        0.023355793207883835,\n        0.007763911969959736,\n        0.018743569031357765,\n        -0.00771907065063715,\n        -0.02933887392282486,\n        -0.010422347113490105,\n        0.02276645414531231,\n        -0.014528508298099041,\n        0.002581564709544182,\n        -0.0050446209497749805,\n        -0.02236928977072239,\n        -0.007206601090729237,\n        0.00023361398780252784,\n        0.0018256723415106535,\n        0.0009008251363411546,\n        0.039178285747766495,\n        -0.010781075805425644,\n        0.045789141207933426,\n        0.013260146602988243,\n        -0.015271589159965515,\n        0.01817985251545906,\n        0.01529721263796091,\n        0.01333701703697443,\n        0.00445528095588088,\n        0.02203618548810482,\n     
   -0.017859559506177902,\n        -0.009685671888291836,\n        0.0341048389673233,\n        -0.008628703653812408,\n        0.010146894492208958,\n        -0.028032077476382256,\n        -0.006540391128510237,\n        0.006248923949897289,\n        -0.024790707975625992,\n        0.005390537902712822,\n        -0.010652958415448666,\n        0.029876967892050743,\n        0.03018444962799549,\n        0.0038307090289890766,\n        0.032669924199581146,\n        0.017193349078297615,\n        -0.028903275728225708,\n        -0.006047139409929514,\n        -0.0038243031594902277,\n        0.0026344130747020245,\n        -0.006245721131563187,\n        -0.0285189226269722,\n        0.015335647389292717,\n        0.009608801454305649,\n        -0.0038723470643162727,\n        -0.005493031814694405,\n        0.026033446192741394,\n        -0.007661418057978153,\n        -0.00520476745441556,\n        -0.01388792134821415,\n        -0.02043471857905388,\n        0.002728899708017707,\n        0.01976850815117359,\n        -0.0064314911141991615,\n        -0.025392860174179077,\n        -0.02584127150475979,\n        0.00866713933646679,\n        0.0018817236414179206,\n        -0.028493300080299377,\n        0.001047359430231154,\n        -0.020665328949689865\n      ]\n    }\n  ],\n  \"model\": \"ada\",\n  \"usage\": {\n    \"prompt_tokens\": 7,\n    \"total_tokens\": 7\n  }\n}\n"
  },
  {
    "path": "libs/kotaemon/tests/resources/fullocr_sample_output.json",
    "content": "[{\"csv_string\": \",,,\\u5358\\u4f4d,\\u5b9f\\u65bd\\u4f8b1,\\u5b9f\\u65bd\\u4f8b2,\\u5b9f\\u65bd\\u4f8b3,\\u6bd4\\u8f03\\u4f8b1,\\u6bd4\\u8f03\\u4f8b2,\\u6bd4\\u8f03\\u4f8b3,\\u6bd4\\u8f03\\u4f8b4\\n\\u71b1\\u786c\\u5316\\u6027\\u6a39\\u8102\\u7d44\\u6210\\u7269,\\u71b1\\u786c\\u5316\\u6027\\u6a39\\u8102,\\u30a8\\u30dd\\u30ad\\u30b7\\u6a39\\u81021,\\u8cea\\u91cf%,10.2,12.4,12.4,10.2,12.4,,10.2\\n,,\\u30a8\\u30dd\\u30ad\\u30b7\\u6a39\\u81022,,-,-,-,-,-,14.4,-\\n,\\u786c\\u5316\\u5264,\\u786c\\u5316\\u52641,,4.8,6.6,6.6,4.8,6.6,7.6,4.8\\n,\\u7121\\u6a5f\\u5145\\u586b\\u6750,\\u7121\\u6a5f\\u5145\\u586b\\u52641,,74,70,70,74,70,67,74\\n,,\\u7121\\u6a5f\\u5145\\u586b\\u52642,,10,10,10,10,10,10,10\\n,\\u786c\\u5316\\u4fc3\\u9032\\u5264,\\u786c\\u5316\\u4fc3\\u9032\\u52641,,0.2,0.2,0.2,0.2,0.2,0.2,0.2\\n,\\u30ab\\u30c3\\u30d7\\u30ea\\u30f3\\u30b0\\u5264,\\u30ab\\u30c3\\u30d7\\u30ea\\u30f3\\u30b0\\u52641,,0.2,0.2,0.2,0.2,0.2,0.2,0.2\\n,\\u96e2\\u578b\\u5264,\\u96e2\\u578b\\u52641,,0.2,0.2,0.2,0.2,0.2,0.2,0.2\\n,\\u7740\\u8272\\u5264,\\u7740\\u8272\\u52641,,0.4,0.4,0.4,0.4,0.4,0.4,0.4\\n,\\u5408\\u8a08,,,100,100,100,100,100,100,100\\n,,,\\u5358\\u4f4d,\\u5b9f\\u65bd\\u4f8b1,\\u5b9f\\u65bd\\u4f8b2,\\u5b9f\\u65bd\\u4f8b3,\\u6bd4\\u8f03\\u4f8b1,\\u6bd4\\u8f03\\u4f8b2,\\u6bd4\\u8f03\\u4f8b3,\\u6bd4\\u8f03\\u4f8b4\\n\\u71b1\\u786c\\u5316\\u6027\\u6a39\\u8102\\u7d44\\u6210\\u7269,\\u71b1\\u786c\\u5316\\u6027\\u6a39\\u8102,\\u30a8\\u30dd\\u30ad\\u30b7\\u6a39\\u81021,%,,10,10,(\\u30bb\\u30e9\\u30df\\u30c3\\u30af),(\\u30bb\\u30e9\\u30df\\u30c3\\u30af),,\\n,,\\u30a8\\u30dd\\u30ad\\u30b7\\u6a39\\u81022,,5,-,-,,,5,5\\n,,\\u30a8\\u30dd\\u30ad\\u30b7\\u6a39\\u81023,,5,-,-,,,5,5\\n,,\\u30b7\\u30a2\\u30cd\\u30fc\\u30c8\\u6a39\\u81021,,10,10,10,,,10,10\\n,,\\u30d5\\u30a7\\u30ce\\u30fc\\u30eb\\u7cfb\\u786c\\u5316\\u52641,,5,5,5,,,5,5\\n,\\u786c\\u5316\\u89e6\\u5a92,\\u786c\\u5316\\u89e6\\u5a921,,0.2,0.2,0.2,,,0.2,0.2\\n,\\u7121\\u6a5f\\u5145\\u586b\\u6750,\\u7121\\u6a5f\\u51
45\\u586b\\u67501,,74.8,74.8,74.8,,,74.8,74.8\\n,\\u5408\\u8a08,,,100,100,100,,,100,100\\nIL1TD-L1TU,,,ppm,510,470,470,,,510,510\\nL125D-L125U,,,ppm,390,330,330,,,390,390\\n\\u30ac\\u30e9\\u30b9\\u8ee2\\u79fb\\u6e29\\u5ea6 (Tg),,,\\u00b0C,,224,224,,,235,235\\n\", \"image\": \"7810d908b0ff4ce381dcab873196d133.jpg\", \"image_shape\": [1653, 2339], \"json\": {\"ocr\": [{\"location\": [[237, 290], [352, 290], [352, 309], [237, 309]], \"type\": \"textline\", \"text\": \"\\u71b1\\u786c\\u5316\\u6027\\u6a39\\u8102\", \"confidence_by_character\": [0.9686674475669861, 0.9664099812507629, 0.9206223487854004, 0.9281898140907288, 0.962303102016449, 0.9605254530906677], \"confidence_by_field\": 0.9206223487854004, \"original_text\": \"\\u71b1\\u786c\\u5316\\u6027\\u6a39\\u8102\"}, {\"location\": [[694, 272], [739, 272], [739, 297], [694, 297]], \"type\": \"textline\", \"text\": \"10.2\", \"confidence_by_character\": [0.9169260263442993, 0.9255782961845398, 0.924151599407196, 0.9199540019035339], \"confidence_by_field\": 0.9169260263442993, \"original_text\": \"10.2\"}, {\"location\": [[767, 242], [843, 242], [843, 264], [767, 264]], \"type\": \"textline\", \"text\": \"\\u5b9f\\u65bd\\u4f8b2\", \"confidence_by_character\": [0.9473932385444641, 0.9163241982460022, 0.7033942937850952, 0.8979774117469788], \"confidence_by_field\": 0.7033942937850952, \"original_text\": \"\\u5b9f\\u65bd\\u4f8b2\"}, {\"location\": [[783, 272], [830, 272], [830, 297], [783, 297]], \"type\": \"textline\", \"text\": \"12.4\", \"confidence_by_character\": [0.9123356342315674, 0.9198176264762878, 0.9250789880752563, 0.92127925157547], \"confidence_by_field\": 0.9123356342315674, \"original_text\": \"12.4\"}, {\"location\": [[961, 268], [1011, 268], [1011, 298], [961, 298]], \"type\": \"textline\", \"text\": \"10.2\", \"confidence_by_character\": [0.917376697063446, 0.9228717684745789, 0.9257153868675232, 0.9199367761611938], \"confidence_by_field\": 0.917376697063446, \"original_text\": \"10.2\"}, 
{\"location\": [[1050, 268], [1100, 268], [1100, 298], [1050, 298]], \"type\": \"textline\", \"text\": \"12.4\", \"confidence_by_character\": [0.9123420119285583, 0.9206566214561462, 0.9267528057098389, 0.9224498867988586], \"confidence_by_field\": 0.9123420119285583, \"original_text\": \"12.4\"}, {\"location\": [[1130, 242], [1203, 242], [1203, 264], [1130, 264]], \"type\": \"textline\", \"text\": \"\\u6bd4\\u8f03\\u4f8b3\", \"confidence_by_character\": [0.9575085639953613, 0.9481230974197388, 0.756417453289032, 0.9103184342384338], \"confidence_by_field\": 0.756417453289032, \"original_text\": \"\\u6bd4\\u8f03\\u4f8b3\"}, {\"location\": [[236, 335], [297, 335], [297, 358], [236, 358]], \"type\": \"textline\", \"text\": \"\\u786c\\u5316\\u5264\", \"confidence_by_character\": [0.9772048592567444, 0.9326395988464355, 0.9597039222717285], \"confidence_by_field\": 0.9326395988464355, \"original_text\": \"\\u786c\\u5316\\u5264\"}, {\"location\": [[383, 335], [454, 335], [454, 358], [383, 358]], \"type\": \"textline\", \"text\": \"\\u786c\\u5316\\u52641\", \"confidence_by_character\": [0.9785639047622681, 0.9359536170959473, 0.9465298056602478, 0.911139965057373], \"confidence_by_field\": 0.911139965057373, \"original_text\": \"\\u786c\\u5316\\u52641\"}, {\"location\": [[385, 305], [510, 305], [510, 325], [385, 325]], \"type\": \"textline\", \"text\": \"\\u30a8\\u30dd\\u30ad\\u30b7\\u6a39\\u81022\", \"confidence_by_character\": [0.9240000247955322, 0.9118557572364807, 0.8709841966629028, 0.898737370967865, 0.9585683345794678, 0.9542934894561768, 0.9125517010688782], \"confidence_by_field\": 0.8709841966629028, \"original_text\": \"\\u30a8\\u30dd\\u30ad\\u30b7\\u6a39\\u81022\"}, {\"location\": [[385, 274], [509, 274], [509, 294], [385, 294]], \"type\": \"textline\", \"text\": \"\\u30a8\\u30dd\\u30ad\\u30b7\\u6a39\\u81021\", \"confidence_by_character\": [0.9230291247367859, 0.913360595703125, 0.8715535402297974, 0.9017773270606995, 0.9544979333877563, 0.9605312943458557, 
0.9045484662055969], \"confidence_by_field\": 0.8715535402297974, \"original_text\": \"\\u30a8\\u30dd\\u30ad\\u30b7\\u6a39\\u81021\"}, {\"location\": [[601, 240], [644, 240], [644, 265], [601, 265]], \"type\": \"textline\", \"text\": \"\\u5358\\u4f4d\", \"confidence_by_character\": [0.9253715872764587, 0.9197276830673218], \"confidence_by_field\": 0.9197276830673218, \"original_text\": \"\\u5358\\u4f4d\"}, {\"location\": [[675, 242], [752, 242], [752, 264], [675, 264]], \"type\": \"textline\", \"text\": \"\\u5b9f\\u65bd\\u4f8b1\", \"confidence_by_character\": [0.9467881917953491, 0.9383226037025452, 0.7930335402488708, 0.7423544526100159], \"confidence_by_field\": 0.7423544526100159, \"original_text\": \"\\u5b9f\\u65bd\\u4f8b1\"}, {\"location\": [[856, 242], [933, 242], [933, 264], [856, 264]], \"type\": \"textline\", \"text\": \"\\u5b9f\\u65bd\\u4f8b3\", \"confidence_by_character\": [0.9493170976638794, 0.9393699169158936, 0.7815940976142883, 0.8886772990226746], \"confidence_by_field\": 0.7815940976142883, \"original_text\": \"\\u5b9f\\u65bd\\u4f8b3\"}, {\"location\": [[1232, 272], [1280, 272], [1280, 297], [1232, 297]], \"type\": \"textline\", \"text\": \"10.2\", \"confidence_by_character\": [0.9169086217880249, 0.9247543811798096, 0.9242917895317078, 0.9190678596496582], \"confidence_by_field\": 0.9169086217880249, \"original_text\": \"10.2\"}, {\"location\": [[709, 308], [722, 308], [722, 324], [709, 324]], \"type\": \"textline\", \"text\": \"-\", \"confidence_by_character\": [0.8763231635093689], \"confidence_by_field\": 0.8763231635093689, \"original_text\": \"-\"}, {\"location\": [[788, 334], [825, 334], [825, 359], [788, 359]], \"type\": \"textline\", \"text\": \"6.6\", \"confidence_by_character\": [0.9220069050788879, 0.9281726479530334, 0.9160889387130737], \"confidence_by_field\": 0.9160889387130737, \"original_text\": \"6.6\"}, {\"location\": [[799, 308], [812, 308], [812, 324], [799, 324]], \"type\": \"textline\", \"text\": \"-\", 
\"confidence_by_character\": [0.888385534286499], \"confidence_by_field\": 0.888385534286499, \"original_text\": \"-\"}, {\"location\": [[873, 273], [919, 273], [919, 297], [873, 297]], \"type\": \"textline\", \"text\": \"12.4\", \"confidence_by_character\": [0.912204921245575, 0.9198381900787354, 0.9228084087371826, 0.9204709529876709], \"confidence_by_field\": 0.912204921245575, \"original_text\": \"12.4\"}, {\"location\": [[949, 242], [1022, 242], [1022, 264], [949, 264]], \"type\": \"textline\", \"text\": \"\\u6bd4\\u8f03\\u4f8b1\", \"confidence_by_character\": [0.9624030590057373, 0.9464587569236755, 0.7610214948654175, 0.9083214998245239], \"confidence_by_field\": 0.7610214948654175, \"original_text\": \"\\u6bd4\\u8f03\\u4f8b1\"}, {\"location\": [[1040, 242], [1114, 242], [1114, 264], [1040, 264]], \"type\": \"textline\", \"text\": \"\\u6bd4\\u8f03\\u4f8b2\", \"confidence_by_character\": [0.9574549794197083, 0.95237135887146, 0.7252858281135559, 0.9158275127410889], \"confidence_by_field\": 0.7252858281135559, \"original_text\": \"\\u6bd4\\u8f03\\u4f8b2\"}, {\"location\": [[1219, 242], [1293, 242], [1293, 264], [1219, 264]], \"type\": \"textline\", \"text\": \"\\u6bd4\\u8f03\\u4f8b4\", \"confidence_by_character\": [0.9668106436729431, 0.9518352746963501, 0.792457640171051, 0.91578209400177], \"confidence_by_field\": 0.792457640171051, \"original_text\": \"\\u6bd4\\u8f03\\u4f8b4\"}, {\"location\": [[886, 307], [904, 307], [904, 324], [886, 324]], \"type\": \"textline\", \"text\": \"-\", \"confidence_by_character\": [0.8876243233680725], \"confidence_by_field\": 0.8876243233680725, \"original_text\": \"-\"}, {\"location\": [[980, 310], [993, 310], [993, 324], [980, 324]], \"type\": \"textline\", \"text\": \"-\", \"confidence_by_character\": [0.8970376253128052], \"confidence_by_field\": 0.8970376253128052, \"original_text\": \"-\"}, {\"location\": [[1065, 306], [1087, 306], [1087, 328], [1065, 328]], \"type\": \"textline\", \"text\": \"-\", 
\"confidence_by_character\": [0.8992383480072021], \"confidence_by_field\": 0.8992383480072021, \"original_text\": \"-\"}, {\"location\": [[1249, 310], [1262, 310], [1262, 324], [1249, 324]], \"type\": \"textline\", \"text\": \"-\", \"confidence_by_character\": [0.8871487379074097], \"confidence_by_field\": 0.8871487379074097, \"original_text\": \"-\"}, {\"location\": [[237, 382], [333, 382], [333, 401], [237, 401]], \"type\": \"textline\", \"text\": \"\\u7121\\u6a5f\\u5145\\u586b\\u6750\", \"confidence_by_character\": [0.952167809009552, 0.9192871451377869, 0.9678506255149841, 0.9592251181602478, 0.9547764658927917], \"confidence_by_field\": 0.9192871451377869, \"original_text\": \"\\u7121\\u6a5f\\u5145\\u586b\\u6750\"}, {\"location\": [[384, 367], [490, 367], [490, 386], [384, 386]], \"type\": \"textline\", \"text\": \"\\u7121\\u6a5f\\u5145\\u586b\\u52641\", \"confidence_by_character\": [0.9493359923362732, 0.9333418011665344, 0.9685581922531128, 0.9741933941841125, 0.9563856720924377, 0.9097828269004822], \"confidence_by_field\": 0.9097828269004822, \"original_text\": \"\\u7121\\u6a5f\\u5145\\u586b\\u52641\"}, {\"location\": [[697, 334], [734, 334], [734, 358], [697, 358]], \"type\": \"textline\", \"text\": \"4.8\", \"confidence_by_character\": [0.9166890978813171, 0.9267215132713318, 0.9219436645507812], \"confidence_by_field\": 0.9166890978813171, \"original_text\": \"4.8\"}, {\"location\": [[880, 337], [914, 337], [914, 359], [880, 359]], \"type\": \"textline\", \"text\": \"6.6\", \"confidence_by_character\": [0.9212806820869446, 0.9280492067337036, 0.917919933795929], \"confidence_by_field\": 0.917919933795929, \"original_text\": \"6.6\"}, {\"location\": [[701, 396], [733, 396], [733, 421], [701, 421]], \"type\": \"textline\", \"text\": \"10\", \"confidence_by_character\": [0.9160857200622559, 0.9220792651176453], \"confidence_by_field\": 0.9160857200622559, \"original_text\": \"10\"}, {\"location\": [[701, 364], [733, 364], [733, 389], [701, 389]], 
\"type\": \"textline\", \"text\": \"74\", \"confidence_by_character\": [0.9179115295410156, 0.9187763333320618], \"confidence_by_field\": 0.9179115295410156, \"original_text\": \"74\"}, {\"location\": [[793, 365], [820, 365], [820, 388], [793, 388]], \"type\": \"textline\", \"text\": \"70\", \"confidence_by_character\": [0.9186102151870728, 0.9202508330345154], \"confidence_by_field\": 0.9186102151870728, \"original_text\": \"70\"}, {\"location\": [[883, 367], [910, 367], [910, 388], [883, 388]], \"type\": \"textline\", \"text\": \"70\", \"confidence_by_character\": [0.9183272123336792, 0.920913577079773], \"confidence_by_field\": 0.9183272123336792, \"original_text\": \"70\"}, {\"location\": [[969, 334], [1006, 334], [1006, 359], [969, 359]], \"type\": \"textline\", \"text\": \"4.8\", \"confidence_by_character\": [0.9184820652008057, 0.9289669990539551, 0.9193996787071228], \"confidence_by_field\": 0.9184820652008057, \"original_text\": \"4.8\"}, {\"location\": [[970, 364], [1002, 364], [1002, 389], [970, 389]], \"type\": \"textline\", \"text\": \"74\", \"confidence_by_character\": [0.9170815348625183, 0.9173583984375], \"confidence_by_field\": 0.9170815348625183, \"original_text\": \"74\"}, {\"location\": [[1059, 334], [1094, 334], [1094, 359], [1059, 359]], \"type\": \"textline\", \"text\": \"6.6\", \"confidence_by_character\": [0.9224190711975098, 0.9298495650291443, 0.9167705178260803], \"confidence_by_field\": 0.9167705178260803, \"original_text\": \"6.6\"}, {\"location\": [[1143, 303], [1190, 303], [1190, 327], [1143, 327]], \"type\": \"textline\", \"text\": \"14.4\", \"confidence_by_character\": [0.9120434522628784, 0.9212027788162231, 0.9268625974655151, 0.9202296137809753], \"confidence_by_field\": 0.9120434522628784, \"original_text\": \"14.4\"}, {\"location\": [[1061, 364], [1093, 364], [1093, 391], [1061, 391]], \"type\": \"textline\", \"text\": \"70\", \"confidence_by_character\": [0.9193744659423828, 0.9198716878890991], \"confidence_by_field\": 
0.9193744659423828, \"original_text\": \"70\"}, {\"location\": [[1148, 334], [1185, 334], [1185, 359], [1148, 359]], \"type\": \"textline\", \"text\": \"7.6\", \"confidence_by_character\": [0.9237031936645508, 0.922902524471283, 0.9160147905349731], \"confidence_by_field\": 0.9160147905349731, \"original_text\": \"7.6\"}, {\"location\": [[1149, 364], [1182, 364], [1182, 389], [1149, 389]], \"type\": \"textline\", \"text\": \"67\", \"confidence_by_character\": [0.9192152619361877, 0.9226703643798828], \"confidence_by_field\": 0.9192152619361877, \"original_text\": \"67\"}, {\"location\": [[1238, 334], [1275, 334], [1275, 358], [1238, 358]], \"type\": \"textline\", \"text\": \"4.8\", \"confidence_by_character\": [0.9171125292778015, 0.9256289005279541, 0.9216449856758118], \"confidence_by_field\": 0.9171125292778015, \"original_text\": \"4.8\"}, {\"location\": [[1243, 365], [1270, 365], [1270, 388], [1243, 388]], \"type\": \"textline\", \"text\": \"74\", \"confidence_by_character\": [0.9173100590705872, 0.9199501872062683], \"confidence_by_field\": 0.9173100590705872, \"original_text\": \"74\"}, {\"location\": [[384, 429], [495, 429], [495, 448], [384, 448]], \"type\": \"textline\", \"text\": \"\\u786c\\u5316\\u4fc3\\u9032\\u52641\", \"confidence_by_character\": [0.9833092093467712, 0.9183294773101807, 0.8089151382446289, 0.9589380621910095, 0.9683259725570679, 0.9022980332374573], \"confidence_by_field\": 0.8089151382446289, \"original_text\": \"\\u786c\\u5316\\u4fc3\\u9032\\u52641\"}, {\"location\": [[236, 520], [295, 520], [295, 542], [236, 542]], \"type\": \"textline\", \"text\": \"\\u7740\\u8272\\u5264\", \"confidence_by_character\": [0.9404123425483704, 0.9459457397460938, 0.9636843204498291], \"confidence_by_field\": 0.9404123425483704, \"original_text\": \"\\u7740\\u8272\\u5264\"}, {\"location\": [[236, 489], [295, 489], [295, 512], [236, 512]], \"type\": \"textline\", \"text\": \"\\u96e2\\u578b\\u5264\", \"confidence_by_character\": [0.9581624865531921, 
0.945020318031311, 0.9589981436729431], \"confidence_by_field\": 0.945020318031311, \"original_text\": \"\\u96e2\\u578b\\u5264\"}, {\"location\": [[237, 459], [371, 459], [371, 480], [237, 480]], \"type\": \"textline\", \"text\": \"\\u30ab\\u30c3\\u30d7\\u30ea\\u30f3\\u30b0\\u5264\", \"confidence_by_character\": [0.9071572422981262, 0.9223423600196838, 0.9463334083557129, 0.9168652892112732, 0.921983003616333, 0.9373961091041565, 0.9660329818725586], \"confidence_by_field\": 0.9071572422981262, \"original_text\": \"\\u30ab\\u30c3\\u30d7\\u30ea\\u30f3\\u30b0\\u5264\"}, {\"location\": [[237, 429], [333, 429], [333, 448], [237, 448]], \"type\": \"textline\", \"text\": \"\\u786c\\u5316\\u4fc3\\u9032\\u5264\", \"confidence_by_character\": [0.9770804643630981, 0.9250267744064331, 0.831341564655304, 0.9542734026908875, 0.9714828729629517], \"confidence_by_field\": 0.831341564655304, \"original_text\": \"\\u786c\\u5316\\u4fc3\\u9032\\u5264\"}, {\"location\": [[384, 398], [491, 398], [491, 417], [384, 417]], \"type\": \"textline\", \"text\": \"\\u7121\\u6a5f\\u5145\\u586b\\u52642\", \"confidence_by_character\": [0.9506935477256775, 0.931281566619873, 0.9721326231956482, 0.9734131693840027, 0.9598016738891602, 0.915614664554596], \"confidence_by_field\": 0.915614664554596, \"original_text\": \"\\u7121\\u6a5f\\u5145\\u586b\\u52642\"}, {\"location\": [[592, 410], [652, 410], [652, 434], [592, 434]], \"type\": \"textline\", \"text\": \"\\u8cea\\u91cf%\", \"confidence_by_character\": [0.9058261513710022, 0.9396486282348633, 0.9175691604614258], \"confidence_by_field\": 0.9058261513710022, \"original_text\": \"\\u8cea\\u91cf%\"}, {\"location\": [[883, 397], [910, 397], [910, 419], [883, 419]], \"type\": \"textline\", \"text\": \"10\", \"confidence_by_character\": [0.9157432913780212, 0.9219378232955933], \"confidence_by_field\": 0.9157432913780212, \"original_text\": \"10\"}, {\"location\": [[970, 396], [1002, 396], [1002, 421], [970, 421]], \"type\": \"textline\", \"text\": 
\"10\", \"confidence_by_character\": [0.9164617657661438, 0.9216349720954895], \"confidence_by_field\": 0.9164617657661438, \"original_text\": \"10\"}, {\"location\": [[1061, 396], [1093, 396], [1093, 421], [1061, 421]], \"type\": \"textline\", \"text\": \"10\", \"confidence_by_character\": [0.9159533977508545, 0.9209455251693726], \"confidence_by_field\": 0.9159533977508545, \"original_text\": \"10\"}, {\"location\": [[1151, 396], [1183, 396], [1183, 421], [1151, 421]], \"type\": \"textline\", \"text\": \"10\", \"confidence_by_character\": [0.9155728816986084, 0.9205871224403381], \"confidence_by_field\": 0.9155728816986084, \"original_text\": \"10\"}, {\"location\": [[1243, 397], [1270, 397], [1270, 419], [1243, 419]], \"type\": \"textline\", \"text\": \"10\", \"confidence_by_character\": [0.9161996841430664, 0.9221776127815247], \"confidence_by_field\": 0.9161996841430664, \"original_text\": \"10\"}, {\"location\": [[385, 459], [532, 459], [532, 479], [385, 479]], \"type\": \"textline\", \"text\": \"\\u30ab\\u30c3\\u30d7\\u30ea\\u30f3\\u30b0\\u52641\", \"confidence_by_character\": [0.906684398651123, 0.9242702126502991, 0.9463043808937073, 0.9182814955711365, 0.9192276000976562, 0.9397355914115906, 0.958014190196991, 0.9101974368095398], \"confidence_by_field\": 0.906684398651123, \"original_text\": \"\\u30ab\\u30c3\\u30d7\\u30ea\\u30f3\\u30b0\\u52641\"}, {\"location\": [[384, 520], [457, 520], [457, 542], [384, 542]], \"type\": \"textline\", \"text\": \"\\u7740\\u8272\\u52641\", \"confidence_by_character\": [0.9345429539680481, 0.9415675401687622, 0.9616525769233704, 0.9126831889152527], \"confidence_by_field\": 0.9126831889152527, \"original_text\": \"\\u7740\\u8272\\u52641\"}, {\"location\": [[384, 491], [457, 491], [457, 510], [384, 510]], \"type\": \"textline\", \"text\": \"\\u96e2\\u578b\\u52641\", \"confidence_by_character\": [0.938474178314209, 0.9532917737960815, 0.9655312895774841, 0.9134681224822998], \"confidence_by_field\": 0.9134681224822998, 
\"original_text\": \"\\u96e2\\u578b\\u52641\"}, {\"location\": [[697, 458], [734, 458], [734, 483], [697, 483]], \"type\": \"textline\", \"text\": \"0.2\", \"confidence_by_character\": [0.918220579624176, 0.9233868718147278, 0.9168131947517395], \"confidence_by_field\": 0.9168131947517395, \"original_text\": \"0.2\"}, {\"location\": [[697, 427], [734, 427], [734, 451], [697, 451]], \"type\": \"textline\", \"text\": \"0.2\", \"confidence_by_character\": [0.9187952280044556, 0.9251962304115295, 0.9168661236763], \"confidence_by_field\": 0.9168661236763, \"original_text\": \"0.2\"}, {\"location\": [[878, 427], [915, 427], [915, 451], [878, 451]], \"type\": \"textline\", \"text\": \"0.2\", \"confidence_by_character\": [0.9180716276168823, 0.9220828413963318, 0.9168251156806946], \"confidence_by_field\": 0.9168251156806946, \"original_text\": \"0.2\"}, {\"location\": [[697, 520], [734, 520], [734, 543], [697, 543]], \"type\": \"textline\", \"text\": \"0.4\", \"confidence_by_character\": [0.9189480543136597, 0.9261084794998169, 0.9156019687652588], \"confidence_by_field\": 0.9156019687652588, \"original_text\": \"0.4\"}, {\"location\": [[699, 488], [734, 488], [734, 513], [699, 513]], \"type\": \"textline\", \"text\": \"0.2\", \"confidence_by_character\": [0.9182897210121155, 0.9248169660568237, 0.9167609214782715], \"confidence_by_field\": 0.9167609214782715, \"original_text\": \"0.2\"}, {\"location\": [[788, 456], [825, 456], [825, 481], [788, 481]], \"type\": \"textline\", \"text\": \"0.2\", \"confidence_by_character\": [0.9177680015563965, 0.9243178963661194, 0.9165362119674683], \"confidence_by_field\": 0.9165362119674683, \"original_text\": \"0.2\"}, {\"location\": [[788, 427], [825, 427], [825, 451], [788, 451]], \"type\": \"textline\", \"text\": \"0.2\", \"confidence_by_character\": [0.9185940027236938, 0.922125518321991, 0.9162130951881409], \"confidence_by_field\": 0.9162130951881409, \"original_text\": \"0.2\"}, {\"location\": [[793, 397], [820, 397], [820, 
419], [793, 419]], \"type\": \"textline\", \"text\": \"10\", \"confidence_by_character\": [0.9177662134170532, 0.9242537021636963], \"confidence_by_field\": 0.9177662134170532, \"original_text\": \"10\"}, {\"location\": [[789, 488], [825, 488], [825, 513], [789, 513]], \"type\": \"textline\", \"text\": \"0.2\", \"confidence_by_character\": [0.9175012111663818, 0.9248098731040955, 0.9154093265533447], \"confidence_by_field\": 0.9154093265533447, \"original_text\": \"0.2\"}, {\"location\": [[878, 458], [914, 458], [914, 483], [878, 483]], \"type\": \"textline\", \"text\": \"0.2\", \"confidence_by_character\": [0.9184542298316956, 0.9236595034599304, 0.9165345430374146], \"confidence_by_field\": 0.9165345430374146, \"original_text\": \"0.2\"}, {\"location\": [[969, 456], [1006, 456], [1006, 481], [969, 481]], \"type\": \"textline\", \"text\": \"0.2\", \"confidence_by_character\": [0.9164131879806519, 0.9254051446914673, 0.9154728651046753], \"confidence_by_field\": 0.9154728651046753, \"original_text\": \"0.2\"}, {\"location\": [[969, 426], [1006, 426], [1006, 451], [969, 451]], \"type\": \"textline\", \"text\": \"0.2\", \"confidence_by_character\": [0.9165401458740234, 0.9244502782821655, 0.9159473776817322], \"confidence_by_field\": 0.9159473776817322, \"original_text\": \"0.2\"}, {\"location\": [[1057, 456], [1094, 456], [1094, 481], [1057, 481]], \"type\": \"textline\", \"text\": \"0.2\", \"confidence_by_character\": [0.9178974628448486, 0.9235621690750122, 0.9160559177398682], \"confidence_by_field\": 0.9160559177398682, \"original_text\": \"0.2\"}, {\"location\": [[1057, 426], [1094, 426], [1094, 451], [1057, 451]], \"type\": \"textline\", \"text\": \"0.2\", \"confidence_by_character\": [0.9180322885513306, 0.921731173992157, 0.9166355133056641], \"confidence_by_field\": 0.9166355133056641, \"original_text\": \"0.2\"}, {\"location\": [[1148, 456], [1185, 456], [1185, 483], [1148, 483]], \"type\": \"textline\", \"text\": \"0.2\", \"confidence_by_character\": 
[0.9164595603942871, 0.9242724180221558, 0.9157667756080627], \"confidence_by_field\": 0.9157667756080627, \"original_text\": \"0.2\"}, {\"location\": [[1148, 426], [1185, 426], [1185, 451], [1148, 451]], \"type\": \"textline\", \"text\": \"0.2\", \"confidence_by_character\": [0.9180106520652771, 0.9206836223602295, 0.9163994789123535], \"confidence_by_field\": 0.9163994789123535, \"original_text\": \"0.2\"}, {\"location\": [[1238, 456], [1274, 456], [1274, 481], [1238, 481]], \"type\": \"textline\", \"text\": \"0.2\", \"confidence_by_character\": [0.917959451675415, 0.9236196279525757, 0.916109561920166], \"confidence_by_field\": 0.916109561920166, \"original_text\": \"0.2\"}, {\"location\": [[1238, 426], [1275, 426], [1275, 451], [1238, 451]], \"type\": \"textline\", \"text\": \"0.2\", \"confidence_by_character\": [0.9176772832870483, 0.9209798574447632, 0.9162566065788269], \"confidence_by_field\": 0.9162566065788269, \"original_text\": \"0.2\"}, {\"location\": [[791, 521], [825, 521], [825, 543], [791, 543]], \"type\": \"textline\", \"text\": \"0.4\", \"confidence_by_character\": [0.9188475012779236, 0.9243553280830383, 0.9138782024383545], \"confidence_by_field\": 0.9138782024383545, \"original_text\": \"0.4\"}, {\"location\": [[878, 518], [914, 518], [914, 543], [878, 543]], \"type\": \"textline\", \"text\": \"0.4\", \"confidence_by_character\": [0.9186568856239319, 0.924356997013092, 0.9140645265579224], \"confidence_by_field\": 0.9140645265579224, \"original_text\": \"0.4\"}, {\"location\": [[878, 488], [914, 488], [914, 513], [878, 513]], \"type\": \"textline\", \"text\": \"0.2\", \"confidence_by_character\": [0.9191107749938965, 0.9236589670181274, 0.9163731336593628], \"confidence_by_field\": 0.9163731336593628, \"original_text\": \"0.2\"}, {\"location\": [[969, 518], [1006, 518], [1006, 543], [969, 543]], \"type\": \"textline\", \"text\": \"0.4\", \"confidence_by_character\": [0.9177034497261047, 0.9257544279098511, 0.911573052406311], 
\"confidence_by_field\": 0.911573052406311, \"original_text\": \"0.4\"}, {\"location\": [[969, 488], [1006, 488], [1006, 513], [969, 513]], \"type\": \"textline\", \"text\": \"0.2\", \"confidence_by_character\": [0.9173431992530823, 0.9257070422172546, 0.9158886075019836], \"confidence_by_field\": 0.9158886075019836, \"original_text\": \"0.2\"}, {\"location\": [[1057, 488], [1094, 488], [1094, 513], [1057, 513]], \"type\": \"textline\", \"text\": \"0.2\", \"confidence_by_character\": [0.9179250001907349, 0.9226782917976379, 0.9163774251937866], \"confidence_by_field\": 0.9163774251937866, \"original_text\": \"0.2\"}, {\"location\": [[1059, 518], [1094, 518], [1094, 543], [1059, 543]], \"type\": \"textline\", \"text\": \"0.4\", \"confidence_by_character\": [0.9183651208877563, 0.9259626865386963, 0.9120912551879883], \"confidence_by_field\": 0.9120912551879883, \"original_text\": \"0.4\"}, {\"location\": [[1148, 488], [1185, 488], [1185, 513], [1148, 513]], \"type\": \"textline\", \"text\": \"0.2\", \"confidence_by_character\": [0.9188930988311768, 0.922532856464386, 0.91632080078125], \"confidence_by_field\": 0.91632080078125, \"original_text\": \"0.2\"}, {\"location\": [[1149, 518], [1185, 518], [1185, 543], [1149, 543]], \"type\": \"textline\", \"text\": \"0.4\", \"confidence_by_character\": [0.9181385040283203, 0.9250556230545044, 0.9124614000320435], \"confidence_by_field\": 0.9124614000320435, \"original_text\": \"0.4\"}, {\"location\": [[1238, 518], [1274, 518], [1274, 543], [1238, 543]], \"type\": \"textline\", \"text\": \"0.4\", \"confidence_by_character\": [0.9189369678497314, 0.924277663230896, 0.9133647680282593], \"confidence_by_field\": 0.9133647680282593, \"original_text\": \"0.4\"}, {\"location\": [[1238, 488], [1274, 488], [1274, 513], [1238, 513]], \"type\": \"textline\", \"text\": \"0.2\", \"confidence_by_character\": [0.9190401434898376, 0.9235551357269287, 0.916479766368866], \"confidence_by_field\": 0.916479766368866, \"original_text\": 
\"0.2\"}, {\"location\": [[383, 551], [423, 551], [423, 571], [383, 571]], \"type\": \"textline\", \"text\": \"\\u5408\\u8a08\", \"confidence_by_character\": [0.9172796607017517, 0.9246873259544373], \"confidence_by_field\": 0.9172796607017517, \"original_text\": \"\\u5408\\u8a08\"}, {\"location\": [[696, 550], [736, 550], [736, 574], [696, 574]], \"type\": \"textline\", \"text\": \"100\", \"confidence_by_character\": [0.9186745285987854, 0.9217865467071533, 0.9134313464164734], \"confidence_by_field\": 0.9134313464164734, \"original_text\": \"100\"}, {\"location\": [[786, 548], [826, 548], [826, 574], [786, 574]], \"type\": \"textline\", \"text\": \"100\", \"confidence_by_character\": [0.9187353849411011, 0.9226377010345459, 0.9132767915725708], \"confidence_by_field\": 0.9132767915725708, \"original_text\": \"100\"}, {\"location\": [[877, 548], [917, 548], [917, 574], [877, 574]], \"type\": \"textline\", \"text\": \"100\", \"confidence_by_character\": [0.918876051902771, 0.9223041534423828, 0.9158591628074646], \"confidence_by_field\": 0.9158591628074646, \"original_text\": \"100\"}, {\"location\": [[965, 548], [1007, 548], [1007, 574], [965, 574]], \"type\": \"textline\", \"text\": \"100\", \"confidence_by_character\": [0.9187213182449341, 0.9187850952148438, 0.9130398035049438], \"confidence_by_field\": 0.9130398035049438, \"original_text\": \"100\"}, {\"location\": [[1056, 548], [1096, 548], [1096, 574], [1056, 574]], \"type\": \"textline\", \"text\": \"100\", \"confidence_by_character\": [0.9191818237304688, 0.9226840734481812, 0.9121555089950562], \"confidence_by_field\": 0.9121555089950562, \"original_text\": \"100\"}, {\"location\": [[1146, 548], [1186, 548], [1186, 574], [1146, 574]], \"type\": \"textline\", \"text\": \"100\", \"confidence_by_character\": [0.9189720749855042, 0.9227191805839539, 0.9106909036636353], \"confidence_by_field\": 0.9106909036636353, \"original_text\": \"100\"}, {\"location\": [[1237, 550], [1277, 550], [1277, 574], [1237, 
574]], \"type\": \"textline\", \"text\": \"100\", \"confidence_by_character\": [0.9188445806503296, 0.9203974008560181, 0.9133830666542053], \"confidence_by_field\": 0.9133830666542053, \"original_text\": \"100\"}, {\"location\": [[797, 763], [828, 763], [828, 793], [797, 793]], \"type\": \"textline\", \"text\": \"10\", \"confidence_by_character\": [0.9181389808654785, 0.9251900911331177], \"confidence_by_field\": 0.9181389808654785, \"original_text\": \"10\"}, {\"location\": [[862, 736], [938, 736], [938, 758], [862, 758]], \"type\": \"textline\", \"text\": \"\\u5b9f\\u65bd\\u4f8b3\", \"confidence_by_character\": [0.940096378326416, 0.9323399066925049, 0.7783450484275818, 0.8894999623298645], \"confidence_by_field\": 0.7783450484275818, \"original_text\": \"\\u5b9f\\u65bd\\u4f8b3\"}, {\"location\": [[886, 763], [919, 763], [919, 791], [886, 791]], \"type\": \"textline\", \"text\": \"10\", \"confidence_by_character\": [0.915480375289917, 0.92236328125], \"confidence_by_field\": 0.915480375289917, \"original_text\": \"10\"}, {\"location\": [[1225, 736], [1299, 736], [1299, 758], [1225, 758]], \"type\": \"textline\", \"text\": \"\\u6bd4\\u8f03\\u4f8b4\", \"confidence_by_character\": [0.9564768671989441, 0.9437181949615479, 0.8032115697860718, 0.9161063432693481], \"confidence_by_field\": 0.8032115697860718, \"original_text\": \"\\u6bd4\\u8f03\\u4f8b4\"}, {\"location\": [[243, 831], [358, 831], [358, 851], [243, 851]], \"type\": \"textline\", \"text\": \"\\u71b1\\u786c\\u5316\\u6027\\u6a39\\u8102\", \"confidence_by_character\": [0.9673244953155518, 0.9746560454368591, 0.9246954917907715, 0.9313607811927795, 0.9588099122047424, 0.9608355164527893], \"confidence_by_field\": 0.9246954917907715, \"original_text\": \"\\u71b1\\u786c\\u5316\\u6027\\u6a39\\u8102\"}, {\"location\": [[391, 864], [534, 864], [534, 883], [391, 883]], \"type\": \"textline\", \"text\": \"\\u30b7\\u30a2\\u30cd\\u30fc\\u30c8\\u6a39\\u81021\", \"confidence_by_character\": [0.9139906764030457, 
0.9277936220169067, 0.9299982190132141, 0.9196512699127197, 0.9228595495223999, 0.9653444886207581, 0.9586682319641113, 0.9014871120452881], \"confidence_by_field\": 0.9014871120452881, \"original_text\": \"\\u30b7\\u30a2\\u30cd\\u30fc\\u30c8\\u6a39\\u81021\"}, {\"location\": [[391, 832], [515, 832], [515, 852], [391, 852]], \"type\": \"textline\", \"text\": \"\\u30a8\\u30dd\\u30ad\\u30b7\\u6a39\\u81023\", \"confidence_by_character\": [0.9210723042488098, 0.9120734930038452, 0.8719823360443115, 0.9054407477378845, 0.9484961628913879, 0.955137312412262, 0.9074426889419556], \"confidence_by_field\": 0.8719823360443115, \"original_text\": \"\\u30a8\\u30dd\\u30ad\\u30b7\\u6a39\\u81023\"}, {\"location\": [[391, 801], [515, 801], [515, 821], [391, 821]], \"type\": \"textline\", \"text\": \"\\u30a8\\u30dd\\u30ad\\u30b7\\u6a39\\u81022\", \"confidence_by_character\": [0.9195655584335327, 0.8951327800750732, 0.8749004602432251, 0.9026333689689636, 0.9528785347938538, 0.9567875266075134, 0.9132635593414307], \"confidence_by_field\": 0.8749004602432251, \"original_text\": \"\\u30a8\\u30dd\\u30ad\\u30b7\\u6a39\\u81022\"}, {\"location\": [[391, 769], [512, 769], [512, 789], [391, 789]], \"type\": \"textline\", \"text\": \"\\u30a8\\u30dd\\u30ad\\u30b7\\u6a39\\u81021\", \"confidence_by_character\": [0.9198406338691711, 0.9114395380020142, 0.8742417097091675, 0.9063976407051086, 0.946216344833374, 0.9661325216293335, 0.8822324275970459], \"confidence_by_field\": 0.8742417097091675, \"original_text\": \"\\u30a8\\u30dd\\u30ad\\u30b7\\u6a39\\u81021\"}, {\"location\": [[605, 734], [649, 734], [649, 760], [605, 760]], \"type\": \"textline\", \"text\": \"\\u5358\\u4f4d\", \"confidence_by_character\": [0.9275341629981995, 0.9182473421096802], \"confidence_by_field\": 0.9182473421096802, \"original_text\": \"\\u5358\\u4f4d\"}, {\"location\": [[681, 736], [759, 736], [759, 760], [681, 760]], \"type\": \"textline\", \"text\": \"\\u5b9f\\u65bd\\u4f8b1\", \"confidence_by_character\": 
[0.9462378621101379, 0.9295192360877991, 0.7775577306747437, 0.7519926428794861], \"confidence_by_field\": 0.7519926428794861, \"original_text\": \"\\u5b9f\\u65bd\\u4f8b1\"}, {\"location\": [[772, 736], [847, 736], [847, 760], [772, 760]], \"type\": \"textline\", \"text\": \"\\u5b9f\\u65bd\\u4f8b2\", \"confidence_by_character\": [0.945273756980896, 0.9234545826911926, 0.7456976771354675, 0.8920645117759705], \"confidence_by_field\": 0.7456976771354675, \"original_text\": \"\\u5b9f\\u65bd\\u4f8b2\"}, {\"location\": [[956, 736], [1027, 736], [1027, 758], [956, 758]], \"type\": \"textline\", \"text\": \"\\u6bd4\\u8f03\\u4f8b1\", \"confidence_by_character\": [0.9605406522750854, 0.9484114050865173, 0.8022181391716003, 0.9111186861991882], \"confidence_by_field\": 0.8022181391716003, \"original_text\": \"\\u6bd4\\u8f03\\u4f8b1\"}, {\"location\": [[1046, 736], [1119, 736], [1119, 758], [1046, 758]], \"type\": \"textline\", \"text\": \"\\u6bd4\\u8f03\\u4f8b2\", \"confidence_by_character\": [0.939124345779419, 0.9446831941604614, 0.7813613414764404, 0.9183083176612854], \"confidence_by_field\": 0.7813613414764404, \"original_text\": \"\\u6bd4\\u8f03\\u4f8b2\"}, {\"location\": [[1136, 736], [1209, 736], [1209, 758], [1136, 758]], \"type\": \"textline\", \"text\": \"\\u6bd4\\u8f03\\u4f8b3\", \"confidence_by_character\": [0.9410742521286011, 0.9363664984703064, 0.8050448894500732, 0.912356972694397], \"confidence_by_field\": 0.8050448894500732, \"original_text\": \"\\u6bd4\\u8f03\\u4f8b3\"}, {\"location\": [[710, 799], [731, 799], [731, 823], [710, 823]], \"type\": \"textline\", \"text\": \"5\", \"confidence_by_character\": [0.9124702215194702], \"confidence_by_field\": 0.9124702215194702, \"original_text\": \"5\"}, {\"location\": [[1161, 798], [1183, 798], [1183, 823], [1161, 823]], \"type\": \"textline\", \"text\": \"5\", \"confidence_by_character\": [0.9127680659294128], \"confidence_by_field\": 0.9127680659294128, \"original_text\": \"5\"}, {\"location\": [[612, 874], 
[641, 874], [641, 901], [612, 901]], \"type\": \"textline\", \"text\": \"%\", \"confidence_by_character\": [0.9244439005851746], \"confidence_by_field\": 0.9244439005851746, \"original_text\": \"%\"}, {\"location\": [[705, 861], [738, 861], [738, 887], [705, 887]], \"type\": \"textline\", \"text\": \"10\", \"confidence_by_character\": [0.9159777164459229, 0.9210568070411682], \"confidence_by_field\": 0.9159777164459229, \"original_text\": \"10\"}, {\"location\": [[710, 829], [731, 829], [731, 853], [710, 853]], \"type\": \"textline\", \"text\": \"5\", \"confidence_by_character\": [0.9128246903419495], \"confidence_by_field\": 0.9128246903419495, \"original_text\": \"5\"}, {\"location\": [[710, 895], [731, 895], [731, 917], [710, 917]], \"type\": \"textline\", \"text\": \"5\", \"confidence_by_character\": [0.913021445274353], \"confidence_by_field\": 0.913021445274353, \"original_text\": \"5\"}, {\"location\": [[797, 863], [825, 863], [825, 885], [797, 885]], \"type\": \"textline\", \"text\": \"10\", \"confidence_by_character\": [0.9169679284095764, 0.9230868220329285], \"confidence_by_field\": 0.9169679284095764, \"original_text\": \"10\"}, {\"location\": [[804, 834], [820, 834], [820, 852], [804, 852]], \"type\": \"textline\", \"text\": \"-\", \"confidence_by_character\": [0.9069211483001709], \"confidence_by_field\": 0.9069211483001709, \"original_text\": \"-\"}, {\"location\": [[804, 804], [820, 804], [820, 822], [804, 822]], \"type\": \"textline\", \"text\": \"-\", \"confidence_by_character\": [0.9056602120399475], \"confidence_by_field\": 0.9056602120399475, \"original_text\": \"-\"}, {\"location\": [[894, 834], [909, 834], [909, 850], [894, 850]], \"type\": \"textline\", \"text\": \"-\", \"confidence_by_character\": [0.9042128920555115], \"confidence_by_field\": 0.9042128920555115, \"original_text\": \"-\"}, {\"location\": [[894, 806], [909, 806], [909, 822], [894, 822]], \"type\": \"textline\", \"text\": \"-\", \"confidence_by_character\": 
[0.9011750817298889], \"confidence_by_field\": 0.9011750817298889, \"original_text\": \"-\"}, {\"location\": [[1251, 798], [1272, 798], [1272, 823], [1251, 823]], \"type\": \"textline\", \"text\": \"5\", \"confidence_by_character\": [0.9124239683151245], \"confidence_by_field\": 0.9124239683151245, \"original_text\": \"5\"}, {\"location\": [[801, 895], [822, 895], [822, 917], [801, 917]], \"type\": \"textline\", \"text\": \"5\", \"confidence_by_character\": [0.9138578176498413], \"confidence_by_field\": 0.9138578176498413, \"original_text\": \"5\"}, {\"location\": [[888, 863], [915, 863], [915, 885], [888, 885]], \"type\": \"textline\", \"text\": \"10\", \"confidence_by_character\": [0.916037380695343, 0.9229444265365601], \"confidence_by_field\": 0.916037380695343, \"original_text\": \"10\"}, {\"location\": [[1161, 829], [1183, 829], [1183, 853], [1161, 853]], \"type\": \"textline\", \"text\": \"5\", \"confidence_by_character\": [0.913336455821991], \"confidence_by_field\": 0.913336455821991, \"original_text\": \"5\"}, {\"location\": [[891, 893], [912, 893], [912, 917], [891, 917]], \"type\": \"textline\", \"text\": \"5\", \"confidence_by_character\": [0.9128530025482178], \"confidence_by_field\": 0.9128530025482178, \"original_text\": \"5\"}, {\"location\": [[1156, 860], [1190, 860], [1190, 887], [1156, 887]], \"type\": \"textline\", \"text\": \"10\", \"confidence_by_character\": [0.9161958694458008, 0.922942042350769], \"confidence_by_field\": 0.9161958694458008, \"original_text\": \"10\"}, {\"location\": [[1157, 889], [1185, 889], [1185, 920], [1157, 920]], \"type\": \"textline\", \"text\": \"5\", \"confidence_by_character\": [0.9128023386001587], \"confidence_by_field\": 0.9128023386001587, \"original_text\": \"5\"}, {\"location\": [[1246, 860], [1278, 860], [1278, 887], [1246, 887]], \"type\": \"textline\", \"text\": \"10\", \"confidence_by_character\": [0.9154711961746216, 0.9222509860992432], \"confidence_by_field\": 0.9154711961746216, \"original_text\": 
\"10\"}, {\"location\": [[1249, 829], [1272, 829], [1272, 853], [1249, 853]], \"type\": \"textline\", \"text\": \"5\", \"confidence_by_character\": [0.9134175777435303], \"confidence_by_field\": 0.9134175777435303, \"original_text\": \"5\"}, {\"location\": [[1251, 893], [1272, 893], [1272, 918], [1251, 918]], \"type\": \"textline\", \"text\": \"5\", \"confidence_by_character\": [0.912886381149292], \"confidence_by_field\": 0.912886381149292, \"original_text\": \"5\"}, {\"location\": [[242, 958], [338, 958], [338, 978], [242, 978]], \"type\": \"textline\", \"text\": \"\\u7121\\u6a5f\\u5145\\u586b\\u6750\", \"confidence_by_character\": [0.9516746997833252, 0.9333645105361938, 0.9753089547157288, 0.9772308468818665, 0.961030125617981], \"confidence_by_field\": 0.9333645105361938, \"original_text\": \"\\u7121\\u6a5f\\u5145\\u586b\\u6750\"}, {\"location\": [[242, 927], [320, 927], [320, 946], [242, 946]], \"type\": \"textline\", \"text\": \"\\u786c\\u5316\\u89e6\\u5a92\", \"confidence_by_character\": [0.9817054271697998, 0.9334205985069275, 0.9472013115882874, 0.9651457667350769], \"confidence_by_field\": 0.9334205985069275, \"original_text\": \"\\u786c\\u5316\\u89e6\\u5a92\"}, {\"location\": [[392, 895], [571, 895], [571, 915], [392, 915]], \"type\": \"textline\", \"text\": \"\\u30d5\\u30a7\\u30ce\\u30fc\\u30eb\\u7cfb\\u786c\\u5316\\u52641\", \"confidence_by_character\": [0.8958667516708374, 0.8963339328765869, 0.918157160282135, 0.9283046126365662, 0.9119880199432373, 0.9494227766990662, 0.9859360456466675, 0.9373766779899597, 0.948577344417572, 0.9160327911376953], \"confidence_by_field\": 0.8958667516708374, \"original_text\": \"\\u30d5\\u30a7\\u30ce\\u30fc\\u30eb\\u7cfb\\u786c\\u5316\\u52641\"}, {\"location\": [[390, 958], [496, 958], [496, 978], [390, 978]], \"type\": \"textline\", \"text\": \"\\u7121\\u6a5f\\u5145\\u586b\\u67501\", \"confidence_by_character\": [0.955777108669281, 0.9075387120246887, 0.9792174100875854, 0.9780659079551697, 0.9546536803245544, 
0.9078008532524109], \"confidence_by_field\": 0.9075387120246887, \"original_text\": \"\\u7121\\u6a5f\\u5145\\u586b\\u67501\"}, {\"location\": [[390, 927], [477, 927], [477, 946], [390, 946]], \"type\": \"textline\", \"text\": \"\\u786c\\u5316\\u89e6\\u5a921\", \"confidence_by_character\": [0.9808465838432312, 0.9380431771278381, 0.9594202041625977, 0.9618983864784241, 0.9127021431922913], \"confidence_by_field\": 0.9127021431922913, \"original_text\": \"\\u786c\\u5316\\u89e6\\u5a921\"}, {\"location\": [[705, 926], [739, 926], [739, 949], [705, 949]], \"type\": \"textline\", \"text\": \"0.2\", \"confidence_by_character\": [0.9183443188667297, 0.9227449893951416, 0.9166637063026428], \"confidence_by_field\": 0.9166637063026428, \"original_text\": \"0.2\"}, {\"location\": [[959, 939], [1025, 939], [1025, 963], [959, 963]], \"type\": \"textline\", \"text\": \"(\\u30bb\\u30e9\\u30df\\u30c3\\u30af)\", \"confidence_by_character\": [0.916853666305542, 0.9260740876197815, 0.9092920422554016, 0.9257704615592957, 0.9260854721069336, 0.9179700613021851, 0.9244228005409241], \"confidence_by_field\": 0.9092920422554016, \"original_text\": \"(\\u30bb\\u30e9\\u30df\\u30c3\\u30af)\"}, {\"location\": [[1049, 939], [1115, 939], [1115, 963], [1049, 963]], \"type\": \"textline\", \"text\": \"(\\u30bb\\u30e9\\u30df\\u30c3\\u30af)\", \"confidence_by_character\": [0.9169306755065918, 0.9260130524635315, 0.9072588682174683, 0.9243903160095215, 0.925151526927948, 0.9165183901786804, 0.9235830307006836], \"confidence_by_field\": 0.9072588682174683, \"original_text\": \"(\\u30bb\\u30e9\\u30df\\u30c3\\u30af)\"}, {\"location\": [[273, 1052], [489, 1052], [489, 1071], [273, 1071]], \"type\": \"textline\", \"text\": \"L125D-L125U\", \"confidence_by_character\": [0.9245724678039551, 0.9132282733917236, 0.906276524066925, 0.9224117398262024, 0.9258933067321777, 0.9184830784797668, 0.9157562255859375, 0.9087009429931641, 0.912157416343689, 0.9245545268058777, 0.8954219222068787], 
\"confidence_by_field\": 0.8954219222068787, \"original_text\": \"L125D-L125U\"}, {\"location\": [[277, 1020], [458, 1020], [458, 1039], [277, 1039]], \"type\": \"textline\", \"text\": \"IL1TD-L1TU\", \"confidence_by_character\": [0.8654385209083557, 0.9101585149765015, 0.9015868306159973, 0.8901948928833008, 0.9379502534866333, 0.9125338792800903, 0.9176797866821289, 0.8999291658401489, 0.9107672572135925, 0.9229012131690979], \"confidence_by_field\": 0.8654385209083557, \"original_text\": \"IL1TD-L1TU\"}, {\"location\": [[386, 987], [431, 987], [431, 1011], [386, 1011]], \"type\": \"textline\", \"text\": \"\\u5408\\u8a08\", \"confidence_by_character\": [0.9146950840950012, 0.9339513182640076], \"confidence_by_field\": 0.9146950840950012, \"original_text\": \"\\u5408\\u8a08\"}, {\"location\": [[701, 958], [743, 958], [743, 977], [701, 977]], \"type\": \"textline\", \"text\": \"74.8\", \"confidence_by_character\": [0.9160793423652649, 0.9184203147888184, 0.9271222949028015, 0.9212100505828857], \"confidence_by_field\": 0.9160793423652649, \"original_text\": \"74.8\"}, {\"location\": [[792, 958], [831, 958], [831, 977], [792, 977]], \"type\": \"textline\", \"text\": \"74.8\", \"confidence_by_character\": [0.9163533449172974, 0.9201879501342773, 0.9259200692176819, 0.9224289059638977], \"confidence_by_field\": 0.9163533449172974, \"original_text\": \"74.8\"}, {\"location\": [[796, 926], [826, 926], [826, 945], [796, 945]], \"type\": \"textline\", \"text\": \"0.2\", \"confidence_by_character\": [0.9181907176971436, 0.9227388501167297, 0.9164943099021912], \"confidence_by_field\": 0.9164943099021912, \"original_text\": \"0.2\"}, {\"location\": [[886, 926], [917, 926], [917, 945], [886, 945]], \"type\": \"textline\", \"text\": \"0.2\", \"confidence_by_character\": [0.917766809463501, 0.9248180985450745, 0.916694164276123], \"confidence_by_field\": 0.916694164276123, \"original_text\": \"0.2\"}, {\"location\": [[1153, 923], [1190, 923], [1190, 949], [1153, 949]], 
\"type\": \"textline\", \"text\": \"0.2\", \"confidence_by_character\": [0.9172568917274475, 0.923073947429657, 0.9173568487167358], \"confidence_by_field\": 0.9172568917274475, \"original_text\": \"0.2\"}, {\"location\": [[1243, 923], [1280, 923], [1280, 949], [1243, 949]], \"type\": \"textline\", \"text\": \"0.2\", \"confidence_by_character\": [0.918009340763092, 0.9230136871337891, 0.9172936081886292], \"confidence_by_field\": 0.9172936081886292, \"original_text\": \"0.2\"}, {\"location\": [[603, 1021], [649, 1021], [649, 1044], [603, 1044]], \"type\": \"textline\", \"text\": \"ppm\", \"confidence_by_character\": [0.93487948179245, 0.9136003255844116, 0.9295913577079773], \"confidence_by_field\": 0.9136003255844116, \"original_text\": \"ppm\"}, {\"location\": [[702, 1019], [741, 1019], [741, 1042], [702, 1042]], \"type\": \"textline\", \"text\": \"510\", \"confidence_by_character\": [0.9160666465759277, 0.9195601344108582, 0.9182965755462646], \"confidence_by_field\": 0.9160666465759277, \"original_text\": \"510\"}, {\"location\": [[704, 990], [739, 990], [739, 1009], [704, 1009]], \"type\": \"textline\", \"text\": \"100\", \"confidence_by_character\": [0.9155290126800537, 0.9208177924156189, 0.9127534031867981], \"confidence_by_field\": 0.9127534031867981, \"original_text\": \"100\"}, {\"location\": [[794, 990], [830, 990], [830, 1009], [794, 1009]], \"type\": \"textline\", \"text\": \"100\", \"confidence_by_character\": [0.9162766337394714, 0.9207051396369934, 0.9104440212249756], \"confidence_by_field\": 0.9104440212249756, \"original_text\": \"100\"}, {\"location\": [[881, 958], [922, 958], [922, 977], [881, 977]], \"type\": \"textline\", \"text\": \"74.8\", \"confidence_by_character\": [0.9179209470748901, 0.919108510017395, 0.9271717667579651, 0.9218475222587585], \"confidence_by_field\": 0.9179209470748901, \"original_text\": \"74.8\"}, {\"location\": [[1149, 957], [1195, 957], [1195, 980], [1149, 980]], \"type\": \"textline\", \"text\": \"74.8\", 
\"confidence_by_character\": [0.91691654920578, 0.9166283011436462, 0.9277328848838806, 0.9221088886260986], \"confidence_by_field\": 0.9166283011436462, \"original_text\": \"74.8\"}, {\"location\": [[881, 1019], [920, 1019], [920, 1042], [881, 1042]], \"type\": \"textline\", \"text\": \"470\", \"confidence_by_character\": [0.9185625314712524, 0.9136890769004822, 0.9183825254440308], \"confidence_by_field\": 0.9136890769004822, \"original_text\": \"470\"}, {\"location\": [[883, 990], [920, 990], [920, 1009], [883, 1009]], \"type\": \"textline\", \"text\": \"100\", \"confidence_by_character\": [0.9160255789756775, 0.9194254279136658, 0.9109370708465576], \"confidence_by_field\": 0.9109370708465576, \"original_text\": \"100\"}, {\"location\": [[1151, 987], [1193, 987], [1193, 1012], [1151, 1012]], \"type\": \"textline\", \"text\": \"100\", \"confidence_by_character\": [0.9162431955337524, 0.9209614992141724, 0.9097722768783569], \"confidence_by_field\": 0.9097722768783569, \"original_text\": \"100\"}, {\"location\": [[1240, 957], [1285, 957], [1285, 980], [1240, 980]], \"type\": \"textline\", \"text\": \"74.8\", \"confidence_by_character\": [0.9182584881782532, 0.92033451795578, 0.9307538270950317, 0.9210019111633301], \"confidence_by_field\": 0.9182584881782532, \"original_text\": \"74.8\"}, {\"location\": [[1241, 1019], [1282, 1019], [1282, 1042], [1241, 1042]], \"type\": \"textline\", \"text\": \"510\", \"confidence_by_character\": [0.914992094039917, 0.9192731976509094, 0.9183305501937866], \"confidence_by_field\": 0.914992094039917, \"original_text\": \"510\"}, {\"location\": [[1241, 988], [1282, 988], [1282, 1012], [1241, 1012]], \"type\": \"textline\", \"text\": \"100\", \"confidence_by_character\": [0.9163695573806763, 0.9216129779815674, 0.9101614952087402], \"confidence_by_field\": 0.9101614952087402, \"original_text\": \"100\"}, {\"location\": [[278, 1082], [413, 1082], [413, 1103], [278, 1103]], \"type\": \"textline\", \"text\": 
\"\\u30ac\\u30e9\\u30b9\\u8ee2\\u79fb\\u6e29\\u5ea6\", \"confidence_by_character\": [0.9343377947807312, 0.920525074005127, 0.9253968000411987, 0.9179226756095886, 0.9002148509025574, 0.9482362270355225, 0.9286996722221375], \"confidence_by_field\": 0.9002148509025574, \"original_text\": \"\\u30ac\\u30e9\\u30b9\\u8ee2\\u79fb\\u6e29\\u5ea6\"}, {\"location\": [[420, 1082], [458, 1082], [458, 1104], [420, 1104]], \"type\": \"textline\", \"text\": \"(Tg)\", \"confidence_by_character\": [0.9211322665214539, 0.8978011012077332, 0.9115868210792542, 0.9274181723594666], \"confidence_by_field\": 0.8978011012077332, \"original_text\": \"(Tg)\"}, {\"location\": [[605, 1051], [650, 1051], [650, 1075], [605, 1075]], \"type\": \"textline\", \"text\": \"ppm\", \"confidence_by_character\": [0.9387204647064209, 0.9056726098060608, 0.9325636029243469], \"confidence_by_field\": 0.9056726098060608, \"original_text\": \"ppm\"}, {\"location\": [[617, 1082], [641, 1082], [641, 1104], [617, 1104]], \"type\": \"textline\", \"text\": \"\\u00b0C\", \"confidence_by_character\": [0.9452391266822815, 0.9272077679634094], \"confidence_by_field\": 0.9272077679634094, \"original_text\": \"\\u00b0C\"}, {\"location\": [[704, 1052], [739, 1052], [739, 1071], [704, 1071]], \"type\": \"textline\", \"text\": \"390\", \"confidence_by_character\": [0.9161151647567749, 0.9135255217552185, 0.9237526655197144], \"confidence_by_field\": 0.9135255217552185, \"original_text\": \"390\"}, {\"location\": [[793, 1084], [828, 1084], [828, 1103], [793, 1103]], \"type\": \"textline\", \"text\": \"224\", \"confidence_by_character\": [0.9199969172477722, 0.9111586213111877, 0.9230489730834961], \"confidence_by_field\": 0.9111586213111877, \"original_text\": \"224\"}, {\"location\": [[793, 1052], [830, 1052], [830, 1071], [793, 1071]], \"type\": \"textline\", \"text\": \"330\", \"confidence_by_character\": [0.9186529517173767, 0.913848340511322, 0.9239091277122498], \"confidence_by_field\": 0.913848340511322, 
\"original_text\": \"330\"}, {\"location\": [[793, 1019], [831, 1019], [831, 1042], [793, 1042]], \"type\": \"textline\", \"text\": \"470\", \"confidence_by_character\": [0.9227962493896484, 0.9154515862464905, 0.9186294078826904], \"confidence_by_field\": 0.9154515862464905, \"original_text\": \"470\"}, {\"location\": [[1151, 1019], [1191, 1019], [1191, 1044], [1151, 1044]], \"type\": \"textline\", \"text\": \"510\", \"confidence_by_character\": [0.919104814529419, 0.9196509122848511, 0.9189945459365845], \"confidence_by_field\": 0.9189945459365845, \"original_text\": \"510\"}, {\"location\": [[881, 1081], [920, 1081], [920, 1104], [881, 1104]], \"type\": \"textline\", \"text\": \"224\", \"confidence_by_character\": [0.9203312993049622, 0.9107922315597534, 0.9243770241737366], \"confidence_by_field\": 0.9107922315597534, \"original_text\": \"224\"}, {\"location\": [[881, 1050], [920, 1050], [920, 1074], [881, 1074]], \"type\": \"textline\", \"text\": \"330\", \"confidence_by_character\": [0.9171018600463867, 0.9141280055046082, 0.9222427010536194], \"confidence_by_field\": 0.9141280055046082, \"original_text\": \"330\"}, {\"location\": [[1151, 1084], [1191, 1084], [1191, 1108], [1151, 1108]], \"type\": \"textline\", \"text\": \"235\", \"confidence_by_character\": [0.9159690141677856, 0.9248628616333008, 0.9205185174942017], \"confidence_by_field\": 0.9159690141677856, \"original_text\": \"235\"}, {\"location\": [[1151, 1050], [1191, 1050], [1191, 1074], [1151, 1074]], \"type\": \"textline\", \"text\": \"390\", \"confidence_by_character\": [0.9146880507469177, 0.911052405834198, 0.9227294325828552], \"confidence_by_field\": 0.911052405834198, \"original_text\": \"390\"}, {\"location\": [[1241, 1084], [1280, 1084], [1280, 1108], [1241, 1108]], \"type\": \"textline\", \"text\": \"235\", \"confidence_by_character\": [0.9159569144248962, 0.9244524240493774, 0.920172393321991], \"confidence_by_field\": 0.9159569144248962, \"original_text\": \"235\"}, {\"location\": 
[[1241, 1050], [1282, 1050], [1282, 1074], [1241, 1074]], \"type\": \"textline\", \"text\": \"390\", \"confidence_by_character\": [0.9140211939811707, 0.9115845561027527, 0.9224275946617126], \"confidence_by_field\": 0.9115845561027527, \"original_text\": \"390\"}, {\"location\": [[184, 338], [205, 338], [205, 508], [184, 508]], \"type\": \"textline\", \"text\": \"\\u71b1\\u786c\\u5316\\u6027\\u6a39\\u8102\\u7d44\\u6210\\u7269\", \"confidence_by_character\": [0.9959474205970764, 0.9997753500938416, 0.9999818801879883, 0.9999709129333496, 0.9999679327011108, 0.9999550580978394, 0.9999865293502808, 0.9992927312850952, 0.9999799728393555], \"confidence_by_field\": 0.9959474205970764, \"original_text\": \"\\u71b1\\u786c\\u5316\\u6027\\u6a39\\u8102\\u7d44\\u6210\\u7269\"}, {\"location\": [[189, 804], [210, 804], [210, 976], [189, 976]], \"type\": \"textline\", \"text\": \"\\u71b1\\u786c\\u5316\\u6027\\u6a39\\u8102\\u7d44\\u6210\\u7269\", \"confidence_by_character\": [0.9931052327156067, 0.9999232292175293, 0.9999855756759644, 0.9999867677688599, 0.9999822378158569, 0.9999606609344482, 0.9999895095825195, 0.9993935823440552, 0.9371008276939392], \"confidence_by_field\": 0.9371008276939392, \"original_text\": \"\\u71b1\\u786c\\u5316\\u6027\\u6a39\\u8102\\u7d44\\u6210\\u7269\"}], \"table\": [{\"location\": [[1218, 1080], [1303, 1080], [1303, 1107], [1218, 1107]], \"bbox\": [1218, 1080, 1303, 1107], \"points\": [[1277, 1081], [1278, 1080], [1279, 1081], [1280, 1080], [1281, 1081], [1282, 1080], [1284, 1081], [1285, 1080], [1286, 1081], [1287, 1080], [1288, 1081], [1289, 1080], [1291, 1081], [1302, 1081], [1303, 1082], [1303, 1106], [1302, 1107], [1219, 1107], [1218, 1106], [1218, 1082], [1219, 1081]], \"type\": \"cell\", \"rows\": [11, 11], \"columns\": [10, 10], \"text_list\": []}, {\"location\": [[1128, 1080], [1213, 1080], [1213, 1107], [1128, 1107]], \"bbox\": [1128, 1080, 1213, 1107], \"points\": [[1129, 1081], [1130, 1080], [1132, 1081], [1212, 1081], [1213, 1082], 
[1213, 1106], [1212, 1107], [1129, 1107], [1128, 1106], [1129, 1105]], \"type\": \"cell\", \"rows\": [11, 11], \"columns\": [9, 9], \"text_list\": []}, {\"location\": [[858, 1080], [943, 1080], [943, 1107], [858, 1107]], \"bbox\": [858, 1080, 943, 1107], \"points\": [[877, 1081], [878, 1080], [879, 1081], [880, 1080], [881, 1081], [882, 1080], [884, 1081], [885, 1080], [887, 1080], [888, 1081], [889, 1080], [934, 1080], [935, 1081], [942, 1081], [943, 1082], [943, 1106], [942, 1107], [859, 1107], [858, 1106], [858, 1082], [859, 1081]], \"type\": \"cell\", \"rows\": [11, 11], \"columns\": [6, 6], \"text_list\": []}, {\"location\": [[769, 1080], [853, 1080], [853, 1107], [769, 1107]], \"bbox\": [769, 1080, 853, 1107], \"points\": [[792, 1081], [794, 1080], [795, 1081], [806, 1081], [808, 1080], [809, 1081], [811, 1081], [812, 1080], [815, 1080], [816, 1081], [817, 1080], [838, 1080], [839, 1081], [840, 1080], [842, 1081], [852, 1081], [853, 1082], [853, 1106], [852, 1107], [770, 1107], [769, 1106], [769, 1082], [770, 1081]], \"type\": \"cell\", \"rows\": [11, 11], \"columns\": [5, 5], \"text_list\": []}, {\"location\": [[678, 1080], [763, 1080], [763, 1107], [678, 1107]], \"bbox\": [678, 1080, 763, 1107], \"points\": [[704, 1081], [705, 1080], [706, 1081], [707, 1080], [708, 1081], [709, 1080], [711, 1081], [712, 1080], [713, 1081], [714, 1080], [715, 1081], [716, 1080], [718, 1081], [719, 1080], [720, 1081], [721, 1080], [722, 1081], [723, 1080], [725, 1081], [726, 1080], [727, 1081], [762, 1081], [763, 1082], [763, 1106], [762, 1107], [679, 1107], [678, 1106], [678, 1082], [679, 1081]], \"type\": \"cell\", \"rows\": [11, 11], \"columns\": [4, 4], \"text_list\": []}, {\"location\": [[577, 1080], [674, 1080], [674, 1107], [577, 1107]], \"bbox\": [577, 1080, 674, 1107], \"points\": [[622, 1081], [623, 1080], [625, 1080], [626, 1081], [628, 1081], [629, 1080], [635, 1080], [636, 1081], [672, 1081], [673, 1082], [673, 1104], [674, 1105], [674, 1106], [673, 1107], [578, 
1107], [577, 1106], [577, 1082], [578, 1081]], \"type\": \"cell\", \"rows\": [11, 11], \"columns\": [3, 3], \"text_list\": []}, {\"location\": [[171, 1080], [574, 1080], [574, 1107], [171, 1107]], \"bbox\": [171, 1080, 574, 1107], \"points\": [[171, 1081], [173, 1080], [174, 1081], [198, 1081], [199, 1080], [201, 1081], [202, 1080], [221, 1080], [222, 1081], [223, 1080], [224, 1081], [225, 1080], [226, 1081], [285, 1081], [286, 1080], [288, 1080], [290, 1081], [292, 1081], [293, 1080], [297, 1080], [298, 1081], [299, 1081], [300, 1080], [312, 1080], [313, 1081], [318, 1081], [319, 1080], [320, 1081], [321, 1080], [370, 1080], [371, 1081], [374, 1081], [375, 1080], [390, 1080], [391, 1081], [394, 1081], [395, 1080], [408, 1080], [409, 1081], [410, 1080], [411, 1081], [418, 1081], [419, 1080], [421, 1081], [422, 1080], [423, 1081], [425, 1081], [426, 1080], [443, 1080], [444, 1081], [451, 1081], [452, 1080], [456, 1080], [457, 1081], [573, 1081], [574, 1082], [574, 1106], [573, 1107], [173, 1107], [171, 1106]], \"type\": \"cell\", \"rows\": [11, 11], \"columns\": [0, 2], \"text_list\": []}, {\"location\": [[1218, 1050], [1303, 1050], [1303, 1075], [1218, 1075]], \"bbox\": [1218, 1050, 1303, 1075], \"points\": [[1218, 1051], [1219, 1050], [1302, 1050], [1303, 1051], [1303, 1074], [1302, 1075], [1219, 1075], [1218, 1074]], \"type\": \"cell\", \"rows\": [10, 10], \"columns\": [10, 10], \"text_list\": []}, {\"location\": [[1129, 1050], [1213, 1050], [1213, 1075], [1129, 1075]], \"bbox\": [1129, 1050, 1213, 1075], \"points\": [[1129, 1051], [1130, 1050], [1212, 1050], [1213, 1051], [1213, 1074], [1212, 1075], [1130, 1075], [1129, 1074]], \"type\": \"cell\", \"rows\": [10, 10], \"columns\": [9, 9], \"text_list\": []}, {\"location\": [[858, 1050], [943, 1050], [943, 1075], [858, 1075]], \"bbox\": [858, 1050, 943, 1075], \"points\": [[858, 1051], [859, 1050], [942, 1050], [943, 1051], [943, 1074], [942, 1075], [859, 1075], [858, 1074]], \"type\": \"cell\", \"rows\": [10, 
10], \"columns\": [6, 6], \"text_list\": []}, {\"location\": [[769, 1050], [853, 1050], [853, 1075], [769, 1075]], \"bbox\": [769, 1050, 853, 1075], \"points\": [[769, 1051], [770, 1050], [852, 1050], [853, 1051], [853, 1074], [852, 1075], [770, 1075], [769, 1074]], \"type\": \"cell\", \"rows\": [10, 10], \"columns\": [5, 5], \"text_list\": []}, {\"location\": [[678, 1050], [763, 1050], [763, 1075], [678, 1075]], \"bbox\": [678, 1050, 763, 1075], \"points\": [[678, 1051], [679, 1050], [762, 1050], [763, 1051], [763, 1074], [762, 1075], [679, 1075], [678, 1074]], \"type\": \"cell\", \"rows\": [10, 10], \"columns\": [4, 4], \"text_list\": []}, {\"location\": [[577, 1050], [673, 1050], [673, 1075], [577, 1075]], \"bbox\": [577, 1050, 673, 1075], \"points\": [[577, 1051], [578, 1050], [672, 1050], [673, 1051], [673, 1074], [672, 1075], [580, 1075], [578, 1074], [578, 1061], [577, 1060], [577, 1054], [578, 1053], [578, 1052]], \"type\": \"cell\", \"rows\": [10, 10], \"columns\": [3, 3], \"text_list\": []}, {\"location\": [[171, 1050], [574, 1050], [574, 1075], [171, 1075]], \"bbox\": [171, 1050, 574, 1075], \"points\": [[171, 1051], [173, 1050], [573, 1050], [574, 1051], [574, 1074], [573, 1075], [173, 1075], [171, 1074]], \"type\": \"cell\", \"rows\": [10, 10], \"columns\": [0, 2], \"text_list\": []}, {\"location\": [[577, 1018], [673, 1018], [673, 1044], [577, 1044]], \"bbox\": [577, 1018, 673, 1044], \"points\": [[577, 1019], [578, 1018], [672, 1018], [673, 1019], [673, 1043], [672, 1044], [580, 1044], [578, 1043], [578, 1031], [577, 1030]], \"type\": \"cell\", \"rows\": [9, 9], \"columns\": [3, 3], \"text_list\": []}, {\"location\": [[1218, 1017], [1303, 1017], [1303, 1044], [1218, 1044]], \"bbox\": [1218, 1017, 1303, 1044], \"points\": [[1244, 1018], [1245, 1017], [1289, 1017], [1291, 1018], [1292, 1017], [1293, 1018], [1302, 1018], [1303, 1019], [1303, 1043], [1302, 1044], [1219, 1044], [1218, 1043], [1218, 1019], [1219, 1018]], \"type\": \"cell\", \"rows\": [9, 
9], \"columns\": [10, 10], \"text_list\": []}, {\"location\": [[1129, 1017], [1213, 1017], [1213, 1044], [1129, 1044]], \"bbox\": [1129, 1017, 1213, 1044], \"points\": [[1153, 1018], [1154, 1017], [1155, 1018], [1156, 1018], [1157, 1017], [1196, 1017], [1197, 1018], [1198, 1017], [1199, 1018], [1212, 1018], [1213, 1019], [1213, 1043], [1212, 1044], [1130, 1044], [1129, 1043], [1129, 1019], [1130, 1018]], \"type\": \"cell\", \"rows\": [9, 9], \"columns\": [9, 9], \"text_list\": []}, {\"location\": [[858, 1017], [943, 1017], [943, 1044], [858, 1044]], \"bbox\": [858, 1017, 943, 1044], \"points\": [[884, 1018], [885, 1017], [929, 1017], [930, 1018], [942, 1018], [943, 1019], [943, 1043], [942, 1044], [859, 1044], [858, 1043], [858, 1019], [859, 1018]], \"type\": \"cell\", \"rows\": [9, 9], \"columns\": [6, 6], \"text_list\": []}, {\"location\": [[769, 1017], [853, 1017], [853, 1044], [769, 1044]], \"bbox\": [769, 1017, 853, 1044], \"points\": [[795, 1018], [796, 1017], [836, 1017], [837, 1018], [838, 1017], [839, 1018], [852, 1018], [853, 1019], [853, 1043], [852, 1044], [770, 1044], [769, 1043], [769, 1019], [770, 1018]], \"type\": \"cell\", \"rows\": [9, 9], \"columns\": [5, 5], \"text_list\": []}, {\"location\": [[678, 1017], [763, 1017], [763, 1044], [678, 1044]], \"bbox\": [678, 1017, 763, 1044], \"points\": [[704, 1018], [705, 1017], [706, 1018], [707, 1017], [747, 1017], [748, 1018], [749, 1017], [750, 1018], [762, 1018], [763, 1019], [763, 1043], [762, 1044], [679, 1044], [678, 1043], [678, 1019], [679, 1018]], \"type\": \"cell\", \"rows\": [9, 9], \"columns\": [4, 4], \"text_list\": []}, {\"location\": [[171, 1017], [574, 1017], [574, 1044], [171, 1044]], \"bbox\": [171, 1017, 574, 1044], \"points\": [[273, 1018], [274, 1017], [276, 1018], [277, 1017], [366, 1017], [367, 1018], [383, 1018], [384, 1017], [396, 1017], [397, 1018], [398, 1017], [399, 1018], [401, 1017], [402, 1018], [403, 1017], [464, 1017], [465, 1018], [467, 1018], [468, 1017], [471, 1017], 
[472, 1018], [573, 1018], [574, 1019], [574, 1043], [573, 1044], [173, 1044], [171, 1043], [171, 1019], [173, 1018]], \"type\": \"cell\", \"rows\": [9, 9], \"columns\": [0, 2], \"text_list\": []}, {\"location\": [[1218, 987], [1303, 987], [1303, 1013], [1218, 1013]], \"bbox\": [1218, 987, 1303, 1013], \"points\": [[1218, 988], [1219, 987], [1302, 987], [1303, 988], [1303, 1012], [1302, 1013], [1219, 1013], [1218, 1012]], \"type\": \"cell\", \"rows\": [8, 8], \"columns\": [10, 10], \"text_list\": []}, {\"location\": [[1129, 987], [1213, 987], [1213, 1013], [1129, 1013]], \"bbox\": [1129, 987, 1213, 1013], \"points\": [[1129, 988], [1130, 987], [1212, 987], [1213, 988], [1213, 1012], [1212, 1013], [1130, 1013], [1129, 1012]], \"type\": \"cell\", \"rows\": [8, 8], \"columns\": [9, 9], \"text_list\": []}, {\"location\": [[858, 987], [943, 987], [943, 1013], [858, 1013]], \"bbox\": [858, 987, 943, 1013], \"points\": [[858, 988], [859, 987], [942, 987], [943, 988], [943, 1012], [942, 1013], [859, 1013], [858, 1012]], \"type\": \"cell\", \"rows\": [8, 8], \"columns\": [6, 6], \"text_list\": []}, {\"location\": [[769, 987], [853, 987], [853, 1013], [769, 1013]], \"bbox\": [769, 987, 853, 1013], \"points\": [[769, 988], [770, 987], [852, 987], [853, 988], [853, 1012], [852, 1013], [770, 1013], [769, 1012]], \"type\": \"cell\", \"rows\": [8, 8], \"columns\": [5, 5], \"text_list\": []}, {\"location\": [[678, 987], [763, 987], [763, 1013], [678, 1013]], \"bbox\": [678, 987, 763, 1013], \"points\": [[678, 988], [679, 987], [762, 987], [763, 988], [763, 1012], [762, 1013], [679, 1013], [678, 1012]], \"type\": \"cell\", \"rows\": [8, 8], \"columns\": [4, 4], \"text_list\": []}, {\"location\": [[240, 987], [574, 987], [574, 1013], [240, 1013]], \"bbox\": [240, 987, 574, 1013], \"points\": [[240, 988], [242, 987], [573, 987], [574, 988], [574, 1012], [573, 1013], [242, 1013], [240, 1012]], \"type\": \"cell\", \"rows\": [8, 8], \"columns\": [1, 2], \"text_list\": []}, {\"location\": 
[[240, 955], [383, 955], [383, 983], [240, 983]], \"bbox\": [240, 955, 383, 983], \"points\": [[240, 956], [242, 955], [382, 955], [383, 956], [383, 982], [382, 983], [242, 983], [240, 982]], \"type\": \"cell\", \"rows\": [7, 7], \"columns\": [1, 1], \"text_list\": []}, {\"location\": [[1218, 954], [1303, 954], [1303, 983], [1218, 983]], \"bbox\": [1218, 954, 1303, 983], \"points\": [[1249, 955], [1250, 954], [1264, 954], [1265, 955], [1302, 955], [1303, 956], [1303, 982], [1302, 983], [1219, 983], [1218, 982], [1219, 981], [1218, 980], [1218, 956], [1219, 955]], \"type\": \"cell\", \"rows\": [7, 7], \"columns\": [10, 10], \"text_list\": []}, {\"location\": [[1129, 954], [1213, 954], [1213, 983], [1129, 983]], \"bbox\": [1129, 954, 1213, 983], \"points\": [[1164, 955], [1165, 954], [1167, 955], [1168, 954], [1169, 955], [1170, 954], [1171, 955], [1173, 954], [1174, 955], [1212, 955], [1213, 956], [1213, 982], [1212, 983], [1130, 983], [1129, 982], [1129, 956], [1130, 955]], \"type\": \"cell\", \"rows\": [7, 7], \"columns\": [9, 9], \"text_list\": []}, {\"location\": [[858, 954], [943, 954], [943, 983], [858, 983]], \"bbox\": [858, 954, 943, 983], \"points\": [[888, 955], [889, 954], [904, 954], [905, 955], [942, 955], [943, 956], [943, 982], [942, 983], [859, 983], [858, 982], [858, 956], [859, 955]], \"type\": \"cell\", \"rows\": [7, 7], \"columns\": [6, 6], \"text_list\": []}, {\"location\": [[769, 954], [853, 954], [853, 983], [769, 983]], \"bbox\": [769, 954, 853, 983], \"points\": [[806, 955], [808, 954], [809, 955], [810, 954], [811, 955], [852, 955], [853, 956], [853, 982], [852, 983], [770, 983], [769, 982], [769, 956], [770, 955]], \"type\": \"cell\", \"rows\": [7, 7], \"columns\": [5, 5], \"text_list\": []}, {\"location\": [[678, 954], [763, 954], [763, 983], [678, 983]], \"bbox\": [678, 954, 763, 983], \"points\": [[713, 955], [714, 954], [715, 955], [762, 955], [763, 956], [763, 982], [762, 983], [679, 983], [678, 982], [678, 956], [679, 955]], 
\"type\": \"cell\", \"rows\": [7, 7], \"columns\": [4, 4], \"text_list\": []}, {\"location\": [[388, 954], [574, 954], [574, 983], [388, 983]], \"bbox\": [388, 954, 574, 983], \"points\": [[388, 955], [389, 954], [391, 954], [392, 955], [573, 955], [574, 956], [574, 982], [573, 983], [389, 983], [388, 982]], \"type\": \"cell\", \"rows\": [7, 7], \"columns\": [2, 2], \"text_list\": []}, {\"location\": [[1218, 923], [1303, 923], [1303, 950], [1218, 950]], \"bbox\": [1218, 923, 1303, 950], \"points\": [[1244, 925], [1245, 923], [1287, 923], [1288, 925], [1289, 923], [1291, 925], [1292, 923], [1293, 925], [1302, 925], [1303, 926], [1303, 949], [1302, 950], [1219, 950], [1218, 949], [1219, 948], [1218, 947], [1218, 926], [1219, 925]], \"type\": \"cell\", \"rows\": [6, 6], \"columns\": [10, 10], \"text_list\": []}, {\"location\": [[1129, 923], [1213, 923], [1213, 950], [1129, 950]], \"bbox\": [1129, 923, 1213, 950], \"points\": [[1129, 925], [1130, 923], [1132, 925], [1141, 925], [1142, 923], [1143, 925], [1144, 923], [1146, 925], [1147, 923], [1148, 925], [1149, 923], [1150, 925], [1151, 923], [1187, 923], [1188, 925], [1189, 923], [1190, 925], [1191, 923], [1192, 925], [1212, 925], [1213, 926], [1213, 949], [1212, 950], [1130, 950], [1129, 949]], \"type\": \"cell\", \"rows\": [6, 6], \"columns\": [9, 9], \"text_list\": []}, {\"location\": [[858, 923], [943, 923], [943, 950], [858, 950]], \"bbox\": [858, 923, 943, 950], \"points\": [[884, 925], [885, 923], [929, 923], [930, 925], [932, 923], [933, 925], [942, 925], [943, 926], [943, 949], [942, 950], [859, 950], [858, 949], [858, 926], [859, 925]], \"type\": \"cell\", \"rows\": [6, 6], \"columns\": [6, 6], \"text_list\": []}, {\"location\": [[769, 923], [853, 923], [853, 950], [769, 950]], \"bbox\": [769, 923, 853, 950], \"points\": [[790, 925], [791, 923], [792, 925], [794, 923], [824, 923], [825, 925], [826, 923], [828, 925], [829, 923], [830, 925], [831, 923], [832, 925], [833, 923], [835, 925], [836, 923], [837, 
925], [852, 925], [853, 926], [853, 949], [852, 950], [770, 950], [769, 949], [769, 926], [770, 925]], \"type\": \"cell\", \"rows\": [6, 6], \"columns\": [5, 5], \"text_list\": []}, {\"location\": [[678, 923], [763, 923], [763, 950], [678, 950]], \"bbox\": [678, 923, 763, 950], \"points\": [[678, 925], [679, 923], [680, 925], [694, 925], [695, 923], [697, 925], [699, 925], [700, 923], [701, 925], [702, 923], [737, 923], [739, 925], [740, 923], [741, 925], [742, 923], [743, 925], [744, 923], [746, 925], [747, 923], [748, 925], [749, 923], [750, 925], [751, 923], [753, 925], [762, 925], [763, 926], [763, 949], [762, 950], [679, 950], [678, 949]], \"type\": \"cell\", \"rows\": [6, 6], \"columns\": [4, 4], \"text_list\": []}, {\"location\": [[388, 923], [574, 923], [574, 950], [388, 950]], \"bbox\": [388, 923, 574, 950], \"points\": [[388, 925], [389, 923], [390, 925], [391, 923], [410, 923], [411, 925], [414, 925], [415, 923], [422, 923], [423, 925], [425, 925], [426, 923], [429, 923], [430, 925], [431, 923], [438, 923], [439, 925], [440, 925], [442, 923], [446, 923], [447, 925], [449, 925], [450, 923], [454, 923], [456, 925], [457, 923], [490, 923], [491, 925], [492, 923], [493, 925], [494, 923], [495, 925], [497, 923], [498, 925], [499, 923], [500, 925], [501, 923], [502, 925], [504, 923], [505, 925], [506, 923], [507, 925], [508, 923], [509, 925], [511, 923], [512, 925], [513, 923], [514, 925], [515, 923], [516, 925], [518, 923], [519, 925], [520, 923], [521, 925], [522, 923], [523, 925], [525, 923], [526, 925], [527, 923], [528, 925], [529, 923], [530, 925], [532, 923], [533, 925], [534, 923], [535, 925], [536, 923], [537, 925], [539, 923], [546, 923], [547, 925], [548, 923], [549, 925], [550, 923], [552, 925], [553, 923], [554, 925], [555, 923], [556, 925], [573, 925], [574, 926], [574, 949], [573, 950], [389, 950], [388, 949]], \"type\": \"cell\", \"rows\": [6, 6], \"columns\": [2, 2], \"text_list\": []}, {\"location\": [[240, 923], [383, 923], [383, 950], [240, 
950]], \"bbox\": [240, 923, 383, 950], \"points\": [[240, 925], [242, 923], [243, 925], [244, 925], [245, 923], [260, 923], [261, 925], [263, 923], [264, 925], [267, 925], [268, 923], [274, 923], [276, 925], [278, 925], [279, 923], [281, 923], [283, 925], [284, 923], [291, 923], [292, 925], [293, 925], [294, 923], [300, 923], [301, 925], [302, 923], [321, 923], [322, 925], [323, 923], [325, 925], [326, 923], [327, 925], [382, 925], [383, 926], [383, 949], [382, 950], [242, 950], [240, 949]], \"type\": \"cell\", \"rows\": [6, 6], \"columns\": [1, 1], \"text_list\": []}, {\"location\": [[1218, 893], [1303, 893], [1303, 920], [1218, 920]], \"bbox\": [1218, 893, 1303, 920], \"points\": [[1218, 894], [1219, 893], [1302, 893], [1303, 894], [1303, 919], [1302, 920], [1219, 920], [1218, 919]], \"type\": \"cell\", \"rows\": [5, 5], \"columns\": [10, 10], \"text_list\": []}, {\"location\": [[1129, 893], [1213, 893], [1213, 920], [1129, 920]], \"bbox\": [1129, 893, 1213, 920], \"points\": [[1129, 894], [1130, 893], [1212, 893], [1213, 894], [1213, 919], [1212, 920], [1130, 920], [1129, 919]], \"type\": \"cell\", \"rows\": [5, 5], \"columns\": [9, 9], \"text_list\": []}, {\"location\": [[858, 893], [943, 893], [943, 920], [858, 920]], \"bbox\": [858, 893, 943, 920], \"points\": [[858, 894], [859, 893], [942, 893], [943, 894], [943, 919], [942, 920], [859, 920], [858, 919]], \"type\": \"cell\", \"rows\": [5, 5], \"columns\": [6, 6], \"text_list\": []}, {\"location\": [[769, 893], [853, 893], [853, 920], [769, 920]], \"bbox\": [769, 893, 853, 920], \"points\": [[769, 894], [770, 893], [852, 893], [853, 894], [853, 919], [852, 920], [770, 920], [769, 919]], \"type\": \"cell\", \"rows\": [5, 5], \"columns\": [5, 5], \"text_list\": []}, {\"location\": [[678, 893], [763, 893], [763, 920], [678, 920]], \"bbox\": [678, 893, 763, 920], \"points\": [[678, 894], [679, 893], [762, 893], [763, 894], [763, 919], [762, 920], [679, 920], [678, 919], [679, 918], [678, 916]], \"type\": 
\"cell\", \"rows\": [5, 5], \"columns\": [4, 4], \"text_list\": []}, {\"location\": [[388, 893], [574, 893], [574, 920], [388, 920]], \"bbox\": [388, 893, 574, 920], \"points\": [[388, 894], [389, 893], [573, 893], [574, 894], [574, 919], [573, 920], [389, 920], [388, 919]], \"type\": \"cell\", \"rows\": [5, 5], \"columns\": [2, 2], \"text_list\": []}, {\"location\": [[1218, 860], [1303, 860], [1303, 887], [1218, 887]], \"bbox\": [1218, 860, 1303, 887], \"points\": [[1244, 861], [1245, 860], [1246, 861], [1247, 860], [1287, 860], [1288, 861], [1289, 860], [1291, 861], [1302, 861], [1303, 863], [1303, 886], [1302, 887], [1219, 887], [1218, 886], [1218, 863], [1219, 861]], \"type\": \"cell\", \"rows\": [4, 4], \"columns\": [10, 10], \"text_list\": []}, {\"location\": [[1129, 860], [1213, 860], [1213, 887], [1129, 887]], \"bbox\": [1129, 860, 1213, 887], \"points\": [[1143, 861], [1144, 860], [1146, 861], [1147, 860], [1148, 861], [1149, 860], [1150, 861], [1151, 860], [1153, 861], [1154, 860], [1155, 861], [1156, 860], [1157, 861], [1158, 860], [1191, 860], [1192, 861], [1194, 860], [1195, 861], [1212, 861], [1213, 863], [1213, 886], [1212, 887], [1130, 887], [1129, 886], [1129, 863], [1130, 861]], \"type\": \"cell\", \"rows\": [4, 4], \"columns\": [9, 9], \"text_list\": []}, {\"location\": [[858, 860], [943, 860], [943, 887], [858, 887]], \"bbox\": [858, 860, 943, 887], \"points\": [[874, 861], [875, 860], [877, 861], [878, 860], [879, 861], [884, 861], [885, 860], [886, 861], [887, 860], [929, 860], [930, 861], [932, 860], [933, 861], [942, 861], [943, 863], [943, 886], [942, 887], [859, 887], [858, 886], [858, 863], [859, 861]], \"type\": \"cell\", \"rows\": [4, 4], \"columns\": [6, 6], \"text_list\": []}, {\"location\": [[769, 860], [853, 860], [853, 887], [769, 887]], \"bbox\": [769, 860, 853, 887], \"points\": [[795, 861], [796, 860], [797, 861], [798, 860], [831, 860], [832, 861], [833, 860], [835, 861], [852, 861], [853, 863], [853, 886], [852, 887], [770, 
887], [769, 886], [769, 863], [770, 861]], \"type\": \"cell\", \"rows\": [4, 4], \"columns\": [5, 5], \"text_list\": []}, {\"location\": [[678, 860], [763, 860], [763, 887], [678, 887]], \"bbox\": [678, 860, 763, 887], \"points\": [[678, 861], [679, 860], [680, 861], [704, 861], [705, 860], [706, 861], [707, 860], [747, 860], [748, 861], [749, 860], [750, 861], [751, 860], [753, 861], [762, 861], [763, 863], [763, 886], [762, 887], [679, 887], [678, 886]], \"type\": \"cell\", \"rows\": [4, 4], \"columns\": [4, 4], \"text_list\": []}, {\"location\": [[388, 860], [574, 860], [574, 887], [388, 887]], \"bbox\": [388, 860, 574, 887], \"points\": [[388, 861], [389, 860], [392, 860], [394, 861], [411, 861], [412, 860], [414, 861], [415, 860], [416, 861], [417, 860], [548, 860], [549, 861], [550, 860], [552, 861], [553, 860], [554, 861], [555, 860], [556, 861], [557, 860], [559, 861], [560, 860], [561, 861], [573, 861], [574, 863], [574, 886], [573, 887], [389, 887], [388, 886]], \"type\": \"cell\", \"rows\": [4, 4], \"columns\": [2, 2], \"text_list\": []}, {\"location\": [[1218, 830], [1303, 830], [1303, 857], [1218, 857]], \"bbox\": [1218, 830, 1303, 857], \"points\": [[1218, 831], [1219, 830], [1302, 830], [1303, 831], [1303, 856], [1302, 857], [1219, 857], [1218, 856]], \"type\": \"cell\", \"rows\": [3, 3], \"columns\": [10, 10], \"text_list\": []}, {\"location\": [[1129, 830], [1213, 830], [1213, 857], [1129, 857]], \"bbox\": [1129, 830, 1213, 857], \"points\": [[1129, 831], [1130, 830], [1212, 830], [1213, 831], [1213, 856], [1212, 857], [1130, 857], [1129, 856]], \"type\": \"cell\", \"rows\": [3, 3], \"columns\": [9, 9], \"text_list\": []}, {\"location\": [[858, 830], [943, 830], [943, 857], [858, 857]], \"bbox\": [858, 830, 943, 857], \"points\": [[858, 831], [859, 830], [942, 830], [943, 831], [943, 856], [942, 857], [859, 857], [858, 856]], \"type\": \"cell\", \"rows\": [3, 3], \"columns\": [6, 6], \"text_list\": []}, {\"location\": [[769, 830], [853, 830], [853, 
857], [769, 857]], \"bbox\": [769, 830, 853, 857], \"points\": [[769, 831], [770, 830], [852, 830], [853, 831], [853, 856], [852, 857], [770, 857], [769, 856]], \"type\": \"cell\", \"rows\": [3, 3], \"columns\": [5, 5], \"text_list\": []}, {\"location\": [[678, 830], [763, 830], [763, 857], [678, 857]], \"bbox\": [678, 830, 763, 857], \"points\": [[678, 831], [679, 830], [762, 830], [763, 831], [763, 856], [762, 857], [679, 857], [678, 856]], \"type\": \"cell\", \"rows\": [3, 3], \"columns\": [4, 4], \"text_list\": []}, {\"location\": [[388, 830], [574, 830], [574, 857], [388, 857]], \"bbox\": [388, 830, 574, 857], \"points\": [[388, 831], [389, 830], [573, 830], [574, 831], [574, 856], [573, 857], [389, 857], [388, 856]], \"type\": \"cell\", \"rows\": [3, 3], \"columns\": [2, 2], \"text_list\": []}, {\"location\": [[769, 798], [853, 798], [853, 824], [769, 824]], \"bbox\": [769, 798, 853, 824], \"points\": [[769, 799], [770, 798], [852, 798], [853, 799], [853, 823], [852, 824], [770, 824], [769, 823]], \"type\": \"cell\", \"rows\": [2, 2], \"columns\": [5, 5], \"text_list\": []}, {\"location\": [[1218, 797], [1303, 797], [1303, 824], [1218, 824]], \"bbox\": [1218, 797, 1303, 824], \"points\": [[1244, 798], [1245, 797], [1246, 798], [1247, 797], [1249, 798], [1250, 797], [1275, 797], [1277, 798], [1278, 797], [1279, 798], [1302, 798], [1303, 799], [1303, 823], [1302, 824], [1219, 824], [1218, 823], [1218, 799], [1219, 798]], \"type\": \"cell\", \"rows\": [2, 2], \"columns\": [10, 10], \"text_list\": []}, {\"location\": [[1129, 797], [1213, 797], [1213, 824], [1129, 824]], \"bbox\": [1129, 797, 1213, 824], \"points\": [[1129, 798], [1130, 797], [1132, 798], [1146, 798], [1147, 797], [1148, 798], [1149, 797], [1150, 798], [1151, 797], [1153, 798], [1154, 797], [1155, 798], [1164, 798], [1165, 797], [1167, 798], [1168, 797], [1169, 798], [1170, 797], [1171, 798], [1173, 797], [1174, 798], [1212, 798], [1213, 799], [1213, 823], [1212, 824], [1130, 824], [1129, 823]], 
\"type\": \"cell\", \"rows\": [2, 2], \"columns\": [9, 9], \"text_list\": []}, {\"location\": [[858, 797], [943, 797], [943, 824], [858, 824]], \"bbox\": [858, 797, 943, 824], \"points\": [[888, 798], [889, 797], [891, 798], [892, 797], [893, 798], [894, 797], [895, 798], [942, 798], [943, 799], [943, 823], [942, 824], [859, 824], [858, 823], [858, 799], [859, 798]], \"type\": \"cell\", \"rows\": [2, 2], \"columns\": [6, 6], \"text_list\": []}, {\"location\": [[678, 797], [763, 797], [763, 824], [678, 824]], \"bbox\": [678, 797, 763, 824], \"points\": [[678, 798], [679, 797], [680, 798], [704, 798], [705, 797], [706, 798], [707, 797], [708, 798], [709, 797], [711, 798], [712, 797], [733, 797], [734, 798], [735, 797], [736, 798], [762, 798], [763, 799], [763, 823], [762, 824], [679, 824], [678, 823]], \"type\": \"cell\", \"rows\": [2, 2], \"columns\": [4, 4], \"text_list\": []}, {\"location\": [[388, 797], [574, 797], [574, 824], [388, 824]], \"bbox\": [388, 797, 574, 824], \"points\": [[388, 798], [389, 797], [394, 797], [395, 798], [418, 798], [419, 797], [421, 798], [424, 798], [425, 797], [429, 797], [430, 798], [431, 797], [432, 798], [433, 797], [435, 798], [484, 798], [485, 797], [486, 798], [495, 798], [497, 797], [498, 798], [573, 798], [574, 799], [574, 823], [573, 824], [389, 824], [388, 823]], \"type\": \"cell\", \"rows\": [2, 2], \"columns\": [2, 2], \"text_list\": []}, {\"location\": [[240, 768], [383, 768], [383, 920], [240, 920]], \"bbox\": [240, 768, 383, 920], \"points\": [[240, 769], [242, 768], [382, 768], [383, 769], [383, 919], [382, 920], [242, 920], [240, 919]], \"type\": \"cell\", \"rows\": [1, 5], \"columns\": [1, 1], \"text_list\": []}, {\"location\": [[1218, 767], [1303, 767], [1303, 792], [1218, 792]], \"bbox\": [1218, 767, 1303, 792], \"points\": [[1218, 768], [1219, 767], [1220, 768], [1223, 768], [1224, 767], [1302, 767], [1303, 768], [1303, 791], [1302, 792], [1219, 792], [1218, 791]], \"type\": \"cell\", \"rows\": [1, 1], 
\"columns\": [10, 10], \"text_list\": []}, {\"location\": [[1128, 767], [1213, 767], [1213, 792], [1128, 792]], \"bbox\": [1128, 767, 1213, 792], \"points\": [[1130, 767], [1132, 768], [1133, 767], [1205, 767], [1206, 768], [1208, 767], [1209, 768], [1212, 768], [1213, 769], [1213, 791], [1212, 792], [1130, 792], [1129, 791], [1129, 773], [1128, 771], [1128, 769]], \"type\": \"cell\", \"rows\": [1, 1], \"columns\": [9, 9], \"text_list\": []}, {\"location\": [[1038, 767], [1123, 767], [1123, 1107], [1038, 1107]], \"bbox\": [1038, 767, 1123, 1107], \"points\": [[1038, 768], [1039, 767], [1040, 768], [1045, 768], [1046, 767], [1047, 768], [1049, 767], [1050, 768], [1051, 767], [1052, 768], [1053, 767], [1054, 768], [1068, 768], [1070, 767], [1071, 768], [1072, 767], [1073, 768], [1074, 767], [1075, 768], [1077, 767], [1078, 768], [1079, 767], [1080, 768], [1081, 767], [1082, 768], [1122, 768], [1123, 769], [1123, 1106], [1122, 1107], [1039, 1107], [1038, 1106]], \"type\": \"cell\", \"rows\": [1, 11], \"columns\": [8, 8], \"text_list\": []}, {\"location\": [[948, 767], [1033, 767], [1033, 1107], [948, 1107]], \"bbox\": [948, 767, 1033, 1107], \"points\": [[951, 768], [953, 767], [954, 768], [955, 767], [956, 768], [957, 767], [958, 768], [960, 767], [961, 768], [962, 767], [963, 768], [975, 768], [976, 767], [977, 768], [978, 767], [980, 768], [981, 767], [982, 768], [983, 767], [984, 768], [985, 767], [987, 768], [988, 767], [989, 768], [1032, 768], [1033, 769], [1033, 1106], [1032, 1107], [949, 1107], [948, 1106], [948, 1082], [949, 1081], [949, 1075], [948, 1074], [948, 1051], [949, 1050], [949, 1044], [948, 1043], [948, 1019], [949, 1018], [949, 1012], [948, 1011], [948, 988], [949, 987], [949, 982], [948, 981], [948, 956], [949, 955], [949, 950], [948, 949], [948, 926], [949, 925], [949, 919], [948, 918], [948, 894], [949, 893], [949, 887], [948, 886], [948, 863], [949, 861], [949, 856], [948, 854], [948, 831], [949, 830], [949, 824], [948, 823], [948, 799], [949, 
798], [949, 792], [948, 791], [948, 769], [949, 768]], \"type\": \"cell\", \"rows\": [1, 11], \"columns\": [7, 7], \"text_list\": []}, {\"location\": [[858, 767], [943, 767], [943, 792], [858, 792]], \"bbox\": [858, 767, 943, 792], \"points\": [[858, 768], [859, 767], [860, 768], [861, 768], [863, 767], [936, 767], [937, 768], [939, 767], [940, 768], [941, 767], [943, 769], [943, 791], [942, 792], [859, 792], [858, 791]], \"type\": \"cell\", \"rows\": [1, 1], \"columns\": [6, 6], \"text_list\": []}, {\"location\": [[769, 767], [853, 767], [853, 792], [769, 792]], \"bbox\": [769, 767, 853, 792], \"points\": [[769, 768], [770, 767], [771, 768], [773, 767], [850, 767], [851, 768], [852, 768], [853, 769], [853, 791], [852, 792], [770, 792], [769, 791]], \"type\": \"cell\", \"rows\": [1, 1], \"columns\": [5, 5], \"text_list\": []}, {\"location\": [[678, 767], [763, 767], [763, 792], [678, 792]], \"bbox\": [678, 767, 763, 792], \"points\": [[678, 768], [679, 767], [680, 768], [682, 768], [684, 767], [761, 767], [763, 769], [763, 791], [762, 792], [679, 792], [678, 791]], \"type\": \"cell\", \"rows\": [1, 1], \"columns\": [4, 4], \"text_list\": []}, {\"location\": [[577, 767], [674, 767], [674, 1013], [577, 1013]], \"bbox\": [577, 767, 674, 1013], \"points\": [[624, 768], [625, 767], [626, 768], [628, 767], [630, 767], [631, 768], [632, 767], [633, 768], [635, 767], [636, 768], [673, 768], [674, 769], [673, 770], [673, 1012], [672, 1013], [578, 1013], [577, 1012], [578, 1011], [578, 988], [580, 987], [580, 983], [578, 982], [578, 955], [580, 954], [580, 950], [578, 949], [578, 925], [580, 923], [580, 920], [578, 919], [578, 893], [580, 892], [580, 888], [578, 887], [578, 861], [580, 860], [580, 857], [578, 856], [578, 830], [580, 829], [580, 824], [578, 823], [578, 798], [580, 797], [580, 792], [578, 791], [578, 769], [580, 768]], \"type\": \"cell\", \"rows\": [1, 8], \"columns\": [3, 3], \"text_list\": []}, {\"location\": [[388, 767], [574, 767], [574, 792], [388, 792]], 
\"bbox\": [388, 767, 574, 792], \"points\": [[388, 768], [389, 767], [390, 768], [391, 767], [562, 767], [563, 768], [564, 767], [566, 768], [573, 768], [574, 769], [574, 791], [573, 792], [389, 792], [388, 791]], \"type\": \"cell\", \"rows\": [1, 1], \"columns\": [2, 2], \"text_list\": []}, {\"location\": [[171, 767], [237, 767], [237, 1013], [171, 1013]], \"bbox\": [171, 767, 237, 1013], \"points\": [[171, 768], [173, 767], [174, 768], [236, 768], [237, 769], [237, 770], [236, 771], [236, 833], [237, 835], [236, 836], [237, 837], [236, 838], [237, 839], [236, 840], [237, 842], [236, 843], [237, 844], [236, 845], [236, 929], [237, 930], [237, 937], [236, 939], [236, 962], [237, 963], [237, 966], [236, 967], [237, 968], [236, 969], [237, 970], [236, 971], [236, 1009], [237, 1010], [237, 1012], [236, 1013], [173, 1013], [171, 1012]], \"type\": \"cell\", \"rows\": [1, 8], \"columns\": [0, 0], \"text_list\": []}, {\"location\": [[1218, 734], [1303, 734], [1303, 761], [1218, 761]], \"bbox\": [1218, 734, 1303, 761], \"points\": [[1218, 735], [1219, 734], [1220, 735], [1225, 735], [1226, 734], [1257, 734], [1258, 735], [1259, 734], [1302, 734], [1303, 735], [1303, 760], [1302, 761], [1219, 761], [1218, 760]], \"type\": \"cell\", \"rows\": [0, 0], \"columns\": [10, 10], \"text_list\": []}, {\"location\": [[1128, 734], [1213, 734], [1213, 761], [1128, 761]], \"bbox\": [1128, 734, 1213, 761], \"points\": [[1130, 734], [1132, 735], [1134, 735], [1135, 734], [1165, 734], [1167, 735], [1168, 735], [1169, 734], [1175, 734], [1176, 735], [1177, 735], [1178, 734], [1189, 734], [1190, 735], [1198, 735], [1199, 734], [1203, 734], [1204, 735], [1211, 735], [1212, 734], [1213, 735], [1213, 760], [1212, 761], [1129, 761], [1128, 760], [1128, 736]], \"type\": \"cell\", \"rows\": [0, 0], \"columns\": [9, 9], \"text_list\": []}, {\"location\": [[1038, 734], [1123, 734], [1123, 761], [1038, 761]], \"bbox\": [1038, 734, 1123, 761], \"points\": [[1038, 735], [1039, 734], [1040, 735], [1045, 
735], [1046, 734], [1047, 735], [1050, 735], [1051, 734], [1056, 734], [1057, 735], [1058, 734], [1063, 734], [1064, 735], [1065, 734], [1075, 734], [1077, 735], [1078, 735], [1079, 734], [1084, 734], [1085, 735], [1088, 735], [1089, 734], [1098, 734], [1099, 735], [1104, 735], [1105, 734], [1106, 735], [1111, 735], [1112, 734], [1114, 734], [1115, 735], [1122, 735], [1123, 736], [1123, 760], [1122, 761], [1039, 761], [1038, 760]], \"type\": \"cell\", \"rows\": [0, 0], \"columns\": [8, 8], \"text_list\": []}, {\"location\": [[948, 734], [1033, 734], [1033, 761], [948, 761]], \"bbox\": [948, 734, 1033, 761], \"points\": [[954, 735], [955, 734], [957, 734], [958, 735], [960, 734], [964, 734], [966, 735], [968, 735], [969, 734], [971, 734], [973, 735], [974, 734], [975, 735], [976, 734], [978, 734], [980, 735], [981, 734], [985, 734], [987, 735], [989, 735], [990, 734], [995, 734], [996, 735], [997, 735], [998, 734], [1016, 734], [1017, 735], [1018, 734], [1019, 735], [1020, 734], [1023, 734], [1024, 735], [1025, 734], [1026, 735], [1027, 734], [1029, 735], [1030, 734], [1031, 735], [1032, 734], [1033, 735], [1033, 760], [1032, 761], [949, 761], [948, 760], [948, 736], [949, 735]], \"type\": \"cell\", \"rows\": [0, 0], \"columns\": [7, 7], \"text_list\": []}, {\"location\": [[858, 734], [943, 734], [943, 761], [858, 761]], \"bbox\": [858, 734, 943, 761], \"points\": [[858, 735], [859, 734], [860, 735], [863, 735], [864, 734], [865, 735], [866, 734], [878, 734], [879, 735], [886, 735], [887, 734], [889, 734], [891, 735], [895, 735], [897, 734], [898, 735], [900, 735], [901, 734], [902, 735], [905, 735], [906, 734], [934, 734], [935, 735], [940, 735], [941, 734], [943, 736], [943, 760], [942, 761], [859, 761], [858, 760]], \"type\": \"cell\", \"rows\": [0, 0], \"columns\": [6, 6], \"text_list\": []}, {\"location\": [[769, 734], [853, 734], [853, 761], [769, 761]], \"bbox\": [769, 734, 853, 761], \"points\": [[769, 735], [770, 734], [771, 735], [773, 734], [774, 735], 
[775, 734], [776, 735], [777, 734], [787, 734], [788, 735], [789, 734], [790, 735], [791, 734], [792, 735], [796, 735], [797, 734], [799, 734], [801, 735], [804, 735], [805, 734], [806, 735], [811, 735], [812, 734], [813, 735], [815, 734], [845, 734], [846, 735], [847, 734], [849, 735], [851, 735], [852, 734], [853, 735], [853, 760], [852, 761], [770, 761], [769, 760]], \"type\": \"cell\", \"rows\": [0, 0], \"columns\": [5, 5], \"text_list\": []}, {\"location\": [[678, 734], [763, 734], [763, 761], [678, 761]], \"bbox\": [678, 734, 763, 761], \"points\": [[678, 735], [679, 734], [680, 735], [685, 735], [686, 734], [698, 734], [699, 735], [706, 735], [707, 734], [709, 734], [711, 735], [715, 735], [716, 734], [718, 735], [720, 735], [721, 734], [759, 734], [760, 735], [761, 734], [762, 734], [763, 735], [763, 760], [762, 761], [679, 761], [678, 760]], \"type\": \"cell\", \"rows\": [0, 0], \"columns\": [4, 4], \"text_list\": []}, {\"location\": [[577, 734], [674, 734], [674, 761], [577, 761]], \"bbox\": [577, 734, 674, 761], \"points\": [[605, 735], [606, 734], [621, 734], [622, 735], [623, 734], [635, 734], [636, 735], [638, 735], [639, 734], [640, 735], [642, 734], [643, 735], [673, 735], [674, 736], [674, 760], [673, 761], [578, 761], [577, 760], [577, 736], [578, 735]], \"type\": \"cell\", \"rows\": [0, 0], \"columns\": [3, 3], \"text_list\": []}, {\"location\": [[171, 734], [574, 734], [574, 761], [171, 761]], \"bbox\": [171, 734, 574, 761], \"points\": [[196, 735], [197, 734], [198, 735], [199, 734], [214, 734], [215, 735], [216, 734], [217, 735], [218, 734], [219, 735], [573, 735], [574, 736], [574, 760], [573, 761], [173, 761], [171, 760], [171, 736], [173, 735]], \"type\": \"cell\", \"rows\": [0, 0], \"columns\": [0, 2], \"text_list\": []}, {\"location\": [[166, 729], [1309, 729], [1309, 1113], [166, 1113]], \"bbox\": [166, 729, 1309, 1113], \"points\": [[167, 729], [167, 788], [166, 789], [167, 790], [167, 792], [166, 794], [167, 795], [167, 1082], [166, 
1084], [167, 1085], [167, 1113], [1308, 1113], [1309, 1112], [1309, 1107], [1308, 1106], [1308, 1101], [1309, 1100], [1308, 1099], [1309, 1098], [1308, 1096], [1309, 1095], [1309, 1075], [1308, 1074], [1308, 1073], [1309, 1072], [1308, 1071], [1309, 1070], [1309, 1056], [1308, 1054], [1309, 1053], [1308, 1052], [1309, 1051], [1309, 1044], [1308, 1043], [1309, 1042], [1308, 1040], [1309, 1039], [1308, 1038], [1309, 1037], [1309, 1020], [1308, 1019], [1309, 1018], [1309, 1011], [1308, 1010], [1309, 1009], [1309, 950], [1308, 949], [1309, 948], [1308, 947], [1309, 946], [1309, 929], [1308, 928], [1309, 927], [1308, 926], [1309, 925], [1309, 919], [1308, 918], [1308, 916], [1309, 915], [1309, 897], [1308, 895], [1309, 894], [1309, 887], [1308, 886], [1309, 885], [1308, 884], [1309, 882], [1308, 881], [1309, 880], [1309, 864], [1308, 863], [1309, 861], [1309, 856], [1308, 854], [1308, 853], [1309, 852], [1309, 836], [1308, 835], [1309, 833], [1308, 832], [1309, 831], [1309, 824], [1308, 823], [1309, 822], [1308, 820], [1309, 819], [1309, 792], [1308, 791], [1308, 790], [1309, 789], [1309, 773], [1308, 771], [1308, 769], [1309, 768], [1309, 761], [1308, 760], [1308, 736], [1309, 735], [1309, 730], [171, 730], [170, 729]], \"type\": \"table\", \"contains\": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84]}, {\"location\": [[1213, 549], [1298, 549], [1298, 576], [1213, 576]], \"bbox\": [1213, 549, 1298, 576], \"points\": [[1213, 550], [1215, 549], [1296, 549], [1298, 550], [1298, 575], [1296, 576], [1215, 576], [1213, 575]], \"type\": \"cell\", \"rows\": [10, 10], \"columns\": [10, 10], \"text_list\": []}, {\"location\": [[1123, 549], [1209, 549], [1209, 576], [1123, 576]], \"bbox\": [1123, 549, 
1209, 576], \"points\": [[1123, 550], [1125, 549], [1208, 549], [1209, 550], [1209, 575], [1208, 576], [1125, 576], [1123, 575]], \"type\": \"cell\", \"rows\": [10, 10], \"columns\": [9, 9], \"text_list\": []}, {\"location\": [[1032, 549], [1118, 549], [1118, 576], [1032, 576]], \"bbox\": [1032, 549, 1118, 576], \"points\": [[1035, 549], [1116, 549], [1118, 550], [1118, 575], [1116, 576], [1035, 576], [1032, 574], [1032, 568], [1033, 567], [1032, 566], [1033, 564], [1032, 563], [1033, 562], [1032, 561], [1033, 560], [1032, 559], [1033, 557], [1033, 553], [1032, 552]], \"type\": \"cell\", \"rows\": [10, 10], \"columns\": [8, 8], \"text_list\": []}, {\"location\": [[942, 549], [1027, 549], [1027, 576], [942, 576]], \"bbox\": [942, 549, 1027, 576], \"points\": [[942, 550], [943, 549], [1026, 549], [1027, 550], [1027, 575], [1026, 576], [943, 576], [942, 575]], \"type\": \"cell\", \"rows\": [10, 10], \"columns\": [7, 7], \"text_list\": []}, {\"location\": [[852, 549], [937, 549], [937, 576], [852, 576]], \"bbox\": [852, 549, 937, 576], \"points\": [[853, 550], [854, 549], [936, 549], [937, 550], [937, 575], [936, 576], [854, 576], [852, 574], [853, 573]], \"type\": \"cell\", \"rows\": [10, 10], \"columns\": [6, 6], \"text_list\": []}, {\"location\": [[762, 549], [847, 549], [847, 576], [762, 576]], \"bbox\": [762, 549, 847, 576], \"points\": [[763, 550], [764, 549], [846, 549], [847, 550], [847, 575], [846, 576], [763, 576], [762, 575], [763, 574]], \"type\": \"cell\", \"rows\": [10, 10], \"columns\": [5, 5], \"text_list\": []}, {\"location\": [[673, 549], [759, 549], [759, 576], [673, 576]], \"bbox\": [673, 549, 759, 576], \"points\": [[673, 550], [674, 549], [756, 549], [759, 552], [757, 553], [759, 554], [759, 574], [756, 576], [674, 576], [673, 575]], \"type\": \"cell\", \"rows\": [10, 10], \"columns\": [4, 4], \"text_list\": []}, {\"location\": [[233, 549], [569, 549], [569, 576], [233, 576]], \"bbox\": [233, 549, 569, 576], \"points\": [[235, 550], [236, 549], 
[568, 549], [569, 550], [569, 575], [568, 576], [235, 576], [233, 575], [235, 574]], \"type\": \"cell\", \"rows\": [10, 10], \"columns\": [1, 2], \"text_list\": []}, {\"location\": [[1213, 519], [1298, 519], [1298, 544], [1213, 544]], \"bbox\": [1213, 519, 1298, 544], \"points\": [[1213, 520], [1215, 519], [1296, 519], [1298, 520], [1298, 543], [1296, 544], [1215, 544], [1213, 543]], \"type\": \"cell\", \"rows\": [9, 9], \"columns\": [10, 10], \"text_list\": []}, {\"location\": [[1123, 519], [1209, 519], [1209, 544], [1123, 544]], \"bbox\": [1123, 519, 1209, 544], \"points\": [[1123, 520], [1125, 519], [1208, 519], [1209, 520], [1209, 543], [1208, 544], [1125, 544], [1123, 543]], \"type\": \"cell\", \"rows\": [9, 9], \"columns\": [9, 9], \"text_list\": []}, {\"location\": [[1032, 519], [1118, 519], [1118, 544], [1032, 544]], \"bbox\": [1032, 519, 1118, 544], \"points\": [[1035, 519], [1116, 519], [1118, 520], [1118, 543], [1116, 544], [1035, 544], [1032, 542], [1033, 541], [1033, 527], [1032, 526], [1033, 525], [1032, 523], [1033, 522], [1032, 521]], \"type\": \"cell\", \"rows\": [9, 9], \"columns\": [8, 8], \"text_list\": []}, {\"location\": [[942, 519], [1027, 519], [1027, 544], [942, 544]], \"bbox\": [942, 519, 1027, 544], \"points\": [[942, 520], [943, 519], [1026, 519], [1027, 520], [1027, 543], [1026, 544], [944, 544], [942, 542], [943, 541], [942, 540], [943, 539], [942, 537], [943, 536], [942, 535], [943, 534], [942, 533]], \"type\": \"cell\", \"rows\": [9, 9], \"columns\": [7, 7], \"text_list\": []}, {\"location\": [[853, 519], [937, 519], [937, 544], [853, 544]], \"bbox\": [853, 519, 937, 544], \"points\": [[853, 520], [854, 519], [936, 519], [937, 520], [937, 543], [936, 544], [854, 544], [853, 543]], \"type\": \"cell\", \"rows\": [9, 9], \"columns\": [6, 6], \"text_list\": []}, {\"location\": [[763, 519], [847, 519], [847, 544], [763, 544]], \"bbox\": [763, 519, 847, 544], \"points\": [[763, 520], [764, 519], [846, 519], [847, 520], [847, 543], [846, 
544], [764, 544], [763, 543]], \"type\": \"cell\", \"rows\": [9, 9], \"columns\": [5, 5], \"text_list\": []}, {\"location\": [[673, 519], [759, 519], [759, 544], [673, 544]], \"bbox\": [673, 519, 759, 544], \"points\": [[673, 520], [674, 519], [756, 519], [759, 521], [757, 522], [759, 523], [759, 540], [757, 541], [757, 543], [756, 544], [674, 544], [673, 543]], \"type\": \"cell\", \"rows\": [9, 9], \"columns\": [4, 4], \"text_list\": []}, {\"location\": [[383, 519], [569, 519], [569, 544], [383, 544]], \"bbox\": [383, 519, 569, 544], \"points\": [[383, 520], [384, 519], [568, 519], [569, 520], [569, 543], [568, 544], [384, 544], [383, 543]], \"type\": \"cell\", \"rows\": [9, 9], \"columns\": [2, 2], \"text_list\": []}, {\"location\": [[233, 519], [377, 519], [377, 544], [233, 544]], \"bbox\": [233, 519, 377, 544], \"points\": [[233, 520], [235, 519], [376, 519], [377, 520], [377, 543], [376, 544], [236, 544], [235, 543], [235, 540], [233, 539], [233, 537], [235, 536], [233, 535], [233, 523], [235, 522], [233, 521]], \"type\": \"cell\", \"rows\": [9, 9], \"columns\": [1, 1], \"text_list\": []}, {\"location\": [[1213, 488], [1298, 488], [1298, 515], [1213, 515]], \"bbox\": [1213, 488, 1298, 515], \"points\": [[1213, 490], [1215, 488], [1216, 490], [1218, 490], [1219, 488], [1220, 490], [1222, 488], [1296, 488], [1298, 490], [1298, 514], [1296, 515], [1215, 515], [1213, 514]], \"type\": \"cell\", \"rows\": [8, 8], \"columns\": [10, 10], \"text_list\": []}, {\"location\": [[1123, 488], [1209, 488], [1209, 515], [1123, 515]], \"bbox\": [1123, 488, 1209, 515], \"points\": [[1123, 490], [1125, 488], [1126, 490], [1132, 490], [1133, 488], [1194, 488], [1195, 490], [1196, 488], [1197, 490], [1198, 488], [1199, 490], [1202, 490], [1203, 488], [1204, 490], [1205, 488], [1206, 490], [1208, 490], [1209, 491], [1209, 514], [1208, 515], [1125, 515], [1123, 514]], \"type\": \"cell\", \"rows\": [8, 8], \"columns\": [9, 9], \"text_list\": []}, {\"location\": [[1032, 488], [1118, 
488], [1118, 515], [1032, 515]], \"bbox\": [1032, 488, 1118, 515], \"points\": [[1035, 488], [1036, 490], [1037, 488], [1038, 490], [1039, 488], [1040, 490], [1042, 488], [1043, 490], [1044, 488], [1107, 488], [1108, 490], [1109, 488], [1111, 490], [1112, 488], [1116, 488], [1118, 490], [1118, 514], [1116, 515], [1035, 515], [1033, 514], [1033, 513], [1032, 512], [1033, 511], [1032, 509], [1033, 508], [1033, 497], [1032, 495], [1033, 494], [1032, 493], [1032, 491]], \"type\": \"cell\", \"rows\": [8, 8], \"columns\": [8, 8], \"text_list\": []}, {\"location\": [[942, 488], [1027, 488], [1027, 515], [942, 515]], \"bbox\": [942, 488, 1027, 515], \"points\": [[942, 490], [943, 488], [944, 488], [946, 490], [949, 490], [950, 488], [951, 490], [953, 488], [1016, 488], [1017, 490], [1018, 488], [1019, 490], [1020, 488], [1022, 490], [1026, 490], [1027, 491], [1027, 514], [1026, 515], [943, 515], [942, 514], [943, 513], [942, 512]], \"type\": \"cell\", \"rows\": [8, 8], \"columns\": [7, 7], \"text_list\": []}, {\"location\": [[853, 488], [937, 488], [937, 515], [853, 515]], \"bbox\": [853, 488, 937, 515], \"points\": [[853, 490], [854, 488], [856, 490], [860, 490], [861, 488], [863, 490], [864, 488], [929, 488], [930, 490], [932, 488], [933, 490], [934, 488], [935, 490], [936, 490], [937, 491], [937, 514], [936, 515], [854, 515], [853, 514]], \"type\": \"cell\", \"rows\": [8, 8], \"columns\": [6, 6], \"text_list\": []}, {\"location\": [[762, 488], [847, 488], [847, 515], [762, 515]], \"bbox\": [762, 488, 847, 515], \"points\": [[763, 490], [764, 488], [766, 490], [771, 490], [773, 488], [774, 490], [775, 488], [838, 488], [839, 490], [840, 488], [842, 490], [843, 488], [844, 490], [845, 488], [847, 491], [847, 514], [846, 515], [763, 515], [762, 514], [763, 513]], \"type\": \"cell\", \"rows\": [8, 8], \"columns\": [5, 5], \"text_list\": []}, {\"location\": [[673, 488], [759, 488], [759, 515], [673, 515]], \"bbox\": [673, 488, 759, 515], \"points\": [[673, 490], [674, 488], 
[679, 488], [680, 490], [681, 488], [682, 490], [684, 488], [751, 488], [753, 490], [754, 488], [755, 490], [757, 490], [759, 491], [759, 511], [757, 512], [757, 514], [756, 515], [674, 515], [673, 514]], \"type\": \"cell\", \"rows\": [8, 8], \"columns\": [4, 4], \"text_list\": []}, {\"location\": [[383, 488], [569, 488], [569, 515], [383, 515]], \"bbox\": [383, 488, 569, 515], \"points\": [[383, 490], [384, 488], [475, 488], [477, 490], [478, 488], [479, 490], [480, 488], [481, 490], [483, 488], [484, 490], [485, 488], [486, 490], [487, 488], [488, 490], [490, 488], [491, 490], [492, 488], [501, 488], [502, 490], [504, 488], [505, 490], [506, 488], [507, 490], [508, 488], [509, 490], [511, 488], [512, 490], [513, 488], [550, 488], [552, 490], [553, 488], [554, 490], [555, 488], [556, 490], [557, 488], [567, 488], [569, 491], [569, 514], [568, 515], [384, 515], [383, 514]], \"type\": \"cell\", \"rows\": [8, 8], \"columns\": [2, 2], \"text_list\": []}, {\"location\": [[233, 488], [377, 488], [377, 515], [233, 515]], \"bbox\": [233, 488, 377, 515], \"points\": [[233, 490], [235, 488], [356, 488], [357, 490], [359, 488], [360, 490], [361, 488], [362, 490], [363, 488], [364, 490], [366, 488], [367, 490], [368, 488], [369, 490], [370, 488], [371, 490], [373, 488], [374, 490], [375, 488], [376, 488], [377, 490], [377, 514], [376, 515], [236, 515], [235, 514], [235, 511], [233, 509]], \"type\": \"cell\", \"rows\": [8, 8], \"columns\": [1, 1], \"text_list\": []}, {\"location\": [[1212, 457], [1298, 457], [1298, 484], [1212, 484]], \"bbox\": [1212, 457, 1298, 484], \"points\": [[1212, 458], [1213, 457], [1296, 457], [1298, 458], [1298, 483], [1296, 484], [1215, 484], [1213, 483], [1213, 459]], \"type\": \"cell\", \"rows\": [7, 7], \"columns\": [10, 10], \"text_list\": []}, {\"location\": [[1123, 457], [1209, 457], [1209, 484], [1123, 484]], \"bbox\": [1123, 457, 1209, 484], \"points\": [[1123, 458], [1125, 457], [1208, 457], [1209, 458], [1209, 483], [1208, 484], [1125, 
484], [1123, 483]], \"type\": \"cell\", \"rows\": [7, 7], \"columns\": [9, 9], \"text_list\": []}, {\"location\": [[1032, 457], [1118, 457], [1118, 484], [1032, 484]], \"bbox\": [1032, 457, 1118, 484], \"points\": [[1032, 458], [1033, 457], [1116, 457], [1118, 458], [1118, 483], [1116, 484], [1033, 484], [1032, 483], [1032, 479], [1033, 478], [1032, 477], [1033, 475], [1032, 474], [1033, 473], [1032, 472], [1033, 471], [1032, 470], [1033, 468], [1032, 467]], \"type\": \"cell\", \"rows\": [7, 7], \"columns\": [8, 8], \"text_list\": []}, {\"location\": [[942, 457], [1027, 457], [1027, 484], [942, 484]], \"bbox\": [942, 457, 1027, 484], \"points\": [[942, 458], [943, 457], [1026, 457], [1027, 458], [1027, 483], [1026, 484], [943, 484], [942, 483]], \"type\": \"cell\", \"rows\": [7, 7], \"columns\": [7, 7], \"text_list\": []}, {\"location\": [[852, 457], [937, 457], [937, 484], [852, 484]], \"bbox\": [852, 457, 937, 484], \"points\": [[852, 458], [853, 457], [936, 457], [937, 458], [937, 483], [936, 484], [854, 484], [853, 483], [853, 461], [852, 460]], \"type\": \"cell\", \"rows\": [7, 7], \"columns\": [6, 6], \"text_list\": []}, {\"location\": [[762, 457], [847, 457], [847, 484], [762, 484]], \"bbox\": [762, 457, 847, 484], \"points\": [[762, 458], [763, 457], [846, 457], [847, 458], [847, 483], [846, 484], [764, 484], [763, 483], [763, 459]], \"type\": \"cell\", \"rows\": [7, 7], \"columns\": [5, 5], \"text_list\": []}, {\"location\": [[673, 457], [759, 457], [759, 484], [673, 484]], \"bbox\": [673, 457, 759, 484], \"points\": [[673, 458], [674, 457], [757, 457], [759, 458], [759, 483], [757, 484], [674, 484], [673, 483]], \"type\": \"cell\", \"rows\": [7, 7], \"columns\": [4, 4], \"text_list\": []}, {\"location\": [[383, 457], [569, 457], [569, 484], [383, 484]], \"bbox\": [383, 457, 569, 484], \"points\": [[383, 458], [384, 457], [568, 457], [569, 458], [569, 483], [568, 484], [384, 484], [383, 483]], \"type\": \"cell\", \"rows\": [7, 7], \"columns\": [2, 2], 
\"text_list\": []}, {\"location\": [[233, 457], [377, 457], [377, 484], [233, 484]], \"bbox\": [233, 457, 377, 484], \"points\": [[233, 458], [235, 457], [376, 457], [377, 458], [377, 483], [376, 484], [236, 484], [235, 483], [235, 475], [233, 474], [233, 460], [235, 459]], \"type\": \"cell\", \"rows\": [7, 7], \"columns\": [1, 1], \"text_list\": []}, {\"location\": [[1213, 426], [1298, 426], [1298, 452], [1213, 452]], \"bbox\": [1213, 426, 1298, 452], \"points\": [[1213, 428], [1215, 426], [1296, 426], [1298, 428], [1298, 451], [1296, 452], [1215, 452], [1213, 451]], \"type\": \"cell\", \"rows\": [6, 6], \"columns\": [10, 10], \"text_list\": []}, {\"location\": [[1123, 426], [1209, 426], [1209, 452], [1123, 452]], \"bbox\": [1123, 426, 1209, 452], \"points\": [[1123, 428], [1125, 426], [1206, 426], [1209, 429], [1209, 451], [1208, 452], [1125, 452], [1123, 451]], \"type\": \"cell\", \"rows\": [6, 6], \"columns\": [9, 9], \"text_list\": []}, {\"location\": [[1032, 426], [1118, 426], [1118, 452], [1032, 452]], \"bbox\": [1032, 426, 1118, 452], \"points\": [[1032, 428], [1033, 426], [1116, 426], [1118, 428], [1118, 451], [1116, 452], [1035, 452], [1032, 450]], \"type\": \"cell\", \"rows\": [6, 6], \"columns\": [8, 8], \"text_list\": []}, {\"location\": [[942, 426], [1027, 426], [1027, 452], [942, 452]], \"bbox\": [942, 426, 1027, 452], \"points\": [[942, 428], [943, 426], [1026, 426], [1027, 428], [1027, 451], [1026, 452], [943, 452], [942, 451]], \"type\": \"cell\", \"rows\": [6, 6], \"columns\": [7, 7], \"text_list\": []}, {\"location\": [[852, 426], [937, 426], [937, 452], [852, 452]], \"bbox\": [852, 426, 937, 452], \"points\": [[852, 428], [853, 426], [936, 426], [937, 428], [937, 451], [936, 452], [854, 452], [853, 451], [853, 431], [852, 430], [853, 429]], \"type\": \"cell\", \"rows\": [6, 6], \"columns\": [6, 6], \"text_list\": []}, {\"location\": [[762, 426], [847, 426], [847, 452], [762, 452]], \"bbox\": [762, 426, 847, 452], \"points\": [[762, 428], [763, 
426], [846, 426], [847, 428], [847, 451], [846, 452], [764, 452], [763, 451], [763, 429]], \"type\": \"cell\", \"rows\": [6, 6], \"columns\": [5, 5], \"text_list\": []}, {\"location\": [[673, 426], [759, 426], [759, 452], [673, 452]], \"bbox\": [673, 426, 759, 452], \"points\": [[673, 428], [674, 426], [756, 426], [759, 429], [759, 451], [757, 452], [674, 452], [673, 451]], \"type\": \"cell\", \"rows\": [6, 6], \"columns\": [4, 4], \"text_list\": []}, {\"location\": [[383, 426], [569, 426], [569, 452], [383, 452]], \"bbox\": [383, 426, 569, 452], \"points\": [[383, 428], [384, 426], [568, 426], [569, 428], [569, 451], [568, 452], [384, 452], [383, 451]], \"type\": \"cell\", \"rows\": [6, 6], \"columns\": [2, 2], \"text_list\": []}, {\"location\": [[233, 426], [377, 426], [377, 452], [233, 452]], \"bbox\": [233, 426, 377, 452], \"points\": [[233, 428], [235, 426], [376, 426], [377, 428], [377, 451], [376, 452], [236, 452], [233, 450]], \"type\": \"cell\", \"rows\": [6, 6], \"columns\": [1, 1], \"text_list\": []}, {\"location\": [[1213, 395], [1298, 395], [1298, 422], [1213, 422]], \"bbox\": [1213, 395, 1298, 422], \"points\": [[1244, 396], [1245, 395], [1273, 395], [1274, 396], [1275, 395], [1277, 396], [1296, 396], [1298, 397], [1298, 421], [1296, 422], [1215, 422], [1213, 421], [1213, 397], [1215, 396]], \"type\": \"cell\", \"rows\": [5, 5], \"columns\": [10, 10], \"text_list\": []}, {\"location\": [[1123, 395], [1209, 395], [1209, 422], [1123, 422]], \"bbox\": [1123, 395, 1209, 422], \"points\": [[1153, 396], [1154, 395], [1180, 395], [1181, 396], [1208, 396], [1209, 397], [1209, 421], [1208, 422], [1125, 422], [1123, 421], [1123, 397], [1125, 396]], \"type\": \"cell\", \"rows\": [5, 5], \"columns\": [9, 9], \"text_list\": []}, {\"location\": [[1032, 395], [1118, 395], [1118, 422], [1032, 422]], \"bbox\": [1032, 395, 1118, 422], \"points\": [[1064, 396], [1065, 395], [1088, 395], [1089, 396], [1091, 395], [1092, 396], [1116, 396], [1118, 397], [1118, 421], [1116, 
422], [1035, 422], [1032, 419], [1032, 414], [1033, 412], [1032, 411], [1033, 410], [1032, 409], [1033, 408], [1032, 406], [1032, 397], [1033, 396]], \"type\": \"cell\", \"rows\": [5, 5], \"columns\": [8, 8], \"text_list\": []}, {\"location\": [[942, 395], [1027, 395], [1027, 422], [942, 422]], \"bbox\": [942, 395, 1027, 422], \"points\": [[973, 396], [974, 395], [997, 395], [998, 396], [999, 395], [1001, 396], [1026, 396], [1027, 397], [1027, 421], [1026, 422], [943, 422], [942, 421], [942, 397], [943, 396]], \"type\": \"cell\", \"rows\": [5, 5], \"columns\": [7, 7], \"text_list\": []}, {\"location\": [[852, 395], [937, 395], [937, 422], [852, 422]], \"bbox\": [852, 395, 937, 422], \"points\": [[884, 396], [885, 395], [911, 395], [912, 396], [913, 395], [914, 396], [936, 396], [937, 397], [937, 421], [936, 422], [854, 422], [853, 421], [853, 398], [852, 397], [853, 396]], \"type\": \"cell\", \"rows\": [5, 5], \"columns\": [6, 6], \"text_list\": []}, {\"location\": [[762, 395], [847, 395], [847, 422], [762, 422]], \"bbox\": [762, 395, 847, 422], \"points\": [[792, 396], [794, 395], [819, 395], [820, 396], [822, 395], [823, 396], [846, 396], [847, 397], [847, 421], [846, 422], [763, 422], [762, 421], [763, 419], [763, 398], [762, 397], [763, 396]], \"type\": \"cell\", \"rows\": [5, 5], \"columns\": [5, 5], \"text_list\": []}, {\"location\": [[673, 395], [759, 395], [759, 422], [673, 422]], \"bbox\": [673, 395, 759, 422], \"points\": [[673, 396], [674, 395], [675, 396], [701, 396], [702, 395], [737, 395], [739, 396], [757, 396], [759, 397], [759, 419], [756, 422], [674, 422], [673, 421]], \"type\": \"cell\", \"rows\": [5, 5], \"columns\": [4, 4], \"text_list\": []}, {\"location\": [[383, 395], [569, 395], [569, 422], [383, 422]], \"bbox\": [383, 395, 569, 422], \"points\": [[383, 396], [384, 395], [394, 395], [395, 396], [396, 395], [397, 396], [402, 396], [403, 395], [404, 396], [405, 396], [406, 395], [422, 395], [423, 396], [444, 396], [445, 395], [457, 395], 
[458, 396], [459, 395], [460, 396], [465, 396], [466, 395], [471, 395], [472, 396], [473, 395], [474, 396], [477, 396], [478, 395], [480, 395], [481, 396], [483, 395], [487, 395], [488, 396], [568, 396], [569, 397], [569, 421], [568, 422], [384, 422], [383, 421]], \"type\": \"cell\", \"rows\": [5, 5], \"columns\": [2, 2], \"text_list\": []}, {\"location\": [[1213, 364], [1298, 364], [1298, 391], [1213, 391]], \"bbox\": [1213, 364, 1298, 391], \"points\": [[1213, 366], [1215, 364], [1216, 366], [1217, 364], [1296, 364], [1298, 366], [1298, 390], [1296, 391], [1215, 391], [1213, 390]], \"type\": \"cell\", \"rows\": [4, 4], \"columns\": [10, 10], \"text_list\": []}, {\"location\": [[1123, 364], [1209, 364], [1209, 391], [1123, 391]], \"bbox\": [1123, 364, 1209, 391], \"points\": [[1123, 366], [1125, 364], [1129, 364], [1130, 366], [1132, 364], [1206, 364], [1209, 367], [1209, 390], [1208, 391], [1125, 391], [1123, 390]], \"type\": \"cell\", \"rows\": [4, 4], \"columns\": [9, 9], \"text_list\": []}, {\"location\": [[1032, 364], [1118, 364], [1118, 391], [1032, 391]], \"bbox\": [1032, 364, 1118, 391], \"points\": [[1035, 364], [1036, 366], [1037, 364], [1116, 364], [1118, 366], [1118, 390], [1116, 391], [1035, 391], [1032, 389], [1032, 385], [1033, 384], [1032, 383], [1033, 382], [1032, 381], [1033, 380], [1033, 375], [1032, 374], [1033, 373], [1032, 371], [1033, 370], [1032, 369], [1032, 367]], \"type\": \"cell\", \"rows\": [4, 4], \"columns\": [8, 8], \"text_list\": []}, {\"location\": [[942, 364], [1027, 364], [1027, 391], [942, 391]], \"bbox\": [942, 364, 1027, 391], \"points\": [[942, 366], [943, 364], [944, 364], [946, 366], [947, 364], [1023, 364], [1024, 366], [1025, 364], [1026, 364], [1027, 366], [1027, 390], [1026, 391], [943, 391], [942, 390]], \"type\": \"cell\", \"rows\": [4, 4], \"columns\": [7, 7], \"text_list\": []}, {\"location\": [[852, 364], [937, 364], [937, 391], [852, 391]], \"bbox\": [852, 364, 937, 391], \"points\": [[853, 366], [854, 364], 
[856, 366], [857, 364], [934, 364], [935, 366], [936, 364], [937, 366], [937, 390], [936, 391], [854, 391], [852, 389], [853, 388]], \"type\": \"cell\", \"rows\": [4, 4], \"columns\": [6, 6], \"text_list\": []}, {\"location\": [[762, 364], [847, 364], [847, 391], [762, 391]], \"bbox\": [762, 364, 847, 391], \"points\": [[762, 366], [763, 364], [764, 364], [766, 366], [767, 364], [846, 364], [847, 366], [847, 390], [846, 391], [763, 391], [762, 390], [763, 389], [763, 367]], \"type\": \"cell\", \"rows\": [4, 4], \"columns\": [5, 5], \"text_list\": []}, {\"location\": [[673, 364], [759, 364], [759, 391], [673, 391]], \"bbox\": [673, 364, 759, 391], \"points\": [[673, 366], [674, 364], [756, 364], [759, 367], [759, 389], [756, 391], [674, 391], [673, 390]], \"type\": \"cell\", \"rows\": [4, 4], \"columns\": [4, 4], \"text_list\": []}, {\"location\": [[383, 364], [569, 364], [569, 391], [383, 391]], \"bbox\": [383, 364, 569, 391], \"points\": [[383, 366], [384, 364], [567, 364], [569, 367], [569, 390], [568, 391], [384, 391], [383, 390]], \"type\": \"cell\", \"rows\": [4, 4], \"columns\": [2, 2], \"text_list\": []}, {\"location\": [[233, 364], [377, 364], [377, 422], [233, 422]], \"bbox\": [233, 364, 377, 422], \"points\": [[235, 366], [236, 364], [239, 364], [240, 366], [242, 364], [243, 366], [244, 364], [245, 366], [246, 364], [247, 366], [249, 364], [250, 366], [251, 364], [252, 366], [253, 364], [254, 366], [259, 366], [260, 364], [261, 366], [263, 364], [264, 366], [265, 364], [266, 366], [267, 364], [268, 366], [270, 364], [271, 366], [272, 364], [273, 366], [274, 364], [276, 366], [277, 364], [278, 366], [279, 364], [280, 366], [374, 366], [375, 364], [376, 364], [377, 366], [377, 421], [376, 422], [236, 422], [235, 421], [235, 396], [233, 395], [235, 394], [233, 392], [233, 390], [235, 389], [233, 388], [233, 383], [235, 382]], \"type\": \"cell\", \"rows\": [4, 5], \"columns\": [1, 1], \"text_list\": []}, {\"location\": [[1213, 334], [1298, 334], [1298, 360], 
[1213, 360]], \"bbox\": [1213, 334, 1298, 360], \"points\": [[1213, 335], [1215, 334], [1296, 334], [1298, 335], [1298, 359], [1296, 360], [1215, 360], [1213, 359]], \"type\": \"cell\", \"rows\": [3, 3], \"columns\": [10, 10], \"text_list\": []}, {\"location\": [[1123, 334], [1209, 334], [1209, 360], [1123, 360]], \"bbox\": [1123, 334, 1209, 360], \"points\": [[1123, 335], [1125, 334], [1208, 334], [1209, 335], [1209, 359], [1208, 360], [1125, 360], [1123, 359]], \"type\": \"cell\", \"rows\": [3, 3], \"columns\": [9, 9], \"text_list\": []}, {\"location\": [[1032, 334], [1118, 334], [1118, 360], [1032, 360]], \"bbox\": [1032, 334, 1118, 360], \"points\": [[1032, 335], [1033, 334], [1116, 334], [1118, 335], [1118, 359], [1116, 360], [1033, 360], [1032, 359], [1032, 355], [1033, 354], [1032, 353], [1033, 352], [1032, 350], [1033, 349], [1032, 348], [1033, 347], [1033, 345], [1032, 343], [1033, 342], [1032, 341]], \"type\": \"cell\", \"rows\": [3, 3], \"columns\": [8, 8], \"text_list\": []}, {\"location\": [[942, 334], [1027, 334], [1027, 360], [942, 360]], \"bbox\": [942, 334, 1027, 360], \"points\": [[942, 335], [943, 334], [1026, 334], [1027, 335], [1027, 359], [1026, 360], [943, 360], [942, 359]], \"type\": \"cell\", \"rows\": [3, 3], \"columns\": [7, 7], \"text_list\": []}, {\"location\": [[852, 334], [937, 334], [937, 360], [852, 360]], \"bbox\": [852, 334, 937, 360], \"points\": [[854, 334], [936, 334], [937, 335], [937, 359], [936, 360], [854, 360], [853, 359], [853, 337], [852, 336]], \"type\": \"cell\", \"rows\": [3, 3], \"columns\": [6, 6], \"text_list\": []}, {\"location\": [[762, 334], [847, 334], [847, 360], [762, 360]], \"bbox\": [762, 334, 847, 360], \"points\": [[762, 335], [763, 334], [846, 334], [847, 335], [847, 359], [846, 360], [764, 360], [763, 359], [763, 336]], \"type\": \"cell\", \"rows\": [3, 3], \"columns\": [5, 5], \"text_list\": []}, {\"location\": [[673, 334], [759, 334], [759, 360], [673, 360]], \"bbox\": [673, 334, 759, 360], 
\"points\": [[673, 335], [674, 334], [756, 334], [759, 336], [759, 359], [757, 360], [674, 360], [673, 359]], \"type\": \"cell\", \"rows\": [3, 3], \"columns\": [4, 4], \"text_list\": []}, {\"location\": [[383, 334], [569, 334], [569, 360], [383, 360]], \"bbox\": [383, 334, 569, 360], \"points\": [[383, 335], [384, 334], [568, 334], [569, 335], [569, 359], [568, 360], [384, 360], [383, 359]], \"type\": \"cell\", \"rows\": [3, 3], \"columns\": [2, 2], \"text_list\": []}, {\"location\": [[233, 334], [377, 334], [377, 360], [233, 360]], \"bbox\": [233, 334, 377, 360], \"points\": [[233, 335], [235, 334], [376, 334], [377, 335], [377, 359], [376, 360], [236, 360], [235, 359], [235, 356], [233, 355], [233, 339], [235, 337], [233, 336]], \"type\": \"cell\", \"rows\": [3, 3], \"columns\": [1, 1], \"text_list\": []}, {\"location\": [[1213, 302], [1298, 302], [1298, 329], [1213, 329]], \"bbox\": [1213, 302, 1298, 329], \"points\": [[1213, 304], [1215, 302], [1296, 302], [1298, 304], [1298, 328], [1296, 329], [1215, 329], [1213, 328]], \"type\": \"cell\", \"rows\": [2, 2], \"columns\": [10, 10], \"text_list\": []}, {\"location\": [[1123, 302], [1209, 302], [1209, 329], [1123, 329]], \"bbox\": [1123, 302, 1209, 329], \"points\": [[1123, 304], [1125, 302], [1206, 302], [1209, 305], [1209, 328], [1208, 329], [1125, 329], [1123, 328]], \"type\": \"cell\", \"rows\": [2, 2], \"columns\": [9, 9], \"text_list\": []}, {\"location\": [[1032, 302], [1118, 302], [1118, 329], [1032, 329]], \"bbox\": [1032, 302, 1118, 329], \"points\": [[1032, 304], [1033, 302], [1116, 302], [1118, 304], [1118, 328], [1116, 329], [1035, 329], [1032, 327], [1032, 325], [1033, 323], [1032, 322], [1033, 321], [1033, 314], [1032, 313], [1033, 312], [1032, 311], [1033, 309], [1032, 308]], \"type\": \"cell\", \"rows\": [2, 2], \"columns\": [8, 8], \"text_list\": []}, {\"location\": [[942, 302], [1027, 302], [1027, 329], [942, 329]], \"bbox\": [942, 302, 1027, 329], \"points\": [[942, 304], [943, 302], [1026, 
302], [1027, 304], [1027, 328], [1026, 329], [944, 329], [942, 327]], \"type\": \"cell\", \"rows\": [2, 2], \"columns\": [7, 7], \"text_list\": []}, {\"location\": [[852, 302], [937, 302], [937, 329], [852, 329]], \"bbox\": [852, 302, 937, 329], \"points\": [[852, 304], [853, 302], [936, 302], [937, 304], [937, 328], [936, 329], [854, 329], [853, 328], [853, 305]], \"type\": \"cell\", \"rows\": [2, 2], \"columns\": [6, 6], \"text_list\": []}, {\"location\": [[762, 302], [847, 302], [847, 329], [762, 329]], \"bbox\": [762, 302, 847, 329], \"points\": [[762, 304], [763, 302], [846, 302], [847, 304], [847, 328], [846, 329], [764, 329], [763, 328], [763, 305]], \"type\": \"cell\", \"rows\": [2, 2], \"columns\": [5, 5], \"text_list\": []}, {\"location\": [[673, 302], [759, 302], [759, 329], [673, 329]], \"bbox\": [673, 302, 759, 329], \"points\": [[673, 304], [674, 302], [756, 302], [759, 305], [759, 328], [757, 329], [674, 329], [673, 328]], \"type\": \"cell\", \"rows\": [2, 2], \"columns\": [4, 4], \"text_list\": []}, {\"location\": [[383, 302], [569, 302], [569, 329], [383, 329]], \"bbox\": [383, 302, 569, 329], \"points\": [[383, 304], [384, 302], [568, 302], [569, 304], [569, 328], [568, 329], [384, 329], [383, 328]], \"type\": \"cell\", \"rows\": [2, 2], \"columns\": [2, 2], \"text_list\": []}, {\"location\": [[1213, 273], [1298, 273], [1298, 298], [1213, 298]], \"bbox\": [1213, 273, 1298, 298], \"points\": [[1213, 274], [1215, 273], [1296, 273], [1298, 274], [1298, 297], [1296, 298], [1215, 298], [1213, 297]], \"type\": \"cell\", \"rows\": [1, 1], \"columns\": [10, 10], \"text_list\": []}, {\"location\": [[1123, 273], [1209, 273], [1209, 298], [1123, 298]], \"bbox\": [1123, 273, 1209, 298], \"points\": [[1123, 274], [1125, 273], [1208, 273], [1209, 274], [1209, 297], [1208, 298], [1125, 298], [1123, 297]], \"type\": \"cell\", \"rows\": [1, 1], \"columns\": [9, 9], \"text_list\": []}, {\"location\": [[1032, 273], [1118, 273], [1118, 298], [1032, 298]], \"bbox\": 
[1032, 273, 1118, 298], \"points\": [[1032, 274], [1033, 273], [1116, 273], [1118, 274], [1118, 297], [1116, 298], [1035, 298], [1032, 295]], \"type\": \"cell\", \"rows\": [1, 1], \"columns\": [8, 8], \"text_list\": []}, {\"location\": [[942, 273], [1027, 273], [1027, 298], [942, 298]], \"bbox\": [942, 273, 1027, 298], \"points\": [[942, 274], [943, 273], [1026, 273], [1027, 274], [1027, 297], [1026, 298], [943, 298], [942, 297]], \"type\": \"cell\", \"rows\": [1, 1], \"columns\": [7, 7], \"text_list\": []}, {\"location\": [[852, 273], [937, 273], [937, 298], [852, 298]], \"bbox\": [852, 273, 937, 298], \"points\": [[852, 274], [853, 273], [936, 273], [937, 274], [937, 297], [936, 298], [854, 298], [853, 297], [853, 281], [852, 280]], \"type\": \"cell\", \"rows\": [1, 1], \"columns\": [6, 6], \"text_list\": []}, {\"location\": [[762, 273], [847, 273], [847, 298], [762, 298]], \"bbox\": [762, 273, 847, 298], \"points\": [[762, 274], [763, 273], [846, 273], [847, 274], [847, 297], [846, 298], [763, 298], [762, 297], [763, 295], [763, 277], [762, 276]], \"type\": \"cell\", \"rows\": [1, 1], \"columns\": [5, 5], \"text_list\": []}, {\"location\": [[673, 273], [759, 273], [759, 298], [673, 298]], \"bbox\": [673, 273, 759, 298], \"points\": [[673, 274], [674, 273], [757, 273], [759, 274], [759, 295], [756, 298], [674, 298], [673, 297]], \"type\": \"cell\", \"rows\": [1, 1], \"columns\": [4, 4], \"text_list\": []}, {\"location\": [[573, 273], [668, 273], [668, 576], [573, 576]], \"bbox\": [573, 273, 668, 576], \"points\": [[574, 274], [575, 273], [667, 273], [668, 274], [668, 575], [667, 576], [574, 576], [573, 575], [574, 574], [574, 457], [575, 456], [575, 453], [574, 452]], \"type\": \"cell\", \"rows\": [1, 10], \"columns\": [3, 3], \"text_list\": []}, {\"location\": [[383, 273], [569, 273], [569, 298], [383, 298]], \"bbox\": [383, 273, 569, 298], \"points\": [[383, 274], [384, 273], [568, 273], [569, 274], [569, 297], [568, 298], [384, 298], [383, 297]], \"type\": 
\"cell\", \"rows\": [1, 1], \"columns\": [2, 2], \"text_list\": []}, {\"location\": [[233, 273], [377, 273], [377, 329], [233, 329]], \"bbox\": [233, 273, 377, 329], \"points\": [[233, 274], [235, 273], [376, 273], [377, 274], [377, 328], [376, 329], [236, 329], [235, 328], [235, 305], [233, 304], [235, 302], [233, 301], [233, 294], [235, 293], [233, 292], [235, 291], [233, 290], [235, 288], [235, 276]], \"type\": \"cell\", \"rows\": [1, 2], \"columns\": [1, 1], \"text_list\": []}, {\"location\": [[166, 272], [231, 272], [231, 576], [166, 576]], \"bbox\": [166, 272, 231, 576], \"points\": [[166, 273], [167, 272], [168, 273], [230, 273], [231, 274], [231, 278], [230, 279], [230, 329], [231, 330], [230, 332], [230, 339], [231, 340], [231, 353], [230, 354], [230, 360], [231, 361], [230, 362], [230, 385], [231, 387], [231, 392], [230, 394], [230, 430], [231, 431], [231, 459], [230, 460], [230, 464], [231, 465], [231, 467], [230, 468], [230, 484], [231, 485], [231, 486], [230, 487], [230, 488], [231, 490], [230, 491], [231, 492], [231, 508], [230, 509], [230, 526], [231, 527], [231, 534], [230, 535], [230, 571], [231, 573], [231, 575], [230, 576], [167, 576], [166, 575]], \"type\": \"cell\", \"rows\": [1, 10], \"columns\": [0, 0], \"text_list\": []}, {\"location\": [[1212, 240], [1298, 240], [1298, 267], [1212, 267]], \"bbox\": [1212, 240, 1298, 267], \"points\": [[1215, 240], [1216, 242], [1217, 240], [1296, 240], [1298, 242], [1298, 266], [1296, 267], [1215, 267], [1212, 265], [1213, 264], [1212, 263], [1212, 243]], \"type\": \"cell\", \"rows\": [0, 0], \"columns\": [10, 10], \"text_list\": []}, {\"location\": [[1123, 240], [1209, 240], [1209, 267], [1123, 267]], \"bbox\": [1123, 240, 1209, 267], \"points\": [[1123, 242], [1125, 240], [1126, 242], [1127, 242], [1128, 240], [1170, 240], [1171, 242], [1173, 240], [1183, 240], [1184, 242], [1185, 240], [1190, 240], [1191, 242], [1192, 240], [1208, 240], [1209, 242], [1209, 266], [1208, 267], [1125, 267], [1123, 266]], 
\"type\": \"cell\", \"rows\": [0, 0], \"columns\": [9, 9], \"text_list\": []}, {\"location\": [[1032, 240], [1118, 240], [1118, 267], [1032, 267]], \"bbox\": [1032, 240, 1118, 267], \"points\": [[1035, 240], [1036, 242], [1037, 240], [1038, 242], [1039, 240], [1116, 240], [1118, 242], [1118, 266], [1116, 267], [1035, 267], [1032, 265], [1032, 243]], \"type\": \"cell\", \"rows\": [0, 0], \"columns\": [8, 8], \"text_list\": []}, {\"location\": [[942, 240], [1027, 240], [1027, 267], [942, 267]], \"bbox\": [942, 240, 1027, 267], \"points\": [[942, 242], [943, 240], [944, 240], [946, 242], [947, 242], [948, 240], [1026, 240], [1027, 242], [1027, 266], [1026, 267], [943, 267], [942, 266]], \"type\": \"cell\", \"rows\": [0, 0], \"columns\": [7, 7], \"text_list\": []}, {\"location\": [[852, 240], [937, 240], [937, 267], [852, 267]], \"bbox\": [852, 240, 937, 267], \"points\": [[854, 240], [856, 242], [857, 240], [858, 242], [859, 240], [860, 242], [861, 240], [873, 240], [874, 242], [875, 240], [877, 242], [880, 242], [881, 240], [885, 240], [886, 242], [887, 240], [892, 240], [893, 242], [894, 240], [897, 240], [898, 242], [899, 240], [936, 240], [937, 242], [937, 266], [936, 267], [854, 267], [852, 265], [852, 243]], \"type\": \"cell\", \"rows\": [0, 0], \"columns\": [6, 6], \"text_list\": []}, {\"location\": [[762, 240], [847, 240], [847, 267], [762, 267]], \"bbox\": [762, 240, 847, 267], \"points\": [[762, 242], [763, 240], [764, 240], [766, 242], [767, 242], [768, 240], [769, 242], [770, 242], [771, 240], [781, 240], [782, 242], [785, 242], [787, 240], [788, 242], [790, 242], [791, 240], [795, 240], [796, 242], [797, 240], [803, 240], [804, 242], [805, 240], [846, 240], [847, 242], [847, 266], [846, 267], [763, 267], [762, 266]], \"type\": \"cell\", \"rows\": [0, 0], \"columns\": [5, 5], \"text_list\": []}, {\"location\": [[673, 240], [759, 240], [759, 267], [673, 267]], \"bbox\": [673, 240, 759, 267], \"points\": [[673, 242], [674, 240], [675, 242], [678, 242], [679, 
240], [680, 242], [681, 240], [693, 240], [694, 242], [695, 240], [697, 242], [700, 242], [701, 240], [705, 240], [706, 242], [707, 240], [712, 240], [713, 242], [714, 240], [756, 240], [759, 243], [759, 265], [756, 267], [674, 267], [673, 266]], \"type\": \"cell\", \"rows\": [0, 0], \"columns\": [4, 4], \"text_list\": []}, {\"location\": [[573, 240], [668, 240], [668, 267], [573, 267]], \"bbox\": [573, 240, 668, 267], \"points\": [[601, 242], [602, 240], [630, 240], [631, 242], [632, 242], [633, 240], [637, 240], [638, 242], [667, 242], [668, 243], [668, 266], [667, 267], [574, 267], [573, 266], [573, 243], [574, 242]], \"type\": \"cell\", \"rows\": [0, 0], \"columns\": [3, 3], \"text_list\": []}, {\"location\": [[166, 240], [569, 240], [569, 267], [166, 267]], \"bbox\": [166, 240, 569, 267], \"points\": [[166, 242], [167, 240], [168, 242], [568, 242], [569, 243], [569, 266], [568, 267], [167, 267], [166, 266]], \"type\": \"cell\", \"rows\": [0, 0], \"columns\": [0, 2], \"text_list\": []}, {\"location\": [[161, 236], [1303, 236], [1303, 581], [161, 581]], \"bbox\": [161, 236, 1303, 581], \"points\": [[162, 236], [162, 238], [161, 239], [161, 580], [162, 581], [1302, 581], [1303, 580], [1303, 237], [166, 237], [164, 236]], \"type\": \"table\", \"contains\": [85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182]}]}}]\n"
  },
  {
    "path": "libs/kotaemon/tests/resources/html/dummy.html",
    "content": "<html><head><meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" /><meta http-equiv=\"Content-Style-Type\" content=\"text/css\" /><title></title></head><body style=\"text-align:justify; line-height:18pt; widows:0; orphans:0; font-family:'ＭＳ 明朝'; font-size:10.5pt\"><div><div style=\"-aw-headerfooter-type:header-primary; clear:both\"><p style=\"margin-top:0pt; margin-bottom:0pt\"><span style=\"font-family:'ＭＳ ゴシック'; font-size:10pt\">細則　本社編（情報システム部）　　　　　　　　　　　</span><span style=\"width:37.85pt; font-family:'ＭＳ ゴシック'; font-size:10pt; display:inline-block\">&#xa0;</span><span style=\"width:42.55pt; font-family:'ＭＳ ゴシック'; font-size:10pt; display:inline-block\">&#xa0;</span><span style=\"font-family:'ＭＳ ゴシック'; font-size:10pt\">　分類番号　１５７３００</span></p><p style=\"margin-top:0pt; margin-right:5.65pt; margin-bottom:0pt; text-align:right\"><span style=\"font-family:'ＭＳ ゴシック'; font-size:10pt\">２０２０．２</span></p><p style=\"margin-top:0pt; margin-bottom:0pt; line-height:150%; font-size:9pt\"><span style=\"font-family:'Times New Roman'; -aw-import:ignore\">&#xa0;</span></p></div><p style=\"margin-top:0pt; margin-bottom:0pt\"></p><p style=\"margin-top:0pt; margin-bottom:0pt\"><span style=\"height:0pt; text-align:left; display:block; position:absolute; z-index:0\"><img src=\"dummy_image.png\" width=\"605\" height=\"50\" alt=\"\" style=\"margin-top:8.45pt; margin-left:1.55pt; -aw-left-pos:2.05pt; -aw-rel-hpos:column; -aw-rel-vpos:paragraph; -aw-top-pos:8.95pt; -aw-wrap-type:none; position:absolute\" /></span><span style=\"font-family:'ＭＳ ゴシック'; -aw-import:ignore\">&#xa0;</span></p><p style=\"margin-top:0pt; margin-bottom:0pt\"><span style=\"font-family:'ＭＳ ゴシック'; -aw-import:ignore\">&#xa0;</span></p><p style=\"margin-top:0pt; margin-bottom:0pt\"><span style=\"font-family:'ＭＳ ゴシック'; -aw-import:ignore\">&#xa0;</span></p><p style=\"margin-top:0pt; margin-left:21pt; margin-bottom:0pt; text-indent:-21pt; -aw-import:list-item; -aw-list-level-number:0; 
-aw-list-number-format:'%0．'; -aw-list-number-styles:'decimalFullWidth'; -aw-list-number-values:'1'; -aw-list-padding-sml:1pt\"><span style=\"-aw-import:ignore\"><span style=\"font-family:'ＭＳ ゴシック'; font-size:10pt\">１．</span><span style=\"width:1pt; font:7pt 'Times New Roman'; display:inline-block\"> </span></span><span style=\"font-family:'ＭＳ ゴシック'; font-size:10pt\">スパットくん紛失・盗難時の取扱</span></p><p style=\"margin-top:0pt; margin-left:16.9pt; margin-bottom:2.4pt\"><span style=\"font-family:'ＭＳ ゴシック'; font-size:10pt\">スパットくんの紛失・盗難の際は、速やかに停止依頼処理を入力するとともに、報告書を起票します。</span></p><table cellspacing=\"0\" cellpadding=\"0\" style=\"margin-left:16.8pt; border:0.75pt solid #000000; -aw-border:0.5pt single; border-collapse:collapse\"><tr style=\"height:19.8pt\"><td style=\"width:31.05pt; border-right-style:solid; border-right-width:0.75pt; border-bottom-style:solid; border-bottom-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; background-color:#e0e0e0; -aw-border-bottom:0.5pt single; -aw-border-right:0.5pt single\"><p style=\"margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:center; line-height:18pt\"><span style=\"font-family:'ＭＳ ゴシック'; font-size:10pt\">ＮＯ</span></p></td><td style=\"width:80.6pt; border-right-style:solid; border-right-width:0.75pt; border-left-style:solid; border-left-width:0.75pt; border-bottom-style:solid; border-bottom-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; background-color:#e0e0e0; -aw-border-bottom:0.5pt single; -aw-border-left:0.5pt single; -aw-border-right:0.5pt single\"><p style=\"margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:center; line-height:18pt\"><span style=\"font-family:'ＭＳ ゴシック'; font-size:10pt\">項目</span></p></td><td style=\"width:303.6pt; border-left-style:solid; border-left-width:0.75pt; border-bottom-style:solid; border-bottom-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; background-color:#e0e0e0; 
-aw-border-bottom:0.5pt single; -aw-border-left:0.5pt single\"><p style=\"margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:center; line-height:18pt\"><span style=\"font-family:'ＭＳ ゴシック'; font-size:10pt\">内容</span></p></td></tr><tr style=\"height:85.5pt; page-break-inside:avoid\"><td style=\"width:31.05pt; border-top-style:solid; border-top-width:0.75pt; border-right-style:solid; border-right-width:0.75pt; border-bottom-style:dotted; border-bottom-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; -aw-border-bottom:0.5pt dot; -aw-border-right:0.5pt single; -aw-border-top:0.5pt single\"><p style=\"margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:center; line-height:18pt\"><span style=\"font-family:'ＭＳ ゴシック'; font-size:10pt\">１</span></p></td><td style=\"width:80.6pt; border-style:solid solid dotted; border-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; -aw-border-bottom:0.5pt dot; -aw-border-left:0.5pt single; -aw-border-right:0.5pt single; -aw-border-top:0.5pt single\"><p style=\"margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:justify; line-height:18pt\"><span style=\"font-family:'ＭＳ ゴシック'; font-size:10pt\">対象のスパット</span></p><p style=\"margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:justify; line-height:18pt\"><span style=\"font-family:'ＭＳ ゴシック'; font-size:10pt\">くん確認</span></p></td><td style=\"width:303.6pt; border-top-style:solid; border-top-width:0.75pt; border-left-style:solid; border-left-width:0.75pt; border-bottom-style:dotted; border-bottom-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; -aw-border-bottom:0.5pt dot; -aw-border-left:0.5pt single; -aw-border-top:0.5pt single\"><p style=\"margin-top:0pt; margin-bottom:2.4pt; text-indent:0.3pt; text-align:justify; line-height:18pt\"><span style=\"font-family:'ＭＳ ゴシック'; font-size:10pt\">紛失・盗難に気づいた時には、対象のスパットくんの端末識別番号を確認します。</span></p><p style=\"margin-top:2.4pt; 
margin-left:11pt; margin-bottom:0pt; text-indent:0.3pt; text-align:justify; line-height:16pt\"><span style=\"font-family:'ＭＳ ゴシック'; font-size:10pt\">※盗難の場合は警察への届出も必要です。</span></p><p style=\"margin-top:0pt; margin-left:11pt; margin-bottom:0pt; text-indent:0.3pt; text-align:justify; line-height:16pt\"><span style=\"font-family:'ＭＳ ゴシック'; font-size:10pt\">※紛失・盗難の場合は盗難・紛失事故報告も必要です。</span></p><p style=\"margin-top:0pt; margin-left:23.75pt; margin-bottom:0pt; text-indent:0.3pt; text-align:justify; line-height:16pt\"><span style=\"font-family:'ＭＳ ゴシック'; font-size:10pt\">あいリクエスト（総務室（大阪））</span></p><p style=\"margin-top:0pt; margin-left:23.75pt; margin-bottom:0pt; text-indent:0.3pt; text-align:justify; line-height:16pt\"><span style=\"font-family:'ＭＳ ゴシック'; font-size:10pt\">『盗難・紛失事故兼個人情報等事故報告』</span></p></td></tr><tr style=\"height:35.4pt; page-break-inside:avoid\"><td style=\"width:31.05pt; border-top-style:dotted; border-top-width:0.75pt; border-right-style:solid; border-right-width:0.75pt; border-bottom-style:dotted; border-bottom-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; -aw-border-bottom:0.5pt dot; -aw-border-right:0.5pt single; -aw-border-top:0.5pt dot\"><p style=\"margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:center; line-height:18pt\"><span style=\"font-family:'ＭＳ ゴシック'; font-size:10pt\">２</span></p></td><td style=\"width:80.6pt; border-style:dotted solid; border-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; -aw-border-bottom:0.5pt dot; -aw-border-left:0.5pt single; -aw-border-right:0.5pt single; -aw-border-top:0.5pt dot\"><p style=\"margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:justify; line-height:18pt\"><span style=\"font-family:'ＭＳ ゴシック'; font-size:10pt\">報告書の起票</span></p></td><td style=\"width:303.6pt; border-top-style:dotted; border-top-width:0.75pt; border-left-style:solid; border-left-width:0.75pt; border-bottom-style:dotted; border-bottom-width:0.75pt; 
padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; -aw-border-bottom:0.5pt dot; -aw-border-left:0.5pt single; -aw-border-top:0.5pt dot\"><p style=\"margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:justify; line-height:18pt\"><span style=\"font-family:'ＭＳ ゴシック'; font-size:10pt\">あいリクエスト（システム業務室）-『モバイル決済端末（スパットくん）紛失・盗難報告書』を起票します。</span></p></td></tr><tr style=\"height:62.2pt; page-break-inside:avoid\"><td style=\"width:31.05pt; border-top-style:dotted; border-top-width:0.75pt; border-right-style:solid; border-right-width:0.75pt; border-bottom-style:dotted; border-bottom-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; -aw-border-bottom:0.5pt dot; -aw-border-right:0.5pt single; -aw-border-top:0.5pt dot\"><p style=\"margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:center; line-height:18pt\"><span style=\"font-family:'ＭＳ ゴシック'; font-size:10pt\">３</span></p></td><td style=\"width:80.6pt; border-style:dotted solid; border-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; -aw-border-bottom:0.5pt dot; -aw-border-left:0.5pt single; -aw-border-right:0.5pt single; -aw-border-top:0.5pt dot\"><p style=\"margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:justify; line-height:18pt\"><span style=\"font-family:'ＭＳ ゴシック'; font-size:10pt\">停止依頼入力</span></p></td><td style=\"width:303.6pt; border-top-style:dotted; border-top-width:0.75pt; border-left-style:solid; border-left-width:0.75pt; border-bottom-style:dotted; border-bottom-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; -aw-border-bottom:0.5pt dot; -aw-border-left:0.5pt single; -aw-border-top:0.5pt dot\"><p style=\"margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:justify; line-height:18pt\"><span style=\"font-family:'ＭＳ ゴシック'; font-size:10pt\">モバイル端末管理ウェブより停止依頼処理を入力します。</span></p><p style=\"margin-top:0pt; margin-left:11.55pt; margin-bottom:0pt; text-indent:0.3pt; 
text-align:justify; line-height:18pt\"><span style=\"font-family:'ＭＳ ゴシック'; font-size:10pt\">※停止依頼入力により該当スパットくんは使用不可となります。</span></p></td></tr><tr style=\"height:45.75pt; page-break-inside:avoid\"><td style=\"width:31.05pt; border-top-style:dotted; border-top-width:0.75pt; border-right-style:solid; border-right-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; -aw-border-right:0.5pt single; -aw-border-top:0.5pt dot\"><p style=\"margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:center; line-height:18pt\"><span style=\"font-family:'ＭＳ ゴシック'; font-size:10pt\">４</span></p></td><td style=\"width:80.6pt; border-top-style:dotted; border-top-width:0.75pt; border-right-style:solid; border-right-width:0.75pt; border-left-style:solid; border-left-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; -aw-border-left:0.5pt single; -aw-border-right:0.5pt single; -aw-border-top:0.5pt dot\"><p style=\"margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:justify; line-height:18pt\"><span style=\"font-family:'ＭＳ ゴシック'; font-size:10pt\">報告書の承認と</span></p><p style=\"margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:justify; line-height:18pt\"><span style=\"font-family:'ＭＳ ゴシック'; font-size:10pt\">担当室への報告</span></p></td><td style=\"width:303.6pt; border-top-style:dotted; border-top-width:0.75pt; border-left-style:solid; border-left-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; -aw-border-left:0.5pt single; -aw-border-top:0.5pt dot\"><p style=\"margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:justify; line-height:18pt\"><span style=\"font-family:'ＭＳ ゴシック'; font-size:10pt\">所属長はあいリクエストにて申請・送付された報告書を確認・承認します。承認後、報告書をあいリクエスト（総務室（大阪））－『紛失・盗難事故　兼　個人情報等事故報告』に添付し報告します。（関連細則：</span><a href=\"https://a3.itr.sumitomolife.co.jp/sso/dfw/kn/zc/open/zcko0102.do?DOCID=H00044505\" style=\"text-decoration:none\"><span style=\"font-family:'ＭＳ ゴシック'; font-size:10pt; 
text-decoration:underline; color:#0000ff\">本支社編11110／210060「紛失・盗難事故等の被害報告」</span></a><span style=\"font-family:'ＭＳ ゴシック'; font-size:10pt\">を参照）</span></p><p style=\"margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:justify; line-height:18pt\"><span style=\"font-family:'ＭＳ ゴシック'; font-size:10pt\">※日計処理の実施が「無」の場合本社担当室（システム業務室、</span></p><p style=\"margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:justify; line-height:18pt\"><span style=\"font-family:'ＭＳ ゴシック'; font-size:10pt\">契約審査室、収納サービス室、損保サービス室）へ連絡する。</span></p></td></tr></table><p style=\"margin-top:0pt; margin-bottom:0pt; text-indent:136.5pt; line-height:17pt\"><a href=\"https://a3.itr.sumitomolife.co.jp/sso/dfw/kn/zc/open/zcko0102.do?DOCID=H00028928\" style=\"text-decoration:none\"><span style=\"font-family:'ＭＳ ゴシック'; font-size:10pt; text-decoration:underline; color:#0000ff\">【関連マニュアル】　スパットくん・モバイル管理ウェブ操作マニュアル</span></a></p><p style=\"margin-top:0pt; margin-bottom:0pt; text-indent:130pt; line-height:17pt\"><span style=\"font-family:'ＭＳ ゴシック'; font-size:10pt; -aw-import:ignore\">&#xa0;</span></p><p style=\"margin-top:0pt; margin-bottom:0pt; text-indent:10pt\"><span style=\"font-family:'ＭＳ ゴシック'; font-size:10pt\">＜スパットくん再発見時の対応＞</span></p><table cellspacing=\"0\" cellpadding=\"0\" style=\"margin-left:16.8pt; border:0.75pt solid #000000; -aw-border:0.5pt single; border-collapse:collapse\"><tr style=\"height:19.8pt\"><td style=\"width:31.05pt; border-right-style:solid; border-right-width:0.75pt; border-bottom-style:solid; border-bottom-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; background-color:#e0e0e0; -aw-border-bottom:0.5pt single; -aw-border-right:0.5pt single\"><p style=\"margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:center; line-height:18pt\"><span style=\"font-family:'ＭＳ ゴシック'; font-size:10pt\">ＮＯ</span></p></td><td style=\"width:80.6pt; border-right-style:solid; border-right-width:0.75pt; border-left-style:solid; 
border-left-width:0.75pt; border-bottom-style:solid; border-bottom-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; background-color:#e0e0e0; -aw-border-bottom:0.5pt single; -aw-border-left:0.5pt single; -aw-border-right:0.5pt single\"><p style=\"margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:center; line-height:18pt\"><span style=\"font-family:'ＭＳ ゴシック'; font-size:10pt\">項目</span></p></td><td style=\"width:303.6pt; border-left-style:solid; border-left-width:0.75pt; border-bottom-style:solid; border-bottom-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; background-color:#e0e0e0; -aw-border-bottom:0.5pt single; -aw-border-left:0.5pt single\"><p style=\"margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:center; line-height:18pt\"><span style=\"font-family:'ＭＳ ゴシック'; font-size:10pt\">内容</span></p></td></tr><tr style=\"height:49.7pt; page-break-inside:avoid\"><td style=\"width:31.05pt; border-top-style:dotted; border-top-width:0.75pt; border-right-style:solid; border-right-width:0.75pt; border-bottom-style:dotted; border-bottom-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; -aw-border-bottom:0.5pt dot; -aw-border-right:0.5pt single; -aw-border-top:0.5pt dot\"><p style=\"margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:center; line-height:18pt\"><span style=\"font-family:'ＭＳ ゴシック'; font-size:10pt\">１</span></p></td><td style=\"width:80.6pt; border-style:dotted solid; border-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; -aw-border-bottom:0.5pt dot; -aw-border-left:0.5pt single; -aw-border-right:0.5pt single; -aw-border-top:0.5pt dot\"><p style=\"margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:justify; line-height:18pt\"><span style=\"font-family:'ＭＳ ゴシック'; font-size:10pt\">端末設置組織の</span></p><p style=\"margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:justify; 
line-height:18pt\"><span style=\"font-family:'ＭＳ ゴシック'; font-size:10pt\">確認</span></p></td><td style=\"width:303.6pt; border-top-style:dotted; border-top-width:0.75pt; border-left-style:solid; border-left-width:0.75pt; border-bottom-style:dotted; border-bottom-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; -aw-border-bottom:0.5pt dot; -aw-border-left:0.5pt single; -aw-border-top:0.5pt dot\"><p style=\"margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:justify; line-height:18pt\"><span style=\"font-family:'ＭＳ ゴシック'; font-size:10pt\">スパットくんの設置状況照会を行い、紛失・盗難となったスパットくんであるか確認します。</span></p></td></tr><tr style=\"height:56.75pt; page-break-inside:avoid\"><td style=\"width:31.05pt; border-top-style:dotted; border-top-width:0.75pt; border-right-style:solid; border-right-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; -aw-border-right:0.5pt single; -aw-border-top:0.5pt dot\"><p style=\"margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:center; line-height:18pt\"><span style=\"font-family:'ＭＳ ゴシック'; font-size:10pt\">２</span></p></td><td style=\"width:80.6pt; border-top-style:dotted; border-top-width:0.75pt; border-right-style:solid; border-right-width:0.75pt; border-left-style:solid; border-left-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; -aw-border-left:0.5pt single; -aw-border-right:0.5pt single; -aw-border-top:0.5pt dot\"><p style=\"margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:justify; line-height:18pt\"><span style=\"font-family:'ＭＳ ゴシック'; font-size:10pt\">端末停止解除</span></p><p style=\"margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:justify; line-height:18pt\"><span style=\"font-family:'ＭＳ ゴシック'; font-size:10pt\">入力</span></p></td><td style=\"width:303.6pt; border-top-style:dotted; border-top-width:0.75pt; border-left-style:solid; border-left-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; 
-aw-border-left:0.5pt single; -aw-border-top:0.5pt dot\"><p style=\"margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:justify; line-height:18pt\"><span style=\"font-family:'ＭＳ ゴシック'; font-size:10pt\">モバイル端末管理ウェブより停止解除処理を入力します。</span></p><p style=\"margin-top:0pt; margin-left:20.7pt; margin-bottom:0pt; text-indent:-10.1pt; text-align:justify; line-height:16pt\"><span style=\"font-family:'ＭＳ ゴシック'; font-size:10pt\">※スパットくんの利用再開は、支社にて停止解除入力から２営業日以上経過してから利用下さい。</span></p></td></tr></table><p style=\"margin-top:0pt; margin-bottom:0pt; text-indent:9.75pt; text-align:center\"><span>　　　　　　　　</span><a href=\"https://a3.itr.sumitomolife.co.jp/sso/dfw/kn/zc/open/zcko0102.do?DOCID=H00028928\" style=\"text-decoration:none\"><span style=\"font-family:'ＭＳ ゴシック'; font-size:10pt; text-decoration:underline; color:#0000ff\">【関連マニュアル】　スパットくん・モバイル管理ウェブ操作マニュアル</span></a></p><p style=\"margin-top:0pt; margin-bottom:0pt\"><span style=\"font-family:'ＭＳ ゴシック'; -aw-import:ignore\">&#xa0;</span></p><p style=\"margin-top:0pt; margin-bottom:0pt\"><span style=\"font-family:'ＭＳ ゴシック'; -aw-import:ignore\">&#xa0;</span></p><p style=\"margin-top:0pt; margin-bottom:0pt\"><span style=\"font-family:'ＭＳ ゴシック'; -aw-import:ignore\">&#xa0;</span></p><p style=\"margin-top:0pt; margin-left:21pt; margin-bottom:0pt; text-indent:-21pt; -aw-import:list-item; -aw-list-level-number:0; -aw-list-number-format:'%0．'; -aw-list-number-styles:'decimalFullWidth'; -aw-list-number-values:'2'; -aw-list-padding-sml:1pt\"><span style=\"-aw-import:ignore\"><span style=\"font-family:'ＭＳ ゴシック'; font-size:10pt\">２．</span><span style=\"width:1pt; font:7pt 'Times New Roman'; display:inline-block\"> </span></span><span style=\"font-family:'ＭＳ ゴシック'; font-size:10pt\">スパットくん故障時の取扱</span></p><p style=\"margin-top:0pt; margin-left:15.95pt; margin-bottom:2.4pt\"><span style=\"font-family:'ＭＳ ゴシック'; font-size:10pt\">スパットくんが故障した場合には、代替機への交換と故障機の返却を行います。</span></p><table cellspacing=\"0\" cellpadding=\"0\" 
style=\"margin-left:16.8pt; border:0.75pt solid #000000; -aw-border:0.5pt single; border-collapse:collapse\"><tr style=\"height:19.8pt\"><td style=\"width:30.35pt; border-right-style:solid; border-right-width:0.75pt; border-bottom-style:solid; border-bottom-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; background-color:#e0e0e0; -aw-border-bottom:0.5pt single; -aw-border-right:0.5pt single\"><p style=\"margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:center; line-height:18pt\"><span style=\"font-family:'ＭＳ ゴシック'; font-size:10pt\">ＮＯ</span></p></td><td style=\"width:78.25pt; border-right-style:solid; border-right-width:0.75pt; border-left-style:solid; border-left-width:0.75pt; border-bottom-style:solid; border-bottom-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; background-color:#e0e0e0; -aw-border-bottom:0.5pt single; -aw-border-left:0.5pt single; -aw-border-right:0.5pt single\"><p style=\"margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:center; line-height:18pt\"><span style=\"font-family:'ＭＳ ゴシック'; font-size:10pt\">項目</span></p></td><td style=\"width:317.35pt; border-left-style:solid; border-left-width:0.75pt; border-bottom-style:solid; border-bottom-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; background-color:#e0e0e0; -aw-border-bottom:0.5pt single; -aw-border-left:0.5pt single\"><p style=\"margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:center; line-height:18pt\"><span style=\"font-family:'ＭＳ ゴシック'; font-size:10pt\">内容</span></p></td></tr><tr style=\"height:55.55pt\"><td style=\"width:30.35pt; border-top-style:solid; border-top-width:0.75pt; border-right-style:solid; border-right-width:0.75pt; border-bottom-style:dotted; border-bottom-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; -aw-border-bottom:0.5pt dot; -aw-border-right:0.5pt single; -aw-border-top:0.5pt single\"><p 
style=\"margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:center; line-height:18pt\"><span style=\"font-family:'ＭＳ ゴシック'; font-size:10pt\">１</span></p></td><td style=\"width:78.25pt; border-style:solid solid dotted; border-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; -aw-border-bottom:0.5pt dot; -aw-border-left:0.5pt single; -aw-border-right:0.5pt single; -aw-border-top:0.5pt single\"><p style=\"margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:justify; line-height:18pt\"><span style=\"font-family:'ＭＳ ゴシック'; font-size:10pt\">故障内容の確認と報告書の起票</span></p></td><td style=\"width:317.35pt; border-top-style:solid; border-top-width:0.75pt; border-left-style:solid; border-left-width:0.75pt; border-bottom-style:dotted; border-bottom-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; -aw-border-bottom:0.5pt dot; -aw-border-left:0.5pt single; -aw-border-top:0.5pt single\"><p style=\"margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:justify; line-height:18pt\"><span style=\"font-family:'ＭＳ ゴシック'; font-size:10pt\">スパットくんが故障した場合は、あいリクエスト（システム業務室）-『モバイル決済端末（スパットくん）故障報告書』を起票します。</span></p></td></tr><tr style=\"height:106pt; page-break-inside:avoid\"><td style=\"width:30.35pt; border-top-style:dotted; border-top-width:0.75pt; border-right-style:solid; border-right-width:0.75pt; border-bottom-style:dotted; border-bottom-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; -aw-border-bottom:0.5pt dot; -aw-border-right:0.5pt single; -aw-border-top:0.5pt dot\"><p style=\"margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:center; line-height:18pt\"><span style=\"font-family:'ＭＳ ゴシック'; font-size:10pt\">２</span></p></td><td style=\"width:78.25pt; border-style:dotted solid; border-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; -aw-border-bottom:0.5pt dot; -aw-border-left:0.5pt single; -aw-border-right:0.5pt single; 
-aw-border-top:0.5pt dot\"><p style=\"margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:justify; line-height:18pt\"><span style=\"font-family:'ＭＳ ゴシック'; font-size:10pt\">修理依頼入力</span></p><p style=\"margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:center; line-height:18pt\"><span style=\"font-family:'Times New Roman'; font-size:10pt; font-weight:bold; -aw-import:ignore\">&#xa0;</span></p></td><td style=\"width:317.35pt; border-top-style:dotted; border-top-width:0.75pt; border-left-style:solid; border-left-width:0.75pt; border-bottom-style:dotted; border-bottom-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; -aw-border-bottom:0.5pt dot; -aw-border-left:0.5pt single; -aw-border-top:0.5pt dot\"><p style=\"margin-top:0pt; margin-bottom:0pt; text-indent:0.2pt; text-align:justify; line-height:18pt\"><span style=\"font-family:'ＭＳ ゴシック'; font-size:10pt\">『モバイル端末管理ウェブ』より修理依頼処理を入力します。修理</span></p><p style=\"margin-top:0pt; margin-bottom:0pt; text-indent:0.2pt; text-align:justify; line-height:18pt\"><span style=\"font-family:'ＭＳ ゴシック'; font-size:10pt\">依頼処理の入力により、代替機が送付されます。</span><br /><span style=\"font-family:'ＭＳ ゴシック'; font-size:10pt\">その際、「モバイル決済端末修理依頼書」を印刷し、「モバイル決済端末（スパットくん）故障報告書」を参照して必要事項を記入します。</span></p></td></tr><tr style=\"height:76.5pt; page-break-inside:avoid\"><td style=\"width:30.35pt; border-top-style:dotted; border-top-width:0.75pt; border-right-style:solid; border-right-width:0.75pt; border-bottom-style:dotted; border-bottom-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; -aw-border-bottom:0.5pt dot; -aw-border-right:0.5pt single; -aw-border-top:0.5pt dot\"><p style=\"margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:center; line-height:18pt\"><span style=\"font-family:'ＭＳ ゴシック'; font-size:10pt\">３</span></p></td><td style=\"width:78.25pt; border-style:dotted solid; border-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; 
-aw-border-bottom:0.5pt dot; -aw-border-left:0.5pt single; -aw-border-right:0.5pt single; -aw-border-top:0.5pt dot\"><p style=\"margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:justify; line-height:18pt\"><span style=\"font-family:'ＭＳ ゴシック'; font-size:10pt\">報告書の承認と担当室への報告</span></p></td><td style=\"width:317.35pt; border-top-style:dotted; border-top-width:0.75pt; border-left-style:solid; border-left-width:0.75pt; border-bottom-style:dotted; border-bottom-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; -aw-border-bottom:0.5pt dot; -aw-border-left:0.5pt single; -aw-border-top:0.5pt dot\"><p style=\"margin-top:0pt; margin-left:0.2pt; margin-bottom:0pt; text-indent:-0.2pt; text-align:justify; line-height:18pt\"><span style=\"font-family:'ＭＳ ゴシック'; font-size:10pt\">所属長はあいリクエストにて申請・送付された報告書を確認・承認し、システム業務室に報告します。</span></p></td></tr><tr style=\"height:86.2pt; page-break-inside:avoid\"><td style=\"width:30.35pt; border-top-style:dotted; border-top-width:0.75pt; border-right-style:solid; border-right-width:0.75pt; border-bottom-style:dotted; border-bottom-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; -aw-border-bottom:0.5pt dot; -aw-border-right:0.5pt single; -aw-border-top:0.5pt dot\"><p style=\"margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:center; line-height:18pt\"><span style=\"font-family:'ＭＳ ゴシック'; font-size:10pt\">４</span></p></td><td style=\"width:78.25pt; border-style:dotted solid; border-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; -aw-border-bottom:0.5pt dot; -aw-border-left:0.5pt single; -aw-border-right:0.5pt single; -aw-border-top:0.5pt dot\"><p style=\"margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:justify; line-height:18pt\"><span style=\"font-family:'ＭＳ ゴシック'; font-size:10pt\">梱包</span></p></td><td style=\"width:317.35pt; border-top-style:dotted; border-top-width:0.75pt; border-left-style:solid; 
border-left-width:0.75pt; border-bottom-style:dotted; border-bottom-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; -aw-border-bottom:0.5pt dot; -aw-border-left:0.5pt single; -aw-border-top:0.5pt dot\"><p style=\"margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:justify; line-height:18pt\"><span style=\"font-family:'ＭＳ ゴシック'; font-size:10pt\">以下をセットで梱包します。</span></p><p style=\"margin-top:0pt; margin-bottom:0pt; text-indent:10.3pt; text-align:justify; line-height:18pt\"><span style=\"font-family:'ＭＳ ゴシック'; font-size:10pt\">・故障したスパットくん本体</span></p><p style=\"margin-top:0pt; margin-bottom:0pt; text-indent:10.3pt; text-align:justify; line-height:18pt\"><span style=\"font-family:'ＭＳ ゴシック'; font-size:10pt\">・付属品（充電アダプタ、コード、タッチペン、ストラップ）</span></p><p style=\"margin-top:0pt; margin-bottom:0pt; text-indent:10.3pt; text-align:justify; line-height:18pt\"><span style=\"font-family:'ＭＳ ゴシック'; font-size:10pt\">・モバイル決済端末修理依頼書</span></p><p style=\"margin-top:0pt; margin-left:4pt; margin-bottom:0pt; text-indent:10.1pt; text-align:justify; line-height:18pt\"><span style=\"font-family:'ＭＳ ゴシック'; font-size:10pt\">※各所属にて保管のスパットくん送付箱に梱包します</span></p><p style=\"margin-top:0pt; margin-left:4pt; margin-bottom:0pt; text-indent:10.1pt; text-align:justify; line-height:18pt\"><span style=\"font-family:'Times New Roman'; font-size:10pt; -aw-import:ignore\">&#xa0;</span></p></td></tr><tr style=\"height:86.2pt; page-break-inside:avoid\"><td style=\"width:30.35pt; border-top-style:dotted; border-top-width:0.75pt; border-right-style:solid; border-right-width:0.75pt; border-bottom-style:dotted; border-bottom-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; -aw-border-bottom:0.5pt dot; -aw-border-right:0.5pt single; -aw-border-top:0.5pt dot\"><p style=\"margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:center; line-height:18pt\"><span style=\"font-family:'ＭＳ ゴシック'; font-size:10pt\">５</span></p></td><td 
style=\"width:78.25pt; border-style:dotted solid; border-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; -aw-border-bottom:0.5pt dot; -aw-border-left:0.5pt single; -aw-border-right:0.5pt single; -aw-border-top:0.5pt dot\"><p style=\"margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:justify; line-height:18pt\"><span style=\"font-family:'ＭＳ ゴシック'; font-size:10pt\">代替機の受取と</span></p><p style=\"margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:justify; line-height:18pt\"><span style=\"font-family:'ＭＳ ゴシック'; font-size:10pt\">故障機の送付</span></p><p style=\"margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:justify; line-height:18pt\"><span style=\"font-family:'Times New Roman'; font-size:10pt; font-weight:bold; -aw-import:ignore\">&#xa0;</span></p></td><td style=\"width:317.35pt; border-top-style:dotted; border-top-width:0.75pt; border-left-style:solid; border-left-width:0.75pt; border-bottom-style:dotted; border-bottom-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; -aw-border-bottom:0.5pt dot; -aw-border-left:0.5pt single; -aw-border-top:0.5pt dot\"><p style=\"margin-top:0pt; margin-left:0.1pt; margin-bottom:0pt; text-indent:-0.1pt; text-align:justify; line-height:18pt\"><span style=\"font-family:'ＭＳ ゴシック'; font-size:10pt\">入力後数日でスパットくんの代替機が到着します。</span></p><p style=\"margin-top:0pt; margin-left:0.1pt; margin-bottom:0pt; text-indent:-0.1pt; text-align:justify; line-height:18pt\"><span style=\"font-family:'ＭＳ ゴシック'; font-size:10pt\">受け取ると同時に、配送業者（日本通運）に故障したスパットくん、および「モバイル決済端末修理依頼書」を渡します。</span></p><p style=\"margin-top:0pt; margin-left:0.1pt; margin-bottom:2.4pt; text-indent:10pt; text-align:justify; line-height:18pt\"><span style=\"font-family:'ＭＳ ゴシック'; font-size:10pt\">※「モバイル決済端末修理依頼書」を忘れずに同梱下さい。</span></p><p style=\"margin-top:2.4pt; margin-left:14.8pt; margin-bottom:0pt; text-indent:-14.8pt; text-align:justify; line-height:18pt\"><span style=\"font-family:'Times New 
Roman'; font-size:10pt; -aw-import:ignore\">&#xa0;</span></p></td></tr><tr style=\"height:52.15pt; page-break-inside:avoid\"><td style=\"width:30.35pt; border-top-style:dotted; border-top-width:0.75pt; border-right-style:solid; border-right-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; -aw-border-right:0.5pt single; -aw-border-top:0.5pt dot\"><p style=\"margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:center; line-height:18pt\"><span style=\"font-family:'ＭＳ ゴシック'; font-size:10pt\">６</span></p></td><td style=\"width:78.25pt; border-top-style:dotted; border-top-width:0.75pt; border-right-style:solid; border-right-width:0.75pt; border-left-style:solid; border-left-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; -aw-border-left:0.5pt single; -aw-border-right:0.5pt single; -aw-border-top:0.5pt dot\"><p style=\"margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:justify; line-height:18pt\"><span style=\"font-family:'ＭＳ ゴシック'; font-size:10pt\">代替機の端末受取入力</span></p></td><td style=\"width:317.35pt; border-top-style:dotted; border-top-width:0.75pt; border-left-style:solid; border-left-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; -aw-border-left:0.5pt single; -aw-border-top:0.5pt dot\"><p style=\"margin-top:2.4pt; margin-left:0.1pt; margin-bottom:0pt; text-indent:-0.1pt; text-align:justify; line-height:18pt\"><span style=\"font-family:'ＭＳ ゴシック'; font-size:10pt\">モバイル端末管理ウェブより代替機の端末受取処理を入力します。</span></p></td></tr></table><p style=\"margin-top:0pt; margin-bottom:0pt; text-indent:9.75pt; text-align:center\"><span>　　　　　　　　　　</span><a href=\"https://a3.itr.sumitomolife.co.jp/sso/dfw/kn/zc/open/zcko0102.do?DOCID=H00028928\" style=\"text-decoration:none\"><span style=\"font-family:'ＭＳ ゴシック'; font-size:10pt; text-decoration:underline; color:#0000ff\">【関連マニュアル】　スパットくん・モバイル管理ウェブ操作マニュアル</span></a></p><p style=\"margin-top:0pt; margin-bottom:0pt\"><span 
style=\"font-family:'ＭＳ ゴシック'; -aw-import:ignore\">&#xa0;</span></p><div style=\"-aw-headerfooter-type:footer-primary; clear:both\"><p style=\"margin-top:0pt; margin-bottom:0pt; text-align:center\"><span>- </span><span style=\"-aw-field-start:true\"></span><span style=\"-aw-field-code:' PAGE '\"></span><span style=\"-aw-field-separator:true\"></span><span>1</span><span style=\"-aw-field-end:true\"></span><span> -</span></p></div></div></body></html>\n"
  },
  {
    "path": "libs/kotaemon/tests/resources/policy.md",
    "content": "# 5 年ごと配当付特定状態保障定期保険特約条項 目次\n\n## 1. この特約の概要\n\n第 1 条 特約保険金の支払\n\n第 2 条 特約保険金の支払に関する補則\n\n第 3 条 特約保険金の免責事由に該当した場合の取扱\n\n第 4 条 特約保険金の請求、支払時期および支払場所\n\n第 5 条 特約の保険料払込の免除\n\n第 6 条 特約の締結\n\n第 7 条 特約の責任開始期\n\n第 8 条 特約の保険期間および保険料払込期間\n\n第 9 条 特約の保険料の払込\n\n第 10 条 猶予期間中の保険事故亡保険料の取扱\n\n第 11 条 特約の失効\n\n第 12 条 特約の復活\n\n第 13 条 告知義務\n\n第 14 条 告知義務違反による解除\n\n第 15 条 特約を解除できない場合\n\n第 16 条 重大事由による解除\n\n第 17 条 特約の解約\n\n第 18 条 特約の返還金\n\n第 19 条 特約の消滅とみなす場合\n\n第 20 条 債権者等により特約が解約される場合の取扱\n\n第 21 条 特約保険金額の減額\n\n第 22 条 特約の更新\n\n第 23 条 特約の契約者配当金\n\n第 24 条 主契約の内容变更に伴う特約の取扱\n\n第 25 条 主契約について保険料の自動貸付の規定を適用 する場合の取扱\n\n第 26 条 主契約を払済保険に变更する場合の取扱\n\n第 27 条 法令等の改正等に伴う特約障害保険金および特 約介護保険金の支払事由に関する規定の变更\n\n第 28 条 管轄裁判所\n\n第 29 条 契約内容の登録\n\n第 30 条 主約款の規定の準用\n\n第 31 条 5 年ごと配当付定期保険または 5 年ごと利差配 当付定期保険に付加した場合の特則\n\n第 32 条 5 年ごと配当付生存給付金付定期保険または 5 年己゙と利差配当付生存給付金付定期保険に付加 した場合の特則\n第 33 条 5 年ごと配当付逓増定期保険または 5 年ごと利 差配当付逓增定期保険沉付加した場合の特則\n\n第 34 条 5 年ごと配当付養老保険または 5 年ごと利差配 当付養老保険に付加した場合の特則\n\n第 35 条 5 年ごと配当付終身保険に 5 年ごと配当付年金 支払移行特約等を付加した場合または 5 年ごと 利差配当付終身保険厄 5 年己゙と利差配当付年金 支払移行特約等を付加した場合の特約の取扱\n\n第 36 条 保険料払込期間が終身の 5 年ごと配当付終身保 険または保険料払込期間が終身の 5 年ごと利差 配当付終身保険尺付加した場合の特則\n\n第 37 条 5 年ごと配当付更新型終身移行保険または 5 年 ごと利差配当付更新型終身移行保険に付加した 場合の特則\n\n第 38 条 5 年ごと配当付更新型終身移行保険または 5 年 ごと利差配当付更新型終身移行保険に 5 年ごと 配当付年金支払移行特約等を付加した場合の特 約の取扱\n\n第 39 条 5 年ごと配当付介護年金終身保障保険または 5 年ごと利差配当付介護年金終身保障保険に付加 した場合の特則\n\n第 40 条 5 年己゙と配当付終身医療保険または 5 年ごと利 差配当付経身医療保険汇付加した場合の特則\n\n第 41 条 5 年ごと配当付介護年金保険（解約返還金なし 型）に付加した場合の特則\n\n第 42 条 転換後契約または变更後契約に付加した場合の 特則\n\n第 43 条 転換特約、部分保障变更特約または家族内保障 承継特約を付加した場合の特則\n\n第 44 条 特別条件を付けた場合の特則\n\n第 45 条 契約日が平成 22 年 3 月 1 日以前の主契約に付加 した場合の特則\n\n第 46 条 契約日が平成 24 年 10 月 1 日以前の主契約に付加 した場合の特約特定疾病保険金、特約障害保険 金および特約介護保険金の代理請求\n\n## 2. 5 年ごと配当付特定状態保障定期保険特約条項\n\n## 3. 
(この特約の概要)\n\n(2015 年 5 月 21 日改正)\n\nこの特約は、つぎの給付を行うことを主な内容とするものです。なお、特約死亡保険金額、特約特定疾病保険金額、特 約障害保険金額および特約介護保険金額は同額です。\n\n|                    |                                                                                         給付の内容                                                                                          |\n| :----------------: | :-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |\n|   特約死亡保険金   |                                                                 被保険者がこの特約の保険期間中に死亡したときに支払います。                                                                  |\n| 特約特定疾病保険金 | $\\begin{array}{l}\\text { 被保険者がこの特約の保険期間中に特定の疾病（悪性新生物（がん）、急性心筋梗塞または脳 } \\\\ \\text { 卒中）により所定の状態に該当したときに支払います。 }\\end{array}$ |\n|   特約障害保険金   |                               ![](https://cdn.mathpix.com/cropped/2023_09_15_60b7a05a11e6ef69c8bbg-1.jpg?height=93&width=1317&top_left_y=2514&top_left_x=537)                               |\n|   特約介護保険金   |                                                     被保険者がこの特約の保険期間中に傷害または疾病により所定の要介護状態に該当したとき                                                      |\n\n1。この特約において支払う特約保険金はつぎのとおりです。\n\n|                                                                                                                                                           |           $\\begin{array}{l}\\text { 特約保険金を支払う場合（以下「支払事由」 } \\\\ \\text { といいます。） }\\end{array}$           |                                                          支払額                                                           |                                                                                                          受取人                                                                                                           |                                                                                                                              
              $\\begin{array}{l}\\text { 支払事由に該当しても特約保険金を支払 } \\\\ \\text { わない場合 (以下「免責事由」といいます。) }\\end{array}$                                                                                                                                            |\n| :-------------------------------------------------------------------------------------------------------------------------------------------------------: | :-----------------------------------------------------------------------------------------------------------------------------: | :-----------------------------------------------------------------------------------------------------------------------: | :-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |\n|      $\\begin{array}{l}\\text { 特 } \\\\ \\text { 約 } \\\\ \\text { 死 } \\\\ 亡 \\\\ \\text { 亡 } \\\\ \\text { 保 } \\\\ \\text { 険 } \\\\ \\text { 金 }\\end{array}$      |                $\\begin{array}{l}\\text { 被保険者がこの特約の保険期間中に死亡し } \\\\ \\text { たとき }\\end{array}$                | $\\begin{array}{l}\\text { 特 } \\\\ \\text { 約 } \\\\ \\text { 保 } \\\\ \\text { 険 } \\\\ \\text { 金 } \\\\ \\text { 額 }\\end{array}$ |                                                 $\\begin{array}{l}\\text { 特 } \\\\ \\text { 絢 } \\\\ \\text { 㨐 } \\\\ \\text { 険 } \\\\ \\text { 善 } \\\\ \\text { 聚 }\\end{array}$                                                 | $\\begin{array}{l}\\text { 
つぎのいずれかにより左記の支払事由が } \\\\ \\text { 生じたとき } \\\\ \\text { (1) この特約の責任開始期（復活の取扱が } \\\\ \\text { 行われた後は、最後の復活の際の責任開 } \\\\ \\text { 始期。以下同じ。）の属するもからその } \\\\ \\text { 日を含めて } 3 \\text { 年以内の自殺 } \\\\ \\text { (2) 保険契約者または特約死亡保険金受 } \\\\ \\text { 取人の故意 } \\\\ \\text { (3) 戦争その他の变乱 }\\end{array}$ |\n| $\\begin{array}{l}\\text { 特 } \\\\ \\text { 約 } \\\\ \\text { 特 } \\\\ \\text { 定 } \\\\ \\text { 疾 } \\\\ \\text { 病 } \\\\ \\text { 除 } \\\\ \\text { 金 }\\end{array}$ | ![](https://cdn.mathpix.com/cropped/2023_09_15_60b7a05a11e6ef69c8bbg-2.jpg?height=1118&width=621&top_left_y=724&top_left_x=305) |                                                                                                                           | $\\begin{array}{l}\\text { 特 } \\\\ \\text { 約 } \\\\ \\text { 特 } \\\\ \\text { 定 } \\\\ \\text { 疾 } \\\\ \\text { 病 } \\\\ \\text { 保 } \\\\ \\text { 険 } \\\\ \\text { 金 } \\\\ \\text { 受 } \\\\ \\text { 取 } \\\\ \\text { 人 }\\end{array}$ |                                                                                                                                                                                                            +                                                                                                                                                                                                             |\n|         $\\begin{array}{l}\\text { 特 } \\\\ \\text { 約 } \\\\ \\text { 障 } \\\\ \\text { 害 } \\\\ \\text { 保 } \\\\ \\text { 険 } \\\\ \\text { 金 }\\end{array}$         | ![](https://cdn.mathpix.com/cropped/2023_09_15_60b7a05a11e6ef69c8bbg-2.jpg?height=900&width=621&top_left_y=1837&top_left_x=305) |                                                                                                                           |                 $\\begin{array}{l}\\text { 特 } \\\\ \\text { 約 } \\\\ \\text { 障 } \\\\ \\text { 害 } \\\\ \\text { 保 } \\\\ \\text { 険 } \\\\ \\text { 金 } \\\\ \\text { 受 } \\\\ \\text { 取 } 
\\\\ \\text { 人 }\\end{array}$                 |                                                                                                                                             ![](https://cdn.mathpix.com/cropped/2023_09_15_60b7a05a11e6ef69c8bbg-2.jpg?height=904&width=594&top_left_y=1837&top_left_x=1262)                                                                                                                                             |\n\n|                                                                                                                                           |                                                                                                支払事由                                                                                                |                                                          支払額                                                           |                                                                                          受取人                                                                                           |                                                                                                                                                                                                                                                                                                                                           免責事由                                                                                                                                                                                                                                                                                                                                            |\n| :---------------------------------------------------------------------------------------------------------------------------------------: | 
:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :-----------------------------------------------------------------------------------------------------------------------: | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |\n| $\\begin{array}{l}\\text { 特 } \\\\ \\text { 約 } \\\\ \\text { 介 } \\\\ \\text { 護 } \\\\ \\text { 保 } \\\\ \\text { 険 } \\\\ \\text { 金 }\\end{array}$ | $\\begin{array}{l}\\text { 被保険者がこの特約の責任開始期以後の傷 } \\\\ \\text { 害または疾病を原因として、この特約の保 } \\\\ \\text { 険期間中に要介護状態（表4）に該当した } \\\\ \\text { とき }\\end{array}$ | $\\begin{array}{l}\\text { 特 } \\\\ \\text { 約 } \\\\ \\text { 保 } \\\\ \\text { 険 } \\\\ \\text { 金 } \\\\ \\text { 額 }\\end{array}$ | $\\begin{array}{l}\\text { 特 } \\\\ \\text { 約 } \\\\ \\text { 介 } \\\\ \\text { 護 } \\\\ \\text { 保 } \\\\ \\text { 険 } \\\\ \\text { 金 } \\\\ \\text { 受 } \\\\ \\text { 取 } \\\\ \\text { 人 }\\end{array}$ | $\\begin{array}{l}\\text { つぎのいずれかにより左記の支払事由が生 } \\\\ \\text { じたとき } \\\\ \\text { (1) 保険契約者または被保険者の故意また } \\\\ \\text { は重大な過失 } \\\\ \\text { (2) 被保険者の犯罪行為 
} \\\\ \\text { (3) 被保険者の精神障害を原因とする事故 } \\\\ \\text { (4) 被保険者の泥酔の状態を原因とする事 } \\\\ \\text { 故 } \\\\ \\text { (5) 被保険者が法令に定める運転資格を持 } \\\\ \\text { たないで運転している間に生じた事故 } \\\\ \\text { (6) 被保険者が法令に定める酒気帯び運転 } \\\\ \\text { またはこれに相当する運転をしている間 } \\\\ \\text { に生じた事故 } \\\\ \\text { (7) 被保険者の薬物依存 } \\\\ \\text { (8) 地震、噴火または津波 } \\\\ \\text { (9) 戦争その他の变乱 }\\end{array}$ |\n\n2. 第 1 項の特約特定疾病保険金の支払事由の(1)に該当した場合でも、この特約の責任開始期の属する日からその日を含 めて 90 日以内に乳房の悪性新生物（表 1 中、基本分類コード C 50 の悪性新生物。以下同じ。）に䍜患し、医師により診断 確定されたときは、当会社は、特約特定疾病保険金を支払いません。ただし、その後（乳房の悪性新生物についてはこ の特約の責任開始期の属する日からその日を含めて 90 日経過後)、この特約の保険期間中に、被保険者がその乳房の悪性 新生物と因果関係のない悪性新生物（表 1）に罹患し、医師により診断確定されたときは、特約特定疾病保険金を支払 います。\n\n## 4. 表 1 対象となる悪性新生物、急性心筋梗塞、脳卒中\n\n対象となる悪性新生物、急性心筋梗塞、脳卒中とは、次表によって定義づけられる疾病とし、かつ、平成 21 年 3 月 23 日 総務省告示第 176 号にもとづ＜厚生労働省大臣官房統計情報部編「疾病、傷害および死因統計分類提要ＩＣＤ－10（2003 年版）準拠」に記載された分類項目中、次表の基本分類コードに規定される内容によるものをいいます。\n\n|  疾 病 名  |                                                         疾 病 の 定 義                                                          |                                                           分 類 項 目                                                           |                                                                                            ![](https://cdn.mathpix.com/cropped/2023_09_15_60b7a05a11e6ef69c8bbg-3.jpg?height=85&width=150&top_left_y=1540&top_left_x=1689)                                                                                             |\n| :--------: | :-----------------------------------------------------------------------------------------------------------------------------: | :-----------------------------------------------------------------------------------------------------------------------------: | 
:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |\n| 悪性新生物 | ![](https://cdn.mathpix.com/cropped/2023_09_15_60b7a05a11e6ef69c8bbg-3.jpg?height=732&width=445&top_left_y=1617&top_left_x=442) | ![](https://cdn.mathpix.com/cropped/2023_09_15_60b7a05a11e6ef69c8bbg-3.jpg?height=732&width=750&top_left_y=1617&top_left_x=909) | $\\begin{array}{l}\\mathrm{C} 00-\\mathrm{C} 1 \\\\ \\mathrm{C} 00 \\\\ \\mathrm{C} 01 \\\\ \\mathrm{C} 02 \\\\ \\mathrm{C} 03 \\\\ \\mathrm{C} 04 \\\\ \\mathrm{C} 05 \\\\ \\mathrm{C} 06 \\\\ \\mathrm{C} 07 \\\\ \\mathrm{C} 08 \\\\ \\mathrm{C} 09 \\\\ \\mathrm{C} 10 \\\\ \\mathrm{C} 11 \\\\ \\mathrm{C} 12 \\\\ \\mathrm{C} 13 \\\\ \\mathrm{C} 14\\end{array}$ |\n"
  },
  {
    "path": "libs/kotaemon/tests/simple_pipeline.py",
    "content": "import tempfile\nfrom typing import List\n\nfrom kotaemon.base import BaseComponent, LLMInterface, lazy\nfrom kotaemon.embeddings import LCAzureOpenAIEmbeddings\nfrom kotaemon.indices import VectorRetrieval\nfrom kotaemon.llms import AzureOpenAI\nfrom kotaemon.storages import ChromaVectorStore\n\n\nclass Pipeline(BaseComponent):\n    llm: AzureOpenAI = AzureOpenAI.withx(\n        azure_endpoint=\"https://test.openai.azure.com/\",\n        openai_api_key=\"some-key\",\n        openai_api_version=\"2023-03-15-preview\",\n        deployment_name=\"gpt35turbo\",\n        temperature=0,\n        request_timeout=60,\n    )\n\n    retrieving_pipeline: VectorRetrieval = VectorRetrieval.withx(\n        vector_store=lazy(ChromaVectorStore).withx(path=str(tempfile.mkdtemp())),\n        embedding=LCAzureOpenAIEmbeddings.withx(\n            model=\"text-embedding-ada-002\",\n            deployment=\"embedding-deployment\",\n            azure_endpoint=\"https://test.openai.azure.com/\",\n            openai_api_key=\"some-key\",\n        ),\n    )\n\n    def run(self, text: str) -> LLMInterface:\n        matched_texts: List[str] = self.retrieving_pipeline(text)\n        return self.llm(\"\\n\".join(matched_texts))\n"
  },
  {
    "path": "libs/kotaemon/tests/test_agent.py",
    "content": "from unittest.mock import patch\n\nimport pytest\nfrom openai.types.chat.chat_completion import ChatCompletion\n\nfrom kotaemon.agents import (\n    AgentType,\n    BaseTool,\n    GoogleSearchTool,\n    LangchainAgent,\n    LLMTool,\n    ReactAgent,\n    RewooAgent,\n    WikipediaTool,\n)\nfrom kotaemon.llms import AzureChatOpenAI\n\nfrom .conftest import skip_openai_lc_wrapper_test\n\nFINAL_RESPONSE_TEXT = \"Final Answer: Hello Cinnamon AI!\"\nREWOO_VALID_PLAN = (\n    \"#Plan1: Search for Cinnamon AI company on Google\\n\"\n    \"#E1: google_search[Cinnamon AI company]\\n\"\n    \"#Plan2: Search for Cinnamon on Wikipedia\\n\"\n    \"#E2: wikipedia[Cinnamon]\\n\"\n)\nREWOO_INVALID_PLAN = (\n    \"#E1: google_search[Cinnamon AI company]\\n\"\n    \"#Plan2: Search for Cinnamon on Wikipedia\\n\"\n    \"#E2: wikipedia[Cinnamon]\\n\"\n)\n\n\ndef generate_chat_completion_obj(text):\n    return ChatCompletion.parse_obj(\n        {\n            \"id\": \"chatcmpl-7qyuw6Q1CFCpcKsMdFkmUPUa7JP2x\",\n            \"object\": \"chat.completion\",\n            \"created\": 1692338378,\n            \"model\": \"gpt-35-turbo\",\n            \"system_fingerprint\": None,\n            \"choices\": [\n                {\n                    \"index\": 0,\n                    \"finish_reason\": \"stop\",\n                    \"message\": {\n                        \"role\": \"assistant\",\n                        \"content\": text,\n                        \"function_call\": None,\n                        \"tool_calls\": None,\n                    },\n                    \"logprobs\": None,\n                }\n            ],\n            \"usage\": {\"completion_tokens\": 9, \"prompt_tokens\": 10, \"total_tokens\": 19},\n        }\n    )\n\n\n_openai_chat_completion_responses_rewoo = [\n    generate_chat_completion_obj(text=text)\n    for text in [REWOO_VALID_PLAN, FINAL_RESPONSE_TEXT]\n]\n\n_openai_chat_completion_responses_rewoo_error = [\n    
generate_chat_completion_obj(text=text)\n    for text in [REWOO_INVALID_PLAN, FINAL_RESPONSE_TEXT]\n]\n\n_openai_chat_completion_responses_react = [\n    generate_chat_completion_obj(text=text)\n    for text in [\n        (\n            \"I don't have prior knowledge about Cinnamon AI company, \"\n            \"so I should gather information about it.\\n\"\n            \"Action: wikipedia\\n\"\n            \"Action Input: Cinnamon AI company\\n\"\n        ),\n        (\n            \"The information retrieved from Wikipedia is not \"\n            \"about Cinnamon AI company, but about Blue Prism, \"\n            \"a British multinational software corporation. \"\n            \"I need to try another source to gather information \"\n            \"about Cinnamon AI company.\\n\"\n            \"Action: google_search\\n\"\n            \"Action Input: Cinnamon AI company\\n\"\n        ),\n        FINAL_RESPONSE_TEXT,\n    ]\n]\n\n_openai_chat_completion_responses_react_langchain_tool = [\n    generate_chat_completion_obj(text=text)\n    for text in [\n        (\n            \"I don't have prior knowledge about Cinnamon AI company, \"\n            \"so I should gather information about it.\\n\"\n            \"Action: wikipedia\\n\"\n            \"Action Input: Cinnamon AI company\\n\"\n        ),\n        # (\n        #     \"The information retrieved from Wikipedia is not \"\n        #     \"about Cinnamon AI company, but about Blue Prism, \"\n        #     \"a British multinational software corporation. 
\"\n        #     \"I need to try another source to gather information \"\n        #     \"about Cinnamon AI company.\\n\"\n        #     \"Action: duckduckgo_search\\n\"\n        #     \"Action Input: Cinnamon AI company\\n\"\n        # ),\n        FINAL_RESPONSE_TEXT,\n    ]\n]\n\n\n@pytest.fixture\ndef llm():\n    return AzureChatOpenAI(\n        api_key=\"dummy\",\n        api_version=\"2024-05-01-preview\",\n        azure_deployment=\"gpt-4o\",\n        azure_endpoint=\"https://test.openai.azure.com/\",\n    )\n\n\n@patch(\n    \"openai.resources.chat.completions.Completions.create\",\n    side_effect=_openai_chat_completion_responses_rewoo_error,\n)\ndef test_agent_fail(openai_completion, llm, mock_google_search):\n    plugins = [\n        GoogleSearchTool(),\n        WikipediaTool(),\n        LLMTool(llm=llm),\n    ]\n\n    agent = RewooAgent(planner_llm=llm, solver_llm=llm, plugins=plugins)\n\n    response = agent(\"Tell me about Cinnamon AI company\")\n    openai_completion.assert_called()\n    assert not response\n    assert response.status == \"failed\"\n\n\n@patch(\n    \"openai.resources.chat.completions.Completions.create\",\n    side_effect=_openai_chat_completion_responses_rewoo,\n)\ndef test_rewoo_agent(openai_completion, llm, mock_google_search):\n    plugins = [\n        GoogleSearchTool(),\n        WikipediaTool(),\n        LLMTool(llm=llm),\n    ]\n\n    agent = RewooAgent(planner_llm=llm, solver_llm=llm, plugins=plugins)\n\n    response = agent(\"Tell me about Cinnamon AI company\")\n    openai_completion.assert_called()\n    assert response.text == FINAL_RESPONSE_TEXT\n\n\n@patch(\n    \"openai.resources.chat.completions.Completions.create\",\n    side_effect=_openai_chat_completion_responses_react,\n)\ndef test_react_agent(openai_completion, llm, mock_google_search):\n    plugins = [\n        GoogleSearchTool(),\n        WikipediaTool(),\n        LLMTool(llm=llm),\n    ]\n    agent = ReactAgent(llm=llm, plugins=plugins, max_iterations=4)\n\n 
   response = agent(\"Tell me about Cinnamon AI company\")\n    openai_completion.assert_called()\n    assert response.text == FINAL_RESPONSE_TEXT\n\n\n@skip_openai_lc_wrapper_test\n@patch(\n    \"openai.resources.chat.completions.Completions.create\",\n    side_effect=_openai_chat_completion_responses_react,\n)\ndef test_react_agent_langchain(openai_completion, llm, mock_google_search):\n    from langchain.agents import AgentType, initialize_agent\n\n    plugins = [\n        GoogleSearchTool(),\n        WikipediaTool(),\n        LLMTool(llm=llm),\n    ]\n    langchain_plugins = [tool.to_langchain_format() for tool in plugins]\n    agent = initialize_agent(\n        langchain_plugins,\n        llm.to_langchain_format(),\n        agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,\n        verbose=True,\n    )\n    response = agent(\"Tell me about Cinnamon AI company\")\n    openai_completion.assert_called()\n    assert response\n\n\n@skip_openai_lc_wrapper_test\n@patch(\n    \"openai.resources.chat.completions.Completions.create\",\n    side_effect=_openai_chat_completion_responses_react,\n)\ndef test_wrapper_agent_langchain(openai_completion, llm, mock_google_search):\n    plugins = [\n        GoogleSearchTool(),\n        WikipediaTool(),\n        LLMTool(llm=llm),\n    ]\n    agent = LangchainAgent(\n        llm=llm,\n        plugins=plugins,\n        agent_type=AgentType.react,\n    )\n    response = agent(\"Tell me about Cinnamon AI company\")\n    openai_completion.assert_called()\n    assert response\n\n\n@patch(\n    \"openai.resources.chat.completions.Completions.create\",\n    side_effect=_openai_chat_completion_responses_react_langchain_tool,\n)\ndef test_react_agent_with_langchain_tools(openai_completion, llm):\n    from langchain_community.tools import DuckDuckGoSearchRun, WikipediaQueryRun\n    from langchain_community.utilities import WikipediaAPIWrapper\n\n    wikipedia = WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper())\n    search = 
DuckDuckGoSearchRun()\n\n    langchain_plugins = [wikipedia, search]\n    plugins = [BaseTool.from_langchain_format(tool) for tool in langchain_plugins]\n    agent = ReactAgent(llm=llm, plugins=plugins, max_iterations=4)\n\n    response = agent(\"Tell me about Cinnamon AI company\")\n    openai_completion.assert_called()\n    assert response.text == FINAL_RESPONSE_TEXT\n"
  },
  {
    "path": "libs/kotaemon/tests/test_composite.py",
    "content": "from copy import deepcopy\n\nimport pytest\nfrom openai.types.chat.chat_completion import ChatCompletion\n\nfrom kotaemon.llms import (\n    AzureChatOpenAI,\n    BasePromptComponent,\n    GatedBranchingPipeline,\n    GatedLinearPipeline,\n    SimpleBranchingPipeline,\n    SimpleLinearPipeline,\n)\nfrom kotaemon.parsers import RegexExtractor\n\n_openai_chat_completion_response = ChatCompletion.parse_obj(\n    {\n        \"id\": \"chatcmpl-7qyuw6Q1CFCpcKsMdFkmUPUa7JP2x\",\n        \"object\": \"chat.completion\",\n        \"created\": 1692338378,\n        \"model\": \"gpt-35-turbo\",\n        \"system_fingerprint\": None,\n        \"choices\": [\n            {\n                \"index\": 0,\n                \"finish_reason\": \"stop\",\n                \"message\": {\n                    \"role\": \"assistant\",\n                    \"content\": \"This is a test 123\",\n                    \"finish_reason\": \"length\",\n                    \"logprobs\": None,\n                },\n                \"logprobs\": None,\n            }\n        ],\n        \"usage\": {\"completion_tokens\": 9, \"prompt_tokens\": 10, \"total_tokens\": 19},\n    }\n)\n\n\n@pytest.fixture\ndef mock_llm():\n    return AzureChatOpenAI(\n        api_key=\"dummy\",\n        api_version=\"2024-05-01-preview\",\n        azure_deployment=\"gpt-4o\",\n        azure_endpoint=\"https://test.openai.azure.com/\",\n    )\n\n\n@pytest.fixture\ndef mock_post_processor():\n    return RegexExtractor(pattern=r\"\\d+\")\n\n\n@pytest.fixture\ndef mock_prompt():\n    return BasePromptComponent(template=\"Test prompt {value}\")\n\n\n@pytest.fixture\ndef mock_simple_linear_pipeline(mock_prompt, mock_llm, mock_post_processor):\n    return SimpleLinearPipeline(\n        prompt=mock_prompt, llm=mock_llm, post_processor=mock_post_processor\n    )\n\n\n@pytest.fixture\ndef mock_gated_linear_pipeline_positive(mock_prompt, mock_llm, mock_post_processor):\n    return GatedLinearPipeline(\n        
prompt=mock_prompt,\n        llm=mock_llm,\n        post_processor=mock_post_processor,\n        condition=RegexExtractor(pattern=\"positive\"),\n    )\n\n\n@pytest.fixture\ndef mock_gated_linear_pipeline_negative(mock_prompt, mock_llm, mock_post_processor):\n    return GatedLinearPipeline(\n        prompt=mock_prompt,\n        llm=mock_llm,\n        post_processor=mock_post_processor,\n        condition=RegexExtractor(pattern=\"negative\"),\n    )\n\n\ndef test_simple_linear_pipeline_run(mocker, mock_simple_linear_pipeline):\n    openai_mocker = mocker.patch(\n        \"openai.resources.chat.completions.Completions.create\",\n        return_value=_openai_chat_completion_response,\n    )\n\n    result = mock_simple_linear_pipeline(value=\"abc\")\n\n    assert result.text == \"123\"\n    assert openai_mocker.call_count == 1\n\n\ndef test_gated_linear_pipeline_run_positive(\n    mocker, mock_gated_linear_pipeline_positive\n):\n    openai_mocker = mocker.patch(\n        \"openai.resources.chat.completions.Completions.create\",\n        return_value=_openai_chat_completion_response,\n    )\n\n    result = mock_gated_linear_pipeline_positive(\n        value=\"abc\", condition_text=\"positive condition\"\n    )\n\n    assert result.text == \"123\"\n    assert openai_mocker.call_count == 1\n\n\ndef test_gated_linear_pipeline_run_negative(\n    mocker, mock_gated_linear_pipeline_positive\n):\n    openai_mocker = mocker.patch(\n        \"openai.resources.chat.completions.Completions.create\",\n        return_value=_openai_chat_completion_response,\n    )\n\n    result = mock_gated_linear_pipeline_positive(\n        value=\"abc\", condition_text=\"negative condition\"\n    )\n\n    assert result.content is None\n    assert openai_mocker.call_count == 0\n\n\ndef test_simple_branching_pipeline_run(mocker, mock_simple_linear_pipeline):\n    response0: ChatCompletion = _openai_chat_completion_response\n    response1: ChatCompletion = deepcopy(_openai_chat_completion_response)\n  
  response1.choices[0].message.content = \"a quick brown fox\"\n    response2: ChatCompletion = deepcopy(_openai_chat_completion_response)\n    response2.choices[0].message.content = \"jumps over the lazy dog 456\"\n    openai_mocker = mocker.patch(\n        \"openai.resources.chat.completions.Completions.create\",\n        side_effect=[response0, response1, response2],\n    )\n    pipeline = SimpleBranchingPipeline()\n    for _ in range(3):\n        pipeline.add_branch(mock_simple_linear_pipeline)\n\n    result = pipeline.run(value=\"abc\")\n    texts = [each.text for each in result]\n\n    assert len(result) == 3\n    assert texts == [\"123\", \"\", \"456\"]\n    assert openai_mocker.call_count == 3\n\n\ndef test_simple_gated_branching_pipeline_run(\n    mocker, mock_gated_linear_pipeline_positive, mock_gated_linear_pipeline_negative\n):\n    response0: ChatCompletion = deepcopy(_openai_chat_completion_response)\n    response0.choices[0].message.content = \"a quick brown fox\"\n    openai_mocker = mocker.patch(\n        \"openai.resources.chat.completions.Completions.create\",\n        return_value=response0,\n    )\n    pipeline = GatedBranchingPipeline()\n\n    pipeline.add_branch(mock_gated_linear_pipeline_negative)\n    pipeline.add_branch(mock_gated_linear_pipeline_positive)\n    pipeline.add_branch(mock_gated_linear_pipeline_positive)\n\n    result = pipeline.run(value=\"abc\", condition_text=\"positive condition\")\n\n    assert result.text == \"\"\n    assert openai_mocker.call_count == 2\n"
  },
  {
    "path": "libs/kotaemon/tests/test_cot.py",
    "content": "from unittest.mock import patch\n\nfrom openai.types.chat.chat_completion import ChatCompletion\n\nfrom kotaemon.llms import AzureChatOpenAI\nfrom kotaemon.llms.cot import ManualSequentialChainOfThought, Thought\n\n_openai_chat_completion_response = [\n    ChatCompletion.parse_obj(\n        {\n            \"id\": \"chatcmpl-7qyuw6Q1CFCpcKsMdFkmUPUa7JP2x\",\n            \"object\": \"chat.completion\",\n            \"created\": 1692338378,\n            \"model\": \"gpt-35-turbo\",\n            \"system_fingerprint\": None,\n            \"choices\": [\n                {\n                    \"index\": 0,\n                    \"finish_reason\": \"stop\",\n                    \"message\": {\n                        \"role\": \"assistant\",\n                        \"content\": text,\n                        \"function_call\": None,\n                        \"tool_calls\": None,\n                    },\n                    \"logprobs\": None,\n                }\n            ],\n            \"usage\": {\"completion_tokens\": 9, \"prompt_tokens\": 10, \"total_tokens\": 19},\n        }\n    )\n    for text in [\"Bonjour\", \"こんにちは (Konnichiwa)\"]\n]\n\n\n@patch(\n    \"openai.resources.chat.completions.Completions.create\",\n    side_effect=_openai_chat_completion_response,\n)\ndef test_cot_plus_operator(openai_completion):\n    llm = AzureChatOpenAI(\n        api_key=\"dummy\",\n        api_version=\"2024-05-01-preview\",\n        azure_deployment=\"gpt-4o\",\n        azure_endpoint=\"https://test.openai.azure.com/\",\n    )\n    thought1 = Thought(\n        prompt=\"Word {word} in {language} is \",\n        llm=llm,\n        post_process=lambda string: {\"translated\": string},\n    )\n    thought2 = Thought(\n        prompt=\"Translate {translated} to Japanese\",\n        llm=llm,\n        post_process=lambda string: {\"output\": string},\n    )\n    thought = thought1 + thought2\n    output = thought(word=\"hello\", language=\"French\")\n    assert 
output.content == {\n        \"word\": \"hello\",\n        \"language\": \"French\",\n        \"translated\": \"Bonjour\",\n        \"output\": \"こんにちは (Konnichiwa)\",\n    }\n\n\n@patch(\n    \"openai.resources.chat.completions.Completions.create\",\n    side_effect=_openai_chat_completion_response,\n)\ndef test_cot_manual(openai_completion):\n    llm = AzureChatOpenAI(\n        api_key=\"dummy\",\n        api_version=\"2024-05-01-preview\",\n        azure_deployment=\"gpt-4o\",\n        azure_endpoint=\"https://test.openai.azure.com/\",\n    )\n    thought1 = Thought(\n        prompt=\"Word {word} in {language} is \",\n        post_process=lambda string: {\"translated\": string},\n    )\n    thought2 = Thought(\n        prompt=\"Translate {translated} to Japanese\",\n        post_process=lambda string: {\"output\": string},\n    )\n    thought = ManualSequentialChainOfThought(thoughts=[thought1, thought2], llm=llm)\n    output = thought(word=\"hello\", language=\"French\")\n    assert output.content == {\n        \"word\": \"hello\",\n        \"language\": \"French\",\n        \"translated\": \"Bonjour\",\n        \"output\": \"こんにちは (Konnichiwa)\",\n    }\n\n\n@patch(\n    \"openai.resources.chat.completions.Completions.create\",\n    side_effect=_openai_chat_completion_response,\n)\ndef test_cot_with_termination_callback(openai_completion):\n    llm = AzureChatOpenAI(\n        api_key=\"dummy\",\n        api_version=\"2024-05-01-preview\",\n        azure_deployment=\"gpt-4o\",\n        azure_endpoint=\"https://test.openai.azure.com/\",\n    )\n    thought1 = Thought(\n        prompt=\"Word {word} in {language} is \",\n        post_process=lambda string: {\"translated\": string},\n    )\n    thought2 = Thought(\n        prompt=\"Translate {translated} to Japanese\",\n        post_process=lambda string: {\"output\": string},\n    )\n    thought = ManualSequentialChainOfThought(\n        thoughts=[thought1, thought2],\n        llm=llm,\n        terminate=lambda d: 
True if d.get(\"translated\", \"\") == \"Bonjour\" else False,\n    )\n    output = thought(word=\"hallo\", language=\"French\")\n    assert output.content == {\n        \"word\": \"hallo\",\n        \"language\": \"French\",\n        \"translated\": \"Bonjour\",\n    }\n"
  },
  {
    "path": "libs/kotaemon/tests/test_docstores.py",
    "content": "import os\nfrom unittest.mock import patch\n\nimport pytest\nfrom elastic_transport import ApiResponseMeta\n\nfrom kotaemon.base import Document\nfrom kotaemon.storages import (\n    ElasticsearchDocumentStore,\n    InMemoryDocumentStore,\n    SimpleFileDocumentStore,\n)\n\nmeta_success = ApiResponseMeta(\n    status=200,\n    http_version=\"1.1\",\n    headers={\"x-elastic-product\": \"Elasticsearch\"},\n    duration=1.0,\n    node=None,\n)\nmeta_fail = ApiResponseMeta(\n    status=404,\n    http_version=\"1.1\",\n    headers={\"x-elastic-product\": \"Elasticsearch\"},\n    duration=1.0,\n    node=None,\n)\n_elastic_search_responses = [\n    # check exist\n    (meta_fail, None),\n    # create index\n    (\n        meta_success,\n        {\"acknowledged\": True, \"shards_acknowledged\": True, \"index\": \"test\"},\n    ),\n    # count API\n    (\n        meta_success,\n        [{\"epoch\": \"1700474422\", \"timestamp\": \"10:00:22\", \"count\": \"0\"}],\n    ),\n    # add documents\n    (\n        meta_success,\n        {\n            \"took\": 50,\n            \"errors\": False,\n            \"items\": [\n                {\n                    \"index\": {\n                        \"_index\": \"test\",\n                        \"_id\": \"a3774dab-b8f1-43ba-adb8-842cb7a76eeb\",\n                        \"_version\": 1,\n                        \"result\": \"created\",\n                        \"_shards\": {\"total\": 2, \"successful\": 1, \"failed\": 0},\n                        \"_seq_no\": 0,\n                        \"_primary_term\": 1,\n                        \"status\": 201,\n                    }\n                },\n                {\n                    \"index\": {\n                        \"_index\": \"test\",\n                        \"_id\": \"b44f5593-7587-4f91-afd0-5736e5bd5bfe\",\n                        \"_version\": 1,\n                        \"result\": \"created\",\n                        \"_shards\": {\"total\": 2, 
\"successful\": 1, \"failed\": 0},\n                        \"_seq_no\": 1,\n                        \"_primary_term\": 1,\n                        \"status\": 201,\n                    }\n                },\n                {\n                    \"index\": {\n                        \"_index\": \"test\",\n                        \"_id\": \"13ae7825-eef9-4214-a164-983c2e6bbeaa\",\n                        \"_version\": 1,\n                        \"result\": \"created\",\n                        \"_shards\": {\"total\": 2, \"successful\": 1, \"failed\": 0},\n                        \"_seq_no\": 2,\n                        \"_primary_term\": 1,\n                        \"status\": 201,\n                    }\n                },\n            ],\n        },\n    ),\n    # check exist\n    (\n        meta_success,\n        {\"_shards\": {\"total\": 2, \"successful\": 1, \"failed\": 0}},\n    ),\n    # count\n    (\n        meta_success,\n        [{\"epoch\": \"1700474422\", \"timestamp\": \"10:00:22\", \"count\": \"3\"}],\n    ),\n    # get_all\n    (\n        meta_success,\n        {\n            \"took\": 1,\n            \"timed_out\": False,\n            \"_shards\": {\"total\": 1, \"successful\": 1, \"skipped\": 0, \"failed\": 0},\n            \"hits\": {\n                \"total\": {\"value\": 3, \"relation\": \"eq\"},\n                \"max_score\": 1.0,\n                \"hits\": [\n                    {\n                        \"_index\": \"test\",\n                        \"_id\": \"a3774dab-b8f1-43ba-adb8-842cb7a76eeb\",\n                        \"_score\": 1.0,\n                        \"_source\": {\"content\": \"Sample text 0\", \"metadata\": {}},\n                    },\n                    {\n                        \"_index\": \"test\",\n                        \"_id\": \"b44f5593-7587-4f91-afd0-5736e5bd5bfe\",\n                        \"_score\": 1.0,\n                        \"_source\": {\"content\": \"Sample text 1\", \"metadata\": {}},\n           
         },\n                    {\n                        \"_index\": \"test\",\n                        \"_id\": \"13ae7825-eef9-4214-a164-983c2e6bbeaa\",\n                        \"_score\": 1.0,\n                        \"_source\": {\"content\": \"Sample text 2\", \"metadata\": {}},\n                    },\n                ],\n            },\n        },\n    ),\n    # get by-id\n    (\n        meta_success,\n        {\n            \"took\": 1,\n            \"timed_out\": False,\n            \"_shards\": {\"total\": 1, \"successful\": 1, \"skipped\": 0, \"failed\": 0},\n            \"hits\": {\n                \"total\": {\"value\": 1, \"relation\": \"eq\"},\n                \"max_score\": 1.0,\n                \"hits\": [\n                    {\n                        \"_index\": \"test\",\n                        \"_id\": \"a3774dab-b8f1-43ba-adb8-842cb7a76eeb\",\n                        \"_score\": 1.0,\n                        \"_source\": {\"content\": \"Sample text 0\", \"metadata\": {}},\n                    }\n                ],\n            },\n        },\n    ),\n    # query\n    (\n        meta_success,\n        {\n            \"took\": 2,\n            \"timed_out\": False,\n            \"_shards\": {\"total\": 1, \"successful\": 1, \"skipped\": 0, \"failed\": 0},\n            \"hits\": {\n                \"total\": {\"value\": 3, \"relation\": \"eq\"},\n                \"max_score\": 0.13353139,\n                \"hits\": [\n                    {\n                        \"_index\": \"test\",\n                        \"_id\": \"a3774dab-b8f1-43ba-adb8-842cb7a76eeb\",\n                        \"_score\": 0.13353139,\n                        \"_source\": {\"content\": \"Sample text 0\", \"metadata\": {}},\n                    },\n                    {\n                        \"_index\": \"test\",\n                        \"_id\": \"b44f5593-7587-4f91-afd0-5736e5bd5bfe\",\n                        \"_score\": 0.13353139,\n                        
\"_source\": {\"content\": \"Sample text 1\", \"metadata\": {}},\n                    },\n                    {\n                        \"_index\": \"test\",\n                        \"_id\": \"13ae7825-eef9-4214-a164-983c2e6bbeaa\",\n                        \"_score\": 0.13353139,\n                        \"_source\": {\"content\": \"Sample text 2\", \"metadata\": {}},\n                    },\n                ],\n            },\n        },\n    ),\n    # delete\n    (\n        meta_success,\n        {\n            \"took\": 10,\n            \"timed_out\": False,\n            \"total\": 1,\n            \"deleted\": 1,\n            \"batches\": 1,\n            \"version_conflicts\": 0,\n            \"noops\": 0,\n            \"retries\": {\"bulk\": 0, \"search\": 0},\n            \"throttled_millis\": 0,\n            \"requests_per_second\": -1.0,\n            \"throttled_until_millis\": 0,\n            \"failures\": [],\n        },\n    ),\n    # check exists\n    (\n        meta_success,\n        {\"_shards\": {\"total\": 2, \"successful\": 1, \"failed\": 0}},\n    ),\n    # count\n    (\n        meta_success,\n        [{\"epoch\": \"1700549363\", \"timestamp\": \"06:49:23\", \"count\": \"2\"}],\n    ),\n]\n\n\ndef test_inmemory_document_store_base_interfaces(tmp_path):\n    \"\"\"Test all interfaces of a a document store\"\"\"\n\n    store = InMemoryDocumentStore()\n    docs = [\n        Document(text=f\"Sample text {idx}\", meta={\"meta_key\": f\"meta_value_{idx}\"})\n        for idx in range(10)\n    ]\n\n    # Test add and get all\n    assert len(store.get_all()) == 0, \"Document store should be empty\"\n    store.add(docs)\n    assert len(store.get_all()) == 10, \"Document store should have 10 documents\"\n\n    # Test add with provided ids\n    store.add(docs=docs, ids=[f\"doc_{idx}\" for idx in range(10)])\n    assert len(store.get_all()) == 20, \"Document store should have 20 documents\"\n\n    # Test add without exist_ok\n    with 
pytest.raises(ValueError):\n        store.add(docs=docs, ids=[f\"doc_{idx}\" for idx in range(10)])\n\n    # Update ok with add exist_ok\n    store.add(docs=docs, ids=[f\"doc_{idx}\" for idx in range(10)], exist_ok=True)\n    assert len(store.get_all()) == 20, \"Document store should have 20 documents\"\n\n    # Test get with str id\n    matched = store.get(docs[0].doc_id)\n    assert len(matched) == 1, \"Should return 1 document\"\n    assert matched[0].text == docs[0].text, \"Should return the correct document\"\n\n    # Test get with list of ids\n    matched = store.get([docs[0].doc_id, docs[1].doc_id])\n    assert len(matched) == 2, \"Should return 2 documents\"\n    assert [doc.text for doc in matched] == [doc.text for doc in docs[:2]]\n\n    # Test delete with str id\n    store.delete(docs[0].doc_id)\n    assert len(store.get_all()) == 19, \"Document store should have 19 documents\"\n\n    # Test delete with list of ids\n    store.delete([docs[1].doc_id, docs[2].doc_id])\n    assert len(store.get_all()) == 17, \"Document store should have 17 documents\"\n\n    # Test save\n    store.save(tmp_path / \"store.json\")\n    assert (tmp_path / \"store.json\").exists(), \"File should exist\"\n\n    # Test load\n    store2 = InMemoryDocumentStore()\n    store2.load(tmp_path / \"store.json\")\n    assert len(store2.get_all()) == 17, \"Laded document store should have 17 documents\"\n\n    os.remove(tmp_path / \"store.json\")\n\n\ndef test_simplefile_document_store_base_interfaces(tmp_path):\n    \"\"\"Test all interfaces of a a document store\"\"\"\n\n    store = SimpleFileDocumentStore(path=tmp_path)\n    docs = [\n        Document(text=f\"Sample text {idx}\", meta={\"meta_key\": f\"meta_value_{idx}\"})\n        for idx in range(10)\n    ]\n\n    # Test add and get all\n    assert len(store.get_all()) == 0, \"Document store should be empty\"\n    store.add(docs)\n    assert len(store.get_all()) == 10, \"Document store should have 10 documents\"\n\n    # Test add with 
provided ids\n    store.add(docs=docs, ids=[f\"doc_{idx}\" for idx in range(10)])\n    assert len(store.get_all()) == 20, \"Document store should have 20 documents\"\n\n    # Test add without exist_ok\n    with pytest.raises(ValueError):\n        store.add(docs=docs, ids=[f\"doc_{idx}\" for idx in range(10)])\n\n    # Update ok with add exist_ok\n    store.add(docs=docs, ids=[f\"doc_{idx}\" for idx in range(10)], exist_ok=True)\n    assert len(store.get_all()) == 20, \"Document store should have 20 documents\"\n\n    # Test get with str id\n    matched = store.get(docs[0].doc_id)\n    assert len(matched) == 1, \"Should return 1 document\"\n    assert matched[0].text == docs[0].text, \"Should return the correct document\"\n\n    # Test get with list of ids\n    matched = store.get([docs[0].doc_id, docs[1].doc_id])\n    assert len(matched) == 2, \"Should return 2 documents\"\n    assert [doc.text for doc in matched] == [doc.text for doc in docs[:2]]\n\n    # Test delete with str id\n    store.delete(docs[0].doc_id)\n    assert len(store.get_all()) == 19, \"Document store should have 19 documents\"\n\n    # Test delete with list of ids\n    store.delete([docs[1].doc_id, docs[2].doc_id])\n    assert len(store.get_all()) == 17, \"Document store should have 17 documents\"\n\n    # Test save\n    assert (tmp_path / \"default.json\").exists(), \"File should exist\"\n\n    # Test load\n    store2 = SimpleFileDocumentStore(path=tmp_path)\n    assert len(store2.get_all()) == 17, \"Laded document store should have 17 documents\"\n\n    os.remove(tmp_path / \"default.json\")\n\n\n@patch(\n    \"elastic_transport.Transport.perform_request\",\n    side_effect=_elastic_search_responses,\n)\ndef test_elastic_document_store(elastic_api):\n    store = ElasticsearchDocumentStore(collection_name=\"test\")\n\n    docs = [\n        Document(text=f\"Sample text {idx}\", meta={\"meta_key\": f\"meta_value_{idx}\"})\n        for idx in range(3)\n    ]\n\n    # Test add and get all\n    
assert store.count() == 0, \"Document store should be empty\"\n    store.add(docs)\n    assert store.count() == 3, \"Document store count should changed after adding docs\"\n\n    docs = store.get_all()\n    first_doc = docs[0]\n    assert len(docs) == 3, \"Document store get_all() failed\"\n\n    doc_by_ids = store.get(first_doc.doc_id)\n    assert doc_by_ids[0].doc_id == first_doc.doc_id, \"Document store get() failed\"\n\n    docs = store.query(\"text\")\n    assert len(docs) == 3, \"Document store query() failed\"\n\n    # delete test\n    store.delete(first_doc.doc_id)\n    assert store.count() == 2, \"Document store delete() failed\"\n\n    elastic_api.assert_called()\n"
  },
  {
    "path": "libs/kotaemon/tests/test_documents.py",
    "content": "from kotaemon.base.schema import Document, RetrievedDocument\n\nfrom .conftest import skip_when_haystack_not_installed\n\n\ndef test_document_constructor_with_builtin_types():\n    for value in [\"str\", 1, {}, set(), [], tuple, None]:\n        doc = Document(value)\n        assert doc.text == (str(value) if value else \"\")\n        assert doc.content == value\n        assert bool(doc) == bool(value)\n\n\ndef test_document_constructor_with_document():\n    text = \"Sample text\"\n    doc1 = Document(text)\n    doc2 = Document(doc1)\n    assert doc2.text == doc1.text\n    assert doc2.content == doc1.content\n\n\n@skip_when_haystack_not_installed\ndef test_document_to_haystack_format():\n    from haystack.schema import Document as HaystackDocument\n\n    text = \"Sample text\"\n    metadata = {\"filename\": \"sample.txt\"}\n    doc = Document(text, metadata=metadata)\n    haystack_doc = doc.to_haystack_format()\n    assert isinstance(haystack_doc, HaystackDocument)\n    assert haystack_doc.content == doc.text\n    assert haystack_doc.meta == metadata\n\n\ndef test_retrieved_document_default_values():\n    sample_text = \"text\"\n    retrieved_doc = RetrievedDocument(text=sample_text)\n    assert retrieved_doc.text == sample_text\n    assert retrieved_doc.score == 0.0\n    assert retrieved_doc.retrieval_metadata == {}\n\n\ndef test_retrieved_document_attributes():\n    sample_text = \"text\"\n    score = 0.8\n    metadata = {\"source\": \"retrieval_system\"}\n    retrieved_doc = RetrievedDocument(\n        text=sample_text, score=score, retrieval_metadata=metadata\n    )\n    assert retrieved_doc.text == sample_text\n    assert retrieved_doc.score == score\n    assert retrieved_doc.retrieval_metadata == metadata\n"
  },
  {
    "path": "libs/kotaemon/tests/test_embedding_models.py",
    "content": "import json\nfrom pathlib import Path\nfrom unittest.mock import Mock, patch\n\nfrom openai.types.create_embedding_response import CreateEmbeddingResponse\n\nfrom kotaemon.base import Document, DocumentWithEmbedding\nfrom kotaemon.embeddings import (\n    AzureOpenAIEmbeddings,\n    FastEmbedEmbeddings,\n    LCCohereEmbeddings,\n    LCHuggingFaceEmbeddings,\n    OpenAIEmbeddings,\n    VoyageAIEmbeddings,\n)\n\nfrom .conftest import (\n    skip_when_cohere_not_installed,\n    skip_when_fastembed_not_installed,\n    skip_when_sentence_bert_not_installed,\n    skip_when_voyageai_not_installed,\n)\n\nwith open(Path(__file__).parent / \"resources\" / \"embedding_openai_batch.json\") as f:\n    openai_embedding_batch = CreateEmbeddingResponse.model_validate(json.load(f))\n\nwith open(Path(__file__).parent / \"resources\" / \"embedding_openai.json\") as f:\n    openai_embedding = CreateEmbeddingResponse.model_validate(json.load(f))\n\n\ndef assert_embedding_result(output):\n    assert isinstance(output, list)\n    assert isinstance(output[0], Document)\n    assert isinstance(output[0].embedding, list)\n    assert isinstance(output[0].embedding[0], float)\n\n\n@patch(\n    \"openai.resources.embeddings.Embeddings.create\",\n    side_effect=lambda *args, **kwargs: openai_embedding,\n)\ndef test_azureopenai_embeddings_raw(openai_embedding_call):\n    model = AzureOpenAIEmbeddings(\n        azure_deployment=\"embedding-deployment\",\n        azure_endpoint=\"https://test.openai.azure.com/\",\n        api_key=\"some-key\",\n        api_version=\"version\",\n    )\n    output = model(\"Hello world\")\n    assert_embedding_result(output)\n    openai_embedding_call.assert_called()\n\n\n@patch(\n    \"openai.resources.embeddings.Embeddings.create\",\n    side_effect=lambda *args, **kwargs: openai_embedding_batch,\n)\ndef test_lcazureopenai_embeddings_batch_raw(openai_embedding_call):\n    model = AzureOpenAIEmbeddings(\n        
azure_deployment=\"embedding-deployment\",\n        azure_endpoint=\"https://test.openai.azure.com/\",\n        api_key=\"some-key\",\n        api_version=\"version\",\n    )\n    output = model([\"Hello world\", \"Goodbye world\"])\n    assert_embedding_result(output)\n    openai_embedding_call.assert_called()\n\n\n@patch(\n    \"openai.resources.embeddings.Embeddings.create\",\n    side_effect=lambda *args, **kwargs: openai_embedding_batch,\n)\ndef test_azureopenai_embeddings_batch_raw(openai_embedding_call):\n    model = AzureOpenAIEmbeddings(\n        azure_deployment=\"text-embedding-ada-002\",\n        azure_endpoint=\"https://test.openai.azure.com/\",\n        api_key=\"some-key\",\n        api_version=\"version\",\n    )\n    output = model([\"Hello world\", \"Goodbye world\"])\n    assert_embedding_result(output)\n    openai_embedding_call.assert_called()\n\n\n@patch(\n    \"openai.resources.embeddings.Embeddings.create\",\n    side_effect=lambda *args, **kwargs: openai_embedding,\n)\ndef test_openai_embeddings_raw(openai_embedding_call):\n    model = OpenAIEmbeddings(\n        api_key=\"some-key\",\n        model=\"text-embedding-ada-002\",\n    )\n    output = model(\"Hello world\")\n    assert_embedding_result(output)\n    openai_embedding_call.assert_called()\n\n\n@patch(\n    \"openai.resources.embeddings.Embeddings.create\",\n    side_effect=lambda *args, **kwargs: openai_embedding_batch,\n)\ndef test_openai_embeddings_batch_raw(openai_embedding_call):\n    model = OpenAIEmbeddings(\n        api_key=\"some-key\",\n        model=\"text-embedding-ada-002\",\n    )\n    output = model([\"Hello world\", \"Goodbye world\"])\n    assert_embedding_result(output)\n    openai_embedding_call.assert_called()\n\n\n@skip_when_sentence_bert_not_installed\n@patch(\n    \"sentence_transformers.SentenceTransformer\",\n    side_effect=lambda *args, **kwargs: None,\n)\n@patch(\n    \"langchain.embeddings.huggingface.HuggingFaceBgeEmbeddings.embed_documents\",\n    
side_effect=lambda *args, **kwargs: [[1.0, 2.1, 3.2]],\n)\ndef test_lchuggingface_embeddings(\n    langchain_huggingface_embedding_call, sentence_transformers_init\n):\n    model = LCHuggingFaceEmbeddings(\n        model_name=\"intfloat/multilingual-e5-large\",\n        model_kwargs={\"device\": \"cpu\"},\n        encode_kwargs={\"normalize_embeddings\": False},\n    )\n\n    output = model(\"Hello World\")\n    assert_embedding_result(output)\n    sentence_transformers_init.assert_called()\n    langchain_huggingface_embedding_call.assert_called()\n\n\n@skip_when_cohere_not_installed\n@patch(\n    \"langchain_cohere.CohereEmbeddings.embed_documents\",\n    side_effect=lambda *args, **kwargs: [[1.0, 2.1, 3.2]],\n)\ndef test_lccohere_embeddings(langchain_cohere_embedding_call):\n    model = LCCohereEmbeddings(\n        model=\"embed-english-light-v2.0\",\n        cohere_api_key=\"my-api-key\",\n        user_agent=\"test\",\n    )\n\n    output = model(\"Hello World\")\n    assert_embedding_result(output)\n    langchain_cohere_embedding_call.assert_called()\n\n\n@skip_when_fastembed_not_installed\ndef test_fastembed_embeddings():\n    model = FastEmbedEmbeddings()\n    output = model(\"Hello World\")\n    assert_embedding_result(output)\n\n\nvoyage_output_mock = Mock()\nvoyage_output_mock.embeddings = [[1.0, 2.1, 3.2]]\n\n\n@skip_when_voyageai_not_installed\n@patch(\"voyageai.Client.embed\", return_value=voyage_output_mock)\n@patch(\"voyageai.AsyncClient.embed\", return_value=voyage_output_mock)\ndef test_voyageai_embeddings(sync_call, async_call):\n    model = VoyageAIEmbeddings(api_key=\"test\")\n    output = model(\"Hello, world!\")\n    assert all(isinstance(doc, DocumentWithEmbedding) for doc in output)\n"
  },
  {
    "path": "libs/kotaemon/tests/test_indexing_retrieval.py",
    "content": "import json\nfrom pathlib import Path\nfrom typing import cast\nfrom unittest.mock import patch\n\nfrom openai.types.create_embedding_response import CreateEmbeddingResponse\n\nfrom kotaemon.base import Document\nfrom kotaemon.embeddings import AzureOpenAIEmbeddings\nfrom kotaemon.indices import VectorIndexing, VectorRetrieval\nfrom kotaemon.storages import ChromaVectorStore, InMemoryDocumentStore\n\nwith open(Path(__file__).parent / \"resources\" / \"embedding_openai.json\") as f:\n    openai_embedding = CreateEmbeddingResponse.model_validate(json.load(f))\n\n\n@patch(\n    \"openai.resources.embeddings.Embeddings.create\",\n    side_effect=lambda *args, **kwargs: openai_embedding,\n)\ndef test_indexing(tmp_path):\n    db = ChromaVectorStore(path=str(tmp_path))\n    doc_store = InMemoryDocumentStore()\n    embedding = AzureOpenAIEmbeddings(\n        azure_deployment=\"text-embedding-ada-002\",\n        azure_endpoint=\"https://test.openai.azure.com/\",\n        api_key=\"some-key\",\n        api_version=\"version\",\n    )\n\n    pipeline = VectorIndexing(vector_store=db, embedding=embedding, doc_store=doc_store)\n    pipeline.doc_store = cast(InMemoryDocumentStore, pipeline.doc_store)\n    pipeline.vector_store = cast(ChromaVectorStore, pipeline.vector_store)\n    assert pipeline.vector_store._collection.count() == 0, \"Expected empty collection\"\n    assert len(pipeline.doc_store._store) == 0, \"Expected empty doc store\"\n    pipeline(text=Document(text=\"Hello world\"))\n    assert pipeline.vector_store._collection.count() == 1, \"Index 1 item\"\n    assert len(pipeline.doc_store._store) == 1, \"Expected 1 document\"\n\n\n@patch(\n    \"openai.resources.embeddings.Embeddings.create\",\n    side_effect=lambda *args, **kwargs: openai_embedding,\n)\ndef test_retrieving(tmp_path):\n    db = ChromaVectorStore(path=str(tmp_path))\n    doc_store = InMemoryDocumentStore()\n    embedding = AzureOpenAIEmbeddings(\n        
azure_deployment=\"text-embedding-ada-002\",\n        azure_endpoint=\"https://test.openai.azure.com/\",\n        api_key=\"some-key\",\n        api_version=\"version\",\n    )\n\n    index_pipeline = VectorIndexing(\n        vector_store=db, embedding=embedding, doc_store=doc_store\n    )\n    retrieval_pipeline = VectorRetrieval(\n        vector_store=db, doc_store=doc_store, embedding=embedding\n    )\n\n    index_pipeline(text=Document(text=\"Hello world\"))\n    output = retrieval_pipeline(text=\"Hello world\")\n    output1 = retrieval_pipeline(text=\"Hello world\")\n\n    assert len(output) == 1, \"Expect 1 results\"\n    assert output == output1, \"Expect identical results\"\n"
  },
  {
    "path": "libs/kotaemon/tests/test_ingestor.py",
    "content": "from pathlib import Path\n\nfrom kotaemon.indices.ingests import DocumentIngestor\nfrom kotaemon.indices.splitters import TokenSplitter\n\n\ndef test_ingestor_include_src():\n    dirpath = Path(__file__).parent\n    ingestor = DocumentIngestor(\n        pdf_mode=\"normal\",\n        text_splitter=TokenSplitter(chunk_size=200, chunk_overlap=10),\n    )\n    nodes = ingestor(dirpath / \"resources\" / \"table.pdf\")\n    assert type(nodes) is list\n    assert nodes[0].relationships\n"
  },
  {
    "path": "libs/kotaemon/tests/test_llms_chat_models.py",
    "content": "from pathlib import Path\nfrom unittest.mock import patch\n\nimport pytest\n\nfrom kotaemon.base.schema import AIMessage, HumanMessage, LLMInterface, SystemMessage\nfrom kotaemon.llms import AzureChatOpenAI, LlamaCppChat\n\ntry:\n    pass\nexcept ImportError:\n    pass\n\nfrom openai.types.chat.chat_completion import ChatCompletion\n\nfrom .conftest import skip_llama_cpp_not_installed\n\n_openai_chat_completion_response = ChatCompletion.parse_obj(\n    {\n        \"id\": \"chatcmpl-7qyuw6Q1CFCpcKsMdFkmUPUa7JP2x\",\n        \"object\": \"chat.completion\",\n        \"created\": 1692338378,\n        \"model\": \"gpt-35-turbo\",\n        \"system_fingerprint\": None,\n        \"choices\": [\n            {\n                \"index\": 0,\n                \"finish_reason\": \"stop\",\n                \"message\": {\n                    \"role\": \"assistant\",\n                    \"content\": \"Hello! How can I assist you today?\",\n                    \"function_call\": None,\n                    \"tool_calls\": None,\n                },\n                \"logprobs\": None,\n            }\n        ],\n        \"usage\": {\"completion_tokens\": 9, \"prompt_tokens\": 10, \"total_tokens\": 19},\n    }\n)\n\n\n@patch(\n    \"openai.resources.chat.completions.Completions.create\",\n    side_effect=lambda *args, **kwargs: _openai_chat_completion_response,\n)\ndef test_azureopenai_model(openai_completion):\n    model = AzureChatOpenAI(\n        api_key=\"dummy\",\n        api_version=\"2024-05-01-preview\",\n        azure_deployment=\"gpt-4o\",\n        azure_endpoint=\"https://test.openai.azure.com/\",\n    )\n    # test for str input - stream mode\n    output = model(\"hello world\")\n    assert isinstance(\n        output, LLMInterface\n    ), \"Output for single text is not LLMInterface\"\n    openai_completion.assert_called()\n\n    # test for list[message] input - stream mode\n    messages = [\n        SystemMessage(content=\"You are a philosohper\"),\n  
      HumanMessage(content=\"What is the meaning of life\"),\n        AIMessage(content=\"42\"),\n        HumanMessage(content=\"What is the meaning of 42\"),\n    ]\n\n    output = model(messages)\n    assert isinstance(\n        output, LLMInterface\n    ), \"Output for single text is not LLMInterface\"\n    openai_completion.assert_called()\n\n\n@skip_llama_cpp_not_installed\ndef test_llamacpp_chat():\n    from llama_cpp import Llama\n\n    dir_path = Path(__file__).parent / \"resources\" / \"ggml-vocab-llama.gguf\"\n\n    # test initialization\n    model = LlamaCppChat(model_path=str(dir_path), chat_format=\"llama\", vocab_only=True)\n    assert isinstance(model.client_object, Llama), \"Error initializing llama_cpp.Llama\"\n\n    # test error if model_path is omitted\n    with pytest.raises(ValueError):\n        model = LlamaCppChat(chat_format=\"llama\", vocab_only=True)\n        model.client_object\n\n    # test error if chat_format is omitted\n    with pytest.raises(ValueError):\n        model = LlamaCppChat(model_path=str(dir_path), vocab_only=True)\n        model.client_object\n"
  },
  {
    "path": "libs/kotaemon/tests/test_llms_completion_models.py",
    "content": "from pathlib import Path\nfrom unittest.mock import patch\n\nfrom kotaemon.base.schema import LLMInterface\nfrom kotaemon.llms import AzureOpenAI, LlamaCpp, OpenAI\n\ntry:\n    from langchain_openai import AzureOpenAI as AzureOpenAILC\n    from langchain_openai import OpenAI as OpenAILC\nexcept ImportError:\n    from langchain.llms import AzureOpenAI as AzureOpenAILC\n    from langchain.llms import OpenAI as OpenAILC\n\nfrom openai.types.completion import Completion\n\nfrom .conftest import skip_llama_cpp_not_installed, skip_openai_lc_wrapper_test\n\n_openai_completion_response = Completion.parse_obj(\n    {\n        \"id\": \"cmpl-7qyNoIo6gRSCJR0hi8o3ZKBH4RkJ0\",\n        \"object\": \"text_completion\",\n        \"created\": 1392751226,\n        \"model\": \"gpt-35-turbo\",\n        \"system_fingerprint\": None,\n        \"choices\": [\n            {\n                \"text\": \"completion\",\n                \"index\": 0,\n                \"finish_reason\": \"length\",\n                \"logprobs\": None,\n            }\n        ],\n        \"usage\": {\"completion_tokens\": 20, \"prompt_tokens\": 2, \"total_tokens\": 22},\n    }\n)\n\n\n@skip_openai_lc_wrapper_test\n@patch(\n    \"openai.resources.completions.Completions.create\",\n    side_effect=lambda *args, **kwargs: _openai_completion_response,\n)\ndef test_azureopenai_model(openai_completion):\n    model = AzureOpenAI(\n        azure_endpoint=\"https://test.openai.azure.com/\",\n        openai_api_key=\"some-key\",\n        openai_api_version=\"2023-03-15-preview\",\n        deployment_name=\"gpt35turbo\",\n        temperature=0,\n        request_timeout=60,\n    )\n    assert isinstance(\n        model.to_langchain_format(), AzureOpenAILC\n    ), \"Agent not wrapped in Langchain's AzureOpenAI\"\n\n    output = model(\"hello world\")\n    assert isinstance(\n        output, LLMInterface\n    ), \"Output for single text is not LLMInterface\"\n\n\n@patch(\n    
\"openai.resources.completions.Completions.create\",\n    side_effect=lambda *args, **kwargs: _openai_completion_response,\n)\ndef test_openai_model(openai_completion):\n    model = OpenAI(\n        openai_api_base=\"https://test.openai.azure.com/\",\n        openai_api_key=\"some-key\",\n        openai_api_version=\"2023-03-15-preview\",\n        deployment_name=\"gpt35turbo\",\n        temperature=0,\n        request_timeout=60,\n    )\n    assert isinstance(\n        model.to_langchain_format(), OpenAILC\n    ), \"Agent is not wrapped in Langchain's OpenAI\"\n\n    output = model(\"hello world\")\n    assert isinstance(\n        output, LLMInterface\n    ), \"Output for single text is not LLMInterface\"\n\n\n@skip_llama_cpp_not_installed\ndef test_llamacpp_model():\n    weight_path = Path(__file__).parent / \"resources\" / \"ggml-vocab-llama.gguf\"\n\n    # test initialization\n    model = LlamaCpp(model_path=str(weight_path), vocab_only=True)\n    assert isinstance(model._obj, model._get_lc_class())\n"
  },
  {
    "path": "libs/kotaemon/tests/test_mcp_manager.py",
    "content": "\"\"\"Tests for ktem.mcp.manager module.\r\n\r\nUses an in-memory SQLite engine to test MCPManager CRUD operations\r\nwithout depending on the application's database.\r\n\"\"\"\r\n\r\nimport pytest\r\nfrom sqlalchemy import JSON, Column, String, create_engine\r\nfrom sqlalchemy.orm import DeclarativeBase, Session\r\n\r\n# ---------------------------------------------------------------------------\r\n# In-memory DB setup (mirrors ktem.mcp.db but fully isolated)\r\n# ---------------------------------------------------------------------------\r\n\r\n\r\nclass _Base(DeclarativeBase):\r\n    pass\r\n\r\n\r\nclass _MCPTable(_Base):\r\n    __tablename__ = \"mcp_table\"\r\n    name = Column(String, primary_key=True, unique=True)\r\n    config = Column(JSON, default={})\r\n\r\n\r\n# ---------------------------------------------------------------------------\r\n# Fixtures\r\n# ---------------------------------------------------------------------------\r\n\r\n\r\n@pytest.fixture()\r\ndef manager():\r\n    \"\"\"Fresh manager with a clean in-memory DB for each test.\"\"\"\r\n    engine = create_engine(\"sqlite:///:memory:\")\r\n    _MCPTable.metadata.create_all(engine)\r\n    return MCPManagerForTest(engine)\r\n\r\n\r\n# ---------------------------------------------------------------------------\r\n# Minimal MCPManager that uses the test engine\r\n# ---------------------------------------------------------------------------\r\n\r\n\r\nclass MCPManagerForTest:\r\n    \"\"\"Same logic as ktem.mcp.manager.MCPManager but uses our test engine.\"\"\"\r\n\r\n    def __init__(self, engine):\r\n        self._engine = engine\r\n        self._info: dict[str, dict] = {}\r\n        self.load()\r\n\r\n    def load(self):\r\n        self._info = {}\r\n        with Session(self._engine) as session:\r\n            for item in session.query(_MCPTable).all():\r\n                self._info[item.name] = {  # type: ignore[index]\r\n                    \"name\": item.name,\r\n        
            \"config\": item.config,\r\n                }\r\n\r\n    def info(self) -> dict:\r\n        return self._info\r\n\r\n    def get(self, name: str) -> dict | None:\r\n        return self._info.get(name)\r\n\r\n    def add(self, name: str, config: dict):\r\n        name = name.strip()\r\n        if not name:\r\n            raise ValueError(\"Name must not be empty\")\r\n        with Session(self._engine) as session:\r\n            session.add(_MCPTable(name=name, config=config))\r\n            session.commit()\r\n        self.load()\r\n\r\n    def update(self, name: str, config: dict):\r\n        if not name:\r\n            raise ValueError(\"Name must not be empty\")\r\n        with Session(self._engine) as session:\r\n            item = session.query(_MCPTable).filter_by(name=name).first()\r\n            if not item:\r\n                raise ValueError(f\"MCP server '{name}' not found\")\r\n            item.config = config  # type: ignore[assignment]\r\n            session.commit()\r\n        self.load()\r\n\r\n    def delete(self, name: str):\r\n        with Session(self._engine) as session:\r\n            item = session.query(_MCPTable).filter_by(name=name).first()\r\n            if item:\r\n                session.delete(item)\r\n                session.commit()\r\n        self.load()\r\n\r\n    def get_enabled_tools(self) -> list[str]:\r\n        return [\r\n            f\"[MCP] {name}\"\r\n            for name, entry in self._info.items()\r\n            if entry.get(\"config\", {}).get(\"enabled_tools\") is not None\r\n        ]\r\n\r\n\r\n# ---------------------------------------------------------------------------\r\n# Tests\r\n# ---------------------------------------------------------------------------\r\n\r\n\r\nclass TestMCPManagerAdd:\r\n    def test_add_and_retrieve(self, manager):\r\n        \"\"\"add() persists data; get() and info() reflect it.\"\"\"\r\n        manager.add(\"server1\", {\"command\": \"uvx\", \"args\": 
[\"mcp-server-fetch\"]})\r\n        assert manager.info()[\"server1\"][\"config\"][\"command\"] == \"uvx\"\r\n        assert manager.get(\"server1\")[\"name\"] == \"server1\"\r\n\r\n    def test_add_multiple(self, manager):\r\n        manager.add(\"s1\", {\"command\": \"cmd1\"})\r\n        manager.add(\"s2\", {\"command\": \"cmd2\"})\r\n        assert set(manager.info().keys()) == {\"s1\", \"s2\"}\r\n\r\n    @pytest.mark.parametrize(\"name\", [\"\", \"   \"])\r\n    def test_empty_or_whitespace_name_raises(self, manager, name):\r\n        with pytest.raises(ValueError, match=\"Name must not be empty\"):\r\n            manager.add(name, {})\r\n\r\n    def test_whitespace_name_is_stripped(self, manager):\r\n        manager.add(\"  server1  \", {\"command\": \"uvx\"})\r\n        assert \"server1\" in manager.info()\r\n\r\n    def test_complex_config_stored_correctly(self, manager):\r\n        config = {\r\n            \"command\": \"uvx\",\r\n            \"env\": {\"JIRA_URL\": \"https://example.atlassian.net\"},\r\n            \"enabled_tools\": [\"jira_search\"],\r\n        }\r\n        manager.add(\"atlassian\", config)\r\n        stored = manager.get(\"atlassian\")[\"config\"]\r\n        assert stored[\"env\"][\"JIRA_URL\"] == \"https://example.atlassian.net\"\r\n        assert stored[\"enabled_tools\"] == [\"jira_search\"]\r\n\r\n\r\nclass TestMCPManagerUpdateDelete:\r\n    def test_update_changes_config(self, manager):\r\n        manager.add(\"s1\", {\"command\": \"cmd1\"})\r\n        manager.add(\"s2\", {\"command\": \"cmd2\"})\r\n        manager.update(\"s1\", {\"command\": \"updated\"})\r\n        assert manager.info()[\"s1\"][\"config\"][\"command\"] == \"updated\"\r\n        assert manager.info()[\"s2\"][\"config\"][\"command\"] == \"cmd2\"  # untouched\r\n\r\n    def test_update_nonexistent_raises(self, manager):\r\n        with pytest.raises(ValueError, match=\"not found\"):\r\n            manager.update(\"ghost\", {})\r\n\r\n    def 
test_delete_removes_entry(self, manager):\r\n        manager.add(\"s1\", {})\r\n        manager.add(\"s2\", {})\r\n        manager.delete(\"s1\")\r\n        assert \"s1\" not in manager.info()\r\n        assert \"s2\" in manager.info()\r\n\r\n    def test_delete_nonexistent_is_noop(self, manager):\r\n        manager.delete(\"ghost\")  # must not raise\r\n        assert len(manager.info()) == 0\r\n\r\n\r\nclass TestMCPManagerGetEnabledTools:\r\n    def test_only_servers_with_enabled_tools_listed(self, manager):\r\n        manager.add(\"no_filter\", {\"command\": \"uvx\"})\r\n        manager.add(\"with_filter\", {\"command\": \"uvx\", \"enabled_tools\": [\"tool_a\"]})\r\n        choices = manager.get_enabled_tools()\r\n        assert \"[MCP] no_filter\" not in choices\r\n        assert \"[MCP] with_filter\" in choices\r\n\r\n    def test_empty_when_no_servers(self, manager):\r\n        assert manager.get_enabled_tools() == []\r\n\r\n\r\nclass TestMCPManagerLoad:\r\n    def test_load_picks_up_external_db_changes(self, manager):\r\n        manager.add(\"server1\", {})\r\n        with Session(manager._engine) as session:\r\n            session.add(_MCPTable(name=\"external\", config={\"command\": \"ext\"}))\r\n            session.commit()\r\n\r\n        assert \"external\" not in manager.info()  # not yet refreshed\r\n        manager.load()\r\n        assert \"external\" in manager.info()\r\n"
  },
  {
    "path": "libs/kotaemon/tests/test_mcp_tools.py",
    "content": "\"\"\"Tests for kotaemon.agents.tools.mcp module.\r\n\r\nCovers config parsing, JSON Schema -> Pydantic model building,\r\ntool formatting, and MCPTool construction (without real MCP servers).\r\n\"\"\"\r\n\r\nfrom types import SimpleNamespace\r\nfrom unittest.mock import patch\r\n\r\nimport pytest\r\n\r\nfrom kotaemon.agents.tools.mcp import (\r\n    MCPTool,\r\n    _json_schema_type_to_python,\r\n    _make_tool,\r\n    build_args_model,\r\n    create_tools_from_config,\r\n    format_tool_list,\r\n    parse_mcp_config,\r\n)\r\n\r\n# ---------------------------------------------------------------------------\r\n# _json_schema_type_to_python — parametrized to avoid 7 near-identical tests\r\n# ---------------------------------------------------------------------------\r\n\r\n\r\n@pytest.mark.parametrize(\r\n    \"json_type, expected\",\r\n    [\r\n        (\"string\", str),\r\n        (\"integer\", int),\r\n        (\"number\", float),\r\n        (\"boolean\", bool),\r\n        (\"object\", dict),\r\n        (\"array\", list),\r\n        (\"unknown_type\", str),  # fallback\r\n    ],\r\n)\r\ndef test_json_schema_type_to_python(json_type, expected):\r\n    assert _json_schema_type_to_python(json_type) is expected\r\n\r\n\r\n# ---------------------------------------------------------------------------\r\n# build_args_model\r\n# ---------------------------------------------------------------------------\r\n\r\n\r\nclass TestBuildArgsModel:\r\n    def test_model_fields_and_name(self):\r\n        \"\"\"Required + optional fields and the generated model name.\"\"\"\r\n        schema = {\r\n            \"properties\": {\r\n                \"url\": {\"type\": \"string\", \"description\": \"The URL to fetch\"},\r\n                \"timeout\": {\"type\": \"integer\", \"description\": \"Timeout in seconds\"},\r\n            },\r\n            \"required\": [\"url\"],\r\n        }\r\n        model = build_args_model(\"fetch\", schema)\r\n        assert 
model.__name__ == \"MCPArgs_fetch\"\r\n        assert model.model_fields[\"url\"].is_required()\r\n        assert not model.model_fields[\"timeout\"].is_required()\r\n\r\n    def test_optional_field_preserves_default(self):\r\n        schema = {\r\n            \"properties\": {\r\n                \"limit\": {\r\n                    \"type\": \"integer\",\r\n                    \"description\": \"Max results\",\r\n                    \"default\": 10,\r\n                },\r\n            },\r\n            \"required\": [],\r\n        }\r\n        assert build_args_model(\"search\", schema).model_fields[\"limit\"].default == 10\r\n\r\n    def test_empty_schema_produces_no_fields(self):\r\n        assert len(build_args_model(\"empty\", {}).model_fields) == 0\r\n\r\n\r\n# ---------------------------------------------------------------------------\r\n# parse_mcp_config\r\n# ---------------------------------------------------------------------------\r\n\r\n\r\nclass TestParseMcpConfig:\r\n    def test_full_stdio_config(self):\r\n        config = {\r\n            \"transport\": \"stdio\",\r\n            \"command\": \"uvx\",\r\n            \"args\": [\"mcp-server-fetch\"],\r\n            \"env\": {\"KEY\": \"value\"},\r\n        }\r\n        parsed = parse_mcp_config(config)\r\n        assert parsed == {\r\n            \"transport\": \"stdio\",\r\n            \"command\": \"uvx\",\r\n            \"args\": [\"mcp-server-fetch\"],\r\n            \"env\": {\"KEY\": \"value\"},\r\n        }\r\n\r\n    def test_defaults_for_empty_config(self):\r\n        parsed = parse_mcp_config({})\r\n        assert parsed[\"transport\"] == \"stdio\"\r\n        assert parsed[\"command\"] == \"\"\r\n        assert parsed[\"args\"] == []\r\n        assert parsed[\"env\"] == {}\r\n\r\n    def test_auto_split_multi_word_command(self):\r\n        \"\"\"stdio with no explicit args: space-delimited command is split.\"\"\"\r\n        parsed = parse_mcp_config(\r\n            {\"command\": \"npx -y 
mcp-remote https://example.com/sse\"}\r\n        )\r\n        assert parsed[\"command\"] == \"npx\"\r\n        assert parsed[\"args\"] == [\"-y\", \"mcp-remote\", \"https://example.com/sse\"]\r\n\r\n    def test_no_split_when_args_already_provided(self):\r\n        \"\"\"Explicit args suppress the auto-split.\"\"\"\r\n        parsed = parse_mcp_config(\r\n            {\r\n                \"command\": \"npx -y mcp-remote https://example.com/sse\",\r\n                \"args\": [\"--flag\"],\r\n            }\r\n        )\r\n        assert parsed[\"command\"] == \"npx -y mcp-remote https://example.com/sse\"\r\n        assert parsed[\"args\"] == [\"--flag\"]\r\n\r\n    def test_sse_transport_uses_url_as_command(self):\r\n        \"\"\"For SSE, the url field becomes the effective command.\"\"\"\r\n        parsed = parse_mcp_config(\r\n            {\r\n                \"transport\": \"sse\",\r\n                \"url\": \"http://localhost:8080/sse\",\r\n                \"command\": \"ignored\",\r\n            }\r\n        )\r\n        assert parsed[\"transport\"] == \"sse\"\r\n        assert parsed[\"command\"] == \"http://localhost:8080/sse\"\r\n\r\n\r\n# ---------------------------------------------------------------------------\r\n# _make_tool\r\n# ---------------------------------------------------------------------------\r\n\r\n\r\nclass TestMakeTool:\r\n    def test_creates_mcp_tool_with_schema(self):\r\n        parsed = {\r\n            \"transport\": \"stdio\",\r\n            \"command\": \"uvx\",\r\n            \"args\": [\"mcp-server-fetch\"],\r\n            \"env\": {},\r\n        }\r\n        tool_info = SimpleNamespace(\r\n            name=\"fetch\",\r\n            description=\"Fetch a URL\",\r\n            inputSchema={\r\n                \"properties\": {\r\n                    \"url\": {\"type\": \"string\", \"description\": \"URL to fetch\"}\r\n                },\r\n                \"required\": [\"url\"],\r\n            },\r\n        )\r\n        tool = 
_make_tool(parsed, tool_info)\r\n\r\n        assert isinstance(tool, MCPTool)\r\n        assert tool.name == \"fetch\"\r\n        assert tool.description == \"Fetch a URL\"\r\n        assert tool.server_transport == \"stdio\"\r\n        assert tool.server_command == \"uvx\"\r\n        assert tool.server_args == [\"mcp-server-fetch\"]\r\n\r\n    def test_missing_schema_and_description_uses_defaults(self):\r\n        \"\"\"No inputSchema → args_schema is None; None description → auto-generated.\"\"\"\r\n        parsed = {\"transport\": \"stdio\", \"command\": \"uvx\", \"args\": [], \"env\": {}}\r\n        tool_info = SimpleNamespace(name=\"ping\", description=None)\r\n        tool = _make_tool(parsed, tool_info)\r\n        assert tool.description == \"MCP tool: ping\"\r\n        assert tool.args_schema is None\r\n\r\n\r\n# ---------------------------------------------------------------------------\r\n# format_tool_list\r\n# ---------------------------------------------------------------------------\r\n\r\n\r\nclass TestFormatToolList:\r\n    def test_all_tools_enabled_by_default(self):\r\n        tool_infos = [\r\n            {\"name\": \"fetch\", \"description\": \"Fetch a URL\"},\r\n            {\"name\": \"search\", \"description\": \"Search the web\"},\r\n        ]\r\n        result = format_tool_list(tool_infos)\r\n        assert \"2\" in result\r\n        assert \"fetch\" in result and \"search\" in result\r\n        assert \"All tools enabled\" in result\r\n\r\n    def test_partial_filter_shows_counts_and_icons(self):\r\n        tool_infos = [\r\n            {\"name\": \"fetch\", \"description\": \"Fetch a URL\"},\r\n            {\"name\": \"search\", \"description\": \"Search the web\"},\r\n        ]\r\n        result = format_tool_list(tool_infos, enabled_tools=[\"fetch\"])\r\n        assert \"1/2 tool(s) enabled\" in result\r\n        assert \"✅\" in result  # fetch enabled\r\n        assert \"⬜\" in result  # search disabled\r\n\r\n    def 
test_long_description_is_truncated(self):\r\n        result = format_tool_list([{\"name\": \"tool\", \"description\": \"A\" * 200}])\r\n        assert \"A\" * 121 not in result\r\n\r\n    def test_none_description_shows_placeholder(self):\r\n        result = format_tool_list([{\"name\": \"tool\", \"description\": None}])\r\n        assert \"No description\" in result\r\n\r\n\r\n# ---------------------------------------------------------------------------\r\n# create_tools_from_config (mocked MCP server connection)\r\n# ---------------------------------------------------------------------------\r\n\r\n\r\nclass TestCreateToolsFromConfig:\r\n    def _make_mock_tools(self):\r\n        return [\r\n            MCPTool(\r\n                name=\"fetch\",\r\n                description=\"Fetch\",\r\n                server_transport=\"stdio\",\r\n                server_command=\"uvx\",\r\n                mcp_tool_name=\"fetch\",\r\n            ),\r\n            MCPTool(\r\n                name=\"search\",\r\n                description=\"Search\",\r\n                server_transport=\"stdio\",\r\n                server_command=\"uvx\",\r\n                mcp_tool_name=\"search\",\r\n            ),\r\n        ]\r\n\r\n    @patch(\"kotaemon.agents.tools.mcp._run_async\")\r\n    def test_no_filter_returns_all(self, mock_run_async):\r\n        mock_run_async.return_value = self._make_mock_tools()\r\n        tools = create_tools_from_config({\"command\": \"uvx\"})\r\n        assert len(tools) == 2\r\n\r\n    @patch(\"kotaemon.agents.tools.mcp._run_async\")\r\n    def test_enabled_tools_filter(self, mock_run_async):\r\n        \"\"\"Non-empty filter returns only nominated tools; empty list returns all.\"\"\"\r\n        mock_run_async.return_value = self._make_mock_tools()\r\n        filtered = create_tools_from_config({\"command\": \"uvx\"}, enabled_tools=[\"fetch\"])\r\n        assert len(filtered) == 1\r\n        assert filtered[0].mcp_tool_name == \"fetch\"\r\n\r\n        # 
Empty list == no filter\r\n        mock_run_async.return_value = self._make_mock_tools()\r\n        all_tools = create_tools_from_config({\"command\": \"uvx\"}, enabled_tools=[])\r\n        assert len(all_tools) == 2\r\n\r\n\r\n# ---------------------------------------------------------------------------\r\n# MCPTool._format_result\r\n# ---------------------------------------------------------------------------\r\n\r\n\r\nclass TestMCPToolFormatResult:\r\n    def _make_tool(self):\r\n        return MCPTool(\r\n            name=\"test\",\r\n            description=\"Test tool\",\r\n            server_transport=\"stdio\",\r\n            server_command=\"echo\",\r\n            mcp_tool_name=\"test\",\r\n        )\r\n\r\n    def test_text_content_joined(self):\r\n        result = self._make_tool()._format_result(\r\n            SimpleNamespace(\r\n                isError=False,\r\n                content=[SimpleNamespace(text=\"Hello\"), SimpleNamespace(text=\"World\")],\r\n            )\r\n        )\r\n        assert result == \"Hello\\nWorld\"\r\n\r\n    def test_error_flag(self):\r\n        result = self._make_tool()._format_result(\r\n            SimpleNamespace(\r\n                isError=True,\r\n                content=\"Something went wrong\",\r\n            )\r\n        )\r\n        assert \"MCP Tool Error\" in result\r\n\r\n    def test_binary_content(self):\r\n        result = self._make_tool()._format_result(\r\n            SimpleNamespace(\r\n                isError=False,\r\n                content=[SimpleNamespace(data=b\"bytes\", mimeType=\"image/png\")],\r\n            )\r\n        )\r\n        assert \"[Binary data: image/png]\" in result\r\n"
  },
  {
    "path": "libs/kotaemon/tests/test_post_processing.py",
    "content": "import pytest\n\nfrom kotaemon.base import Document\nfrom kotaemon.parsers import RegexExtractor\n\n\n@pytest.fixture\ndef regex_extractor():\n    return RegexExtractor(\n        pattern=r\"\\d+\", output_map={\"1\": \"One\", \"2\": \"Two\", \"3\": \"Three\"}\n    )\n\n\ndef test_run_document(regex_extractor):\n    document = Document(text=\"This is a test. 1 2 3\")\n    extracted_document = regex_extractor(document)[0]\n    assert extracted_document.text == \"One\"\n    assert extracted_document.matches == [\"One\", \"Two\", \"Three\"]\n\n\ndef test_run_raw(regex_extractor):\n    output = regex_extractor(\"This is a test. 123\")[0]\n    assert output.text == \"123\"\n    assert output.matches == [\"123\"]\n\n\ndef test_run_batch_raw(regex_extractor):\n    output = regex_extractor([\"This is a test. 123\", \"456\"])\n    extracted_text = [each.text for each in output]\n    extracted_matches = [each.matches for each in output]\n    assert extracted_text == [\"123\", \"456\"]\n    assert extracted_matches == [[\"123\"], [\"456\"]]\n"
  },
  {
    "path": "libs/kotaemon/tests/test_prompt.py",
    "content": "import pytest\n\nfrom kotaemon.base import Document\nfrom kotaemon.llms import BasePromptComponent, PromptTemplate\nfrom kotaemon.parsers import RegexExtractor\n\n\ndef test_set_attributes():\n    template = PromptTemplate(\"str = {s}, int = {i}, doc = {doc}, comp = {comp}\")\n    doc = Document(text=\"Helloo, Alice!\")\n    comp = RegexExtractor(\n        pattern=r\"\\d+\", output_map={\"1\": \"One\", \"2\": \"Two\", \"3\": \"Three\"}\n    )\n    comp.set_run(kwargs={\"text\": \"This is a test. 1 2 3\"}, temp=True)\n\n    prompt = BasePromptComponent(template=template, s=\"Alice\", i=30, doc=doc, comp=comp)\n    assert prompt.s == \"Alice\"\n    assert prompt.i == 30\n    assert prompt.doc == doc\n    assert prompt.comp == comp\n\n\ndef test_check_redundant_kwargs():\n    template = PromptTemplate(\"Hello, {name}!\")\n    prompt = BasePromptComponent(template=template, name=\"Alice\")\n    with pytest.warns(UserWarning, match=\"Keys provided but not in template: age\"):\n        prompt._BasePromptComponent__check_redundant_kwargs(name=\"Alice\", age=30)\n\n\ndef test_check_unset_placeholders():\n    template = PromptTemplate(\"Hello, {name}! I'm {age} years old.\")\n    prompt = BasePromptComponent(template=template, name=\"Alice\")\n    with pytest.raises(ValueError):\n        prompt._BasePromptComponent__check_unset_placeholders()\n\n\ndef test_validate_value_type():\n    template = PromptTemplate(\"Hello, {name}!\")\n    prompt = BasePromptComponent(template=template)\n    with pytest.raises(ValueError):\n        prompt._BasePromptComponent__validate_value_type(name={})\n\n\ndef test_run():\n    template = PromptTemplate(\"str = {s}, int = {i}, doc = {doc}, comp = {comp}\")\n    doc = Document(text=\"Helloo, Alice!\")\n    comp = RegexExtractor(\n        pattern=r\"\\d+\", output_map={\"1\": \"One\", \"2\": \"Two\", \"3\": \"Three\"}\n    )\n    comp.set_run(kwargs={\"text\": \"This is a test. 
1 2 3\"}, temp=True)\n\n    prompt = BasePromptComponent(template=template, s=\"Alice\", i=30, doc=doc, comp=comp)\n\n    result = prompt()\n\n    assert result.text == \"str = Alice, int = 30, doc = Helloo, Alice!, comp = ['One']\"\n\n\ndef test_set_method():\n    template = PromptTemplate(\"Hello, {name}!\")\n    prompt = BasePromptComponent(template=template)\n    prompt.set_value(name=\"Alice\")\n    assert prompt.name == \"Alice\"\n"
  },
  {
    "path": "libs/kotaemon/tests/test_promptui.py",
    "content": "from kotaemon.contribs.promptui.config import export_pipeline_to_config\nfrom kotaemon.contribs.promptui.export import export_from_dict\nfrom kotaemon.contribs.promptui.ui import build_from_dict\n\nfrom .simple_pipeline import Pipeline\n\n\nclass TestPromptConfig:\n    def test_export_prompt_config(self):\n        \"\"\"Test if the prompt config is exported correctly\"\"\"\n        pipeline = Pipeline()\n        config_dict = export_pipeline_to_config(pipeline)\n        config = list(config_dict.values())[0]\n\n        assert \"inputs\" in config, \"inputs should be in config\"\n        assert \"text\" in config[\"inputs\"], \"inputs should have config\"\n\n        assert \"params\" in config, \"params should be in config\"\n        assert \"llm.deployment_name\" in config[\"params\"]\n        assert \"llm.azure_endpoint\" in config[\"params\"]\n        assert \"llm.openai_api_key\" in config[\"params\"]\n        assert \"llm.openai_api_version\" in config[\"params\"]\n        assert \"llm.request_timeout\" in config[\"params\"]\n        assert \"llm.temperature\" in config[\"params\"]\n\n\nclass TestPromptUI:\n    def test_uigeneration(self):\n        \"\"\"Test if the gradio UI is exposed without any problem\"\"\"\n        pipeline = Pipeline()\n        config = export_pipeline_to_config(pipeline)\n\n        build_from_dict(config)\n\n\nclass TestExport:\n    def test_export(self, tmp_path):\n        \"\"\"Test if the export functionality works without error\"\"\"\n        from pathlib import Path\n\n        import yaml\n        from theflow.storage import storage\n\n        config_path = tmp_path / \"config.yaml\"\n        pipeline = Pipeline()\n        Path(storage.url(pipeline.config.store_result)).mkdir(\n            parents=True, exist_ok=True\n        )\n\n        config_dict = export_pipeline_to_config(pipeline)\n        pipeline_name = list(config_dict.keys())[0]\n\n        config_dict[pipeline_name][\"logs\"] = {\n            \"sheet1\": 
{\n                \"inputs\": [{\"name\": \"text\", \"step\": \".\", \"variable\": \"text\"}],\n                \"outputs\": [{\"name\": \"answer\", \"step\": \".\"}],\n            },\n        }\n        with open(config_path, \"w\") as f:\n            yaml.safe_dump(config_dict, f)\n\n        export_from_dict(\n            config=str(config_path),\n            pipeline=pipeline_name,\n            output_path=str(tmp_path / \"exported.xlsx\"),\n        )\n"
  },
  {
    "path": "libs/kotaemon/tests/test_reader.py",
    "content": "from pathlib import Path\nfrom unittest.mock import patch\n\nfrom langchain.schema import Document as LangchainDocument\nfrom llama_index.core.node_parser import SimpleNodeParser\n\nfrom kotaemon.base import Document\nfrom kotaemon.loaders import (\n    AutoReader,\n    AzureAIDocumentIntelligenceLoader,\n    DocxReader,\n    HtmlReader,\n    MhtmlReader,\n    UnstructuredReader,\n)\n\nfrom .conftest import skip_when_unstructured_pdf_not_installed\n\n\ndef test_docx_reader():\n    reader = DocxReader()\n    documents = reader.load_data(Path(__file__).parent / \"resources\" / \"dummy.docx\")\n\n    assert len(documents)\n\n\ndef test_html_reader():\n    reader = HtmlReader()\n    documents = reader.load_data(\n        Path(__file__).parent / \"resources\" / \"html\" / \"dummy.html\"\n    )\n\n    assert len(documents)\n\n\ndef test_pdf_reader():\n    reader = AutoReader(\"PDFReader\")\n    dirpath = Path(__file__).parent\n    documents = reader.load_data(dirpath / \"resources\" / \"dummy.pdf\")\n\n    # check document reader output\n    assert len(documents) == 1\n\n    first_doc = documents[0]\n    assert isinstance(first_doc, Document)\n    assert first_doc.text.lower().replace(\" \", \"\") == \"dummypdffile\"\n\n    langchain_doc = first_doc.to_langchain_format()\n    assert isinstance(langchain_doc, LangchainDocument)\n\n    # test chunking using NodeParser from llama-index\n    node_parser = SimpleNodeParser.from_defaults(chunk_size=100, chunk_overlap=20)\n    nodes = node_parser.get_nodes_from_documents(documents)\n    assert len(nodes) > 0\n\n\n@skip_when_unstructured_pdf_not_installed\ndef test_unstructured_pdf_reader():\n    reader = UnstructuredReader()\n    dirpath = Path(__file__).parent\n    input_path = dirpath / \"resources/dummy.pdf\"\n    documents = reader.load_data(input_path)\n\n    # check document reader output\n    assert len(documents) == 1\n\n    first_doc = documents[0]\n    assert isinstance(first_doc, Document)\n    assert 
first_doc.text.lower().replace(\" \", \"\") == \"dummypdffile\"\n\n    # split documents mode\n    documents = reader.load_data(input_path, split_documents=True)\n    # check document reader output\n    assert len(documents) == 1\n\n\ndef test_mhtml_reader():\n    reader = MhtmlReader()\n    input_path = Path(__file__).parent / \"resources\" / \"dummy.mhtml\"\n    docs = reader.load_data(input_path)\n\n    assert len(docs) == 1\n    assert docs[0].text.startswith(\"This is a test\")\n\n\n@patch(\"azure.ai.documentintelligence.DocumentIntelligenceClient\")\ndef test_azureai_document_intelligence_reader(mock_client):\n    reader = AzureAIDocumentIntelligenceLoader(\n        endpoint=\"https://endpoint.com\",\n        credential=\"credential\",\n    )\n    docs = reader(Path(__file__).parent / \"resources\" / \"dummy.pdf\")\n\n    assert len(docs) == 1\n    mock_client.assert_called_once()\n"
  },
  {
    "path": "libs/kotaemon/tests/test_reranking.py",
    "content": "from unittest.mock import patch\n\nimport pytest\nfrom openai.types.chat.chat_completion import ChatCompletion\n\nfrom kotaemon.base import Document\nfrom kotaemon.indices.rankings import LLMReranking\nfrom kotaemon.llms import AzureChatOpenAI\n\n_openai_chat_completion_responses = [\n    ChatCompletion.parse_obj(\n        {\n            \"id\": \"chatcmpl-7qyuw6Q1CFCpcKsMdFkmUPUa7JP2x\",\n            \"object\": \"chat.completion\",\n            \"created\": 1692338378,\n            \"model\": \"gpt-35-turbo\",\n            \"system_fingerprint\": None,\n            \"choices\": [\n                {\n                    \"index\": 0,\n                    \"finish_reason\": \"stop\",\n                    \"message\": {\n                        \"role\": \"assistant\",\n                        \"content\": text,\n                        \"function_call\": None,\n                        \"tool_calls\": None,\n                    },\n                    \"logprobs\": None,\n                }\n            ],\n            \"usage\": {\"completion_tokens\": 9, \"prompt_tokens\": 10, \"total_tokens\": 19},\n        }\n    )\n    for text in [\n        \"YES\",\n        \"NO\",\n        \"YES\",\n    ]\n]\n\n\n@pytest.fixture\ndef llm():\n    return AzureChatOpenAI(\n        api_key=\"dummy\",\n        api_version=\"2024-05-01-preview\",\n        azure_deployment=\"gpt-4o\",\n        azure_endpoint=\"https://test.openai.azure.com/\",\n    )\n\n\n@patch(\n    \"openai.resources.chat.completions.Completions.create\",\n    side_effect=_openai_chat_completion_responses,\n)\ndef test_reranking(openai_completion, llm):\n    documents = [Document(text=f\"test {idx}\") for idx in range(3)]\n    query = \"test query\"\n\n    reranker = LLMReranking(llm=llm, concurrent=False)\n    rerank_docs = reranker(documents, query=query)\n\n    assert len(rerank_docs) == 2\n"
  },
  {
    "path": "libs/kotaemon/tests/test_splitter.py",
    "content": "from llama_index.core.schema import NodeRelationship\n\nfrom kotaemon.base import Document\nfrom kotaemon.indices.splitters import TokenSplitter\n\nsource1 = Document(\n    content=\"The City Hall and Raffles Place MRT stations are paired cross-platform \"\n    \"interchanges on the North–South line (NSL) and East–West line (EWL) of the \"\n    \"Singapore Mass Rapid Transit (MRT) system. Both are situated in the Downtown \"\n    \"Core district: City Hall station is near landmarks such as the former City Hall, \"\n    \"St Andrew's Cathedral and the Padang, while Raffles Place station serves Merlion \"\n    \"Park, The Fullerton Hotel and the Asian Civilisations Museum. The stations were \"\n    \"first announced in 1982. Constructing the tunnels between the City Hall and \"\n    \"Raffles Place stations required the draining of the Singapore River. The \"\n    \"stations opened on 12 December 1987 as part of the MRT extension to Outram Park \"\n    \"station. Cross-platform transfers between the NSL and EWL began on 28 October \"\n    \"1989, ahead of the split of the MRT network into two lines. Both stations are \"\n    \"designated Civil Defence shelters. City Hall station features a mural by Simon\"\n    \"Wong which depicts government buildings in the area, while two murals at Raffles \"\n    \"Place station by Lim Sew Yong and Thang Kiang How depict scenes of Singapore's \"\n    \"history\"\n)\n\nsource2 = Document(\n    content=\"The pink cockatoo (Cacatua leadbeateri) is a medium-sized cockatoo that \"\n    \"inhabits arid and semi-arid inland areas across Australia, with the exception of \"\n    \"the north east. The bird has a soft-textured white and salmon-pink plumage and \"\n    \"large, bright red and yellow crest. The sexes are quite similar, although males \"\n    \"are usually bigger while the female has a broader yellow stripe on the crest and \"\n    \"develops a red eye when mature. 
The pink cockatoo is usually found in pairs or \"\n    \"small groups, and feeds both on the ground and in trees. It is listed as an \"\n    \"endangered species by the Australian government. Formerly known as Major \"\n    \"Mitchell's cockatoo, after the explorer Thomas Mitchell, the species was \"\n    \"officially renamed the pink cockatoo in 2023 by BirdLife Australia in light of \"\n    \"Mitchell's involvement in the massacre of Aboriginal people at Mount Dispersion, \"\n    \"as well as a general trend to make Australian species names more culturally \"\n    \"inclusive. This pink cockatoo with a raised crest was photographed near Mount \"\n    \"Grenfell in New South Wales.\"\n)\n\n\ndef test_split_token():\n    \"\"\"Test that it can split tokens successfully\"\"\"\n    splitter = TokenSplitter(chunk_size=30, chunk_overlap=10)\n    chunks = splitter([source1, source2])\n\n    assert isinstance(chunks, list), \"Chunks should be a list\"\n    assert isinstance(chunks[0], Document), \"Chunks should be a list of Documents\"\n\n    assert chunks[0].relationships[NodeRelationship.SOURCE].node_id == source1.doc_id\n    assert (\n        chunks[1].relationships[NodeRelationship.PREVIOUS].node_id == chunks[0].doc_id\n    )\n    assert chunks[1].relationships[NodeRelationship.NEXT].node_id == chunks[2].doc_id\n    assert chunks[-1].relationships[NodeRelationship.SOURCE].node_id == source2.doc_id\n"
  },
  {
    "path": "libs/kotaemon/tests/test_table_reader.py",
    "content": "import json\nfrom pathlib import Path\n\nimport pytest\n\nfrom kotaemon.loaders import MathpixPDFReader, OCRReader, PandasExcelReader\n\nfrom .conftest import skip_when_unstructured_pdf_not_installed\n\ninput_file = Path(__file__).parent / \"resources\" / \"table.pdf\"\ninput_file_excel = Path(__file__).parent / \"resources\" / \"dummy.xlsx\"\n\n\n@pytest.fixture\ndef fullocr_output():\n    with open(\n        Path(__file__).parent / \"resources\" / \"fullocr_sample_output.json\",\n        encoding=\"utf-8\",\n    ) as f:\n        fullocr = json.load(f)\n    return fullocr\n\n\n@pytest.fixture\ndef mathpix_output():\n    with open(Path(__file__).parent / \"resources\" / \"policy.md\", encoding=\"utf-8\") as f:\n        content = f.read()\n    return content\n\n\n@skip_when_unstructured_pdf_not_installed\ndef test_ocr_reader(fullocr_output):\n    reader = OCRReader()\n    documents = reader.load_data(input_file, response_content=fullocr_output)\n    table_docs = [doc for doc in documents if doc.metadata.get(\"type\", \"\") == \"table\"]\n    assert len(table_docs) == 2\n\n\ndef test_mathpix_reader(mathpix_output):\n    reader = MathpixPDFReader()\n    documents = reader.load_data(input_file, response_content=mathpix_output)\n    table_docs = [doc for doc in documents if doc.metadata.get(\"type\", \"\") == \"table\"]\n    assert len(table_docs) == 4\n\n\ndef test_excel_reader():\n    reader = PandasExcelReader()\n    documents = reader.load_data(\n        input_file_excel,\n    )\n    assert len(documents) == 1\n"
  },
  {
    "path": "libs/kotaemon/tests/test_telemetry.py",
    "content": "import os\nimport sys\n\nimport pytest\n\nfrom .conftest import skip_when_haystack_not_installed\n\n\n@pytest.fixture\ndef clean_artifacts_for_telemetry():\n    try:\n        del sys.modules[\"kotaemon\"]\n    except KeyError:\n        pass\n\n    try:\n        del sys.modules[\"haystack\"]\n    except KeyError:\n        pass\n\n    try:\n        del sys.modules[\"haystack.telemetry\"]\n    except KeyError:\n        pass\n\n    if \"HAYSTACK_TELEMETRY_ENABLED\" in os.environ:\n        del os.environ[\"HAYSTACK_TELEMETRY_ENABLED\"]\n\n\n@pytest.mark.usefixtures(\"clean_artifacts_for_telemetry\")\n@skip_when_haystack_not_installed\ndef test_disable_telemetry_import_haystack_first():\n    \"\"\"Test that telemetry is disabled when kotaemon lib is initiated after\"\"\"\n    import os\n\n    import haystack.telemetry\n\n    assert haystack.telemetry.telemetry is not None\n    assert os.environ.get(\"HAYSTACK_TELEMETRY_ENABLED\", \"True\") != \"False\"\n\n    import kotaemon  # noqa: F401\n\n    assert haystack.telemetry.telemetry is None\n    assert os.environ.get(\"HAYSTACK_TELEMETRY_ENABLED\", \"True\") == \"False\"\n\n\n@pytest.mark.usefixtures(\"clean_artifacts_for_telemetry\")\n@skip_when_haystack_not_installed\ndef test_disable_telemetry_import_haystack_after_kotaemon():\n    \"\"\"Test that telemetry is disabled when kotaemon lib is initiated before\"\"\"\n    import os\n\n    import haystack.telemetry\n\n    import kotaemon  # noqa: F401\n\n    assert haystack.telemetry.telemetry is None\n    assert os.environ.get(\"HAYSTACK_TELEMETRY_ENABLED\", \"True\") == \"False\"\n"
  },
  {
    "path": "libs/kotaemon/tests/test_template.py",
    "content": "import pytest\n\nfrom kotaemon.llms import PromptTemplate\n\n\ndef test_prompt_template_creation():\n    # Ensure the PromptTemplate object is created correctly\n    template_string = \"This is a template\"\n    template = PromptTemplate(template_string)\n    assert template.template == template_string\n\n    template_string = \"Hello, {name}! Today is {day}.\"\n    template = PromptTemplate(template_string)\n    assert template.template == template_string\n    assert template.placeholders == {\"name\", \"day\"}\n\n\ndef test_prompt_template_creation_invalid_placeholder():\n    # Ensure the PromptTemplate object handle invalid placeholder correctly\n    template_string = \"Hello, {name}! Today is {0day}.\"\n\n    with pytest.raises(ValueError):\n        PromptTemplate(template_string, ignore_invalid=False)\n\n    with pytest.warns(\n        UserWarning,\n        match=\"Ignore invalid placeholder: 0day.\",\n    ):\n        PromptTemplate(template_string, ignore_invalid=True)\n\n\ndef test_prompt_template_addition():\n    # Ensure the __add__ method concatenates the templates correctly\n    template1 = PromptTemplate(\"Hello, \")\n    template2 = PromptTemplate(\"world!\")\n    result = template1 + template2\n    assert result.template == \"Hello, \\nworld!\"\n\n    template1 = PromptTemplate(\"Hello, {name}!\")\n    template2 = PromptTemplate(\"Today is {day}.\")\n    result = template1 + template2\n    assert result.template == \"Hello, {name}!\\nToday is {day}.\"\n\n\ndef test_prompt_template_extract_placeholders():\n    # Ensure the PromptTemplate correctly extracts placeholders\n    template_string = \"Hello, {name}! Today is {day}.\"\n    result = PromptTemplate(template_string).placeholders\n    assert result == {\"name\", \"day\"}\n\n\ndef test_prompt_template_populate():\n    # Ensure the populate method populates the template correctly\n    template_string = \"Hello, {name}! 
Today is {day}.\"\n    template = PromptTemplate(template_string)\n    result = template.populate(name=\"John\", day=\"Monday\")\n    assert result == \"Hello, John! Today is Monday.\"\n\n\ndef test_prompt_template_check_missing_kwargs():\n    # Ensure the check_missing_kwargs and populate methods raise an exception for\n    # missing placeholders\n    template_string = \"Hello, {name}! Today is {day}.\"\n    template = PromptTemplate(template_string)\n    kwargs = dict(name=\"John\")\n\n    with pytest.raises(ValueError):\n        template.check_missing_kwargs(**kwargs)\n\n    with pytest.raises(ValueError):\n        template.populate(**kwargs)\n\n\ndef test_prompt_template_check_redundant_kwargs():\n    # Ensure the check_redundant_kwargs, partial_populate and populate methods warn for\n    # redundant placeholders\n    template_string = \"Hello, {name}! Today is {day}.\"\n    template = PromptTemplate(template_string)\n    kwargs = dict(name=\"John\", day=\"Monday\", age=\"30\")\n\n    with pytest.warns(UserWarning, match=\"Keys provided but not in template: age\"):\n        template.check_redundant_kwargs(**kwargs)\n\n    with pytest.warns(UserWarning, match=\"Keys provided but not in template: age\"):\n        template.partial_populate(**kwargs)\n\n    with pytest.warns(UserWarning, match=\"Keys provided but not in template: age\"):\n        template.populate(**kwargs)\n\n\ndef test_prompt_template_populate_complex_template():\n    # Ensure the populate method produces the same results as the built-in str.format\n    # function\n    template_string = (\n        \"a = {a:.2f}, b = {b}, c = {c:.1%}, d = {d:#.0g}, ascii of {e} = {e!a:>2}\"\n    )\n    template = PromptTemplate(template_string)\n    kwargs = dict(a=1, b=\"two\", c=3, d=4, e=\"á\")\n    populated = template.populate(**kwargs)\n    expected = template_string.format(**kwargs)\n    assert populated == expected\n\n\ndef test_prompt_template_partial_populate():\n    # Ensure the partial_populate method 
populates correctly\n    template_string = (\n        \"a = {a:.2f}, b = {b}, c = {c:.1%}, d = {d:#.0g}, ascii of {e} = {e!a:>2}\"\n    )\n    template = PromptTemplate(template_string)\n    kwargs = dict(a=1, b=\"two\", d=4, e=\"á\")\n    populated = template.partial_populate(**kwargs)\n    expected = \"a = 1.00, b = two, c = {c:.1%}, d = 4., ascii of á = '\\\\xe1'\"\n    assert populated == expected\n"
  },
  {
    "path": "libs/kotaemon/tests/test_tools.py",
    "content": "import json\nfrom pathlib import Path\nfrom unittest.mock import patch\n\nfrom openai.types.create_embedding_response import CreateEmbeddingResponse\n\nfrom kotaemon.agents.tools import ComponentTool, GoogleSearchTool, WikipediaTool\nfrom kotaemon.base import Document\nfrom kotaemon.embeddings import AzureOpenAIEmbeddings\nfrom kotaemon.indices.vectorindex import VectorIndexing, VectorRetrieval\nfrom kotaemon.storages import ChromaVectorStore, InMemoryDocumentStore\n\nwith open(Path(__file__).parent / \"resources\" / \"embedding_openai.json\") as f:\n    openai_embedding = CreateEmbeddingResponse.model_validate(json.load(f))\n\n\ndef test_google_tool(mock_google_search):\n    tool = GoogleSearchTool()\n    assert tool.name\n    assert tool.description\n    output = tool(\"What is Cinnamon AI\")\n    assert output\n\n\ndef test_wikipedia_tool():\n    tool = WikipediaTool()\n    assert tool.name\n    assert tool.description\n    output = tool(\"Cinnamon\")\n    assert output\n\n\n@patch(\n    \"openai.resources.embeddings.Embeddings.create\",\n    side_effect=lambda *args, **kwargs: openai_embedding,\n)\ndef test_pipeline_tool(tmp_path):\n    db = ChromaVectorStore(path=str(tmp_path))\n    doc_store = InMemoryDocumentStore()\n    embedding = AzureOpenAIEmbeddings(\n        azure_deployment=\"embedding-deployment\",\n        azure_endpoint=\"https://test.openai.azure.com/\",\n        api_key=\"some-key\",\n        api_version=\"version\",\n    )\n\n    index_pipeline = VectorIndexing(\n        vector_store=db, embedding=embedding, doc_store=doc_store\n    )\n    retrieval_pipeline = VectorRetrieval(\n        vector_store=db, doc_store=doc_store, embedding=embedding\n    )\n\n    index_tool = ComponentTool(\n        name=\"index_document\",\n        description=\"A tool to use to index a document to be searched later\",\n        component=index_pipeline,\n    )\n    output = index_tool({\"text\": Document(text=\"Cinnamon AI\")})\n\n    retrieval_tool = 
ComponentTool(\n        name=\"search_document\",\n        description=\"A tool to use to search a document in a vectorstore\",\n        component=retrieval_pipeline,\n    )\n    output = retrieval_tool(\"Cinnamon AI\")\n    assert output\n"
  },
  {
    "path": "libs/kotaemon/tests/test_vectorstore.py",
    "content": "import json\nimport os\n\nimport pytest\n\nfrom kotaemon.base import DocumentWithEmbedding\nfrom kotaemon.storages import (\n    ChromaVectorStore,\n    InMemoryVectorStore,\n    MilvusVectorStore,\n    QdrantVectorStore,\n    SimpleFileVectorStore,\n)\n\n\nclass TestChromaVectorStore:\n    def test_add(self, tmp_path):\n        \"\"\"Test that the DB add correctly\"\"\"\n        db = ChromaVectorStore(path=str(tmp_path))\n\n        embeddings = [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]\n        metadatas = [{\"a\": 1, \"b\": 2}, {\"a\": 3, \"b\": 4}]\n        ids = [\"1\", \"2\"]\n\n        assert db._collection.count() == 0, \"Expected empty collection\"\n        output = db.add(embeddings=embeddings, metadatas=metadatas, ids=ids)\n        assert output == ids, \"Expected output to be the same as ids\"\n        assert db._collection.count() == 2, \"Expected 2 added entries\"\n\n    def test_add_from_docs(self, tmp_path):\n        db = ChromaVectorStore(path=str(tmp_path))\n\n        embeddings = [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]\n        metadatas = [{\"a\": 1, \"b\": 2}, {\"a\": 3, \"b\": 4}]\n        documents = [\n            DocumentWithEmbedding(embedding=embedding, metadata=metadata)\n            for embedding, metadata in zip(embeddings, metadatas)\n        ]\n        assert db._collection.count() == 0, \"Expected empty collection\"\n        output = db.add(documents)\n        assert len(output) == 2, \"Expected outputting 2 ids\"\n        assert db._collection.count() == 2, \"Expected 2 added entries\"\n\n    def test_delete(self, tmp_path):\n        db = ChromaVectorStore(path=str(tmp_path))\n\n        embeddings = [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6], [0.7, 0.8, 0.9]]\n        metadatas = [{\"a\": 1, \"b\": 2}, {\"a\": 3, \"b\": 4}, {\"a\": 5, \"b\": 6}]\n        ids = [\"a\", \"b\", \"c\"]\n\n        db.add(embeddings=embeddings, metadatas=metadatas, ids=ids)\n        assert db._collection.count() == 3, \"Expected 3 added entries\"\n        
db.delete(ids=[\"a\", \"b\"])\n        assert db._collection.count() == 1, \"Expected 1 remaining entry\"\n        db.delete(ids=[\"c\"])\n        assert db._collection.count() == 0, \"Expected 0 remaining entry\"\n\n    def test_query(self, tmp_path):\n        db = ChromaVectorStore(path=str(tmp_path))\n\n        embeddings = [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6], [0.7, 0.8, 0.9]]\n        metadatas = [{\"a\": 1, \"b\": 2}, {\"a\": 3, \"b\": 4}, {\"a\": 5, \"b\": 6}]\n        ids = [\"a\", \"b\", \"c\"]\n\n        db.add(embeddings=embeddings, metadatas=metadatas, ids=ids)\n\n        _, sim, out_ids = db.query(embedding=[0.1, 0.2, 0.3], top_k=1)\n        assert sim[0] - 1.0 < 1e-6\n        assert out_ids == [\"a\"]\n\n        _, _, out_ids = db.query(embedding=[0.42, 0.52, 0.53], top_k=1)\n        assert out_ids == [\"b\"]\n\n    def test_save_load_delete(self, tmp_path):\n        \"\"\"Test that save/load func behave correctly.\"\"\"\n        embeddings = [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6], [0.7, 0.8, 0.9]]\n        metadatas = [{\"a\": 1, \"b\": 2}, {\"a\": 3, \"b\": 4}, {\"a\": 5, \"b\": 6}]\n        ids = [\"1\", \"2\", \"3\"]\n        db = ChromaVectorStore(path=str(tmp_path))\n        db.add(embeddings=embeddings, metadatas=metadatas, ids=ids)\n\n        db2 = ChromaVectorStore(path=str(tmp_path))\n        assert (\n            db2._collection.count() == 3\n        ), \"load function does not load data completely\"\n\n        # test delete collection function\n        db2.drop()\n        # reinit the chroma with the same collection name\n        db2 = ChromaVectorStore(path=str(tmp_path))\n        assert (\n            db2._collection.count() == 0\n        ), \"delete collection function does not work correctly\"\n\n\nclass TestInMemoryVectorStore:\n    def test_add(self):\n        \"\"\"Test that add func adds correctly.\"\"\"\n\n        embeddings = [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]\n        metadatas = [{\"a\": 1, \"b\": 2}, {\"a\": 3, \"b\": 4}]\n        ids 
= [\"1\", \"2\"]\n        db = InMemoryVectorStore()\n\n        output = db.add(embeddings=embeddings, metadatas=metadatas, ids=ids)\n        assert output == ids, \"Excepted output to be the same as ids\"\n\n    def test_save_load_delete(self, tmp_path):\n        \"\"\"Test that delete func deletes correctly.\"\"\"\n        embeddings = [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6], [0.7, 0.8, 0.9]]\n        metadatas = [{\"a\": 1, \"b\": 2}, {\"a\": 3, \"b\": 4}, {\"a\": 5, \"b\": 6}]\n        ids = [\"1\", \"2\", \"3\"]\n        db = InMemoryVectorStore()\n        db.add(embeddings=embeddings, metadatas=metadatas, ids=ids)\n        db.delete([\"3\"])\n        db.save(save_path=tmp_path / \"test_save_load_delete.json\")\n        with open(tmp_path / \"test_save_load_delete.json\") as f:\n            data = json.load(f)\n        assert (\n            \"1\" and \"2\" in data[\"text_id_to_ref_doc_id\"]\n        ), \"save function does not save data completely\"\n        assert (\n            \"3\" not in data[\"text_id_to_ref_doc_id\"]\n        ), \"delete function does not delete data completely\"\n        db2 = InMemoryVectorStore()\n        db2.load(load_path=tmp_path / \"test_save_load_delete.json\")\n        assert db2.get(\"2\") == [\n            0.4,\n            0.5,\n            0.6,\n        ], \"load function does not load data completely\"\n\n\nclass TestSimpleFileVectorStore:\n    def test_add_delete(self, tmp_path):\n        \"\"\"Test that delete func deletes correctly.\"\"\"\n        embeddings = [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6], [0.7, 0.8, 0.9]]\n        metadatas = [{\"a\": 1, \"b\": 2}, {\"a\": 3, \"b\": 4}, {\"a\": 5, \"b\": 6}]\n        ids = [\"1\", \"2\", \"3\"]\n        collection_name = \"test_save_load_delete\"\n        db = SimpleFileVectorStore(path=tmp_path, collection_name=collection_name)\n        db.add(embeddings=embeddings, metadatas=metadatas, ids=ids)\n        db.delete([\"3\"])\n        with open(tmp_path / collection_name) as f:\n          
  data = json.load(f)\n        assert (\n            \"1\" and \"2\" in data[\"text_id_to_ref_doc_id\"]\n        ), \"save function does not save data completely\"\n        assert (\n            \"3\" not in data[\"text_id_to_ref_doc_id\"]\n        ), \"delete function does not delete data completely\"\n        db2 = SimpleFileVectorStore(path=tmp_path, collection_name=collection_name)\n        assert db2.get(\"2\") == [\n            0.4,\n            0.5,\n            0.6,\n        ], \"load function does not load data completely\"\n\n        os.remove(tmp_path / collection_name)\n\n\nclass TestMilvusVectorStore:\n    def test_add(self, tmp_path):\n        \"\"\"Test that the DB add correctly\"\"\"\n        db = MilvusVectorStore(\n            path=str(tmp_path),\n            overwrite=True,\n        )\n\n        embeddings = [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]\n        metadatas = [{\"a\": 1, \"b\": 2}, {\"a\": 3, \"b\": 4}]\n        ids = [\"1\", \"2\"]\n\n        assert db.count() == 0, \"Expected empty collection\"\n        output = db.add(embeddings=embeddings, metadatas=metadatas, ids=ids)\n        assert output == ids, \"Expected output to be the same as ids\"\n        assert db.count() == 2, \"Expected 2 added entries\"\n\n    def test_add_from_docs(self, tmp_path):\n        db = MilvusVectorStore(\n            path=str(tmp_path),\n            overwrite=True,\n        )\n\n        embeddings = [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]\n        metadatas = [{\"a\": 1, \"b\": 2}, {\"a\": 3, \"b\": 4}]\n        documents = [\n            DocumentWithEmbedding(embedding=embedding, metadata=metadata)\n            for embedding, metadata in zip(embeddings, metadatas)\n        ]\n        assert db.count() == 0, \"Expected empty collection\"\n        output = db.add(documents)\n        assert len(output) == 2, \"Expected outputting 2 ids\"\n        assert db.count() == 2, \"Expected 2 added entries\"\n\n    def test_delete(self, tmp_path):\n        db = 
MilvusVectorStore(\n            path=str(tmp_path),\n            overwrite=True,\n        )\n\n        embeddings = [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6], [0.7, 0.8, 0.9]]\n        metadatas = [{\"a\": 1, \"b\": 2}, {\"a\": 3, \"b\": 4}, {\"a\": 5, \"b\": 6}]\n        ids = [\"a\", \"b\", \"c\"]\n\n        db.add(embeddings=embeddings, metadatas=metadatas, ids=ids)\n        assert db.count() == 3, \"Expected 3 added entries\"\n        db.delete(ids=[\"a\", \"b\"])\n        assert db.count() == 1, \"Expected 1 remaining entry\"\n        db.delete(ids=[\"c\"])\n        assert db.count() == 0, \"Expected 0 remaining entry\"\n\n    def test_query(self, tmp_path):\n        db = MilvusVectorStore(path=str(tmp_path), overwrite=True)\n        import numpy as np\n\n        embeddings = np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6], [0.7, 0.8, 0.9]])\n        norms = np.linalg.norm(embeddings, axis=1)\n        normalized_embeddings = (embeddings / norms[:, np.newaxis]).tolist()\n\n        metadatas = [{\"a\": 1, \"b\": 2}, {\"a\": 3, \"b\": 4}, {\"a\": 5, \"b\": 6}]\n        ids = [\"a\", \"b\", \"c\"]\n\n        db.add(embeddings=normalized_embeddings, metadatas=metadatas, ids=ids)\n\n        _, sim, out_ids = db.query(embedding=normalized_embeddings[0], top_k=1)\n        assert sim[0] - 1.0 < 1e-6\n        assert out_ids == [\"a\"]\n\n        query_embedding = [\n            normalized_embeddings[1][0] + 0.02,\n            normalized_embeddings[1][1] + 0.02,\n            normalized_embeddings[1][2] + 0.02,\n        ]\n        _, _, out_ids = db.query(embedding=query_embedding, top_k=1)\n        assert out_ids == [\"b\"]\n\n    def test_save_load_delete(self, tmp_path):\n        \"\"\"Test that save/load func behave correctly.\"\"\"\n        embeddings = [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6], [0.7, 0.8, 0.9]]\n        metadatas = [{\"a\": 1, \"b\": 2}, {\"a\": 3, \"b\": 4}, {\"a\": 5, \"b\": 6}]\n        ids = [\"1\", \"2\", \"3\"]\n        db = MilvusVectorStore(path=str(tmp_path), 
overwrite=True)\n        db.add(embeddings=embeddings, metadatas=metadatas, ids=ids)\n\n        db2 = MilvusVectorStore(path=str(tmp_path), overrides=False)\n        assert db2.count() == 3, \"load function does not load data completely\"\n\n        # test delete collection function\n        db2.drop()\n        # reinit the milvus with the same collection name\n        db2 = MilvusVectorStore(path=str(tmp_path), overwrite=False)\n        assert db2.count() == 0, \"delete collection function does not work correctly\"\n\n\nclass TestQdrantVectorStore:\n    def test_add(self):\n        from qdrant_client import QdrantClient\n\n        db = QdrantVectorStore(collection_name=\"test\", client=QdrantClient(\":memory:\"))\n\n        embeddings = [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]\n        metadatas = [{\"a\": 1, \"b\": 2}, {\"a\": 3, \"b\": 4}]\n        ids = [\n            \"0f0611b3-2d9c-4818-ab69-1f1c4cf66693\",\n            \"90aba5d3-f4f8-47c6-bad9-5ea457442e07\",\n        ]\n\n        output = db.add(embeddings=embeddings, metadatas=metadatas, ids=ids)\n        assert output == ids, \"Expected output to be the same as ids\"\n        assert db.count() == 2, \"Expected 2 added entries\"\n\n    def test_add_from_docs(self, tmp_path):\n        from qdrant_client import QdrantClient\n\n        db = QdrantVectorStore(collection_name=\"test\", client=QdrantClient(\":memory:\"))\n\n        embeddings = [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]\n        metadatas = [{\"a\": 1, \"b\": 2}, {\"a\": 3, \"b\": 4}]\n        documents = [\n            DocumentWithEmbedding(embedding=embedding, metadata=metadata)\n            for embedding, metadata in zip(embeddings, metadatas)\n        ]\n\n        output = db.add(documents)\n        assert len(output) == 2, \"Expected outputting 2 ids\"\n        assert db.count() == 2, \"Expected 2 added entries\"\n\n    def test_delete(self, tmp_path):\n        from qdrant_client import QdrantClient\n\n        db = 
QdrantVectorStore(collection_name=\"test\", client=QdrantClient(\":memory:\"))\n\n        embeddings = [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6], [0.7, 0.8, 0.9]]\n        metadatas = [{\"a\": 1, \"b\": 2}, {\"a\": 3, \"b\": 4}, {\"a\": 5, \"b\": 6}]\n        ids = [\n            \"0f0611b3-2d9c-4818-ab69-1f1c4cf66693\",\n            \"90aba5d3-f4f8-47c6-bad9-5ea457442e07\",\n            \"6bed07c3-d284-47a3-a711-c3f9186755b8\",\n        ]\n\n        db.add(embeddings=embeddings, metadatas=metadatas, ids=ids)\n        assert db.count() == 3, \"Expected 3 added entries\"\n        db.delete(\n            ids=[\n                \"0f0611b3-2d9c-4818-ab69-1f1c4cf66693\",\n                \"90aba5d3-f4f8-47c6-bad9-5ea457442e07\",\n            ]\n        )\n        assert db.count() == 1, \"Expected 1 remaining entry\"\n        db.delete(ids=[\"6bed07c3-d284-47a3-a711-c3f9186755b8\"])\n        assert db.count() == 0, \"Expected 0 remaining entry\"\n\n    def test_query(self, tmp_path):\n        from qdrant_client import QdrantClient\n\n        db = QdrantVectorStore(collection_name=\"test\", client=QdrantClient(\":memory:\"))\n\n        embeddings = [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6], [0.7, 0.8, 0.9]]\n        metadatas = [{\"a\": 1, \"b\": 2}, {\"a\": 3, \"b\": 4}, {\"a\": 5, \"b\": 6}]\n        ids = [\n            \"0f0611b3-2d9c-4818-ab69-1f1c4cf66693\",\n            \"90aba5d3-f4f8-47c6-bad9-5ea457442e07\",\n            \"6bed07c3-d284-47a3-a711-c3f9186755b8\",\n        ]\n\n        db.add(embeddings=embeddings, metadatas=metadatas, ids=ids)\n\n        _, sim, out_ids = db.query(embedding=[0.1, 0.2, 0.3], top_k=1)\n        assert sim[0] - 1.0 < 1e-6\n        assert out_ids == [\"0f0611b3-2d9c-4818-ab69-1f1c4cf66693\"]\n\n        _, _, out_ids = db.query(embedding=[0.4, 0.5, 0.6], top_k=1)\n        assert out_ids == [\"90aba5d3-f4f8-47c6-bad9-5ea457442e07\"]\n\n    def test_save_load_delete(self, tmp_path):\n        \"\"\"Test that save/load func behave correctly.\"\"\"\n      
  embeddings = [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6], [0.7, 0.8, 0.9]]\n        metadatas = [{\"a\": 1, \"b\": 2}, {\"a\": 3, \"b\": 4}, {\"a\": 5, \"b\": 6}]\n        ids = [\n            \"0f0611b3-2d9c-4818-ab69-1f1c4cf66693\",\n            \"90aba5d3-f4f8-47c6-bad9-5ea457442e07\",\n            \"6bed07c3-d284-47a3-a711-c3f9186755b8\",\n        ]\n        from qdrant_client import QdrantClient\n\n        db = QdrantVectorStore(\n            collection_name=\"test\", client=QdrantClient(path=tmp_path)\n        )\n        db.add(embeddings=embeddings, metadatas=metadatas, ids=ids)\n        del db\n\n        db2 = QdrantVectorStore(\n            collection_name=\"test\", client=QdrantClient(path=tmp_path)\n        )\n        assert db2.count() == 3\n\n        db2.drop()\n        del db2\n\n        db2 = QdrantVectorStore(\n            collection_name=\"test\", client=QdrantClient(path=tmp_path)\n        )\n\n        with pytest.raises(Exception):\n            # Since no docs were added, the collection should not exist yet\n            # and thus the count function should raise an exception\n            db2.count()\n"
  },
  {
    "path": "libs/ktem/.gitignore",
    "content": "14-1_抜粋-1.pdf\n_example_.db\nktem/assets/prebuilt/\n"
  },
  {
    "path": "libs/ktem/MANIFEST.in",
    "content": "include ktem/assets/css/*.css\ninclude ktem/assets/img/*.svg\ninclude ktem/assets/js/*.js\ninclude ktem/assets/md/*.md\n"
  },
  {
    "path": "libs/ktem/alembic.ini",
    "content": "# A generic, single database configuration.\n\n[alembic]\n# path to migration scripts\nscript_location = migrations\n\n# template used to generate migration file names; The default value is %%(rev)s_%%(slug)s\n# Uncomment the line below if you want the files to be prepended with date and time\n# see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file\n# for all available tokens\n# file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s\n\n# sys.path path, will be prepended to sys.path if present.\n# defaults to the current working directory.\nprepend_sys_path = .\n\n# timezone to use when rendering the date within the migration file\n# as well as the filename.\n# If specified, requires the python>=3.9 or backports.zoneinfo library.\n# Any required deps can installed by adding `alembic[tz]` to the pip requirements\n# string value is passed to ZoneInfo()\n# leave blank for localtime\n# timezone =\n\n# max length of characters to apply to the\n# \"slug\" field\n# truncate_slug_length = 40\n\n# set to 'true' to run the environment during\n# the 'revision' command, regardless of autogenerate\n# revision_environment = false\n\n# set to 'true' to allow .pyc and .pyo files without\n# a source .py file to be detected as revisions in the\n# versions/ directory\n# sourceless = false\n\n# version location specification; This defaults\n# to migrations/versions.  When using multiple version\n# directories, initial revisions must be specified with --version-path.\n# The path separator used here should be the separator specified by \"version_path_separator\" below.\n# version_locations = %(here)s/bar:%(here)s/bat:migrations/versions\n\n# version path separator; As mentioned above, this is the character used to split\n# version_locations. 
The default within new alembic.ini files is \"os\", which uses os.pathsep.\n# If this key is omitted entirely, it falls back to the legacy behavior of splitting on spaces and/or commas.\n# Valid values for version_path_separator are:\n#\n# version_path_separator = :\n# version_path_separator = ;\n# version_path_separator = space\nversion_path_separator = os  # Use os.pathsep. Default configuration used for new projects.\n\n# set to 'true' to search source files recursively\n# in each \"version_locations\" directory\n# new in Alembic version 1.10\n# recursive_version_locations = false\n\n# the output encoding used when revision files\n# are written from script.py.mako\n# output_encoding = utf-8\n\nsqlalchemy.url = driver://user:pass@localhost/dbname\n\n\n[post_write_hooks]\n# post_write_hooks defines scripts or Python functions that are run\n# on newly generated revision scripts.  See the documentation for further\n# detail and examples\n\n# format using \"black\" - use the console_scripts runner, against the \"black\" entrypoint\n# hooks = black\n# black.type = console_scripts\n# black.entrypoint = black\n# black.options = -l 79 REVISION_SCRIPT_FILENAME\n\n# lint with attempts to fix using \"ruff\" - use the exec runner, execute a binary\n# hooks = ruff\n# ruff.type = exec\n# ruff.executable = %(here)s/.venv/bin/ruff\n# ruff.options = --fix REVISION_SCRIPT_FILENAME\n\n# Logging configuration\n[loggers]\nkeys = root,sqlalchemy,alembic\n\n[handlers]\nkeys = console\n\n[formatters]\nkeys = generic\n\n[logger_root]\nlevel = WARN\nhandlers = console\nqualname =\n\n[logger_sqlalchemy]\nlevel = WARN\nhandlers =\nqualname = sqlalchemy.engine\n\n[logger_alembic]\nlevel = INFO\nhandlers =\nqualname = alembic\n\n[handler_console]\nclass = StreamHandler\nargs = (sys.stderr,)\nlevel = NOTSET\nformatter = generic\n\n[formatter_generic]\nformat = %(levelname)-5.5s [%(name)s] %(message)s\ndatefmt = %H:%M:%S\n"
  },
  {
    "path": "libs/ktem/ktem/__init__.py",
    "content": ""
  },
  {
    "path": "libs/ktem/ktem/app.py",
    "content": "import os\nfrom pathlib import Path\nfrom typing import Optional\n\nimport gradio as gr\nimport pluggy\nfrom ktem import extension_protocol\nfrom ktem.assets import PDFJS_PREBUILT_DIR, KotaemonTheme\nfrom ktem.components import reasonings\nfrom ktem.exceptions import HookAlreadyDeclared, HookNotDeclared\nfrom ktem.index import IndexManager\nfrom ktem.settings import BaseSettingGroup, SettingGroup, SettingReasoningGroup\nfrom theflow.settings import settings\nfrom theflow.utils.modules import import_dotted_string\n\nBASE_PATH = os.environ.get(\"GR_FILE_ROOT_PATH\", \"\")\n\n\nclass BaseApp:\n    \"\"\"The main app of Kotaemon\n\n    The main application contains app-level information:\n        - setting state\n        - dynamic conversation state\n        - user id\n\n    Also contains registering methods for:\n        - reasoning pipelines\n        - indexing & retrieval pipelines\n\n    App life-cycle:\n        - Render\n        - Declare public events\n        - Subscribe public events\n        - Register events\n    \"\"\"\n\n    public_events: list[str] = []\n\n    def __init__(self):\n        self.dev_mode = getattr(settings, \"KH_MODE\", \"\") == \"dev\"\n        self.app_name = getattr(settings, \"KH_APP_NAME\", \"Kotaemon\")\n        self.app_version = getattr(settings, \"KH_APP_VERSION\", \"\")\n        self.f_user_management = getattr(settings, \"KH_FEATURE_USER_MANAGEMENT\", False)\n        self._theme = KotaemonTheme()\n\n        dir_assets = Path(__file__).parent / \"assets\"\n        with (dir_assets / \"css\" / \"main.css\").open() as fi:\n            self._css = fi.read()\n        with (dir_assets / \"js\" / \"main.js\").open() as fi:\n            self._js = fi.read()\n            self._js = self._js.replace(\"KH_APP_VERSION\", self.app_version)\n        with (dir_assets / \"js\" / \"pdf_viewer.js\").open(encoding=\"utf-8\") as fi:\n            self._pdf_view_js = fi.read()\n            # workaround for Windows path\n            
pdf_js_dist_dir = str(PDFJS_PREBUILT_DIR).replace(\"\\\\\", \"\\\\\\\\\")\n            self._pdf_view_js = self._pdf_view_js.replace(\n                \"PDFJS_PREBUILT_DIR\",\n                pdf_js_dist_dir,\n            ).replace(\"GR_FILE_ROOT_PATH\", BASE_PATH)\n        with (dir_assets / \"js\" / \"svg-pan-zoom.min.js\").open() as fi:\n            self._svg_js = fi.read()\n\n        self._favicon = str(dir_assets / \"img\" / \"favicon.svg\")\n\n        self.default_settings = SettingGroup(\n            application=BaseSettingGroup(settings=settings.SETTINGS_APP),\n            reasoning=SettingReasoningGroup(settings=settings.SETTINGS_REASONING),\n        )\n\n        self._callbacks: dict[str, list] = {}\n        self._events: dict[str, list] = {}\n\n        self.register_extensions()\n        self.register_reasonings()\n        self.initialize_indices()\n\n        self.default_settings.reasoning.finalize()\n        self.default_settings.index.finalize()\n        self.settings_state = gr.State(self.default_settings.flatten())\n\n        self.user_id = gr.State(\"default\" if not self.f_user_management else None)\n\n    def initialize_indices(self):\n        \"\"\"Create the index manager, start indices, and register to app settings\"\"\"\n        self.index_manager = IndexManager(self)\n        self.index_manager.on_application_startup()\n\n        for index in self.index_manager.indices:\n            options = index.get_user_settings()\n            self.default_settings.index.options[index.id] = BaseSettingGroup(\n                settings=options\n            )\n\n    def register_reasonings(self):\n        \"\"\"Register the reasoning components from app settings\"\"\"\n        if getattr(settings, \"KH_REASONINGS\", None) is None:\n            return\n\n        for value in settings.KH_REASONINGS:\n            reasoning_cls = import_dotted_string(value, safe=False)\n            rid = reasoning_cls.get_info()[\"id\"]\n            reasonings[rid] = 
reasoning_cls\n            options = reasoning_cls().get_user_settings()\n            self.default_settings.reasoning.options[rid] = BaseSettingGroup(\n                settings=options\n            )\n\n    def register_extensions(self):\n        \"\"\"Register installed extensions\"\"\"\n        self.exman = pluggy.PluginManager(\"ktem\")\n        self.exman.add_hookspecs(extension_protocol)\n        self.exman.load_setuptools_entrypoints(\"ktem\")\n\n        # retrieve and register extension declarations\n        extension_declarations = self.exman.hook.ktem_declare_extensions()\n        for extension_declaration in extension_declarations:\n            # if already in database, with the same version: skip\n\n            # otherwise,\n            # remove the old information from the database if it exists\n            # store the information into the database\n\n            functionality = extension_declaration[\"functionality\"]\n\n            # update the reasoning information\n            if \"reasoning\" in functionality:\n                for rid, rdec in functionality[\"reasoning\"].items():\n                    unique_rid = f\"{extension_declaration['id']}/{rid}\"\n                    self.default_settings.reasoning.options[\n                        unique_rid\n                    ] = BaseSettingGroup(\n                        settings=rdec[\"settings\"],\n                    )\n\n    def declare_event(self, name: str):\n        \"\"\"Declare a public gradio event for other components to subscribe to\n\n        Args:\n            name: The name of the event\n        \"\"\"\n        if name in self._events:\n            raise HookAlreadyDeclared(f\"Hook {name} is already declared\")\n        self._events[name] = []\n\n    def subscribe_event(self, name: str, definition: dict):\n        \"\"\"Register a hook for the app\n\n        Args:\n            name: The name of the hook\n            definition: The hook definition to be registered\n        \"\"\"\n        if name not in 
self._events:\n            raise HookNotDeclared(f\"Hook {name} is not declared\")\n        self._events[name].append(definition)\n\n    def get_event(self, name) -> list[dict]:\n        if name not in self._events:\n            raise HookNotDeclared(f\"Hook {name} is not declared\")\n\n        return self._events[name]\n\n    def ui(self):\n        raise NotImplementedError\n\n    def on_subscribe_public_events(self):\n        \"\"\"Subscribe to the declared public event of the app\"\"\"\n\n    def on_register_events(self):\n        \"\"\"Register all events to the app\"\"\"\n\n    def _on_app_created(self):\n        \"\"\"Called when the app is created\"\"\"\n\n    def make(self):\n        markmap_js = \"\"\"\n        <script>\n            window.markmap = {\n                /** @type AutoLoaderOptions */\n                autoLoader: {\n                    toolbar: true, // Enable toolbar\n                },\n            };\n        </script>\n        \"\"\"\n        external_js = (\n            \"<script type='module' \"\n            \"src='https://cdn.skypack.dev/pdfjs-viewer-element'>\"\n            \"</script>\"\n            \"<script type='module' \"\n            \"src='https://cdnjs.cloudflare.com/ajax/libs/tributejs/5.1.3/tribute.min.js'>\"  # noqa\n            f\"{markmap_js}\"\n            \"<script src='https://cdn.jsdelivr.net/npm/markmap-autoloader@0.16'></script>\"  # noqa\n            \"<script src='https://cdn.jsdelivr.net/npm/minisearch@7.1.1/dist/umd/index.min.js'></script>\"  # noqa\n            \"</script>\"\n            \"<link rel='stylesheet' href='https://cdnjs.cloudflare.com/ajax/libs/tributejs/5.1.3/tribute.css'/>\"  # noqa\n        )\n\n        with gr.Blocks(\n            theme=self._theme,\n            css=self._css,\n            title=self.app_name,\n            analytics_enabled=False,\n            js=self._js,\n            head=external_js,\n        ) as demo:\n            self.app = demo\n            self.settings_state.render()\n  
          self.user_id.render()\n\n            self.ui()\n\n            self.declare_public_events()\n            self.subscribe_public_events()\n            self.register_events()\n            self.on_app_created()\n\n            demo.load(None, None, None, js=self._pdf_view_js)\n\n        return demo\n\n    def declare_public_events(self):\n        \"\"\"Declare an event for the app\"\"\"\n        for event in self.public_events:\n            self.declare_event(event)\n\n        for value in self.__dict__.values():\n            if isinstance(value, BasePage):\n                value.declare_public_events()\n\n    def subscribe_public_events(self):\n        \"\"\"Subscribe to an event\"\"\"\n        self.on_subscribe_public_events()\n        for value in self.__dict__.values():\n            if isinstance(value, BasePage):\n                value.subscribe_public_events()\n\n    def register_events(self):\n        \"\"\"Register all events\"\"\"\n        self.on_register_events()\n        for value in self.__dict__.values():\n            if isinstance(value, BasePage):\n                value.register_events()\n\n    def on_app_created(self):\n        \"\"\"Execute on app created callbacks\"\"\"\n        self._on_app_created()\n        for value in self.__dict__.values():\n            if isinstance(value, BasePage):\n                value.on_app_created()\n\n\nclass BasePage:\n    \"\"\"The logic of the Kotaemon app\"\"\"\n\n    public_events: list[str] = []\n\n    def __init__(self, app):\n        self._app = app\n\n    def on_building_ui(self):\n        \"\"\"Build the UI of the app\"\"\"\n\n    def on_subscribe_public_events(self):\n        \"\"\"Subscribe to the declared public event of the app\"\"\"\n\n    def on_register_events(self):\n        \"\"\"Register all events to the app\"\"\"\n\n    def _on_app_created(self):\n        \"\"\"Called when the app is created\"\"\"\n\n    def as_gradio_component(\n        self,\n    ) -> Optional[gr.components.Component | 
list[gr.components.Component]]:\n        \"\"\"Return the gradio components responsible for events\n\n        Note: in ideal scenario, this method shouldn't be necessary.\n        \"\"\"\n        return None\n\n    def render(self):\n        for value in self.__dict__.values():\n            if isinstance(value, gr.blocks.Block):\n                value.render()\n            if isinstance(value, BasePage):\n                value.render()\n\n    def unrender(self):\n        for value in self.__dict__.values():\n            if isinstance(value, gr.blocks.Block):\n                value.unrender()\n            if isinstance(value, BasePage):\n                value.unrender()\n\n    def declare_public_events(self):\n        \"\"\"Declare an event for the app\"\"\"\n        for event in self.public_events:\n            self._app.declare_event(event)\n\n        for value in self.__dict__.values():\n            if isinstance(value, BasePage):\n                value.declare_public_events()\n\n    def subscribe_public_events(self):\n        \"\"\"Subscribe to an event\"\"\"\n        self.on_subscribe_public_events()\n        for value in self.__dict__.values():\n            if isinstance(value, BasePage):\n                value.subscribe_public_events()\n\n    def register_events(self):\n        \"\"\"Register all events\"\"\"\n        self.on_register_events()\n        for value in self.__dict__.values():\n            if isinstance(value, BasePage):\n                value.register_events()\n\n    def on_app_created(self):\n        \"\"\"Execute on app created callbacks\"\"\"\n        self._on_app_created()\n        for value in self.__dict__.values():\n            if isinstance(value, BasePage):\n                value.on_app_created()\n"
  },
  {
    "path": "libs/ktem/ktem/assets/__init__.py",
    "content": "from pathlib import Path\r\n\r\nfrom decouple import config\r\n\r\nfrom .theme import Kotaemon as KotaemonTheme\r\n\r\nPDFJS_VERSION_DIST: str = config(\"PDFJS_VERSION_DIST\", \"pdfjs-4.0.379-dist\")\r\nPDFJS_PREBUILT_DIR: Path = config(\r\n    \"PDFJS_PREBUILT_DIR\", Path(__file__).parent / \"prebuilt\" / PDFJS_VERSION_DIST\r\n)\r\n\r\n__all__ = [\"KotaemonTheme\", \"PDFJS_VERSION_DIST\", \"PDFJS_PREBUILT_DIR\"]\r\n"
  },
  {
    "path": "libs/ktem/ktem/assets/css/main.css",
    "content": ":root {\n  --main-area-height: calc(100vh - 110px);\n}\n\n/* no footer */\nfooter {\n  display: none !important;\n}\n\n/* customize scrollbar */\n::-webkit-scrollbar {\n  background: var(--background-fill-primary);\n}\n::-webkit-scrollbar-thumb {\n  background-color: var(--border-color-primary);\n  border: 4px solid transparent;\n  border-radius: 100px;\n  background-clip: content-box;\n}\n::-webkit-scrollbar-corner {\n  background: var(--background-fill-primary);\n}\n\n.gradio-container {\n  max-width: 100% !important;\n  /* overflow: scroll !important;\n  height: 100% !important; */\n}\n\n/* styling for header bar */\n.header-bar {\n  background-color: transparent;\n  margin: 0px 0px 20px;\n  overflow-x: scroll;\n  display: block !important;\n  text-wrap: nowrap;\n  border: none;\n}\n.header-bar button.selected {\n  border: none;\n\n  /* an alternative header bar style with rounded background */\n  /* background-color: var(--background-fill-primary);\n  border: 4px solid transparent;\n  border-radius: var(--radius-lg);\n  background-clip: padding-box; */\n}\n\n/* selected buttons have highlighted text */\nbutton.selected {\n  color: var(--block-label-text-color);\n  font-weight: bold;\n}\n\n.message-row.bubble.bot-row{\n  overflow-x: auto;\n}\n\n.flex-wrap.bot {\n  overflow-x: inherit;\n}\n\n#chat-tab,\n#indices-tab,\n#settings-tab,\n#help-tab,\n#resources-tab,\n#login-tab {\n  border: none !important;\n}\n\n#help-tab,\n#settings-tab {\n  /* text-dense view should not be wide for readability */\n  max-width: max(56vw, 900px) !important;\n  margin: 0 auto !important;\n}\n\n.indices-tab,\n#resources-tab {\n  /* Other view should not be too wide */\n  border: none !important;\n  max-width: max(70vw, 1200px) !important;\n  margin: 0 auto !important;\n}\n\n#main-chat-bot {\n  background: var(--background-fill-primary);\n  flex: auto;\n}\n\n#chat-area {\n  height: var(--main-area-height) !important;\n  column-gap: 2px !important;\n}\n\n#chat-info-panel 
{\n  max-height: var(--main-area-height) !important;\n  overflow: auto !important;\n  transition: all 0.4s;\n}\n\nbody.dark #chat-info-panel figure>img{\n  filter: invert(100%);\n}\n\n#conv-settings-panel {\n  max-height: var(--main-area-height) !important;\n  flex-wrap: unset;\n  overflow-y: scroll !important;\n  position: sticky;\n  column-gap: 2px !important;\n  scrollbar-width: none;\n  /* Firefox */\n  -ms-overflow-style: none;\n  /* Internet Explorer 10+ */\n  transition: all 0.3s;\n}\n\n#conv-settings-panel::-webkit-scrollbar {\n  /* WebKit */\n  width: 0;\n  height: 0;\n}\n\ntd {\n  /* Fix for Firefox Gradio table overflow display */\n  overflow: hidden;\n}\n\n.setting-answer-mode-description {\n  margin: 5px 5px 2px !important;\n}\n\n.message-buttons-right {\n  display: none !important;\n}\n\nmark {\n  background-color: #10b981;\n}\n\n/* clpse */\n.clpse {\n  background-color: var(--background-fill-secondary);\n  font-weight: bold;\n  cursor: pointer;\n  padding: 3px;\n  width: 100%;\n  border: none;\n  text-align: left;\n  outline: none;\n}\n\n/* for setting transparent background for elements */\n.no-background {\n  background-color: transparent;\n  border: none;\n}\n\n/* for setting bold text for elements */\n.bold-text {\n  font-weight: bold;\n}\n\n/* for setting highlighted text for elements */\n.body-text-color {\n  color: var(--body-text-color);\n}\n\n/* for setting right-aligned buttons */\n.right-button {\n  min-width: 200px !important;\n  width: fit-content;\n  padding-left: 20px;\n  padding-right: 20px;\n  margin: 0px 0px 0px auto;\n}\n\n/* for setting height limit for buttons */\n.cap-button-height {\n  max-height: 42px;\n}\n\n/* Hide sort buttons at gr.DataFrame */\n.sort-button {\n  display: none !important;\n}\n\n/* Show sort button only in File list*/\n#file_list_view .sort-button {\n  display: block !important;\n}\n\n#toggle-dark-button {\n  position: fixed;\n  top: 6px;\n  right: 30px;\n}\n\n#info-expand-button {\n  position: absolute;\n  
top: 6px;\n  right: 15px;\n}\n\n/* prevent overflow of html info panel */\n#html-info-panel {\n  overflow-x: auto !important;\n}\n\n#chat-expand-button {\n  position: absolute;\n  top: 6px;\n  right: -10px;\n  z-index: 1;\n}\n\n#save-setting-btn {\n  width: 150px;\n  height: 30px;\n  min-width: 100px !important;\n}\n\n#quick-setting-labels {\n  margin-top: 5px;\n  margin-bottom: -10px;\n}\n\n#use-mindmap-checkbox {\n  position: absolute;\n  width: 110px;\n  top: 10px;\n  right: 25px;\n}\n\n#citation-dropdown {\n  width: min(25%, 100px);\n  position: absolute;\n  top: 2px;\n  left: 120px;\n  height: 35px;\n}\n\n#quick-url textarea {\n  resize: none;\n  background: transparent;\n  margin-top: 0px;\n}\n\n#quick-url textarea::placeholder {\n  text-align: center;\n}\n\n#quick-file {\n  height: 110px;\n}\n\nspan.icon {\n  color: #cecece;\n}\n\n.upload-button {\n  display: none;\n}\n\n.scrollable {\n  overflow-y: auto;\n}\n\n.fill-main-area-height {\n  max-height: var(--main-area-height);\n}\n\n.unset-overflow {\n  overflow: unset !important;\n}\n\npdfjs-viewer-element {\n  height: 100dvh;\n}\n\n/* Modal styles */\n\n.modal {\n  display: none;\n  position: relative;\n  z-index: 2;\n  left: 0;\n  top: 0;\n  width: 100%;\n  height: 85dvh;\n  overflow: hidden;\n  background-color: rgba(0, 0, 0, 0.4);\n}\n\n.modal-header {\n  padding: 0px 10px\n}\n\n.modal-content {\n  background-color: #fefefe;\n  height: 100%;\n  display: flex;\n  flex-direction: column;\n}\n\n.close {\n  color: #aaa;\n  align-self: flex-end;\n  font-size: 28px;\n  font-weight: bold;\n}\n\n.close:hover,\n.close:focus {\n  color: black;\n  text-decoration: none;\n  cursor: pointer;\n}\n\n.modal-body {\n  flex: 1;\n  overflow: hidden;\n}\n\n/* Switch checkbox styles */\n\n/* #is-public-checkbox {\n  position: relative;\n  top: 4px;\n} */\n\n#suggest-chat-checkbox {\n  position: relative;\n  top: 4px;\n}\n\n.switch input {\n  position: absolute;\n  opacity: 0;\n}\n\n.switch {\n  display: inline-block;\n  /* 1 
*/\n  height: 1em;\n  width: 2em;\n  background: #8f8f8f;\n  border-radius: 1em;\n  position: relative;\n  top: 2px;\n  margin-right: 1em;\n}\n\n.switch div {\n  height: 1em;\n  width: 1em;\n  border-radius: 1em;\n  background: #FFF;\n  box-shadow: 0 0.1em 0.3em rgba(0, 0, 0, 0.3);\n  -webkit-transition: all 300ms;\n  -moz-transition: all 300ms;\n  transition: all 300ms;\n}\n\n.switch input:checked+div {\n  -webkit-transform: translate3d(100%, 0, 0);\n  -moz-transform: translate3d(100%, 0, 0);\n  transform: translate3d(100%, 0, 0);\n  background: #12df9a;\n}\n\n.switch:has(> input:checked) {\n  background: #0c895f;\n}\n\n/* Bot animation */\n\n.message.bot {\n  animation: fadein 1.0s ease-in-out forwards;\n}\n\ndetails.evidence {\n  animation: fadein 0.3s ease-in-out forwards;\n}\n\n@keyframes fadein {\n  0% {\n    opacity: 0;\n  }\n\n  100% {\n    opacity: 100%;\n  }\n}\n\n.message a.citation {\n  color: #10b981;\n  text-decoration: none;\n}\n\n/* pop-up for file tag in chat input*/\n.tribute-container ul {\n  background-color: var(--background-fill-primary) !important;\n  color: var(--body-text-color) !important;\n  font-family: var(--font);\n  font-size: var(--text-md);\n}\n\n.tribute-container li.highlight {\n  background-color: var(--border-color-primary) !important;\n}\n\n/* a fix for flickering background in Gradio DataFrame */\ntbody:not(.row_odd) {\n  background: var(--table-even-background-fill);\n}\n\n#chat-suggestion {\n  max-height: 350px;\n}\n\n#chat-suggestion table {\n  overflow: hidden;\n}\n\n#chat-suggestion table thead {\n  display: none;\n}\n\n#paper-suggestion table {\n  overflow: hidden;\n}\n\nsvg.markmap {\n  width: 100%;\n  height: 100%;\n  font-family: Quicksand, sans-serif;\n  font-size: 15px;\n}\n\ndiv.markmap {\n  height: 400px;\n}\n\n#google-login {\n  max-width: 450px;\n}\n\n#user-api-key-wrapper {\n  max-width: 450px;\n}\n\n#login-row {\n  display: grid;\n  place-items: center;\n}\n"
  },
  {
    "path": "libs/ktem/ktem/assets/js/main.js",
    "content": "function run() {\n  let main_parent = document.getElementById(\"chat-tab\").parentNode;\n\n  main_parent.childNodes[0].classList.add(\"header-bar\");\n  main_parent.style = \"padding: 0; margin: 0\";\n  main_parent.parentNode.style = \"gap: 0\";\n  main_parent.parentNode.parentNode.style = \"padding: 0\";\n\n  const version_node = document.createElement(\"p\");\n  version_node.innerHTML = \"version: KH_APP_VERSION\";\n  version_node.style = \"position: fixed; top: 10px; right: 10px;\";\n  main_parent.appendChild(version_node);\n\n  // add favicon\n  const favicon = document.createElement(\"link\");\n  // set favicon attributes\n  favicon.rel = \"icon\";\n  favicon.type = \"image/svg+xml\";\n  favicon.href = \"/favicon.ico\";\n  document.head.appendChild(favicon);\n\n  // setup conversation dropdown placeholder\n  let conv_dropdown = document.querySelector(\"#conversation-dropdown input\");\n  conv_dropdown.placeholder = \"Browse conversation\";\n\n  // move info-expand-button\n  let info_expand_button = document.getElementById(\"info-expand-button\");\n  let chat_info_panel = document.getElementById(\"info-expand\");\n  chat_info_panel.insertBefore(\n    info_expand_button,\n    chat_info_panel.childNodes[2]\n  );\n\n  // move toggle-side-bar button\n  let chat_expand_button = document.getElementById(\"chat-expand-button\");\n  let chat_column = document.getElementById(\"main-chat-bot\");\n  let conv_column = document.getElementById(\"conv-settings-panel\");\n\n  // move setting close button\n  let setting_tab_nav_bar = document.querySelector(\"#settings-tab .tab-nav\");\n  let setting_close_button = document.getElementById(\"save-setting-btn\");\n  if (setting_close_button) {\n    setting_tab_nav_bar.appendChild(setting_close_button);\n  }\n\n  let default_conv_column_min_width = \"min(300px, 100%)\";\n  conv_column.style.minWidth = default_conv_column_min_width;\n\n  globalThis.toggleChatColumn = () => {\n    /* get flex-grow value of conv_column 
*/\n    let flex_grow = conv_column.style.flexGrow;\n    if (flex_grow == \"0\") {\n      conv_column.style.flexGrow = \"1\";\n      conv_column.style.minWidth = default_conv_column_min_width;\n    } else {\n      conv_column.style.flexGrow = \"0\";\n      conv_column.style.minWidth = \"0px\";\n    }\n  };\n\n  chat_column.insertBefore(chat_expand_button, chat_column.firstChild);\n\n  // move use mind-map checkbox\n  let mindmap_checkbox = document.getElementById(\"use-mindmap-checkbox\");\n  let citation_dropdown = document.getElementById(\"citation-dropdown\");\n  let chat_setting_panel = document.getElementById(\"chat-settings-expand\");\n  chat_setting_panel.insertBefore(\n    mindmap_checkbox,\n    chat_setting_panel.childNodes[2]\n  );\n  chat_setting_panel.insertBefore(citation_dropdown, mindmap_checkbox);\n\n  // move share conv checkbox\n  let report_div = document.querySelector(\n    \"#report-accordion > div:nth-child(3) > div:nth-child(1)\"\n  );\n  let share_conv_checkbox = document.getElementById(\"is-public-checkbox\");\n  if (share_conv_checkbox) {\n    report_div.insertBefore(share_conv_checkbox, report_div.querySelector(\"button\"));\n  }\n\n  // create slider toggle\n  const is_public_checkbox = document.getElementById(\"suggest-chat-checkbox\");\n  const label_element = is_public_checkbox.getElementsByTagName(\"label\")[0];\n  const checkbox_span = is_public_checkbox.getElementsByTagName(\"span\")[0];\n  new_div = document.createElement(\"div\");\n\n  label_element.classList.add(\"switch\");\n  is_public_checkbox.appendChild(checkbox_span);\n  label_element.appendChild(new_div);\n\n  // clpse\n  globalThis.clpseFn = (id) => {\n    var obj = document.getElementById(\"clpse-btn-\" + id);\n    obj.classList.toggle(\"clpse-active\");\n    var content = obj.nextElementSibling;\n    if (content.style.display === \"none\") {\n      content.style.display = \"block\";\n    } else {\n      content.style.display = \"none\";\n    }\n  };\n\n  // store info 
in local storage\n  globalThis.setStorage = (key, value) => {\n    localStorage.setItem(key, value);\n  };\n  globalThis.getStorage = (key, value) => {\n    item = localStorage.getItem(key);\n    return item ? item : value;\n  };\n  globalThis.removeFromStorage = (key) => {\n    localStorage.removeItem(key);\n  };\n\n  // Function to scroll to given citation with ID\n  // Sleep function using Promise and setTimeout\n  function sleep(ms) {\n    return new Promise((resolve) => setTimeout(resolve, ms));\n  }\n\n  globalThis.scrollToCitation = async (event) => {\n    event.preventDefault(); // Prevent the default link behavior\n    var citationId = event.target.getAttribute(\"id\");\n\n    await sleep(100); // Sleep for 100 milliseconds\n\n    // check if modal is open\n    var modal = document.getElementById(\"pdf-modal\");\n    var citation = document.querySelector('mark[id=\"' + citationId + '\"]');\n\n    if (modal.style.display == \"block\") {\n      // trigger on click event of PDF Preview link\n      var detail_elem = citation;\n      // traverse up the DOM tree to find the parent element with tag detail\n      while (detail_elem.tagName.toLowerCase() != \"details\") {\n        detail_elem = detail_elem.parentElement;\n      }\n      detail_elem.getElementsByClassName(\"pdf-link\").item(0).click();\n    } else {\n      if (citation) {\n        citation.scrollIntoView({ behavior: \"smooth\" });\n      }\n    }\n  };\n\n  globalThis.fullTextSearch = () => {\n    // Assign text selection event to last bot message\n    var bot_messages = document.querySelectorAll(\n      \"div#main-chat-bot div.message-row.bot-row\"\n    );\n    var last_bot_message = bot_messages[bot_messages.length - 1];\n\n    // check if the last bot message has class \"text_selection\"\n    if (last_bot_message.classList.contains(\"text_selection\")) {\n      return;\n    }\n\n    // assign new class to last message\n    last_bot_message.classList.add(\"text_selection\");\n\n    // Get 
sentences from evidence div\n    var evidences = document.querySelectorAll(\n      \"#html-info-panel > div:last-child > div > details.evidence div.evidence-content\"\n    );\n    console.log(\"Indexing evidences\", evidences);\n\n    const segmenterEn = new Intl.Segmenter(\"en\", { granularity: \"sentence\" });\n    // Split sentences and save to all_segments list\n    var all_segments = [];\n    for (var evidence of evidences) {\n      // check if <details> tag is open\n      if (!evidence.parentElement.open) {\n        continue;\n      }\n      var markmap_div = evidence.querySelector(\"div.markmap\");\n      if (markmap_div) {\n        continue;\n      }\n\n      var evidence_content = evidence.textContent.replace(/[\\r\\n]+/g, \" \");\n      sentence_it = segmenterEn.segment(evidence_content)[Symbol.iterator]();\n      while ((sentence = sentence_it.next().value)) {\n        segment = sentence.segment.trim();\n        if (segment) {\n          all_segments.push({\n            id: all_segments.length,\n            text: segment,\n          });\n        }\n      }\n    }\n\n    let miniSearch = new MiniSearch({\n      fields: [\"text\"], // fields to index for full-text search\n      storeFields: [\"text\"],\n    });\n\n    // Index all documents\n    miniSearch.addAll(all_segments);\n\n    last_bot_message.addEventListener(\"mouseup\", () => {\n      let selection = window.getSelection().toString();\n      let results = miniSearch.search(selection);\n\n      if (results.length == 0) {\n        return;\n      }\n      let matched_text = results[0].text;\n      console.log(\"query\\n\", selection, \"\\nmatched text\\n\", matched_text);\n\n      var evidences = document.querySelectorAll(\n        \"#html-info-panel > div:last-child > div > details.evidence div.evidence-content\"\n      );\n      // check if modal is open\n      var modal = document.getElementById(\"pdf-modal\");\n\n      // convert all <mark> in evidences to normal text\n      
evidences.forEach((evidence) => {\n        evidence.querySelectorAll(\"mark\").forEach((mark) => {\n          mark.outerHTML = mark.innerText;\n        });\n      });\n\n      // highlight matched_text in evidences\n      for (var evidence of evidences) {\n        var evidence_content = evidence.textContent.replace(/[\\r\\n]+/g, \" \");\n        if (evidence_content.includes(matched_text)) {\n          // select all p and li elements\n          paragraphs = evidence.querySelectorAll(\"p, li\");\n          for (var p of paragraphs) {\n            var p_content = p.textContent.replace(/[\\r\\n]+/g, \" \");\n            if (p_content.includes(matched_text)) {\n              p.innerHTML = p_content.replace(\n                matched_text,\n                \"<mark>\" + matched_text + \"</mark>\"\n              );\n              console.log(\"highlighted\", matched_text, \"in\", p);\n              if (modal.style.display == \"block\") {\n                // trigger on click event of PDF Preview link\n                var detail_elem = p;\n                // traverse up the DOM tree to find the parent element with tag detail\n                while (detail_elem.tagName.toLowerCase() != \"details\") {\n                  detail_elem = detail_elem.parentElement;\n                }\n                detail_elem.getElementsByClassName(\"pdf-link\").item(0).click();\n              } else {\n                p.scrollIntoView({ behavior: \"smooth\", block: \"center\" });\n              }\n              break;\n            }\n          }\n        }\n      }\n    });\n  };\n\n  globalThis.spawnDocument = (content, options) => {\n    let opt = {\n      window: \"\",\n      closeChild: true,\n      childId: \"_blank\",\n    };\n    Object.assign(opt, options);\n    // minimal error checking\n    if (\n      content &&\n      typeof content.toString == \"function\" &&\n      content.toString().length\n    ) {\n      let child = window.open(\"\", opt.childId, opt.window);\n      
child.document.write(content.toString());\n      if (opt.closeChild) child.document.close();\n      return child;\n    }\n  };\n\n  globalThis.fillChatInput = (event) => {\n    let chatInput = document.querySelector(\"#chat-input textarea\");\n    // fill the chat input with the clicked div text\n    chatInput.value = \"Explain \" + event.target.textContent;\n    var evt = new Event(\"change\");\n    chatInput.dispatchEvent(new Event(\"input\", { bubbles: true }));\n    chatInput.focus();\n  };\n}\n"
  },
  {
    "path": "libs/ktem/ktem/assets/js/pdf_viewer.js",
    "content": "function onBlockLoad() {\r\n  var infor_panel_scroll_pos = 0;\r\n  globalThis.createModal = () => {\r\n    // Create modal for the 1st time if it does not exist\r\n    var modal = document.getElementById(\"pdf-modal\");\r\n    var old_position = null;\r\n    var old_width = null;\r\n    var old_left = null;\r\n    var expanded = false;\r\n\r\n    modal.id = \"pdf-modal\";\r\n    modal.className = \"modal\";\r\n    modal.innerHTML = `\r\n            <div class=\"modal-content\">\r\n              <div class=\"modal-header\">\r\n                <span class=\"close\" id=\"modal-close\">&times;</span>\r\n                <span class=\"close\" id=\"modal-expand\">&#x26F6;</span>\r\n              </div>\r\n              <div class=\"modal-body\">\r\n                <pdfjs-viewer-element id=\"pdf-viewer\" viewer-path=\"GR_FILE_ROOT_PATH/file=PDFJS_PREBUILT_DIR\" locale=\"en\" phrase=\"true\">\r\n                </pdfjs-viewer-element>\r\n              </div>\r\n            </div>\r\n          `;\r\n\r\n    modal.querySelector(\"#modal-close\").onclick = function () {\r\n      modal.style.display = \"none\";\r\n      var info_panel = document.getElementById(\"html-info-panel\");\r\n      if (info_panel) {\r\n        info_panel.style.display = \"block\";\r\n      }\r\n      var scrollableDiv = document.getElementById(\"chat-info-panel\");\r\n      scrollableDiv.scrollTop = infor_panel_scroll_pos;\r\n    };\r\n\r\n    modal.querySelector(\"#modal-expand\").onclick = function () {\r\n      expanded = !expanded;\r\n      if (expanded) {\r\n        old_position = modal.style.position;\r\n        old_left = modal.style.left;\r\n        old_width = modal.style.width;\r\n\r\n        modal.style.position = \"fixed\";\r\n        modal.style.width = \"70%\";\r\n        modal.style.left = \"15%\";\r\n        modal.style.height = \"100dvh\";\r\n      } else {\r\n        modal.style.position = old_position;\r\n        modal.style.width = old_width;\r\n        
modal.style.left = old_left;\r\n        modal.style.height = \"85dvh\";\r\n      }\r\n    };\r\n  };\r\n\r\n  function matchRatio(str1, str2) {\r\n    let n = str1.length;\r\n    let m = str2.length;\r\n\r\n    let lcs = [];\r\n    for (let i = 0; i <= n; i++) {\r\n      lcs[i] = [];\r\n      for (let j = 0; j <= m; j++) {\r\n        lcs[i][j] = 0;\r\n      }\r\n    }\r\n\r\n    let result = \"\";\r\n    let max = 0;\r\n    for (let i = 0; i < n; i++) {\r\n      for (let j = 0; j < m; j++) {\r\n        if (str1[i] === str2[j]) {\r\n          lcs[i + 1][j + 1] = lcs[i][j] + 1;\r\n          if (lcs[i + 1][j + 1] > max) {\r\n            max = lcs[i + 1][j + 1];\r\n            result = str1.substring(i - max + 1, i + 1);\r\n          }\r\n        }\r\n      }\r\n    }\r\n\r\n    return result.length / Math.min(n, m);\r\n  }\r\n\r\n  globalThis.compareText = (search_phrases, page_label) => {\r\n    var iframe = document.querySelector(\"#pdf-viewer\").iframe;\r\n    var innerDoc = iframe.contentDocument\r\n      ? 
iframe.contentDocument\r\n      : iframe.contentWindow.document;\r\n\r\n    var renderedPages = innerDoc.querySelectorAll(\"div#viewer div.page\");\r\n    if (renderedPages.length == 0) {\r\n      // if pages are not rendered yet, wait and try again\r\n      setTimeout(() => compareText(search_phrases, page_label), 2000);\r\n      return;\r\n    }\r\n\r\n    var query_selector =\r\n      \"#viewer > div[data-page-number='\" +\r\n      page_label +\r\n      \"'] > div.textLayer > span\";\r\n    var page_spans = innerDoc.querySelectorAll(query_selector);\r\n    for (var i = 0; i < page_spans.length; i++) {\r\n      var span = page_spans[i];\r\n      if (\r\n        span.textContent.length > 4 &&\r\n        search_phrases.some(\r\n          (phrase) => matchRatio(phrase, span.textContent) > 0.5\r\n        )\r\n      ) {\r\n        span.innerHTML =\r\n          \"<span class='highlight selected'>\" + span.textContent + \"</span>\";\r\n      } else {\r\n        // if span is already highlighted, remove it\r\n        if (span.querySelector(\".highlight\")) {\r\n          span.innerHTML = span.textContent;\r\n        }\r\n      }\r\n    }\r\n  };\r\n\r\n  // Sleep function using Promise and setTimeout\r\n  function sleep(ms) {\r\n    return new Promise((resolve) => setTimeout(resolve, ms));\r\n  }\r\n\r\n  // Function to open modal and display PDF\r\n  globalThis.openModal = async (event) => {\r\n    event.preventDefault();\r\n    var target = event.currentTarget;\r\n    var src = target.getAttribute(\"data-src\");\r\n    var page = target.getAttribute(\"data-page\");\r\n    var search = target.getAttribute(\"data-search\");\r\n    var highlighted_spans =\r\n      target.parentElement.parentElement.querySelectorAll(\"mark\");\r\n\r\n    // Get text from highlighted spans\r\n    var search_phrases = Array.from(highlighted_spans).map(\r\n      (span) => span.textContent\r\n    );\r\n    // Use regex to strip 【id】from search phrases\r\n    search_phrases = 
search_phrases.map((phrase) =>\r\n      phrase.replace(/【\\d+】/g, \"\")\r\n    );\r\n\r\n    // var phrase = target.getAttribute(\"data-phrase\");\r\n\r\n    var pdfViewer = document.getElementById(\"pdf-viewer\");\r\n\r\n    current_src = pdfViewer.getAttribute(\"src\");\r\n    if (current_src != src) {\r\n      pdfViewer.setAttribute(\"src\", src);\r\n    }\r\n    // pdfViewer.setAttribute(\"phrase\", phrase);\r\n    // pdfViewer.setAttribute(\"search\", search);\r\n    pdfViewer.setAttribute(\"page\", page);\r\n\r\n    var scrollableDiv = document.getElementById(\"chat-info-panel\");\r\n    infor_panel_scroll_pos = scrollableDiv.scrollTop;\r\n\r\n    var modal = document.getElementById(\"pdf-modal\");\r\n    modal.style.display = \"block\";\r\n    var info_panel = document.getElementById(\"html-info-panel\");\r\n    if (info_panel) {\r\n      info_panel.style.display = \"none\";\r\n    }\r\n    scrollableDiv.scrollTop = 0;\r\n\r\n    /* search for text inside PDF page */\r\n    await sleep(500);\r\n    compareText(search_phrases, page);\r\n  };\r\n\r\n  globalThis.assignPdfOnclickEvent = () => {\r\n    // Get all links and attach click event\r\n    var links = document.getElementsByClassName(\"pdf-link\");\r\n    for (var i = 0; i < links.length; i++) {\r\n      links[i].onclick = openModal;\r\n    }\r\n  };\r\n\r\n  var created_modal = document.getElementById(\"pdf-viewer\");\r\n  if (!created_modal) {\r\n    createModal();\r\n  }\r\n}\r\n"
  },
  {
    "path": "libs/ktem/ktem/assets/md/about.md",
    "content": "# About Kotaemon\n\nAn open-source tool for you to chat with your documents.\n\n[Source Code](https://github.com/Cinnamon/kotaemon) |\n[Demo](https://huggingface.co/spaces/cin-model/kotaemon-demo)\n\n[User Guide](https://cinnamon.github.io/kotaemon/) |\n[Developer Guide](https://cinnamon.github.io/kotaemon/development/) |\n[Feedback](https://github.com/Cinnamon/kotaemon/issues)\n"
  },
  {
    "path": "libs/ktem/ktem/assets/md/changelogs.md",
    "content": "# Changelogs\n\n## v0.0.1\n\n- Chat: interact with chatbot with simple pipeline, rewoo and react agents\n- Chat: conversation management: create, delete, rename conversations\n- Files: upload files\n- Files: select files as context for chatbot\n- User management: create, sign-in, sign-out, change password\n- Setting: common settings and pipeline-based settings\n- Info panel: show Cinnamon AI and Kotaemon information\n"
  },
  {
    "path": "libs/ktem/ktem/assets/md/usage.md",
    "content": "# Basic Usage\n\n## 1. Add your AI models\n\n![resources tab](https://raw.githubusercontent.com/Cinnamon/kotaemon/main/docs/images/resources-tab.png)\n\n- The tool uses Large Language Model (LLMs) to perform various tasks in a QA pipeline.\n  So, you need to provide the application with access to the LLMs you want\n  to use.\n- You only need to provide at least one. However, tt is recommended that you include all the LLMs\n  that you have access to, you will be able to switch between them while using the\n  application.\n\nTo add a model:\n\n1. Navigate to the `Resources` tab.\n2. Select the `LLMs` sub-tab.\n3. Select the `Add` sub-tab.\n4. Config the model to add:\n   - Give it a name.\n   - Pick a vendor/provider (e.g. `ChatOpenAI`).\n   - Provide the specifications.\n   - (Optional) Set the model as default.\n5. Click `Add` to add the model.\n6. Select `Embedding Models` sub-tab and repeat the step 3 to 5 to add an embedding model.\n\n<details markdown>\n\n<summary>(Optional) Configure model via the .env file</summary>\n\nAlternatively, you can configure the models via the `.env` file with the information needed to connect to the LLMs. This file is located in\nthe folder of the application. If you don't see it, you can create one.\n\nCurrently, the following providers are supported:\n\n### OpenAI\n\nIn the `.env` file, set the `OPENAI_API_KEY` variable with your OpenAI API key in order\nto enable access to OpenAI's models. There are other variables that can be modified,\nplease feel free to edit them to fit your case. Otherwise, the default parameter should\nwork for most people.\n\n```shell\nOPENAI_API_BASE=https://api.openai.com/v1\nOPENAI_API_KEY=<your OpenAI API key here>\nOPENAI_CHAT_MODEL=gpt-3.5-turbo\nOPENAI_EMBEDDINGS_MODEL=text-embedding-ada-002\n```\n\n### Azure OpenAI\n\nFor OpenAI models via Azure platform, you need to provide your Azure endpoint and API\nkey. 
You might also need to provide your deployments' names for the chat model and the\nembedding model depending on how you set up your Azure deployment.\n\n```shell\nAZURE_OPENAI_ENDPOINT=\nAZURE_OPENAI_API_KEY=\nOPENAI_API_VERSION=2024-02-15-preview\nAZURE_OPENAI_CHAT_DEPLOYMENT=gpt-35-turbo\nAZURE_OPENAI_EMBEDDINGS_DEPLOYMENT=text-embedding-ada-002\n```\n\n### Local models\n\n- Pros:\n- Privacy. Your documents will be stored and processed locally.\n- Choices. There is a wide range of LLMs in terms of size, domain, and language to choose\n  from.\n- Cost. It's free.\n- Cons:\n- Quality. Local models are much smaller and thus have lower generative quality than\n  paid APIs.\n- Speed. Local models are deployed using your machine so the processing speed is\n  limited by your hardware.\n\n#### Find and download a LLM\n\nYou can search for and download an LLM to be run locally from the [Hugging Face\nHub](https://huggingface.co/models). Currently, these model formats are supported:\n\n- GGUF\n\nYou should choose a model whose size is less than your device's available memory, leaving\nabout 2 GB free. For example, if you have 16 GB of RAM in total, of which 12 GB is available,\nthen you should choose a model that takes up at most 10 GB of RAM. Bigger models tend to\ngive better generation but also take more processing time.\n\nHere are some recommendations and their size in memory:\n\n- [Qwen1.5-1.8B-Chat-GGUF](https://huggingface.co/Qwen/Qwen1.5-1.8B-Chat-GGUF/resolve/main/qwen1_5-1_8b-chat-q8_0.gguf?download=true):\n  around 2 GB\n\n#### Enable local models\n\nTo add a local model to the model pool, set the `LOCAL_MODEL` variable in the `.env`\nfile to the path of the model file.\n\n```shell\nLOCAL_MODEL=<full path to your model file>\n```\n\nHere is how to get the full path of your model file:\n\n- On Windows 11: right-click the file and select `Copy as Path`.\n</details>\n\n## 2. 
Upload your documents\n\n![file index tab](https://raw.githubusercontent.com/Cinnamon/kotaemon/main/docs/images/file-index-tab.png)\n\nIn order to do QA on your documents, you need to upload them to the application first.\nNavigate to the `File Index` tab and you will see 2 sections:\n\n1. File upload:\n   - Drag and drop your file to the UI or select it from your file system.\n     Then click `Upload and Index`.\n   - The application will take some time to process the file and show a message once it is done.\n2. File list:\n   - This section shows the list of files that have been uploaded to the application and allows users to delete them.\n\n## 3. Chat with your documents\n\n![chat tab](https://raw.githubusercontent.com/Cinnamon/kotaemon/main/docs/images/chat-tab.png)\n\nNow navigate back to the `Chat` tab. The chat tab is divided into 3 regions:\n\n1. Conversation Settings Panel\n   - Here you can select, create, rename, and delete conversations.\n     - By default, a new conversation is created automatically if no conversation is selected.\n   - Below that you have the file index, where you can choose whether to disable, select all files, or select which files to retrieve references from.\n     - If you choose \"Disabled\", no files will be considered as context during chat.\n     - If you choose \"Search All\", all files will be considered during chat.\n     - If you choose \"Select\", a dropdown will appear for you to select the\n       files to be considered during chat. If no files are selected, then no\n       files will be considered during chat.\n2. Chat Panel\n   - This is where you can chat with the chatbot.\n3. Information Panel\n   - Supporting information such as the retrieved evidence and reference will be\n     displayed here.\n"
  },
  {
    "path": "libs/ktem/ktem/assets/theme.py",
    "content": "from typing import Iterable\n\nfrom gradio.themes import Soft\nfrom gradio.themes.utils import colors, fonts, sizes\n\ngray = colors.Color(\n    name=\"dark\",\n    c50=\"#f9fafb\",\n    c100=\"#edeef0\",\n    c200=\"#e1e2e6\",\n    c300=\"#d5d6dd\",\n    c400=\"#cacbd5\",\n    c500=\"#acadb7\",\n    c600=\"#313138\",\n    c700=\"#25252b\",\n    c800=\"#19191e\",\n    c900=\"#0d0d11\",\n    c950=\"#010104\",\n)\n\nerr_txt = \"#f05656\"\ngradient = \"linear-gradient(90deg, *primary_400 20%, *secondary_500 80%)\"\ngradient_muted = \"linear-gradient(90deg, *primary_500 20%, *secondary_600 80%)\"\n\nerr_dark = \"rgba(228, 98, 98, 1)\"\nerr_dark_muted = \"rgba(228, 98, 98, 0.75)\"\n\nerr = \"rgba(255, 93, 93, 1)\"\nerr_muted = \"rgba(237, 80, 80, 1)\"\n\n\ncommon = dict(\n    # element colours\n    color_accent=\"*primary_400\",\n    # shadows\n    shadow_drop=\"0 0px 5px 1px rgb(0 0 0 / 0.05)\",\n    shadow_drop_lg=\"0 0 10px 3px rgba(0 0 0 / 0.06)\",\n    # layout atoms\n    block_label_margin=\"*spacing_xl\",\n    block_label_padding=\"*spacing_xl\",\n    block_label_shadow=\"none\",\n    layout_gap=\"*spacing_xxl\",\n    section_header_text_size=\"*text_lg\",\n    # buttons\n    button_shadow=\"none\",\n    button_shadow_active=\"*shadow_drop\",\n    button_shadow_hover=\"none\",\n    # button_large_radius=\"*radius_xxl\",\n    # button_small_radius=\"*radius_xxl\",\n    # -----\n)\ndark_mode = dict(\n    # body attributes\n    body_text_color_subdued_dark=\"*neutral_300\",\n    # element colours\n    background_fill_secondary_dark=\"*neutral_950\",\n    border_color_accent_dark=\"rgba(255,255,255,0)\",\n    border_color_primary_dark=\"*neutral_600\",\n    color_accent_soft_dark=\"*secondary_400\",\n    # text\n    link_text_color_dark=\"*secondary_200\",\n    link_text_color_active_dark=\"*secondary_300\",\n    link_text_color_visited_dark=\"*secondary_400\",\n    # layout atoms\n    block_label_background_fill_dark=\"*neutral_800\",\n    
block_label_border_width_dark=\"0px\",\n    block_label_text_color_dark=\"*primary_200\",\n    block_shadow_dark=\"none\",\n    block_title_text_color_dark=\"*primary_200\",\n    panel_border_width_dark=\"0px\",\n    # component atoms\n    checkbox_background_color_selected_dark=\"*primary_400\",\n    checkbox_border_color_focus_dark=\"*primary_400\",\n    checkbox_border_color_selected_dark=\"*primary_500\",\n    checkbox_label_background_fill_selected_dark=\"*primary_200\",\n    checkbox_label_text_color_selected_dark=\"*neutral_700\",\n    error_border_color_dark=err_dark,\n    error_text_color_dark=\"*neutral_100\",\n    error_icon_color_dark=err_dark,\n    input_background_fill_dark=\"*neutral_600\",\n    input_border_color_dark=\"*input_background_fill\",\n    input_border_color_focus_dark=\"*input_background_fill\",\n    input_placeholder_color_dark=\"*neutral_500\",\n    loader_color_dark=\"*primary_200\",\n    slider_color_dark=\"*primary_300\",\n    stat_background_fill_dark=\"*secondary_100\",\n    table_border_color_dark=\"*neutral_800\",\n    table_even_background_fill_dark=\"*neutral_900\",\n    table_odd_background_fill_dark=\"*neutral_800\",\n    table_row_focus_dark=\"*neutral_600\",\n    # buttons\n    button_primary_background_fill_dark=gradient,\n    button_primary_background_fill_hover_dark=gradient_muted,\n    button_secondary_background_fill_hover_dark=\"*neutral_700\",\n    button_cancel_background_fill_dark=err_dark,\n    button_cancel_background_fill_hover_dark=err_dark_muted,\n)\nlight_mode = dict(\n    background_fill_primary=\"*neutral_50\",\n    background_fill_secondary=\"*neutral_50\",\n    # body attributes\n    body_background_fill=\"*background_fill_primary\",\n    body_text_color_subdued=\"*neutral_600\",\n    border_color_accent=\"rgba(255,255,255,0)\",\n    border_color_primary=\"*neutral_300\",\n    color_accent_soft=\"*secondary_100\",\n    # text\n    link_text_color=\"*secondary_400\",\n    
link_text_color_visited=\"*secondary_700\",\n    # layout atoms\n    block_label_border_width=\"0px\",\n    block_label_background_fill=\"white\",\n    block_label_text_color=\"*primary_600\",\n    block_shadow=\"none\",\n    block_title_text_color=\"*primary_600\",\n    panel_border_width=\"0px\",\n    # component atoms\n    checkbox_background_color_selected=\"*primary_400\",\n    checkbox_border_color_focus=\"*primary_400\",\n    checkbox_border_color_selected=\"*primary_400\",\n    checkbox_label_border_color=\"*primary_200\",\n    error_background_fill=\"*background_fill_primary\",\n    error_border_color=err_muted,\n    error_text_color=\"*neutral_800\",\n    input_background_fill=\"*neutral_200\",\n    input_border_color=\"*input_background_fill\",\n    input_border_color_focus=\"*input_background_fill\",\n    input_placeholder_color=\"*neutral_500\",\n    loader_color=\"*primary_300\",\n    slider_color=\"*primary_400\",\n    stat_background_fill=\"*secondary_300\",\n    table_even_background_fill=\"*neutral_100\",\n    table_odd_background_fill=\"*neutral_300\",\n    table_row_focus=\"*secondary_200\",\n    # buttons\n    button_primary_background_fill=gradient_muted,\n    button_primary_background_fill_hover=gradient,\n    button_secondary_background_fill=\"*neutral_300\",\n    button_secondary_background_fill_hover=\"*neutral_100\",\n    button_cancel_background_fill=err_muted,\n    button_cancel_background_fill_hover=err,\n    button_cancel_text_color=\"*neutral_50\",\n)\n\n\nclass Kotaemon(Soft):\n    \"\"\"\n    Official theme of Kotaemon.\n    Public version: https://huggingface.co/spaces/lone17/kotaemon\n    \"\"\"\n\n    def __init__(\n        self,\n        *,\n        primary_hue: colors.Color | str = colors.emerald,\n        secondary_hue: colors.Color | str = colors.blue,\n        neutral_hue: colors.Color | str = gray,\n        spacing_size: sizes.Size | str = sizes.spacing_md,\n        radius_size: sizes.Size | str = sizes.radius_md,\n        
text_size: sizes.Size | str = sizes.text_md,\n        font: fonts.Font\n        | str\n        | Iterable[fonts.Font | str] = (\n            fonts.GoogleFont(\"Quicksand\"),\n            \"ui-sans-serif\",\n            \"sans-serif\",\n        ),\n        font_mono: fonts.Font\n        | str\n        | Iterable[fonts.Font | str] = (\n            fonts.GoogleFont(\"IBM Plex Mono\"),\n            \"ui-monospace\",\n            \"monospace\",\n        ),\n    ):\n        super().__init__(\n            primary_hue=primary_hue,\n            secondary_hue=secondary_hue,\n            neutral_hue=neutral_hue,\n            spacing_size=spacing_size,\n            radius_size=radius_size,\n            text_size=text_size,\n            font=font,\n            font_mono=font_mono,\n        )\n        self.name = \"kotaemon\"\n        super().set(\n            **common,\n            **dark_mode,\n            **light_mode,\n        )\n"
  },
  {
    "path": "libs/ktem/ktem/components.py",
    "content": "\"\"\"Common components, some kind of config\"\"\"\n\nimport logging\nfrom functools import cache\nfrom pathlib import Path\nfrom typing import Optional\n\nfrom theflow.settings import settings\nfrom theflow.utils.modules import deserialize\n\nfrom kotaemon.base import BaseComponent\nfrom kotaemon.storages import BaseDocumentStore, BaseVectorStore\n\nlogger = logging.getLogger(__name__)\n\n\nfilestorage_path = Path(settings.KH_FILESTORAGE_PATH)\nfilestorage_path.mkdir(parents=True, exist_ok=True)\n\n\n@cache\ndef get_docstore(collection_name: str = \"default\") -> BaseDocumentStore:\n    from copy import deepcopy\n\n    ds_conf = deepcopy(settings.KH_DOCSTORE)\n    ds_conf[\"collection_name\"] = collection_name\n    return deserialize(ds_conf, safe=False)\n\n\n@cache\ndef get_vectorstore(collection_name: str = \"default\") -> BaseVectorStore:\n    from copy import deepcopy\n\n    vs_conf = deepcopy(settings.KH_VECTORSTORE)\n    vs_conf[\"collection_name\"] = collection_name\n    return deserialize(vs_conf, safe=False)\n\n\nclass ModelPool:\n    \"\"\"Represent a pool of models\"\"\"\n\n    def __init__(self, category: str, conf: dict):\n        self._category = category\n        self._conf = conf\n\n        self._models: dict[str, BaseComponent] = {}\n        self._accuracy: list[str] = []\n        self._cost: list[str] = []\n        self._default: list[str] = []\n\n        for name, model in conf.items():\n            self._models[name] = deserialize(model[\"spec\"], safe=False)\n            if model.get(\"default\", False):\n                self._default.append(name)\n\n        self._accuracy = list(\n            sorted(conf, key=lambda x: conf[x].get(\"accuracy\", float(\"-inf\")))\n        )\n        self._cost = list(sorted(conf, key=lambda x: conf[x].get(\"cost\", float(\"inf\"))))\n\n    def __getitem__(self, key: str) -> BaseComponent:\n        \"\"\"Get model by name\"\"\"\n        return self._models[key]\n\n    def __setitem__(self, key: 
str, value: BaseComponent):\n        \"\"\"Set model by name\"\"\"\n        self._models[key] = value\n\n    def __delitem__(self, key: str):\n        \"\"\"Delete model by name\"\"\"\n        del self._models[key]\n\n    def __contains__(self, key: str) -> bool:\n        \"\"\"Check if model exists\"\"\"\n        return key in self._models\n\n    def get(\n        self, key: str, default: Optional[BaseComponent] = None\n    ) -> Optional[BaseComponent]:\n        \"\"\"Get model by name with default value\"\"\"\n        return self._models.get(key, default)\n\n    def settings(self) -> dict:\n        \"\"\"Present model pools option for gradio\"\"\"\n        return {\n            \"label\": self._category,\n            \"choices\": list(self._models.keys()),\n            \"value\": self.get_default_name(),\n        }\n\n    def options(self) -> dict:\n        \"\"\"Present a dict of models\"\"\"\n        return self._models\n\n    def get_random_name(self) -> str:\n        \"\"\"Get the name of random model\n\n        Returns:\n            str: random model name in the pool\n        \"\"\"\n        import random\n\n        if not self._conf:\n            raise ValueError(\"No models in pool\")\n\n        return random.choice(list(self._conf.keys()))\n\n    def get_default_name(self) -> str:\n        \"\"\"Get the name of default model\n\n        In case there is no default model, choose random model from pool. 
In\n        case there are multiple default models, choose random from them.\n\n        Returns:\n            str: model name\n        \"\"\"\n        if not self._conf:\n            raise ValueError(\"No models in pool\")\n\n        if self._default:\n            import random\n\n            return random.choice(self._default)\n\n        return self.get_random_name()\n\n    def get_random(self) -> BaseComponent:\n        \"\"\"Get random model\"\"\"\n        return self._models[self.get_random_name()]\n\n    def get_default(self) -> BaseComponent:\n        \"\"\"Get default model\n\n        In case there is no default model, choose random model from pool. In\n        case there are multiple default models, choose random from them.\n\n        Returns:\n            BaseComponent: model\n        \"\"\"\n        return self._models[self.get_default_name()]\n\n    def get_highest_accuracy_name(self) -> str:\n        \"\"\"Get the name of model with highest accuracy\n\n        Returns:\n            str: model name\n        \"\"\"\n        if not self._conf:\n            raise ValueError(\"No models in pool\")\n        return self._accuracy[-1]\n\n    def get_highest_accuracy(self) -> BaseComponent:\n        \"\"\"Get model with highest accuracy\n\n        Returns:\n            BaseComponent: model\n        \"\"\"\n        if not self._conf:\n            raise ValueError(\"No models in pool\")\n\n        return self._models[self._accuracy[-1]]\n\n    def get_lowest_cost_name(self) -> str:\n        \"\"\"Get the name of model with lowest cost\n\n        Returns:\n            str: model name\n        \"\"\"\n        if not self._conf:\n            raise ValueError(\"No models in pool\")\n        return self._cost[0]\n\n    def get_lowest_cost(self) -> BaseComponent:\n        \"\"\"Get model with lowest cost\n\n        Returns:\n            BaseComponent: model\n        \"\"\"\n        if not self._conf:\n            raise ValueError(\"No models in pool\")\n\n        return 
self._models[self._cost[0]]\n\n\nreasonings: dict = {}\ntools = ModelPool(\"Tools\", {})\n"
  },
  {
    "path": "libs/ktem/ktem/db/__init__.py",
    "content": ""
  },
  {
    "path": "libs/ktem/ktem/db/base_models.py",
    "content": "import datetime\nimport uuid\nfrom typing import Optional\n\nfrom sqlalchemy import JSON, Column\nfrom sqlmodel import Field, SQLModel\nfrom tzlocal import get_localzone\n\n\nclass BaseConversation(SQLModel):\n    \"\"\"Store the chat conversation between the user and the bot\n\n    Attributes:\n        id: canonical id to identify the conversation\n        name: human-friendly name of the conversation\n        user: the user id\n        data_source: the data source of the conversation\n        date_created: the date the conversation was created\n        date_updated: the date the conversation was updated\n    \"\"\"\n\n    __table_args__ = {\"extend_existing\": True}\n\n    id: str = Field(\n        default_factory=lambda: uuid.uuid4().hex, primary_key=True, index=True\n    )\n    name: str = Field(\n        default_factory=lambda: \"Untitled - {}\".format(\n            datetime.datetime.now(get_localzone()).strftime(\"%Y-%m-%d %H:%M:%S\")\n        )\n    )\n    user: str = Field(default=\"\")  # For now we only have one user\n\n    is_public: bool = Field(default=False)\n\n    # contains messages + current files + chat_suggestions\n    data_source: dict = Field(default={}, sa_column=Column(JSON))\n\n    date_created: datetime.datetime = Field(\n        default_factory=lambda: datetime.datetime.now(get_localzone())\n    )\n    date_updated: datetime.datetime = Field(\n        default_factory=lambda: datetime.datetime.now(get_localzone())\n    )\n\n\nclass BaseUser(SQLModel):\n    \"\"\"Store the user information\n\n    Attributes:\n        id: canonical id to identify the user\n        username: the username of the user\n        password: the hashed password of the user\n    \"\"\"\n\n    __table_args__ = {\"extend_existing\": True}\n\n    id: str = Field(\n        default_factory=lambda: uuid.uuid4().hex, primary_key=True, index=True\n    )\n    username: str = Field(unique=True)\n    username_lower: str = Field(unique=True)\n    password: str\n   
 admin: bool = Field(default=False)\n\n\nclass BaseSettings(SQLModel):\n    \"\"\"Record of user settings\n\n    Attributes:\n        id: canonical id to identify the settings\n        user: the user id\n        setting: the user settings (in dict/json format)\n    \"\"\"\n\n    __table_args__ = {\"extend_existing\": True}\n\n    id: str = Field(\n        default_factory=lambda: uuid.uuid4().hex, primary_key=True, index=True\n    )\n    user: str = Field(default=\"\")\n    setting: dict = Field(default={}, sa_column=Column(JSON))\n\n\nclass BaseIssueReport(SQLModel):\n    \"\"\"Store user-reported issues\n\n    Attributes:\n        id: canonical id to identify the issue report\n        issues: the issues reported by the user, formatted as a dict\n        chat: the conversation id when the user reported the issue\n        settings: the user settings at the time of the issue report\n        user: the user id\n    \"\"\"\n\n    __table_args__ = {\"extend_existing\": True}\n\n    id: Optional[int] = Field(default=None, primary_key=True)\n    issues: dict = Field(default={}, sa_column=Column(JSON))\n    chat: Optional[dict] = Field(default=None, sa_column=Column(JSON))\n    settings: Optional[dict] = Field(default=None, sa_column=Column(JSON))\n    user: Optional[str] = Field(default=None)\n"
  },
  {
    "path": "libs/ktem/ktem/db/engine.py",
    "content": "from sqlmodel import create_engine\nfrom theflow.settings import settings\n\nengine = create_engine(settings.KH_DATABASE)\n"
  },
  {
    "path": "libs/ktem/ktem/db/models.py",
    "content": "import ktem.db.base_models as base_models\nfrom ktem.db.engine import engine\nfrom sqlmodel import SQLModel\nfrom theflow.settings import settings\nfrom theflow.utils.modules import import_dotted_string\n\n_base_conv = (\n    import_dotted_string(settings.KH_TABLE_CONV, safe=False)\n    if hasattr(settings, \"KH_TABLE_CONV\")\n    else base_models.BaseConversation\n)\n\n_base_user = (\n    import_dotted_string(settings.KH_TABLE_USER, safe=False)\n    if hasattr(settings, \"KH_TABLE_USER\")\n    else base_models.BaseUser\n)\n\n_base_settings = (\n    import_dotted_string(settings.KH_TABLE_SETTINGS, safe=False)\n    if hasattr(settings, \"KH_TABLE_SETTINGS\")\n    else base_models.BaseSettings\n)\n\n_base_issue_report = (\n    import_dotted_string(settings.KH_TABLE_ISSUE_REPORT, safe=False)\n    if hasattr(settings, \"KH_TABLE_ISSUE_REPORT\")\n    else base_models.BaseIssueReport\n)\n\n\nclass Conversation(_base_conv, table=True):  # type: ignore\n    \"\"\"Conversation record\"\"\"\n\n\nclass User(_base_user, table=True):  # type: ignore\n    \"\"\"User table\"\"\"\n\n\nclass Settings(_base_settings, table=True):  # type: ignore\n    \"\"\"Record of settings\"\"\"\n\n\nclass IssueReport(_base_issue_report, table=True):  # type: ignore\n    \"\"\"Record of issues\"\"\"\n\n\nif not getattr(settings, \"KH_ENABLE_ALEMBIC\", False):\n    SQLModel.metadata.create_all(engine)\n"
  },
  {
    "path": "libs/ktem/ktem/embeddings/__init__.py",
    "content": ""
  },
  {
    "path": "libs/ktem/ktem/embeddings/db.py",
    "content": "from typing import Type\n\nfrom ktem.db.engine import engine\nfrom sqlalchemy import JSON, Boolean, Column, String\nfrom sqlalchemy.orm import DeclarativeBase\nfrom theflow.settings import settings as flowsettings\nfrom theflow.utils.modules import import_dotted_string\n\n\nclass Base(DeclarativeBase):\n    pass\n\n\nclass BaseEmbeddingTable(Base):\n    \"\"\"Base table to store language model\"\"\"\n\n    __abstract__ = True\n\n    name = Column(String, primary_key=True, unique=True)\n    spec = Column(JSON, default={})\n    default = Column(Boolean, default=False)\n\n\n_base_llm: Type[BaseEmbeddingTable] = (\n    import_dotted_string(flowsettings.KH_EMBEDDING_LLM, safe=False)\n    if hasattr(flowsettings, \"KH_EMBEDDING_LLM\")\n    else BaseEmbeddingTable\n)\n\n\nclass EmbeddingTable(_base_llm):  # type: ignore\n    __tablename__ = \"embedding\"\n\n\nif not getattr(flowsettings, \"KH_ENABLE_ALEMBIC\", False):\n    EmbeddingTable.metadata.create_all(engine)\n"
  },
  {
    "path": "libs/ktem/ktem/embeddings/manager.py",
    "content": "from typing import Optional, Type\n\nfrom sqlalchemy import select\nfrom sqlalchemy.orm import Session\nfrom theflow.settings import settings as flowsettings\nfrom theflow.utils.modules import deserialize\n\nfrom kotaemon.embeddings.base import BaseEmbeddings\n\nfrom .db import EmbeddingTable, engine\n\n\nclass EmbeddingManager:\n    \"\"\"Represent a pool of models\"\"\"\n\n    def __init__(self):\n        self._models: dict[str, BaseEmbeddings] = {}\n        self._info: dict[str, dict] = {}\n        self._default: str = \"\"\n        self._vendors: list[Type] = []\n\n        # populate the pool if empty\n        if hasattr(flowsettings, \"KH_EMBEDDINGS\"):\n            with Session(engine) as sess:\n                count = sess.query(EmbeddingTable).count()\n            if not count:\n                for name, model in flowsettings.KH_EMBEDDINGS.items():\n                    self.add(\n                        name=name,\n                        spec=model[\"spec\"],\n                        default=model.get(\"default\", False),\n                    )\n\n        self.load()\n        self.load_vendors()\n\n    def load(self):\n        \"\"\"Load the model pool from database\"\"\"\n        self._models, self._info, self._default = {}, {}, \"\"\n        with Session(engine) as sess:\n            stmt = select(EmbeddingTable)\n            items = sess.execute(stmt)\n\n            for (item,) in items:\n                self._models[item.name] = deserialize(item.spec, safe=False)\n                self._info[item.name] = {\n                    \"name\": item.name,\n                    \"spec\": item.spec,\n                    \"default\": item.default,\n                }\n                if item.default:\n                    self._default = item.name\n                    self._models[\"default\"] = self._models[item.name]\n\n    def load_vendors(self):\n        from kotaemon.embeddings import (\n            AzureOpenAIEmbeddings,\n            
FastEmbedEmbeddings,\n            LCCohereEmbeddings,\n            LCGoogleEmbeddings,\n            LCHuggingFaceEmbeddings,\n            LCMistralEmbeddings,\n            OpenAIEmbeddings,\n            TeiEndpointEmbeddings,\n            VoyageAIEmbeddings,\n        )\n\n        self._vendors = [\n            AzureOpenAIEmbeddings,\n            OpenAIEmbeddings,\n            FastEmbedEmbeddings,\n            LCCohereEmbeddings,\n            LCHuggingFaceEmbeddings,\n            LCGoogleEmbeddings,\n            LCMistralEmbeddings,\n            TeiEndpointEmbeddings,\n            VoyageAIEmbeddings,\n        ]\n\n    def __getitem__(self, key: str) -> BaseEmbeddings:\n        \"\"\"Get model by name\"\"\"\n        return self._models[key]\n\n    def __contains__(self, key: str) -> bool:\n        \"\"\"Check if model exists\"\"\"\n        return key in self._models\n\n    def get(\n        self, key: str, default: Optional[BaseEmbeddings] = None\n    ) -> Optional[BaseEmbeddings]:\n        \"\"\"Get model by name with default value\"\"\"\n        return self._models.get(key, default)\n\n    def settings(self) -> dict:\n        \"\"\"Present model pools option for gradio\"\"\"\n        return {\n            \"label\": \"Embedding\",\n            \"choices\": list(self._models.keys()),\n            \"value\": self.get_default_name(),\n        }\n\n    def options(self) -> dict:\n        \"\"\"Present a dict of models\"\"\"\n        return self._models\n\n    def get_random_name(self) -> str:\n        \"\"\"Get the name of random model\n\n        Returns:\n            str: random model name in the pool\n        \"\"\"\n        import random\n\n        if not self._models:\n            raise ValueError(\"No models in pool\")\n\n        return random.choice(list(self._models.keys()))\n\n    def get_default_name(self) -> str:\n        \"\"\"Get the name of default model\n\n        In case there is no default model, choose random model from pool. 
In\n        case there are multiple default models, choose random from them.\n\n        Returns:\n            str: model name\n        \"\"\"\n        if not self._models:\n            raise ValueError(\"No models in pool\")\n\n        if not self._default:\n            return self.get_random_name()\n\n        return self._default\n\n    def get_random(self) -> BaseEmbeddings:\n        \"\"\"Get random model\"\"\"\n        return self._models[self.get_random_name()]\n\n    def get_default(self) -> BaseEmbeddings:\n        \"\"\"Get default model\n\n        In case there is no default model, choose random model from pool. In\n        case there are multiple default models, choose random from them.\n\n        Returns:\n            BaseEmbeddings: model\n        \"\"\"\n        return self._models[self.get_default_name()]\n\n    def info(self) -> dict:\n        \"\"\"List all models\"\"\"\n        return self._info\n\n    def add(self, name: str, spec: dict, default: bool):\n        \"\"\"Add a new model to the pool\"\"\"\n        if not name:\n            raise ValueError(\"Name must not be empty\")\n\n        try:\n            with Session(engine) as sess:\n                if default:\n                    # turn all models to non-default\n                    sess.query(EmbeddingTable).update({\"default\": False})\n                    sess.commit()\n\n                item = EmbeddingTable(name=name, spec=spec, default=default)\n                sess.add(item)\n                sess.commit()\n        except Exception as e:\n            raise ValueError(f\"Failed to add model {name}: {e}\")\n\n        self.load()\n\n    def delete(self, name: str):\n        \"\"\"Delete a model from the pool\"\"\"\n        try:\n            with Session(engine) as sess:\n                item = sess.query(EmbeddingTable).filter_by(name=name).first()\n                sess.delete(item)\n                sess.commit()\n        except Exception as e:\n            raise ValueError(f\"Failed to 
delete model {name}: {e}\")\n\n        self.load()\n\n    def update(self, name: str, spec: dict, default: bool, new_name: str = \"\"):\n        \"\"\"Update a model in the pool, optionally renaming it.\"\"\"\n        if not name:\n            raise ValueError(\"Name must not be empty\")\n\n        # If update name\n        if new_name and new_name != name:\n            if new_name in self._info:\n                raise ValueError(\n                    f\"Model '{new_name}' already exists. Use a unique name.\"\n                )\n            self.delete(name)\n            self.add(new_name, spec=spec, default=default)\n            return\n\n        try:\n            with Session(engine) as sess:\n\n                if default:\n                    # turn all models to non-default\n                    sess.query(EmbeddingTable).update({\"default\": False})\n                    sess.commit()\n\n                item = sess.query(EmbeddingTable).filter_by(name=name).first()\n                if not item:\n                    raise ValueError(f\"Model {name} not found\")\n                item.spec = spec\n                item.default = default\n                sess.commit()\n        except Exception as e:\n            raise ValueError(f\"Failed to update model {name}: {e}\")\n\n        self.load()\n\n    def vendors(self) -> dict:\n        \"\"\"Return list of vendors\"\"\"\n        return {vendor.__qualname__: vendor for vendor in self._vendors}\n\n\nembedding_models_manager = EmbeddingManager()\n"
  },
  {
    "path": "libs/ktem/ktem/embeddings/ui.py",
    "content": "from copy import deepcopy\n\nimport gradio as gr\nimport pandas as pd\nimport yaml\nfrom ktem.app import BasePage\nfrom ktem.utils.file import YAMLNoDateSafeLoader\nfrom theflow.utils.modules import deserialize\n\nfrom .manager import embedding_models_manager\n\n\ndef format_description(cls):\n    params = cls.describe()[\"params\"]\n    params_lines = [\"| Name | Type | Description |\", \"| --- | --- | --- |\"]\n    for key, value in params.items():\n        if isinstance(value[\"auto_callback\"], str):\n            continue\n        params_lines.append(f\"| {key} | {value['type']} | {value['help']} |\")\n    return f\"{cls.__doc__}\\n\\n\" + \"\\n\".join(params_lines)\n\n\nclass EmbeddingManagement(BasePage):\n    def __init__(self, app):\n        self._app = app\n        self.spec_desc_default = (\n            \"# Spec description\\n\\nSelect a model to view the spec description.\"\n        )\n        self.on_building_ui()\n\n    def on_building_ui(self):\n        with gr.Tab(label=\"View\"):\n            self.emb_list = gr.DataFrame(\n                headers=[\"name\", \"vendor\", \"default\"],\n                interactive=False,\n                column_widths=[30, 40, 30],\n            )\n\n            with gr.Column(visible=False) as self._selected_panel:\n                self.selected_emb_name = gr.Textbox(value=\"\", visible=False)\n                with gr.Row():\n                    with gr.Column():\n                        self.edit_default = gr.Checkbox(\n                            label=\"Set default\",\n                            info=(\n                                \"Set this Embedding model as default. 
This default \"\n                                \"Embedding will be used by other components by default \"\n                                \"if no Embedding is specified for such components.\"\n                            ),\n                        )\n                        self.edit_name = gr.Textbox(\n                            label=\"Name\",\n                            info=\"Edit to rename this Embedding model.\",\n                        )\n                        self.edit_spec = gr.Textbox(\n                            label=\"Specification\",\n                            info=\"Specification of the Embedding model in YAML format\",\n                            lines=10,\n                        )\n\n                        with gr.Accordion(\n                            label=\"Test connection\", visible=False, open=False\n                        ) as self._check_connection_panel:\n                            with gr.Row():\n                                with gr.Column(scale=4):\n                                    self.connection_logs = gr.HTML(\n                                        \"Logs\",\n                                    )\n\n                                with gr.Column(scale=1):\n                                    self.btn_test_connection = gr.Button(\"Test\")\n\n                        with gr.Row(visible=False) as self._selected_panel_btn:\n                            with gr.Column():\n                                self.btn_edit_save = gr.Button(\n                                    \"Save\", min_width=10, variant=\"primary\"\n                                )\n                            with gr.Column():\n                                self.btn_delete = gr.Button(\n                                    \"Delete\", min_width=10, variant=\"stop\"\n                                )\n                                with gr.Row():\n                                    self.btn_delete_yes = gr.Button(\n                                
        \"Confirm Delete\",\n                                        variant=\"stop\",\n                                        visible=False,\n                                        min_width=10,\n                                    )\n                                    self.btn_delete_no = gr.Button(\n                                        \"Cancel\", visible=False, min_width=10\n                                    )\n                            with gr.Column():\n                                self.btn_close = gr.Button(\"Close\", min_width=10)\n\n                    with gr.Column():\n                        self.edit_spec_desc = gr.Markdown(\"# Spec description\")\n\n        with gr.Tab(label=\"Add\"):\n            with gr.Row():\n                with gr.Column(scale=2):\n                    self.name = gr.Textbox(\n                        label=\"Name\",\n                        info=(\n                            \"Must be unique and non-empty. \"\n                            \"The name will be used to identify the embedding model.\"\n                        ),\n                    )\n                    self.emb_choices = gr.Dropdown(\n                        label=\"Vendors\",\n                        info=(\n                            \"Choose the vendor of the Embedding model. Each vendor \"\n                            \"has different specification.\"\n                        ),\n                    )\n                    self.spec = gr.Textbox(\n                        label=\"Specification\",\n                        info=\"Specification of the Embedding model in YAML format.\",\n                    )\n                    self.default = gr.Checkbox(\n                        label=\"Set default\",\n                        info=(\n                            \"Set this Embedding model as default. 
This default \"\n                            \"Embedding will be used by other components by default \"\n                            \"if no Embedding is specified for such components.\"\n                        ),\n                    )\n                    self.btn_new = gr.Button(\"Add\", variant=\"primary\")\n\n                with gr.Column(scale=3):\n                    self.spec_desc = gr.Markdown(self.spec_desc_default)\n\n    def _on_app_created(self):\n        \"\"\"Called when the app is created\"\"\"\n        self._app.app.load(\n            self.list_embeddings,\n            inputs=[],\n            outputs=[self.emb_list],\n        )\n        self._app.app.load(\n            lambda: gr.update(choices=list(embedding_models_manager.vendors().keys())),\n            outputs=[self.emb_choices],\n        )\n\n    def on_emb_vendor_change(self, vendor):\n        vendor = embedding_models_manager.vendors()[vendor]\n\n        required: dict = {}\n        desc = vendor.describe()\n        for key, value in desc[\"params\"].items():\n            if value.get(\"required\", False):\n                required[key] = value.get(\"default\", None)\n\n        return yaml.dump(required), format_description(vendor)\n\n    def on_register_events(self):\n        self.emb_choices.select(\n            self.on_emb_vendor_change,\n            inputs=[self.emb_choices],\n            outputs=[self.spec, self.spec_desc],\n        )\n        self.btn_new.click(\n            self.create_emb,\n            inputs=[self.name, self.emb_choices, self.spec, self.default],\n            outputs=None,\n        ).success(self.list_embeddings, inputs=[], outputs=[self.emb_list]).success(\n            lambda: (\"\", None, \"\", False, self.spec_desc_default),\n            outputs=[\n                self.name,\n                self.emb_choices,\n                self.spec,\n                self.default,\n                self.spec_desc,\n            ],\n        )\n        self.emb_list.select(\n    
        self.select_emb,\n            inputs=self.emb_list,\n            outputs=[self.selected_emb_name],\n            show_progress=\"hidden\",\n        )\n        self.selected_emb_name.change(\n            self.on_selected_emb_change,\n            inputs=[self.selected_emb_name],\n            outputs=[\n                self._selected_panel,\n                self._selected_panel_btn,\n                # delete section\n                self.btn_delete,\n                self.btn_delete_yes,\n                self.btn_delete_no,\n                # edit section\n                self.edit_name,\n                self.edit_spec,\n                self.edit_spec_desc,\n                self.edit_default,\n            ],\n            show_progress=\"hidden\",\n        ).success(lambda: gr.update(value=\"\"), outputs=[self.connection_logs])\n\n        self.btn_delete.click(\n            self.on_btn_delete_click,\n            inputs=[],\n            outputs=[self.btn_delete, self.btn_delete_yes, self.btn_delete_no],\n            show_progress=\"hidden\",\n        )\n        self.btn_delete_yes.click(\n            self.delete_emb,\n            inputs=[self.selected_emb_name],\n            outputs=[self.selected_emb_name],\n            show_progress=\"hidden\",\n        ).then(\n            self.list_embeddings,\n            inputs=[],\n            outputs=[self.emb_list],\n        )\n        self.btn_delete_no.click(\n            lambda: (\n                gr.update(visible=True),\n                gr.update(visible=False),\n                gr.update(visible=False),\n            ),\n            inputs=[],\n            outputs=[self.btn_delete, self.btn_delete_yes, self.btn_delete_no],\n            show_progress=\"hidden\",\n        )\n        self.btn_edit_save.click(\n            self.save_emb,\n            inputs=[\n                self.selected_emb_name,\n                self.edit_name,\n                self.edit_default,\n                self.edit_spec,\n            ],\n     
       outputs=[self.selected_emb_name],\n            show_progress=\"hidden\",\n        ).then(\n            self.list_embeddings,\n            inputs=[],\n            outputs=[self.emb_list],\n        )\n        self.btn_close.click(\n            lambda: \"\",\n            outputs=[self.selected_emb_name],\n        )\n\n        self.btn_test_connection.click(\n            self.check_connection,\n            inputs=[self.selected_emb_name, self.edit_spec],\n            outputs=[self.connection_logs],\n        )\n\n    def create_emb(self, name, choices, spec, default):\n        try:\n            name = name.strip()\n            spec = yaml.load(spec, Loader=YAMLNoDateSafeLoader)\n            spec[\"__type__\"] = (\n                embedding_models_manager.vendors()[choices].__module__\n                + \".\"\n                + embedding_models_manager.vendors()[choices].__qualname__\n            )\n\n            embedding_models_manager.add(name, spec=spec, default=default)\n            gr.Info(f'Embedding model \"{name}\" created successfully')\n        except ValueError as e:\n            raise gr.Error(str(e))\n        except Exception as e:\n            raise gr.Error(f\"Failed to create Embedding model '{name}': {e}\")\n\n    def list_embeddings(self):\n        \"\"\"List the Embedding models\"\"\"\n        items = []\n        for item in embedding_models_manager.info().values():\n            record = {}\n            record[\"name\"] = item[\"name\"]\n            record[\"vendor\"] = item[\"spec\"].get(\"__type__\", \"-\").split(\".\")[-1]\n            record[\"default\"] = item[\"default\"]\n            items.append(record)\n\n        if items:\n            emb_list = pd.DataFrame.from_records(items)\n        else:\n            emb_list = pd.DataFrame.from_records(\n                [{\"name\": \"-\", \"vendor\": \"-\", \"default\": \"-\"}]\n            )\n\n        return emb_list\n\n    def select_emb(self, emb_list, ev: gr.SelectData):\n        if 
ev.value == \"-\" and ev.index[0] == 0:\n            gr.Info(\"No embedding model is loaded. Please add first\")\n            return \"\"\n\n        if not ev.selected:\n            return \"\"\n\n        return emb_list[\"name\"][ev.index[0]]\n\n    def on_selected_emb_change(self, selected_emb_name):\n        if selected_emb_name == \"\":\n            _selected_panel = gr.update(visible=False)\n            _selected_panel_btn = gr.update(visible=False)\n            btn_delete = gr.update(visible=True)\n            btn_delete_yes = gr.update(visible=False)\n            btn_delete_no = gr.update(visible=False)\n            edit_name = gr.update(value=\"\")\n            edit_spec = gr.update(value=\"\")\n            edit_spec_desc = gr.update(value=\"\")\n            edit_default = gr.update(value=False)\n        else:\n            _selected_panel = gr.update(visible=True)\n            _selected_panel_btn = gr.update(visible=True)\n            btn_delete = gr.update(visible=True)\n            btn_delete_yes = gr.update(visible=False)\n            btn_delete_no = gr.update(visible=False)\n\n            info = deepcopy(embedding_models_manager.info()[selected_emb_name])\n            vendor_str = info[\"spec\"].pop(\"__type__\", \"-\").split(\".\")[-1]\n            vendor = embedding_models_manager.vendors()[vendor_str]\n\n            edit_name = selected_emb_name\n            edit_spec = yaml.dump(info[\"spec\"])\n            edit_spec_desc = format_description(vendor)\n            edit_default = info[\"default\"]\n\n        return (\n            _selected_panel,\n            _selected_panel_btn,\n            btn_delete,\n            btn_delete_yes,\n            btn_delete_no,\n            edit_name,\n            edit_spec,\n            edit_spec_desc,\n            edit_default,\n        )\n\n    def on_btn_delete_click(self):\n        btn_delete = gr.update(visible=False)\n        btn_delete_yes = gr.update(visible=True)\n        btn_delete_no = 
gr.update(visible=True)\n\n        return btn_delete, btn_delete_yes, btn_delete_no\n\n    def check_connection(self, selected_emb_name, selected_spec):\n        log_content: str = \"\"\n        try:\n            log_content += f\"- Testing model: {selected_emb_name}<br>\"\n            yield log_content\n\n            # Parse content & init model\n            info = deepcopy(embedding_models_manager.info()[selected_emb_name])\n\n            # Parse content & create dummy embedding\n            spec = yaml.load(selected_spec, Loader=YAMLNoDateSafeLoader)\n            info[\"spec\"].update(spec)\n\n            emb = deserialize(info[\"spec\"], safe=False)\n\n            if emb is None:\n                raise Exception(f\"Can not found model: {selected_emb_name}\")\n\n            log_content += \"- Sending a message `Hi`<br>\"\n            yield log_content\n            _ = emb(\"Hi\")\n\n            log_content += (\n                \"<mark style='background: green; color: white'>- Connection success. \"\n                \"</mark><br>\"\n            )\n            yield log_content\n\n            gr.Info(f\"Embedding {selected_emb_name} connect successfully\")\n        except Exception as e:\n            print(e)\n            log_content += (\n                f\"<mark style='color: yellow; background: red'>- Connection failed. 
\"\n                f\"Got error:\\n {str(e)}</mark>\"\n            )\n            yield log_content\n\n        return log_content\n\n    def save_emb(self, selected_emb_name, edit_name, default, spec):\n        try:\n            new_name = edit_name.strip()\n            spec = yaml.load(spec, Loader=YAMLNoDateSafeLoader)\n            spec[\"__type__\"] = embedding_models_manager.info()[selected_emb_name][\n                \"spec\"\n            ][\"__type__\"]\n            embedding_models_manager.update(\n                selected_emb_name, spec=spec, default=default, new_name=new_name\n            )\n            final_name = (\n                new_name if new_name != selected_emb_name else selected_emb_name\n            )\n            gr.Info(f'Embedding model \"{final_name}\" saved successfully')\n            return final_name\n        except ValueError as e:\n            raise gr.Error(str(e))\n        except Exception as e:\n            raise gr.Error(f'Failed to save Embedding model \"{selected_emb_name}\": {e}')\n\n    def delete_emb(self, selected_emb_name):\n        try:\n            embedding_models_manager.delete(selected_emb_name)\n        except Exception as e:\n            gr.Error(f'Failed to delete Embedding model \"{selected_emb_name}\": {e}')\n            return selected_emb_name\n\n        return \"\"\n"
  },
  {
    "path": "libs/ktem/ktem/exceptions.py",
    "content": "class KHException(Exception):\n    pass\n\n\nclass HookNotDeclared(KHException):\n    pass\n\n\nclass HookAlreadyDeclared(KHException):\n    pass\n"
  },
  {
    "path": "libs/ktem/ktem/extension_protocol.py",
    "content": "import pluggy\n\nhookspec = pluggy.HookspecMarker(\"ktem\")\nhookimpl = pluggy.HookimplMarker(\"ktem\")\n\n\n@hookspec\ndef ktem_declare_extensions() -> dict:  # type: ignore\n    \"\"\"Called before the run() function is executed.\n\n    This hook is called without any arguments, and should return a dictionary.\n    The dictionary has the following structure:\n\n        ```\n        {\n            \"id\": str,      # cannot contain . or /\n            \"name\": str,    # human-friendly name of the plugin\n            \"version\": str,\n            \"support_host\": str,\n            \"functionality\": {\n                \"reasoning\": {\n                    id: {                         # cannot contain . or /\n                        \"name\": str,\n                        \"callbacks\": {},\n                        \"settings\": {},\n                    },\n                },\n                \"index\": {\n                    \"name\": str,\n                    \"callbacks\": {\n                        \"get_index_pipeline\": callable,\n                        \"get_retrievers\": {name: callable}\n                    },\n                    \"settings\": {},\n                },\n            },\n        }\n        ```\n    \"\"\"\n"
  },
  {
    "path": "libs/ktem/ktem/index/__init__.py",
    "content": "from .manager import IndexManager\n\n__all__ = [\"IndexManager\"]\n"
  },
  {
    "path": "libs/ktem/ktem/index/base.py",
    "content": "import abc\nimport logging\nfrom typing import TYPE_CHECKING, Any, Optional\n\nif TYPE_CHECKING:\n    from ktem.app import BasePage\n\n    from kotaemon.base import BaseComponent\n\n\nlogger = logging.getLogger(__name__)\n\n\nclass BaseIndex(abc.ABC):\n    \"\"\"The base class for the index\n\n    The index is responsible for storing information in a searchable manner, and\n    retrieving that information.\n\n    An application can have multiple indices. For example:\n        - An index of files locally in the computer\n        - An index of chat messages on Discord, Slack, etc.\n        - An index of files stored on Google Drie, Dropbox, etc.\n        - ...\n\n    User can create, delete, and manage the indices in this application. They\n    can create an index, set it to track a local folder in their computer, and\n    then the chatbot can search for files in that folder. The user can create\n    another index to track their chat messages on Discords. And so on.\n\n    This class defines the interface for the index. It concerns with:\n        - Setting up the necessary software infrastructure for the index to work\n        (e.g. database table, vector store collection, etc.).\n        - Providing the UI for user interaction with the index, including settings.\n\n    Methods:\n\n        __init__: initiate any resource definition required for the index to work\n            (e.g. database table, vector store collection, etc.).\n        on_create: called only once, when the user creates the index.\n        on_delete: called only once, when the user deletes the index.\n        on_start: called when the index starts.\n        get_selector_component_ui: return the UI component to select the entities in\n            the Chat page. Called in the ChatUI page.\n        get_index_page_ui: return the index page UI to manage the entities. Called in\n            the main application UI page.\n        get_user_settings: return default user settings. 
Called only when the app starts\n        get_admin_settings: return the admin settings. Called only when the user\n            creates the index (for the admin to customize it). The output will be\n            stored in the Index's config.\n        get_indexing_pipeline: return the indexing pipeline when the entities are\n            populated into the index\n        get_retriever_pipelines: return the retriever pipelines when the user chat\n    \"\"\"\n\n    def __init__(self, app, id, name, config):\n        self._app = app\n        self.id = id\n        self.name = name\n        self.config = config  # admin settings\n\n    def on_create(self):\n        \"\"\"Create the index for the first time\"\"\"\n\n    def on_delete(self):\n        \"\"\"Trigger when the user delete the index\"\"\"\n\n    def on_start(self):\n        \"\"\"Trigger when the index start\n\n        Args:\n            id (int): the id of the index\n            name (str): the name of the index\n            config (dict): the config of the index\n        \"\"\"\n\n    def get_selector_component_ui(self) -> Optional[\"BasePage\"]:\n        \"\"\"The UI component to select the entities in the Chat page\"\"\"\n        return None\n\n    def get_index_page_ui(self) -> Optional[\"BasePage\"]:\n        \"\"\"The index page UI to manage the entities\"\"\"\n        return None\n\n    @classmethod\n    def get_user_settings(cls) -> dict:\n        \"\"\"Return default user settings. These are the runtime settings.\n\n        The settings will be populated in the user settings page. And will be used\n        when initiating the indexing & retriever pipelines.\n\n        Returns:\n            dict: user settings in the dictionary format of\n                `ktem.settings.SettingItem`\n        \"\"\"\n        return {}\n\n    @classmethod\n    def get_admin_settings(cls) -> dict:\n        \"\"\"Return the default admin settings. 
These are the build-time settings.\n\n        The settings will be populated in the admin settings page. And will be used\n        when initiating the indexing & retriever pipelines.\n\n        Returns:\n            dict: user settings in the dictionary format of\n                `ktem.settings.SettingItem`\n        \"\"\"\n        return {}\n\n    @abc.abstractmethod\n    def get_indexing_pipeline(\n        self, settings: dict, user_id: Optional[int]\n    ) -> \"BaseComponent\":\n        \"\"\"Return the indexing pipeline that populates the entities into the index\n\n        Args:\n            settings: the user settings of the index\n            user_id: the user id who is accessing the index\n                TODO: instead of having a user_id, should have an app_state\n                which might also contain the settings.\n\n        Returns:\n            BaseIndexing: the indexing pipeline\n        \"\"\"\n        ...\n\n    def get_retriever_pipelines(\n        self, settings: dict, user_id: int, selected: Any = None\n    ) -> list[\"BaseComponent\"]:\n        \"\"\"Return the retriever pipelines to retrieve the entity from the index\"\"\"\n        return []\n"
  },
  {
    "path": "libs/ktem/ktem/index/file/__init__.py",
    "content": "from .index import FileIndex\n\n__all__ = [\"FileIndex\"]\n"
  },
  {
    "path": "libs/ktem/ktem/index/file/base.py",
    "content": "from pathlib import Path\nfrom typing import Generator, Optional\n\nfrom kotaemon.base import BaseComponent, Document, Param\n\n\nclass BaseFileIndexRetriever(BaseComponent):\n\n    Source = Param(help=\"The SQLAlchemy Source table\")\n    Index = Param(help=\"The SQLAlchemy Index table\")\n    VS = Param(help=\"The VectorStore\")\n    DS = Param(help=\"The DocStore\")\n    FSPath = Param(help=\"The file storage path\")\n    user_id = Param(help=\"The user id\")\n\n    @classmethod\n    def get_user_settings(cls) -> dict:\n        \"\"\"Get the user settings for indexing\n\n        Returns:\n            dict: user settings in the dictionary format of\n                `ktem.settings.SettingItem`\n        \"\"\"\n        return {}\n\n    @classmethod\n    def get_pipeline(\n        cls,\n        user_settings: dict,\n        index_settings: dict,\n        selected: Optional[list] = None,\n    ) -> \"BaseFileIndexRetriever\":\n        raise NotImplementedError\n\n\nclass BaseFileIndexIndexing(BaseComponent):\n    \"\"\"The pipeline to index information into the data store\n\n    You should define the following method:\n        - run(self, file_paths): run the indexing given the pipeline\n        - get_pipeline(cls, user_settings, index_settings): return the\n          fully-initialized pipeline, ready to be used by ktem.\n\n    You will have access to the following resources:\n        - self._Source: the source table\n        - self._Index: the index table\n        - self._VS: the vector store\n        - self._DS: the docstore\n    \"\"\"\n\n    Source = Param(help=\"The SQLAlchemy Source table\")\n    Index = Param(help=\"The SQLAlchemy Index table\")\n    VS = Param(help=\"The VectorStore\")\n    DS = Param(help=\"The DocStore\")\n    FSPath = Param(help=\"The file storage path\")\n    user_id = Param(help=\"The user id\")\n    private = Param(False, help=\"Whether this is private index\")\n    chunk_size = Param(help=\"Chunk size for this index\")\n 
   chunk_overlap = Param(help=\"Chunk overlap for this index\")\n\n    def run(\n        self, file_paths: str | Path | list[str | Path], *args, **kwargs\n    ) -> tuple[list[str | None], list[str | None]]:\n        \"\"\"Run the indexing pipeline\n\n        Args:\n            file_paths (str | Path | list[str | Path]): the file paths to index\n\n        Returns:\n            - the indexed file ids (each file id corresponds to an input file path, or\n                None if the indexing failed for that file path)\n            - the error messages (each error message corresponds to an input file path,\n                or None if the indexing was successful for that file path)\n        \"\"\"\n        raise NotImplementedError\n\n    def stream(\n        self, file_paths: str | Path | list[str | Path], *args, **kwargs\n    ) -> Generator[\n        Document, None, tuple[list[str | None], list[str | None], list[Document]]\n    ]:\n        \"\"\"Stream the indexing pipeline\n\n        Args:\n            file_paths (str | Path | list[str | Path]): the file paths to index\n\n        Yields:\n            Document: the output message to the UI, must have channel == index or debug\n\n        Returns:\n            - the indexed file ids (each file id corresponds to an input file path, or\n                None if the indexing failed for that file path)\n            - the error messages (each error message corresponds to an input file path,\n                or None if the indexing was successful for that file path)\n            - the indexed documents in form of list[Documents]\n        \"\"\"\n        raise NotImplementedError\n\n    @classmethod\n    def get_pipeline(\n        cls, user_settings: dict, index_settings: dict\n    ) -> \"BaseFileIndexIndexing\":\n        raise NotImplementedError\n\n    @classmethod\n    def get_user_settings(cls) -> dict:\n        \"\"\"Get the user settings for indexing\n\n        Returns:\n            dict: user settings in the dictionary 
format of\n                `ktem.settings.SettingItem`\n        \"\"\"\n        return {}\n\n    def copy_to_filestorage(\n        self, file_paths: str | Path | list[str | Path]\n    ) -> list[str]:\n        \"\"\"Copy to file storage and return the new path, relative to the file storage\n\n        Args:\n            file_path: the file path to copy\n\n        Returns:\n            the new file paths, relative to the file storage\n        \"\"\"\n        import shutil\n        from hashlib import sha256\n\n        if not isinstance(file_paths, list):\n            file_paths = [file_paths]\n\n        paths = []\n        for file_path in file_paths:\n            with open(file_path, \"rb\") as f:\n                paths.append(sha256(f.read()).hexdigest())\n            shutil.copy(file_path, self.FSPath / paths[-1])\n\n        return paths\n\n    def get_filestorage_path(self, rel_paths: str | list[str]) -> list[str]:\n        \"\"\"Get the file storage path for the relative path\n\n        Args:\n            rel_paths: the relative path to the file storage\n\n        Returns:\n            the absolute file storage path to the file\n        \"\"\"\n        raise NotImplementedError\n\n    def warning(self, msg):\n        \"\"\"Log a warning message\n\n        Args:\n            msg: the message to log\n        \"\"\"\n        print(msg)\n\n    def rebuild_index(self):\n        \"\"\"Rebuild the index\"\"\"\n        raise NotImplementedError\n"
  },
  {
    "path": "libs/ktem/ktem/index/file/exceptions.py",
    "content": "from ktem.exceptions import KHException\n\n\nclass FileExistsError(KHException):\n    pass\n"
  },
  {
    "path": "libs/ktem/ktem/index/file/graph/__init__.py",
    "content": "from .graph_index import GraphRAGIndex\nfrom .light_graph_index import LightRAGIndex\nfrom .nano_graph_index import NanoGraphRAGIndex\n\n__all__ = [\"GraphRAGIndex\", \"NanoGraphRAGIndex\", \"LightRAGIndex\"]\n"
  },
  {
    "path": "libs/ktem/ktem/index/file/graph/graph_index.py",
    "content": "from typing import Any\n\nfrom ktem.index.file import FileIndex\n\nfrom ..base import BaseFileIndexIndexing, BaseFileIndexRetriever\nfrom .pipelines import GraphRAGIndexingPipeline, GraphRAGRetrieverPipeline\n\n\nclass GraphRAGIndex(FileIndex):\n    def _setup_indexing_cls(self):\n        self._indexing_pipeline_cls = GraphRAGIndexingPipeline\n\n    def _setup_retriever_cls(self):\n        self._retriever_pipeline_cls = [GraphRAGRetrieverPipeline]\n\n    def get_indexing_pipeline(self, settings, user_id) -> BaseFileIndexIndexing:\n        \"\"\"Define the interface of the indexing pipeline\"\"\"\n\n        obj = super().get_indexing_pipeline(settings, user_id)\n        # disable vectorstore for this kind of Index\n        obj.VS = None\n\n        return obj\n\n    def get_retriever_pipelines(\n        self, settings: dict, user_id: int, selected: Any = None\n    ) -> list[\"BaseFileIndexRetriever\"]:\n        file_ids = self._selector_ui.get_selected_ids(selected)\n        retrievers = [\n            GraphRAGRetrieverPipeline(\n                file_ids=file_ids,\n                Index=self._resources[\"Index\"],\n            )\n        ]\n\n        return retrievers\n"
  },
  {
    "path": "libs/ktem/ktem/index/file/graph/light_graph_index.py",
    "content": "from typing import Any, Optional\nfrom uuid import uuid4\n\nfrom ktem.db.engine import engine\nfrom sqlalchemy.orm import Session\n\nfrom ..base import BaseFileIndexIndexing, BaseFileIndexRetriever\nfrom .graph_index import GraphRAGIndex\nfrom .lightrag_pipelines import LightRAGIndexingPipeline, LightRAGRetrieverPipeline\n\n\nclass LightRAGIndex(GraphRAGIndex):\n    def __init__(self, app, id: int, name: str, config: dict):\n        super().__init__(app, id, name, config)\n        self._collection_graph_id: Optional[str] = None\n\n    def _setup_indexing_cls(self):\n        self._indexing_pipeline_cls = LightRAGIndexingPipeline\n\n    def _setup_retriever_cls(self):\n        self._retriever_pipeline_cls = [LightRAGRetrieverPipeline]\n\n    def _get_or_create_collection_graph_id(self):\n        if self._collection_graph_id:\n            return self._collection_graph_id\n\n        # Try to find existing graph ID for this collection\n        with Session(engine) as session:\n            result = (\n                session.query(self._resources[\"Index\"].target_id)  # type: ignore\n                .filter(\n                    self._resources[\"Index\"].relation_type == \"graph\"  # type: ignore\n                )\n                .first()\n            )\n            if result:\n                self._collection_graph_id = result[0]\n            else:\n                self._collection_graph_id = str(uuid4())\n        return self._collection_graph_id\n\n    def get_indexing_pipeline(self, settings, user_id) -> BaseFileIndexIndexing:\n        pipeline = super().get_indexing_pipeline(settings, user_id)\n        # indexing settings\n        prefix = f\"index.options.{self.id}.\"\n        striped_settings = {\n            key[len(prefix) :]: value\n            for key, value in settings.items()\n            if key.startswith(prefix)\n        }\n        # set the prompts\n        pipeline.prompts = striped_settings\n        # set collection graph id\n        
pipeline.collection_graph_id = self._get_or_create_collection_graph_id()\n        # set index batch size\n        pipeline.index_batch_size = striped_settings.get(\n            \"batch_size\", pipeline.index_batch_size\n        )\n        return pipeline\n\n    def get_retriever_pipelines(\n        self, settings: dict, user_id: int, selected: Any = None\n    ) -> list[\"BaseFileIndexRetriever\"]:\n        file_ids = self._selector_ui.get_selected_ids(selected)\n        # retrieval settings\n        prefix = f\"index.options.{self.id}.\"\n        search_type = settings.get(prefix + \"search_type\", \"local\")\n\n        retrievers = [\n            LightRAGRetrieverPipeline(\n                file_ids=file_ids,\n                Index=self._resources[\"Index\"],\n                search_type=search_type,\n            )\n        ]\n\n        return retrievers\n"
  },
  {
    "path": "libs/ktem/ktem/index/file/graph/lightrag_pipelines.py",
    "content": "import asyncio\nimport glob\nimport logging\nimport os\nimport re\nfrom pathlib import Path\nfrom typing import Generator\n\nimport numpy as np\nimport pandas as pd\nfrom ktem.db.models import engine\nfrom ktem.embeddings.manager import embedding_models_manager as embeddings\nfrom ktem.llms.manager import llms\nfrom sqlalchemy.orm import Session\nfrom tenacity import (\n    retry,\n    retry_if_exception_type,\n    stop_after_attempt,\n    wait_exponential,\n)\nfrom theflow.settings import settings\n\nfrom kotaemon.base import Document, Param, RetrievedDocument\nfrom kotaemon.base.schema import AIMessage, HumanMessage, SystemMessage\n\nfrom ..pipelines import BaseFileIndexRetriever\nfrom .pipelines import GraphRAGIndexingPipeline\nfrom .visualize import create_knowledge_graph, visualize_graph\n\ntry:\n    from lightrag import LightRAG, QueryParam\n\n    # newer versions of LightRAG needs to be initialized before using\n    from lightrag.kg.shared_storage import initialize_pipeline_status\n    from lightrag.operate import (\n        _find_most_related_edges_from_entities,\n        _find_most_related_text_unit_from_entities,\n    )\n    from lightrag.utils import EmbeddingFunc, compute_args_hash\n\nexcept ImportError:\n    print(\n        (\n            \"LightRAG dependencies not installed. \"\n            \"Try `pip install git+https://github.com/HKUDS/LightRAG.git` to install. 
\"\n            \"LighthRAG retriever pipeline will not work properly.\"\n        )\n    )\n\n\nlogging.getLogger(\"lightrag\").setLevel(logging.INFO)\n\n\nfilestorage_path = Path(settings.KH_FILESTORAGE_PATH) / \"lightrag\"\nfilestorage_path.mkdir(parents=True, exist_ok=True)\n\nINDEX_BATCHSIZE = 4\n\n\ndef get_llm_func(model):\n    @retry(\n        stop=stop_after_attempt(3),\n        wait=wait_exponential(multiplier=1, min=4, max=10),\n        retry=retry_if_exception_type((Exception,)),\n        after=lambda retry_state: logging.warning(\n            f\"LLM API call attempt {retry_state.attempt_number} failed. Retrying...\"\n        ),\n    )\n    async def _call_model(model, input_messages):\n        return (await model.ainvoke(input_messages)).text\n\n    async def llm_func(\n        prompt, system_prompt=None, history_messages=[], **kwargs\n    ) -> str:\n        input_messages = [SystemMessage(text=system_prompt)] if system_prompt else []\n\n        hashing_kv = kwargs.pop(\"hashing_kv\", None)\n        if history_messages:\n            for msg in history_messages:\n                if msg.get(\"role\") == \"user\":\n                    input_messages.append(HumanMessage(text=msg[\"content\"]))\n                else:\n                    input_messages.append(AIMessage(text=msg[\"content\"]))\n\n        input_messages.append(HumanMessage(text=prompt))\n\n        if hashing_kv is not None:\n            args_hash = compute_args_hash(\"model\", input_messages)\n            if_cache_return = await hashing_kv.get_by_id(args_hash)\n            if if_cache_return is not None:\n                return if_cache_return[\"return\"]\n\n        try:\n            output = await _call_model(model, input_messages)\n        except Exception as e:\n            logging.error(f\"Failed to call LLM API after 3 retries: {str(e)}\")\n            raise\n\n        print(\"-\" * 50)\n        print(output, \"\\n\", \"-\" * 50)\n\n        if hashing_kv is not None:\n            await 
hashing_kv.upsert({args_hash: {\"return\": output, \"model\": \"model\"}})\n\n        return output\n\n    return llm_func\n\n\ndef get_embedding_func(model):\n    async def embedding_func(texts: list[str]) -> np.ndarray:\n        outputs = model(texts)\n        embedding_outputs = np.array([doc.embedding for doc in outputs])\n\n        return embedding_outputs\n\n    return embedding_func\n\n\ndef get_default_models_wrapper():\n    # setup model functions\n    default_embedding = embeddings.get_default()\n    default_embedding_dim = len(default_embedding([\"Hi\"])[0].embedding)\n    embedding_func = EmbeddingFunc(\n        embedding_dim=default_embedding_dim,\n        max_token_size=8192,\n        func=get_embedding_func(default_embedding),\n    )\n    print(\"GraphRAG embedding dim\", default_embedding_dim)\n\n    default_llm = llms.get_default()\n    llm_func = get_llm_func(default_llm)\n\n    return llm_func, embedding_func, default_llm, default_embedding\n\n\ndef prepare_graph_index_path(graph_id: str):\n    root_path = Path(filestorage_path) / graph_id\n    input_path = root_path / \"input\"\n\n    return root_path, input_path\n\n\ndef list_of_list_to_df(data: list[list]) -> pd.DataFrame:\n    df = pd.DataFrame(data[1:], columns=data[0])\n    return df\n\n\ndef clean_quote(input: str) -> str:\n    return re.sub(r\"[\\\"']\", \"\", input)\n\n\nasync def lightrag_build_local_query_context(\n    graph_func,\n    query,\n    query_param,\n):\n    knowledge_graph_inst = graph_func.chunk_entity_relation_graph\n    entities_vdb = graph_func.entities_vdb\n    text_chunks_db = graph_func.text_chunks\n\n    results = await entities_vdb.query(query, top_k=query_param.top_k)\n    if not len(results):\n        raise ValueError(\"No results found\")\n\n    node_datas = await asyncio.gather(\n        *[knowledge_graph_inst.get_node(r[\"entity_name\"]) for r in results]\n    )\n    node_degrees = await asyncio.gather(\n        
*[knowledge_graph_inst.node_degree(r[\"entity_name\"]) for r in results]\n    )\n    node_datas = [\n        {**n, \"entity_name\": k[\"entity_name\"], \"rank\": d}\n        for k, n, d in zip(results, node_datas, node_degrees)\n        if n is not None\n    ]\n\n    try:\n        use_text_units = await _find_most_related_text_unit_from_entities(\n            node_datas, query_param, text_chunks_db, knowledge_graph_inst\n        )\n    except Exception:\n        use_text_units = []\n\n    try:\n        use_relations = await _find_most_related_edges_from_entities(\n            node_datas, query_param, knowledge_graph_inst\n        )\n    except Exception:\n        use_relations = []\n\n    logging.info(\n        f\"Local query uses {len(node_datas)} entities, \"\n        f\"{len(use_relations)} relations, {len(use_text_units)} text units\"\n    )\n\n    entites_section_list = [[\"id\", \"entity\", \"type\", \"description\", \"rank\"]]\n    for i, n in enumerate(node_datas):\n        entites_section_list.append(\n            [\n                str(i),\n                clean_quote(n[\"entity_name\"]),\n                n.get(\"entity_type\", \"UNKNOWN\"),\n                clean_quote(n.get(\"description\", \"UNKNOWN\")),\n                n[\"rank\"],\n            ]\n        )\n    entities_df = list_of_list_to_df(entites_section_list)\n\n    relations_section_list = [\n        [\"id\", \"source\", \"target\", \"description\", \"keywords\", \"weight\", \"rank\"]\n    ]\n    for i, e in enumerate(use_relations):\n        relations_section_list.append(\n            [\n                str(i),\n                clean_quote(e[\"src_tgt\"][0]),\n                clean_quote(e[\"src_tgt\"][1]),\n                clean_quote(e[\"description\"]),\n                e[\"keywords\"],\n                e[\"weight\"],\n                e[\"rank\"],\n            ]\n        )\n    relations_df = list_of_list_to_df(relations_section_list)\n\n    text_units_section_list = [[\"id\", 
\"content\"]]\n    for i, t in enumerate(use_text_units):\n        text_units_section_list.append([str(i), t[\"content\"]])\n    sources_df = list_of_list_to_df(text_units_section_list)\n\n    return entities_df, relations_df, sources_df\n\n\ndef build_graphrag(working_dir, llm_func, embedding_func):\n    graphrag_func = LightRAG(\n        working_dir=working_dir,\n        llm_model_func=llm_func,\n        embedding_func=embedding_func,\n    )\n\n    # newer versions of LightRAG needs to be initialized before using\n    asyncio.run(graphrag_func.initialize_storages())\n    asyncio.run(initialize_pipeline_status())\n\n    return graphrag_func\n\n\nclass LightRAGIndexingPipeline(GraphRAGIndexingPipeline):\n    \"\"\"GraphRAG specific indexing pipeline\"\"\"\n\n    prompts: dict[str, str] = {}\n    collection_graph_id: str\n    index_batch_size: int = INDEX_BATCHSIZE\n\n    def store_file_id_with_graph_id(self, file_ids: list[str | None]):\n        if not settings.USE_GLOBAL_GRAPHRAG:\n            return super().store_file_id_with_graph_id(file_ids)\n\n        # Use the collection-wide graph ID for LightRAG\n        graph_id = self.collection_graph_id\n\n        # Record all files under this graph_id\n        with Session(engine) as session:\n            for file_id in file_ids:\n                if not file_id:\n                    continue\n                # Check if mapping already exists\n                existing = (\n                    session.query(self.Index)\n                    .filter(\n                        self.Index.source_id == file_id,\n                        self.Index.target_id == graph_id,\n                        self.Index.relation_type == \"graph\",\n                    )\n                    .first()\n                )\n                if not existing:\n                    node = self.Index(\n                        source_id=file_id,\n                        target_id=graph_id,\n                        relation_type=\"graph\",\n               
     )\n                    session.add(node)\n            session.commit()\n\n        return graph_id\n\n    @classmethod\n    def get_user_settings(cls) -> dict:\n        try:\n            from lightrag.prompt import PROMPTS\n\n            blacklist_keywords = [\"default\", \"response\", \"process\"]\n            settings_dict = {\n                \"batch_size\": {\n                    \"name\": (\n                        \"Index batch size \" \"(reduce if you have rate limit issues)\"\n                    ),\n                    \"value\": INDEX_BATCHSIZE,\n                    \"component\": \"number\",\n                }\n            }\n            settings_dict.update(\n                {\n                    prompt_name: {\n                        \"name\": f\"Prompt for '{prompt_name}'\",\n                        \"value\": content,\n                        \"component\": \"text\",\n                    }\n                    for prompt_name, content in PROMPTS.items()\n                    if all(\n                        keyword not in prompt_name.lower()\n                        for keyword in blacklist_keywords\n                    )\n                    and isinstance(content, str)\n                }\n            )\n            return settings_dict\n        except ImportError as e:\n            print(e)\n            return {}\n\n    def call_graphrag_index(self, graph_id: str, docs: list[Document]):\n        from lightrag.prompt import PROMPTS\n\n        # modify the prompt if it is set in the settings\n        for prompt_name, content in self.prompts.items():\n            if prompt_name in PROMPTS:\n                PROMPTS[prompt_name] = content\n\n        _, input_path = prepare_graph_index_path(graph_id)\n        input_path.mkdir(parents=True, exist_ok=True)\n\n        (\n            llm_func,\n            embedding_func,\n            default_llm,\n            default_embedding,\n        ) = get_default_models_wrapper()\n        print(\n            
f\"Indexing GraphRAG with LLM {default_llm} \"\n            f\"and Embedding {default_embedding}...\"\n        )\n\n        all_docs = [\n            doc.text\n            for doc in docs\n            if doc.metadata.get(\"type\", \"text\") == \"text\" and len(doc.text.strip()) > 0\n        ]\n\n        yield Document(\n            channel=\"debug\",\n            text=\"[GraphRAG] Creating/Updating index... This can take a long time.\",\n        )\n\n        # Check if graph already exists\n        graph_file = input_path / \"graph_chunk_entity_relation.graphml\"\n        is_incremental = graph_file.exists()\n\n        # Only clear cache if it's a new graph\n        if not is_incremental:\n            json_files = glob.glob(f\"{input_path}/*.json\")\n            for json_file in json_files:\n                os.remove(json_file)\n\n        # Initialize or load existing GraphRAG\n        graphrag_func = build_graphrag(\n            input_path,\n            llm_func=llm_func,\n            embedding_func=embedding_func,\n        )\n\n        total_docs = len(all_docs)\n        process_doc_count = 0\n        yield Document(\n            channel=\"debug\",\n            text=(\n                f\"[GraphRAG] {'Updating' if is_incremental else 'Creating'} index: \"\n                f\"{process_doc_count} / {total_docs} documents.\"\n            ),\n        )\n\n        for doc_id in range(0, len(all_docs), self.index_batch_size):\n            cur_docs = all_docs[doc_id : doc_id + self.index_batch_size]\n            combined_doc = \"\\n\".join(cur_docs)\n\n            # Use insert for incremental updates\n            graphrag_func.insert(combined_doc)\n            process_doc_count += len(cur_docs)\n            yield Document(\n                channel=\"debug\",\n                text=(\n                    f\"[GraphRAG] {'Updated' if is_incremental else 'Indexed'} \"\n                    f\"{process_doc_count} / {total_docs} documents.\"\n                ),\n            
)\n\n        yield Document(\n            channel=\"debug\",\n            text=f\"[GraphRAG] {'Update' if is_incremental else 'Indexing'} finished.\",\n        )\n\n    def stream(\n        self, file_paths: str | Path | list[str | Path], reindex: bool = False, **kwargs\n    ) -> Generator[\n        Document, None, tuple[list[str | None], list[str | None], list[Document]]\n    ]:\n        file_ids, errors, all_docs = yield from super().stream(\n            file_paths, reindex=reindex, **kwargs\n        )\n\n        return file_ids, errors, all_docs\n\n\nclass LightRAGRetrieverPipeline(BaseFileIndexRetriever):\n    \"\"\"GraphRAG specific retriever pipeline\"\"\"\n\n    Index = Param(help=\"The SQLAlchemy Index table\")\n    file_ids: list[str] = []\n    search_type: str = \"local\"\n\n    @classmethod\n    def get_user_settings(cls) -> dict:\n        return {\n            \"search_type\": {\n                \"name\": \"Search type\",\n                \"value\": \"local\",\n                \"choices\": [\"local\", \"global\", \"hybrid\"],\n                \"component\": \"dropdown\",\n                \"info\": \"Whether to use local or global search in the graph.\",\n            }\n        }\n\n    def _build_graph_search(self):\n        file_id = self.file_ids[0]\n\n        # retrieve the graph_id from the index\n        with Session(engine) as session:\n            graph_id = (\n                session.query(self.Index.target_id)\n                .filter(self.Index.source_id == file_id)\n                .filter(self.Index.relation_type == \"graph\")\n                .first()\n            )\n            graph_id = graph_id[0] if graph_id else None\n            assert graph_id, f\"GraphRAG index not found for file_id: {file_id}\"\n\n        _, input_path = prepare_graph_index_path(graph_id)\n        input_path.mkdir(parents=True, exist_ok=True)\n\n        llm_func, embedding_func, _, _ = get_default_models_wrapper()\n        graphrag_func = build_graphrag(\n         
   input_path,\n            llm_func=llm_func,\n            embedding_func=embedding_func,\n        )\n        print(\"search_type\", self.search_type)\n        query_params = QueryParam(mode=self.search_type, only_need_context=True)\n\n        return graphrag_func, query_params\n\n    def _to_document(self, header: str, context_text: str) -> RetrievedDocument:\n        return RetrievedDocument(\n            text=context_text,\n            metadata={\n                \"file_name\": header,\n                \"type\": \"table\",\n                \"llm_trulens_score\": 1.0,\n            },\n            score=1.0,\n        )\n\n    def format_context_records(\n        self, entities, relationships, sources\n    ) -> list[RetrievedDocument]:\n        docs = []\n        context: str = \"\"\n\n        # entities current parsing error\n        header = \"<b>Entities</b>\\n\"\n        context = entities[[\"entity\", \"description\"]].to_markdown(index=False)\n        docs.append(self._to_document(header, context))\n\n        header = \"\\n<b>Relationships</b>\\n\"\n        context = relationships[[\"source\", \"target\", \"description\"]].to_markdown(\n            index=False\n        )\n        docs.append(self._to_document(header, context))\n\n        header = \"\\n<b>Sources</b>\\n\"\n        context = \"\"\n        for _, row in sources.iterrows():\n            title, content = row[\"id\"], row[\"content\"]\n            context += f\"\\n\\n<h5>Source <b>#{title}</b></h5>\\n\"\n            context += content\n        docs.append(self._to_document(header, context))\n\n        return docs\n\n    def plot_graph(self, relationships):\n        G = create_knowledge_graph(relationships)\n        plot = visualize_graph(G)\n        return plot\n\n    def run(\n        self,\n        text: str,\n    ) -> list[RetrievedDocument]:\n        if not self.file_ids:\n            return []\n\n        graphrag_func, query_params = self._build_graph_search()\n\n        # only local mode 
support graph visualization\n        if query_params.mode == \"local\":\n            entities, relationships, sources = asyncio.run(\n                lightrag_build_local_query_context(graphrag_func, text, query_params)\n            )\n            documents = self.format_context_records(entities, relationships, sources)\n            plot = self.plot_graph(relationships)\n            documents += [\n                RetrievedDocument(\n                    text=\"\",\n                    metadata={\n                        \"file_name\": \"GraphRAG\",\n                        \"type\": \"plot\",\n                        \"data\": plot,\n                    },\n                ),\n            ]\n        else:\n            context = graphrag_func.query(text, query_params)\n\n            # account for missing ``` for closing code block\n            context += \"\\n```\"\n\n            documents = [\n                RetrievedDocument(\n                    text=context,\n                    metadata={\n                        \"file_name\": \"GraphRAG {} Search\".format(\n                            query_params.mode.capitalize()\n                        ),\n                        \"type\": \"table\",\n                    },\n                )\n            ]\n\n        return documents\n"
  },
  {
    "path": "libs/ktem/ktem/index/file/graph/nano_graph_index.py",
    "content": "from typing import Any, Optional\nfrom uuid import uuid4\n\nfrom ktem.db.engine import engine\nfrom sqlalchemy.orm import Session\n\nfrom ..base import BaseFileIndexIndexing, BaseFileIndexRetriever\nfrom .graph_index import GraphRAGIndex\nfrom .nano_pipelines import NanoGraphRAGIndexingPipeline, NanoGraphRAGRetrieverPipeline\n\n\nclass NanoGraphRAGIndex(GraphRAGIndex):\n    def __init__(self, app, id: int, name: str, config: dict):\n        super().__init__(app, id, name, config)\n        self._collection_graph_id: Optional[str] = None\n\n    def _setup_indexing_cls(self):\n        self._indexing_pipeline_cls = NanoGraphRAGIndexingPipeline\n\n    def _setup_retriever_cls(self):\n        self._retriever_pipeline_cls = [NanoGraphRAGRetrieverPipeline]\n\n    def _get_or_create_collection_graph_id(self):\n        if self._collection_graph_id:\n            return self._collection_graph_id\n\n        # Try to find existing graph ID for this collection\n        with Session(engine) as session:\n            result = (\n                session.query(self._resources[\"Index\"].target_id)  # type: ignore\n                .filter(\n                    self._resources[\"Index\"].relation_type == \"graph\"  # type: ignore\n                )\n                .first()\n            )\n            if result:\n                self._collection_graph_id = result[0]\n            else:\n                self._collection_graph_id = str(uuid4())\n        return self._collection_graph_id\n\n    def get_indexing_pipeline(self, settings, user_id) -> BaseFileIndexIndexing:\n        pipeline = super().get_indexing_pipeline(settings, user_id)\n        # indexing settings\n        prefix = f\"index.options.{self.id}.\"\n        striped_settings = {\n            key[len(prefix) :]: value\n            for key, value in settings.items()\n            if key.startswith(prefix)\n        }\n        # set the prompts\n        pipeline.prompts = striped_settings\n        # set collection 
graph id\n        pipeline.collection_graph_id = self._get_or_create_collection_graph_id()\n        # set index batch size\n        pipeline.index_batch_size = striped_settings.get(\n            \"batch_size\", pipeline.index_batch_size\n        )\n        return pipeline\n\n    def get_retriever_pipelines(\n        self, settings: dict, user_id: int, selected: Any = None\n    ) -> list[\"BaseFileIndexRetriever\"]:\n        file_ids = self._selector_ui.get_selected_ids(selected)\n        # retrieval settings\n        prefix = f\"index.options.{self.id}.\"\n        search_type = settings.get(prefix + \"search_type\", \"local\")\n\n        retrievers = [\n            NanoGraphRAGRetrieverPipeline(\n                file_ids=file_ids,\n                Index=self._resources[\"Index\"],\n                search_type=search_type,\n            )\n        ]\n\n        return retrievers\n"
  },
  {
    "path": "libs/ktem/ktem/index/file/graph/nano_pipelines.py",
    "content": "import asyncio\nimport glob\nimport logging\nimport os\nimport re\nfrom pathlib import Path\nfrom typing import Generator\n\nimport numpy as np\nimport pandas as pd\nfrom ktem.db.models import engine\nfrom ktem.embeddings.manager import embedding_models_manager as embeddings\nfrom ktem.llms.manager import llms\nfrom sqlalchemy.orm import Session\nfrom tenacity import (\n    retry,\n    retry_if_exception_type,\n    stop_after_attempt,\n    wait_exponential,\n)\nfrom theflow.settings import settings\n\nfrom kotaemon.base import Document, Param, RetrievedDocument\nfrom kotaemon.base.schema import AIMessage, HumanMessage, SystemMessage\n\nfrom ..pipelines import BaseFileIndexRetriever\nfrom .pipelines import GraphRAGIndexingPipeline\nfrom .visualize import create_knowledge_graph, visualize_graph\n\ntry:\n    from nano_graphrag import GraphRAG, QueryParam\n    from nano_graphrag._op import (\n        _find_most_related_community_from_entities,\n        _find_most_related_edges_from_entities,\n        _find_most_related_text_unit_from_entities,\n    )\n    from nano_graphrag._utils import EmbeddingFunc, compute_args_hash\n\nexcept ImportError:\n    print(\n        (\n            \"Nano-GraphRAG dependencies not installed. \"\n            \"Try `pip install nano-graphrag` to install. \"\n            \"Nano-GraphRAG retriever pipeline will not work properly.\"\n        )\n    )\n\n\nlogging.getLogger(\"nano-graphrag\").setLevel(logging.INFO)\n\n\nfilestorage_path = Path(settings.KH_FILESTORAGE_PATH) / \"nano_graphrag\"\nfilestorage_path.mkdir(parents=True, exist_ok=True)\n\nINDEX_BATCHSIZE = 4\n\n\ndef get_llm_func(model):\n    @retry(\n        stop=stop_after_attempt(3),\n        wait=wait_exponential(multiplier=1, min=4, max=10),\n        retry=retry_if_exception_type((Exception,)),\n        after=lambda retry_state: logging.warning(\n            f\"LLM API call attempt {retry_state.attempt_number} failed. 
Retrying...\"\n        ),\n    )\n    async def _call_model(model, input_messages):\n        return (await model.ainvoke(input_messages)).text\n\n    async def llm_func(\n        prompt, system_prompt=None, history_messages=[], **kwargs\n    ) -> str:\n        input_messages = [SystemMessage(text=system_prompt)] if system_prompt else []\n\n        hashing_kv = kwargs.pop(\"hashing_kv\", None)\n        if history_messages:\n            for msg in history_messages:\n                if msg.get(\"role\") == \"user\":\n                    input_messages.append(HumanMessage(text=msg[\"content\"]))\n                else:\n                    input_messages.append(AIMessage(text=msg[\"content\"]))\n\n        input_messages.append(HumanMessage(text=prompt))\n\n        if hashing_kv is not None:\n            args_hash = compute_args_hash(\"model\", input_messages)\n            if_cache_return = await hashing_kv.get_by_id(args_hash)\n            if if_cache_return is not None:\n                return if_cache_return[\"return\"]\n\n        try:\n            output = await _call_model(model, input_messages)\n        except Exception as e:\n            logging.error(f\"Failed to call LLM API after 3 retries: {str(e)}\")\n            raise\n\n        print(\"-\" * 50)\n        print(output, \"\\n\", \"-\" * 50)\n\n        if hashing_kv is not None:\n            await hashing_kv.upsert({args_hash: {\"return\": output, \"model\": \"model\"}})\n\n        return output\n\n    return llm_func\n\n\ndef get_embedding_func(model):\n    async def embedding_func(texts: list[str]) -> np.ndarray:\n        outputs = model(texts)\n        embedding_outputs = np.array([doc.embedding for doc in outputs])\n\n        return embedding_outputs\n\n    return embedding_func\n\n\ndef get_default_models_wrapper():\n    # setup model functions\n    default_embedding = embeddings.get_default()\n    default_embedding_dim = len(default_embedding([\"Hi\"])[0].embedding)\n    embedding_func = EmbeddingFunc(\n 
       embedding_dim=default_embedding_dim,\n        max_token_size=8192,\n        func=get_embedding_func(default_embedding),\n    )\n    print(\"GraphRAG embedding dim\", default_embedding_dim)\n\n    default_llm = llms.get_default()\n    llm_func = get_llm_func(default_llm)\n\n    return llm_func, embedding_func, default_llm, default_embedding\n\n\ndef prepare_graph_index_path(graph_id: str):\n    root_path = Path(filestorage_path) / graph_id\n    input_path = root_path / \"input\"\n\n    return root_path, input_path\n\n\ndef list_of_list_to_df(data: list[list]) -> pd.DataFrame:\n    df = pd.DataFrame(data[1:], columns=data[0])\n    return df\n\n\ndef clean_quote(input: str) -> str:\n    return re.sub(r\"[\\\"']\", \"\", input)\n\n\nasync def nano_graph_rag_build_local_query_context(\n    graph_func,\n    query,\n    query_param,\n):\n    knowledge_graph_inst = graph_func.chunk_entity_relation_graph\n    entities_vdb = graph_func.entities_vdb\n    community_reports = graph_func.community_reports\n    text_chunks_db = graph_func.text_chunks\n\n    results = await entities_vdb.query(query, top_k=query_param.top_k)\n    if not len(results):\n        raise ValueError(\"No results found\")\n\n    node_datas = await asyncio.gather(\n        *[knowledge_graph_inst.get_node(r[\"entity_name\"]) for r in results]\n    )\n    node_degrees = await asyncio.gather(\n        *[knowledge_graph_inst.node_degree(r[\"entity_name\"]) for r in results]\n    )\n    node_datas = [\n        {**n, \"entity_name\": k[\"entity_name\"], \"rank\": d}\n        for k, n, d in zip(results, node_datas, node_degrees)\n        if n is not None\n    ]\n    use_communities = await _find_most_related_community_from_entities(\n        node_datas, query_param, community_reports\n    )\n    use_text_units = await _find_most_related_text_unit_from_entities(\n        node_datas, query_param, text_chunks_db, knowledge_graph_inst\n    )\n    use_relations = await _find_most_related_edges_from_entities(\n   
     node_datas, query_param, knowledge_graph_inst\n    )\n    entites_section_list = [[\"id\", \"entity\", \"type\", \"description\", \"rank\"]]\n    for i, n in enumerate(node_datas):\n        entites_section_list.append(\n            [\n                str(i),\n                clean_quote(n[\"entity_name\"]),\n                n.get(\"entity_type\", \"UNKNOWN\"),\n                clean_quote(n.get(\"description\", \"UNKNOWN\")),\n                n[\"rank\"],\n            ]\n        )\n    entities_df = list_of_list_to_df(entites_section_list)\n\n    relations_section_list = [\n        [\"id\", \"source\", \"target\", \"description\", \"weight\", \"rank\"]\n    ]\n    for i, e in enumerate(use_relations):\n        relations_section_list.append(\n            [\n                str(i),\n                clean_quote(e[\"src_tgt\"][0]),\n                clean_quote(e[\"src_tgt\"][1]),\n                clean_quote(e[\"description\"]),\n                e[\"weight\"],\n                e[\"rank\"],\n            ]\n        )\n    relations_df = list_of_list_to_df(relations_section_list)\n\n    communities_section_list = [[\"id\", \"content\"]]\n    for i, c in enumerate(use_communities):\n        communities_section_list.append([str(i), c[\"report_string\"]])\n    communities_df = list_of_list_to_df(communities_section_list)\n\n    text_units_section_list = [[\"id\", \"content\"]]\n    for i, t in enumerate(use_text_units):\n        text_units_section_list.append([str(i), t[\"content\"]])\n    sources_df = list_of_list_to_df(text_units_section_list)\n\n    return entities_df, relations_df, communities_df, sources_df\n\n\ndef build_graphrag(working_dir, llm_func, embedding_func):\n    graphrag_func = GraphRAG(\n        working_dir=working_dir,\n        best_model_func=llm_func,\n        cheap_model_func=llm_func,\n        embedding_func=embedding_func,\n    )\n    return graphrag_func\n\n\nclass NanoGraphRAGIndexingPipeline(GraphRAGIndexingPipeline):\n    \"\"\"GraphRAG 
specific indexing pipeline\"\"\"\n\n    prompts: dict[str, str] = {}\n    collection_graph_id: str\n    index_batch_size: int = INDEX_BATCHSIZE\n\n    def store_file_id_with_graph_id(self, file_ids: list[str | None]):\n        if not settings.USE_GLOBAL_GRAPHRAG:\n            return super().store_file_id_with_graph_id(file_ids)\n\n        # Use the collection-wide graph ID for LightRAG\n        graph_id = self.collection_graph_id\n\n        # Record all files under this graph_id\n        with Session(engine) as session:\n            for file_id in file_ids:\n                if not file_id:\n                    continue\n                # Check if mapping already exists\n                existing = (\n                    session.query(self.Index)\n                    .filter(\n                        self.Index.source_id == file_id,\n                        self.Index.target_id == graph_id,\n                        self.Index.relation_type == \"graph\",\n                    )\n                    .first()\n                )\n                if not existing:\n                    node = self.Index(\n                        source_id=file_id,\n                        target_id=graph_id,\n                        relation_type=\"graph\",\n                    )\n                    session.add(node)\n            session.commit()\n\n        return graph_id\n\n    @classmethod\n    def get_user_settings(cls) -> dict:\n        try:\n            from nano_graphrag.prompt import PROMPTS\n\n            blacklist_keywords = [\"default\", \"response\", \"process\"]\n            settings_dict = {\n                \"batch_size\": {\n                    \"name\": (\n                        \"Index batch size \" \"(reduce if you have rate limit issues)\"\n                    ),\n                    \"value\": INDEX_BATCHSIZE,\n                    \"component\": \"number\",\n                }\n            }\n            settings_dict.update(\n                {\n                    
prompt_name: {\n                        \"name\": f\"Prompt for '{prompt_name}'\",\n                        \"value\": content,\n                        \"component\": \"text\",\n                    }\n                    for prompt_name, content in PROMPTS.items()\n                    if all(\n                        keyword not in prompt_name.lower()\n                        for keyword in blacklist_keywords\n                    )\n                    and isinstance(content, str)\n                }\n            )\n            return settings_dict\n        except ImportError as e:\n            print(e)\n            return {}\n\n    def call_graphrag_index(self, graph_id: str, docs: list[Document]):\n        from nano_graphrag.prompt import PROMPTS\n\n        # modify the prompt if it is set in the settings\n        for prompt_name, content in self.prompts.items():\n            if prompt_name in PROMPTS:\n                PROMPTS[prompt_name] = content\n\n        _, input_path = prepare_graph_index_path(graph_id)\n        input_path.mkdir(parents=True, exist_ok=True)\n\n        (\n            llm_func,\n            embedding_func,\n            default_llm,\n            default_embedding,\n        ) = get_default_models_wrapper()\n        print(\n            f\"Indexing GraphRAG with LLM {default_llm} \"\n            f\"and Embedding {default_embedding}...\"\n        )\n\n        all_docs = [\n            doc.text\n            for doc in docs\n            if doc.metadata.get(\"type\", \"text\") == \"text\" and len(doc.text.strip()) > 0\n        ]\n\n        yield Document(\n            channel=\"debug\",\n            text=\"[GraphRAG] Creating/Updating index... 
This can take a long time.\",\n        )\n\n        # Check if graph already exists\n        graph_file = input_path / \"graph_chunk_entity_relation.graphml\"\n        is_incremental = graph_file.exists()\n\n        # Only clear cache if it's a new graph\n        if not is_incremental:\n            json_files = glob.glob(f\"{input_path}/*.json\")\n            for json_file in json_files:\n                os.remove(json_file)\n\n        # Initialize or load existing GraphRAG\n        graphrag_func = build_graphrag(\n            input_path,\n            llm_func=llm_func,\n            embedding_func=embedding_func,\n        )\n\n        total_docs = len(all_docs)\n        process_doc_count = 0\n        yield Document(\n            channel=\"debug\",\n            text=(\n                f\"[GraphRAG] {'Updating' if is_incremental else 'Creating'} index: \"\n                f\"{process_doc_count} / {total_docs} documents.\"\n            ),\n        )\n\n        for doc_id in range(0, len(all_docs), self.index_batch_size):\n            cur_docs = all_docs[doc_id : doc_id + self.index_batch_size]\n            combined_doc = \"\\n\".join(cur_docs)\n\n            # Use insert for incremental updates\n            graphrag_func.insert(combined_doc)\n            process_doc_count += len(cur_docs)\n            yield Document(\n                channel=\"debug\",\n                text=(\n                    f\"[GraphRAG] {'Updated' if is_incremental else 'Indexed'} \"\n                    f\"{process_doc_count} / {total_docs} documents.\"\n                ),\n            )\n\n        yield Document(\n            channel=\"debug\",\n            text=f\"[GraphRAG] {'Update' if is_incremental else 'Indexing'} finished.\",\n        )\n\n    def stream(\n        self, file_paths: str | Path | list[str | Path], reindex: bool = False, **kwargs\n    ) -> Generator[\n        Document, None, tuple[list[str | None], list[str | None], list[Document]]\n    ]:\n        file_ids, errors, 
all_docs = yield from super().stream(\n            file_paths, reindex=reindex, **kwargs\n        )\n\n        return file_ids, errors, all_docs\n\n\nclass NanoGraphRAGRetrieverPipeline(BaseFileIndexRetriever):\n    \"\"\"GraphRAG specific retriever pipeline\"\"\"\n\n    Index = Param(help=\"The SQLAlchemy Index table\")\n    file_ids: list[str] = []\n    search_type: str = \"local\"\n\n    @classmethod\n    def get_user_settings(cls) -> dict:\n        return {\n            \"search_type\": {\n                \"name\": \"Search type\",\n                \"value\": \"local\",\n                \"choices\": [\"local\", \"global\"],\n                \"component\": \"dropdown\",\n                \"info\": \"Whether to use local or global search in the graph.\",\n            }\n        }\n\n    def _build_graph_search(self):\n        file_id = self.file_ids[0]\n\n        # retrieve the graph_id from the index\n        with Session(engine) as session:\n            graph_id = (\n                session.query(self.Index.target_id)\n                .filter(self.Index.source_id == file_id)\n                .filter(self.Index.relation_type == \"graph\")\n                .first()\n            )\n            graph_id = graph_id[0] if graph_id else None\n            assert graph_id, f\"GraphRAG index not found for file_id: {file_id}\"\n\n        _, input_path = prepare_graph_index_path(graph_id)\n        input_path.mkdir(parents=True, exist_ok=True)\n\n        llm_func, embedding_func, _, _ = get_default_models_wrapper()\n        graphrag_func = build_graphrag(\n            input_path,\n            llm_func=llm_func,\n            embedding_func=embedding_func,\n        )\n        print(\"search_type\", self.search_type)\n        query_params = QueryParam(mode=self.search_type, only_need_context=True)\n\n        return graphrag_func, query_params\n\n    def _to_document(self, header: str, context_text: str) -> RetrievedDocument:\n        return RetrievedDocument(\n            
text=context_text,\n            metadata={\n                \"file_name\": header,\n                \"type\": \"table\",\n                \"llm_trulens_score\": 1.0,\n            },\n            score=1.0,\n        )\n\n    def format_context_records(\n        self, entities, relationships, reports, sources\n    ) -> list[RetrievedDocument]:\n        docs = []\n        context: str = \"\"\n\n        # entities current parsing error\n        header = \"<b>Entities</b>\\n\"\n        context = entities[[\"entity\", \"description\"]].to_markdown(index=False)\n        docs.append(self._to_document(header, context))\n\n        header = \"\\n<b>Relationships</b>\\n\"\n        context = relationships[[\"source\", \"target\", \"description\"]].to_markdown(\n            index=False\n        )\n        docs.append(self._to_document(header, context))\n\n        header = \"\\n<b>Reports</b>\\n\"\n        context = \"\"\n        for _, row in reports.iterrows():\n            title, content = row[\"id\"], row[\"content\"]  # not contain title\n            context += f\"\\n\\n<h5>Report <b>{title}</b></h5>\\n\"\n            context += content\n        docs.append(self._to_document(header, context))\n\n        header = \"\\n<b>Sources</b>\\n\"\n        context = \"\"\n        for _, row in sources.iterrows():\n            title, content = row[\"id\"], row[\"content\"]\n            context += f\"\\n\\n<h5>Source <b>#{title}</b></h5>\\n\"\n            context += content\n        docs.append(self._to_document(header, context))\n\n        return docs\n\n    def plot_graph(self, relationships):\n        G = create_knowledge_graph(relationships)\n        plot = visualize_graph(G)\n        return plot\n\n    def run(\n        self,\n        text: str,\n    ) -> list[RetrievedDocument]:\n        if not self.file_ids:\n            return []\n\n        graphrag_func, query_params = self._build_graph_search()\n\n        # only local mode support graph visualization\n        if 
query_params.mode == \"local\":\n            entities, relationships, reports, sources = asyncio.run(\n                nano_graph_rag_build_local_query_context(\n                    graphrag_func, text, query_params\n                )\n            )\n\n            documents = self.format_context_records(\n                entities, relationships, reports, sources\n            )\n            plot = self.plot_graph(relationships)\n\n            documents += [\n                RetrievedDocument(\n                    text=\"\",\n                    metadata={\n                        \"file_name\": \"GraphRAG\",\n                        \"type\": \"plot\",\n                        \"data\": plot,\n                    },\n                ),\n            ]\n        else:\n            context = graphrag_func.query(text, query_params)\n\n            documents = [\n                RetrievedDocument(\n                    text=context,\n                    metadata={\n                        \"file_name\": \"GraphRAG {} Search\".format(\n                            query_params.mode.capitalize()\n                        ),\n                        \"type\": \"table\",\n                    },\n                )\n            ]\n\n        return documents\n"
  },
  {
    "path": "libs/ktem/ktem/index/file/graph/pipelines.py",
    "content": "import os\nimport shutil\nimport subprocess\nfrom pathlib import Path\nfrom shutil import rmtree\nfrom typing import Generator\nfrom uuid import uuid4\n\nimport pandas as pd\nimport tiktoken\nimport yaml\nfrom decouple import config\nfrom ktem.db.models import engine\nfrom sqlalchemy.orm import Session\nfrom theflow.settings import settings\n\nfrom kotaemon.base import Document, Param, RetrievedDocument\n\nfrom ..pipelines import BaseFileIndexRetriever, IndexDocumentPipeline, IndexPipeline\nfrom .visualize import create_knowledge_graph, visualize_graph\n\ntry:\n    from graphrag.query.context_builder.entity_extraction import EntityVectorStoreKey\n    from graphrag.query.indexer_adapters import (\n        read_indexer_entities,\n        read_indexer_relationships,\n        read_indexer_reports,\n        read_indexer_text_units,\n    )\n    from graphrag.query.input.loaders.dfs import store_entity_semantic_embeddings\n    from graphrag.query.llm.oai.embedding import OpenAIEmbedding\n    from graphrag.query.llm.oai.typing import OpenaiApiType\n    from graphrag.query.structured_search.local_search.mixed_context import (\n        LocalSearchMixedContext,\n    )\n    from graphrag.vector_stores.lancedb import LanceDBVectorStore\nexcept ImportError:\n    print(\n        (\n            \"GraphRAG dependencies not installed. \"\n            \"Try `pip install graphrag future` to install. \"\n            \"GraphRAG retriever pipeline will not work properly.\"\n        )\n    )\n\n\nfilestorage_path = Path(settings.KH_FILESTORAGE_PATH) / \"graphrag\"\nfilestorage_path.mkdir(parents=True, exist_ok=True)\n\nGRAPHRAG_KEY_MISSING_MESSAGE = (\n    \"GRAPHRAG_API_KEY is not set. 
Please set it to use the GraphRAG retriever pipeline.\"\n)\n\n\ndef check_graphrag_api_key():\n    return len(os.getenv(\"GRAPHRAG_API_KEY\", \"\")) > 0\n\n\ndef prepare_graph_index_path(graph_id: str):\n    root_path = Path(filestorage_path) / graph_id\n    input_path = root_path / \"input\"\n\n    return root_path, input_path\n\n\nclass GraphRAGIndexingPipeline(IndexDocumentPipeline):\n    \"\"\"GraphRAG specific indexing pipeline\"\"\"\n\n    def route(self, file_path: str | Path) -> IndexPipeline:\n        \"\"\"Simply disable the splitter (chunking) for this pipeline\"\"\"\n        pipeline = super().route(file_path)\n        pipeline.splitter = None\n\n        return pipeline\n\n    def store_file_id_with_graph_id(self, file_ids: list[str | None]):\n        # create new graph_id and assign them to doc_id in self.Index\n        # record in the index\n        graph_id = str(uuid4())\n        with Session(engine) as session:\n            nodes = []\n            for file_id in file_ids:\n                if not file_id:\n                    continue\n                nodes.append(\n                    self.Index(\n                        source_id=file_id,\n                        target_id=graph_id,\n                        relation_type=\"graph\",\n                    )\n                )\n\n            session.add_all(nodes)\n            session.commit()\n\n        return graph_id\n\n    def write_docs_to_files(self, graph_id: str, docs: list[Document]):\n        root_path, input_path = prepare_graph_index_path(graph_id)\n        input_path.mkdir(parents=True, exist_ok=True)\n\n        for doc in docs:\n            if doc.metadata.get(\"type\", \"text\") == \"text\":\n                with open(input_path / f\"{doc.doc_id}.txt\", \"w\") as f:\n                    f.write(doc.text)\n\n        return root_path\n\n    def call_graphrag_index(self, graph_id: str, all_docs: list[Document]):\n        if not check_graphrag_api_key():\n            raise 
ValueError(GRAPHRAG_KEY_MISSING_MESSAGE)\n\n        # call GraphRAG index with docs and graph_id\n        input_path = self.write_docs_to_files(graph_id, all_docs)\n        input_path = str(input_path.absolute())\n\n        # Construct the command\n        command = [\n            \"python\",\n            \"-m\",\n            \"graphrag.index\",\n            \"--root\",\n            input_path,\n            \"--reporter\",\n            \"rich\",\n            \"--init\",\n        ]\n\n        # Run the command\n        yield Document(\n            channel=\"debug\",\n            text=\"[GraphRAG] Creating index... This can take a long time.\",\n        )\n        result = subprocess.run(command, capture_output=True, text=True)\n        print(result.stdout)\n        command = command[:-1]\n\n        # copy customized GraphRAG config file if it exists\n        if config(\"USE_CUSTOMIZED_GRAPHRAG_SETTING\", default=\"value\").lower() == \"true\":\n            setting_file_path = os.path.join(os.getcwd(), \"settings.yaml.example\")\n            destination_file_path = os.path.join(input_path, \"settings.yaml\")\n            try:\n                shutil.copy(setting_file_path, destination_file_path)\n            except shutil.Error:\n                # Handle the error if the file copy fails\n                print(\"failed to copy customized GraphRAG config file. 
\")\n\n        # Run the command and stream stdout\n        with subprocess.Popen(command, stdout=subprocess.PIPE, text=True) as process:\n            if process.stdout:\n                for line in process.stdout:\n                    yield Document(channel=\"debug\", text=line)\n\n    def stream(\n        self, file_paths: str | Path | list[str | Path], reindex: bool = False, **kwargs\n    ) -> Generator[\n        Document, None, tuple[list[str | None], list[str | None], list[Document]]\n    ]:\n        file_ids, errors, all_docs = yield from super().stream(\n            file_paths, reindex=reindex, **kwargs\n        )\n\n        # assign graph_id to file_ids\n        graph_id = self.store_file_id_with_graph_id(file_ids)\n        # call GraphRAG index with docs and graph_id\n        yield from self.call_graphrag_index(graph_id, all_docs)\n\n        return file_ids, errors, all_docs\n\n\nclass GraphRAGRetrieverPipeline(BaseFileIndexRetriever):\n    \"\"\"GraphRAG specific retriever pipeline\"\"\"\n\n    Index = Param(help=\"The SQLAlchemy Index table\")\n    file_ids: list[str] = []\n\n    @classmethod\n    def get_user_settings(cls) -> dict:\n        return {\n            \"search_type\": {\n                \"name\": \"Search type\",\n                \"value\": \"local\",\n                \"choices\": [\"local\"],\n                \"component\": \"dropdown\",\n                \"info\": \"Whether to use local or global search in the graph.\",\n            }\n        }\n\n    def _build_graph_search(self):\n        assert (\n            len(self.file_ids) <= 1\n        ), \"GraphRAG retriever only supports one file_id at a time\"\n\n        file_id = self.file_ids[0]\n        # retrieve the graph_id from the index\n        with Session(engine) as session:\n            graph_id = (\n                session.query(self.Index.target_id)\n                .filter(self.Index.source_id == file_id)\n                .filter(self.Index.relation_type == \"graph\")\n            
    .first()\n            )\n            graph_id = graph_id[0] if graph_id else None\n            assert graph_id, f\"GraphRAG index not found for file_id: {file_id}\"\n\n        root_path, _ = prepare_graph_index_path(graph_id)\n        output_path = root_path / \"output\"\n\n        INPUT_DIR = output_path\n        LANCEDB_URI = str(INPUT_DIR / \"lancedb\")\n        COMMUNITY_REPORT_TABLE = \"create_final_community_reports\"\n        ENTITY_TABLE = \"create_final_nodes\"\n        ENTITY_EMBEDDING_TABLE = \"create_final_entities\"\n        RELATIONSHIP_TABLE = \"create_final_relationships\"\n        TEXT_UNIT_TABLE = \"create_final_text_units\"\n        COMMUNITY_LEVEL = 2\n\n        # read nodes table to get community and degree data\n        entity_df = pd.read_parquet(f\"{INPUT_DIR}/{ENTITY_TABLE}.parquet\")\n        entity_embedding_df = pd.read_parquet(\n            f\"{INPUT_DIR}/{ENTITY_EMBEDDING_TABLE}.parquet\"\n        )\n        entities = read_indexer_entities(\n            entity_df, entity_embedding_df, COMMUNITY_LEVEL\n        )\n\n        # load description embeddings to an in-memory lancedb vectorstore\n        # to connect to a remote db, specify url and port values.\n        description_embedding_store = LanceDBVectorStore(\n            collection_name=\"entity_description_embeddings\",\n        )\n        description_embedding_store.connect(db_uri=LANCEDB_URI)\n        if Path(LANCEDB_URI).is_dir():\n            rmtree(LANCEDB_URI)\n        _ = store_entity_semantic_embeddings(\n            entities=entities, vectorstore=description_embedding_store\n        )\n        print(f\"Entity count: {len(entity_df)}\")\n\n        # Read relationships\n        relationship_df = pd.read_parquet(f\"{INPUT_DIR}/{RELATIONSHIP_TABLE}.parquet\")\n        relationships = read_indexer_relationships(relationship_df)\n\n        # Read community reports\n        report_df = pd.read_parquet(f\"{INPUT_DIR}/{COMMUNITY_REPORT_TABLE}.parquet\")\n        reports = 
read_indexer_reports(report_df, entity_df, COMMUNITY_LEVEL)\n\n        # Read text units\n        text_unit_df = pd.read_parquet(f\"{INPUT_DIR}/{TEXT_UNIT_TABLE}.parquet\")\n        text_units = read_indexer_text_units(text_unit_df)\n\n        # initialize default settings\n        embedding_model = os.getenv(\n            \"GRAPHRAG_EMBEDDING_MODEL\", \"text-embedding-3-small\"\n        )\n        embedding_api_key = os.getenv(\"GRAPHRAG_API_KEY\")\n        embedding_api_base = None\n\n        # use customized GraphRAG settings if the flag is set\n        if config(\"USE_CUSTOMIZED_GRAPHRAG_SETTING\", default=\"value\").lower() == \"true\":\n            settings_yaml_path = Path(root_path) / \"settings.yaml\"\n            with open(settings_yaml_path, \"r\") as f:\n                settings = yaml.safe_load(f)\n            if settings[\"embeddings\"][\"llm\"][\"model\"]:\n                embedding_model = settings[\"embeddings\"][\"llm\"][\"model\"]\n            if settings[\"embeddings\"][\"llm\"][\"api_key\"]:\n                embedding_api_key = settings[\"embeddings\"][\"llm\"][\"api_key\"]\n            if settings[\"embeddings\"][\"llm\"][\"api_base\"]:\n                embedding_api_base = settings[\"embeddings\"][\"llm\"][\"api_base\"]\n\n        text_embedder = OpenAIEmbedding(\n            api_key=embedding_api_key,\n            api_base=embedding_api_base,\n            api_type=OpenaiApiType.OpenAI,\n            model=embedding_model,\n            deployment_name=embedding_model,\n            max_retries=20,\n        )\n        token_encoder = tiktoken.get_encoding(\"cl100k_base\")\n\n        context_builder = LocalSearchMixedContext(\n            community_reports=reports,\n            text_units=text_units,\n            entities=entities,\n            relationships=relationships,\n            covariates=None,\n            entity_text_embeddings=description_embedding_store,\n            embedding_vectorstore_key=EntityVectorStoreKey.ID,\n            # if 
the vectorstore uses entity title as ids,\n            # set this to EntityVectorStoreKey.TITLE\n            text_embedder=text_embedder,\n            token_encoder=token_encoder,\n        )\n        return context_builder\n\n    def _to_document(self, header: str, context_text: str) -> RetrievedDocument:\n        return RetrievedDocument(\n            text=context_text,\n            metadata={\n                \"file_name\": header,\n                \"type\": \"table\",\n                \"llm_trulens_score\": 1.0,\n            },\n            score=1.0,\n        )\n\n    def format_context_records(self, context_records) -> list[RetrievedDocument]:\n        entities = context_records.get(\"entities\", [])\n        relationships = context_records.get(\"relationships\", [])\n        reports = context_records.get(\"reports\", [])\n        sources = context_records.get(\"sources\", [])\n\n        docs = []\n\n        context: str = \"\"\n\n        header = \"<b>Entities</b>\\n\"\n        context = entities[[\"entity\", \"description\"]].to_markdown(index=False)\n        docs.append(self._to_document(header, context))\n\n        header = \"\\n<b>Relationships</b>\\n\"\n        context = relationships[[\"source\", \"target\", \"description\"]].to_markdown(\n            index=False\n        )\n        docs.append(self._to_document(header, context))\n\n        header = \"\\n<b>Reports</b>\\n\"\n        context = \"\"\n        for idx, row in reports.iterrows():\n            title, content = row[\"title\"], row[\"content\"]\n            context += f\"\\n\\n<h5>Report <b>{title}</b></h5>\\n\"\n            context += content\n        docs.append(self._to_document(header, context))\n\n        header = \"\\n<b>Sources</b>\\n\"\n        context = \"\"\n        for idx, row in sources.iterrows():\n            title, content = row[\"id\"], row[\"text\"]\n            context += f\"\\n\\n<h5>Source <b>#{title}</b></h5>\\n\"\n            context += content\n        
docs.append(self._to_document(header, context))\n\n        return docs\n\n    def plot_graph(self, context_records):\n        relationships = context_records.get(\"relationships\", [])\n        G = create_knowledge_graph(relationships)\n        plot = visualize_graph(G)\n        return plot\n\n    def generate_relevant_scores(self, text, documents: list[RetrievedDocument]):\n        return documents\n\n    def run(\n        self,\n        text: str,\n    ) -> list[RetrievedDocument]:\n        if not self.file_ids:\n            return []\n\n        if not check_graphrag_api_key():\n            raise ValueError(GRAPHRAG_KEY_MISSING_MESSAGE)\n\n        context_builder = self._build_graph_search()\n\n        local_context_params = {\n            \"text_unit_prop\": 0.5,\n            \"community_prop\": 0.1,\n            \"conversation_history_max_turns\": 5,\n            \"conversation_history_user_turns_only\": True,\n            \"top_k_mapped_entities\": 10,\n            \"top_k_relationships\": 10,\n            \"include_entity_rank\": False,\n            \"include_relationship_weight\": False,\n            \"include_community_rank\": False,\n            \"return_candidate_context\": False,\n            \"embedding_vectorstore_key\": EntityVectorStoreKey.ID,\n            # set this to EntityVectorStoreKey.TITLE i\n            # f the vectorstore uses entity title as ids\n            \"max_tokens\": 12_000,\n            # change this based on the token limit you have on your model\n            # (if you are using a model with 8k limit, a good setting could be 5000)\n        }\n\n        context_text, context_records = context_builder.build_context(\n            query=text,\n            conversation_history=None,\n            **local_context_params,\n        )\n        documents = self.format_context_records(context_records)\n        plot = self.plot_graph(context_records)\n\n        return documents + [\n            RetrievedDocument(\n                text=\"\",\n   
             metadata={\n                    \"file_name\": \"GraphRAG\",\n                    \"type\": \"plot\",\n                    \"data\": plot,\n                },\n            ),\n        ]\n"
  },
  {
    "path": "libs/ktem/ktem/index/file/graph/visualize.py",
    "content": "import networkx as nx\nimport plotly.graph_objects as go\nfrom plotly.io import to_json\n\n\ndef create_knowledge_graph(df):\n    \"\"\"\n    create nx Graph from DataFrame relations data\n    \"\"\"\n    G = nx.Graph()\n    for _, row in df.iterrows():\n        source = row[\"source\"]\n        target = row[\"target\"]\n        attributes = {k: v for k, v in row.items() if k not in [\"source\", \"target\"]}\n        G.add_edge(source, target, **attributes)\n\n    return G\n\n\ndef visualize_graph(G):\n    pos = nx.spring_layout(G, dim=2)\n\n    edge_x = []\n    edge_y = []\n    edge_texts = nx.get_edge_attributes(G, \"description\")\n    to_display_edge_texts = []\n    for edge in G.edges():\n        x0, y0 = pos[edge[0]]\n        x1, y1 = pos[edge[1]]\n        edge_x.append(x0)\n        edge_x.append(x1)\n        edge_x.append(None)\n        edge_y.append(y0)\n        edge_y.append(y1)\n        edge_y.append(None)\n        to_display_edge_texts.append(edge_texts[edge])\n\n    edge_trace = go.Scatter(\n        x=edge_x,\n        y=edge_y,\n        text=to_display_edge_texts,\n        line=dict(width=0.5, color=\"#888\"),\n        hoverinfo=\"text\",\n        mode=\"lines\",\n    )\n\n    node_x = []\n    node_y = []\n    for node in G.nodes():\n        x, y = pos[node]\n        node_x.append(x)\n        node_y.append(y)\n\n    node_adjacencies = []\n    node_text = []\n    node_size = []\n    for node_id, adjacencies in enumerate(G.adjacency()):\n        degree = len(adjacencies[1])\n        node_adjacencies.append(degree)\n        node_text.append(adjacencies[0])\n        node_size.append(15 if degree < 5 else (30 if degree < 10 else 60))\n\n    node_trace = go.Scatter(\n        x=node_x,\n        y=node_y,\n        textfont=dict(\n            family=\"Courier New, monospace\",\n            size=10,  # Set the font size here\n        ),\n        textposition=\"top center\",\n        mode=\"markers+text\",\n        hoverinfo=\"text\",\n        
text=node_text,\n        marker=dict(\n            showscale=True,\n            # colorscale options\n            size=node_size,\n            colorscale=\"YlGnBu\",\n            reversescale=True,\n            color=node_adjacencies,\n            colorbar=dict(\n                thickness=5,\n                xanchor=\"left\",\n                titleside=\"right\",\n            ),\n            line_width=2,\n        ),\n    )\n\n    fig = go.Figure(\n        data=[edge_trace, node_trace],\n        layout=go.Layout(\n            showlegend=False,\n            hovermode=\"closest\",\n            margin=dict(b=20, l=5, r=5, t=40),\n            xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),\n            yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),\n        ),\n    )\n    fig.update_layout(autosize=True)\n\n    return to_json(fig)\n"
  },
  {
    "path": "libs/ktem/ktem/index/file/index.py",
    "content": "import uuid\nfrom datetime import datetime\nfrom typing import Any, Optional, Type\n\nfrom ktem.components import filestorage_path, get_docstore, get_vectorstore\nfrom ktem.db.engine import engine\nfrom ktem.index.base import BaseIndex\nfrom sqlalchemy import JSON, Column, DateTime, Integer, String, UniqueConstraint\nfrom sqlalchemy.ext.declarative import declarative_base\nfrom sqlalchemy.ext.mutable import MutableDict\nfrom theflow.settings import settings as flowsettings\nfrom theflow.utils.modules import import_dotted_string\nfrom tzlocal import get_localzone\n\nfrom kotaemon.storages import BaseDocumentStore, BaseVectorStore\n\nfrom .base import BaseFileIndexIndexing, BaseFileIndexRetriever\n\n\ndef generate_uuid():\n    return str(uuid.uuid4())\n\n\nclass FileIndex(BaseIndex):\n    \"\"\"\n    File index to store and allow retrieval of files\n\n    The file index stores files in a local folder and index them for retrieval.\n    This file index provides the following infrastructure to support the indexing:\n        - SQL table Source: store the list of files that are indexed by the system\n        - Vector store: contain the embedding of segments of the files\n        - Document store: contain the text of segments of the files. 
Each text stored\n        in this document store is associated with a vector in the vector store.\n        - SQL table Index: store the relationship between (1) the source and the\n        docstore, and (2) the source and the vector store.\n    \"\"\"\n\n    def __init__(self, app, id: int, name: str, config: dict):\n        super().__init__(app, id, name, config)\n\n        self._indexing_pipeline_cls: Type[BaseFileIndexIndexing]\n        self._retriever_pipeline_cls: list[Type[BaseFileIndexRetriever]]\n        self._selector_ui_cls: Type\n        self._selector_ui: Any = None\n        self._index_ui_cls: Type\n        self._index_ui: Any = None\n\n        self._default_settings: dict[str, dict] = {}\n        self._setting_mappings: dict[str, dict] = {}\n\n    def _setup_resources(self):\n        \"\"\"Setup resources for the file index\n\n        The resources include:\n            - Database table\n            - Vector store\n            - Document store\n            - File storage path\n        \"\"\"\n        Base = declarative_base()\n\n        if self.config.get(\"private\", False):\n            Source = type(\n                \"Source\",\n                (Base,),\n                {\n                    \"__tablename__\": f\"index__{self.id}__source\",\n                    \"__table_args__\": (\n                        UniqueConstraint(\"name\", \"user\", name=\"_name_user_uc\"),\n                    ),\n                    \"id\": Column(\n                        String,\n                        primary_key=True,\n                        default=lambda: str(uuid.uuid4()),\n                        unique=True,\n                    ),\n                    \"name\": Column(String),\n                    \"path\": Column(String),\n                    \"size\": Column(Integer, default=0),\n                    \"date_created\": Column(\n                        DateTime(timezone=True), default=datetime.now(get_localzone())\n                    ),\n                  
  \"user\": Column(String, default=\"\"),\n                    \"note\": Column(\n                        MutableDict.as_mutable(JSON),  # type: ignore\n                        default={},\n                    ),\n                },\n            )\n        else:\n            Source = type(\n                \"Source\",\n                (Base,),\n                {\n                    \"__tablename__\": f\"index__{self.id}__source\",\n                    \"id\": Column(\n                        String,\n                        primary_key=True,\n                        default=lambda: str(uuid.uuid4()),\n                        unique=True,\n                    ),\n                    \"name\": Column(String, unique=True),\n                    \"path\": Column(String),\n                    \"size\": Column(Integer, default=0),\n                    \"date_created\": Column(\n                        DateTime(timezone=True), default=datetime.now(get_localzone())\n                    ),\n                    \"user\": Column(String, default=\"\"),\n                    \"note\": Column(\n                        MutableDict.as_mutable(JSON),  # type: ignore\n                        default={},\n                    ),\n                },\n            )\n        Index = type(\n            \"IndexTable\",\n            (Base,),\n            {\n                \"__tablename__\": f\"index__{self.id}__index\",\n                \"id\": Column(Integer, primary_key=True, autoincrement=True),\n                \"source_id\": Column(String),\n                \"target_id\": Column(String),\n                \"relation_type\": Column(String),\n                \"user\": Column(String, default=\"\"),\n            },\n        )\n        FileGroup = type(\n            \"FileGroupTable\",\n            (Base,),\n            {\n                \"__tablename__\": f\"index__{self.id}__group\",\n                \"__table_args__\": (\n                    UniqueConstraint(\"name\", \"user\", 
name=\"_name_user_uc\"),\n                ),\n                \"id\": Column(\n                    String,\n                    primary_key=True,\n                    default=lambda: str(uuid.uuid4()),\n                    unique=True,\n                ),\n                \"date_created\": Column(\n                    DateTime(timezone=True), default=datetime.now(get_localzone())\n                ),\n                \"name\": Column(String),\n                \"user\": Column(String, default=\"\"),\n                \"data\": Column(\n                    MutableDict.as_mutable(JSON),  # type: ignore\n                    default={\"files\": []},\n                ),\n            },\n        )\n\n        self._vs: BaseVectorStore = get_vectorstore(f\"index_{self.id}\")\n        self._docstore: BaseDocumentStore = get_docstore(f\"index_{self.id}\")\n        self._fs_path = filestorage_path / f\"index_{self.id}\"\n        self._resources = {\n            \"Source\": Source,\n            \"Index\": Index,\n            \"FileGroup\": FileGroup,\n            \"VectorStore\": self._vs,\n            \"DocStore\": self._docstore,\n            \"FileStoragePath\": self._fs_path,\n        }\n\n    def _setup_indexing_cls(self):\n        \"\"\"Retrieve the indexing class for the file index\n\n        There is only one indexing class.\n\n        The indexing class will be retrieved in the following order. 
Stop at the\n        first one found:\n            - `FILE_INDEX_PIPELINE` in self.config\n            - `FILE_INDEX_{id}_PIPELINE` in the flowsettings\n            - `FILE_INDEX_PIPELINE` in the flowsettings\n            - The default .pipelines.IndexDocumentPipeline\n        \"\"\"\n        if \"FILE_INDEX_PIPELINE\" in self.config:\n            self._indexing_pipeline_cls = import_dotted_string(\n                self.config[\"FILE_INDEX_PIPELINE\"], safe=False\n            )\n            return\n\n        if hasattr(flowsettings, f\"FILE_INDEX_{self.id}_PIPELINE\"):\n            self._indexing_pipeline_cls = import_dotted_string(\n                getattr(flowsettings, f\"FILE_INDEX_{self.id}_PIPELINE\"), safe=False\n            )\n            return\n\n        if hasattr(flowsettings, \"FILE_INDEX_PIPELINE\"):\n            self._indexing_pipeline_cls = import_dotted_string(\n                getattr(flowsettings, \"FILE_INDEX_PIPELINE\"), safe=False\n            )\n            return\n\n        from .pipelines import IndexDocumentPipeline\n\n        self._indexing_pipeline_cls = IndexDocumentPipeline\n\n    def _setup_retriever_cls(self):\n        \"\"\"Retrieve the retriever classes for the file index\n\n        There can be multiple retriever classes.\n\n        The retriever classes will be retrieved in the following order. 
Stop at the\n        first one found:\n            - `FILE_INDEX_RETRIEVER_PIPELINES` in self.config\n            - `FILE_INDEX_{id}_RETRIEVER_PIPELINES` in the flowsettings\n            - `FILE_INDEX_RETRIEVER_PIPELINES` in the flowsettings\n            - The default .pipelines.DocumentRetrievalPipeline\n        \"\"\"\n        if \"FILE_INDEX_RETRIEVER_PIPELINES\" in self.config:\n            self._retriever_pipeline_cls = [\n                import_dotted_string(each, safe=False)\n                for each in self.config[\"FILE_INDEX_RETRIEVER_PIPELINES\"]\n            ]\n            return\n\n        if hasattr(flowsettings, f\"FILE_INDEX_{self.id}_RETRIEVER_PIPELINES\"):\n            self._retriever_pipeline_cls = [\n                import_dotted_string(each, safe=False)\n                for each in getattr(\n                    flowsettings, f\"FILE_INDEX_{self.id}_RETRIEVER_PIPELINES\"\n                )\n            ]\n            return\n\n        if hasattr(flowsettings, \"FILE_INDEX_RETRIEVER_PIPELINES\"):\n            self._retriever_pipeline_cls = [\n                import_dotted_string(each, safe=False)\n                for each in getattr(flowsettings, \"FILE_INDEX_RETRIEVER_PIPELINES\")\n            ]\n            return\n\n        from .pipelines import DocumentRetrievalPipeline\n\n        self._retriever_pipeline_cls = [DocumentRetrievalPipeline]\n\n    def _setup_file_selector_ui_cls(self):\n        \"\"\"Retrieve the file selector UI for the file index\n\n        There is only one file selector UI class.\n\n        The file selector UI class will be retrieved in the following order. 
Stop at the\n        first one found:\n            - `FILE_INDEX_SELECTOR_UI` in self.config\n            - `FILE_INDEX_{id}_SELECTOR_UI` in the flowsettings\n            - `FILE_INDEX_SELECTOR_UI` in the flowsettings\n            - The default .ui.FileSelector\n        \"\"\"\n        if \"FILE_INDEX_SELECTOR_UI\" in self.config:\n            self._selector_ui_cls = import_dotted_string(\n                self.config[\"FILE_INDEX_SELECTOR_UI\"], safe=False\n            )\n            return\n\n        if hasattr(flowsettings, f\"FILE_INDEX_{self.id}_SELECTOR_UI\"):\n            self._selector_ui_cls = import_dotted_string(\n                getattr(flowsettings, f\"FILE_INDEX_{self.id}_SELECTOR_UI\"),\n                safe=False,\n            )\n            return\n\n        if hasattr(flowsettings, \"FILE_INDEX_SELECTOR_UI\"):\n            self._selector_ui_cls = import_dotted_string(\n                getattr(flowsettings, \"FILE_INDEX_SELECTOR_UI\"), safe=False\n            )\n            return\n\n        from .ui import FileSelector\n\n        self._selector_ui_cls = FileSelector\n\n    def _setup_file_index_ui_cls(self):\n        \"\"\"Retrieve the Index UI class\n\n        There is only one Index UI class.\n\n        The Index UI class will be retrieved in the following order. 
Stop at the\n        first one found:\n            - `FILE_INDEX_UI` in self.config\n            - `FILE_INDEX_{id}_UI` in the flowsettings\n            - `FILE_INDEX_UI` in the flowsettings\n            - The default .ui.FileIndexPage\n        \"\"\"\n        if \"FILE_INDEX_UI\" in self.config:\n            self._index_ui_cls = import_dotted_string(\n                self.config[\"FILE_INDEX_UI\"], safe=False\n            )\n            return\n\n        if hasattr(flowsettings, f\"FILE_INDEX_{self.id}_UI\"):\n            self._index_ui_cls = import_dotted_string(\n                getattr(flowsettings, f\"FILE_INDEX_{self.id}_UI\"),\n                safe=False,\n            )\n            return\n\n        if hasattr(flowsettings, \"FILE_INDEX_UI\"):\n            self._index_ui_cls = import_dotted_string(\n                getattr(flowsettings, \"FILE_INDEX_UI\"), safe=False\n            )\n            return\n\n        from .ui import FileIndexPage\n\n        self._index_ui_cls = FileIndexPage\n\n    def on_create(self):\n        \"\"\"Create the index for the first time\n\n        For the file index, this will:\n            1. Postprocess the config\n            2. Create the index and the source tables if they do not already exist\n            3. Create the vectorstore\n            4. 
Create the docstore\n        \"\"\"\n        # default user's value\n        config = {}\n        for key, value in self.get_admin_settings().items():\n            config[key] = value[\"value\"]\n\n        # user's modification\n        config.update(self.config)\n\n        self.config = config\n\n        # create the resources\n        self._setup_resources()\n        self._resources[\"Source\"].metadata.create_all(engine)  # type: ignore\n        self._resources[\"Index\"].metadata.create_all(engine)  # type: ignore\n        self._resources[\"FileGroup\"].metadata.create_all(engine)  # type: ignore\n        self._fs_path.mkdir(parents=True, exist_ok=True)\n\n    def on_delete(self):\n        \"\"\"Clean up the index when the user deletes it\"\"\"\n        import shutil\n\n        self._setup_resources()\n        self._resources[\"Source\"].__table__.drop(engine)  # type: ignore\n        self._resources[\"Index\"].__table__.drop(engine)  # type: ignore\n        self._resources[\"FileGroup\"].__table__.drop(engine)  # type: ignore\n        self._vs.drop()\n        self._docstore.drop()\n        shutil.rmtree(self._fs_path)\n\n    def on_start(self):\n        \"\"\"Set up the classes and hooks\"\"\"\n        self._setup_resources()\n        self._setup_indexing_cls()\n        self._setup_retriever_cls()\n        self._setup_file_index_ui_cls()\n        self._setup_file_selector_ui_cls()\n\n    def get_selector_component_ui(self):\n        if self._selector_ui is None:\n            self._selector_ui = self._selector_ui_cls(self._app, self)\n        return self._selector_ui\n\n    def get_index_page_ui(self):\n        if self._index_ui is None:\n            self._index_ui = self._index_ui_cls(self._app, self)\n        return self._index_ui\n\n    def get_user_settings(self):\n        if self._default_settings:\n            return self._default_settings\n\n        settings = {}\n        settings.update(self._indexing_pipeline_cls.get_user_settings())\n        for cls in 
self._retriever_pipeline_cls:\n            settings.update(cls.get_user_settings())\n\n        self._default_settings = settings\n        return settings\n\n    @classmethod\n    def get_admin_settings(cls):\n        from ktem.embeddings.manager import embedding_models_manager\n\n        embedding_default = \"default\"\n        embedding_choices = list(embedding_models_manager.options().keys())\n\n        return {\n            \"embedding\": {\n                \"name\": \"Embedding model\",\n                \"value\": embedding_default,\n                \"component\": \"dropdown\",\n                \"choices\": embedding_choices,\n                \"info\": \"The name of embedding model to use.\",\n            },\n            \"supported_file_types\": {\n                \"name\": \"Supported file types\",\n                \"value\": \".pdf, .txt\",\n                \"component\": \"text\",\n                \"info\": \"The file types that can be indexed, separated by comma.\",\n            },\n            \"max_file_size\": {\n                \"name\": \"Max file size (MB)\",\n                \"value\": 1000,\n                \"component\": \"number\",\n                \"info\": \"The maximum size of file. Set 0 to disable.\",\n            },\n            \"max_number_of_files\": {\n                \"name\": \"Max number of files that can be indexed\",\n                \"value\": 0,\n                \"component\": \"number\",\n                \"info\": (\n                    \"The total number of files that can be indexed on the system. 
\"\n                    \"Set 0 to disable.\"\n                ),\n            },\n            \"private\": {\n                \"name\": \"Make private\",\n                \"value\": False,\n                \"component\": \"radio\",\n                \"choices\": [(\"Yes\", True), (\"No\", False)],\n                \"info\": \"If private, files will not be accessible across users.\",\n            },\n            \"chunk_size\": {\n                \"name\": \"Size of chunk (number of tokens)\",\n                \"value\": 0,\n                \"component\": \"number\",\n                \"info\": (\n                    \"Number of tokens of each text segment. \"\n                    \"Set 0 to use developer setting.\"\n                ),\n            },\n            \"chunk_overlap\": {\n                \"name\": \"Number of overlapping tokens between chunks\",\n                \"value\": 0,\n                \"component\": \"number\",\n                \"info\": (\n                    \"Number of tokens that consecutive text segments \"\n                    \"should overlap with each other. 
\"\n                    \"Set 0 to use developer setting.\"\n                ),\n            },\n        }\n\n    def get_indexing_pipeline(self, settings, user_id) -> BaseFileIndexIndexing:\n        \"\"\"Define the interface of the indexing pipeline\"\"\"\n\n        prefix = f\"index.options.{self.id}.\"\n        stripped_settings = {}\n        for key, value in settings.items():\n            if key.startswith(prefix):\n                stripped_settings[key[len(prefix) :]] = value\n\n        obj = self._indexing_pipeline_cls.get_pipeline(stripped_settings, self.config)\n        obj.Source = self._resources[\"Source\"]\n        obj.Index = self._resources[\"Index\"]\n        obj.VS = self._vs\n        obj.DS = self._docstore\n        obj.FSPath = self._fs_path\n        obj.user_id = user_id\n        obj.private = self.config.get(\"private\", False)\n        obj.chunk_size = self.config.get(\"chunk_size\", 0)\n        obj.chunk_overlap = self.config.get(\"chunk_overlap\", 0)\n\n        return obj\n\n    def get_retriever_pipelines(\n        self, settings: dict, user_id: int, selected: Any = None\n    ) -> list[\"BaseFileIndexRetriever\"]:\n        # retrieval settings\n        prefix = f\"index.options.{self.id}.\"\n        stripped_settings = {}\n        for key, value in settings.items():\n            if key.startswith(prefix):\n                stripped_settings[key[len(prefix) :]] = value\n\n        # transform selected id\n        selected_ids: Optional[list[str]] = self._selector_ui.get_selected_ids(selected)\n\n        retrievers = []\n        for cls in self._retriever_pipeline_cls:\n            obj = cls.get_pipeline(stripped_settings, self.config, selected_ids)\n            if obj is None:\n                continue\n            obj.Source = self._resources[\"Source\"]\n            obj.Index = self._resources[\"Index\"]\n            obj.VS = self._vs\n            obj.DS = self._docstore\n            obj.FSPath = self._fs_path\n            obj.user_id = 
user_id\n            retrievers.append(obj)\n\n        return retrievers\n"
  },
  {
    "path": "libs/ktem/ktem/index/file/knet/__init__.py",
    "content": "from .knet_index import KnowledgeNetworkFileIndex\n\n__all__ = [\"KnowledgeNetworkFileIndex\"]\n"
  },
  {
    "path": "libs/ktem/ktem/index/file/knet/knet_index.py",
    "content": "from typing import Any\n\nfrom ktem.index.file import FileIndex\n\nfrom ..base import BaseFileIndexIndexing, BaseFileIndexRetriever\nfrom .pipelines import KnetIndexingPipeline, KnetRetrievalPipeline\n\n\nclass KnowledgeNetworkFileIndex(FileIndex):\n    @classmethod\n    def get_admin_settings(cls):\n        admin_settings = super().get_admin_settings()\n\n        # remove embedding from admin settings\n        # as we don't need it\n        admin_settings.pop(\"embedding\")\n        return admin_settings\n\n    def _setup_indexing_cls(self):\n        self._indexing_pipeline_cls = KnetIndexingPipeline\n\n    def _setup_retriever_cls(self):\n        self._retriever_pipeline_cls = [KnetRetrievalPipeline]\n\n    def get_indexing_pipeline(self, settings, user_id) -> BaseFileIndexIndexing:\n        \"\"\"Define the interface of the indexing pipeline\"\"\"\n\n        obj = super().get_indexing_pipeline(settings, user_id)\n        # disable vectorstore for this kind of Index\n        # also set the collection_name for API call\n        obj.VS = None\n        obj.collection_name = f\"kh_index_{self.id}\"\n\n        return obj\n\n    def get_retriever_pipelines(\n        self, settings: dict, user_id: int, selected: Any = None\n    ) -> list[\"BaseFileIndexRetriever\"]:\n        retrievers = super().get_retriever_pipelines(settings, user_id, selected)\n\n        for obj in retrievers:\n            # disable vectorstore for this kind of Index\n            # also set the collection_name for API call\n            obj.VS = None\n            obj.collection_name = f\"kh_index_{self.id}\"\n\n        return retrievers\n"
  },
  {
    "path": "libs/ktem/ktem/index/file/knet/pipelines.py",
    "content": "import base64\nimport json\nimport os\nfrom pathlib import Path\nfrom typing import Optional, Sequence\n\nimport requests\nimport yaml\n\nfrom kotaemon.base import RetrievedDocument\nfrom kotaemon.indices.rankings import BaseReranking, LLMReranking, LLMTrulensScoring\n\nfrom ..pipelines import BaseFileIndexRetriever, IndexDocumentPipeline, IndexPipeline\n\n\nclass KnetIndexingPipeline(IndexDocumentPipeline):\n    \"\"\"Knowledge Network specific indexing pipeline\"\"\"\n\n    # collection name for external indexing call\n    collection_name: str = \"default\"\n\n    @classmethod\n    def get_user_settings(cls):\n        return {\n            \"reader_mode\": {\n                \"name\": \"Index parser\",\n                \"value\": \"knowledge_network\",\n                \"choices\": [\n                    (\"Default (KN)\", \"knowledge_network\"),\n                ],\n                \"component\": \"dropdown\",\n            },\n        }\n\n    def route(self, file_path: str | Path) -> IndexPipeline:\n        \"\"\"Simply disable the splitter (chunking) for this pipeline\"\"\"\n        pipeline = super().route(file_path)\n        pipeline.splitter = None\n        # assign IndexPipeline collection name to parse to loader\n        pipeline.collection_name = self.collection_name\n\n        return pipeline\n\n\nclass KnetRetrievalPipeline(BaseFileIndexRetriever):\n    DEFAULT_KNET_ENDPOINT: str = \"http://127.0.0.1:8081/retrieve\"\n\n    collection_name: str = \"default\"\n    rerankers: Sequence[BaseReranking] = [LLMReranking.withx()]\n\n    def encode_image_base64(self, image_path: str | Path) -> bytes | str:\n        \"\"\"Convert image to base64\"\"\"\n        img_base64 = \"data:image/png;base64,{}\"\n        with open(image_path, \"rb\") as image_file:\n            return img_base64.format(\n                base64.b64encode(image_file.read()).decode(\"utf-8\")\n            )\n\n    def run(\n        self,\n        text: str,\n        doc_ids: 
Optional[list[str]] = None,\n        *args,\n        **kwargs,\n    ) -> list[RetrievedDocument]:\n        \"\"\"Retrieve document excerpts similar to the text\n\n        Args:\n            text: the text to retrieve similar documents\n            doc_ids: list of document ids to constraint the retrieval\n        \"\"\"\n        print(\"searching in doc_ids\", doc_ids)\n        if not doc_ids:\n            return []\n\n        docs: list[RetrievedDocument] = []\n        params = {\n            \"query\": text,\n            \"collection\": self.collection_name,\n            \"meta_filters\": {\"doc_name\": doc_ids},\n        }\n        params[\"meta_filters\"] = json.dumps(params[\"meta_filters\"])\n        response = requests.get(self.DEFAULT_KNET_ENDPOINT, params=params)\n        metadata_translation = {\n            \"TABLE\": \"table\",\n            \"FIGURE\": \"image\",\n        }\n\n        if response.status_code == 200:\n            # Load YAML content from the response content\n            chunks = yaml.safe_load(response.content)\n            for chunk in chunks:\n                metadata = chunk[\"node\"][\"metadata\"]\n                metadata[\"type\"] = metadata_translation.get(\n                    metadata.pop(\"content_type\", \"\"), \"\"\n                )\n                metadata[\"file_name\"] = metadata.pop(\"company_name\", \"\")\n\n                # load image from returned path\n                image_path = metadata.get(\"image_path\", \"\")\n                if image_path and os.path.isfile(image_path):\n                    base64_im = self.encode_image_base64(image_path)\n                    # explicitly set document type\n                    metadata[\"type\"] = \"image\"\n                    metadata[\"image_origin\"] = base64_im\n\n                docs.append(\n                    RetrievedDocument(text=chunk[\"node\"][\"text\"], metadata=metadata)\n                )\n        else:\n            raise IOError(f\"{response.status_code}: 
{response.text}\")\n\n        for reranker in self.rerankers:\n            docs = reranker(documents=docs, query=text)\n\n        return docs\n\n    @classmethod\n    def get_user_settings(cls) -> dict:\n        from ktem.llms.manager import llms\n\n        try:\n            reranking_llm = llms.get_default_name()\n            reranking_llm_choices = list(llms.options().keys())\n        except Exception:\n            reranking_llm = None\n            reranking_llm_choices = []\n\n        return {\n            \"reranking_llm\": {\n                \"name\": \"LLM for scoring\",\n                \"value\": reranking_llm,\n                \"component\": \"dropdown\",\n                \"choices\": reranking_llm_choices,\n                \"special_type\": \"llm\",\n            },\n            \"retrieval_mode\": {\n                \"name\": \"Retrieval mode\",\n                \"value\": \"hybrid\",\n                \"choices\": [\"vector\", \"text\", \"hybrid\"],\n                \"component\": \"dropdown\",\n            },\n        }\n\n    @classmethod\n    def get_pipeline(cls, user_settings, index_settings, selected):\n        \"\"\"Get retriever objects associated with the index\n\n        Args:\n            settings: the settings of the app\n            kwargs: other arguments\n        \"\"\"\n        from ktem.llms.manager import llms\n\n        retriever = cls(\n            rerankers=[LLMTrulensScoring()],\n        )\n\n        # hacky way to input doc_ids to retriever.run() call (through theflow)\n        kwargs = {\".doc_ids\": selected}\n        retriever.set_run(kwargs, temp=False)\n\n        for reranker in retriever.rerankers:\n            if isinstance(reranker, LLMReranking):\n                reranker.llm = llms.get(\n                    user_settings[\"reranking_llm\"], llms.get_default()\n                )\n\n        return retriever\n"
  },
  {
    "path": "libs/ktem/ktem/index/file/pipelines.py",
    "content": "from __future__ import annotations\n\nimport json\nimport logging\nimport shutil\nimport threading\nimport time\nimport warnings\nfrom collections import defaultdict\nfrom copy import deepcopy\nfrom functools import lru_cache\nfrom hashlib import sha256\nfrom pathlib import Path\nfrom typing import Generator, Optional, Sequence\n\nimport tiktoken\nfrom decouple import config\nfrom ktem.db.models import engine\nfrom ktem.embeddings.manager import embedding_models_manager\nfrom ktem.llms.manager import llms\nfrom ktem.rerankings.manager import reranking_models_manager\nfrom llama_index.core.readers.base import BaseReader\nfrom llama_index.core.readers.file.base import default_file_metadata_func\nfrom llama_index.core.vector_stores import (\n    FilterCondition,\n    FilterOperator,\n    MetadataFilter,\n    MetadataFilters,\n)\nfrom llama_index.core.vector_stores.types import VectorStoreQueryMode\nfrom sqlalchemy import delete, select\nfrom sqlalchemy.orm import Session\nfrom theflow.settings import settings\nfrom theflow.utils.modules import import_dotted_string\n\nfrom kotaemon.base import BaseComponent, Document, Node, Param, RetrievedDocument\nfrom kotaemon.embeddings import BaseEmbeddings\nfrom kotaemon.indices import VectorIndexing, VectorRetrieval\nfrom kotaemon.indices.ingests.files import (\n    KH_DEFAULT_FILE_EXTRACTORS,\n    adobe_reader,\n    azure_reader,\n    docling_reader,\n    unstructured,\n    web_reader,\n)\nfrom kotaemon.indices.rankings import BaseReranking, LLMReranking, LLMTrulensScoring\nfrom kotaemon.indices.splitters import BaseSplitter, TokenSplitter\n\nfrom .base import BaseFileIndexIndexing, BaseFileIndexRetriever\n\nlogger = logging.getLogger(__name__)\n\n\n@lru_cache\ndef dev_settings():\n    \"\"\"Retrieve the developer settings from flowsettings.py\"\"\"\n    file_extractors = {}\n\n    if hasattr(settings, \"FILE_INDEX_PIPELINE_FILE_EXTRACTORS\"):\n        file_extractors = {\n            key: 
import_dotted_string(value, safe=False)()\n            for key, value in settings.FILE_INDEX_PIPELINE_FILE_EXTRACTORS.items()\n        }\n\n    chunk_size = None\n    if hasattr(settings, \"FILE_INDEX_PIPELINE_SPLITTER_CHUNK_SIZE\"):\n        chunk_size = settings.FILE_INDEX_PIPELINE_SPLITTER_CHUNK_SIZE\n\n    chunk_overlap = None\n    if hasattr(settings, \"FILE_INDEX_PIPELINE_SPLITTER_CHUNK_OVERLAP\"):\n        chunk_overlap = settings.FILE_INDEX_PIPELINE_SPLITTER_CHUNK_OVERLAP\n\n    return file_extractors, chunk_size, chunk_overlap\n\n\n_default_token_func = tiktoken.encoding_for_model(\"gpt-3.5-turbo\").encode\n\n\nclass DocumentRetrievalPipeline(BaseFileIndexRetriever):\n    \"\"\"Retrieve relevant document\n\n    Args:\n        vector_retrieval: the retrieval pipeline that return the relevant documents\n            given a text query\n        reranker: the reranking pipeline that re-rank and filter the retrieved\n            documents\n        get_extra_table: if True, for each retrieved document, the pipeline will look\n            for surrounding tables (e.g. 
within the page)\n        top_k: number of documents to retrieve\n        mmr: whether to use mmr to re-rank the documents\n    \"\"\"\n\n    embedding: BaseEmbeddings\n    rerankers: Sequence[BaseReranking] = []\n    # use LLM to create relevant scores for displaying on UI\n    llm_scorer: LLMReranking | None = LLMReranking.withx()\n    get_extra_table: bool = False\n    mmr: bool = False\n    top_k: int = 5\n    retrieval_mode: str = \"hybrid\"\n\n    @Node.auto(depends_on=[\"embedding\", \"VS\", \"DS\"])\n    def vector_retrieval(self) -> VectorRetrieval:\n        return VectorRetrieval(\n            embedding=self.embedding,\n            vector_store=self.VS,\n            doc_store=self.DS,\n            retrieval_mode=self.retrieval_mode,  # type: ignore\n            rerankers=self.rerankers,\n        )\n\n    def run(\n        self,\n        text: str,\n        doc_ids: Optional[list[str]] = None,\n        *args,\n        **kwargs,\n    ) -> list[RetrievedDocument]:\n        \"\"\"Retrieve document excerpts similar to the text\n\n        Args:\n            text: the text to retrieve similar documents\n            doc_ids: list of document ids to constraint the retrieval\n        \"\"\"\n        # flatten doc_ids in case of group of doc_ids are passed\n        if doc_ids:\n            flatten_doc_ids = []\n            for doc_id in doc_ids:\n                if doc_id is None:\n                    raise ValueError(\"No document is selected\")\n\n                if doc_id.startswith(\"[\"):\n                    flatten_doc_ids.extend(json.loads(doc_id))\n                else:\n                    flatten_doc_ids.append(doc_id)\n            doc_ids = flatten_doc_ids\n\n        print(\"searching in doc_ids\", doc_ids)\n        if not doc_ids:\n            logger.info(f\"Skip retrieval because of no selected files: {self}\")\n            return []\n\n        retrieval_kwargs: dict = {}\n        with Session(engine) as session:\n            stmt = 
select(self.Index).where(\n                self.Index.relation_type == \"document\",\n                self.Index.source_id.in_(doc_ids),\n            )\n            results = session.execute(stmt)\n            chunk_ids = [r[0].target_id for r in results.all()]\n\n        # do first round top_k extension\n        retrieval_kwargs[\"do_extend\"] = True\n        retrieval_kwargs[\"scope\"] = chunk_ids\n        retrieval_kwargs[\"filters\"] = MetadataFilters(\n            filters=[\n                MetadataFilter(\n                    key=\"file_id\",\n                    value=doc_ids,\n                    operator=FilterOperator.IN,\n                )\n            ],\n            condition=FilterCondition.OR,\n        )\n\n        if self.mmr:\n            # TODO: double check that llama-index MMR works correctly\n            retrieval_kwargs[\"mode\"] = VectorStoreQueryMode.MMR\n            retrieval_kwargs[\"mmr_threshold\"] = 0.5\n\n        # rerank\n        s_time = time.time()\n        print(f\"retrieval_kwargs: {retrieval_kwargs.keys()}\")\n        docs = self.vector_retrieval(text=text, top_k=self.top_k, **retrieval_kwargs)\n        print(\"retrieval step took\", time.time() - s_time)\n\n        if not self.get_extra_table:\n            return docs\n\n        # retrieve extra nodes relate to table\n        table_pages = defaultdict(list)\n        retrieved_id = set([doc.doc_id for doc in docs])\n        for doc in docs:\n            if \"page_label\" not in doc.metadata:\n                continue\n            if \"file_name\" not in doc.metadata:\n                warnings.warn(\n                    \"file_name not in metadata while page_label is in metadata: \"\n                    f\"{doc.metadata}\"\n                )\n            table_pages[doc.metadata[\"file_name\"]].append(doc.metadata[\"page_label\"])\n\n        queries: list[dict] = [\n            {\"$and\": [{\"file_name\": {\"$eq\": fn}}, {\"page_label\": {\"$in\": pls}}]}\n            for fn, pls 
in table_pages.items()\n        ]\n        if queries:\n            try:\n                extra_docs = self.vector_retrieval(\n                    text=\"\",\n                    top_k=50,\n                    where=queries[0] if len(queries) == 1 else {\"$or\": queries},\n                )\n                for doc in extra_docs:\n                    if doc.doc_id not in retrieved_id:\n                        docs.append(doc)\n            except Exception:\n                print(\"Error retrieving additional tables\")\n\n        return docs\n\n    def generate_relevant_scores(\n        self, query: str, documents: list[RetrievedDocument]\n    ) -> list[RetrievedDocument]:\n        docs = (\n            documents\n            if not self.llm_scorer\n            else self.llm_scorer(documents=documents, query=query)\n        )\n        return docs\n\n    @classmethod\n    def get_user_settings(cls) -> dict:\n        from ktem.llms.manager import llms\n\n        try:\n            reranking_llm = llms.get_default_name()\n            reranking_llm_choices = list(llms.options().keys())\n        except Exception as e:\n            logger.error(e)\n            reranking_llm = None\n            reranking_llm_choices = []\n\n        return {\n            \"reranking_llm\": {\n                \"name\": \"LLM for relevant scoring\",\n                \"value\": reranking_llm,\n                \"component\": \"dropdown\",\n                \"choices\": reranking_llm_choices,\n                \"special_type\": \"llm\",\n            },\n            \"num_retrieval\": {\n                \"name\": \"Number of document chunks to retrieve\",\n                \"value\": 10,\n                \"component\": \"number\",\n            },\n            \"retrieval_mode\": {\n                \"name\": \"Retrieval mode\",\n                \"value\": \"hybrid\",\n                \"choices\": [\"vector\", \"text\", \"hybrid\"],\n                \"component\": \"dropdown\",\n            },\n        
    \"prioritize_table\": {\n                \"name\": \"Prioritize table\",\n                \"value\": False,\n                \"choices\": [True, False],\n                \"component\": \"checkbox\",\n            },\n            \"mmr\": {\n                \"name\": \"Use MMR\",\n                \"value\": False,\n                \"choices\": [True, False],\n                \"component\": \"checkbox\",\n            },\n            \"use_reranking\": {\n                \"name\": \"Use reranking\",\n                \"value\": True,\n                \"choices\": [True, False],\n                \"component\": \"checkbox\",\n            },\n            \"use_llm_reranking\": {\n                \"name\": \"Use LLM relevant scoring\",\n                \"value\": not config(\"USE_LOW_LLM_REQUESTS\", default=False, cast=bool),\n                \"choices\": [True, False],\n                \"component\": \"checkbox\",\n            },\n        }\n\n    @classmethod\n    def get_pipeline(cls, user_settings, index_settings, selected):\n        \"\"\"Get retriever objects associated with the index\n\n        Args:\n            settings: the settings of the app\n            kwargs: other arguments\n        \"\"\"\n        use_llm_reranking = user_settings.get(\"use_llm_reranking\", False)\n\n        retriever = cls(\n            get_extra_table=user_settings[\"prioritize_table\"],\n            top_k=user_settings[\"num_retrieval\"],\n            mmr=user_settings[\"mmr\"],\n            embedding=embedding_models_manager[\n                index_settings.get(\n                    \"embedding\", embedding_models_manager.get_default_name()\n                )\n            ],\n            retrieval_mode=user_settings[\"retrieval_mode\"],\n            llm_scorer=(LLMTrulensScoring() if use_llm_reranking else None),\n            rerankers=[\n                reranking_models_manager[\n                    index_settings.get(\n                        \"reranking\", 
reranking_models_manager.get_default_name()\n                    )\n                ]\n            ],\n        )\n        if not user_settings[\"use_reranking\"]:\n            retriever.rerankers = []  # type: ignore\n\n        for reranker in retriever.rerankers:\n            if isinstance(reranker, LLMReranking):\n                reranker.llm = llms.get(\n                    user_settings[\"reranking_llm\"], llms.get_default()\n                )\n\n        if retriever.llm_scorer:\n            retriever.llm_scorer.llm = llms.get(\n                user_settings[\"reranking_llm\"], llms.get_default()\n            )\n\n        kwargs = {\".doc_ids\": selected}\n        retriever.set_run(kwargs, temp=False)\n        return retriever\n\n\nclass IndexPipeline(BaseComponent):\n    \"\"\"Index a single file\"\"\"\n\n    loader: BaseReader\n    splitter: BaseSplitter | None\n    chunk_batch_size: int = 200\n\n    Source = Param(help=\"The SQLAlchemy Source table\")\n    Index = Param(help=\"The SQLAlchemy Index table\")\n    VS = Param(help=\"The VectorStore\")\n    DS = Param(help=\"The DocStore\")\n    FSPath = Param(help=\"The file storage path\")\n    user_id = Param(help=\"The user id\")\n    collection_name: str = \"default\"\n    private: bool = False\n    run_embedding_in_thread: bool = False\n    embedding: BaseEmbeddings\n\n    @Node.auto(depends_on=[\"Source\", \"Index\", \"embedding\"])\n    def vector_indexing(self) -> VectorIndexing:\n        return VectorIndexing(\n            vector_store=self.VS, doc_store=self.DS, embedding=self.embedding\n        )\n\n    def handle_docs(self, docs, file_id, file_name) -> Generator[Document, None, int]:\n        s_time = time.time()\n        text_docs = []\n        non_text_docs = []\n        thumbnail_docs = []\n\n        for doc in docs:\n            doc_type = doc.metadata.get(\"type\", \"text\")\n            if doc_type == \"text\":\n                text_docs.append(doc)\n            elif doc_type == 
\"thumbnail\":\n                thumbnail_docs.append(doc)\n            else:\n                non_text_docs.append(doc)\n\n        print(f\"Got {len(thumbnail_docs)} page thumbnails\")\n        page_label_to_thumbnail = {\n            doc.metadata[\"page_label\"]: doc.doc_id for doc in thumbnail_docs\n        }\n\n        if self.splitter:\n            all_chunks = self.splitter(text_docs)\n        else:\n            all_chunks = text_docs\n\n        # add the thumbnails doc_id to the chunks\n        for chunk in all_chunks:\n            page_label = chunk.metadata.get(\"page_label\", None)\n            if page_label and page_label in page_label_to_thumbnail:\n                chunk.metadata[\"thumbnail_doc_id\"] = page_label_to_thumbnail[page_label]\n\n        to_index_chunks = all_chunks + non_text_docs + thumbnail_docs\n\n        # add to doc store\n        chunks = []\n        n_chunks = 0\n        chunk_size = self.chunk_batch_size * 4\n        for start_idx in range(0, len(to_index_chunks), chunk_size):\n            chunks = to_index_chunks[start_idx : start_idx + chunk_size]\n            self.handle_chunks_docstore(chunks, file_id)\n            n_chunks += len(chunks)\n            yield Document(\n                f\" => [{file_name}] Processed {n_chunks} chunks\",\n                channel=\"debug\",\n            )\n\n        def insert_chunks_to_vectorstore():\n            chunks = []\n            n_chunks = 0\n            chunk_size = self.chunk_batch_size\n            for start_idx in range(0, len(to_index_chunks), chunk_size):\n                chunks = to_index_chunks[start_idx : start_idx + chunk_size]\n                self.handle_chunks_vectorstore(chunks, file_id)\n                n_chunks += len(chunks)\n                if self.VS:\n                    yield Document(\n                        f\" => [{file_name}] Created embedding for {n_chunks} chunks\",\n                        channel=\"debug\",\n                    )\n\n        # run vector 
indexing in thread if specified\n        if self.run_embedding_in_thread:\n            print(\"Running embedding in thread\")\n            threading.Thread(\n                target=lambda: list(insert_chunks_to_vectorstore())\n            ).start()\n        else:\n            yield from insert_chunks_to_vectorstore()\n\n        print(\"indexing step took\", time.time() - s_time)\n        return n_chunks\n\n    def handle_chunks_docstore(self, chunks, file_id):\n        \"\"\"Run chunks\"\"\"\n        # run embedding, add to both vector store and doc store\n        self.vector_indexing.add_to_docstore(chunks)\n\n        # record in the index\n        with Session(engine) as session:\n            nodes = []\n            for chunk in chunks:\n                nodes.append(\n                    self.Index(\n                        source_id=file_id,\n                        target_id=chunk.doc_id,\n                        relation_type=\"document\",\n                    )\n                )\n            session.add_all(nodes)\n            session.commit()\n\n    def handle_chunks_vectorstore(self, chunks, file_id):\n        \"\"\"Run chunks\"\"\"\n        # run embedding, add to both vector store and doc store\n        self.vector_indexing.add_to_vectorstore(chunks)\n        self.vector_indexing.write_chunk_to_file(chunks)\n\n        if self.VS:\n            # record in the index\n            with Session(engine) as session:\n                nodes = []\n                for chunk in chunks:\n                    nodes.append(\n                        self.Index(\n                            source_id=file_id,\n                            target_id=chunk.doc_id,\n                            relation_type=\"vector\",\n                        )\n                    )\n                session.add_all(nodes)\n                session.commit()\n\n    def get_id_if_exists(self, file_path: str | Path) -> Optional[str]:\n        \"\"\"Check if the file is already indexed\n\n        
Args:\n            file_path: the path to the file\n\n        Returns:\n            the file id if the file is indexed, otherwise None\n        \"\"\"\n        file_name = file_path.name if isinstance(file_path, Path) else file_path\n        if self.private:\n            cond: tuple = (\n                self.Source.name == file_name,\n                self.Source.user == self.user_id,\n            )\n        else:\n            cond = (self.Source.name == file_name,)\n\n        with Session(engine) as session:\n            stmt = select(self.Source).where(*cond)\n            item = session.execute(stmt).first()\n            if item:\n                return item[0].id\n\n        return None\n\n    def store_url(self, url: str) -> str:\n        \"\"\"Store URL into the database and storage, return the file id\n\n        Args:\n            url: the URL\n\n        Returns:\n            the file id\n        \"\"\"\n        file_hash = sha256(url.encode()).hexdigest()\n        source = self.Source(\n            name=url,\n            path=file_hash,\n            size=0,\n            user=self.user_id,  # type: ignore\n        )\n        with Session(engine) as session:\n            session.add(source)\n            session.commit()\n            file_id = source.id\n\n        return file_id\n\n    def store_file(self, file_path: Path) -> str:\n        \"\"\"Store file into the database and storage, return the file id\n\n        Args:\n            file_path: the path to the file\n\n        Returns:\n            the file id\n        \"\"\"\n        with file_path.open(\"rb\") as fi:\n            file_hash = sha256(fi.read()).hexdigest()\n\n        shutil.copy(file_path, self.FSPath / file_hash)\n        source = self.Source(\n            name=file_path.name,\n            path=file_hash,\n            size=file_path.stat().st_size,\n            user=self.user_id,  # type: ignore\n        )\n        with Session(engine) as session:\n            session.add(source)\n            
session.commit()\n            file_id = source.id\n\n        return file_id\n\n    def finish(self, file_id: str, file_path: str | Path) -> str:\n        \"\"\"Finish the indexing\"\"\"\n        with Session(engine) as session:\n            stmt = select(self.Source).where(self.Source.id == file_id)\n            result = session.execute(stmt).first()\n            if not result:\n                return file_id\n\n            item = result[0]\n\n            # populate the number of tokens\n            doc_ids_stmt = select(self.Index.target_id).where(\n                self.Index.source_id == file_id,\n                self.Index.relation_type == \"document\",\n            )\n            doc_ids = [_[0] for _ in session.execute(doc_ids_stmt)]\n            token_func = self.get_token_func()\n            if doc_ids and token_func:\n                docs = self.DS.get(doc_ids)\n                item.note[\"tokens\"] = sum([len(token_func(doc.text)) for doc in docs])\n\n            # populate the note\n            item.note[\"loader\"] = self.get_from_path(\"loader\").__class__.__name__\n\n            session.add(item)\n            session.commit()\n\n        return file_id\n\n    def get_token_func(self):\n        \"\"\"Get the token function for calculating the number of tokens\"\"\"\n        return _default_token_func\n\n    def delete_file(self, file_id: str):\n        \"\"\"Delete a file from the db, including its chunks in docstore and vectorstore\n\n        Args:\n            file_id: the file id\n        \"\"\"\n        with Session(engine) as session:\n            session.execute(delete(self.Source).where(self.Source.id == file_id))\n            vs_ids, ds_ids = [], []\n            index = session.execute(\n                select(self.Index).where(self.Index.source_id == file_id)\n            ).all()\n            for each in index:\n                if each[0].relation_type == \"vector\":\n                    vs_ids.append(each[0].target_id)\n                elif 
each[0].relation_type == \"document\":\n                    ds_ids.append(each[0].target_id)\n                session.delete(each[0])\n            session.commit()\n\n        if vs_ids and self.VS:\n            self.VS.delete(vs_ids)\n        if ds_ids:\n            self.DS.delete(ds_ids)\n\n    def run(\n        self, file_path: str | Path, reindex: bool, **kwargs\n    ) -> tuple[str, list[Document]]:\n        raise NotImplementedError\n\n    def stream(\n        self, file_path: str | Path, reindex: bool, **kwargs\n    ) -> Generator[Document, None, tuple[str, list[Document]]]:\n        # check if the file is already indexed\n        if isinstance(file_path, Path):\n            file_path = file_path.resolve()\n\n        file_id = self.get_id_if_exists(file_path)\n\n        if isinstance(file_path, Path):\n            if file_id is not None:\n                if not reindex:\n                    raise ValueError(\n                        f\"File {file_path.name} already indexed. Please rerun with \"\n                        \"reindex=True to force reindexing.\"\n                    )\n                else:\n                    # remove the existing records\n                    yield Document(\n                        f\" => Removing old {file_path.name}\", channel=\"debug\"\n                    )\n                    self.delete_file(file_id)\n                    file_id = self.store_file(file_path)\n            else:\n                # add record to db\n                file_id = self.store_file(file_path)\n        else:\n            if file_id is not None:\n                raise ValueError(f\"URL {file_path} already indexed.\")\n            else:\n                # add record to db\n                file_id = self.store_url(file_path)\n\n        # extract the file\n        if isinstance(file_path, Path):\n            extra_info = default_file_metadata_func(str(file_path))\n            file_name = file_path.name\n        else:\n            extra_info = 
{\"file_name\": file_path}\n            file_name = file_path\n\n        extra_info[\"file_id\"] = file_id\n        extra_info[\"collection_name\"] = self.collection_name\n\n        yield Document(f\" => Converting {file_name} to text\", channel=\"debug\")\n        docs = self.loader.load_data(file_path, extra_info=extra_info)\n        yield Document(f\" => Converted {file_name} to text\", channel=\"debug\")\n        yield from self.handle_docs(docs, file_id, file_name)\n\n        self.finish(file_id, file_path)\n\n        yield Document(f\" => Finished indexing {file_name}\", channel=\"debug\")\n        return file_id, docs\n\n\nclass IndexDocumentPipeline(BaseFileIndexIndexing):\n    \"\"\"Index the file. Decide which pipeline based on the file type.\n\n    This method is essentially a factory to decide which indexing pipeline to use.\n\n    We can decide the pipeline programmatically, and/or automatically based on an LLM.\n    If we based on the LLM, essentially we will log the LLM thought process in a file,\n    and then during the indexing, we will read that file to decide which pipeline\n    to use, and then log the operation in that file. 
Overtime, the LLM can learn to\n    decide which pipeline should be used.\n    \"\"\"\n\n    reader_mode: str = Param(\"default\", help=\"The reader mode\")\n    embedding: BaseEmbeddings\n    run_embedding_in_thread: bool = False\n\n    @Param.auto(depends_on=\"reader_mode\")\n    def readers(self):\n        readers = deepcopy(KH_DEFAULT_FILE_EXTRACTORS)\n        print(\"reader_mode\", self.reader_mode)\n        if self.reader_mode == \"adobe\":\n            readers[\".pdf\"] = adobe_reader\n        elif self.reader_mode == \"azure-di\":\n            readers[\".pdf\"] = azure_reader\n        elif self.reader_mode == \"docling\":\n            readers[\".pdf\"] = docling_reader\n\n        dev_readers, _, _ = dev_settings()\n        readers.update(dev_readers)\n\n        return readers\n\n    @classmethod\n    def get_user_settings(cls):\n        return {\n            \"reader_mode\": {\n                \"name\": \"File loader\",\n                \"value\": \"default\",\n                \"choices\": [\n                    (\"Default (open-source)\", \"default\"),\n                    (\"Adobe API (figure+table extraction)\", \"adobe\"),\n                    (\n                        \"Azure AI Document Intelligence (figure+table extraction)\",\n                        \"azure-di\",\n                    ),\n                    (\"Docling (figure+table extraction)\", \"docling\"),\n                ],\n                \"component\": \"dropdown\",\n            },\n        }\n\n    @classmethod\n    def get_pipeline(cls, user_settings, index_settings) -> BaseFileIndexIndexing:\n        use_quick_index_mode = user_settings.get(\"quick_index_mode\", False)\n        print(\"use_quick_index_mode\", use_quick_index_mode)\n        obj = cls(\n            embedding=embedding_models_manager[\n                index_settings.get(\n                    \"embedding\", embedding_models_manager.get_default_name()\n                )\n            ],\n            
run_embedding_in_thread=use_quick_index_mode,\n            reader_mode=user_settings.get(\"reader_mode\", \"default\"),\n        )\n        return obj\n\n    def is_url(self, file_path: str | Path) -> bool:\n        return isinstance(file_path, str) and (\n            file_path.startswith(\"http://\") or file_path.startswith(\"https://\")\n        )\n\n    def route(self, file_path: str | Path) -> IndexPipeline:\n        \"\"\"Decide the pipeline based on the file type\n\n        Can subclass this method for a more elaborate pipeline routing strategy.\n        \"\"\"\n\n        _, dev_chunk_size, dev_chunk_overlap = dev_settings()\n\n        chunk_size = self.chunk_size or dev_chunk_size\n        chunk_overlap = self.chunk_overlap or dev_chunk_overlap\n\n        # check if file_path is a URL\n        if self.is_url(file_path):\n            reader = web_reader\n        else:\n            assert isinstance(file_path, Path)\n            ext = file_path.suffix.lower()\n            reader = self.readers.get(ext, unstructured)\n            if reader is None:\n                raise NotImplementedError(\n                    f\"No supported pipeline to index {file_path.name}. 
Please specify \"\n                    \"the suitable pipeline for this file type in the settings.\"\n                )\n\n        print(f\"Chunk size: {chunk_size}, chunk overlap: {chunk_overlap}\")\n\n        print(\"Using reader\", reader)\n        pipeline: IndexPipeline = IndexPipeline(\n            loader=reader,\n            splitter=TokenSplitter(\n                chunk_size=chunk_size or 1024,\n                chunk_overlap=chunk_overlap or 256,\n                separator=\"\\n\\n\",\n                backup_separators=[\"\\n\", \".\", \"\\u200B\"],\n            ),\n            run_embedding_in_thread=self.run_embedding_in_thread,\n            Source=self.Source,\n            Index=self.Index,\n            VS=self.VS,\n            DS=self.DS,\n            FSPath=self.FSPath,\n            user_id=self.user_id,\n            private=self.private,\n            embedding=self.embedding,\n        )\n\n        return pipeline\n\n    def run(\n        self, file_paths: str | Path | list[str | Path], *args, **kwargs\n    ) -> tuple[list[str | None], list[str | None]]:\n        raise NotImplementedError\n\n    def stream(\n        self, file_paths: str | Path | list[str | Path], reindex: bool = False, **kwargs\n    ) -> Generator[\n        Document, None, tuple[list[str | None], list[str | None], list[Document]]\n    ]:\n        \"\"\"Return a list of indexed file ids, and a list of errors\"\"\"\n        if not isinstance(file_paths, list):\n            file_paths = [file_paths]\n\n        file_ids: list[str | None] = []\n        errors: list[str | None] = []\n        all_docs = []\n\n        n_files = len(file_paths)\n        for idx, file_path in enumerate(file_paths):\n            if self.is_url(file_path):\n                file_name = file_path\n            else:\n                file_path = Path(file_path)\n                file_name = file_path.name\n\n            yield Document(\n                content=f\"Indexing [{idx + 1}/{n_files}]: {file_name}\",\n        
        channel=\"debug\",\n            )\n\n            try:\n                pipeline = self.route(file_path)\n                file_id, docs = yield from pipeline.stream(\n                    file_path, reindex=reindex, **kwargs\n                )\n                all_docs.extend(docs)\n                file_ids.append(file_id)\n                errors.append(None)\n                yield Document(\n                    content={\n                        \"file_path\": file_path,\n                        \"file_name\": file_name,\n                        \"status\": \"success\",\n                    },\n                    channel=\"index\",\n                )\n            except Exception as e:\n                logger.exception(e)\n                file_ids.append(None)\n                errors.append(str(e))\n                yield Document(\n                    content={\n                        \"file_path\": file_path,\n                        \"file_name\": file_name,\n                        \"status\": \"failed\",\n                        \"message\": str(e),\n                    },\n                    channel=\"index\",\n                )\n\n        return file_ids, errors, all_docs\n"
  },
  {
    "path": "libs/ktem/ktem/index/file/ui.py",
    "content": "import html\nimport json\nimport os\nimport shutil\nimport tempfile\nimport zipfile\nfrom copy import deepcopy\nfrom pathlib import Path\nfrom typing import Generator\n\nimport gradio as gr\nimport pandas as pd\nfrom gradio.data_classes import FileData\nfrom gradio.utils import NamedString\nfrom ktem.app import BasePage\nfrom ktem.db.engine import engine\nfrom ktem.utils.render import Render\nfrom sqlalchemy import select\nfrom sqlalchemy.orm import Session\nfrom theflow.settings import settings as flowsettings\n\nfrom ...utils.commands import WEB_SEARCH_COMMAND\nfrom ...utils.rate_limit import check_rate_limit\nfrom .utils import download_arxiv_pdf, is_arxiv_url\n\nKH_DEMO_MODE = getattr(flowsettings, \"KH_DEMO_MODE\", False)\nKH_SSO_ENABLED = getattr(flowsettings, \"KH_SSO_ENABLED\", False)\nDOWNLOAD_MESSAGE = \"Start download\"\nMAX_FILENAME_LENGTH = 20\nMAX_FILE_COUNT = 200\n\nchat_input_focus_js = \"\"\"\nfunction() {\n    let chatInput = document.querySelector(\"#chat-input textarea\");\n    chatInput.focus();\n}\n\"\"\"\n\nchat_input_focus_js_with_submit = \"\"\"\nfunction() {\n    let chatInput = document.querySelector(\"#chat-input textarea\");\n    let chatInputSubmit = document.querySelector(\"#chat-input button.submit-button\");\n    chatInputSubmit.click();\n    chatInput.focus();\n}\n\"\"\"\n\nupdate_file_list_js = \"\"\"\nfunction(file_list) {\n    var values = [];\n    for (var i = 0; i < file_list.length; i++) {\n        values.push({\n            key: file_list[i][0],\n            value: '\"' + file_list[i][0] + '\"',\n        });\n    }\n\n    // manually push web search tag\n    values.push({\n        key: \"web_search\",\n        value: '\"web_search\"',\n    });\n\n    var tribute = new Tribute({\n        values: values,\n        noMatchTemplate: \"\",\n        allowSpaces: true,\n    })\n    input_box = document.querySelector('#chat-input textarea');\n    tribute.detach(input_box);\n    
tribute.attach(input_box);\n}\n\"\"\".replace(\n    \"web_search\", WEB_SEARCH_COMMAND\n)\n\n\nclass File(gr.File):\n    \"\"\"Subclass from gr.File to maintain the original filename\n\n    The issue happens when user uploads file with name like: !@#$%%^&*().pdf\n    \"\"\"\n\n    def _process_single_file(self, f: FileData) -> NamedString | bytes:\n        file_name = f.path\n        if self.type == \"filepath\":\n            if f.orig_name and Path(file_name).name != f.orig_name:\n                file_name = str(Path(file_name).parent / f.orig_name)\n                os.rename(f.path, file_name)\n            file = tempfile.NamedTemporaryFile(delete=False, dir=self.GRADIO_CACHE)\n            file.name = file_name\n            return NamedString(file_name)\n        elif self.type == \"binary\":\n            with open(file_name, \"rb\") as file_data:\n                return file_data.read()\n        else:\n            raise ValueError(\n                \"Unknown type: \"\n                + str(type)\n                + \". 
Please choose from: 'filepath', 'binary'.\"\n            )\n\n\nclass DirectoryUpload(BasePage):\n    def __init__(self, app, index):\n        super().__init__(app)\n        self._index = index\n        self._supported_file_types_str = self._index.config.get(\n            \"supported_file_types\", \"\"\n        )\n        self._supported_file_types = [\n            each.strip() for each in self._supported_file_types_str.split(\",\")\n        ]\n        self.on_building_ui()\n\n    def on_building_ui(self):\n        with gr.Accordion(label=\"Directory upload\", open=False):\n            gr.Markdown(f\"Supported file types: {self._supported_file_types_str}\")\n            self.path = gr.Textbox(\n                placeholder=\"Directory path...\", lines=1, max_lines=1, container=False\n            )\n            with gr.Accordion(\"Advanced indexing options\", open=False):\n                with gr.Row():\n                    self.reindex = gr.Checkbox(\n                        value=False, label=\"Force reindex file\", container=False\n                    )\n\n            self.upload_button = gr.Button(\"Upload and Index\")\n\n\nclass FileIndexPage(BasePage):\n    def __init__(self, app, index):\n        super().__init__(app)\n        self._index = index\n        self._supported_file_types_str = self._index.config.get(\n            \"supported_file_types\", \"\"\n        )\n        self._supported_file_types = [\n            each.strip() for each in self._supported_file_types_str.split(\",\")\n        ]\n        self.selected_panel_false = \"Selected file: (please select above)\"\n        self.selected_panel_true = \"Selected file: {name}\"\n        # TODO: on_building_ui is not correctly named if it's always called in\n        # the constructor\n        self.public_events = [f\"onFileIndex{index.id}Changed\"]\n\n        if not KH_DEMO_MODE:\n            self.on_building_ui()\n\n    def upload_instruction(self) -> str:\n        msgs = []\n        if 
self._supported_file_types:\n            msgs.append(f\"- Supported file types: {self._supported_file_types_str}\")\n\n        if max_file_size := self._index.config.get(\"max_file_size\", 0):\n            msgs.append(f\"- Maximum file size: {max_file_size} MB\")\n\n        if max_number_of_files := self._index.config.get(\"max_number_of_files\", 0):\n            msgs.append(f\"- The index can have maximum {max_number_of_files} files\")\n\n        if msgs:\n            return \"\\n\".join(msgs)\n\n        return \"\"\n\n    def render_file_list(self):\n        self.filter = gr.Textbox(\n            value=\"\",\n            label=\"Filter by name:\",\n            info=(\n                \"(1) Case-insensitive. \"\n                \"(2) Search with empty string to show all files.\"\n            ),\n        )\n        self.file_list_state = gr.State(value=None)\n        self.file_list = gr.DataFrame(\n            headers=[\n                \"id\",\n                \"name\",\n                \"size\",\n                \"tokens\",\n                \"loader\",\n                \"date_created\",\n            ],\n            column_widths=[0, 50, 8, 7, 15, 20],\n            interactive=False,\n            wrap=False,\n            elem_id=\"file_list_view\",\n        )\n\n        with gr.Row():\n\n            self.chat_button = gr.Button(\n                \"Go to Chat\",\n                visible=False,\n            )\n            self.is_zipped_state = gr.State(value=False)\n            self.download_single_button = gr.DownloadButton(\n                \"Download\",\n                visible=False,\n            )\n            self.delete_button = gr.Button(\n                \"Delete\",\n                variant=\"stop\",\n                visible=False,\n            )\n            self.deselect_button = gr.Button(\n                \"Close\",\n                visible=False,\n            )\n\n        with gr.Row() as self.selection_info:\n            self.selected_file_id = 
gr.State(value=None)\n            with gr.Column(scale=2):\n                self.selected_panel = gr.Markdown(self.selected_panel_false)\n\n        self.chunks = gr.HTML(visible=False)\n\n        with gr.Accordion(\"Advance options\", open=False):\n            with gr.Row():\n                if not KH_SSO_ENABLED:\n                    self.download_all_button = gr.DownloadButton(\n                        \"Download all files\",\n                    )\n                self.delete_all_button = gr.Button(\n                    \"Delete all files\",\n                    variant=\"stop\",\n                    visible=True,\n                )\n                self.delete_all_button_confirm = gr.Button(\n                    \"Confirm delete\", variant=\"stop\", visible=False\n                )\n                self.delete_all_button_cancel = gr.Button(\"Cancel\", visible=False)\n\n    def render_group_list(self):\n        self.group_list_state = gr.State(value=None)\n        self.group_list = gr.DataFrame(\n            headers=[\n                \"id\",\n                \"name\",\n                \"files\",\n                \"date_created\",\n            ],\n            column_widths=[0, 25, 55, 20],\n            interactive=False,\n            wrap=False,\n        )\n\n        with gr.Row():\n            self.group_add_button = gr.Button(\n                \"Add\",\n                variant=\"primary\",\n            )\n            self.group_chat_button = gr.Button(\n                \"Go to Chat\",\n                visible=False,\n            )\n            self.group_delete_button = gr.Button(\n                \"Delete\",\n                variant=\"stop\",\n                visible=False,\n            )\n            self.group_close_button = gr.Button(\n                \"Close\",\n                visible=False,\n            )\n\n        with gr.Column(visible=False) as self._group_info_panel:\n            self.selected_group_id = gr.State(value=None)\n            
self.group_label = gr.Markdown()\n            self.group_name = gr.Textbox(\n                label=\"Group name\",\n                placeholder=\"Group name\",\n                lines=1,\n                max_lines=1,\n            )\n            self.group_files = gr.Dropdown(\n                label=\"Attached files\",\n                multiselect=True,\n            )\n            self.group_save_button = gr.Button(\n                \"Save\",\n                variant=\"primary\",\n            )\n\n    def on_building_ui(self):\n        \"\"\"Build the UI of the app\"\"\"\n        with gr.Row():\n            with gr.Column(scale=1):\n                with gr.Column() as self.upload:\n                    with gr.Tab(\"Upload Files\"):\n                        self.files = File(\n                            file_types=self._supported_file_types,\n                            file_count=\"multiple\",\n                            container=True,\n                            show_label=False,\n                        )\n\n                        msg = self.upload_instruction()\n                        if msg:\n                            gr.Markdown(msg)\n\n                    with gr.Tab(\"Use Web Links\"):\n                        self.urls = gr.Textbox(\n                            label=\"Input web URLs\",\n                            lines=8,\n                        )\n                        gr.Markdown(\"(separated by new line)\")\n\n                    with gr.Accordion(\"Advanced indexing options\", open=False):\n                        with gr.Row():\n                            self.reindex = gr.Checkbox(\n                                value=False, label=\"Force reindex file\", container=False\n                            )\n\n                    self.upload_button = gr.Button(\n                        \"Upload and Index\", variant=\"primary\"\n                    )\n\n            with gr.Column(scale=4):\n                with gr.Column(visible=False) as 
self.upload_progress_panel:\n                    gr.Markdown(\"## Upload Progress\")\n                    with gr.Row():\n                        self.upload_result = gr.Textbox(\n                            lines=1, max_lines=20, label=\"Upload result\"\n                        )\n                        self.upload_info = gr.Textbox(\n                            lines=1, max_lines=20, label=\"Upload info\"\n                        )\n                    self.btn_close_upload_progress_panel = gr.Button(\n                        \"Clear Upload Info and Close\",\n                        variant=\"secondary\",\n                        elem_classes=[\"right-button\"],\n                    )\n\n                with gr.Tab(\"Files\"):\n                    self.render_file_list()\n\n                with gr.Tab(\"Groups\"):\n                    self.render_group_list()\n\n    def on_subscribe_public_events(self):\n        \"\"\"Subscribe to the declared public event of the app\"\"\"\n        if KH_DEMO_MODE:\n            return\n\n        self._app.subscribe_event(\n            name=f\"onFileIndex{self._index.id}Changed\",\n            definition={\n                \"fn\": self.list_file_names,\n                \"inputs\": [self.file_list_state],\n                \"outputs\": [self.group_files],\n                \"show_progress\": \"hidden\",\n            },\n        )\n\n        if self._app.f_user_management:\n            self._app.subscribe_event(\n                name=\"onSignIn\",\n                definition={\n                    \"fn\": self.list_file,\n                    \"inputs\": [self._app.user_id],\n                    \"outputs\": [self.file_list_state, self.file_list],\n                    \"show_progress\": \"hidden\",\n                },\n            )\n            self._app.subscribe_event(\n                name=\"onSignIn\",\n                definition={\n                    \"fn\": self.list_group,\n                    \"inputs\": [self._app.user_id, 
self.file_list_state],\n                    \"outputs\": [self.group_list_state, self.group_list],\n                    \"show_progress\": \"hidden\",\n                },\n            )\n            self._app.subscribe_event(\n                name=\"onSignIn\",\n                definition={\n                    \"fn\": self.list_file_names,\n                    \"inputs\": [self.file_list_state],\n                    \"outputs\": [self.group_files],\n                    \"show_progress\": \"hidden\",\n                },\n            )\n            self._app.subscribe_event(\n                name=\"onSignOut\",\n                definition={\n                    \"fn\": self.list_file,\n                    \"inputs\": [self._app.user_id],\n                    \"outputs\": [self.file_list_state, self.file_list],\n                    \"show_progress\": \"hidden\",\n                },\n            )\n\n    def file_selected(self, file_id):\n        chunks = []\n        if file_id is not None:\n            # get the chunks\n\n            Index = self._index._resources[\"Index\"]\n            with Session(engine) as session:\n                matches = session.execute(\n                    select(Index).where(\n                        Index.source_id == file_id,\n                        Index.relation_type == \"document\",\n                    )\n                )\n                doc_ids = [doc.target_id for (doc,) in matches]\n                docs = self._index._docstore.get(doc_ids)\n                docs = sorted(\n                    docs, key=lambda x: x.metadata.get(\"page_label\", float(\"inf\"))\n                )\n\n                for idx, doc in enumerate(docs):\n                    title = html.escape(\n                        f\"{doc.text[:50]}...\" if len(doc.text) > 50 else doc.text\n                    )\n                    doc_type = doc.metadata.get(\"type\", \"text\")\n                    content = \"\"\n                    if doc_type == \"text\":\n    
                    content = html.escape(doc.text)\n                    elif doc_type == \"table\":\n                        content = Render.table(doc.text)\n                    elif doc_type == \"image\":\n                        content = Render.image(\n                            url=doc.metadata.get(\"image_origin\", \"\"), text=doc.text\n                        )\n\n                    header_prefix = f\"[{idx+1}/{len(docs)}]\"\n                    if doc.metadata.get(\"page_label\"):\n                        header_prefix += f\" [Page {doc.metadata['page_label']}]\"\n\n                    chunks.append(\n                        Render.collapsible(\n                            header=f\"{header_prefix} {title}\",\n                            content=content,\n                        )\n                    )\n        return (\n            gr.update(value=\"\".join(chunks), visible=file_id is not None),\n            gr.update(visible=file_id is not None),\n            gr.update(visible=file_id is not None),\n            gr.update(visible=file_id is not None),\n            gr.update(visible=file_id is not None),\n        )\n\n    def delete_event(self, file_id):\n        file_name = \"\"\n        with Session(engine) as session:\n            source = session.execute(\n                select(self._index._resources[\"Source\"]).where(\n                    self._index._resources[\"Source\"].id == file_id\n                )\n            ).first()\n            if source:\n                file_name = source[0].name\n                session.delete(source[0])\n\n            vs_ids, ds_ids = [], []\n            index = session.execute(\n                select(self._index._resources[\"Index\"]).where(\n                    self._index._resources[\"Index\"].source_id == file_id\n                )\n            ).all()\n            for each in index:\n                if each[0].relation_type == \"vector\":\n                    vs_ids.append(each[0].target_id)\n               
 elif each[0].relation_type == \"document\":\n                    ds_ids.append(each[0].target_id)\n                session.delete(each[0])\n            session.commit()\n\n        if vs_ids:\n            self._index._vs.delete(vs_ids)\n        self._index._docstore.delete(ds_ids)\n\n        gr.Info(f\"File {file_name} has been deleted\")\n\n        return None, self.selected_panel_false\n\n    def delete_no_event(self):\n        return (\n            gr.update(visible=True),\n            gr.update(visible=False),\n        )\n\n    def download_single_file(self, is_zipped_state, file_id):\n        with Session(engine) as session:\n            source = session.execute(\n                select(self._index._resources[\"Source\"]).where(\n                    self._index._resources[\"Source\"].id == file_id\n                )\n            ).first()\n        if source:\n            target_file_name = Path(source[0].name)\n        zip_files = []\n        for file_name in os.listdir(flowsettings.KH_CHUNKS_OUTPUT_DIR):\n            if target_file_name.stem in file_name:\n                zip_files.append(\n                    os.path.join(flowsettings.KH_CHUNKS_OUTPUT_DIR, file_name)\n                )\n        for file_name in os.listdir(flowsettings.KH_MARKDOWN_OUTPUT_DIR):\n            if target_file_name.stem in file_name:\n                zip_files.append(\n                    os.path.join(flowsettings.KH_MARKDOWN_OUTPUT_DIR, file_name)\n                )\n        zip_file_path = os.path.join(\n            flowsettings.KH_ZIP_OUTPUT_DIR, target_file_name.stem\n        )\n        with zipfile.ZipFile(f\"{zip_file_path}.zip\", \"w\") as zipMe:\n            for file in zip_files:\n                zipMe.write(file, arcname=os.path.basename(file))\n\n        if is_zipped_state:\n            new_button = gr.DownloadButton(label=\"Download\", value=None)\n        else:\n            new_button = gr.DownloadButton(\n                label=DOWNLOAD_MESSAGE, 
value=f\"{zip_file_path}.zip\"\n            )\n\n        return not is_zipped_state, new_button\n\n    def download_single_file_simple(self, is_zipped_state, file_html, file_id):\n        with Session(engine) as session:\n            source = session.execute(\n                select(self._index._resources[\"Source\"]).where(\n                    self._index._resources[\"Source\"].id == file_id\n                )\n            ).first()\n        if source:\n            target_file_name = Path(source[0].name)\n\n        # create a temporary file with a path to export\n        output_file_path = os.path.join(\n            flowsettings.KH_ZIP_OUTPUT_DIR, target_file_name.stem + \".html\"\n        )\n        with open(output_file_path, \"w\") as f:\n            f.write(file_html)\n\n        if is_zipped_state:\n            new_button = gr.DownloadButton(label=\"Download\", value=None)\n        else:\n            # export the file path\n            new_button = gr.DownloadButton(\n                label=DOWNLOAD_MESSAGE,\n                value=output_file_path,\n            )\n\n        return not is_zipped_state, new_button\n\n    def download_all_files(self):\n        if self._index.config.get(\"private\", False):\n            raise gr.Error(\"This feature is not available for private collection.\")\n\n        zip_files = []\n        for file_name in os.listdir(flowsettings.KH_CHUNKS_OUTPUT_DIR):\n            zip_files.append(os.path.join(flowsettings.KH_CHUNKS_OUTPUT_DIR, file_name))\n        for file_name in os.listdir(flowsettings.KH_MARKDOWN_OUTPUT_DIR):\n            zip_files.append(\n                os.path.join(flowsettings.KH_MARKDOWN_OUTPUT_DIR, file_name)\n            )\n        zip_file_path = os.path.join(flowsettings.KH_ZIP_OUTPUT_DIR, \"all\")\n        with zipfile.ZipFile(f\"{zip_file_path}.zip\", \"w\") as zipMe:\n            for file in zip_files:\n                arcname = Path(file)\n                zipMe.write(file, arcname=arcname.name)\n        
return gr.DownloadButton(label=DOWNLOAD_MESSAGE, value=f\"{zip_file_path}.zip\")\n\n    def delete_all_files(self, file_list):\n        for file_id in file_list.id.values:\n            self.delete_event(file_id)\n\n    def set_file_id_selector(self, selected_file_id):\n        return [selected_file_id, \"select\", gr.Tabs(selected=\"chat-tab\")]\n\n    def show_delete_all_confirm(self, file_list):\n        # when the list of files is empty it shows a single line with id equal to -\n        if len(file_list) == 0 or (\n            len(file_list) == 1 and file_list.id.values[0] == \"-\"\n        ):\n            gr.Info(\"No file to delete\")\n            return [\n                gr.update(visible=True),\n                gr.update(visible=False),\n                gr.update(visible=False),\n            ]\n        else:\n            return [\n                gr.update(visible=False),\n                gr.update(visible=True),\n                gr.update(visible=True),\n            ]\n\n    def on_register_quick_uploads(self):\n        try:\n            # quick file upload event registration of first Index only\n            if self._index.id == 1:\n                self.quick_upload_state = gr.State(value=[])\n                print(\"Setting up quick upload event\")\n\n                # override indexing function from chat page\n                self._app.chat_page.first_indexing_url_fn = (\n                    self.index_fn_url_with_default_loaders\n                )\n\n                if not KH_DEMO_MODE:\n                    quickUploadedEvent = (\n                        self._app.chat_page.quick_file_upload.upload(\n                            fn=lambda: gr.update(\n                                value=\"Please wait for the indexing process \"\n                                \"to complete before adding your question.\"\n                            ),\n                            outputs=self._app.chat_page.quick_file_upload_status,\n                        )\n        
                .then(\n                            fn=self.index_fn_file_with_default_loaders,\n                            inputs=[\n                                self._app.chat_page.quick_file_upload,\n                                gr.State(value=False),\n                                self._app.settings_state,\n                                self._app.user_id,\n                            ],\n                            outputs=self.quick_upload_state,\n                            concurrency_limit=10,\n                        )\n                        .success(\n                            fn=lambda: [\n                                gr.update(value=None),\n                                gr.update(value=\"select\"),\n                            ],\n                            outputs=[\n                                self._app.chat_page.quick_file_upload,\n                                self._app.chat_page._indices_input[0],\n                            ],\n                        )\n                    )\n                    for event in self._app.get_event(\n                        f\"onFileIndex{self._index.id}Changed\"\n                    ):\n                        quickUploadedEvent = quickUploadedEvent.then(**event)\n\n                    quickUploadedEvent = (\n                        quickUploadedEvent.success(\n                            fn=lambda x: x,\n                            inputs=self.quick_upload_state,\n                            outputs=self._app.chat_page._indices_input[1],\n                        )\n                        .then(\n                            fn=lambda: gr.update(value=\"Indexing completed.\"),\n                            outputs=self._app.chat_page.quick_file_upload_status,\n                        )\n                        .then(\n                            fn=self.list_file,\n                            inputs=[self._app.user_id, self.filter],\n                            
outputs=[self.file_list_state, self.file_list],\n                            concurrency_limit=20,\n                        )\n                        .then(\n                            fn=lambda: True,\n                            inputs=None,\n                            outputs=None,\n                            js=chat_input_focus_js_with_submit,\n                        )\n                    )\n\n                quickURLUploadedEvent = (\n                    self._app.chat_page.quick_urls.submit(\n                        fn=lambda: gr.update(\n                            value=\"Please wait for the indexing process \"\n                            \"to complete before adding your question.\"\n                        ),\n                        outputs=self._app.chat_page.quick_file_upload_status,\n                    )\n                    .then(\n                        fn=self.index_fn_url_with_default_loaders,\n                        inputs=[\n                            self._app.chat_page.quick_urls,\n                            gr.State(value=False),\n                            self._app.settings_state,\n                            self._app.user_id,\n                        ],\n                        outputs=self.quick_upload_state,\n                        concurrency_limit=10,\n                    )\n                    .success(\n                        fn=lambda: [\n                            gr.update(value=None),\n                            gr.update(value=\"select\"),\n                        ],\n                        outputs=[\n                            self._app.chat_page.quick_urls,\n                            self._app.chat_page._indices_input[0],\n                        ],\n                    )\n                )\n                for event in self._app.get_event(f\"onFileIndex{self._index.id}Changed\"):\n                    quickURLUploadedEvent = quickURLUploadedEvent.then(**event)\n\n                quickURLUploadedEvent = 
quickURLUploadedEvent.success(\n                    fn=lambda x: x,\n                    inputs=self.quick_upload_state,\n                    outputs=self._app.chat_page._indices_input[1],\n                ).then(\n                    fn=lambda: gr.update(value=\"Indexing completed.\"),\n                    outputs=self._app.chat_page.quick_file_upload_status,\n                )\n\n                if not KH_DEMO_MODE:\n                    quickURLUploadedEvent = quickURLUploadedEvent.then(\n                        fn=self.list_file,\n                        inputs=[self._app.user_id, self.filter],\n                        outputs=[self.file_list_state, self.file_list],\n                        concurrency_limit=20,\n                    )\n\n                quickURLUploadedEvent = quickURLUploadedEvent.then(\n                    fn=lambda: True,\n                    inputs=None,\n                    outputs=None,\n                    js=chat_input_focus_js_with_submit,\n                )\n\n        except Exception as e:\n            print(e)\n\n    def on_register_events(self):\n        \"\"\"Register all events to the app\"\"\"\n        self.on_register_quick_uploads()\n\n        if KH_DEMO_MODE:\n            return\n\n        onDeleted = (\n            self.delete_button.click(\n                fn=self.delete_event,\n                inputs=[self.selected_file_id],\n                outputs=None,\n            )\n            .then(\n                fn=lambda: (None, self.selected_panel_false),\n                inputs=[],\n                outputs=[self.selected_file_id, self.selected_panel],\n                show_progress=\"hidden\",\n            )\n            .then(\n                fn=self.list_file,\n                inputs=[self._app.user_id, self.filter],\n                outputs=[self.file_list_state, self.file_list],\n            )\n            .then(\n                fn=self.file_selected,\n                inputs=[self.selected_file_id],\n                
outputs=[\n                    self.chunks,\n                    self.deselect_button,\n                    self.delete_button,\n                    self.download_single_button,\n                    self.chat_button,\n                ],\n                show_progress=\"hidden\",\n            )\n        )\n        for event in self._app.get_event(f\"onFileIndex{self._index.id}Changed\"):\n            onDeleted = onDeleted.then(**event)\n\n        self.deselect_button.click(\n            fn=lambda: (None, self.selected_panel_false),\n            inputs=[],\n            outputs=[self.selected_file_id, self.selected_panel],\n            show_progress=\"hidden\",\n        ).then(\n            fn=self.file_selected,\n            inputs=[self.selected_file_id],\n            outputs=[\n                self.chunks,\n                self.deselect_button,\n                self.delete_button,\n                self.download_single_button,\n                self.chat_button,\n            ],\n            show_progress=\"hidden\",\n        )\n\n        self.chat_button.click(\n            fn=self.set_file_id_selector,\n            inputs=[self.selected_file_id],\n            outputs=[\n                self._index.get_selector_component_ui().selector,\n                self._index.get_selector_component_ui().mode,\n                self._app.tabs,\n            ],\n        )\n\n        if not KH_SSO_ENABLED:\n            self.download_all_button.click(\n                fn=self.download_all_files,\n                inputs=[],\n                outputs=self.download_all_button,\n                show_progress=\"hidden\",\n            )\n\n        self.delete_all_button.click(\n            self.show_delete_all_confirm,\n            [self.file_list],\n            [\n                self.delete_all_button,\n                self.delete_all_button_confirm,\n                self.delete_all_button_cancel,\n            ],\n        )\n        self.delete_all_button_cancel.click(\n            lambda: 
[\n                gr.update(visible=True),\n                gr.update(visible=False),\n                gr.update(visible=False),\n            ],\n            None,\n            [\n                self.delete_all_button,\n                self.delete_all_button_confirm,\n                self.delete_all_button_cancel,\n            ],\n        )\n\n        self.delete_all_button_confirm.click(\n            fn=self.delete_all_files,\n            inputs=[self.file_list],\n            outputs=[],\n            show_progress=\"hidden\",\n        ).then(\n            fn=self.list_file,\n            inputs=[self._app.user_id, self.filter],\n            outputs=[self.file_list_state, self.file_list],\n        ).then(\n            lambda: [\n                gr.update(visible=True),\n                gr.update(visible=False),\n                gr.update(visible=False),\n            ],\n            None,\n            [\n                self.delete_all_button,\n                self.delete_all_button_confirm,\n                self.delete_all_button_cancel,\n            ],\n        )\n\n        if not KH_SSO_ENABLED:\n            self.download_single_button.click(\n                fn=self.download_single_file,\n                inputs=[self.is_zipped_state, self.selected_file_id],\n                outputs=[self.is_zipped_state, self.download_single_button],\n                show_progress=\"hidden\",\n            )\n        else:\n            self.download_single_button.click(\n                fn=self.download_single_file_simple,\n                inputs=[self.is_zipped_state, self.chunks, self.selected_file_id],\n                outputs=[self.is_zipped_state, self.download_single_button],\n                show_progress=\"hidden\",\n            )\n\n        onUploaded = (\n            self.upload_button.click(\n                fn=lambda: gr.update(visible=True),\n                outputs=[self.upload_progress_panel],\n            )\n            .then(\n                fn=self.index_fn,\n 
               inputs=[\n                    self.files,\n                    self.urls,\n                    self.reindex,\n                    self._app.settings_state,\n                    self._app.user_id,\n                ],\n                outputs=[self.upload_result, self.upload_info],\n                concurrency_limit=20,\n            )\n            .then(\n                fn=lambda: gr.update(value=\"\"),\n                outputs=[self.urls],\n            )\n        )\n\n        uploadedEvent = onUploaded.then(\n            fn=self.list_file,\n            inputs=[self._app.user_id, self.filter],\n            outputs=[self.file_list_state, self.file_list],\n            concurrency_limit=20,\n        )\n        for event in self._app.get_event(f\"onFileIndex{self._index.id}Changed\"):\n            uploadedEvent = uploadedEvent.then(**event)\n\n        _ = onUploaded.success(\n            fn=lambda: None,\n            outputs=[self.files],\n        )\n\n        self.btn_close_upload_progress_panel.click(\n            fn=lambda: (gr.update(visible=False), \"\", \"\"),\n            outputs=[self.upload_progress_panel, self.upload_result, self.upload_info],\n        )\n\n        self.file_list.select(\n            fn=self.interact_file_list,\n            inputs=[self.file_list],\n            outputs=[self.selected_file_id, self.selected_panel],\n            show_progress=\"hidden\",\n        ).then(\n            fn=self.file_selected,\n            inputs=[self.selected_file_id],\n            outputs=[\n                self.chunks,\n                self.deselect_button,\n                self.delete_button,\n                self.download_single_button,\n                self.chat_button,\n            ],\n            show_progress=\"hidden\",\n        )\n\n        self.group_list.select(\n            fn=self.interact_group_list,\n            inputs=[self.group_list_state],\n            outputs=[\n                self.group_label,\n                
self.selected_group_id,\n                self.group_name,\n                self.group_files,\n            ],\n            show_progress=\"hidden\",\n        ).then(\n            fn=lambda: (\n                gr.update(visible=True),\n                gr.update(visible=False),\n                gr.update(visible=True),\n                gr.update(visible=True),\n                gr.update(visible=True),\n            ),\n            outputs=[\n                self._group_info_panel,\n                self.group_add_button,\n                self.group_close_button,\n                self.group_delete_button,\n                self.group_chat_button,\n            ],\n        )\n\n        self.filter.submit(\n            fn=self.list_file,\n            inputs=[self._app.user_id, self.filter],\n            outputs=[self.file_list_state, self.file_list],\n            show_progress=\"hidden\",\n        )\n\n        self.group_add_button.click(\n            fn=lambda: [\n                gr.update(visible=False),\n                gr.update(value=\"### Add new group\"),\n                gr.update(visible=True),\n                gr.update(value=\"\"),\n                gr.update(value=[]),\n                None,\n            ],\n            outputs=[\n                self.group_add_button,\n                self.group_label,\n                self._group_info_panel,\n                self.group_name,\n                self.group_files,\n                self.selected_group_id,\n            ],\n        )\n\n        self.group_chat_button.click(\n            fn=self.set_group_id_selector,\n            inputs=[self.selected_group_id],\n            outputs=[\n                self._index.get_selector_component_ui().selector,\n                self._index.get_selector_component_ui().mode,\n                self._app.tabs,\n            ],\n        )\n\n        onGroupClosedEvent = {\n            \"fn\": lambda: [\n                gr.update(visible=True),\n                gr.update(visible=False),\n 
               gr.update(visible=False),\n                gr.update(visible=False),\n                gr.update(visible=False),\n                None,\n            ],\n            \"outputs\": [\n                self.group_add_button,\n                self._group_info_panel,\n                self.group_close_button,\n                self.group_delete_button,\n                self.group_chat_button,\n                self.selected_group_id,\n            ],\n        }\n        self.group_close_button.click(**onGroupClosedEvent)\n        onGroupSaved = (\n            self.group_save_button.click(\n                fn=self.save_group,\n                inputs=[\n                    self.selected_group_id,\n                    self.group_name,\n                    self.group_files,\n                    self._app.user_id,\n                ],\n            )\n            .then(\n                self.list_group,\n                inputs=[self._app.user_id, self.file_list_state],\n                outputs=[self.group_list_state, self.group_list],\n            )\n            .then(**onGroupClosedEvent)\n        )\n        onGroupDeleted = (\n            self.group_delete_button.click(\n                fn=self.delete_group,\n                inputs=[self.selected_group_id],\n            )\n            .then(\n                self.list_group,\n                inputs=[self._app.user_id, self.file_list_state],\n                outputs=[self.group_list_state, self.group_list],\n            )\n            .then(**onGroupClosedEvent)\n        )\n\n        for event in self._app.get_event(f\"onFileIndex{self._index.id}Changed\"):\n            onGroupDeleted = onGroupDeleted.then(**event)\n            onGroupSaved = onGroupSaved.then(**event)\n\n    def _on_app_created(self):\n        \"\"\"Called when the app is created\"\"\"\n        if KH_DEMO_MODE:\n            return\n\n        self._app.app.load(\n            self.list_file,\n            inputs=[self._app.user_id, self.filter],\n       
     outputs=[self.file_list_state, self.file_list],\n        ).then(\n            self.list_group,\n            inputs=[self._app.user_id, self.file_list_state],\n            outputs=[self.group_list_state, self.group_list],\n        ).then(\n            self.list_file_names,\n            inputs=[self.file_list_state],\n            outputs=[self.group_files],\n        )\n\n    def _may_extract_zip(self, files, zip_dir: str):\n        \"\"\"Handle zip files\"\"\"\n        zip_files = [file for file in files if file.endswith(\".zip\")]\n        remaining_files = [file for file in files if not file.endswith(\"zip\")]\n        errors: list[str] = []\n\n        # Clean-up <zip_dir> before unzip to remove old files\n        shutil.rmtree(zip_dir, ignore_errors=True)\n\n        # Unzip\n        for zip_file in zip_files:\n            # Prepare new zip output dir, separated for each files\n            basename = os.path.splitext(os.path.basename(zip_file))[0]\n            zip_out_dir = os.path.join(zip_dir, basename)\n            os.makedirs(zip_out_dir, exist_ok=True)\n\n            with zipfile.ZipFile(zip_file, \"r\") as zip_ref:\n                zip_ref.extractall(zip_out_dir)\n\n        n_zip_file = 0\n        for root, dirs, files in os.walk(zip_dir):\n            for file in files:\n                ext = os.path.splitext(file)[1]\n\n                # only allow supported file-types ( not zip )\n                if ext not in [\".zip\"] and ext in self._supported_file_types:\n                    remaining_files += [os.path.join(root, file)]\n                    n_zip_file += 1\n\n        if n_zip_file > 0:\n            print(f\"Update zip files: {n_zip_file}\")\n\n        return remaining_files, errors\n\n    def index_fn(\n        self, files, urls, reindex: bool, settings, user_id\n    ) -> Generator[tuple[str, str], None, None]:\n        \"\"\"Upload and index the files\n\n        Args:\n            files: the list of files to be uploaded\n            urls: list 
of web URLs to be indexed\n            reindex: whether to reindex the files\n            selected_files: the list of files already selected\n            settings: the settings of the app\n        \"\"\"\n        if urls:\n            files = [it.strip() for it in urls.split(\"\\n\")]\n            errors = self.validate_urls(files)\n        else:\n            if not files:\n                gr.Info(\"No uploaded file\")\n                yield \"\", \"\"\n                return\n            files, unzip_errors = self._may_extract_zip(\n                files, flowsettings.KH_ZIP_INPUT_DIR\n            )\n            errors = self.validate_files(files)\n            errors.extend(unzip_errors)\n\n        if errors:\n            gr.Warning(\", \".join(errors))\n            yield \"\", \"\"\n            return\n\n        gr.Info(f\"Start indexing {len(files)} files...\")\n\n        # get the pipeline\n        indexing_pipeline = self._index.get_indexing_pipeline(settings, user_id)\n\n        outputs, debugs = [], []\n        # stream the output\n        output_stream = indexing_pipeline.stream(files, reindex=reindex)\n        try:\n            while True:\n                response = next(output_stream)\n                if response is None:\n                    continue\n                if response.channel == \"index\":\n                    if response.content[\"status\"] == \"success\":\n                        outputs.append(f\"\\u2705 | {response.content['file_name']}\")\n                    elif response.content[\"status\"] == \"failed\":\n                        outputs.append(\n                            f\"\\u274c | {response.content['file_name']}: \"\n                            f\"{response.content['message']}\"\n                        )\n                elif response.channel == \"debug\":\n                    debugs.append(response.text)\n                yield \"\\n\".join(outputs), \"\\n\".join(debugs)\n        except StopIteration as e:\n            results, 
index_errors, docs = e.value\n        except Exception as e:\n            debugs.append(f\"Error: {e}\")\n            yield \"\\n\".join(outputs), \"\\n\".join(debugs)\n            return\n\n        n_successes = len([_ for _ in results if _])\n        if n_successes:\n            gr.Info(f\"Successfully index {n_successes} files\")\n        n_errors = len([_ for _ in errors if _])\n        if n_errors:\n            gr.Warning(f\"Have errors for {n_errors} files\")\n\n        return results\n\n    def index_fn_file_with_default_loaders(\n        self, files, reindex: bool, settings, user_id\n    ) -> list[\"str\"]:\n        \"\"\"Function for quick upload with default loaders\n\n        Args:\n            files: the list of files to be uploaded\n            reindex: whether to reindex the files\n            selected_files: the list of files already selected\n            settings: the settings of the app\n        \"\"\"\n        print(\"Overriding with default loaders\")\n        exist_ids = []\n        to_process_files = []\n        for str_file_path in files:\n            file_path = Path(str(str_file_path))\n            exist_id = (\n                self._index.get_indexing_pipeline(settings, user_id)\n                .route(file_path)\n                .get_id_if_exists(file_path)\n            )\n            if exist_id:\n                exist_ids.append(exist_id)\n            else:\n                to_process_files.append(str_file_path)\n\n        returned_ids = []\n        settings = deepcopy(settings)\n        settings[f\"index.options.{self._index.id}.reader_mode\"] = \"default\"\n        settings[f\"index.options.{self._index.id}.quick_index_mode\"] = True\n        if to_process_files:\n            _iter = self.index_fn(to_process_files, [], reindex, settings, user_id)\n            try:\n                while next(_iter):\n                    pass\n            except StopIteration as e:\n                returned_ids = e.value\n\n        return exist_ids + 
returned_ids\n\n    def index_fn_url_with_default_loaders(\n        self,\n        urls,\n        reindex: bool,\n        settings,\n        user_id,\n        request: gr.Request,\n    ):\n        if KH_DEMO_MODE:\n            check_rate_limit(\"file_upload\", request)\n\n        returned_ids: list[str] = []\n        settings = deepcopy(settings)\n        settings[f\"index.options.{self._index.id}.reader_mode\"] = \"default\"\n        settings[f\"index.options.{self._index.id}.quick_index_mode\"] = True\n\n        if KH_DEMO_MODE:\n            urls_splitted = urls.split(\"\\n\")\n            if not all(is_arxiv_url(url) for url in urls_splitted):\n                raise ValueError(\"All URLs must be valid arXiv URLs\")\n\n            output_files = [\n                download_arxiv_pdf(\n                    url,\n                    output_path=os.environ.get(\"GRADIO_TEMP_DIR\", \"/tmp\"),\n                )\n                for url in urls_splitted\n            ]\n\n            exist_ids = []\n            to_process_files = []\n            for str_file_path in output_files:\n                file_path = Path(str_file_path)\n                exist_id = (\n                    self._index.get_indexing_pipeline(settings, user_id)\n                    .route(file_path)\n                    .get_id_if_exists(file_path)\n                )\n                if exist_id:\n                    exist_ids.append(exist_id)\n                else:\n                    to_process_files.append(str_file_path)\n\n            returned_ids = []\n            if to_process_files:\n                _iter = self.index_fn(to_process_files, [], reindex, settings, user_id)\n                try:\n                    while next(_iter):\n                        pass\n                except StopIteration as e:\n                    returned_ids = e.value\n\n            returned_ids = exist_ids + returned_ids\n        else:\n            if urls:\n                _iter = self.index_fn([], urls, reindex, 
settings, user_id)\n                try:\n                    while next(_iter):\n                        pass\n                except StopIteration as e:\n                    returned_ids = e.value\n\n        return returned_ids\n\n    def index_files_from_dir(\n        self, folder_path, reindex, settings, user_id\n    ) -> Generator[tuple[str, str], None, None]:\n        \"\"\"This should be constructable by users\n\n        It means that the users can build their own index.\n        Build your own index:\n            - Input:\n                - Type: based on the type, then there are ranges of. Use can select\n                multiple panels:\n                    - Panels\n                    - Data sources\n                    - Include patterns\n                    - Exclude patterns\n                - Indexing functions. Can be a list of indexing functions. Each declared\n                function is:\n                    - Condition (the source that will go through this indexing function)\n                    - Function (the pipeline that run this)\n            - Output: artifacts that can be used to -> this is the artifacts that we\n            wish\n                - Build the UI\n                    - Upload page: fixed standard, based on the type\n                    - Read page: fixed standard, based on the type\n                    - Delete page: fixed standard, based on the type\n                - Build the index function\n                - Build the chat function\n\n        Step:\n            1. Decide on the artifacts\n            2. 
Implement the transformation from artifacts to UI\n        \"\"\"\n        if not folder_path:\n            yield \"\", \"\"\n            return\n\n        import fnmatch\n        from pathlib import Path\n\n        include_patterns: list[str] = []\n        exclude_patterns: list[str] = [\"*.png\", \"*.gif\", \"*/.*\"]\n        if include_patterns and exclude_patterns:\n            raise ValueError(\"Cannot have both include and exclude patterns\")\n\n        # clean up the include patterns\n        for idx in range(len(include_patterns)):\n            if include_patterns[idx].startswith(\"*\"):\n                include_patterns[idx] = str(Path.cwd() / \"**\" / include_patterns[idx])\n            else:\n                include_patterns[idx] = str(\n                    Path.cwd() / include_patterns[idx].strip(\"/\")\n                )\n\n        # clean up the exclude patterns\n        for idx in range(len(exclude_patterns)):\n            if exclude_patterns[idx].startswith(\"*\"):\n                exclude_patterns[idx] = str(Path.cwd() / \"**\" / exclude_patterns[idx])\n            else:\n                exclude_patterns[idx] = str(\n                    Path.cwd() / exclude_patterns[idx].strip(\"/\")\n                )\n\n        # get the files\n        files: list[str] = [str(p) for p in Path(folder_path).glob(\"**/*.*\")]\n        if include_patterns:\n            for p in include_patterns:\n                files = fnmatch.filter(names=files, pat=p)\n\n        if exclude_patterns:\n            for p in exclude_patterns:\n                files = [f for f in files if not fnmatch.fnmatch(name=f, pat=p)]\n\n        yield from self.index_fn(files, [], reindex, settings, user_id)\n\n    def format_size_human_readable(self, num: float | str, suffix=\"B\"):\n        try:\n            num = float(num)\n        except ValueError:\n            return num\n\n        for unit in (\"\", \"K\", \"M\", \"G\", \"T\", \"P\", \"E\", \"Z\"):\n            if abs(num) < 1024.0:\n     
           return f\"{num:3.0f}{unit}{suffix}\"\n            num /= 1024.0\n        return f\"{num:.0f}Yi{suffix}\"\n\n    def list_file(self, user_id, name_pattern=\"\"):\n        if user_id is None:\n            # not signed in\n            return [], pd.DataFrame.from_records(\n                [\n                    {\n                        \"id\": \"-\",\n                        \"name\": \"-\",\n                        \"size\": \"-\",\n                        \"tokens\": \"-\",\n                        \"loader\": \"-\",\n                        \"date_created\": \"-\",\n                    }\n                ]\n            )\n\n        Source = self._index._resources[\"Source\"]\n        with Session(engine) as session:\n            statement = select(Source)\n            if self._index.config.get(\"private\", False):\n                statement = statement.where(Source.user == user_id)\n            if name_pattern:\n                statement = statement.where(Source.name.ilike(f\"%{name_pattern}%\"))\n            results = [\n                {\n                    \"id\": each[0].id,\n                    \"name\": each[0].name,\n                    \"size\": self.format_size_human_readable(each[0].size),\n                    \"tokens\": self.format_size_human_readable(\n                        each[0].note.get(\"tokens\", \"-\"), suffix=\"\"\n                    ),\n                    \"loader\": each[0].note.get(\"loader\", \"-\"),\n                    \"date_created\": each[0].date_created.strftime(\"%Y-%m-%d %H:%M:%S\"),\n                }\n                for each in session.execute(statement).all()\n            ]\n\n        if results:\n            file_list = pd.DataFrame.from_records(results)\n        else:\n            file_list = pd.DataFrame.from_records(\n                [\n                    {\n                        \"id\": \"-\",\n                        \"name\": \"-\",\n                        \"size\": \"-\",\n                        
\"tokens\": \"-\",\n                        \"loader\": \"-\",\n                        \"date_created\": \"-\",\n                    }\n                ]\n            )\n\n        return results, file_list\n\n    def list_file_names(self, file_list_state):\n        if file_list_state:\n            file_names = [(item[\"name\"], item[\"id\"]) for item in file_list_state]\n        else:\n            file_names = []\n\n        return gr.update(choices=file_names)\n\n    def list_group(self, user_id, file_list):\n        # supply file_list to display the file names in the group\n        if file_list:\n            file_id_to_name = {item[\"id\"]: item[\"name\"] for item in file_list}\n        else:\n            file_id_to_name = {}\n\n        if user_id is None:\n            # not signed in\n            return [], pd.DataFrame.from_records(\n                [\n                    {\n                        \"id\": \"-\",\n                        \"name\": \"-\",\n                        \"files\": \"-\",\n                        \"date_created\": \"-\",\n                    }\n                ]\n            )\n\n        FileGroup = self._index._resources[\"FileGroup\"]\n        with Session(engine) as session:\n            statement = select(FileGroup)\n            if self._index.config.get(\"private\", False):\n                statement = statement.where(FileGroup.user == user_id)\n\n            results = [\n                {\n                    \"id\": each[0].id,\n                    \"name\": each[0].name,\n                    \"files\": each[0].data.get(\"files\", []),\n                    \"date_created\": each[0].date_created.strftime(\"%Y-%m-%d %H:%M:%S\"),\n                }\n                for each in session.execute(statement).all()\n            ]\n\n        if results:\n            formated_results = deepcopy(results)\n            for item in formated_results:\n                file_names = [\n                    file_id_to_name.get(file_id, \"-\") for 
file_id in item[\"files\"]\n                ]\n                item[\"files\"] = \", \".join(\n                    f\"'{it[:MAX_FILENAME_LENGTH]}..'\"\n                    if len(it) > MAX_FILENAME_LENGTH\n                    else f\"'{it}'\"\n                    for it in file_names\n                )\n                item_count = len(file_names)\n                item_postfix = \"s\" if item_count > 1 else \"\"\n                item[\"files\"] = f\"[{item_count} item{item_postfix}] \" + item[\"files\"]\n\n            group_list = pd.DataFrame.from_records(formated_results)\n        else:\n            group_list = pd.DataFrame.from_records(\n                [\n                    {\n                        \"id\": \"-\",\n                        \"name\": \"-\",\n                        \"files\": \"-\",\n                        \"date_created\": \"-\",\n                    }\n                ]\n            )\n\n        return results, group_list\n\n    def set_group_id_selector(self, selected_group_id):\n        FileGroup = self._index._resources[\"FileGroup\"]\n\n        # check if group_name exist\n        with Session(engine) as session:\n            current_group = (\n                session.query(FileGroup).filter_by(id=selected_group_id).first()\n            )\n\n        file_ids = [json.dumps(current_group.data[\"files\"])]\n        return [file_ids, \"select\", gr.Tabs(selected=\"chat-tab\")]\n\n    def save_group(self, group_id, group_name, group_files, user_id):\n        FileGroup = self._index._resources[\"FileGroup\"]\n        current_group = None\n\n        # check if group_name exist\n        with Session(engine) as session:\n            if group_id:\n                current_group = session.query(FileGroup).filter_by(id=group_id).first()\n                # update current group with new info\n                current_group.name = group_name\n                current_group.data[\"files\"] = group_files  # Update the files\n                
session.commit()\n            else:\n                current_group = (\n                    session.query(FileGroup)\n                    .filter_by(\n                        name=group_name,\n                        user=user_id,\n                    )\n                    .first()\n                )\n                if current_group:\n                    raise gr.Error(f\"Group {group_name} already exists\")\n\n                current_group = FileGroup(\n                    name=group_name,\n                    data={\"files\": group_files},  # type: ignore\n                    user=user_id,\n                )\n                session.add(current_group)\n                session.commit()\n\n            group_id = current_group.id\n\n        gr.Info(f\"Group {group_name} has been saved\")\n        return group_id\n\n    def delete_group(self, group_id):\n        if not group_id:\n            raise gr.Error(\"No group is selected\")\n\n        FileGroup = self._index._resources[\"FileGroup\"]\n        with Session(engine) as session:\n            group = session.execute(\n                select(FileGroup).where(FileGroup.id == group_id)\n            ).first()\n            if group:\n                item = group[0]\n                group_name = item.name\n                session.delete(item)\n                session.commit()\n                gr.Info(f\"Group {group_name} has been deleted\")\n            else:\n                raise gr.Error(\"No group found\")\n\n        return None\n\n    def interact_file_list(self, list_files, ev: gr.SelectData):\n        if ev.value == \"-\" and ev.index[0] == 0:\n            gr.Info(\"No file is uploaded\")\n            return None, self.selected_panel_false\n\n        if not ev.selected:\n            return None, self.selected_panel_false\n\n        return list_files[\"id\"][ev.index[0]], self.selected_panel_true.format(\n            name=list_files[\"name\"][ev.index[0]]\n        )\n\n    def interact_group_list(self, 
list_groups, ev: gr.SelectData):\n        selected_id = ev.index[0]\n        if (not ev.value or ev.value == \"-\") and selected_id == 0:\n            raise gr.Error(\"No group is selected\")\n\n        selected_item = list_groups[selected_id]\n        selected_group_id = selected_item[\"id\"]\n        return (\n            \"### Group Information\",\n            selected_group_id,\n            selected_item[\"name\"],\n            selected_item[\"files\"],\n        )\n\n    def validate_files(self, files: list[str]):\n        \"\"\"Validate if the files are valid\"\"\"\n        paths = [Path(file) for file in files]\n        errors = []\n        if max_file_size := self._index.config.get(\"max_file_size\", 0):\n            errors_max_size = []\n            for path in paths:\n                if path.stat().st_size > max_file_size * 1e6:\n                    errors_max_size.append(path.name)\n            if errors_max_size:\n                str_errors = \", \".join(errors_max_size)\n                if len(str_errors) > 60:\n                    str_errors = str_errors[:55] + \"...\"\n                errors.append(\n                    f\"Maximum file size ({max_file_size} MB) exceeded: {str_errors}\"\n                )\n\n        if max_number_of_files := self._index.config.get(\"max_number_of_files\", 0):\n            with Session(engine) as session:\n                current_num_files = session.query(\n                    self._index._resources[\"Source\"].id\n                ).count()\n            if len(paths) + current_num_files > max_number_of_files:\n                errors.append(\n                    f\"Maximum number of files ({max_number_of_files}) will be exceeded\"\n                )\n\n        return errors\n\n    def validate_urls(self, urls: list[str]):\n        \"\"\"Validate if the urls are valid\"\"\"\n        errors = []\n        for url in urls:\n            if not url.startswith(\"http\") and not url.startswith(\"https\"):\n                
errors.append(f\"Invalid url `{url}`\")\n        return errors\n\n\nclass FileSelector(BasePage):\n    \"\"\"File selector UI in the Chat page\"\"\"\n\n    def __init__(self, app, index):\n        super().__init__(app)\n        self._index = index\n        self.on_building_ui()\n\n    def default(self):\n        if self._app.f_user_management:\n            return \"disabled\", [], -1\n        return \"disabled\", [], 1\n\n    def on_building_ui(self):\n        default_mode, default_selector, user_id = self.default()\n\n        self.mode = gr.Radio(\n            value=default_mode,\n            choices=[\n                (\"Search All\", \"all\"),\n                (\"Search In File(s)\", \"select\"),\n            ],\n            container=False,\n        )\n        self.selector = gr.Dropdown(\n            label=\"Files\",\n            value=default_selector,\n            choices=[],\n            multiselect=True,\n            container=False,\n            interactive=True,\n            visible=False,\n        )\n        self.selector_user_id = gr.State(value=user_id)\n        self.selector_choices = gr.JSON(\n            value=[],\n            visible=False,\n        )\n\n    def on_register_events(self):\n        self.mode.change(\n            fn=lambda mode, user_id: (gr.update(visible=mode == \"select\"), user_id),\n            inputs=[self.mode, self._app.user_id],\n            outputs=[self.selector, self.selector_user_id],\n        )\n        # attach special event for the first index\n        if self._index.id == 1:\n            self.selector_choices.change(\n                fn=None,\n                inputs=[self.selector_choices],\n                js=update_file_list_js,\n                show_progress=\"hidden\",\n            )\n\n    def as_gradio_component(self):\n        return [self.mode, self.selector, self.selector_user_id]\n\n    def get_selected_ids(self, components):\n        mode, selected, user_id = components[0], components[1], components[2]\n   
     if user_id is None:\n            return []\n\n        if mode == \"disabled\":\n            return []\n        elif mode == \"select\":\n            return selected\n\n        file_ids = []\n        with Session(engine) as session:\n            statement = select(self._index._resources[\"Source\"].id)\n            if self._index.config.get(\"private\", False):\n                statement = statement.where(\n                    self._index._resources[\"Source\"].user == user_id\n                )\n            results = session.execute(statement).all()\n            for (id,) in results:\n                file_ids.append(id)\n\n        return file_ids\n\n    def load_files(self, selected_files, user_id):\n        options: list = []\n        available_ids = []\n        if user_id is None:\n            # not signed in\n            return gr.update(value=selected_files, choices=options), options\n\n        with Session(engine) as session:\n            # get file list from Source table\n            statement = select(self._index._resources[\"Source\"])\n            if self._index.config.get(\"private\", False):\n                statement = statement.where(\n                    self._index._resources[\"Source\"].user == user_id\n                )\n\n            if KH_DEMO_MODE:\n                # limit query by MAX_FILE_COUNT\n                statement = statement.limit(MAX_FILE_COUNT)\n\n            results = session.execute(statement).all()\n            for result in results:\n                available_ids.append(result[0].id)\n                options.append((result[0].name, result[0].id))\n\n            # get group list from FileGroup table\n            FileGroup = self._index._resources[\"FileGroup\"]\n            statement = select(FileGroup)\n            if self._index.config.get(\"private\", False):\n                statement = statement.where(FileGroup.user == user_id)\n            results = session.execute(statement).all()\n            for result in results:\n  
              item = result[0]\n                options.append(\n                    (f\"group: '{item.name}'\", json.dumps(item.data.get(\"files\", [])))\n                )\n\n        if selected_files:\n            available_ids_set = set(available_ids)\n            selected_files = [\n                each for each in selected_files if each in available_ids_set\n            ]\n\n        return gr.update(value=selected_files, choices=options), options\n\n    def _on_app_created(self):\n        self._app.app.load(\n            self.load_files,\n            inputs=[self.selector, self._app.user_id],\n            outputs=[self.selector, self.selector_choices],\n        )\n\n    def on_subscribe_public_events(self):\n        self._app.subscribe_event(\n            name=f\"onFileIndex{self._index.id}Changed\",\n            definition={\n                \"fn\": self.load_files,\n                \"inputs\": [self.selector, self._app.user_id],\n                \"outputs\": [self.selector, self.selector_choices],\n                \"show_progress\": \"hidden\",\n            },\n        )\n        if self._app.f_user_management:\n            for event_name in [\"onSignIn\", \"onSignOut\"]:\n                self._app.subscribe_event(\n                    name=event_name,\n                    definition={\n                        \"fn\": self.load_files,\n                        \"inputs\": [self.selector, self._app.user_id],\n                        \"outputs\": [self.selector, self.selector_choices],\n                        \"show_progress\": \"hidden\",\n                    },\n                )\n"
  },
  {
    "path": "libs/ktem/ktem/index/file/utils.py",
    "content": "import os\n\nimport requests\n\n# regex patterns for Arxiv URL\nARXIV_URL_PATTERNS = [\n    \"https://arxiv.org/abs/\",\n    \"https://arxiv.org/pdf/\",\n]\n\nILLEGAL_NAME_CHARS = [\"\\\\\", \"/\", \":\", \"*\", \"?\", '\"', \"<\", \">\", \"|\"]\n\n\ndef clean_name(name):\n    for char in ILLEGAL_NAME_CHARS:\n        name = name.replace(char, \"_\")\n    return name\n\n\ndef is_arxiv_url(url):\n    return any(url.startswith(pattern) for pattern in ARXIV_URL_PATTERNS)\n\n\n# download PDF from Arxiv URL\ndef download_arxiv_pdf(url, output_path):\n    if not is_arxiv_url(url):\n        raise ValueError(\"Invalid Arxiv URL\")\n\n    is_abstract_url = \"abs\" in url\n    if is_abstract_url:\n        pdf_url = url.replace(\"abs\", \"pdf\")\n        abstract_url = url\n    else:\n        pdf_url = url\n        abstract_url = url.replace(\"pdf\", \"abs\")\n\n    # get paper name from abstract url\n    response = requests.get(abstract_url)\n\n    # parse HTML response and get h1.title\n    from bs4 import BeautifulSoup\n\n    soup = BeautifulSoup(response.content, \"html.parser\")\n    name = clean_name(\n        soup.find(\"h1\", class_=\"title\").text.strip().replace(\"Title:\", \"\")\n    )\n    if not name:\n        raise ValueError(\"Failed to get paper name\")\n\n    output_file_path = os.path.join(output_path, name + \".pdf\")\n    # prevent downloading if file already exists\n    if not os.path.exists(output_file_path):\n        response = requests.get(pdf_url)\n\n        with open(output_file_path, \"wb\") as f:\n            f.write(response.content)\n\n    return output_file_path\n"
  },
  {
    "path": "libs/ktem/ktem/index/manager.py",
    "content": "from typing import Optional, Type\n\nfrom ktem.db.models import engine\nfrom sqlmodel import Session, select\nfrom theflow.settings import settings\nfrom theflow.utils.modules import import_dotted_string\n\nfrom .base import BaseIndex\nfrom .models import Index\n\n\nclass IndexManager:\n    \"\"\"Manage the application indices\n\n    The index manager is responsible for:\n        - Managing the range of possible indices and their extensions\n        - Each actual index built by user\n\n    Attributes:\n        - indices: list of indices built by user\n    \"\"\"\n\n    def __init__(self, app):\n        self._app = app\n        self._indices = []\n        self._index_types: dict[str, Type[BaseIndex]] = {}\n\n    @property\n    def index_types(self) -> dict:\n        \"\"\"List the index_type of the index\"\"\"\n        return self._index_types\n\n    def build_index(self, name: str, config: dict, index_type: str):\n        \"\"\"Build the index\n\n        Building the index simply means recording the index information into the\n        database and returning the index object.\n\n        Args:\n            name (str): the name of the index\n            config (dict): the config of the index\n            index_type (str): the type of the index\n            id (int, optional): the id of the index. If None, the id will be\n                generated automatically. 
Defaults to None.\n\n        Returns:\n            BaseIndex: the index object\n        \"\"\"\n\n        with Session(engine) as sess:\n            entry = Index(name=name, config=config, index_type=index_type)\n            sess.add(entry)\n            sess.commit()\n            sess.refresh(entry)\n\n            try:\n                # build the index\n                index_cls = import_dotted_string(index_type, safe=False)\n                index = index_cls(app=self._app, id=entry.id, name=name, config=config)\n                index.on_create()\n\n                # update the entry\n                entry.config = index.config\n                sess.commit()\n            except Exception as e:\n                sess.delete(entry)\n                sess.commit()\n                raise ValueError(f'Cannot create index \"{name}\": {e}')\n\n        return index\n\n    def update_index(self, id: int, name: str, config: dict):\n        \"\"\"Update the index information\n\n        Args:\n            id: the id of the index\n            name: the new name of the index\n            config: the new config of the index\n        \"\"\"\n        with Session(engine) as sess:\n            entry = sess.get(Index, id)\n            if entry is None:\n                raise ValueError(f\"Index with id {id} does not exist\")\n\n            entry.name = name\n            entry.config = config\n            sess.commit()\n\n        for index in self._indices:\n            if index.id == id:\n                index.name = name\n                index.config = config\n                break\n\n    def start_index(self, id: int, name: str, config: dict, index_type: str):\n        \"\"\"Start the index\n\n        Args:\n            id (int): the id of the index\n            name (str): the name of the index\n            config (dict): the config of the index\n            index_type (str): the type of the index\n        \"\"\"\n        index_cls = import_dotted_string(index_type, safe=False)\n   
     index = index_cls(app=self._app, id=id, name=name, config=config)\n        index.on_start()\n\n        self._indices.append(index)\n        return index\n\n    def delete_index(self, id: int):\n        \"\"\"Delete the index from the database\"\"\"\n        index: Optional[BaseIndex] = None\n        for _ in self._indices:\n            if _.id == id:\n                index = _\n                break\n\n        if index is None:\n            raise ValueError(\n                \"Index does not exist. If you have already removed the index, \"\n                \"please restart to reflect the changes.\"\n            )\n\n        try:\n            try:\n                # clean up\n                index.on_delete()\n            except Exception as e:\n                print(f\"Error while deleting index {index.name}: {e}\")\n\n            # remove from database\n            with Session(engine) as sess:\n                item = sess.query(Index).filter_by(id=id).first()\n                sess.delete(item)\n                sess.commit()\n\n            new_indices = [_ for _ in self._indices if _.id != id]\n            self._indices = new_indices\n        except Exception as e:\n            raise ValueError(f\"Cannot delete index {index.name}: {e}\")\n\n    def load_index_types(self):\n        \"\"\"Load the supported index types\"\"\"\n        self._index_types = {}\n\n        # built-in index types\n        from .file.index import FileIndex\n\n        for index in [FileIndex]:\n            self._index_types[f\"{index.__module__}.{index.__qualname__}\"] = index\n\n        # developer-defined custom index types\n        for index_str in settings.KH_INDEX_TYPES:\n            cls: Type[BaseIndex] = import_dotted_string(index_str, safe=False)\n            self._index_types[f\"{cls.__module__}.{cls.__qualname__}\"] = cls\n\n    def exists(self, id: Optional[int] = None, name: Optional[str] = None) -> bool:\n        \"\"\"Check if the index exists\n\n        Args:\n            
id (int): the id of the index\n\n        Returns:\n            bool: True if the index exists, False otherwise\n        \"\"\"\n        if id:\n            with Session(engine) as sess:\n                index = sess.get(Index, id)\n                return index is not None\n\n        if name:\n            with Session(engine) as sess:\n                index = sess.exec(select(Index).where(Index.name == name)).one_or_none()\n                return index is not None\n\n        return False\n\n    def on_application_startup(self):\n        \"\"\"This method is called by the base application when the application starts\n\n        Load the index from database\n        \"\"\"\n        self.load_index_types()\n\n        for index in settings.KH_INDICES:\n            if not self.exists(name=index[\"name\"]):\n                self.build_index(**index)\n\n        with Session(engine) as sess:\n            index_defs = sess.exec(select(Index))\n            for index_def in index_defs:\n                self.start_index(**index_def.model_dump())\n\n    @property\n    def indices(self):\n        return self._indices\n\n    def info(self):\n        return {index.id: index for index in self._indices}\n"
  },
  {
    "path": "libs/ktem/ktem/index/models.py",
    "content": "from typing import Optional\n\nfrom ktem.db.engine import engine\nfrom sqlalchemy import JSON, Column\nfrom sqlmodel import Field, SQLModel\n\n\n# TODO: simplify with using SQLAlchemy directly\nclass Index(SQLModel, table=True):\n    __table_args__ = {\"extend_existing\": True}\n    __tablename__ = \"ktem__index\"  # type: ignore\n\n    id: Optional[int] = Field(default=None, primary_key=True)\n    name: str = Field(unique=True)\n    index_type: str = Field()\n    config: dict = Field(default={}, sa_column=Column(JSON))\n\n\nIndex.metadata.create_all(engine)\n"
  },
  {
    "path": "libs/ktem/ktem/index/ui.py",
    "content": "import gradio as gr\nimport pandas as pd\nimport yaml\nfrom ktem.app import BasePage\nfrom ktem.utils.file import YAMLNoDateSafeLoader\n\nfrom .manager import IndexManager\n\n\n# UGLY way to restart gradio server by updating atime\ndef update_current_module_atime():\n    import os\n    import time\n\n    # Define the file path\n    file_path = __file__\n    print(\"Updating atime for\", file_path)\n\n    # Get the current time\n    current_time = time.time()\n    # Set the modified time (and access time) to the current time\n    os.utime(file_path, (current_time, current_time))\n\n\ndef format_description(cls):\n    user_settings = cls.get_admin_settings()\n    params_lines = [\"| Name | Default | Description |\", \"| --- | --- | --- |\"]\n    for key, value in user_settings.items():\n        params_lines.append(\n            f\"| {key} | {value.get('value', '')} | {value.get('info', '')} |\"\n        )\n    return f\"{cls.__doc__}\\n\\n\" + \"\\n\".join(params_lines)\n\n\nclass IndexManagement(BasePage):\n    def __init__(self, app):\n        self._app = app\n        self.manager: IndexManager = app.index_manager\n        self.spec_desc_default = (\n            \"# Spec description\\n\\nSelect an index to view the spec description.\"\n        )\n        self.on_building_ui()\n\n    def on_building_ui(self):\n        with gr.Tab(label=\"View\"):\n            self.index_list = gr.DataFrame(\n                headers=[\"id\", \"name\", \"index type\"],\n                interactive=False,\n                column_widths=[10, 30, 60],\n            )\n\n            with gr.Column(visible=False) as self._selected_panel:\n                self.selected_index_id = gr.Number(value=-1, visible=False)\n                with gr.Row():\n                    with gr.Column():\n                        self.edit_name = gr.Textbox(\n                            label=\"Index name\",\n                        )\n                        self.edit_spec = gr.Textbox(\n         
                   label=\"Index config\",\n                            info=\"Admin configuration of the Index in YAML format\",\n                            lines=10,\n                        )\n\n                        gr.Markdown(\n                            \"IMPORTANT: Changing or deleting the index will require \"\n                            \"restarting the system. Some config settings will require \"\n                            \"rebuilding the index for the index to work properly.\"\n                        )\n                        with gr.Row():\n                            self.btn_edit_save = gr.Button(\n                                \"Save\", min_width=10, variant=\"primary\"\n                            )\n                            self.btn_delete = gr.Button(\n                                \"Delete\", min_width=10, variant=\"stop\"\n                            )\n                            with gr.Row(visible=False) as self._delete_confirm:\n                                self.btn_delete_yes = gr.Button(\n                                    \"Confirm Delete\",\n                                    variant=\"stop\",\n                                    min_width=10,\n                                )\n                                self.btn_delete_no = gr.Button(\"Cancel\", min_width=10)\n                            self.btn_close = gr.Button(\"Close\", min_width=10)\n\n                    with gr.Column():\n                        self.edit_spec_desc = gr.Markdown(\"# Spec description\")\n\n        with gr.Tab(label=\"Add\"):\n            with gr.Row():\n                with gr.Column(scale=2):\n                    self.name = gr.Textbox(\n                        label=\"Index name\",\n                        info=\"Must be unique and non-empty.\",\n                    )\n                    self.index_type = gr.Dropdown(label=\"Index type\")\n                    self.spec = gr.Textbox(\n                        
label=\"Specification\",\n                        info=\"Specification of the index in YAML format.\",\n                    )\n                    gr.Markdown(\n                        \"<mark>Note</mark>: \"\n                        \"After creating index, please restart the app\"\n                    )\n                    self.btn_new = gr.Button(\"Add\", variant=\"primary\")\n\n                with gr.Column(scale=3):\n                    self.spec_desc = gr.Markdown(self.spec_desc_default)\n\n    def _on_app_created(self):\n        \"\"\"Called when the app is created\"\"\"\n        self._app.app.load(\n            self.list_indices,\n            inputs=[],\n            outputs=[self.index_list],\n        )\n        self._app.app.load(\n            lambda: gr.update(\n                choices=[\n                    (key.split(\".\")[-1], key) for key in self.manager.index_types.keys()\n                ]\n            ),\n            outputs=[self.index_type],\n        )\n\n    def on_register_events(self):\n        self.index_type.select(\n            self.on_index_type_change,\n            inputs=[self.index_type],\n            outputs=[self.spec, self.spec_desc],\n        )\n        self.btn_new.click(\n            self.create_index,\n            inputs=[self.name, self.index_type, self.spec],\n            outputs=None,\n        ).success(self.list_indices, inputs=[], outputs=[self.index_list]).success(\n            lambda: (\"\", None, \"\", self.spec_desc_default),\n            outputs=[\n                self.name,\n                self.index_type,\n                self.spec,\n                self.spec_desc,\n            ],\n        ).success(\n            update_current_module_atime\n        )\n        self.index_list.select(\n            self.select_index,\n            inputs=self.index_list,\n            outputs=[self.selected_index_id],\n            show_progress=\"hidden\",\n        )\n\n        self.selected_index_id.change(\n            
self.on_selected_index_change,\n            inputs=[self.selected_index_id],\n            outputs=[\n                self._selected_panel,\n                # edit section\n                self.edit_spec,\n                self.edit_spec_desc,\n                self.edit_name,\n            ],\n            show_progress=\"hidden\",\n        )\n        self.btn_delete.click(\n            lambda: (\n                gr.update(visible=False),\n                gr.update(visible=False),\n                gr.update(visible=False),\n                gr.update(visible=True),\n            ),\n            inputs=[],\n            outputs=[\n                self.btn_edit_save,\n                self.btn_delete,\n                self.btn_close,\n                self._delete_confirm,\n            ],\n            show_progress=\"hidden\",\n        )\n        self.btn_delete_yes.click(\n            self.delete_index,\n            inputs=[self.selected_index_id],\n            outputs=[self.selected_index_id],\n            show_progress=\"hidden\",\n        ).then(self.list_indices, inputs=[], outputs=[self.index_list],).success(\n            update_current_module_atime\n        )\n        self.btn_delete_no.click(\n            lambda: (\n                gr.update(visible=True),\n                gr.update(visible=True),\n                gr.update(visible=True),\n                gr.update(visible=False),\n            ),\n            inputs=[],\n            outputs=[\n                self.btn_edit_save,\n                self.btn_delete,\n                self.btn_close,\n                self._delete_confirm,\n            ],\n            show_progress=\"hidden\",\n        )\n        self.btn_edit_save.click(\n            self.update_index,\n            inputs=[\n                self.selected_index_id,\n                self.edit_name,\n                self.edit_spec,\n            ],\n            show_progress=\"hidden\",\n        ).then(\n            self.list_indices,\n            inputs=[],\n  
          outputs=[self.index_list],\n        )\n        self.btn_close.click(\n            lambda: -1,\n            outputs=[self.selected_index_id],\n        )\n\n    def on_index_type_change(self, index_type: str):\n        \"\"\"Update the spec description and pre-fill the default values\n\n        Args:\n            index_type: the name of the index type, this is usually the class name\n\n        Returns:\n            A tuple of the default spec and the description\n        \"\"\"\n        index_type_cls = self.manager.index_types[index_type]\n        required: dict = {\n            key: value.get(\"value\", None)\n            for key, value in index_type_cls.get_admin_settings().items()\n        }\n\n        return yaml.dump(required, sort_keys=False), format_description(index_type_cls)\n\n    def create_index(self, name: str, index_type: str, config: str):\n        \"\"\"Create the index\"\"\"\n        name = name.strip()\n        if not name:\n            raise gr.Error(\"Name must not be empty\")\n\n        existing_names = {idx.name for idx in self.manager.indices}\n        if name in existing_names:\n            raise gr.Error(f\"Index '{name}' already exists. Please use a unique name.\")\n\n        try:\n            self.manager.build_index(\n                name=name,\n                config=yaml.load(config, Loader=YAMLNoDateSafeLoader),\n                index_type=index_type,\n            )\n            gr.Info(f'Index \"{name}\" created successfully. 
Please restart the app!')\n        except Exception as e:\n            raise gr.Error(f'Failed to create index \"{name}\": {e}')\n\n    def list_indices(self):\n        \"\"\"List the indices constructed by the user\"\"\"\n        items = []\n        for item in self.manager.indices:\n            record = {}\n            record[\"id\"] = item.id\n            record[\"name\"] = item.name\n            record[\"index type\"] = item.__class__.__name__\n            items.append(record)\n\n        if items:\n            indices_list = pd.DataFrame.from_records(items)\n        else:\n            indices_list = pd.DataFrame.from_records(\n                [{\"id\": \"-\", \"name\": \"-\", \"index type\": \"-\"}]\n            )\n\n        return indices_list\n\n    def select_index(self, index_list, ev: gr.SelectData) -> int:\n        \"\"\"Return the index id\"\"\"\n        if ev.value == \"-\" and ev.index[0] == 0:\n            gr.Info(\"No index is constructed. Please create one first!\")\n            return -1\n\n        if not ev.selected:\n            return -1\n\n        return int(index_list[\"id\"][ev.index[0]])\n\n    def on_selected_index_change(self, selected_index_id: int):\n        \"\"\"Show the relevant index as user selects it on the UI\n\n        Args:\n            selected_index_id: the id of the selected index\n        \"\"\"\n        if selected_index_id == -1:\n            _selected_panel = gr.update(visible=False)\n            edit_spec = gr.update(value=\"\")\n            edit_spec_desc = gr.update(value=\"\")\n            edit_name = gr.update(value=\"\")\n        else:\n            _selected_panel = gr.update(visible=True)\n            index = self.manager.info()[selected_index_id]\n            edit_spec = yaml.dump(index.config)\n            edit_spec_desc = format_description(index.__class__)\n            edit_name = index.name\n\n        return (\n            _selected_panel,\n            edit_spec,\n            edit_spec_desc,\n            
edit_name,\n        )\n\n    def update_index(self, selected_index_id: int, name: str, config: str):\n        name = name.strip()\n        if not name:\n            raise gr.Error(\"Name must not be empty\")\n\n        # Check uniqueness (excluding current index)\n        for idx in self.manager.indices:\n            if idx.name == name and idx.id != selected_index_id:\n                raise gr.Error(\n                    f\"Index '{name}' already exists. Please use a unique name.\"\n                )\n\n        try:\n            spec = yaml.load(config, Loader=YAMLNoDateSafeLoader)\n            self.manager.update_index(selected_index_id, name, spec)\n            gr.Info(f'Index \"{name}\" updated successfully. Please restart the app!')\n        except gr.Error:\n            raise\n        except Exception as e:\n            raise gr.Error(f'Failed to save index \"{name}\": {e}')\n\n    def delete_index(self, selected_index_id):\n        try:\n            self.manager.delete_index(selected_index_id)\n            gr.Info(\"Delete index successfully. Please restart the app!\")\n        except Exception as e:\n            gr.Warning(f\"Fail to delete index: {e}\")\n            return selected_index_id\n\n        return -1\n"
  },
  {
    "path": "libs/ktem/ktem/llms/__init__.py",
    "content": ""
  },
  {
    "path": "libs/ktem/ktem/llms/db.py",
    "content": "from typing import Type\n\nfrom ktem.db.engine import engine\nfrom sqlalchemy import JSON, Boolean, Column, String\nfrom sqlalchemy.orm import DeclarativeBase\nfrom theflow.settings import settings as flowsettings\nfrom theflow.utils.modules import import_dotted_string\n\n\nclass Base(DeclarativeBase):\n    pass\n\n\nclass BaseLLMTable(Base):\n    \"\"\"Base table to store language model\"\"\"\n\n    __abstract__ = True\n\n    name = Column(String, primary_key=True, unique=True)\n    spec = Column(JSON, default={})\n    default = Column(Boolean, default=False)\n\n\n_base_llm: Type[BaseLLMTable] = (\n    import_dotted_string(flowsettings.KH_TABLE_LLM, safe=False)\n    if hasattr(flowsettings, \"KH_TABLE_LLM\")\n    else BaseLLMTable\n)\n\n\nclass LLMTable(_base_llm):  # type: ignore\n    __tablename__ = \"llm_table\"\n\n\nif not getattr(flowsettings, \"KH_ENABLE_ALEMBIC\", False):\n    LLMTable.metadata.create_all(engine)\n"
  },
  {
    "path": "libs/ktem/ktem/llms/manager.py",
    "content": "from typing import Optional, Type, overload\n\nfrom sqlalchemy import select\nfrom sqlalchemy.orm import Session\nfrom theflow.settings import settings as flowsettings\nfrom theflow.utils.modules import deserialize, import_dotted_string\n\nfrom kotaemon.llms import ChatLLM\n\nfrom .db import LLMTable, engine\n\n\nclass LLMManager:\n    \"\"\"Represent a pool of models\"\"\"\n\n    def __init__(self):\n        self._models: dict[str, ChatLLM] = {}\n        self._info: dict[str, dict] = {}\n        self._default: str = \"\"\n        self._vendors: list[Type] = []\n\n        if hasattr(flowsettings, \"KH_LLMS\"):\n            for name, model in flowsettings.KH_LLMS.items():\n                with Session(engine) as session:\n                    stmt = select(LLMTable).where(LLMTable.name == name)\n                    result = session.execute(stmt)\n                    if not result.first():\n                        item = LLMTable(\n                            name=name,\n                            spec=model[\"spec\"],\n                            default=model.get(\"default\", False),\n                        )\n                        session.add(item)\n                        session.commit()\n\n        self.load()\n        self.load_vendors()\n\n    def load(self):\n        \"\"\"Load the model pool from database\"\"\"\n        self._models, self._info, self._default = {}, {}, \"\"\n        with Session(engine) as session:\n            stmt = select(LLMTable)\n            items = session.execute(stmt)\n\n            for (item,) in items:\n                self._models[item.name] = deserialize(item.spec, safe=False)\n                self._info[item.name] = {\n                    \"name\": item.name,\n                    \"spec\": item.spec,\n                    \"default\": item.default,\n                }\n                if item.default:\n                    self._default = item.name\n\n    def load_vendors(self):\n        from kotaemon.llms 
import (\n            AzureChatOpenAI,\n            ChatOpenAI,\n            LCAnthropicChat,\n            LCCohereChat,\n            LCGeminiChat,\n            LCOllamaChat,\n            LlamaCppChat,\n        )\n\n        self._vendors = [\n            ChatOpenAI,\n            AzureChatOpenAI,\n            LCAnthropicChat,\n            LCGeminiChat,\n            LCCohereChat,\n            LCOllamaChat,\n            LlamaCppChat,\n        ]\n\n        for extra_vendor in getattr(flowsettings, \"KH_LLM_EXTRA_VENDORS\", []):\n            self._vendors.append(import_dotted_string(extra_vendor, safe=False))\n\n    def __getitem__(self, key: str) -> ChatLLM:\n        \"\"\"Get model by name\"\"\"\n        return self._models[key]\n\n    def __contains__(self, key: str) -> bool:\n        \"\"\"Check if model exists\"\"\"\n        return key in self._models\n\n    @overload\n    def get(self, key: str, default: None) -> Optional[ChatLLM]:\n        ...\n\n    @overload\n    def get(self, key: str, default: ChatLLM) -> ChatLLM:\n        ...\n\n    def get(self, key: str, default: Optional[ChatLLM] = None) -> Optional[ChatLLM]:\n        \"\"\"Get model by name with default value\"\"\"\n        return self._models.get(key, default)\n\n    def settings(self) -> dict:\n        \"\"\"Present model pools option for gradio\"\"\"\n        return {\n            \"label\": \"LLM\",\n            \"choices\": list(self._models.keys()),\n            \"value\": self.get_default_name(),\n        }\n\n    def options(self) -> dict:\n        \"\"\"Present a dict of models\"\"\"\n        return self._models\n\n    def get_random_name(self) -> str:\n        \"\"\"Get the name of random model\n\n        Returns:\n            str: random model name in the pool\n        \"\"\"\n        import random\n\n        if not self._models:\n            raise ValueError(\"No models in pool\")\n\n        return random.choice(list(self._models.keys()))\n\n    def get_default_name(self) -> str:\n        
\"\"\"Get the name of default model\n\n        In case there is no default model, choose random model from pool. In\n        case there are multiple default models, choose random from them.\n\n        Returns:\n            str: model name\n        \"\"\"\n        if not self._models:\n            raise ValueError(\"No models in pool\")\n\n        if not self._default:\n            return self.get_random_name()\n\n        return self._default\n\n    def get_random(self) -> ChatLLM:\n        \"\"\"Get random model\"\"\"\n        return self._models[self.get_random_name()]\n\n    def get_default(self) -> ChatLLM:\n        \"\"\"Get default model\n\n        In case there is no default model, choose random model from pool. In\n        case there are multiple default models, choose random from them.\n\n        Returns:\n            ChatLLM: model\n        \"\"\"\n        return self._models[self.get_default_name()]\n\n    def info(self) -> dict:\n        \"\"\"List all models\"\"\"\n        return self._info\n\n    def add(self, name: str, spec: dict, default: bool):\n        \"\"\"Add a new model to the pool\"\"\"\n        if not name:\n            raise ValueError(\"Name must not be empty\")\n\n        try:\n            with Session(engine) as session:\n\n                if default:\n                    # turn all models to non-default\n                    session.query(LLMTable).update({\"default\": False})\n                    session.commit()\n\n                item = LLMTable(name=name, spec=spec, default=default)\n                session.add(item)\n                session.commit()\n        except Exception as e:\n            raise ValueError(f\"Failed to add model {name}: {e}\")\n\n        self.load()\n\n    def delete(self, name: str):\n        \"\"\"Delete a model from the pool\"\"\"\n        try:\n            with Session(engine) as session:\n                item = session.query(LLMTable).filter_by(name=name).first()\n                session.delete(item)\n      
          session.commit()\n        except Exception as e:\n            raise ValueError(f\"Failed to delete model {name}: {e}\")\n\n        self.load()\n\n    def update(self, name: str, spec: dict, default: bool, new_name: str = \"\"):\n        \"\"\"Update a model in the pool, optionally renaming it.\"\"\"\n        if not name:\n            raise ValueError(\"Name must not be empty\")\n\n        if new_name and new_name != name:\n            # Check uniqueness before destructive delete\n            if new_name in self._info:\n                raise ValueError(\n                    f\"Model '{new_name}' already exists. Use a unique name.\"\n                )\n            self.delete(name)\n            self.add(new_name, spec=spec, default=default)\n            return\n\n        try:\n            with Session(engine) as session:\n\n                if default:\n                    # turn all models to non-default\n                    session.query(LLMTable).update({\"default\": False})\n                    session.commit()\n\n                item = session.query(LLMTable).filter_by(name=name).first()\n                if not item:\n                    raise ValueError(f\"Model {name} not found\")\n                item.spec = spec\n                item.default = default\n                session.commit()\n        except Exception as e:\n            raise ValueError(f\"Failed to update model {name}: {e}\")\n\n        self.load()\n\n    def vendors(self) -> dict:\n        \"\"\"Return list of vendors\"\"\"\n        return {vendor.__qualname__: vendor for vendor in self._vendors}\n\n\nllms = LLMManager()\n"
  },
  {
    "path": "libs/ktem/ktem/llms/ui.py",
    "content": "from copy import deepcopy\n\nimport gradio as gr\nimport pandas as pd\nimport yaml\nfrom ktem.app import BasePage\nfrom ktem.utils.file import YAMLNoDateSafeLoader\nfrom theflow.utils.modules import deserialize\n\nfrom .manager import llms\n\n\ndef format_description(cls):\n    params = cls.describe()[\"params\"]\n    params_lines = [\"| Name | Type | Description |\", \"| --- | --- | --- |\"]\n    for key, value in params.items():\n        if isinstance(value[\"auto_callback\"], str):\n            continue\n        params_lines.append(f\"| {key} | {value['type']} | {value['help']} |\")\n    return f\"{cls.__doc__}\\n\\n\" + \"\\n\".join(params_lines)\n\n\nclass LLMManagement(BasePage):\n    def __init__(self, app):\n        self._app = app\n        self.spec_desc_default = (\n            \"# Spec description\\n\\nSelect an LLM to view the spec description.\"\n        )\n        self.on_building_ui()\n\n    def on_building_ui(self):\n        with gr.Tab(label=\"View\"):\n            self.llm_list = gr.DataFrame(\n                headers=[\"name\", \"vendor\", \"default\"],\n                interactive=False,\n                column_widths=[30, 40, 30],\n            )\n\n            with gr.Column(visible=False) as self._selected_panel:\n                self.selected_llm_name = gr.Textbox(value=\"\", visible=False)\n                with gr.Row():\n                    with gr.Column():\n                        self.edit_default = gr.Checkbox(\n                            label=\"Set default\",\n                            info=(\n                                \"Set this LLM as default. If no default is set, \"\n                                \"a random LLM will be used. 
\"\n                                \"This default LLM will be used by other components \"\n                                \"by default if no LLM is specified for such components.\"\n                            ),\n                        )\n                        self.edit_name = gr.Textbox(\n                            label=\"Name\",\n                            info=\"Edit to rename this LLM.\",\n                        )\n                        self.edit_spec = gr.Textbox(\n                            label=\"Specification\",\n                            info=\"Specification of the LLM in YAML format\",\n                            lines=10,\n                        )\n\n                        with gr.Accordion(\n                            label=\"Test connection\", visible=False, open=False\n                        ) as self._check_connection_panel:\n                            with gr.Row():\n                                with gr.Column(scale=4):\n                                    self.connection_logs = gr.HTML(\"Logs\")\n\n                                with gr.Column(scale=1):\n                                    self.btn_test_connection = gr.Button(\n                                        \"Test\",\n                                    )\n\n                        with gr.Row(visible=False) as self._selected_panel_btn:\n                            with gr.Column():\n                                self.btn_edit_save = gr.Button(\n                                    \"Save\", min_width=10, variant=\"primary\"\n                                )\n                            with gr.Column():\n                                self.btn_delete = gr.Button(\n                                    \"Delete\", min_width=10, variant=\"stop\"\n                                )\n                                with gr.Row():\n                                    self.btn_delete_yes = gr.Button(\n                                        \"Confirm Delete\",\n       
                                 variant=\"stop\",\n                                        visible=False,\n                                        min_width=10,\n                                    )\n                                    self.btn_delete_no = gr.Button(\n                                        \"Cancel\", visible=False, min_width=10\n                                    )\n                            with gr.Column():\n                                self.btn_close = gr.Button(\"Close\", min_width=10)\n\n                    with gr.Column():\n                        self.edit_spec_desc = gr.Markdown(\"# Spec description\")\n\n        with gr.Tab(label=\"Add\"):\n            with gr.Row():\n                with gr.Column(scale=2):\n                    self.name = gr.Textbox(\n                        label=\"LLM name\",\n                        info=(\n                            \"Must be unique. The name will be used to identify the LLM.\"\n                        ),\n                    )\n                    self.llm_choices = gr.Dropdown(\n                        label=\"LLM vendors\",\n                        info=(\n                            \"Choose the vendor for the LLM. Each vendor has different \"\n                            \"specification.\"\n                        ),\n                    )\n                    self.spec = gr.Textbox(\n                        label=\"Specification\",\n                        info=\"Specification of the LLM in YAML format\",\n                    )\n                    self.default = gr.Checkbox(\n                        label=\"Set default\",\n                        info=(\n                            \"Set this LLM as default. 
This default LLM will be used \"\n                            \"by default across the application.\"\n                        ),\n                    )\n                    self.btn_new = gr.Button(\"Add LLM\", variant=\"primary\")\n\n                with gr.Column(scale=3):\n                    self.spec_desc = gr.Markdown(self.spec_desc_default)\n\n    def _on_app_created(self):\n        \"\"\"Called when the app is created\"\"\"\n        self._app.app.load(\n            self.list_llms,\n            inputs=[],\n            outputs=[self.llm_list],\n        )\n        self._app.app.load(\n            lambda: gr.update(choices=list(llms.vendors().keys())),\n            outputs=[self.llm_choices],\n        )\n\n    def on_llm_vendor_change(self, vendor):\n        vendor = llms.vendors()[vendor]\n\n        required: dict = {}\n        desc = vendor.describe()\n        for key, value in desc[\"params\"].items():\n            if value.get(\"required\", False):\n                required[key] = None\n\n        return yaml.dump(required), format_description(vendor)\n\n    def on_register_events(self):\n        self.llm_choices.select(\n            self.on_llm_vendor_change,\n            inputs=[self.llm_choices],\n            outputs=[self.spec, self.spec_desc],\n        )\n        self.btn_new.click(\n            self.create_llm,\n            inputs=[self.name, self.llm_choices, self.spec, self.default],\n            outputs=[],\n        ).success(self.list_llms, inputs=[], outputs=[self.llm_list]).success(\n            lambda: (\"\", None, \"\", False, self.spec_desc_default),\n            outputs=[\n                self.name,\n                self.llm_choices,\n                self.spec,\n                self.default,\n                self.spec_desc,\n            ],\n        )\n        self.llm_list.select(\n            self.select_llm,\n            inputs=self.llm_list,\n            outputs=[self.selected_llm_name],\n            show_progress=\"hidden\",\n        )\n  
      self.selected_llm_name.change(\n            self.on_selected_llm_change,\n            inputs=[self.selected_llm_name],\n            outputs=[\n                self._selected_panel,\n                self._selected_panel_btn,\n                # delete section\n                self.btn_delete,\n                self.btn_delete_yes,\n                self.btn_delete_no,\n                # edit section\n                self.edit_name,\n                self.edit_spec,\n                self.edit_spec_desc,\n                self.edit_default,\n            ],\n            show_progress=\"hidden\",\n        ).success(lambda: gr.update(value=\"\"), outputs=[self.connection_logs])\n\n        self.btn_delete.click(\n            self.on_btn_delete_click,\n            inputs=[],\n            outputs=[self.btn_delete, self.btn_delete_yes, self.btn_delete_no],\n            show_progress=\"hidden\",\n        )\n        self.btn_delete_yes.click(\n            self.delete_llm,\n            inputs=[self.selected_llm_name],\n            outputs=[self.selected_llm_name],\n            show_progress=\"hidden\",\n        ).then(\n            self.list_llms,\n            inputs=[],\n            outputs=[self.llm_list],\n        )\n        self.btn_delete_no.click(\n            lambda: (\n                gr.update(visible=True),\n                gr.update(visible=False),\n                gr.update(visible=False),\n            ),\n            inputs=[],\n            outputs=[self.btn_delete, self.btn_delete_yes, self.btn_delete_no],\n            show_progress=\"hidden\",\n        )\n        self.btn_edit_save.click(\n            self.save_llm,\n            inputs=[\n                self.selected_llm_name,\n                self.edit_name,\n                self.edit_default,\n                self.edit_spec,\n            ],\n            outputs=[self.selected_llm_name],\n            show_progress=\"hidden\",\n        ).then(\n            self.list_llms,\n            inputs=[],\n            
outputs=[self.llm_list],\n        )\n        self.btn_close.click(\n            lambda: \"\",\n            outputs=[self.selected_llm_name],\n        )\n\n        self.btn_test_connection.click(\n            self.check_connection,\n            inputs=[self.selected_llm_name, self.edit_spec],\n            outputs=[self.connection_logs],\n        )\n\n    def create_llm(self, name, choices, spec, default):\n        try:\n            name = name.strip()\n            spec = yaml.load(spec, Loader=YAMLNoDateSafeLoader)\n            spec[\"__type__\"] = (\n                llms.vendors()[choices].__module__\n                + \".\"\n                + llms.vendors()[choices].__qualname__\n            )\n\n            llms.add(name, spec=spec, default=default)\n            gr.Info(f\"LLM '{name}' created successfully\")\n        except ValueError as e:\n            raise gr.Error(str(e))\n        except Exception as e:\n            raise gr.Error(f\"Failed to create LLM '{name}': {e}\")\n\n    def list_llms(self):\n        \"\"\"List the LLMs\"\"\"\n        items = []\n        for item in llms.info().values():\n            record = {}\n            record[\"name\"] = item[\"name\"]\n            record[\"vendor\"] = item[\"spec\"].get(\"__type__\", \"-\").split(\".\")[-1]\n            record[\"default\"] = item[\"default\"]\n            items.append(record)\n\n        if items:\n            llm_list = pd.DataFrame.from_records(items)\n        else:\n            llm_list = pd.DataFrame.from_records(\n                [{\"name\": \"-\", \"vendor\": \"-\", \"default\": \"-\"}]\n            )\n\n        return llm_list\n\n    def select_llm(self, llm_list, ev: gr.SelectData):\n        if ev.value == \"-\" and ev.index[0] == 0:\n            gr.Info(\"No LLM is loaded. 
Please add LLM first\")\n            return \"\"\n\n        if not ev.selected:\n            return \"\"\n\n        return llm_list[\"name\"][ev.index[0]]\n\n    def on_selected_llm_change(self, selected_llm_name):\n        if selected_llm_name == \"\":\n            _selected_panel = gr.update(visible=False)\n            _selected_panel_btn = gr.update(visible=False)\n            btn_delete = gr.update(visible=True)\n            btn_delete_yes = gr.update(visible=False)\n            btn_delete_no = gr.update(visible=False)\n            edit_name = gr.update(value=\"\")\n            edit_spec = gr.update(value=\"\")\n            edit_spec_desc = gr.update(value=\"\")\n            edit_default = gr.update(value=False)\n        else:\n            _selected_panel = gr.update(visible=True)\n            _selected_panel_btn = gr.update(visible=True)\n            btn_delete = gr.update(visible=True)\n            btn_delete_yes = gr.update(visible=False)\n            btn_delete_no = gr.update(visible=False)\n\n            info = deepcopy(llms.info()[selected_llm_name])\n            vendor_str = info[\"spec\"].pop(\"__type__\", \"-\").split(\".\")[-1]\n            vendor = llms.vendors()[vendor_str]\n\n            edit_name = selected_llm_name\n            edit_spec = yaml.dump(info[\"spec\"])\n            edit_spec_desc = format_description(vendor)\n            edit_default = info[\"default\"]\n\n        return (\n            _selected_panel,\n            _selected_panel_btn,\n            btn_delete,\n            btn_delete_yes,\n            btn_delete_no,\n            edit_name,\n            edit_spec,\n            edit_spec_desc,\n            edit_default,\n        )\n\n    def on_btn_delete_click(self):\n        btn_delete = gr.update(visible=False)\n        btn_delete_yes = gr.update(visible=True)\n        btn_delete_no = gr.update(visible=True)\n\n        return btn_delete, btn_delete_yes, btn_delete_no\n\n    def check_connection(self, selected_llm_name: str, 
selected_spec):\n        log_content: str = \"\"\n\n        try:\n            log_content += f\"- Testing model: {selected_llm_name}<br>\"\n            yield log_content\n\n            # Parse content & init model\n            info = deepcopy(llms.info()[selected_llm_name])\n\n            # Parse content & create dummy embedding\n            spec = yaml.load(selected_spec, Loader=YAMLNoDateSafeLoader)\n            info[\"spec\"].update(spec)\n\n            llm = deserialize(info[\"spec\"], safe=False)\n\n            if llm is None:\n                raise Exception(f\"Can not found model: {selected_llm_name}\")\n\n            log_content += \"- Sending a message `Hi`<br>\"\n            yield log_content\n            respond = llm(\"Hi\")\n\n            log_content += (\n                f\"<mark style='background: green; color: white'>- Connection success. \"\n                f\"Got response:\\n {respond}</mark><br>\"\n            )\n            yield log_content\n\n            gr.Info(f\"LLM {selected_llm_name} connect successfully\")\n        except Exception as e:\n            log_content += (\n                f\"<mark style='color: yellow; background: red'>- Connection failed. 
\"\n                f\"Got error:\\n {e}</mark>\"\n            )\n            yield log_content\n\n        return log_content\n\n    def save_llm(self, selected_llm_name, edit_name, default, spec):\n        try:\n            new_name = edit_name.strip()\n            spec = yaml.load(spec, Loader=YAMLNoDateSafeLoader)\n            spec[\"__type__\"] = llms.info()[selected_llm_name][\"spec\"][\"__type__\"]\n            llms.update(\n                selected_llm_name, spec=spec, default=default, new_name=new_name\n            )\n            final_name = (\n                new_name if new_name != selected_llm_name else selected_llm_name\n            )\n            gr.Info(f\"LLM '{final_name}' saved successfully\")\n            return final_name\n        except ValueError as e:\n            raise gr.Error(str(e))\n        except Exception as e:\n            raise gr.Error(f\"Failed to save LLM '{selected_llm_name}': {e}\")\n\n    def delete_llm(self, selected_llm_name):\n        try:\n            llms.delete(selected_llm_name)\n        except Exception as e:\n            gr.Error(f\"Failed to delete LLM {selected_llm_name}: {e}\")\n            return selected_llm_name\n\n        return \"\"\n"
  },
  {
    "path": "libs/ktem/ktem/main.py",
    "content": "import gradio as gr\nfrom decouple import config\nfrom ktem.app import BaseApp\nfrom ktem.pages.chat import ChatPage\nfrom ktem.pages.help import HelpPage\nfrom ktem.pages.resources import ResourcesTab\nfrom ktem.pages.settings import SettingsPage\nfrom ktem.pages.setup import SetupPage\nfrom theflow.settings import settings as flowsettings\n\nKH_DEMO_MODE = getattr(flowsettings, \"KH_DEMO_MODE\", False)\nKH_SSO_ENABLED = getattr(flowsettings, \"KH_SSO_ENABLED\", False)\nKH_ENABLE_FIRST_SETUP = getattr(flowsettings, \"KH_ENABLE_FIRST_SETUP\", False)\nKH_APP_DATA_EXISTS = getattr(flowsettings, \"KH_APP_DATA_EXISTS\", True)\n\n# override first setup setting\nif config(\"KH_FIRST_SETUP\", default=False, cast=bool):\n    KH_APP_DATA_EXISTS = False\n\n\ndef toggle_first_setup_visibility():\n    global KH_APP_DATA_EXISTS\n    is_first_setup = not KH_DEMO_MODE and not KH_APP_DATA_EXISTS\n    KH_APP_DATA_EXISTS = True\n    return gr.update(visible=is_first_setup), gr.update(visible=not is_first_setup)\n\n\nclass App(BaseApp):\n    \"\"\"The main app of Kotaemon\n\n    The main application contains app-level information:\n        - setting state\n        - user id\n\n    App life-cycle:\n        - Render\n        - Declare public events\n        - Subscribe public events\n        - Register events\n    \"\"\"\n\n    def ui(self):\n        \"\"\"Render the UI\"\"\"\n        self._tabs = {}\n\n        with gr.Tabs() as self.tabs:\n            if self.f_user_management:\n                from ktem.pages.login import LoginPage\n\n                with gr.Tab(\n                    \"Welcome\", elem_id=\"login-tab\", id=\"login-tab\"\n                ) as self._tabs[\"login-tab\"]:\n                    self.login_page = LoginPage(self)\n\n            with gr.Tab(\n                \"Chat\",\n                elem_id=\"chat-tab\",\n                id=\"chat-tab\",\n                visible=not self.f_user_management,\n            ) as self._tabs[\"chat-tab\"]:\n         
       self.chat_page = ChatPage(self)\n\n            if len(self.index_manager.indices) == 1:\n                for index in self.index_manager.indices:\n                    with gr.Tab(\n                        f\"{index.name}\",\n                        elem_id=\"indices-tab\",\n                        elem_classes=[\n                            \"fill-main-area-height\",\n                            \"scrollable\",\n                            \"indices-tab\",\n                        ],\n                        id=\"indices-tab\",\n                        visible=not self.f_user_management and not KH_DEMO_MODE,\n                    ) as self._tabs[f\"{index.id}-tab\"]:\n                        page = index.get_index_page_ui()\n                        setattr(self, f\"_index_{index.id}\", page)\n            elif len(self.index_manager.indices) > 1:\n                with gr.Tab(\n                    \"Files\",\n                    elem_id=\"indices-tab\",\n                    elem_classes=[\"fill-main-area-height\", \"scrollable\", \"indices-tab\"],\n                    id=\"indices-tab\",\n                    visible=not self.f_user_management and not KH_DEMO_MODE,\n                ) as self._tabs[\"indices-tab\"]:\n                    for index in self.index_manager.indices:\n                        with gr.Tab(\n                            index.name,\n                            elem_id=f\"{index.id}-tab\",\n                        ) as self._tabs[f\"{index.id}-tab\"]:\n                            page = index.get_index_page_ui()\n                            setattr(self, f\"_index_{index.id}\", page)\n\n            if not KH_DEMO_MODE:\n                if not KH_SSO_ENABLED:\n                    with gr.Tab(\n                        \"Resources\",\n                        elem_id=\"resources-tab\",\n                        id=\"resources-tab\",\n                        visible=not self.f_user_management,\n                        
elem_classes=[\"fill-main-area-height\", \"scrollable\"],\n                    ) as self._tabs[\"resources-tab\"]:\n                        self.resources_page = ResourcesTab(self)\n\n                with gr.Tab(\n                    \"Settings\",\n                    elem_id=\"settings-tab\",\n                    id=\"settings-tab\",\n                    visible=not self.f_user_management,\n                    elem_classes=[\"fill-main-area-height\", \"scrollable\"],\n                ) as self._tabs[\"settings-tab\"]:\n                    self.settings_page = SettingsPage(self)\n\n            with gr.Tab(\n                \"Help\",\n                elem_id=\"help-tab\",\n                id=\"help-tab\",\n                visible=not self.f_user_management,\n                elem_classes=[\"fill-main-area-height\", \"scrollable\"],\n            ) as self._tabs[\"help-tab\"]:\n                self.help_page = HelpPage(self)\n\n        if KH_ENABLE_FIRST_SETUP:\n            with gr.Column(visible=False) as self.setup_page_wrapper:\n                self.setup_page = SetupPage(self)\n\n    def on_subscribe_public_events(self):\n        if self.f_user_management:\n            from ktem.db.engine import engine\n            from ktem.db.models import User\n            from sqlmodel import Session, select\n\n            def toggle_login_visibility(user_id):\n                if not user_id:\n                    return list(\n                        (\n                            gr.update(visible=True)\n                            if k == \"login-tab\"\n                            else gr.update(visible=False)\n                        )\n                        for k in self._tabs.keys()\n                    ) + [gr.update(selected=\"login-tab\")]\n\n                with Session(engine) as session:\n                    user = session.exec(select(User).where(User.id == user_id)).first()\n                    if user is None:\n                        return list(\n               
             (\n                                gr.update(visible=True)\n                                if k == \"login-tab\"\n                                else gr.update(visible=False)\n                            )\n                            for k in self._tabs.keys()\n                        )\n\n                    is_admin = user.admin\n\n                tabs_update = []\n                for k in self._tabs.keys():\n                    if k == \"login-tab\":\n                        tabs_update.append(gr.update(visible=False))\n                    elif k == \"resources-tab\":\n                        tabs_update.append(gr.update(visible=is_admin))\n                    else:\n                        tabs_update.append(gr.update(visible=True))\n\n                tabs_update.append(gr.update(selected=\"chat-tab\"))\n\n                return tabs_update\n\n            self.subscribe_event(\n                name=\"onSignIn\",\n                definition={\n                    \"fn\": toggle_login_visibility,\n                    \"inputs\": [self.user_id],\n                    \"outputs\": list(self._tabs.values()) + [self.tabs],\n                    \"show_progress\": \"hidden\",\n                },\n            )\n\n            self.subscribe_event(\n                name=\"onSignOut\",\n                definition={\n                    \"fn\": toggle_login_visibility,\n                    \"inputs\": [self.user_id],\n                    \"outputs\": list(self._tabs.values()) + [self.tabs],\n                    \"show_progress\": \"hidden\",\n                },\n            )\n\n        if KH_ENABLE_FIRST_SETUP:\n            self.subscribe_event(\n                name=\"onFirstSetupComplete\",\n                definition={\n                    \"fn\": toggle_first_setup_visibility,\n                    \"inputs\": [],\n                    \"outputs\": [self.setup_page_wrapper, self.tabs],\n                    \"show_progress\": \"hidden\",\n                
},\n            )\n\n    def _on_app_created(self):\n        \"\"\"Called when the app is created\"\"\"\n\n        if KH_ENABLE_FIRST_SETUP:\n            self.app.load(\n                toggle_first_setup_visibility,\n                inputs=[],\n                outputs=[self.setup_page_wrapper, self.tabs],\n            )\n"
  },
  {
    "path": "libs/ktem/ktem/mcp/__init__.py",
    "content": "# MCP (Model Context Protocol) integration for kotaemon\r\n"
  },
  {
    "path": "libs/ktem/ktem/mcp/db.py",
    "content": "from ktem.db.engine import engine\r\nfrom sqlalchemy import JSON, Column, String\r\nfrom sqlalchemy import inspect as sa_inspect\r\nfrom sqlalchemy.orm import DeclarativeBase\r\n\r\n\r\nclass Base(DeclarativeBase):\r\n    pass\r\n\r\n\r\nclass BaseMCPTable(Base):\r\n    \"\"\"Base table to store MCP server configurations\"\"\"\r\n\r\n    __abstract__ = True\r\n\r\n    name = Column(String, primary_key=True, unique=True)\r\n    config = Column(JSON, default={})  # Full JSON config for the MCP server\r\n\r\n\r\nclass MCPTable(BaseMCPTable):\r\n    __tablename__ = \"mcp_table\"\r\n\r\n\r\n# Drop and recreate to handle schema changes from old multi-column layout.\r\n_inspector = sa_inspect(engine)\r\nif _inspector.has_table(\"mcp_table\"):\r\n    _columns = {col[\"name\"] for col in _inspector.get_columns(\"mcp_table\")}\r\n    if \"config\" not in _columns:\r\n        MCPTable.__table__.drop(engine)  # type: ignore[attr-defined]\r\n\r\nMCPTable.metadata.create_all(engine)\r\n"
  },
  {
    "path": "libs/ktem/ktem/mcp/manager.py",
    "content": "\"\"\"Manager for MCP server configurations.\r\n\r\nProvides CRUD operations on the MCPTable.\r\nAll tool building/discovery logic lives in kotaemon.agents.tools.mcp.\r\n\"\"\"\r\n\r\nimport logging\r\n\r\nfrom sqlalchemy import select\r\nfrom sqlalchemy.orm import Session\r\n\r\nfrom .db import MCPTable, engine\r\n\r\nlogger = logging.getLogger(__name__)\r\n\r\n\r\nclass MCPManager:\r\n    \"\"\"Manages MCP server configurations stored in the database.\"\"\"\r\n\r\n    def __init__(self):\r\n        self._configs: dict[str, dict] = {}\r\n        self.load()\r\n\r\n    def load(self):\r\n        \"\"\"Reload configurations from the database.\"\"\"\r\n        self._info = {}\r\n        with Session(engine) as session:\r\n            stmt = select(MCPTable)\r\n            items = session.execute(stmt)\r\n            for (item,) in items:\r\n                self._info[item.name] = {\r\n                    \"name\": item.name,\r\n                    \"config\": item.config,\r\n                }\r\n\r\n    def info(self) -> dict:\r\n        \"\"\"Return all MCP server configurations.\"\"\"\r\n        return self._info\r\n\r\n    def get(self, name: str) -> dict | None:\r\n        \"\"\"Get a single configuration by name.\"\"\"\r\n        return self._info.get(name)\r\n\r\n    def add(self, name: str, config: dict):\r\n        \"\"\"Add a new MCP server configuration.\"\"\"\r\n        name = name.strip()\r\n        if not name:\r\n            raise ValueError(\"Name must not be empty\")\r\n\r\n        with Session(engine) as session:\r\n            item = MCPTable(name=name, config=config)\r\n            session.add(item)\r\n            session.commit()\r\n\r\n        self.load()\r\n\r\n    def update(self, name: str, config: dict):\r\n        \"\"\"Update an existing MCP server configuration.\"\"\"\r\n        if not name:\r\n            raise ValueError(\"Name must not be empty\")\r\n\r\n        with Session(engine) as session:\r\n            item = 
session.query(MCPTable).filter_by(name=name).first()\r\n            if not item:\r\n                raise ValueError(f\"MCP server '{name}' not found\")\r\n            item.config = config  # type: ignore[assignment]\r\n            session.commit()\r\n\r\n        self.load()\r\n\r\n    def delete(self, name: str):\r\n        \"\"\"Delete an MCP server configuration.\"\"\"\r\n        with Session(engine) as session:\r\n            item = session.query(MCPTable).filter_by(name=name).first()\r\n            if item:\r\n                session.delete(item)\r\n                session.commit()\r\n\r\n        self.load()\r\n\r\n    def get_enabled_tools(self) -> list[str]:\r\n        \"\"\"Return tool choice names for MCP servers that set `enabled_tools`.\"\"\"\r\n        choices = []\r\n        for name, entry in self._info.items():\r\n            config = entry.get(\"config\", {})\r\n            enabled_tools = config.get(\"enabled_tools\", None)\r\n            if enabled_tools is not None:\r\n                choices.append(f\"[MCP] {name}\")\r\n        return choices\r\n\r\n\r\nmcp_manager = MCPManager()\r\n"
  },
  {
    "path": "libs/ktem/ktem/mcp/ui.py",
    "content": "import json\r\nimport logging\r\n\r\nimport gradio as gr\r\nimport pandas as pd\r\nfrom ktem.app import BasePage\r\n\r\nfrom kotaemon.agents.tools.mcp import discover_tools_info, format_tool_list\r\n\r\nfrom .manager import mcp_manager\r\n\r\nlogger = logging.getLogger(__name__)\r\n\r\nTOOLS_DEFAULT = \"# Available Tools\\n\\nSelect or add an MCP server to view its tools.\"\r\n\r\nMCP_SERVERS_KEY = \"mcpServers\"\r\n\r\nEXAMPLE_CONFIG = \"\"\"{\r\n  \"mcpServers\": {\r\n  }\r\n}\"\"\"\r\n\r\n\r\nclass MCPManagement(BasePage):\r\n    def __init__(self, app):\r\n        self._app = app\r\n        self.on_building_ui()\r\n\r\n    def on_building_ui(self):\r\n        with gr.Tab(label=\"View\"):\r\n            self.mcp_list = gr.DataFrame(\r\n                headers=[\"name\", \"config\"],\r\n                interactive=False,\r\n                column_widths=[30, 70],\r\n            )\r\n\r\n            with gr.Column(visible=False) as self._selected_panel:\r\n                self.selected_mcp_name = gr.Textbox(value=\"\", visible=False)\r\n                with gr.Row():\r\n                    with gr.Column():\r\n                        self.edit_config = gr.Code(\r\n                            label=\"Configuration (JSON)\",\r\n                            language=\"json\",\r\n                            lines=10,\r\n                        )\r\n\r\n                        with gr.Row(visible=False) as self._selected_panel_btn:\r\n                            with gr.Column():\r\n                                self.btn_edit_save = gr.Button(\r\n                                    \"Save\", min_width=10, variant=\"primary\"\r\n                                )\r\n                            with gr.Column():\r\n                                self.btn_delete = gr.Button(\r\n                                    \"Delete\", min_width=10, variant=\"stop\"\r\n                                )\r\n                                with gr.Row():\r\n            
                        self.btn_delete_yes = gr.Button(\r\n                                        \"Confirm Delete\",\r\n                                        variant=\"stop\",\r\n                                        visible=False,\r\n                                        min_width=10,\r\n                                    )\r\n                                    self.btn_delete_no = gr.Button(\r\n                                        \"Cancel\", visible=False, min_width=10\r\n                                    )\r\n                            with gr.Column():\r\n                                self.btn_close = gr.Button(\"Close\", min_width=10)\r\n\r\n                    with gr.Column():\r\n                        self.edit_tools_display = gr.Markdown(TOOLS_DEFAULT)\r\n\r\n        with gr.Tab(label=\"Add\"):\r\n            with gr.Row():\r\n                with gr.Column(scale=2):\r\n                    self.config = gr.Code(\r\n                        label=\"Configuration (JSON)\",\r\n                        language=\"json\",\r\n                        lines=10,\r\n                        value=EXAMPLE_CONFIG,\r\n                    )\r\n                    gr.HTML(\r\n                        \"<br/>\"\r\n                    )  # Fix: Prevent the overflow of the gr.Code affect click button\r\n                    with gr.Row():\r\n                        self.btn_new = gr.Button(\"Add MCP Servers\", variant=\"primary\")\r\n\r\n                with gr.Column(scale=3):\r\n                    self.add_tools_display = gr.Markdown(TOOLS_DEFAULT)\r\n\r\n    def _on_app_created(self):\r\n        \"\"\"Called when the app is created.\"\"\"\r\n        self._app.app.load(\r\n            self.list_servers,\r\n            inputs=[],\r\n            outputs=[self.mcp_list],\r\n        )\r\n\r\n    def on_register_events(self):\r\n        # Add new server — save first, then fetch tools async\r\n        self.btn_new.click(\r\n            self.create_server,\r\n   
         inputs=[self.config],\r\n            outputs=[self.add_tools_display],\r\n        ).success(self.list_servers, inputs=[], outputs=[self.mcp_list]).then(\r\n            self.fetch_tools_for_add,\r\n            inputs=[self.config],\r\n            outputs=[self.add_tools_display],\r\n        ).then(\r\n            lambda: EXAMPLE_CONFIG,\r\n            outputs=[self.config],\r\n        )\r\n\r\n        # Select a server from list\r\n        self.mcp_list.select(\r\n            self.select_server,\r\n            inputs=self.mcp_list,\r\n            outputs=[self.selected_mcp_name],\r\n            show_progress=\"hidden\",\r\n        )\r\n        self.selected_mcp_name.change(\r\n            self.on_selected_server_change,\r\n            inputs=[self.selected_mcp_name],\r\n            outputs=[\r\n                self._selected_panel,\r\n                self._selected_panel_btn,\r\n                self.btn_delete,\r\n                self.btn_delete_yes,\r\n                self.btn_delete_no,\r\n                self.edit_config,\r\n                self.edit_tools_display,\r\n            ],\r\n            show_progress=\"hidden\",\r\n        ).then(\r\n            self.fetch_tools_for_view,\r\n            inputs=[self.selected_mcp_name],\r\n            outputs=[self.edit_tools_display],\r\n        )\r\n\r\n        # Delete flow\r\n        self.btn_delete.click(\r\n            self.on_btn_delete_click,\r\n            inputs=[],\r\n            outputs=[self.btn_delete, self.btn_delete_yes, self.btn_delete_no],\r\n            show_progress=\"hidden\",\r\n        )\r\n        self.btn_delete_yes.click(\r\n            self.delete_server,\r\n            inputs=[self.selected_mcp_name],\r\n            outputs=[self.selected_mcp_name],\r\n            show_progress=\"hidden\",\r\n        ).then(self.list_servers, inputs=[], outputs=[self.mcp_list])\r\n        self.btn_delete_no.click(\r\n            lambda: (\r\n                gr.update(visible=True),\r\n                
gr.update(visible=False),\r\n                gr.update(visible=False),\r\n            ),\r\n            inputs=[],\r\n            outputs=[self.btn_delete, self.btn_delete_yes, self.btn_delete_no],\r\n            show_progress=\"hidden\",\r\n        )\r\n\r\n        # Save edits — save first, then refresh tools\r\n        self.btn_edit_save.click(\r\n            self.save_server,\r\n            inputs=[self.selected_mcp_name, self.edit_config],\r\n            outputs=[self.edit_tools_display],\r\n            show_progress=\"hidden\",\r\n        ).then(self.list_servers, inputs=[], outputs=[self.mcp_list]).then(\r\n            self.fetch_tools_for_view,\r\n            inputs=[self.selected_mcp_name],\r\n            outputs=[self.edit_tools_display],\r\n        )\r\n\r\n        # Close panel\r\n        self.btn_close.click(lambda: \"\", outputs=[self.selected_mcp_name])\r\n\r\n    # --- Handlers ---\r\n\r\n    def _fetch_tools_markdown(self, config: dict) -> str:\r\n        \"\"\"Fetch tools from MCP server and return as formatted HTML.\"\"\"\r\n        try:\r\n            tool_infos = discover_tools_info(config)\r\n            enabled_tools = config.get(\"enabled_tools\", None)\r\n            return format_tool_list(tool_infos, enabled_tools)\r\n        except Exception as e:\r\n            return f\"❌ Failed to fetch tools: {e}\"\r\n\r\n    def create_server(self, config_str):\r\n        \"\"\"Create server(s), show loading placeholder.\"\"\"\r\n        try:\r\n            configs = json.loads(config_str)\r\n        except json.JSONDecodeError as e:\r\n            raise gr.Error(f\"Invalid JSON: {e}\")\r\n\r\n        if not isinstance(configs, dict) or MCP_SERVERS_KEY not in configs:\r\n            raise gr.Error(\r\n                f\"Config must be a dictionary with '{MCP_SERVERS_KEY}' root key.\"\r\n            )\r\n\r\n        mcp_servers = configs[MCP_SERVERS_KEY]\r\n        if not isinstance(mcp_servers, dict):\r\n            raise gr.Error(\r\n               
 f\"'{MCP_SERVERS_KEY}' must be a mapping of server names to configs.\"\r\n            )\r\n\r\n        # Validate that no names are empty before processing\r\n        for name in mcp_servers:\r\n            name = name.strip()\r\n            if not name:\r\n                raise gr.Error(\"Server names cannot be empty.\")\r\n\r\n        success_count = 0\r\n        failed_count = 0\r\n        msgs = []\r\n        for name, config in mcp_servers.items():\r\n            name = name.strip()\r\n            if name in mcp_manager.info():\r\n                gr.Warning(f\"MCP server '{name}' already exists. Skipping.\")\r\n                failed_count += 1\r\n                continue\r\n\r\n            try:\r\n                mcp_manager.add(name, config)\r\n                success_count += 1\r\n                msgs.append(f\"# Tools for '{name}'\\n\\n⏳ Fetching tools...\")\r\n            except Exception as e:\r\n                gr.Warning(f\"Failed to create MCP server '{name}': {e}\")\r\n                failed_count += 1\r\n\r\n        if success_count > 0:\r\n            gr.Info(f\"{success_count} MCP server(s) created successfully\")\r\n\r\n        if not msgs:\r\n            return TOOLS_DEFAULT\r\n\r\n        return \"\\n\\n\".join(msgs)\r\n\r\n    def fetch_tools_for_add(self, config_str):\r\n        \"\"\"Fetch tools after server was added (chained call).\"\"\"\r\n        if not config_str:\r\n            return TOOLS_DEFAULT\r\n        try:\r\n            configs = json.loads(config_str)\r\n        except json.JSONDecodeError:\r\n            return \"❌ Invalid JSON config\"\r\n\r\n        if not isinstance(configs, dict) or MCP_SERVERS_KEY not in configs:\r\n            return f\"❌ Config must be a dictionary with '{MCP_SERVERS_KEY}' root key\"\r\n\r\n        mcp_servers = configs[MCP_SERVERS_KEY]\r\n        if not isinstance(mcp_servers, dict):\r\n            return f\"❌ '{MCP_SERVERS_KEY}' must be a dictionary\"\r\n\r\n        msgs = []\r\n        for name, 
config in mcp_servers.items():\r\n            msgs.append(\r\n                f\"# Tools for '{name.strip()}'\\n\\n{self._fetch_tools_markdown(config)}\"\r\n            )\r\n        return \"\\n\\n\".join(msgs)\r\n\r\n    def fetch_tools_for_view(self, selected_name):\r\n        \"\"\"Fetch tools for the View panel (chained call).\"\"\"\r\n        if not selected_name:\r\n            return TOOLS_DEFAULT\r\n        entry = mcp_manager.info().get(selected_name)\r\n        if not entry:\r\n            return TOOLS_DEFAULT\r\n        config = entry.get(\"config\", {})\r\n        return f\"# Tools for '{selected_name}'\\n\\n{self._fetch_tools_markdown(config)}\"\r\n\r\n    def list_servers(self):\r\n        items = []\r\n        for entry in mcp_manager.info().values():\r\n            items.append(\r\n                {\r\n                    \"name\": entry[\"name\"],\r\n                    \"config\": json.dumps(entry.get(\"config\", {})),\r\n                }\r\n            )\r\n\r\n        if items:\r\n            return pd.DataFrame.from_records(items)\r\n        return pd.DataFrame.from_records([{\"name\": \"-\", \"config\": \"-\"}])\r\n\r\n    def select_server(self, mcp_list, ev: gr.SelectData):\r\n        if ev.value == \"-\" and ev.index[0] == 0:\r\n            gr.Info(\"No MCP server configured. 
Please add one first.\")\r\n            return \"\"\r\n        if not ev.selected:\r\n            return \"\"\r\n        return mcp_list[\"name\"][ev.index[0]]\r\n\r\n    def on_selected_server_change(self, selected_name):\r\n        if selected_name == \"\":\r\n            return (\r\n                gr.update(visible=False),  # panel\r\n                gr.update(visible=False),  # buttons\r\n                gr.update(visible=True),  # delete\r\n                gr.update(visible=False),  # delete_yes\r\n                gr.update(visible=False),  # delete_no\r\n                gr.update(value=\"{}\"),  # config\r\n                gr.update(value=TOOLS_DEFAULT),  # tools display\r\n            )\r\n\r\n        entry = mcp_manager.info()[selected_name]\r\n        config = entry.get(\"config\", {})\r\n        config_str = json.dumps(config, indent=2)\r\n\r\n        return (\r\n            gr.update(visible=True),\r\n            gr.update(visible=True),\r\n            gr.update(visible=True),\r\n            gr.update(visible=False),\r\n            gr.update(visible=False),\r\n            gr.update(value=config_str),\r\n            gr.update(value=f\"# Tools for '{selected_name}'\\n\\n⏳ Fetching tools...\"),\r\n        )\r\n\r\n    def on_btn_delete_click(self):\r\n        return (\r\n            gr.update(visible=False),\r\n            gr.update(visible=True),\r\n            gr.update(visible=True),\r\n        )\r\n\r\n    def delete_server(self, selected_name):\r\n        try:\r\n            mcp_manager.delete(selected_name)\r\n            gr.Info(f\"MCP server '{selected_name}' deleted successfully\")\r\n        except Exception as e:\r\n            gr.Error(f\"Failed to delete MCP server '{selected_name}': {e}\")\r\n            return selected_name\r\n        return \"\"\r\n\r\n    def save_server(self, selected_name, config_str):\r\n        try:\r\n            config = json.loads(config_str)\r\n        except json.JSONDecodeError as e:\r\n            raise 
gr.Error(f\"Invalid JSON: {e}\")\r\n\r\n        try:\r\n            mcp_manager.update(selected_name, config)\r\n            gr.Info(f\"MCP server '{selected_name}' saved successfully\")\r\n        except Exception as e:\r\n            raise gr.Error(f\"Failed to save MCP server '{selected_name}': {e}\")\r\n\r\n        # Show loading placeholder; tools fetched in chained .then()\r\n        return f\"# Tools for '{selected_name}'\\n\\n⏳ Refreshing tools...\"\r\n"
  },
  {
    "path": "libs/ktem/ktem/pages/__init__.py",
    "content": ""
  },
  {
    "path": "libs/ktem/ktem/pages/chat/__init__.py",
    "content": "import asyncio\nimport json\nimport re\nfrom copy import deepcopy\nfrom typing import Optional\n\nimport gradio as gr\nfrom decouple import config\nfrom ktem.app import BasePage\nfrom ktem.components import reasonings\nfrom ktem.db.models import Conversation, engine\nfrom ktem.index.file.ui import File\nfrom ktem.reasoning.prompt_optimization.mindmap import MINDMAP_HTML_EXPORT_TEMPLATE\nfrom ktem.reasoning.prompt_optimization.suggest_conversation_name import (\n    SuggestConvNamePipeline,\n)\nfrom ktem.reasoning.prompt_optimization.suggest_followup_chat import (\n    SuggestFollowupQuesPipeline,\n)\nfrom plotly.io import from_json\nfrom sqlmodel import Session, select\nfrom theflow.settings import settings as flowsettings\nfrom theflow.utils.modules import import_dotted_string\n\nfrom kotaemon.base import Document\nfrom kotaemon.indices.ingests.files import KH_DEFAULT_FILE_EXTRACTORS\nfrom kotaemon.indices.qa.utils import strip_think_tag\n\nfrom ...utils import SUPPORTED_LANGUAGE_MAP, get_file_names_regex, get_urls\nfrom ...utils.commands import WEB_SEARCH_COMMAND\nfrom ...utils.hf_papers import get_recommended_papers\nfrom ...utils.rate_limit import check_rate_limit\nfrom .chat_panel import ChatPanel\nfrom .chat_suggestion import ChatSuggestion\nfrom .common import STATE\nfrom .control import ConversationControl\nfrom .demo_hint import HintPage\nfrom .paper_list import PaperListPage\nfrom .report import ReportIssue\n\nKH_DEMO_MODE = getattr(flowsettings, \"KH_DEMO_MODE\", False)\nKH_SSO_ENABLED = getattr(flowsettings, \"KH_SSO_ENABLED\", False)\nKH_WEB_SEARCH_BACKEND = getattr(flowsettings, \"KH_WEB_SEARCH_BACKEND\", None)\nWebSearch = None\nif KH_WEB_SEARCH_BACKEND:\n    try:\n        WebSearch = import_dotted_string(KH_WEB_SEARCH_BACKEND, safe=False)\n    except (ImportError, AttributeError) as e:\n        print(f\"Error importing {KH_WEB_SEARCH_BACKEND}: {e}\")\n\nREASONING_LIMITS = 2 if KH_DEMO_MODE else 10\nDEFAULT_SETTING = 
\"(default)\"\nINFO_PANEL_SCALES = {True: 8, False: 4}\nDEFAULT_QUESTION = (\n    \"What is the summary of this document?\"\n    if not KH_DEMO_MODE\n    else \"What is the summary of this paper?\"\n)\n\nchat_input_focus_js = \"\"\"\nfunction() {\n    let chatInput = document.querySelector(\"#chat-input textarea\");\n    chatInput.focus();\n}\n\"\"\"\n\nquick_urls_submit_js = \"\"\"\nfunction() {\n    let urlInput = document.querySelector(\"#quick-url-demo textarea\");\n    console.log(\"URL input:\", urlInput);\n    urlInput.dispatchEvent(new KeyboardEvent('keypress', {'key': 'Enter'}));\n}\n\"\"\"\n\nrecommended_papers_js = \"\"\"\nfunction() {\n    // Get all links and attach click event\n    var links = document.querySelectorAll(\"#related-papers a\");\n\n    function submitPaper(event) {\n        event.preventDefault();\n        var target = event.currentTarget;\n        var url = target.getAttribute(\"href\");\n        console.log(\"URL:\", url);\n\n        let newChatButton = document.querySelector(\"#new-conv-button\");\n        newChatButton.click();\n\n        setTimeout(() => {\n            let urlInput = document.querySelector(\"#quick-url-demo textarea\");\n            // Fill the URL input\n            urlInput.value = url;\n            urlInput.dispatchEvent(new Event(\"input\", { bubbles: true }));\n            urlInput.dispatchEvent(new KeyboardEvent('keypress', {'key': 'Enter'}));\n            }, 500\n        );\n    }\n\n    for (var i = 0; i < links.length; i++) {\n        links[i].onclick = submitPaper;\n    }\n}\n\"\"\"\n\nclear_bot_message_selection_js = \"\"\"\nfunction() {\n    var bot_messages = document.querySelectorAll(\n        \"div#main-chat-bot div.message-row.bot-row\"\n    );\n    bot_messages.forEach(message => {\n        message.classList.remove(\"text_selection\");\n    });\n}\n\"\"\"\n\npdfview_js = \"\"\"\nfunction() {\n    setTimeout(fullTextSearch(), 100);\n\n    // Get all links and attach click event\n    var links = 
document.getElementsByClassName(\"pdf-link\");\n    for (var i = 0; i < links.length; i++) {\n        links[i].onclick = openModal;\n    }\n\n    // Get all citation links and attach click event\n    var links = document.querySelectorAll(\"a.citation\");\n    for (var i = 0; i < links.length; i++) {\n        links[i].onclick = scrollToCitation;\n    }\n\n    var markmap_div = document.querySelector(\"div.markmap\");\n    var mindmap_el_script = document.querySelector('div.markmap script');\n\n    if (mindmap_el_script) {\n        markmap_div_html = markmap_div.outerHTML;\n    }\n\n    // render the mindmap if the script tag is present\n    if (mindmap_el_script) {\n        markmap.autoLoader.renderAll();\n    }\n\n    setTimeout(() => {\n        var mindmap_el = document.querySelector('svg.markmap');\n\n        var text_nodes = document.querySelectorAll(\"svg.markmap div\");\n        for (var i = 0; i < text_nodes.length; i++) {\n            text_nodes[i].onclick = fillChatInput;\n        }\n\n        if (mindmap_el) {\n            function on_svg_export(event) {\n                html = \"{html_template}\";\n                html = html.replace(\"{markmap_div}\", markmap_div_html);\n                spawnDocument(html, {window: \"width=1000,height=1000\"});\n            }\n\n            var link = document.getElementById(\"mindmap-toggle\");\n            if (link) {\n                link.onclick = function(event) {\n                    event.preventDefault(); // Prevent the default link behavior\n                    var div = document.querySelector(\"div.markmap\");\n                    if (div) {\n                        var currentHeight = div.style.height;\n                        if (currentHeight === '400px' || (currentHeight === '')) {\n                            div.style.height = '650px';\n                        } else {\n                            div.style.height = '400px'\n                        }\n                    }\n                };\n            
}\n\n            if (markmap_div_html) {\n                var link = document.getElementById(\"mindmap-export\");\n                if (link) {\n                    link.addEventListener('click', on_svg_export);\n                }\n            }\n        }\n    }, 250);\n\n    return [links.length]\n}\n\"\"\".replace(\n    \"{html_template}\",\n    MINDMAP_HTML_EXPORT_TEMPLATE.replace(\"\\n\", \"\").replace('\"', '\\\\\"'),\n)\n\nfetch_api_key_js = \"\"\"\nfunction(_, __) {\n    api_key = getStorage('google_api_key', '');\n    console.log('session API key:', api_key);\n    return [api_key, _];\n}\n\"\"\"\n\n\nclass ChatPage(BasePage):\n    def __init__(self, app):\n        self._app = app\n        self._indices_input = []\n\n        self.on_building_ui()\n\n        self._preview_links = gr.State(value=None)\n        self._reasoning_type = gr.State(value=None)\n        self._conversation_renamed = gr.State(value=False)\n        self._use_suggestion = gr.State(\n            value=getattr(flowsettings, \"KH_FEATURE_CHAT_SUGGESTION\", False)\n        )\n        self._info_panel_expanded = gr.State(value=True)\n        self._command_state = gr.State(value=None)\n        self._user_api_key = gr.Text(value=\"\", visible=False)\n\n    def on_building_ui(self):\n        with gr.Row():\n            self.state_chat = gr.State(STATE)\n            self.state_retrieval_history = gr.State([])\n            self.state_plot_history = gr.State([])\n            self.state_plot_panel = gr.State(None)\n            self.first_selector_choices = gr.State(None)\n\n            with gr.Column(scale=1, elem_id=\"conv-settings-panel\") as self.conv_column:\n                self.chat_control = ConversationControl(self._app)\n\n                for index_id, index in enumerate(self._app.index_manager.indices):\n                    index.selector = None\n                    index_ui = index.get_selector_component_ui()\n                    if not index_ui:\n                        # the index 
doesn't have a selector UI component\n                        continue\n\n                    index_ui.unrender()  # need to rerender later within Accordion\n                    is_first_index = index_id == 0\n                    index_name = index.name\n\n                    if KH_DEMO_MODE and is_first_index:\n                        index_name = \"Select from Paper Collection\"\n\n                    with gr.Accordion(\n                        label=index_name,\n                        open=is_first_index,\n                        elem_id=f\"index-{index_id}\",\n                    ):\n                        index_ui.render()\n                        gr_index = index_ui.as_gradio_component()\n\n                        # get the file selector choices for the first index\n                        if index_id == 0:\n                            self.first_selector_choices = index_ui.selector_choices\n                            self.first_indexing_url_fn = None\n\n                        if gr_index:\n                            if isinstance(gr_index, list):\n                                index.selector = tuple(\n                                    range(\n                                        len(self._indices_input),\n                                        len(self._indices_input) + len(gr_index),\n                                    )\n                                )\n                                index.default_selector = index_ui.default()\n                                self._indices_input.extend(gr_index)\n                            else:\n                                index.selector = len(self._indices_input)\n                                index.default_selector = index_ui.default()\n                                self._indices_input.append(gr_index)\n                        setattr(self, f\"_index_{index.id}\", index_ui)\n\n                self.chat_suggestion = ChatSuggestion(self._app)\n\n                if 
len(self._app.index_manager.indices) > 0:\n                    quick_upload_label = (\n                        \"Quick Upload\" if not KH_DEMO_MODE else \"Or input new paper URL\"\n                    )\n\n                    with gr.Accordion(label=quick_upload_label) as _:\n                        self.quick_file_upload_status = gr.Markdown()\n                        if not KH_DEMO_MODE:\n                            self.quick_file_upload = File(\n                                file_types=list(KH_DEFAULT_FILE_EXTRACTORS.keys()),\n                                file_count=\"multiple\",\n                                container=True,\n                                show_label=False,\n                                elem_id=\"quick-file\",\n                            )\n                        self.quick_urls = gr.Textbox(\n                            placeholder=(\n                                \"Or paste URLs\"\n                                if not KH_DEMO_MODE\n                                else \"Paste Arxiv URLs\\n(https://arxiv.org/abs/xxx)\"\n                            ),\n                            lines=1,\n                            container=False,\n                            show_label=False,\n                            elem_id=(\n                                \"quick-url\" if not KH_DEMO_MODE else \"quick-url-demo\"\n                            ),\n                        )\n\n                if not KH_DEMO_MODE:\n                    self.report_issue = ReportIssue(self._app)\n                else:\n                    with gr.Accordion(label=\"Related papers\", open=False):\n                        self.related_papers = gr.Markdown(elem_id=\"related-papers\")\n\n                    self.hint_page = HintPage(self._app)\n\n            with gr.Column(scale=6, elem_id=\"chat-area\"):\n                if KH_DEMO_MODE:\n                    self.paper_list = PaperListPage(self._app)\n\n                self.chat_panel = 
ChatPanel(self._app)\n\n                with gr.Accordion(\n                    label=\"Chat settings\",\n                    elem_id=\"chat-settings-expand\",\n                    open=False,\n                    visible=not KH_DEMO_MODE,\n                ) as self.chat_settings:\n                    with gr.Row(elem_id=\"quick-setting-labels\"):\n                        gr.HTML(\"Reasoning method\")\n                        gr.HTML(\n                            \"Model\", visible=not KH_DEMO_MODE and not KH_SSO_ENABLED\n                        )\n                        gr.HTML(\"Language\")\n\n                    with gr.Row():\n                        reasoning_setting = (\n                            self._app.default_settings.reasoning.settings[\"use\"]\n                        )\n                        model_setting = self._app.default_settings.reasoning.options[\n                            \"simple\"\n                        ].settings[\"llm\"]\n                        language_setting = (\n                            self._app.default_settings.reasoning.settings[\"lang\"]\n                        )\n                        citation_setting = self._app.default_settings.reasoning.options[\n                            \"simple\"\n                        ].settings[\"highlight_citation\"]\n\n                        self.reasoning_type = gr.Dropdown(\n                            choices=reasoning_setting.choices[:REASONING_LIMITS],\n                            value=reasoning_setting.value,\n                            container=False,\n                            show_label=False,\n                        )\n                        self.model_type = gr.Dropdown(\n                            choices=model_setting.choices,\n                            value=model_setting.value,\n                            container=False,\n                            show_label=False,\n                            visible=not KH_DEMO_MODE and not KH_SSO_ENABLED,\n              
          )\n                        self.language = gr.Dropdown(\n                            choices=language_setting.choices,\n                            value=language_setting.value,\n                            container=False,\n                            show_label=False,\n                        )\n\n                        self.citation = gr.Dropdown(\n                            choices=citation_setting.choices,\n                            value=citation_setting.value,\n                            container=False,\n                            show_label=False,\n                            interactive=True,\n                            elem_id=\"citation-dropdown\",\n                        )\n\n                        if not config(\"USE_LOW_LLM_REQUESTS\", default=False, cast=bool):\n                            self.use_mindmap = gr.State(value=True)\n                            self.use_mindmap_check = gr.Checkbox(\n                                label=\"Mindmap (on)\",\n                                container=False,\n                                elem_id=\"use-mindmap-checkbox\",\n                                value=True,\n                            )\n                        else:\n                            self.use_mindmap = gr.State(value=False)\n                            self.use_mindmap_check = gr.Checkbox(\n                                label=\"Mindmap (off)\",\n                                container=False,\n                                elem_id=\"use-mindmap-checkbox\",\n                                value=False,\n                            )\n\n            with gr.Column(\n                scale=INFO_PANEL_SCALES[False], elem_id=\"chat-info-panel\"\n            ) as self.info_column:\n                with gr.Accordion(\n                    label=\"Information panel\", open=True, elem_id=\"info-expand\"\n                ):\n                    self.modal = gr.HTML(\"<div id='pdf-modal'></div>\")\n                    
self.plot_panel = gr.Plot(visible=False)\n                    self.info_panel = gr.HTML(elem_id=\"html-info-panel\")\n\n        self.followup_questions = self.chat_suggestion.examples\n        self.followup_questions_ui = self.chat_suggestion.accordion\n\n    def _json_to_plot(self, json_dict: dict | None):\n        if json_dict:\n            plot = from_json(json_dict)\n            plot = gr.update(visible=True, value=plot)\n        else:\n            plot = gr.update(visible=False)\n        return plot\n\n    def on_register_events(self):\n        # first index paper recommendation\n        if KH_DEMO_MODE and len(self._indices_input) > 0:\n            self._indices_input[1].change(\n                self.get_recommendations,\n                inputs=[self.first_selector_choices, self._indices_input[1]],\n                outputs=[self.related_papers],\n            ).then(\n                fn=None,\n                inputs=None,\n                outputs=None,\n                js=recommended_papers_js,\n            )\n\n        chat_event = (\n            gr.on(\n                triggers=[\n                    self.chat_panel.text_input.submit,\n                ],\n                fn=self.submit_msg,\n                inputs=[\n                    self.chat_panel.text_input,\n                    self.chat_panel.chatbot,\n                    self._app.user_id,\n                    self._app.settings_state,\n                    self.chat_control.conversation_id,\n                    self.chat_control.conversation_rn,\n                    self.first_selector_choices,\n                ],\n                outputs=[\n                    self.chat_panel.text_input,\n                    self.chat_panel.chatbot,\n                    self.chat_control.conversation_id,\n                    self.chat_control.conversation,\n                    self.chat_control.conversation_rn,\n                    # file selector from the first index\n                    self._indices_input[0],\n  
                  self._indices_input[1],\n                    self._command_state,\n                ],\n                concurrency_limit=20,\n                show_progress=\"hidden\",\n            )\n            .success(\n                fn=self.chat_fn,\n                inputs=[\n                    self.chat_control.conversation_id,\n                    self.chat_panel.chatbot,\n                    self._app.settings_state,\n                    self._reasoning_type,\n                    self.model_type,\n                    self.use_mindmap,\n                    self.citation,\n                    self.language,\n                    self.state_chat,\n                    self._command_state,\n                    self._app.user_id,\n                ]\n                + self._indices_input,\n                outputs=[\n                    self.chat_panel.chatbot,\n                    self.info_panel,\n                    self.plot_panel,\n                    self.state_plot_panel,\n                    self.state_chat,\n                ],\n                concurrency_limit=20,\n                show_progress=\"minimal\",\n            )\n            .then(\n                fn=lambda: True,\n                inputs=None,\n                outputs=[self._preview_links],\n                js=pdfview_js,\n            )\n            .success(\n                fn=self.check_and_suggest_name_conv,\n                inputs=self.chat_panel.chatbot,\n                outputs=[\n                    self.chat_control.conversation_rn,\n                    self._conversation_renamed,\n                ],\n            )\n            .success(\n                self.chat_control.rename_conv,\n                inputs=[\n                    self.chat_control.conversation_id,\n                    self.chat_control.conversation_rn,\n                    self._conversation_renamed,\n                    self._app.user_id,\n                ],\n                outputs=[\n                    
self.chat_control.conversation,\n                    self.chat_control.conversation,\n                    self.chat_control.conversation_rn,\n                ],\n                show_progress=\"hidden\",\n            )\n        )\n\n        onSuggestChatEvent = {\n            \"fn\": self.suggest_chat_conv,\n            \"inputs\": [\n                self._app.settings_state,\n                self.language,\n                self.chat_panel.chatbot,\n                self._use_suggestion,\n            ],\n            \"outputs\": [\n                self.followup_questions_ui,\n                self.followup_questions,\n            ],\n            \"show_progress\": \"hidden\",\n        }\n        # chat suggestion toggle\n        chat_event = chat_event.success(**onSuggestChatEvent)\n\n        # final data persist\n        if not KH_DEMO_MODE:\n            chat_event = chat_event.then(\n                fn=self.persist_data_source,\n                inputs=[\n                    self.chat_control.conversation_id,\n                    self._app.user_id,\n                    self.info_panel,\n                    self.state_plot_panel,\n                    self.state_retrieval_history,\n                    self.state_plot_history,\n                    self.chat_panel.chatbot,\n                    self.state_chat,\n                ]\n                + self._indices_input,\n                outputs=[\n                    self.state_retrieval_history,\n                    self.state_plot_history,\n                ],\n                concurrency_limit=20,\n            )\n\n        self.chat_control.btn_info_expand.click(\n            fn=lambda is_expanded: (\n                gr.update(scale=INFO_PANEL_SCALES[is_expanded]),\n                not is_expanded,\n            ),\n            inputs=self._info_panel_expanded,\n            outputs=[self.info_column, self._info_panel_expanded],\n        )\n        self.chat_control.btn_chat_expand.click(\n            fn=None, 
inputs=None, js=\"function() {toggleChatColumn();}\"\n        )\n\n        if KH_DEMO_MODE:\n            self.chat_control.btn_demo_logout.click(\n                fn=None,\n                js=self.chat_control.logout_js,\n            )\n            self.chat_control.btn_new.click(\n                fn=lambda: self.chat_control.select_conv(\"\", None),\n                outputs=[\n                    self.chat_control.conversation_id,\n                    self.chat_control.conversation,\n                    self.chat_control.conversation_rn,\n                    self.chat_panel.chatbot,\n                    self.followup_questions,\n                    self.info_panel,\n                    self.state_plot_panel,\n                    self.state_retrieval_history,\n                    self.state_plot_history,\n                    self.chat_control.cb_is_public,\n                    self.state_chat,\n                ]\n                + self._indices_input,\n            ).then(\n                lambda: (gr.update(visible=False), gr.update(visible=True)),\n                outputs=[self.paper_list.accordion, self.chat_settings],\n            ).then(\n                fn=None,\n                inputs=None,\n                js=chat_input_focus_js,\n            )\n\n        if not KH_DEMO_MODE:\n            self.chat_control.btn_new.click(\n                self.chat_control.new_conv,\n                inputs=self._app.user_id,\n                outputs=[\n                    self.chat_control.conversation_id,\n                    self.chat_control.conversation,\n                ],\n                show_progress=\"hidden\",\n            ).then(\n                self.chat_control.select_conv,\n                inputs=[self.chat_control.conversation, self._app.user_id],\n                outputs=[\n                    self.chat_control.conversation_id,\n                    self.chat_control.conversation,\n                    self.chat_control.conversation_rn,\n                    
self.chat_panel.chatbot,\n                    self.followup_questions,\n                    self.info_panel,\n                    self.state_plot_panel,\n                    self.state_retrieval_history,\n                    self.state_plot_history,\n                    self.chat_control.cb_is_public,\n                    self.state_chat,\n                ]\n                + self._indices_input,\n                show_progress=\"hidden\",\n            ).then(\n                fn=self._json_to_plot,\n                inputs=self.state_plot_panel,\n                outputs=self.plot_panel,\n            ).then(\n                fn=None,\n                inputs=None,\n                js=chat_input_focus_js,\n            )\n\n            self.chat_control.btn_del.click(\n                lambda id: self.toggle_delete(id),\n                inputs=[self.chat_control.conversation_id],\n                outputs=[\n                    self.chat_control._new_delete,\n                    self.chat_control._delete_confirm,\n                ],\n            )\n            self.chat_control.btn_del_conf.click(\n                self.chat_control.delete_conv,\n                inputs=[self.chat_control.conversation_id, self._app.user_id],\n                outputs=[\n                    self.chat_control.conversation_id,\n                    self.chat_control.conversation,\n                ],\n                show_progress=\"hidden\",\n            ).then(\n                self.chat_control.select_conv,\n                inputs=[self.chat_control.conversation, self._app.user_id],\n                outputs=[\n                    self.chat_control.conversation_id,\n                    self.chat_control.conversation,\n                    self.chat_control.conversation_rn,\n                    self.chat_panel.chatbot,\n                    self.followup_questions,\n                    self.info_panel,\n                    self.state_plot_panel,\n                    self.state_retrieval_history,\n 
                   self.state_plot_history,\n                    self.chat_control.cb_is_public,\n                    self.state_chat,\n                ]\n                + self._indices_input,\n                show_progress=\"hidden\",\n            ).then(\n                fn=self._json_to_plot,\n                inputs=self.state_plot_panel,\n                outputs=self.plot_panel,\n            ).then(\n                lambda: self.toggle_delete(\"\"),\n                outputs=[\n                    self.chat_control._new_delete,\n                    self.chat_control._delete_confirm,\n                ],\n            )\n            self.chat_control.btn_del_cnl.click(\n                lambda: self.toggle_delete(\"\"),\n                outputs=[\n                    self.chat_control._new_delete,\n                    self.chat_control._delete_confirm,\n                ],\n            )\n            self.chat_control.btn_conversation_rn.click(\n                lambda: gr.update(visible=True),\n                outputs=[\n                    self.chat_control.conversation_rn,\n                ],\n            )\n            self.chat_control.conversation_rn.submit(\n                self.chat_control.rename_conv,\n                inputs=[\n                    self.chat_control.conversation_id,\n                    self.chat_control.conversation_rn,\n                    gr.State(value=True),\n                    self._app.user_id,\n                ],\n                outputs=[\n                    self.chat_control.conversation,\n                    self.chat_control.conversation,\n                    self.chat_control.conversation_rn,\n                ],\n                show_progress=\"hidden\",\n            )\n\n        onConvSelect = (\n            self.chat_control.conversation.select(\n                self.chat_control.select_conv,\n                inputs=[self.chat_control.conversation, self._app.user_id],\n                outputs=[\n                    
self.chat_control.conversation_id,\n                    self.chat_control.conversation,\n                    self.chat_control.conversation_rn,\n                    self.chat_panel.chatbot,\n                    self.followup_questions,\n                    self.info_panel,\n                    self.state_plot_panel,\n                    self.state_retrieval_history,\n                    self.state_plot_history,\n                    self.chat_control.cb_is_public,\n                    self.state_chat,\n                ]\n                + self._indices_input,\n                show_progress=\"hidden\",\n            )\n            .then(\n                fn=self._json_to_plot,\n                inputs=self.state_plot_panel,\n                outputs=self.plot_panel,\n            )\n            .then(\n                lambda: self.toggle_delete(\"\"),\n                outputs=[\n                    self.chat_control._new_delete,\n                    self.chat_control._delete_confirm,\n                ],\n            )\n        )\n\n        if KH_DEMO_MODE:\n            onConvSelect = onConvSelect.then(\n                lambda: (gr.update(visible=False), gr.update(visible=True)),\n                outputs=[self.paper_list.accordion, self.chat_settings],\n            )\n\n        onConvSelect = (\n            onConvSelect.then(\n                fn=lambda: True,\n                js=clear_bot_message_selection_js,\n            )\n            .then(\n                fn=lambda: True,\n                inputs=None,\n                outputs=[self._preview_links],\n                js=pdfview_js,\n            )\n            .then(fn=None, inputs=None, outputs=None, js=chat_input_focus_js)\n        )\n\n        if not KH_DEMO_MODE:\n            # evidence display on message selection\n            self.chat_panel.chatbot.select(\n                self.message_selected,\n                inputs=[\n                    self.state_retrieval_history,\n                    
self.state_plot_history,\n                ],\n                outputs=[\n                    self.info_panel,\n                    self.state_plot_panel,\n                ],\n            ).then(\n                fn=self._json_to_plot,\n                inputs=self.state_plot_panel,\n                outputs=self.plot_panel,\n            ).then(\n                fn=lambda: True,\n                inputs=None,\n                outputs=[self._preview_links],\n                js=pdfview_js,\n            )\n\n        self.chat_control.cb_is_public.change(\n            self.on_set_public_conversation,\n            inputs=[self.chat_control.cb_is_public, self.chat_control.conversation],\n            outputs=None,\n            show_progress=\"hidden\",\n        )\n\n        if not KH_DEMO_MODE:\n            # user feedback events\n            self.chat_panel.chatbot.like(\n                fn=self.is_liked,\n                inputs=[self.chat_control.conversation_id],\n                outputs=None,\n            )\n            self.report_issue.report_btn.click(\n                self.report_issue.report,\n                inputs=[\n                    self.report_issue.correctness,\n                    self.report_issue.issues,\n                    self.report_issue.more_detail,\n                    self.chat_control.conversation_id,\n                    self.chat_panel.chatbot,\n                    self._app.settings_state,\n                    self._app.user_id,\n                    self.info_panel,\n                    self.state_chat,\n                ]\n                + self._indices_input,\n                outputs=None,\n            )\n\n        self.reasoning_type.change(\n            self.reasoning_changed,\n            inputs=[self.reasoning_type],\n            outputs=[self._reasoning_type],\n        )\n        self.use_mindmap_check.change(\n            lambda x: (x, gr.update(label=\"Mindmap \" + (\"(on)\" if x else \"(off)\"))),\n            
inputs=[self.use_mindmap_check],\n            outputs=[self.use_mindmap, self.use_mindmap_check],\n            show_progress=\"hidden\",\n        )\n\n        def toggle_chat_suggestion(current_state):\n            return current_state, gr.update(visible=current_state)\n\n        def raise_error_on_state(state):\n            if not state:\n                raise ValueError(\"Chat suggestion disabled\")\n\n        self.chat_control.cb_suggest_chat.change(\n            fn=toggle_chat_suggestion,\n            inputs=[self.chat_control.cb_suggest_chat],\n            outputs=[self._use_suggestion, self.followup_questions_ui],\n            show_progress=\"hidden\",\n        ).then(\n            fn=raise_error_on_state,\n            inputs=[self._use_suggestion],\n            show_progress=\"hidden\",\n        ).success(\n            **onSuggestChatEvent\n        )\n        self.chat_control.conversation_id.change(\n            lambda: gr.update(visible=False),\n            outputs=self.plot_panel,\n        )\n\n        self.followup_questions.select(\n            self.chat_suggestion.select_example,\n            outputs=[self.chat_panel.text_input],\n            show_progress=\"hidden\",\n        ).then(\n            fn=None,\n            inputs=None,\n            outputs=None,\n            js=chat_input_focus_js,\n        )\n\n        if KH_DEMO_MODE:\n            self.paper_list.examples.select(\n                self.paper_list.select_example,\n                inputs=[self.paper_list.papers_state],\n                outputs=[self.quick_urls],\n                show_progress=\"hidden\",\n            ).then(\n                lambda: (gr.update(visible=False), gr.update(visible=True)),\n                outputs=[self.paper_list.accordion, self.chat_settings],\n            ).then(\n                fn=None,\n                inputs=None,\n                outputs=None,\n                js=quick_urls_submit_js,\n            )\n\n    def submit_msg(\n        self,\n        
chat_input,\n        chat_history,\n        user_id,\n        settings,\n        conv_id,\n        conv_name,\n        first_selector_choices,\n        request: gr.Request,\n    ):\n        \"\"\"Submit a message to the chatbot\"\"\"\n        if KH_DEMO_MODE:\n            sso_user_id = check_rate_limit(\"chat\", request)\n            print(\"User ID:\", sso_user_id)\n\n        if not chat_input:\n            raise ValueError(\"Input is empty\")\n\n        chat_input_text = chat_input.get(\"text\", \"\")\n        file_ids = []\n        used_command = None\n\n        first_selector_choices_map = {\n            item[0]: item[1] for item in first_selector_choices\n        }\n\n        # get all file names with pattern @\"filename\" in input_str\n        file_names, chat_input_text = get_file_names_regex(chat_input_text)\n\n        # check if web search command is in file_names\n        if WEB_SEARCH_COMMAND in file_names:\n            used_command = WEB_SEARCH_COMMAND\n\n        # get all urls in input_str\n        urls, chat_input_text = get_urls(chat_input_text)\n\n        if urls and self.first_indexing_url_fn:\n            print(\"Detected URLs\", urls)\n            file_ids = self.first_indexing_url_fn(\n                \"\\n\".join(urls),\n                True,\n                settings,\n                user_id,\n                request=None,\n            )\n        elif file_names:\n            for file_name in file_names:\n                file_id = first_selector_choices_map.get(file_name)\n                if file_id:\n                    file_ids.append(file_id)\n\n        # add new file ids to the first selector choices\n        first_selector_choices.extend(zip(urls, file_ids))\n\n        # if file_ids is not empty and chat_input_text is empty\n        # set the input to summary\n        if not chat_input_text and file_ids:\n            chat_input_text = DEFAULT_QUESTION\n\n        # if start of conversation and no query is specified\n        if not 
chat_input_text and not chat_history:\n            chat_input_text = DEFAULT_QUESTION\n\n        if file_ids:\n            selector_output = [\n                \"select\",\n                gr.update(value=file_ids, choices=first_selector_choices),\n            ]\n        else:\n            selector_output = [gr.update(), gr.update()]\n\n        # check if regen mode is active\n        if chat_input_text:\n            chat_history = chat_history + [(chat_input_text, None)]\n        else:\n            if not chat_history:\n                raise gr.Error(\"Empty chat\")\n\n        if not conv_id:\n            if not KH_DEMO_MODE:\n                id_, update = self.chat_control.new_conv(user_id)\n                with Session(engine) as session:\n                    statement = select(Conversation).where(Conversation.id == id_)\n                    name = session.exec(statement).one().name\n                    new_conv_id = id_\n                    conv_update = update\n                    new_conv_name = name\n            else:\n                new_conv_id, new_conv_name, conv_update = None, None, gr.update()\n        else:\n            new_conv_id = conv_id\n            conv_update = gr.update()\n            new_conv_name = conv_name\n\n        return (\n            [\n                {},\n                chat_history,\n                new_conv_id,\n                conv_update,\n                new_conv_name,\n            ]\n            + selector_output\n            + [used_command]\n        )\n\n    def get_recommendations(self, first_selector_choices, file_ids):\n        first_selector_choices_map = {\n            item[1]: item[0] for item in first_selector_choices\n        }\n        file_names = [first_selector_choices_map[file_id] for file_id in file_ids]\n        if not file_names:\n            return \"\"\n\n        first_file_name = file_names[0].split(\".\")[0].replace(\"_\", \" \")\n        return get_recommended_papers(first_file_name)\n\n    def 
toggle_delete(self, conv_id):\n        if conv_id:\n            return gr.update(visible=False), gr.update(visible=True)\n        else:\n            return gr.update(visible=True), gr.update(visible=False)\n\n    def on_set_public_conversation(self, is_public, convo_id):\n        if not convo_id:\n            gr.Warning(\"No conversation selected\")\n            return\n\n        with Session(engine) as session:\n            statement = select(Conversation).where(Conversation.id == convo_id)\n\n            result = session.exec(statement).one()\n            name = result.name\n\n            if result.is_public != is_public:\n                # Only trigger updating when user\n                # select different value from the current\n                result.is_public = is_public\n                session.add(result)\n                session.commit()\n\n                gr.Info(\n                    f\"Conversation: {name} is {'public' if is_public else 'private'}.\"\n                )\n\n    def on_subscribe_public_events(self):\n        if self._app.f_user_management:\n            self._app.subscribe_event(\n                name=\"onSignIn\",\n                definition={\n                    \"fn\": self.chat_control.reload_conv,\n                    \"inputs\": [self._app.user_id],\n                    \"outputs\": [self.chat_control.conversation],\n                    \"show_progress\": \"hidden\",\n                },\n            )\n\n            self._app.subscribe_event(\n                name=\"onSignOut\",\n                definition={\n                    \"fn\": lambda: self.chat_control.select_conv(\"\", None),\n                    \"outputs\": [\n                        self.chat_control.conversation_id,\n                        self.chat_control.conversation,\n                        self.chat_control.conversation_rn,\n                        self.chat_panel.chatbot,\n                        self.followup_questions,\n                        
self.info_panel,\n                        self.state_plot_panel,\n                        self.state_retrieval_history,\n                        self.state_plot_history,\n                        self.chat_control.cb_is_public,\n                        self.state_chat,\n                    ]\n                    + self._indices_input,\n                    \"show_progress\": \"hidden\",\n                },\n            )\n\n    def _on_app_created(self):\n        if KH_DEMO_MODE:\n            self._app.app.load(\n                fn=lambda x: x,\n                inputs=[self._user_api_key],\n                outputs=[self._user_api_key],\n                js=fetch_api_key_js,\n            ).then(\n                fn=self.chat_control.toggle_demo_login_visibility,\n                inputs=[self._user_api_key],\n                outputs=[\n                    self.chat_control.cb_suggest_chat,\n                    self.chat_control.btn_new,\n                    self.chat_control.btn_demo_logout,\n                    self.chat_control.btn_demo_login,\n                ],\n            ).then(\n                fn=None,\n                inputs=None,\n                js=chat_input_focus_js,\n            )\n\n    def persist_data_source(\n        self,\n        convo_id,\n        user_id,\n        retrieval_msg,\n        plot_data,\n        retrival_history,\n        plot_history,\n        messages,\n        state,\n        *selecteds,\n    ):\n        \"\"\"Update the data source\"\"\"\n        if not convo_id:\n            gr.Warning(\"No conversation selected\")\n            return\n\n        # if not regen, then append the new message\n        if not state[\"app\"].get(\"regen\", False):\n            retrival_history = retrival_history + [retrieval_msg]\n            plot_history = plot_history + [plot_data]\n        else:\n            if retrival_history:\n                print(\"Updating retrieval history (regen=True)\")\n                retrival_history[-1] = retrieval_msg\n 
               plot_history[-1] = plot_data\n\n        # reset regen state\n        state[\"app\"][\"regen\"] = False\n\n        selecteds_ = {}\n        for index in self._app.index_manager.indices:\n            if index.selector is None:\n                continue\n            if isinstance(index.selector, int):\n                selecteds_[str(index.id)] = selecteds[index.selector]\n            else:\n                selecteds_[str(index.id)] = [selecteds[i] for i in index.selector]\n\n        with Session(engine) as session:\n            statement = select(Conversation).where(Conversation.id == convo_id)\n            result = session.exec(statement).one()\n\n            data_source = result.data_source\n            old_selecteds = data_source.get(\"selected\", {})\n            is_owner = result.user == user_id\n\n            # Write down to db\n            result.data_source = {\n                \"selected\": selecteds_ if is_owner else old_selecteds,\n                \"messages\": messages,\n                \"retrieval_messages\": retrival_history,\n                \"plot_history\": plot_history,\n                \"state\": state,\n                \"likes\": deepcopy(data_source.get(\"likes\", [])),\n            }\n            session.add(result)\n            session.commit()\n\n        return retrival_history, plot_history\n\n    def reasoning_changed(self, reasoning_type):\n        if reasoning_type != DEFAULT_SETTING:\n            # override app settings state (temporary)\n            gr.Info(\"Reasoning type changed to `{}`\".format(reasoning_type))\n        return reasoning_type\n\n    def is_liked(self, convo_id, liked: gr.LikeData):\n        with Session(engine) as session:\n            statement = select(Conversation).where(Conversation.id == convo_id)\n            result = session.exec(statement).one()\n\n            data_source = deepcopy(result.data_source)\n            likes = data_source.get(\"likes\", [])\n            likes.append([liked.index, 
liked.value, liked.liked])\n            data_source[\"likes\"] = likes\n\n            result.data_source = data_source\n            session.add(result)\n            session.commit()\n\n    def message_selected(self, retrieval_history, plot_history, msg: gr.SelectData):\n        index = msg.index[0]\n        try:\n            retrieval_content, plot_content = (\n                retrieval_history[index],\n                plot_history[index],\n            )\n        except IndexError:\n            retrieval_content, plot_content = gr.update(), None\n\n        return retrieval_content, plot_content\n\n    def create_pipeline(\n        self,\n        settings: dict,\n        session_reasoning_type: str,\n        session_llm: str,\n        session_use_mindmap: bool | str,\n        session_use_citation: str,\n        session_language: str,\n        state: dict,\n        command_state: str | None,\n        user_id: int,\n        *selecteds,\n    ):\n        \"\"\"Create the pipeline from settings\n\n        Args:\n            settings: the settings of the app\n            state: the state of the app\n            selected: the list of file ids that will be served as context. 
If None, then\n                consider using all files\n\n        Returns:\n            - the pipeline objects\n        \"\"\"\n        # override reasoning_mode by temporary chat page state\n        print(\n            \"Session reasoning type\",\n            session_reasoning_type,\n            \"use mindmap\",\n            session_use_mindmap,\n            \"use citation\",\n            session_use_citation,\n            \"language\",\n            session_language,\n        )\n        print(\"Session LLM\", session_llm)\n        reasoning_mode = (\n            settings[\"reasoning.use\"]\n            if session_reasoning_type in (DEFAULT_SETTING, None)\n            else session_reasoning_type\n        )\n        reasoning_cls = reasonings[reasoning_mode]\n        print(\"Reasoning class\", reasoning_cls)\n        reasoning_id = reasoning_cls.get_info()[\"id\"]\n\n        settings = deepcopy(settings)\n        llm_setting_key = f\"reasoning.options.{reasoning_id}.llm\"\n        if llm_setting_key in settings and session_llm not in (\n            DEFAULT_SETTING,\n            None,\n            \"\",\n        ):\n            settings[llm_setting_key] = session_llm\n\n        if session_use_mindmap not in (DEFAULT_SETTING, None):\n            settings[\"reasoning.options.simple.create_mindmap\"] = session_use_mindmap\n\n        if session_use_citation not in (DEFAULT_SETTING, None):\n            settings[\n                \"reasoning.options.simple.highlight_citation\"\n            ] = session_use_citation\n\n        if session_language not in (DEFAULT_SETTING, None):\n            settings[\"reasoning.lang\"] = session_language\n\n        # get retrievers\n        retrievers = []\n\n        if command_state == WEB_SEARCH_COMMAND:\n            # set retriever for web search\n            if not WebSearch:\n                raise ValueError(\"Web search back-end is not available.\")\n\n            web_search = WebSearch()\n            retrievers.append(web_search)\n   
     else:\n            for index in self._app.index_manager.indices:\n                index_selected = []\n                if isinstance(index.selector, int):\n                    index_selected = selecteds[index.selector]\n                if isinstance(index.selector, tuple):\n                    for i in index.selector:\n                        index_selected.append(selecteds[i])\n                iretrievers = index.get_retriever_pipelines(\n                    settings, user_id, index_selected\n                )\n                retrievers += iretrievers\n\n        # prepare states\n        reasoning_state = {\n            \"app\": deepcopy(state[\"app\"]),\n            \"pipeline\": deepcopy(state.get(reasoning_id, {})),\n        }\n\n        pipeline = reasoning_cls.get_pipeline(settings, reasoning_state, retrievers)\n\n        return pipeline, reasoning_state\n\n    def chat_fn(\n        self,\n        conversation_id,\n        chat_history,\n        settings,\n        reasoning_type,\n        llm_type,\n        use_mind_map,\n        use_citation,\n        language,\n        chat_state,\n        command_state,\n        user_id,\n        *selecteds,\n    ):\n        \"\"\"Chat function\"\"\"\n        chat_input, chat_output = chat_history[-1]\n        chat_history = chat_history[:-1]\n\n        # if chat_input is empty, assume regen mode\n        if chat_output:\n            chat_state[\"app\"][\"regen\"] = True\n\n        queue: asyncio.Queue[Optional[dict]] = asyncio.Queue()\n\n        # construct the pipeline\n        pipeline, reasoning_state = self.create_pipeline(\n            settings,\n            reasoning_type,\n            llm_type,\n            use_mind_map,\n            use_citation,\n            language,\n            chat_state,\n            command_state,\n            user_id,\n            *selecteds,\n        )\n        print(\"Reasoning state\", reasoning_state)\n        pipeline.set_output_queue(queue)\n\n        text, refs, plot, plot_gr 
= \"\", \"\", None, gr.update(visible=False)\n        msg_placeholder = getattr(\n            flowsettings, \"KH_CHAT_MSG_PLACEHOLDER\", \"Thinking ...\"\n        )\n        print(msg_placeholder)\n        yield (\n            chat_history + [(chat_input, text or msg_placeholder)],\n            refs,\n            plot_gr,\n            plot,\n            chat_state,\n        )\n\n        try:\n            for response in pipeline.stream(chat_input, conversation_id, chat_history):\n\n                if not isinstance(response, Document):\n                    continue\n\n                if response.channel is None:\n                    continue\n\n                if response.channel == \"chat\":\n                    if response.content is None:\n                        text = \"\"\n                    else:\n                        text += response.content\n\n                if response.channel == \"info\":\n                    if response.content is None:\n                        refs = \"\"\n                    else:\n                        refs += response.content\n\n                if response.channel == \"plot\":\n                    plot = response.content\n                    plot_gr = self._json_to_plot(plot)\n\n                chat_state[pipeline.get_info()[\"id\"]] = reasoning_state[\"pipeline\"]\n\n                yield (\n                    chat_history + [(chat_input, text or msg_placeholder)],\n                    refs,\n                    plot_gr,\n                    plot,\n                    chat_state,\n                )\n        except ValueError as e:\n            print(e)\n\n        if not text:\n            empty_msg = getattr(\n                flowsettings, \"KH_CHAT_EMPTY_MSG_PLACEHOLDER\", \"(Sorry, I don't know)\"\n            )\n            print(f\"Generate nothing: {empty_msg}\")\n            yield (\n                chat_history + [(chat_input, text or empty_msg)],\n                refs,\n                plot_gr,\n                
plot,\n                chat_state,\n            )\n\n    def check_and_suggest_name_conv(self, chat_history):\n        suggest_pipeline = SuggestConvNamePipeline()\n        new_name = gr.update()\n        renamed = False\n\n        # check if this is a newly created conversation\n        if len(chat_history) == 1:\n            suggested_name = suggest_pipeline(chat_history).text\n            suggested_name = strip_think_tag(suggested_name)\n            suggested_name = suggested_name.replace('\"', \"\").replace(\"'\", \"\")[:40]\n            new_name = gr.update(value=suggested_name)\n            renamed = True\n\n        return new_name, renamed\n\n    def suggest_chat_conv(\n        self,\n        settings,\n        session_language,\n        chat_history,\n        use_suggestion,\n    ):\n        target_language = (\n            session_language\n            if session_language not in (DEFAULT_SETTING, None)\n            else settings[\"reasoning.lang\"]\n        )\n        if use_suggestion:\n            suggest_pipeline = SuggestFollowupQuesPipeline()\n            suggest_pipeline.lang = SUPPORTED_LANGUAGE_MAP.get(\n                target_language, \"English\"\n            )\n            suggested_questions = [[each] for each in ChatSuggestion.CHAT_SAMPLES]\n\n            if len(chat_history) >= 1:\n                suggested_resp = suggest_pipeline(chat_history).text\n                if ques_res := re.search(\n                    r\"\\[(.*?)\\]\", re.sub(\"\\n\", \"\", suggested_resp)\n                ):\n                    ques_res_str = ques_res.group()\n                    try:\n                        suggested_questions = json.loads(ques_res_str)\n                        suggested_questions = [[x] for x in suggested_questions]\n                    except Exception:\n                        pass\n\n            return gr.update(visible=True), suggested_questions\n\n        return gr.update(visible=False), gr.update()\n"
  },
  {
    "path": "libs/ktem/ktem/pages/chat/chat_panel.py",
    "content": "import gradio as gr\nfrom ktem.app import BasePage\nfrom theflow.settings import settings as flowsettings\n\nKH_DEMO_MODE = getattr(flowsettings, \"KH_DEMO_MODE\", False)\n\nif not KH_DEMO_MODE:\n    PLACEHOLDER_TEXT = (\n        \"This is the beginning of a new conversation.\\n\"\n        \"Start by uploading a file or a web URL. \"\n        \"Visit the Files tab for more options (e.g., GraphRAG).\"\n    )\nelse:\n    PLACEHOLDER_TEXT = (\n        \"Welcome to Kotaemon Demo. \"\n        \"Start by browsing preloaded conversations to get started.\\n\"\n        \"Check out the Hint section for more tips.\"\n    )\n\n\nclass ChatPanel(BasePage):\n    def __init__(self, app):\n        self._app = app\n        self.on_building_ui()\n\n    def on_building_ui(self):\n        self.chatbot = gr.Chatbot(\n            label=self._app.app_name,\n            placeholder=PLACEHOLDER_TEXT,\n            show_label=False,\n            elem_id=\"main-chat-bot\",\n            show_copy_button=True,\n            likeable=True,\n            bubble_full_width=False,\n        )\n        with gr.Row():\n            self.text_input = gr.MultimodalTextbox(\n                interactive=True,\n                scale=20,\n                file_count=\"multiple\",\n                placeholder=(\n                    \"Type a message, search the @web, or tag a file with @filename\"\n                ),\n                container=False,\n                show_label=False,\n                elem_id=\"chat-input\",\n            )\n\n    def submit_msg(self, chat_input, chat_history):\n        \"\"\"Submit a message to the chatbot\"\"\"\n        return \"\", chat_history + [(chat_input, None)]\n"
  },
  {
    "path": "libs/ktem/ktem/pages/chat/chat_suggestion.py",
    "content": "import gradio as gr\nfrom ktem.app import BasePage\nfrom theflow.settings import settings as flowsettings\n\n\nclass ChatSuggestion(BasePage):\n    CHAT_SAMPLES = getattr(\n        flowsettings,\n        \"KH_FEATURE_CHAT_SUGGESTION_SAMPLES\",\n        [\n            \"Summarize this document\",\n            \"Generate a FAQ for this document\",\n            \"Identify the main highlights in bullet points\",\n        ],\n    )\n\n    def __init__(self, app):\n        self._app = app\n        self.on_building_ui()\n\n    def on_building_ui(self):\n        self.chat_samples = [[each] for each in self.CHAT_SAMPLES]\n        with gr.Accordion(\n            label=\"Chat Suggestion\",\n            visible=getattr(flowsettings, \"KH_FEATURE_CHAT_SUGGESTION\", False),\n        ) as self.accordion:\n            self.default_example = gr.State(\n                value=self.chat_samples,\n            )\n            self.examples = gr.DataFrame(\n                value=self.chat_samples,\n                headers=[\"Next Question\"],\n                interactive=False,\n                elem_id=\"chat-suggestion\",\n                wrap=True,\n            )\n\n    def as_gradio_component(self):\n        return self.examples\n\n    def select_example(self, ev: gr.SelectData):\n        return {\"text\": ev.value}\n"
  },
  {
    "path": "libs/ktem/ktem/pages/chat/common.py",
    "content": "DEFAULT_APPLICATION_STATE = {\"regen\": False}\nSTATE = {\n    \"app\": DEFAULT_APPLICATION_STATE,\n}\n"
  },
  {
    "path": "libs/ktem/ktem/pages/chat/control.py",
    "content": "import logging\nimport os\nfrom copy import deepcopy\n\nimport gradio as gr\nfrom ktem.app import BasePage\nfrom ktem.db.models import Conversation, User, engine\nfrom sqlmodel import Session, or_, select\n\nimport flowsettings\n\nfrom ...utils.conversation import sync_retrieval_n_message\nfrom .chat_suggestion import ChatSuggestion\nfrom .common import STATE\n\nlogger = logging.getLogger(__name__)\n\nKH_DEMO_MODE = getattr(flowsettings, \"KH_DEMO_MODE\", False)\nKH_SSO_ENABLED = getattr(flowsettings, \"KH_SSO_ENABLED\", False)\nASSETS_DIR = \"assets/icons\"\nif not os.path.isdir(ASSETS_DIR):\n    ASSETS_DIR = \"libs/ktem/ktem/assets/icons\"\n\n\nlogout_js = \"\"\"\nfunction () {\n    removeFromStorage('google_api_key');\n    window.location.href = \"/logout\";\n}\n\"\"\"\n\n\ndef is_conv_name_valid(name):\n    \"\"\"Check if the conversation name is valid\"\"\"\n    errors = []\n    if len(name) == 0:\n        errors.append(\"Name cannot be empty\")\n    elif len(name) > 40:\n        errors.append(\"Name cannot be longer than 40 characters\")\n\n    return \"; \".join(errors)\n\n\nclass ConversationControl(BasePage):\n    \"\"\"Manage conversation\"\"\"\n\n    def __init__(self, app):\n        self._app = app\n        self.logout_js = logout_js\n        self.on_building_ui()\n\n    def on_building_ui(self):\n        with gr.Row():\n            title_text = \"Conversations\" if not KH_DEMO_MODE else \"Kotaemon Papers\"\n            gr.Markdown(\"## {}\".format(title_text))\n            self.btn_toggle_dark_mode = gr.Button(\n                value=\"\",\n                icon=f\"{ASSETS_DIR}/dark_mode.svg\",\n                scale=1,\n                size=\"sm\",\n                elem_classes=[\"no-background\", \"body-text-color\"],\n                elem_id=\"toggle-dark-button\",\n            )\n            self.btn_chat_expand = gr.Button(\n                value=\"\",\n                icon=f\"{ASSETS_DIR}/expand.svg\",\n                scale=1,\n   
             size=\"sm\",\n                elem_classes=[\"no-background\", \"body-text-color\"],\n                elem_id=\"chat-expand-button\",\n            )\n            self.btn_info_expand = gr.Button(\n                value=\"\",\n                icon=f\"{ASSETS_DIR}/expand.svg\",\n                min_width=2,\n                scale=1,\n                size=\"sm\",\n                elem_classes=[\"no-background\", \"body-text-color\"],\n                elem_id=\"info-expand-button\",\n            )\n\n            self.btn_toggle_dark_mode.click(\n                None,\n                js=\"\"\"\n                () => {\n                    document.body.classList.toggle('dark');\n                }\n                \"\"\",\n            )\n\n        self.conversation_id = gr.State(value=\"\")\n        self.conversation = gr.Dropdown(\n            label=\"Chat sessions\",\n            choices=[],\n            container=False,\n            filterable=True,\n            interactive=True,\n            elem_classes=[\"unset-overflow\"],\n            elem_id=\"conversation-dropdown\",\n        )\n\n        with gr.Row() as self._new_delete:\n            self.cb_suggest_chat = gr.Checkbox(\n                value=False,\n                label=\"Suggest chat\",\n                min_width=10,\n                scale=6,\n                elem_id=\"suggest-chat-checkbox\",\n                container=False,\n                visible=not KH_DEMO_MODE,\n            )\n            self.cb_is_public = gr.Checkbox(\n                value=False,\n                label=\"Share this conversation\",\n                elem_id=\"is-public-checkbox\",\n                container=False,\n                visible=not KH_DEMO_MODE and not KH_SSO_ENABLED,\n            )\n\n            if not KH_DEMO_MODE:\n                self.btn_conversation_rn = gr.Button(\n                    value=\"\",\n                    icon=f\"{ASSETS_DIR}/rename.svg\",\n                    min_width=2,\n             
       scale=1,\n                    size=\"sm\",\n                    elem_classes=[\"no-background\", \"body-text-color\"],\n                )\n                self.btn_del = gr.Button(\n                    value=\"\",\n                    icon=f\"{ASSETS_DIR}/delete.svg\",\n                    min_width=2,\n                    scale=1,\n                    size=\"sm\",\n                    elem_classes=[\"no-background\", \"body-text-color\"],\n                )\n                self.btn_new = gr.Button(\n                    value=\"\",\n                    icon=f\"{ASSETS_DIR}/new.svg\",\n                    min_width=2,\n                    scale=1,\n                    size=\"sm\",\n                    elem_classes=[\"no-background\", \"body-text-color\"],\n                    elem_id=\"new-conv-button\",\n                )\n            else:\n                self.btn_new = gr.Button(\n                    value=\"New chat\",\n                    min_width=120,\n                    size=\"sm\",\n                    scale=1,\n                    variant=\"primary\",\n                    elem_id=\"new-conv-button\",\n                    visible=False,\n                )\n\n        if KH_DEMO_MODE:\n            with gr.Row():\n                self.btn_demo_login = gr.Button(\n                    \"Sign-in to create new chat\",\n                    min_width=120,\n                    size=\"sm\",\n                    scale=1,\n                    variant=\"primary\",\n                )\n                _js_redirect = \"\"\"\n                () => {\n                    url = '/login' + window.location.search;\n                    window.open(url, '_blank');\n                }\n                \"\"\"\n                self.btn_demo_login.click(None, js=_js_redirect)\n\n                self.btn_demo_logout = gr.Button(\n                    \"Sign-out\",\n                    min_width=120,\n                    size=\"sm\",\n                    scale=1,\n               
     visible=False,\n                )\n\n        with gr.Row(visible=False) as self._delete_confirm:\n            self.btn_del_conf = gr.Button(\n                value=\"Delete\",\n                variant=\"stop\",\n                min_width=10,\n            )\n            self.btn_del_cnl = gr.Button(value=\"Cancel\", min_width=10)\n\n        with gr.Row():\n            self.conversation_rn = gr.Text(\n                label=\"(Enter) to save\",\n                placeholder=\"Conversation name\",\n                container=True,\n                scale=5,\n                min_width=10,\n                interactive=True,\n                visible=False,\n            )\n\n    def load_chat_history(self, user_id):\n        \"\"\"Reload chat history\"\"\"\n\n        # In case user are admin. They can also watch the\n        # public conversations\n        can_see_public: bool = False\n        with Session(engine) as session:\n            statement = select(User).where(User.id == user_id)\n            result = session.exec(statement).one_or_none()\n\n            if result is not None:\n                if flowsettings.KH_USER_CAN_SEE_PUBLIC:\n                    can_see_public = (\n                        result.username == flowsettings.KH_USER_CAN_SEE_PUBLIC\n                    )\n                else:\n                    can_see_public = True\n\n        print(f\"User-id: {user_id}, can see public conversations: {can_see_public}\")\n\n        options = []\n        with Session(engine) as session:\n            # Define condition based on admin-role:\n            # - can_see: can see their conversations & public files\n            # - can_not_see: only see their conversations\n            if can_see_public:\n                statement = (\n                    select(Conversation)\n                    .where(\n                        or_(\n                            Conversation.user == user_id,\n                            Conversation.is_public,\n                        
)\n                    )\n                    .order_by(\n                        Conversation.is_public.desc(), Conversation.date_created.desc()\n                    )  # type: ignore\n                )\n            else:\n                statement = (\n                    select(Conversation)\n                    .where(Conversation.user == user_id)\n                    .order_by(Conversation.date_created.desc())  # type: ignore\n                )\n\n            results = session.exec(statement).all()\n            for result in results:\n                options.append((result.name, result.id))\n\n        return options\n\n    def reload_conv(self, user_id):\n        conv_list = self.load_chat_history(user_id)\n        if conv_list:\n            return gr.update(value=None, choices=conv_list)\n        else:\n            return gr.update(value=None, choices=[])\n\n    def new_conv(self, user_id):\n        \"\"\"Create new chat\"\"\"\n        if user_id is None:\n            gr.Warning(\"Please sign in first (Settings → User Settings)\")\n            return None, gr.update()\n        with Session(engine) as session:\n            new_conv = Conversation(user=user_id)\n            session.add(new_conv)\n            session.commit()\n\n            id_ = new_conv.id\n\n        history = self.load_chat_history(user_id)\n\n        return id_, gr.update(value=id_, choices=history)\n\n    def delete_conv(self, conversation_id, user_id):\n        \"\"\"Delete the selected conversation\"\"\"\n        if not conversation_id:\n            gr.Warning(\"No conversation selected.\")\n            return None, gr.update()\n\n        if user_id is None:\n            gr.Warning(\"Please sign in first (Settings → User Settings)\")\n            return None, gr.update()\n\n        with Session(engine) as session:\n            statement = select(Conversation).where(Conversation.id == conversation_id)\n            result = session.exec(statement).one()\n\n            
session.delete(result)\n            session.commit()\n\n        history = self.load_chat_history(user_id)\n        if history:\n            id_ = history[0][1]\n            return id_, gr.update(value=id_, choices=history)\n        else:\n            return None, gr.update(value=None, choices=[])\n\n    def select_conv(self, conversation_id, user_id):\n        \"\"\"Select the conversation\"\"\"\n        default_chat_suggestions = [[each] for each in ChatSuggestion.CHAT_SAMPLES]\n\n        with Session(engine) as session:\n            statement = select(Conversation).where(Conversation.id == conversation_id)\n            try:\n                result = session.exec(statement).one()\n                id_ = result.id\n                name = result.name\n                is_conv_public = result.is_public\n\n                # disable file selection ids state if\n                # not the owner of the conversation\n                if user_id == result.user:\n                    selected = result.data_source.get(\"selected\", {})\n                else:\n                    selected = {}\n\n                chats = result.data_source.get(\"messages\", [])\n                chat_suggestions = result.data_source.get(\n                    \"chat_suggestions\", default_chat_suggestions\n                )\n\n                retrieval_history: list[str] = result.data_source.get(\n                    \"retrieval_messages\", []\n                )\n                plot_history: list[dict] = result.data_source.get(\"plot_history\", [])\n\n                # On initialization\n                # Ensure len of retrieval and messages are equal\n                retrieval_history = sync_retrieval_n_message(chats, retrieval_history)\n\n                info_panel = (\n                    retrieval_history[-1]\n                    if retrieval_history\n                    else \"<h5><b>No evidence found.</b></h5>\"\n                )\n                plot_data = plot_history[-1] if plot_history 
else None\n                state = result.data_source.get(\"state\", STATE)\n\n            except Exception as e:\n                logger.warning(e)\n                id_ = \"\"\n                name = \"\"\n                selected = {}\n                chats = []\n                chat_suggestions = default_chat_suggestions\n                retrieval_history = []\n                plot_history = []\n                info_panel = \"\"\n                plot_data = None\n                state = STATE\n                is_conv_public = False\n\n        indices = []\n        for index in self._app.index_manager.indices:\n            # assume that the index has selector\n            if index.selector is None:\n                continue\n            if isinstance(index.selector, int):\n                indices.append(selected.get(str(index.id), index.default_selector))\n            if isinstance(index.selector, tuple):\n                indices.extend(selected.get(str(index.id), index.default_selector))\n\n        return (\n            id_,\n            id_,\n            name,\n            chats,\n            chat_suggestions,\n            info_panel,\n            plot_data,\n            retrieval_history,\n            plot_history,\n            is_conv_public,\n            state,\n            *indices,\n        )\n\n    def rename_conv(self, conversation_id, new_name, is_renamed, user_id):\n        \"\"\"Rename the conversation\"\"\"\n        if not is_renamed or KH_DEMO_MODE or user_id is None or not conversation_id:\n            return (\n                gr.update(),\n                conversation_id,\n                gr.update(visible=False),\n            )\n\n        errors = is_conv_name_valid(new_name)\n        if errors:\n            gr.Warning(errors)\n            return (\n                gr.update(),\n                conversation_id,\n                gr.update(visible=False),\n            )\n\n        with Session(engine) as session:\n            statement = 
select(Conversation).where(Conversation.id == conversation_id)\n            result = session.exec(statement).one()\n            result.name = new_name\n            session.add(result)\n            session.commit()\n\n        history = self.load_chat_history(user_id)\n        gr.Info(\"Conversation renamed.\")\n        return (\n            gr.update(choices=history),\n            conversation_id,\n            gr.update(visible=False),\n        )\n\n    def persist_chat_suggestions(\n        self, conversation_id, new_suggestions, is_updated, user_id\n    ):\n        \"\"\"Update the conversation's chat suggestions\"\"\"\n        if not is_updated:\n            return\n\n        if user_id is None:\n            gr.Warning(\"Please sign in first (Settings → User Settings)\")\n            return gr.update(), \"\"\n\n        if not conversation_id:\n            gr.Warning(\"No conversation selected.\")\n            return gr.update(), \"\"\n\n        with Session(engine) as session:\n            statement = select(Conversation).where(Conversation.id == conversation_id)\n            result = session.exec(statement).one()\n\n            data_source = deepcopy(result.data_source)\n            data_source[\"chat_suggestions\"] = [\n                [x] for x in new_suggestions.iloc[:, 0].tolist()\n            ]\n\n            result.data_source = data_source\n            session.add(result)\n            session.commit()\n\n        gr.Info(\"Chat suggestions updated.\")\n\n    def toggle_demo_login_visibility(self, user_api_key, request: gr.Request):\n        try:\n            import gradiologin as grlogin\n\n            user = grlogin.get_user(request)\n        except (ImportError, AssertionError):\n            user = None\n\n        if user:  # or user_api_key:\n            return [\n                gr.update(visible=True),\n                gr.update(visible=True),\n                gr.update(visible=True),\n                gr.update(visible=False),\n            ]\n        
else:\n            return [\n                gr.update(visible=False),\n                gr.update(visible=False),\n                gr.update(visible=False),\n                gr.update(visible=True),\n            ]\n\n    def _on_app_created(self):\n        \"\"\"Reload the conversation once the app is created\"\"\"\n        self._app.app.load(\n            self.reload_conv,\n            inputs=[self._app.user_id],\n            outputs=[self.conversation],\n        )\n"
  },
  {
    "path": "libs/ktem/ktem/pages/chat/demo_hint.py",
    "content": "from textwrap import dedent\n\nimport gradio as gr\nfrom ktem.app import BasePage\n\n\nclass HintPage(BasePage):\n    def __init__(self, app):\n        self._app = app\n        self.on_building_ui()\n\n    def on_building_ui(self):\n        with gr.Accordion(label=\"Hint\", open=False):\n            gr.Markdown(\n                dedent(\n                    \"\"\"\n                - You can select any text from the chat answer to **highlight relevant citation(s)** on the right panel.\n                - **Citations** can be viewed on both PDF viewer and raw text.\n                - You can tweak the citation format and use advance (CoT) reasoning in **Chat settings** menu.\n                - Want to **explore more**? Check out the **Help** section to create your private space.\n            \"\"\"  # noqa\n                )\n            )\n"
  },
  {
    "path": "libs/ktem/ktem/pages/chat/paper_list.py",
    "content": "import gradio as gr\nfrom ktem.app import BasePage\nfrom pandas import DataFrame\n\nfrom ...utils.hf_papers import fetch_papers\n\n\nclass PaperListPage(BasePage):\n    def __init__(self, app):\n        self._app = app\n        self.on_building_ui()\n\n    def on_building_ui(self):\n        self.papers_state = gr.State(None)\n        with gr.Accordion(\n            label=\"Browse popular daily papers\",\n            open=True,\n        ) as self.accordion:\n            self.examples = gr.DataFrame(\n                value=[],\n                headers=[\"title\", \"url\", \"upvotes\"],\n                column_widths=[60, 30, 10],\n                interactive=False,\n                elem_id=\"paper-suggestion\",\n                wrap=True,\n            )\n        return self.examples\n\n    def load(self):\n        papers = fetch_papers(top_n=5)\n        papers_df = DataFrame(papers)\n        return (papers_df, papers)\n\n    def _on_app_created(self):\n        self._app.app.load(\n            self.load,\n            outputs=[self.examples, self.papers_state],\n        )\n\n    def select_example(self, state, ev: gr.SelectData):\n        return state[ev.index[0]][\"url\"]\n"
  },
  {
    "path": "libs/ktem/ktem/pages/chat/report.py",
    "content": "from typing import Optional\n\nimport gradio as gr\nfrom ktem.app import BasePage\nfrom ktem.db.models import IssueReport, engine\nfrom sqlmodel import Session\n\n\nclass ReportIssue(BasePage):\n    def __init__(self, app):\n        self._app = app\n        self.on_building_ui()\n\n    def on_building_ui(self):\n        with gr.Accordion(label=\"Feedback\", open=False, elem_id=\"report-accordion\"):\n            self.correctness = gr.Radio(\n                choices=[\n                    (\"The answer is correct\", \"correct\"),\n                    (\"The answer is incorrect\", \"incorrect\"),\n                ],\n                label=\"Correctness:\",\n            )\n            self.issues = gr.CheckboxGroup(\n                choices=[\n                    (\"The answer is offensive\", \"offensive\"),\n                    (\"The evidence is incorrect\", \"wrong-evidence\"),\n                ],\n                label=\"Other issue:\",\n            )\n            self.more_detail = gr.Textbox(\n                placeholder=(\n                    \"More detail (e.g. 
how wrong is it, what is the \"\n                    \"correct answer, etc...)\"\n                ),\n                container=False,\n                lines=3,\n            )\n            gr.Markdown(\n                \"This will send the current chat and the user settings to \"\n                \"help with investigation\"\n            )\n            self.report_btn = gr.Button(\"Report\")\n\n    def report(\n        self,\n        correctness: str,\n        issues: list[str],\n        more_detail: str,\n        conv_id: str,\n        chat_history: list,\n        settings: dict,\n        user_id: Optional[int],\n        info_panel: str,\n        chat_state: dict,\n        *selecteds,\n    ):\n        selecteds_ = {}\n        for index in self._app.index_manager.indices:\n            if index.selector is not None:\n                if isinstance(index.selector, int):\n                    selecteds_[str(index.id)] = selecteds[index.selector]\n                elif isinstance(index.selector, tuple):\n                    selecteds_[str(index.id)] = [selecteds[_] for _ in index.selector]\n                else:\n                    print(f\"Unknown selector type: {index.selector}\")\n\n        with Session(engine) as session:\n            issue = IssueReport(\n                issues={\n                    \"correctness\": correctness,\n                    \"issues\": issues,\n                    \"more_detail\": more_detail,\n                },\n                chat={\n                    \"conv_id\": conv_id,\n                    \"chat_history\": chat_history,\n                    \"info_panel\": info_panel,\n                    \"chat_state\": chat_state,\n                    \"selecteds\": selecteds_,\n                },\n                settings=settings,\n                user=user_id,\n            )\n            session.add(issue)\n            session.commit()\n        gr.Info(\"Thank you for your feedback\")\n"
  },
  {
    "path": "libs/ktem/ktem/pages/help.py",
    "content": "from importlib.metadata import version\nfrom pathlib import Path\n\nimport gradio as gr\nimport requests\nfrom decouple import config\nfrom theflow.settings import settings\n\nKH_DEMO_MODE = getattr(settings, \"KH_DEMO_MODE\", False)\nHF_SPACE_URL = config(\"HF_SPACE_URL\", default=\"\")\n\n\ndef get_remote_doc(url: str) -> str:\n    try:\n        res = requests.get(url)\n        res.raise_for_status()\n        return res.text\n    except Exception as e:\n        print(f\"Failed to fetch document from {url}: {e}\")\n        return \"\"\n\n\ndef download_changelogs(release_url: str) -> str:\n    try:\n        res = requests.get(release_url).json()\n        changelogs = res.get(\"body\", \"\")\n\n        return changelogs\n    except Exception as e:\n        print(f\"Failed to fetch changelogs from {release_url}: {e}\")\n        return \"\"\n\n\nclass HelpPage:\n    def __init__(\n        self,\n        app,\n        doc_dir: str = settings.KH_DOC_DIR,\n        remote_content_url: str = \"https://raw.githubusercontent.com/Cinnamon/kotaemon\",\n        app_version: str | None = settings.KH_APP_VERSION,\n        changelogs_cache_dir: str\n        | Path = (Path(settings.KH_APP_DATA_DIR) / \"changelogs\"),\n    ):\n        self._app = app\n        self.doc_dir = Path(doc_dir)\n        self.remote_content_url = remote_content_url\n        self.app_version = app_version\n        self.changelogs_cache_dir = Path(changelogs_cache_dir)\n\n        self.changelogs_cache_dir.mkdir(parents=True, exist_ok=True)\n\n        about_md_dir = self.doc_dir / \"about.md\"\n        if about_md_dir.exists():\n            with (self.doc_dir / \"about.md\").open(encoding=\"utf-8\") as fi:\n                about_md = fi.read()\n        else:  # fetch from remote\n            about_md = get_remote_doc(\n                f\"{self.remote_content_url}/v{self.app_version}/docs/about.md\"\n            )\n        if about_md:\n            with gr.Accordion(\"About\"):\n                
if self.app_version:\n                    about_md = f\"Version: {self.app_version}\\n\\n{about_md}\"\n                gr.Markdown(about_md)\n\n        if KH_DEMO_MODE:\n            with gr.Accordion(\"Create Your Own Space\"):\n                gr.Markdown(\n                    \"This is a demo with limited functionality. \"\n                    \"Use **Create space** button to install Kotaemon \"\n                    \"in your own space with all features \"\n                    \"(including upload and manage your private \"\n                    \"documents securely).\"\n                )\n                gr.Button(\n                    value=\"Create Your Own Space\",\n                    link=HF_SPACE_URL,\n                    variant=\"primary\",\n                    size=\"lg\",\n                )\n\n        user_guide_md_dir = self.doc_dir / \"usage.md\"\n        if user_guide_md_dir.exists():\n            with (self.doc_dir / \"usage.md\").open(encoding=\"utf-8\") as fi:\n                user_guide_md = fi.read()\n        else:  # fetch from remote\n            user_guide_md = get_remote_doc(\n                f\"{self.remote_content_url}/v{self.app_version}/docs/usage.md\"\n            )\n        if user_guide_md:\n            with gr.Accordion(\"User Guide\", open=not KH_DEMO_MODE):\n                gr.Markdown(user_guide_md)\n\n        if self.app_version:\n            # try retrieve from cache\n            changelogs = \"\"\n\n            if (self.changelogs_cache_dir / f\"{version}.md\").exists():\n                with open(self.changelogs_cache_dir / f\"{version}.md\", \"r\") as fi:\n                    changelogs = fi.read()\n            else:\n                release_url_base = (\n                    \"https://api.github.com/repos/Cinnamon/kotaemon/releases\"\n                )\n                changelogs = download_changelogs(\n                    release_url=f\"{release_url_base}/tags/v{self.app_version}\"\n                )\n\n                # 
cache the changelogs\n                if not self.changelogs_cache_dir.exists():\n                    self.changelogs_cache_dir.mkdir(parents=True, exist_ok=True)\n                with open(\n                    self.changelogs_cache_dir / f\"{self.app_version}.md\", \"w\"\n                ) as fi:\n                    fi.write(changelogs)\n\n            if changelogs:\n                with gr.Accordion(f\"Changelogs (v{self.app_version})\"):\n                    gr.Markdown(changelogs)\n"
  },
  {
    "path": "libs/ktem/ktem/pages/login.py",
    "content": "import hashlib\n\nimport gradio as gr\nfrom ktem.app import BasePage\nfrom ktem.db.models import User, engine\nfrom ktem.pages.resources.user import create_user\nfrom sqlmodel import Session, select\n\nfetch_creds = \"\"\"\nfunction() {\n    const username = getStorage('username', '')\n    const password = getStorage('password', '')\n    return [username, password, null];\n}\n\"\"\"\n\nsignin_js = \"\"\"\nfunction(usn, pwd) {\n    setStorage('username', usn);\n    setStorage('password', pwd);\n    return [usn, pwd];\n}\n\"\"\"\n\n\nclass LoginPage(BasePage):\n\n    public_events = [\"onSignIn\"]\n\n    def __init__(self, app):\n        self._app = app\n        self.on_building_ui()\n\n    def on_building_ui(self):\n        gr.Markdown(f\"# Welcome to {self._app.app_name}!\")\n        self.usn = gr.Textbox(label=\"Username\", visible=False)\n        self.pwd = gr.Textbox(label=\"Password\", type=\"password\", visible=False)\n        self.btn_login = gr.Button(\"Login\", visible=False)\n\n    def on_register_events(self):\n        onSignIn = gr.on(\n            triggers=[self.btn_login.click, self.pwd.submit],\n            fn=self.login,\n            inputs=[self.usn, self.pwd],\n            outputs=[self._app.user_id, self.usn, self.pwd],\n            show_progress=\"hidden\",\n            js=signin_js,\n        ).then(\n            self.toggle_login_visibility,\n            inputs=[self._app.user_id],\n            outputs=[self.usn, self.pwd, self.btn_login],\n        )\n        for event in self._app.get_event(\"onSignIn\"):\n            onSignIn = onSignIn.success(**event)\n\n    def toggle_login_visibility(self, user_id):\n        return (\n            gr.update(visible=user_id is None),\n            gr.update(visible=user_id is None),\n            gr.update(visible=user_id is None),\n        )\n\n    def _on_app_created(self):\n        onSignIn = self._app.app.load(\n            self.login,\n            inputs=[self.usn, self.pwd],\n            
outputs=[self._app.user_id, self.usn, self.pwd],\n            show_progress=\"hidden\",\n            js=fetch_creds,\n        ).then(\n            self.toggle_login_visibility,\n            inputs=[self._app.user_id],\n            outputs=[self.usn, self.pwd, self.btn_login],\n        )\n        for event in self._app.get_event(\"onSignIn\"):\n            onSignIn = onSignIn.success(**event)\n\n    def on_subscribe_public_events(self):\n        self._app.subscribe_event(\n            name=\"onSignOut\",\n            definition={\n                \"fn\": self.toggle_login_visibility,\n                \"inputs\": [self._app.user_id],\n                \"outputs\": [self.usn, self.pwd, self.btn_login],\n                \"show_progress\": \"hidden\",\n            },\n        )\n\n    def login(self, usn, pwd, request: gr.Request):\n        try:\n            import gradiologin as grlogin\n\n            user = grlogin.get_user(request)\n        except (ImportError, AssertionError):\n            user = None\n\n        if user:\n            user_id = user[\"sub\"]\n            with Session(engine) as session:\n                stmt = select(User).where(\n                    User.id == user_id,\n                )\n                result = session.exec(stmt).all()\n\n            if result:\n                print(\"Existing user:\", user)\n                return user_id, \"\", \"\"\n            else:\n                print(\"Creating new user:\", user)\n                create_user(\n                    usn=user[\"email\"],\n                    pwd=\"\",\n                    user_id=user_id,\n                    is_admin=False,\n                )\n                return user_id, \"\", \"\"\n        else:\n            if not usn or not pwd:\n                return None, usn, pwd\n\n            hashed_password = hashlib.sha256(pwd.encode()).hexdigest()\n            with Session(engine) as session:\n                stmt = select(User).where(\n                    User.username_lower 
== usn.lower().strip(),\n                    User.password == hashed_password,\n                )\n                result = session.exec(stmt).all()\n                if result:\n                    return result[0].id, \"\", \"\"\n\n                gr.Warning(\"Invalid username or password\")\n                return None, usn, pwd\n"
  },
  {
    "path": "libs/ktem/ktem/pages/resources/__init__.py",
    "content": "import gradio as gr\nfrom ktem.app import BasePage\nfrom ktem.db.models import User, engine\nfrom ktem.embeddings.ui import EmbeddingManagement\nfrom ktem.index.ui import IndexManagement\nfrom ktem.llms.ui import LLMManagement\nfrom ktem.mcp.ui import MCPManagement\nfrom ktem.rerankings.ui import RerankingManagement\nfrom sqlmodel import Session, select\n\nfrom .user import UserManagement\n\n\nclass ResourcesTab(BasePage):\n    def __init__(self, app):\n        self._app = app\n        self.on_building_ui()\n\n    def on_building_ui(self):\n        with gr.Tab(\"Index Collections\") as self.index_management_tab:\n            self.index_management = IndexManagement(self._app)\n\n        with gr.Tab(\"LLMs\") as self.llm_management_tab:\n            self.llm_management = LLMManagement(self._app)\n\n        with gr.Tab(\"Embeddings\") as self.emb_management_tab:\n            self.emb_management = EmbeddingManagement(self._app)\n\n        with gr.Tab(\"Rerankings\") as self.rerank_management_tab:\n            self.rerank_management = RerankingManagement(self._app)\n\n        with gr.Tab(\"MCP Servers\") as self.mcp_management_tab:\n            self.mcp_management = MCPManagement(self._app)\n\n        if self._app.f_user_management:\n            with gr.Tab(\"Users\", visible=False) as self.user_management_tab:\n                self.user_management = UserManagement(self._app)\n\n    def on_subscribe_public_events(self):\n        if self._app.f_user_management:\n            self._app.subscribe_event(\n                name=\"onSignIn\",\n                definition={\n                    \"fn\": self.toggle_user_management,\n                    \"inputs\": [self._app.user_id],\n                    \"outputs\": [self.user_management_tab],\n                    \"show_progress\": \"hidden\",\n                },\n            )\n\n            self._app.subscribe_event(\n                name=\"onSignOut\",\n                definition={\n                    
\"fn\": self.toggle_user_management,\n                    \"inputs\": [self._app.user_id],\n                    \"outputs\": [self.user_management_tab],\n                    \"show_progress\": \"hidden\",\n                },\n            )\n\n    def toggle_user_management(self, user_id):\n        \"\"\"Show/hide the user management, depending on the user's role\"\"\"\n        with Session(engine) as session:\n            user = session.exec(select(User).where(User.id == user_id)).first()\n            if user and user.admin:\n                return gr.update(visible=True)\n\n            return gr.update(visible=False)\n"
  },
  {
    "path": "libs/ktem/ktem/pages/resources/user.py",
    "content": "import hashlib\n\nimport gradio as gr\nimport pandas as pd\nfrom ktem.app import BasePage\nfrom ktem.db.models import User, engine\nfrom sqlmodel import Session, select\nfrom theflow.settings import settings as flowsettings\n\nUSERNAME_RULE = \"\"\"**Username rule:**\n\n- Username is case-insensitive\n- Username must be at least 3 characters long\n- Username must be at most 32 characters long\n- Username must contain only alphanumeric characters and underscores\n\"\"\"\n\n\nPASSWORD_RULE = \"\"\"**Password rule:**\n\n- Password must be at least 8 characters long\n- Password must contain at least one uppercase letter\n- Password must contain at least one lowercase letter\n- Password must contain at least one digit\n- Password must contain at least one special character from the following:\n    ^ $ * . [ ] { } ( ) ? - \" ! @ # % & / \\\\ , > < ' : ; | _ ~  + =\n\"\"\"\n\n\ndef validate_username(usn):\n    \"\"\"Validate that whether username is valid\n\n    Args:\n        usn (str): Username\n    \"\"\"\n    errors = []\n    if len(usn) < 3:\n        errors.append(\"Username must be at least 3 characters long\")\n\n    if len(usn) > 32:\n        errors.append(\"Username must be at most 32 characters long\")\n\n    if not usn.replace(\"_\", \"\").isalnum():\n        errors.append(\n            \"Username must contain only alphanumeric characters and underscores\"\n        )\n\n    return \"; \".join(errors)\n\n\ndef validate_password(pwd, pwd_cnf):\n    \"\"\"Validate that whether password is valid\n\n    - Password must be at least 8 characters long\n    - Password must contain at least one uppercase letter\n    - Password must contain at least one lowercase letter\n    - Password must contain at least one digit\n    - Password must contain at least one special character from the following:\n        ^ $ * . [ ] { } ( ) ? - \" ! 
@ # % & / \\\\ , > < ' : ; | _ ~  + =\n\n    Args:\n        pwd (str): Password\n        pwd_cnf (str): Confirm password\n\n    Returns:\n        str: Error message if password is not valid\n    \"\"\"\n    errors = []\n    if pwd != pwd_cnf:\n        errors.append(\"Password does not match\")\n\n    if len(pwd) < 8:\n        errors.append(\"Password must be at least 8 characters long\")\n\n    if not any(c.isupper() for c in pwd):\n        errors.append(\"Password must contain at least one uppercase letter\")\n\n    if not any(c.islower() for c in pwd):\n        errors.append(\"Password must contain at least one lowercase letter\")\n\n    if not any(c.isdigit() for c in pwd):\n        errors.append(\"Password must contain at least one digit\")\n\n    special_chars = \"^$*.[]{}()?-\\\"!@#%&/\\\\,><':;|_~+=\"\n    if not any(c in special_chars for c in pwd):\n        errors.append(\n            \"Password must contain at least one special character from the \"\n            f\"following: {special_chars}\"\n        )\n\n    if errors:\n        return \"; \".join(errors)\n\n    return \"\"\n\n\ndef create_user(usn, pwd, user_id=None, is_admin=True) -> bool:\n    with Session(engine) as session:\n        statement = select(User).where(User.username_lower == usn.lower())\n        result = session.exec(statement).all()\n        if result:\n            print(f'User \"{usn}\" already exists')\n            return False\n\n        else:\n            hashed_password = hashlib.sha256(pwd.encode()).hexdigest()\n            user = User(\n                id=user_id,\n                username=usn,\n                username_lower=usn.lower(),\n                password=hashed_password,\n                admin=is_admin,\n            )\n            session.add(user)\n            session.commit()\n\n            return True\n\n\nclass UserManagement(BasePage):\n    def __init__(self, app):\n        self._app = app\n\n        self.on_building_ui()\n        if hasattr(flowsettings, 
\"KH_FEATURE_USER_MANAGEMENT_ADMIN\") and hasattr(\n            flowsettings, \"KH_FEATURE_USER_MANAGEMENT_PASSWORD\"\n        ):\n            usn = flowsettings.KH_FEATURE_USER_MANAGEMENT_ADMIN\n            pwd = flowsettings.KH_FEATURE_USER_MANAGEMENT_PASSWORD\n\n            is_created = create_user(usn, pwd)\n            if is_created:\n                gr.Info(f'User \"{usn}\" created successfully')\n\n    def on_building_ui(self):\n        with gr.Tab(label=\"User list\"):\n            self.state_user_list = gr.State(value=None)\n            self.user_list = gr.DataFrame(\n                headers=[\"id\", \"name\", \"admin\"],\n                column_widths=[0, 50, 50],\n                interactive=False,\n            )\n\n            with gr.Group(visible=False) as self._selected_panel:\n                self.selected_user_id = gr.State(value=-1)\n                self.usn_edit = gr.Textbox(label=\"Username\")\n                with gr.Row():\n                    self.pwd_edit = gr.Textbox(label=\"Change password\", type=\"password\")\n                    self.pwd_cnf_edit = gr.Textbox(\n                        label=\"Confirm change password\",\n                        type=\"password\",\n                    )\n                self.admin_edit = gr.Checkbox(label=\"Admin\")\n\n            with gr.Row(visible=False) as self._selected_panel_btn:\n                with gr.Column():\n                    self.btn_edit_save = gr.Button(\"Save\")\n                with gr.Column():\n                    self.btn_delete = gr.Button(\"Delete\")\n                    with gr.Row():\n                        self.btn_delete_yes = gr.Button(\n                            \"Confirm delete\", variant=\"primary\", visible=False\n                        )\n                        self.btn_delete_no = gr.Button(\"Cancel\", visible=False)\n                with gr.Column():\n                    self.btn_close = gr.Button(\"Close\")\n\n        with gr.Tab(label=\"Create user\"):\n         
   self.usn_new = gr.Textbox(label=\"Username\", interactive=True)\n            self.pwd_new = gr.Textbox(\n                label=\"Password\", type=\"password\", interactive=True\n            )\n            self.pwd_cnf_new = gr.Textbox(\n                label=\"Confirm password\", type=\"password\", interactive=True\n            )\n            with gr.Row():\n                gr.Markdown(USERNAME_RULE)\n                gr.Markdown(PASSWORD_RULE)\n            self.btn_new = gr.Button(\"Create user\")\n\n    def on_register_events(self):\n        self.btn_new.click(\n            self.create_user,\n            inputs=[self.usn_new, self.pwd_new, self.pwd_cnf_new],\n            outputs=[self.usn_new, self.pwd_new, self.pwd_cnf_new],\n        ).then(\n            self.list_users,\n            inputs=self._app.user_id,\n            outputs=[self.state_user_list, self.user_list],\n        )\n        self.user_list.select(\n            self.select_user,\n            inputs=self.user_list,\n            outputs=[self.selected_user_id],\n            show_progress=\"hidden\",\n        )\n        self.selected_user_id.change(\n            self.on_selected_user_change,\n            inputs=[self.selected_user_id],\n            outputs=[\n                self._selected_panel,\n                self._selected_panel_btn,\n                # delete section\n                self.btn_delete,\n                self.btn_delete_yes,\n                self.btn_delete_no,\n                # edit section\n                self.usn_edit,\n                self.pwd_edit,\n                self.pwd_cnf_edit,\n                self.admin_edit,\n            ],\n            show_progress=\"hidden\",\n        )\n        self.btn_delete.click(\n            self.on_btn_delete_click,\n            inputs=[self.selected_user_id],\n            outputs=[self.btn_delete, self.btn_delete_yes, self.btn_delete_no],\n            show_progress=\"hidden\",\n        )\n        self.btn_delete_yes.click(\n            
self.delete_user,\n            inputs=[self._app.user_id, self.selected_user_id],\n            outputs=[self.selected_user_id],\n            show_progress=\"hidden\",\n        ).then(\n            self.list_users,\n            inputs=self._app.user_id,\n            outputs=[self.state_user_list, self.user_list],\n        )\n        self.btn_delete_no.click(\n            lambda: (\n                gr.update(visible=True),\n                gr.update(visible=False),\n                gr.update(visible=False),\n            ),\n            inputs=[],\n            outputs=[self.btn_delete, self.btn_delete_yes, self.btn_delete_no],\n            show_progress=\"hidden\",\n        )\n        self.btn_edit_save.click(\n            self.save_user,\n            inputs=[\n                self.selected_user_id,\n                self.usn_edit,\n                self.pwd_edit,\n                self.pwd_cnf_edit,\n                self.admin_edit,\n            ],\n            outputs=[self.pwd_edit, self.pwd_cnf_edit],\n            show_progress=\"hidden\",\n        ).then(\n            self.list_users,\n            inputs=self._app.user_id,\n            outputs=[self.state_user_list, self.user_list],\n        )\n        self.btn_close.click(\n            lambda: -1,\n            outputs=[self.selected_user_id],\n        )\n\n    def on_subscribe_public_events(self):\n        self._app.subscribe_event(\n            name=\"onSignIn\",\n            definition={\n                \"fn\": self.list_users,\n                \"inputs\": [self._app.user_id],\n                \"outputs\": [self.state_user_list, self.user_list],\n            },\n        )\n        self._app.subscribe_event(\n            name=\"onSignOut\",\n            definition={\n                \"fn\": lambda: (\"\", \"\", \"\", None, None, -1),\n                \"outputs\": [\n                    self.usn_new,\n                    self.pwd_new,\n                    self.pwd_cnf_new,\n                    
self.state_user_list,\n                    self.user_list,\n                    self.selected_user_id,\n                ],\n            },\n        )\n\n    def create_user(self, usn, pwd, pwd_cnf):\n        errors = validate_username(usn)\n        if errors:\n            gr.Warning(errors)\n            return usn, pwd, pwd_cnf\n\n        errors = validate_password(pwd, pwd_cnf)\n        print(errors)\n        if errors:\n            gr.Warning(errors)\n            return usn, pwd, pwd_cnf\n\n        with Session(engine) as session:\n            statement = select(User).where(User.username_lower == usn.lower())\n            result = session.exec(statement).all()\n            if result:\n                gr.Warning(f'Username \"{usn}\" already exists')\n                return\n\n            hashed_password = hashlib.sha256(pwd.encode()).hexdigest()\n            user = User(\n                username=usn, username_lower=usn.lower(), password=hashed_password\n            )\n            session.add(user)\n            session.commit()\n            gr.Info(f'User \"{usn}\" created successfully')\n\n        return \"\", \"\", \"\"\n\n    def list_users(self, user_id):\n        if user_id is None:\n            return [], pd.DataFrame.from_records(\n                [{\"id\": \"-\", \"username\": \"-\", \"admin\": \"-\"}]\n            )\n\n        with Session(engine) as session:\n            statement = select(User).where(User.id == user_id)\n            user = session.exec(statement).one()\n            if not user.admin:\n                return [], pd.DataFrame.from_records(\n                    [{\"id\": \"-\", \"username\": \"-\", \"admin\": \"-\"}]\n                )\n\n            statement = select(User)\n            results = [\n                {\"id\": user.id, \"username\": user.username, \"admin\": user.admin}\n                for user in session.exec(statement).all()\n            ]\n            if results:\n                user_list = 
pd.DataFrame.from_records(results)\n            else:\n                user_list = pd.DataFrame.from_records(\n                    [{\"id\": \"-\", \"username\": \"-\", \"admin\": \"-\"}]\n                )\n\n        return results, user_list\n\n    def select_user(self, user_list, ev: gr.SelectData):\n        if ev.value == \"-\" and ev.index[0] == 0:\n            gr.Info(\"No user is loaded. Please refresh the user list\")\n            return -1\n\n        if not ev.selected:\n            return -1\n\n        return user_list[\"id\"][ev.index[0]]\n\n    def on_selected_user_change(self, selected_user_id):\n        if selected_user_id == -1:\n            _selected_panel = gr.update(visible=False)\n            _selected_panel_btn = gr.update(visible=False)\n            btn_delete = gr.update(visible=True)\n            btn_delete_yes = gr.update(visible=False)\n            btn_delete_no = gr.update(visible=False)\n            usn_edit = gr.update(value=\"\")\n            pwd_edit = gr.update(value=\"\")\n            pwd_cnf_edit = gr.update(value=\"\")\n            admin_edit = gr.update(value=False)\n        else:\n            _selected_panel = gr.update(visible=True)\n            _selected_panel_btn = gr.update(visible=True)\n            btn_delete = gr.update(visible=True)\n            btn_delete_yes = gr.update(visible=False)\n            btn_delete_no = gr.update(visible=False)\n\n            with Session(engine) as session:\n                statement = select(User).where(User.id == selected_user_id)\n                user = session.exec(statement).one()\n\n            usn_edit = gr.update(value=user.username)\n            pwd_edit = gr.update(value=\"\")\n            pwd_cnf_edit = gr.update(value=\"\")\n            admin_edit = gr.update(value=user.admin)\n\n        return (\n            _selected_panel,\n            _selected_panel_btn,\n            btn_delete,\n            btn_delete_yes,\n            btn_delete_no,\n            usn_edit,\n            
pwd_edit,\n            pwd_cnf_edit,\n            admin_edit,\n        )\n\n    def on_btn_delete_click(self, selected_user_id):\n        if selected_user_id is None:\n            gr.Warning(\"No user is selected\")\n            btn_delete = gr.update(visible=True)\n            btn_delete_yes = gr.update(visible=False)\n            btn_delete_no = gr.update(visible=False)\n            return\n\n        btn_delete = gr.update(visible=False)\n        btn_delete_yes = gr.update(visible=True)\n        btn_delete_no = gr.update(visible=True)\n\n        return btn_delete, btn_delete_yes, btn_delete_no\n\n    def save_user(self, selected_user_id, usn, pwd, pwd_cnf, admin):\n        errors = validate_username(usn)\n        if errors:\n            gr.Warning(errors)\n            return pwd, pwd_cnf\n\n        if pwd:\n            errors = validate_password(pwd, pwd_cnf)\n            if errors:\n                gr.Warning(errors)\n                return pwd, pwd_cnf\n\n        with Session(engine) as session:\n            # Check username uniqueness (excluding current user)\n            statement = select(User).where(\n                User.username_lower == usn.lower(),\n                User.id != selected_user_id,\n            )\n            existing = session.exec(statement).first()\n            if existing:\n                gr.Warning(\n                    f'Username \"{usn}\" already exists. 
Please use a unique name.'\n                )\n                return pwd, pwd_cnf\n\n            statement = select(User).where(User.id == selected_user_id)\n            user = session.exec(statement).one()\n            user.username = usn\n            user.username_lower = usn.lower()\n            user.admin = admin\n            if pwd:\n                user.password = hashlib.sha256(pwd.encode()).hexdigest()\n            session.commit()\n            gr.Info(f'User \"{usn}\" updated successfully')\n\n        return \"\", \"\"\n\n    def delete_user(self, current_user, selected_user_id):\n        if current_user == selected_user_id:\n            gr.Warning(\"You cannot delete yourself\")\n            return selected_user_id\n\n        with Session(engine) as session:\n            statement = select(User).where(User.id == selected_user_id)\n            user = session.exec(statement).one()\n            session.delete(user)\n            session.commit()\n            gr.Info(f'User \"{user.username}\" deleted successfully')\n        return -1\n"
  },
  {
    "path": "libs/ktem/ktem/pages/settings.py",
    "content": "import hashlib\n\nimport gradio as gr\nfrom ktem.app import BasePage\nfrom ktem.components import reasonings\nfrom ktem.db.models import Settings, User, engine\nfrom sqlmodel import Session, select\nfrom theflow.settings import settings as flowsettings\n\nKH_SSO_ENABLED = getattr(flowsettings, \"KH_SSO_ENABLED\", False)\n\n\nsignout_js = \"\"\"\nfunction(u, c, pw, pwc) {\n    removeFromStorage('username');\n    removeFromStorage('password');\n    return [u, c, pw, pwc];\n}\n\"\"\"\n\n\ngr_cls_single_value = {\n    \"text\": gr.Textbox,\n    \"number\": gr.Number,\n    \"checkbox\": gr.Checkbox,\n}\n\n\ngr_cls_choices = {\n    \"dropdown\": gr.Dropdown,\n    \"radio\": gr.Radio,\n    \"checkboxgroup\": gr.CheckboxGroup,\n}\n\n\ndef render_setting_item(setting_item, value):\n    \"\"\"Render the setting component into corresponding Gradio UI component\"\"\"\n    kwargs = {\n        \"label\": setting_item.name,\n        \"value\": value,\n        \"interactive\": True,\n    }\n\n    if setting_item.component in gr_cls_single_value:\n        return gr_cls_single_value[setting_item.component](**kwargs)\n\n    kwargs[\"choices\"] = setting_item.choices\n\n    if setting_item.component in gr_cls_choices:\n        return gr_cls_choices[setting_item.component](**kwargs)\n\n    raise ValueError(\n        f\"Unknown component {setting_item.component}, allowed are: \"\n        f\"{list(gr_cls_single_value.keys()) + list(gr_cls_choices.keys())}.\\n\"\n        f\"Setting item: {setting_item}\"\n    )\n\n\nclass SettingsPage(BasePage):\n    \"\"\"Responsible for allowing the users to customize the application\n\n    **IMPORTANT**: the name and id of the UI setting components should match the\n    name of the setting in the `app.default_settings`\n    \"\"\"\n\n    public_events = [\"onSignOut\"]\n\n    def __init__(self, app):\n        \"\"\"Initiate the page and render the UI\"\"\"\n        self._app = app\n\n        self._settings_state = app.settings_state\n   
     self._user_id = app.user_id\n        self._default_settings = app.default_settings\n        self._settings_dict = self._default_settings.flatten()\n        self._settings_keys = list(self._settings_dict.keys())\n\n        self._components = {}\n        self._reasoning_mode = {}\n\n        # store llms and embeddings components\n        self._llms = []\n        self._embeddings = []\n\n        # render application page if there are application settings\n        self._render_app_tab = False\n\n        if not KH_SSO_ENABLED and self._default_settings.application.settings:\n            self._render_app_tab = True\n\n        # render index page if there are index settings (general and/or specific)\n        self._render_index_tab = False\n\n        if not KH_SSO_ENABLED:\n            if self._default_settings.index.settings:\n                self._render_index_tab = True\n            else:\n                for sig in self._default_settings.index.options.values():\n                    if sig.settings:\n                        self._render_index_tab = True\n                        break\n\n        # render reasoning page if there are reasoning settings\n        self._render_reasoning_tab = False\n\n        if not KH_SSO_ENABLED:\n            if len(self._default_settings.reasoning.settings) > 1:\n                self._render_reasoning_tab = True\n            else:\n                for sig in self._default_settings.reasoning.options.values():\n                    if sig.settings:\n                        self._render_reasoning_tab = True\n                        break\n\n        self.on_building_ui()\n\n    def on_building_ui(self):\n        if not KH_SSO_ENABLED:\n            self.setting_save_btn = gr.Button(\n                \"Save & Close\",\n                variant=\"primary\",\n                elem_classes=[\"right-button\"],\n                elem_id=\"save-setting-btn\",\n            )\n        if self._app.f_user_management:\n            with gr.Tab(\"User 
settings\"):\n                self.user_tab()\n\n        self.app_tab()\n        self.index_tab()\n        self.reasoning_tab()\n\n    def on_subscribe_public_events(self):\n        \"\"\"\n        Subscribes to public events related to user management.\n\n        This function is responsible for subscribing to the \"onSignIn\" event, which is\n        triggered when a user signs in. It registers two event handlers for this event.\n\n        The first event handler, \"load_setting\", is responsible for loading the user's\n        settings when they sign in. It takes the user ID as input and returns the\n        settings state and a list of component outputs. The progress indicator for this\n        event is set to \"hidden\".\n\n        The second event handler, \"get_name\", is responsible for retrieving the\n        username of the current user. It takes the user ID as input and returns the\n        username if it exists, otherwise it returns \"___\". The progress indicator for\n        this event is also set to \"hidden\".\n\n        Parameters:\n            self (object): The instance of the class.\n\n        Returns:\n            None\n        \"\"\"\n        if self._app.f_user_management:\n            self._app.subscribe_event(\n                name=\"onSignIn\",\n                definition={\n                    \"fn\": self.load_setting,\n                    \"inputs\": self._user_id,\n                    \"outputs\": [self._settings_state] + self.components(),\n                    \"show_progress\": \"hidden\",\n                },\n            )\n\n            def get_name(user_id):\n                name = \"Current user: \"\n                if user_id:\n                    with Session(engine) as session:\n                        statement = select(User).where(User.id == user_id)\n                        result = session.exec(statement).all()\n                        if result:\n                            return name + result[0].username\n               
 return name + \"___\"\n\n            self._app.subscribe_event(\n                name=\"onSignIn\",\n                definition={\n                    \"fn\": get_name,\n                    \"inputs\": self._user_id,\n                    \"outputs\": [self.current_name],\n                    \"show_progress\": \"hidden\",\n                },\n            )\n\n    def on_register_events(self):\n        if not KH_SSO_ENABLED:\n            self.setting_save_btn.click(\n                self.save_setting,\n                inputs=[self._user_id] + self.components(),\n                outputs=self._settings_state,\n            ).then(\n                lambda: gr.Tabs(selected=\"chat-tab\"),\n                outputs=self._app.tabs,\n            )\n        self._components[\"reasoning.use\"].change(\n            self.change_reasoning_mode,\n            inputs=[self._components[\"reasoning.use\"]],\n            outputs=list(self._reasoning_mode.values()),\n            show_progress=\"hidden\",\n        )\n        if self._app.f_user_management and not KH_SSO_ENABLED:\n            self.password_change_btn.click(\n                self.change_password,\n                inputs=[\n                    self._user_id,\n                    self.password_change,\n                    self.password_change_confirm,\n                ],\n                outputs=[self.password_change, self.password_change_confirm],\n                show_progress=\"hidden\",\n            )\n            onSignOutClick = self.signout.click(\n                lambda: (None, \"Current user: ___\", \"\", \"\"),\n                inputs=[],\n                outputs=[\n                    self._user_id,\n                    self.current_name,\n                    self.password_change,\n                    self.password_change_confirm,\n                ],\n                show_progress=\"hidden\",\n                js=signout_js,\n            ).then(\n                self.load_setting,\n                
inputs=self._user_id,\n                outputs=[self._settings_state] + self.components(),\n                show_progress=\"hidden\",\n            )\n            for event in self._app.get_event(\"onSignOut\"):\n                onSignOutClick = onSignOutClick.then(**event)\n\n    def user_tab(self):\n        # user management\n        self.current_name = gr.Markdown(\"Current user: ___\")\n\n        if KH_SSO_ENABLED:\n            import gradiologin as grlogin\n\n            self.sso_signout = grlogin.LogoutButton(\"Logout\")\n        else:\n            self.signout = gr.Button(\"Logout\")\n\n            self.password_change = gr.Textbox(\n                label=\"New password\", interactive=True, type=\"password\"\n            )\n            self.password_change_confirm = gr.Textbox(\n                label=\"Confirm password\", interactive=True, type=\"password\"\n            )\n            self.password_change_btn = gr.Button(\"Change password\", interactive=True)\n\n    def change_password(self, user_id, password, password_confirm):\n        from ktem.pages.resources.user import validate_password\n\n        errors = validate_password(password, password_confirm)\n        if errors:\n            print(errors)\n            gr.Warning(errors)\n            return password, password_confirm\n\n        with Session(engine) as session:\n            statement = select(User).where(User.id == user_id)\n            result = session.exec(statement).all()\n            if result:\n                user = result[0]\n                hashed_password = hashlib.sha256(password.encode()).hexdigest()\n                user.password = hashed_password\n                session.add(user)\n                session.commit()\n                gr.Info(\"Password changed\")\n            else:\n                gr.Warning(\"User not found\")\n\n        return \"\", \"\"\n\n    def app_tab(self):\n        with gr.Tab(\"General\", visible=self._render_app_tab):\n            for n, si in 
self._default_settings.application.settings.items():\n                obj = render_setting_item(si, si.value)\n                self._components[f\"application.{n}\"] = obj\n                if si.special_type == \"llm\":\n                    self._llms.append(obj)\n                if si.special_type == \"embedding\":\n                    self._embeddings.append(obj)\n\n    def index_tab(self):\n        # TODO: double check if we need general\n        # with gr.Tab(\"General\"):\n        #     for n, si in self._default_settings.index.settings.items():\n        #         obj = render_setting_item(si, si.value)\n        #         self._components[f\"index.{n}\"] = obj\n\n        id2name = {k: v.name for k, v in self._app.index_manager.info().items()}\n        with gr.Tab(\"Retrieval settings\", visible=self._render_index_tab):\n            for pn, sig in self._default_settings.index.options.items():\n                name = id2name.get(pn, f\"<id {pn}>\")\n                with gr.Tab(name):\n                    for n, si in sig.settings.items():\n                        obj = render_setting_item(si, si.value)\n                        self._components[f\"index.options.{pn}.{n}\"] = obj\n                        if si.special_type == \"llm\":\n                            self._llms.append(obj)\n                        if si.special_type == \"embedding\":\n                            self._embeddings.append(obj)\n\n    def reasoning_tab(self):\n        with gr.Tab(\"Reasoning settings\", visible=self._render_reasoning_tab):\n            with gr.Group():\n                for n, si in self._default_settings.reasoning.settings.items():\n                    if n == \"use\":\n                        continue\n                    obj = render_setting_item(si, si.value)\n                    self._components[f\"reasoning.{n}\"] = obj\n                    if si.special_type == \"llm\":\n                        self._llms.append(obj)\n                    if si.special_type == 
\"embedding\":\n                        self._embeddings.append(obj)\n\n            gr.Markdown(\"### Reasoning-specific settings\")\n            self._components[\"reasoning.use\"] = render_setting_item(\n                self._default_settings.reasoning.settings[\"use\"],\n                self._default_settings.reasoning.settings[\"use\"].value,\n            )\n\n            for idx, (pn, sig) in enumerate(\n                self._default_settings.reasoning.options.items()\n            ):\n                with gr.Group(\n                    visible=idx == 0,\n                    elem_id=pn,\n                ) as self._reasoning_mode[pn]:\n                    reasoning = reasonings.get(pn, None)\n                    if reasoning is None:\n                        gr.Markdown(\"**Name**: Description\")\n                    else:\n                        info = reasoning.get_info()\n                        gr.Markdown(f\"**{info['name']}**: {info['description']}\")\n                    for n, si in sig.settings.items():\n                        obj = render_setting_item(si, si.value)\n                        self._components[f\"reasoning.options.{pn}.{n}\"] = obj\n                        if si.special_type == \"llm\":\n                            self._llms.append(obj)\n                        if si.special_type == \"embedding\":\n                            self._embeddings.append(obj)\n\n    def change_reasoning_mode(self, value):\n        output = []\n        for each in self._reasoning_mode.values():\n            if value == each.elem_id:\n                output.append(gr.update(visible=True))\n            else:\n                output.append(gr.update(visible=False))\n        return output\n\n    def load_setting(self, user_id=None):\n        settings = self._settings_dict\n        with Session(engine) as session:\n            statement = select(Settings).where(Settings.user == user_id)\n            result = session.exec(statement).all()\n            if result:\n  
              settings = result[0].setting\n\n        output = [settings]\n        output += tuple(settings[name] for name in self.component_names())\n        return output\n\n    def save_setting(self, user_id: int, *args):\n        \"\"\"Save the setting to disk and persist the setting to session state\n\n        Args:\n            user_id: the user id\n            args: all the values from the settings\n        \"\"\"\n        setting = {key: value for key, value in zip(self.component_names(), args)}\n        if user_id is None:\n            gr.Warning(\"Need to login before saving settings\")\n            return setting\n\n        with Session(engine) as session:\n            statement = select(Settings).where(Settings.user == user_id)\n            try:\n                user_setting = session.exec(statement).one()\n            except Exception:\n                user_setting = Settings()\n                user_setting.user = user_id\n            user_setting.setting = setting\n            session.add(user_setting)\n            session.commit()\n\n        gr.Info(\"Setting saved\")\n        return setting\n\n    def components(self) -> list:\n        \"\"\"Get the setting components\"\"\"\n        output = []\n        for name in self._settings_keys:\n            output.append(self._components[name])\n        return output\n\n    def component_names(self):\n        \"\"\"Get the setting components\"\"\"\n        return self._settings_keys\n\n    def _on_app_created(self):\n        if not self._app.f_user_management:\n            self._app.app.load(\n                self.load_setting,\n                inputs=self._user_id,\n                outputs=[self._settings_state] + self.components(),\n                show_progress=\"hidden\",\n            )\n\n        def update_llms():\n            from ktem.llms.manager import llms\n\n            if llms._default:\n                llm_choices = [(f\"{llms._default} (default)\", \"\")]\n            else:\n                
llm_choices = [(\"(random)\", \"\")]\n            llm_choices += [(_, _) for _ in llms.options().keys()]\n            return gr.update(choices=llm_choices)\n\n        def update_embeddings():\n            from ktem.embeddings.manager import embedding_models_manager\n\n            if embedding_models_manager._default:\n                emb_choices = [(f\"{embedding_models_manager._default} (default)\", \"\")]\n            else:\n                emb_choices = [(\"(random)\", \"\")]\n            emb_choices += [(_, _) for _ in embedding_models_manager.options().keys()]\n            return gr.update(choices=emb_choices)\n\n        for llm in self._llms:\n            self._app.app.load(\n                update_llms,\n                inputs=[],\n                outputs=[llm],\n                show_progress=\"hidden\",\n            )\n        for emb in self._embeddings:\n            self._app.app.load(\n                update_embeddings,\n                inputs=[],\n                outputs=[emb],\n                show_progress=\"hidden\",\n            )\n"
  },
  {
    "path": "libs/ktem/ktem/pages/setup.py",
    "content": "import json\n\nimport gradio as gr\nimport requests\nfrom decouple import config\nfrom ktem.app import BasePage\nfrom ktem.embeddings.manager import embedding_models_manager as embeddings\nfrom ktem.llms.manager import llms\nfrom ktem.rerankings.manager import reranking_models_manager as rerankers\nfrom theflow.settings import settings as flowsettings\n\nKH_OLLAMA_URL = getattr(flowsettings, \"KH_OLLAMA_URL\", \"http://localhost:11434/v1/\")\nDEFAULT_OLLAMA_URL = KH_OLLAMA_URL.replace(\"v1\", \"api\")\nif DEFAULT_OLLAMA_URL.endswith(\"/\"):\n    DEFAULT_OLLAMA_URL = DEFAULT_OLLAMA_URL[:-1]\n\n\nDEMO_MESSAGE = (\n    \"This is a public space. Please use the \"\n    '\"Duplicate Space\" function on the top right '\n    \"corner to setup your own space.\"\n)\n\n\ndef pull_model(name: str, stream: bool = True):\n    payload = {\"name\": name}\n    headers = {\"Content-Type\": \"application/json\"}\n\n    response = requests.post(\n        DEFAULT_OLLAMA_URL + \"/pull\", json=payload, headers=headers, stream=stream\n    )\n\n    # Check if the request was successful\n    response.raise_for_status()\n\n    if stream:\n        for line in response.iter_lines():\n            if line:\n                data = json.loads(line.decode(\"utf-8\"))\n                yield data\n                if data.get(\"status\") == \"success\":\n                    break\n    else:\n        data = response.json()\n\n    return data\n\n\nclass SetupPage(BasePage):\n\n    public_events = [\"onFirstSetupComplete\"]\n\n    def __init__(self, app):\n        self._app = app\n        self.on_building_ui()\n\n    def on_building_ui(self):\n        gr.Markdown(f\"# Welcome to {self._app.app_name} first setup!\")\n        self.radio_model = gr.Radio(\n            [\n                (\"Cohere API (*free registration*) - recommended\", \"cohere\"),\n                (\"Google API (*free registration*)\", \"google\"),\n                (\"OpenAI API (for GPT-based models)\", \"openai\"),\n   
             (\"Local LLM (for completely *private RAG*)\", \"ollama\"),\n            ],\n            label=\"Select your model provider\",\n            value=\"cohere\",\n            info=(\n                \"Note: You can change this later. \"\n                \"If you are not sure, go with the first option \"\n                \"which fits most normal users.\"\n            ),\n            interactive=True,\n        )\n\n        with gr.Column(visible=False) as self.openai_option:\n            gr.Markdown(\n                (\n                    \"#### OpenAI API Key\\n\\n\"\n                    \"(create at https://platform.openai.com/api-keys)\"\n                )\n            )\n            self.openai_api_key = gr.Textbox(\n                show_label=False, placeholder=\"OpenAI API Key\"\n            )\n\n        with gr.Column(visible=True) as self.cohere_option:\n            gr.Markdown(\n                (\n                    \"#### Cohere API Key\\n\\n\"\n                    \"(register your free API key \"\n                    \"at https://dashboard.cohere.com/api-keys)\"\n                )\n            )\n            self.cohere_api_key = gr.Textbox(\n                show_label=False, placeholder=\"Cohere API Key\"\n            )\n\n        with gr.Column(visible=False) as self.google_option:\n            gr.Markdown(\n                (\n                    \"#### Google API Key\\n\\n\"\n                    \"(register your free API key \"\n                    \"at https://aistudio.google.com/app/apikey)\"\n                )\n            )\n            self.google_api_key = gr.Textbox(\n                show_label=False, placeholder=\"Google API Key\"\n            )\n\n        with gr.Column(visible=False) as self.ollama_option:\n            gr.Markdown(\n                (\n                    \"#### Setup Ollama\\n\\n\"\n                    \"Download and install Ollama from \"\n                    \"https://ollama.com/. 
Check out latest models at \"\n                    \"https://ollama.com/library. \"\n                )\n            )\n            self.ollama_model_name = gr.Textbox(\n                label=\"LLM model name\",\n                value=config(\"LOCAL_MODEL\", default=\"qwen2.5:7b\"),\n            )\n            self.ollama_emb_model_name = gr.Textbox(\n                label=\"Embedding model name\",\n                value=config(\"LOCAL_MODEL_EMBEDDINGS\", default=\"nomic-embed-text\"),\n            )\n\n        self.setup_log = gr.HTML(\n            show_label=False,\n        )\n\n        with gr.Row():\n            self.btn_finish = gr.Button(\"Proceed\", variant=\"primary\")\n            self.btn_skip = gr.Button(\n                \"I am an advance user. Skip this.\", variant=\"stop\"\n            )\n\n    def on_register_events(self):\n        onFirstSetupComplete = gr.on(\n            triggers=[\n                self.btn_finish.click,\n                self.cohere_api_key.submit,\n                self.openai_api_key.submit,\n            ],\n            fn=self.update_model,\n            inputs=[\n                self.cohere_api_key,\n                self.openai_api_key,\n                self.google_api_key,\n                self.ollama_model_name,\n                self.ollama_emb_model_name,\n                self.radio_model,\n            ],\n            outputs=[self.setup_log],\n            show_progress=\"hidden\",\n        )\n        onSkipSetup = gr.on(\n            triggers=[self.btn_skip.click],\n            fn=lambda: None,\n            inputs=[],\n            show_progress=\"hidden\",\n            outputs=[self.radio_model],\n        )\n\n        for event in self._app.get_event(\"onFirstSetupComplete\"):\n            onSkipSetup = onSkipSetup.success(**event)\n\n        onFirstSetupComplete = onFirstSetupComplete.success(\n            fn=self.update_default_settings,\n            inputs=[self.radio_model, self._app.settings_state],\n            
outputs=self._app.settings_state,\n        )\n        for event in self._app.get_event(\"onFirstSetupComplete\"):\n            onFirstSetupComplete = onFirstSetupComplete.success(**event)\n\n        self.radio_model.change(\n            fn=self.switch_options_view,\n            inputs=[self.radio_model],\n            show_progress=\"hidden\",\n            outputs=[\n                self.cohere_option,\n                self.openai_option,\n                self.ollama_option,\n                self.google_option,\n            ],\n        )\n\n    def update_model(\n        self,\n        cohere_api_key,\n        openai_api_key,\n        google_api_key,\n        ollama_model_name,\n        ollama_emb_model_name,\n        radio_model_value,\n    ):\n        log_content = \"\"\n        if not radio_model_value:\n            gr.Info(\"Skip setup models.\")\n            yield gr.value(visible=False)\n            return\n\n        if radio_model_value == \"cohere\":\n            if cohere_api_key:\n                llms.update(\n                    name=\"cohere\",\n                    spec={\n                        \"__type__\": \"kotaemon.llms.chats.LCCohereChat\",\n                        \"model_name\": \"command-r-plus-08-2024\",\n                        \"api_key\": cohere_api_key,\n                    },\n                    default=True,\n                )\n                embeddings.update(\n                    name=\"cohere\",\n                    spec={\n                        \"__type__\": \"kotaemon.embeddings.LCCohereEmbeddings\",\n                        \"model\": \"embed-multilingual-v3.0\",\n                        \"cohere_api_key\": cohere_api_key,\n                        \"user_agent\": \"default\",\n                    },\n                    default=True,\n                )\n                rerankers.update(\n                    name=\"cohere\",\n                    spec={\n                        \"__type__\": 
\"kotaemon.rerankings.CohereReranking\",\n                        \"model_name\": \"rerank-multilingual-v2.0\",\n                        \"cohere_api_key\": cohere_api_key,\n                    },\n                    default=True,\n                )\n        elif radio_model_value == \"openai\":\n            if openai_api_key:\n                llms.update(\n                    name=\"openai\",\n                    spec={\n                        \"__type__\": \"kotaemon.llms.ChatOpenAI\",\n                        \"base_url\": \"https://api.openai.com/v1\",\n                        \"model\": \"gpt-4o\",\n                        \"api_key\": openai_api_key,\n                        \"timeout\": 20,\n                    },\n                    default=True,\n                )\n                embeddings.update(\n                    name=\"openai\",\n                    spec={\n                        \"__type__\": \"kotaemon.embeddings.OpenAIEmbeddings\",\n                        \"base_url\": \"https://api.openai.com/v1\",\n                        \"model\": \"text-embedding-3-large\",\n                        \"api_key\": openai_api_key,\n                        \"timeout\": 10,\n                        \"context_length\": 8191,\n                    },\n                    default=True,\n                )\n        elif radio_model_value == \"google\":\n            if google_api_key:\n                llms.update(\n                    name=\"google\",\n                    spec={\n                        \"__type__\": \"kotaemon.llms.chats.LCGeminiChat\",\n                        \"model_name\": \"gemini-1.5-flash\",\n                        \"api_key\": google_api_key,\n                    },\n                    default=True,\n                )\n                embeddings.update(\n                    name=\"google\",\n                    spec={\n                        \"__type__\": \"kotaemon.embeddings.LCGoogleEmbeddings\",\n                        \"model\": 
\"models/text-embedding-004\",\n                        \"google_api_key\": google_api_key,\n                    },\n                    default=True,\n                )\n        elif radio_model_value == \"ollama\":\n            llms.update(\n                name=\"ollama\",\n                spec={\n                    \"__type__\": \"kotaemon.llms.ChatOpenAI\",\n                    \"base_url\": KH_OLLAMA_URL,\n                    \"model\": ollama_model_name,\n                    \"api_key\": \"ollama\",\n                },\n                default=True,\n            )\n            embeddings.update(\n                name=\"ollama\",\n                spec={\n                    \"__type__\": \"kotaemon.embeddings.OpenAIEmbeddings\",\n                    \"base_url\": KH_OLLAMA_URL,\n                    \"model\": ollama_emb_model_name,\n                    \"api_key\": \"ollama\",\n                },\n                default=True,\n            )\n\n            # download required models through ollama\n            llm_model_name = llms.get(\"ollama\").model  # type: ignore\n            emb_model_name = embeddings.get(\"ollama\").model  # type: ignore\n\n            try:\n                for model_name in [emb_model_name, llm_model_name]:\n                    log_content += f\"- Downloading model `{model_name}` from Ollama<br>\"\n                    yield log_content\n\n                    pre_download_log = log_content\n\n                    for response in pull_model(model_name):\n                        complete = response.get(\"completed\", 0)\n                        total = response.get(\"total\", 0)\n                        if complete > 0 and total > 0:\n                            ratio = int(complete / total * 100)\n                            log_content = (\n                                pre_download_log\n                                + f\"- {response.get('status')}: {ratio}%<br>\"\n                            )\n                        else:\n    
                        if \"pulling\" not in response.get(\"status\", \"\"):\n                                log_content += f\"- {response.get('status')}<br>\"\n\n                        yield log_content\n            except Exception as e:\n                log_content += (\n                    \"Make sure you have download and installed Ollama correctly. \"\n                    f\"Got error: {str(e)}\"\n                )\n                yield log_content\n                raise gr.Error(\"Failed to download model from Ollama.\")\n\n        # test models connection\n        llm_output = emb_output = None\n\n        # LLM model\n        log_content += f\"- Testing LLM model: {radio_model_value}<br>\"\n        yield log_content\n\n        llm = llms.get(radio_model_value)  # type: ignore\n        log_content += \"- Sending a message `Hi`<br>\"\n        yield log_content\n        try:\n            llm_output = llm(\"Hi\")\n        except Exception as e:\n            log_content += (\n                f\"<mark style='color: yellow; background: red'>- Connection failed. \"\n                f\"Got error:\\n {str(e)}</mark>\"\n            )\n\n        if llm_output:\n            log_content += (\n                \"<mark style='background: green; color: white'>- Connection success. 
\"\n                \"</mark><br>\"\n            )\n        yield log_content\n\n        if llm_output:\n            # embedding model\n            log_content += f\"- Testing Embedding model: {radio_model_value}<br>\"\n            yield log_content\n\n            emb = embeddings.get(radio_model_value)\n            assert emb, f\"Embedding model {radio_model_value} not found.\"\n\n            log_content += \"- Sending a message `Hi`<br>\"\n            yield log_content\n            try:\n                emb_output = emb(\"Hi\")\n            except Exception as e:\n                log_content += (\n                    f\"<mark style='color: yellow; background: red'>\"\n                    \"- Connection failed. \"\n                    f\"Got error:\\n {str(e)}</mark>\"\n                )\n\n            if emb_output:\n                log_content += (\n                    \"<mark style='background: green; color: white'>\"\n                    \"- Connection success. \"\n                    \"</mark><br>\"\n                )\n            yield log_content\n\n        if llm_output and emb_output:\n            gr.Info(\"Setup models completed successfully!\")\n        else:\n            raise gr.Error(\n                \"Setup models failed. 
Please verify your connection and API key.\"\n            )\n\n    def update_default_settings(self, radio_model_value, default_settings):\n        # revise default settings\n        # reranking llm\n        default_settings[\"index.options.1.reranking_llm\"] = radio_model_value\n        if radio_model_value == \"ollama\":\n            default_settings[\"index.options.1.use_llm_reranking\"] = False\n\n        return default_settings\n\n    def switch_options_view(self, radio_model_value):\n        components_visible = [gr.update(visible=False) for _ in range(4)]\n\n        values = [\"cohere\", \"openai\", \"ollama\", \"google\", None]\n        assert radio_model_value in values, f\"Invalid value {radio_model_value}\"\n\n        if radio_model_value is not None:\n            idx = values.index(radio_model_value)\n            components_visible[idx] = gr.update(visible=True)\n\n        return components_visible\n"
  },
  {
    "path": "libs/ktem/ktem/reasoning/__init__.py",
    "content": ""
  },
  {
    "path": "libs/ktem/ktem/reasoning/base.py",
    "content": "from typing import Optional\n\nfrom kotaemon.base import BaseComponent\n\n\nclass BaseReasoning(BaseComponent):\n    \"\"\"The reasoning pipeline that handles each of the user chat messages\n\n    This reasoning pipeline has access to:\n        - the retrievers\n        - the user settings\n        - the message\n        - the conversation id\n        - the message history\n    \"\"\"\n\n    @classmethod\n    def get_info(cls) -> dict:\n        \"\"\"Get the pipeline information for the app to organize and display\n\n        Returns:\n            a dictionary that contains the following keys:\n                - \"id\": the unique id of the pipeline\n                - \"name\": the human-friendly name of the pipeline\n                - \"description\": the overview short description of the pipeline, for\n                user to grasp what does the pipeline do\n        \"\"\"\n        raise NotImplementedError\n\n    @classmethod\n    def get_user_settings(cls) -> dict:\n        \"\"\"Get the default user settings for this pipeline\"\"\"\n        return {}\n\n    @classmethod\n    def get_pipeline(\n        cls,\n        user_settings: dict,\n        state: dict,\n        retrievers: Optional[list[\"BaseComponent\"]] = None,\n    ) -> \"BaseReasoning\":\n        \"\"\"Get the reasoning pipeline for the app to execute\n\n        Args:\n            user_setting: user settings\n            state: conversation state\n            retrievers (list): List of retrievers\n        \"\"\"\n        return cls()\n\n    def run(self, message: str, conv_id: str, history: list, **kwargs):  # type: ignore\n        \"\"\"Execute the reasoning pipeline\"\"\"\n        raise NotImplementedError\n"
  },
  {
    "path": "libs/ktem/ktem/reasoning/prompt_optimization/__init__.py",
    "content": "from .decompose_question import DecomposeQuestionPipeline\nfrom .fewshot_rewrite_question import FewshotRewriteQuestionPipeline\nfrom .mindmap import CreateMindmapPipeline\nfrom .rewrite_question import RewriteQuestionPipeline\n\n__all__ = [\n    \"DecomposeQuestionPipeline\",\n    \"FewshotRewriteQuestionPipeline\",\n    \"RewriteQuestionPipeline\",\n    \"CreateMindmapPipeline\",\n]\n"
  },
  {
    "path": "libs/ktem/ktem/reasoning/prompt_optimization/decompose_question.py",
    "content": "import logging\n\nfrom ktem.llms.manager import llms\nfrom ktem.reasoning.prompt_optimization.rewrite_question import RewriteQuestionPipeline\nfrom pydantic import BaseModel, Field\n\nfrom kotaemon.base import Document, HumanMessage, Node, SystemMessage\nfrom kotaemon.llms import ChatLLM\n\nlogger = logging.getLogger(__name__)\n\n\nclass SubQuery(BaseModel):\n    \"\"\"Search over a database of insurance rulebooks or financial reports\"\"\"\n\n    sub_query: str = Field(\n        ...,\n        description=\"A very specific query against the database.\",\n    )\n\n\nclass DecomposeQuestionPipeline(RewriteQuestionPipeline):\n    \"\"\"Decompose user complex question into multiple sub-questions\n\n    Args:\n        llm: the language model to rewrite question\n        lang: the language of the answer. Currently support English and Japanese\n    \"\"\"\n\n    llm: ChatLLM = Node(\n        default_callback=lambda _: llms.get(\"openai-gpt4-turbo\", llms.get_default())\n    )\n    DECOMPOSE_SYSTEM_PROMPT_TEMPLATE = (\n        \"You are an expert at converting user complex questions into sub questions. \"\n        \"Perform query decomposition using provided function_call. \"\n        \"Given a user question, break it down into the most specific sub\"\n        \" questions you can (at most 3) \"\n        \"which will help you answer the original question. \"\n        \"Each sub question should be about a single concept/fact/idea. 
\"\n        \"If there are acronyms or words you are not familiar with, \"\n        \"do not try to rephrase them.\"\n    )\n    prompt_template: str = DECOMPOSE_SYSTEM_PROMPT_TEMPLATE\n\n    def create_prompt(self, question):\n        schema = SubQuery.model_json_schema()\n        function = {\n            \"name\": schema[\"title\"],\n            \"description\": schema[\"description\"],\n            \"parameters\": schema,\n        }\n        llm_kwargs = {\n            \"tools\": [{\"type\": \"function\", \"function\": function}],\n            \"tool_choice\": \"auto\",\n            \"tools_pydantic\": [SubQuery],\n        }\n\n        messages = [\n            SystemMessage(content=self.prompt_template),\n            HumanMessage(content=question),\n        ]\n\n        return messages, llm_kwargs\n\n    def run(self, question: str) -> list:  # type: ignore\n        messages, llm_kwargs = self.create_prompt(question)\n        result = self.llm(messages, **llm_kwargs)\n        tool_calls = result.additional_kwargs.get(\"tool_calls\", None)\n        sub_queries = []\n        if tool_calls:\n            for tool_call in tool_calls:\n                if \"function\" in tool_call:\n                    # openai and cohere format\n                    function_output = tool_call[\"function\"][\"arguments\"]\n                else:\n                    # anthropic format\n                    function_output = tool_call[\"args\"]\n\n                if isinstance(function_output, str):\n                    sub_query = SubQuery.parse_raw(function_output).sub_query\n                else:\n                    sub_query = SubQuery.parse_obj(function_output).sub_query\n\n                sub_queries.append(\n                    Document(\n                        content=sub_query,\n                    )\n                )\n\n        return sub_queries\n"
  },
  {
    "path": "libs/ktem/ktem/reasoning/prompt_optimization/fewshot_rewrite_question.py",
    "content": "import json\nimport uuid\nfrom pathlib import Path\n\nfrom ktem.components import get_docstore, get_vectorstore\nfrom ktem.llms.manager import llms\nfrom ktem.reasoning.prompt_optimization.rewrite_question import (\n    DEFAULT_REWRITE_PROMPT,\n    RewriteQuestionPipeline,\n)\nfrom theflow.settings import settings as flowsettings\n\nfrom kotaemon.base import AIMessage, Document, HumanMessage, Node, SystemMessage\nfrom kotaemon.embeddings import BaseEmbeddings\nfrom kotaemon.llms import ChatLLM\nfrom kotaemon.storages import BaseDocumentStore, BaseVectorStore\n\n\nclass FewshotRewriteQuestionPipeline(RewriteQuestionPipeline):\n    \"\"\"Rewrite user question\n\n    Args:\n        llm: the language model to rewrite question\n        rewrite_template: the prompt template for llm to paraphrase a text input\n        lang: the language of the answer. Currently support English and Japanese\n        embedding: the embedding model to encode the question\n        vector_store: the vector store to store the encoded question\n        doc_store: the document store to store the original question\n        k: the number of examples to retrieve for rewriting\n    \"\"\"\n\n    llm: ChatLLM = Node(default_callback=lambda _: llms.get_default())\n    rewrite_template: str = DEFAULT_REWRITE_PROMPT\n    lang: str = \"English\"\n    embedding: BaseEmbeddings\n    vector_store: BaseVectorStore\n    doc_store: BaseDocumentStore\n    k: int = getattr(flowsettings, \"N_PROMPT_OPT_EXAMPLES\", 3)\n\n    def add_documents(self, examples, batch_size: int = 50):\n        print(\"Adding fewshot examples for rewriting\")\n        documents = []\n        for example in examples:\n            doc = Document(\n                text=example[\"input\"], id_=str(uuid.uuid4()), metadata=example\n            )\n            documents.append(doc)\n\n        for i in range(0, len(documents), batch_size):\n            embeddings = self.embedding(documents[i : i + batch_size])\n            ids = 
[t.doc_id for t in documents[i : i + batch_size]]\n            self.vector_store.add(\n                embeddings=embeddings,\n                ids=ids,\n            )\n            self.doc_store.add(documents[i : i + batch_size])\n\n    @classmethod\n    def get_pipeline(\n        cls,\n        embedding,\n        example_path=Path(__file__).parent / \"rephrase_question_train.json\",\n        collection_name: str = \"fewshot_rewrite_examples\",\n    ):\n        vector_store = get_vectorstore(collection_name)\n        doc_store = get_docstore(collection_name)\n\n        pipeline = cls(\n            embedding=embedding, vector_store=vector_store, doc_store=doc_store\n        )\n        if doc_store.count():\n            return pipeline\n\n        examples = json.load(open(example_path, \"r\"))\n        pipeline.add_documents(examples)\n\n        return pipeline\n\n    def run(self, question: str) -> Document:  # type: ignore\n        emb = self.embedding(question)[0].embedding\n        _, _, ids = self.vector_store.query(embedding=emb, top_k=self.k)\n        examples = self.doc_store.get(ids)\n        messages = [SystemMessage(content=\"You are a helpful assistant\")]\n        for example in examples:\n            messages.append(\n                HumanMessage(\n                    content=self.rewrite_template.format(\n                        question=example.metadata[\"input\"], lang=self.lang\n                    )\n                )\n            )\n            messages.append(AIMessage(content=example.metadata[\"output\"]))\n        messages.append(\n            HumanMessage(\n                content=self.rewrite_template.format(question=question, lang=self.lang)\n            )\n        )\n\n        result = self.llm(messages)\n        return result\n"
  },
  {
    "path": "libs/ktem/ktem/reasoning/prompt_optimization/mindmap.py",
    "content": "import logging\nfrom textwrap import dedent\n\nfrom ktem.llms.manager import llms\n\nfrom kotaemon.base import BaseComponent, Document, HumanMessage, Node, SystemMessage\nfrom kotaemon.llms import ChatLLM, PromptTemplate\n\nlogger = logging.getLogger(__name__)\n\n\nMINDMAP_HTML_EXPORT_TEMPLATE = dedent(\n    \"\"\"\n<!DOCTYPE html>\n<html lang=\"en\">\n  <head>\n    <meta charset=\"UTF-8\" />\n    <meta http-equiv=\"X-UA-Compatible\" content=\"IE=edge\" />\n    <meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\" />\n    <title>Mindmap</title>\n    <style>\n      svg.markmap {\n        width: 100%;\n        height: 100vh;\n      }\n    </style>\n    <script src=\"https://cdn.jsdelivr.net/npm/markmap-autoloader@0.16\"></script>\n  </head>\n  <body>\n    {markmap_div}\n  </body>\n</html>\n\"\"\"\n)\n\n\nclass CreateMindmapPipeline(BaseComponent):\n    \"\"\"Create a mindmap from the question and context\"\"\"\n\n    llm: ChatLLM = Node(default_callback=lambda _: llms.get_default())\n\n    SYSTEM_PROMPT = \"\"\"\nFrom now on you will behave as \"MapGPT\" and, for every text the user will submit, you are going to create a PlantUML mind map file for the inputted text to best describe main ideas. Format it as a code and remember that the mind map should be in the same language as the inputted context. You don't have to provide a general example for the mind map format before the user inputs the text.\n    \"\"\"  # noqa: E501\n    MINDMAP_PROMPT_TEMPLATE = \"\"\"\nQuestion:\n{question}\n\nContext:\n{context}\n\nGenerate a sample PlantUML mindmap for based on the provided question and context above. 
Only includes context relevant to the question to produce the mindmap.\n\nUse the template like this:\n\n@startmindmap\n* Title\n** Item A\n*** Item B\n**** Item C\n*** Item D\n@endmindmap\n    \"\"\"  # noqa: E501\n    prompt_template: str = MINDMAP_PROMPT_TEMPLATE\n\n    @classmethod\n    def convert_uml_to_markdown(cls, text: str) -> str:\n        start_phrase = \"@startmindmap\"\n        end_phrase = \"@endmindmap\"\n\n        try:\n            text = text.split(start_phrase)[-1]\n            text = text.split(end_phrase)[0]\n            text = text.strip().replace(\"*\", \"#\")\n        except IndexError:\n            text = \"\"\n\n        return text\n\n    def run(self, question: str, context: str) -> Document:  # type: ignore\n        prompt_template = PromptTemplate(self.prompt_template)\n        prompt = prompt_template.populate(\n            question=question,\n            context=context,\n        )\n\n        messages = [\n            SystemMessage(content=self.SYSTEM_PROMPT),\n            HumanMessage(content=prompt),\n        ]\n\n        uml_text = self.llm(messages).text\n        markdown_text = self.convert_uml_to_markdown(uml_text)\n\n        return Document(\n            text=markdown_text,\n        )\n"
  },
  {
    "path": "libs/ktem/ktem/reasoning/prompt_optimization/rephrase_question_train.json",
    "content": "[\n  {\n    \"input\": \"What was the percentage increase in General and Administrative expenses in the year 2018 compared to the previous year?\",\n    \"output\": \"What was the increase in the General and administrative in 2018?\"\n  },\n  {\n    \"input\": \"What was the specific percentage increase in the General and Administrative expenses during the year 2018?\",\n    \"output\": \"What was the increase in the General and administrative in 2018?\"\n  },\n  {\n    \"input\": \"What was the decrease in the Other net expense in 2019?\",\n    \"output\": \"What was the specific amount of the decrease in the Other net expense in the financial year of 2019?\"\n  },\n  {\n    \"input\": \"What was the decrease in the Other net expense in 2019?\",\n    \"output\": \"What was the specific decrease amount experienced in the Other net expense during the year 2019?\"\n  },\n  {\n    \"input\": \"What was the increase in interest income?\",\n    \"output\": \"What was the exact amount by which the interest income increased?\"\n  },\n  {\n    \"input\": \"What was the increase in interest income?\",\n    \"output\": \"What was the exact amount of the increase in interest income?\"\n  },\n  {\n    \"input\": \"What was the average Other expense, net for 2018 and 2019?\",\n    \"output\": \"What was the average net Other expense for the years 2018 and 2019 combined?\"\n  },\n  {\n    \"input\": \"In which year was Other expense, net less than 5,000 thousands?\",\n    \"output\": \"In what particular year did the amount of Other expense, net fall below 5,000 thousands?\"\n  },\n  {\n    \"input\": \"What were the income tax expenses in the years 2019, 2018, and 2017, respectively?\",\n    \"output\": \"What was the income tax expense in 2019,2018 and 2017 respectively?\"\n  },\n  {\n    \"input\": \"\\\"What were the income tax expenses for the years 2019, 2018, and 2017?\\\"\",\n    \"output\": \"What was the income tax expense in 2019,2018 and 2017 
respectively?\"\n  },\n  {\n    \"input\": \"How much money did the company have in unremitted earnings for the year 2019?\",\n    \"output\": \"What was the company's unremitted earnings in 2019?\"\n  },\n  {\n    \"input\": \"How much unremitted earnings did the company have in 2019, without releasing or distributing them?\",\n    \"output\": \"What was the company's unremitted earnings in 2019?\"\n  },\n  {\n    \"input\": \"Which year witnessed interest and penalties that amounted to less than 20 thousand?\",\n    \"output\": \"In which year was Interest and penalties less than 20 thousand?\"\n  },\n  {\n    \"input\": \"What is the net carrying amount in 2019?\",\n    \"output\": \"What is the net carrying amount for the year 2019? Please provide details to assist in providing a more accurate response.\"\n  },\n  {\n    \"input\": \"What is the net carrying amount in 2019?\",\n    \"output\": \"What is the net carrying amount for the year 2019? Can you provide more details or context about the specific asset, liability or financial element in consideration?\"\n  },\n  {\n    \"input\": \"What is the change in the debt discount, net of amortization from December 31, 2019 to December 31, 2018?\",\n    \"output\": \"What was the net change in the debt discount's value, after taking into account amortization, between December 31, 2019 and December 31, 2018?\"\n  },\n  {\n    \"input\": \"What is the change in Net carrying amount from December 31, 2019 to December 31, 2018?\",\n    \"output\": \"What is the difference in the net carrying amount between December 31, 2019 and December 31, 2018?\"\n  },\n  {\n    \"input\": \"What is the change in Furniture and equipment from December 31, 2019 to December 31, 2018?\",\n    \"output\": \"What is the net difference in the value of furniture and equipment between December 31, 2019 and December 31, 2018?\"\n  },\n  {\n    \"input\": \"What is the change in Furniture and equipment from December 31, 2019 to December 31, 
2018?\",\n    \"output\": \"What was the net change in the value of Furniture and equipment between December 31, 2019 and December 31, 2018?\"\n  },\n  {\n    \"input\": \"What is the change in System hardware from December 31, 2019 to December 31, 2018?\",\n    \"output\": \"What are the differences in the hardware of the System between December 31, 2019, and December 31, 2018?\"\n  },\n  {\n    \"input\": \"What was the exact percentage increase in the cost of revenue between 2017 and 2018?\",\n    \"output\": \"What was the increase in the cost of revenue in 2018?\"\n  },\n  {\n    \"input\": \"What was the percentage increase in the total amount spent on expenses directly related to generating revenue in 2018 compared to the previous year?\",\n    \"output\": \"What was the increase in the cost of revenue in 2018?\"\n  },\n  {\n    \"input\": \"In which year was cost of revenue less than 40,000 thousands?\",\n    \"output\": \"In what specific year did the cost of revenue fall below the amount of 40,000 thousands?\"\n  },\n  {\n    \"input\": \"If a company used its earnings to finance its domestic operations, what are the potential outcomes or consequences of this decision?\",\n    \"output\": \"What would happen if earnings were used to fund domestic operations?\"\n  },\n  {\n    \"input\": \"What was the change in Cash, cash equivalents, and restricted cash at the beginning of the period, comparing December 31, 2019, to December 31, 2018?\",\n    \"output\": \"What is the change in Cash, cash equivalents and restricted cash at beginning of period from December 31, 2019 to December 31, 2018?\"\n  },\n  {\n    \"input\": \"What is the net difference in Cash provided by operating activities between December 31, 2019 and December 31, 2018?\",\n    \"output\": \"What is the change in Cash provided by operating activities from December 31, 2019 to December 31, 2018?\"\n  },\n  {\n    \"input\": \"What is the change in Cash used in investing activities from 
December 31, 2019 to December 31, 2018?\",\n    \"output\": \"What is the difference in the amount of Cash used in investing activities between December 31, 2019, and December 31, 2018?\"\n  },\n  {\n    \"input\": \"What was the percentage change in the Sales and marketing expense from 2017 to 2018?\",\n    \"output\": \"What was the increase in the Sales and marketing expense in 2018 compared to 2017?\"\n  },\n  {\n    \"input\": \"In which year was Sales and marketing expenses less than 50,000 thousands?\",\n    \"output\": \"In what specific year did the total expenses for sales and marketing fall below 50,000 thousands (50 million) currency units?\"\n  },\n  {\n    \"input\": \"In which year was General and administrative expenses less than 50,000 thousands?\",\n    \"output\": \"In which specific year did the amount for General and Administrative expenses fall below 50,000 thousands (50 million) dollars?\"\n  },\n  {\n    \"input\": \"What is the average Carrying Value for the period December 31, 2019 to December 31, 2018?\",\n    \"output\": \"What is the average Carrying Value from December 31, 2018 to December 31, 2019?\"\n  },\n  {\n    \"input\": \"In which specific year did the fair value of the 2022 Notes measure below 200,000 thousands?\",\n    \"output\": \"In which year was the 2022 Notes fair value less than 200,000 thousands?\"\n  },\n  {\n    \"input\": \"In the year when the fair value of the 2022 Notes was below 200,000 thousands, what specific year was it?\",\n    \"output\": \"In which year was the 2022 Notes fair value less than 200,000 thousands?\"\n  },\n  {\n    \"input\": \"What was the 2022 Notes cap price?\",\n    \"output\": \"\\\"What was the cap price for the 2022 Notes? 
Please provide any additional details that could assist in retrieving the information accurately.\\\"\"\n  },\n  {\n    \"input\": \"What were the average expenses for sales and marketing in the years 2018 and 2019?\",\n    \"output\": \"What was the average Sales and marketing expenses for 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What were the average expenses for Sales and Marketing in 2018 and 2019, respectively?\",\n    \"output\": \"What was the average Sales and marketing expenses for 2018 and 2019?\"\n  },\n  {\n    \"input\": \"In which year was Sales and marketing expenses less than 90,000 thousands?\",\n    \"output\": \"When was the year that the sales and marketing expenses were recorded to be under 90,000 thousands?\"\n  },\n  {\n    \"input\": \"In which year was Sales and marketing expenses less than 90,000 thousands?\",\n    \"output\": \"In which specific year did the total expenses for sales and marketing amount to a figure less than 90,000 thousands?\"\n  },\n  {\n    \"input\": \"What was the exact amount of gross profit recorded in the fiscal year of 2018?\",\n    \"output\": \"What was the Gross profit in 2018?\"\n  },\n  {\n    \"input\": \"What was the specific value of the gross profit for the year 2018?\",\n    \"output\": \"What was the Gross profit in 2018?\"\n  },\n  {\n    \"input\": \"In what specific year did the revenue fall below one hundred thousand thousands (100,000,000)?\",\n    \"output\": \"In which year was revenue less than 100,000 thousands?\"\n  },\n  {\n    \"input\": \"\\\"In which specific year did the revenue fall below 100,000 thousands (100 million)?\\\"\",\n    \"output\": \"In which year was revenue less than 100,000 thousands?\"\n  },\n  {\n    \"input\": \"What is the difference in the Gross Profit between the fiscal years 2018 and 2019?\",\n    \"output\": \"What is the change in the Gross Profit from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What were the capitalized software development costs for the 
company in the years ending on December 31, 2017?\",\n    \"output\": \"What was the company capitalized software development costs during the years ended December 31, 2017?\"\n  },\n  {\n    \"input\": \"What does the Adjusted EBITDA represent?\",\n    \"output\": \"What is the meaning and purpose of the term Adjusted EBITDA?\"\n  },\n  {\n    \"input\": \"What does the Adjusted EBITDA represent?\",\n    \"output\": \"What is the meaning and significance of Adjusted EBITDA?\"\n  },\n  {\n    \"input\": \"\\\"What were the federal statutory income tax rates in 2019, 2018, and 2017?\\\"\",\n    \"output\": \"What was the Income tax at federal statutory rate in 2019, 2018 and 2017?\"\n  },\n  {\n    \"input\": \"What were the federal statutory income tax rates in 2019, 2018, and 2017?\",\n    \"output\": \"What was the Income tax at federal statutory rate in 2019, 2018 and 2017?\"\n  },\n  {\n    \"input\": \"In what year did the State income tax expense, taking into account the federal tax effect, fall below negative one thousand thousands?\",\n    \"output\": \"In which year was State income tax expense, net of federal tax effect less than (1,000) thousands?\"\n  },\n  {\n    \"input\": \"What were the amounts of the Other expense, net in 2018 and 2017?\",\n    \"output\": \"What was the Other expense, net in 2018 and 2017 respectively?\"\n  },\n  {\n    \"input\": \"What is the difference in the amount of money owed from customers (net accounts receivable) between December 31, 2019, and December 31, 2018?\",\n    \"output\": \"What is the change in Net accounts receivable from December 31, 2019 to December 31, 2018?\"\n  },\n  {\n    \"input\": \"What are the respective federal income tax expense at statutory rates in 2018 and 2019?\",\n    \"output\": \"What were the federal income tax expenses at statutory rates for the years 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What were the specific state income tax rates, after accounting for federal benefit 
deductions, for the years 2017 and 2018?\",\n    \"output\": \"What are the respective state income taxes, net of federal benefit in 2017 and 2018?\"\n  },\n  {\n    \"input\": \"What were the state income tax rates, after accounting for federal benefit, for both the years 2017 and 2018?\",\n    \"output\": \"What are the respective state income taxes, net of federal benefit in 2017 and 2018?\"\n  },\n  {\n    \"input\": \"What are the respective service revenue at March 31 and June 30, 2018?\",\n    \"output\": \"What were the service revenues as of March 31 and June 30, 2018?\"\n  },\n  {\n    \"input\": \"What are the respective service revenue at June 30 and September 30, 2018?\",\n    \"output\": \"What is the service revenue as of June 30 and September 30, 2018?\"\n  },\n  {\n    \"input\": \"What are the respective service revenue at June 30 and September 30, 2018?\",\n    \"output\": \"What is the amount of service revenue reported on the financial statements for June 30 and September 30, 2018?\"\n  },\n  {\n    \"input\": \"What percentage of the total revenue generated in North America in 2019 can be attributed to the non-core segment?\",\n    \"output\": \"What is the value of the revenue from the non-core segment from North America as a percentage of the total revenue earned in North America in 2019?\"\n  },\n  {\n    \"input\": \"What is the average amount of net cash generated from operating activities for the years 2018 and 2019?\",\n    \"output\": \"What is the average net cash provided by operating activities in 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What is the amount of cash-based equity compensation expense included in network operations expenses in both 2018 and 2017?\",\n    \"output\": \"What are the respective on-cash equity-based compensation expense included in network operations expenses in 2018 and 2017?\"\n  },\n  {\n    \"input\": \"What are the respective service revenue in 2018 and 2019?\",\n    \"output\": \"What was the 
amount of service revenue in the year 2018 and 2019 respectively?\"\n  },\n  {\n    \"input\": \"What is the total amount of interest and principal payments required for the debt relating to the 2022 Notes?\",\n    \"output\": \"What is the value of the interest and principal payment obligations included in the debt under 2022 Notes?\"\n  },\n  {\n    \"input\": \"What is the total amount of both interest and principal that needs to be paid for the debt included in the 2022 Notes?\",\n    \"output\": \"What is the value of the interest and principal payment obligations included in the debt under 2022 Notes?\"\n  },\n  {\n    \"input\": \"What is the total amount of debt that the company needs to repay within the next 3 years?\",\n    \"output\": \"What is the company's total debt due within 3 years?\"\n  },\n  {\n    \"input\": \"What is the aggregate amount of debt that the company needs to repay within the next three years?\",\n    \"output\": \"What is the company's total debt due within 3 years?\"\n  },\n  {\n    \"input\": \"What is the total amount of finance lease obligations that the company needs to repay within the next 3 years?\",\n    \"output\": \"What is the company's total finance lease obligations due within 3 years?\"\n  },\n  {\n    \"input\": \"What were the specific values of system infrastructure in the years 2018 and 2019?\",\n    \"output\": \"What are the respective values of system infrastructure in 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What were the values of system infrastructure in 2018 and 2019?\",\n    \"output\": \"What are the respective values of system infrastructure in 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What is the percentage increase or decrease in the value of network equipment from 2018 to 2019?\",\n    \"output\": \"What is the percentage change in the value of network equipment between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What is the percentage change in the value of leasehold improvements between 
2018 and 2019?\",\n    \"output\": \"What is the precise percentage increase or decrease in the value of leasehold improvements from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What is the percentage change in the value of system infrastructure between 2018 and 2019?\",\n    \"output\": \"What is the percentage difference in the value of system infrastructure from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What were the pre-tax domestic incomes for 2018 and 2019?\",\n    \"output\": \"What are the respective domestic income before income taxes in 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What was the mean pre-tax domestic income for individuals in the years 2017 and 2018?\",\n    \"output\": \"What is the average domestic income before income taxes in 2017 and 2018?\"\n  },\n  {\n    \"input\": \"What were the average pre-tax domestic incomes in the years 2017 and 2018?\",\n    \"output\": \"What is the average domestic income before income taxes in 2017 and 2018?\"\n  },\n  {\n    \"input\": \"What was the average pre-tax domestic income for households in 2018 and 2019?\",\n    \"output\": \"What is the average domestic income before income taxes in 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What is the average number of basic weighted average common shares in 2018 and 2019?\",\n    \"output\": \"What is the mean value of the basic weighted average common shares for the years 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What is the average number of basic weighted average common shares in 2018 and 2019?\",\n    \"output\": \"What is the average number of basic weighted average common shares for the years 2018 and 2019 combined?\"\n  },\n  {\n    \"input\": \"What was the average impact on stock dilution from stock options in 2018 and 2019?\",\n    \"output\": \"What is the average dilutive effect of stock options in 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What was the average impact of stock options on dilution for the years 2018 and 2019?\",\n    
\"output\": \"What is the average dilutive effect of stock options in 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What does the cost of operations represent?\",\n    \"output\": \"What is the meaning or significance of the term \\\"cost of operations\\\"?\"\n  },\n  {\n    \"input\": \"How many quarters had operating revenues that was below $2,000 million?\",\n    \"output\": \"How many quarters had operating revenues below $2,000 million? Can you provide a breakdown of the number of quarters where the operating revenues were below $2,000 million?\"\n  },\n  {\n    \"input\": \"What was the exact change in Operating revenues from the Three Months Ended March to the Three Months Ended June?\",\n    \"output\": \"What was the change in Operating revenues between Three Months Ended March and June?\"\n  },\n  {\n    \"input\": \"What was the difference in operating revenues for the Three Months Ended March and the Three Months Ended June?\",\n    \"output\": \"What was the change in Operating revenues between Three Months Ended March and June?\"\n  },\n  {\n    \"input\": \"What was the amount of net property and equipment in 2015?\",\n    \"output\": \"What was the exact monetary value of net property and equipment in the year 2015?\"\n  },\n  {\n    \"input\": \"What was the amount of net property and equipment in 2015?\",\n    \"output\": \"What was the specific amount of net property and equipment recorded on the financial statements for the year 2015?\"\n  },\n  {\n    \"input\": \"What was the value of redeemable noncontrolling interests in the year 2019?\",\n    \"output\": \"What were the Redeemable noncontrolling interests in 2019?\"\n  },\n  {\n    \"input\": \"What was the value of the redeemable noncontrolling interests in 2019?\",\n    \"output\": \"What were the Redeemable noncontrolling interests in 2019?\"\n  },\n  {\n    \"input\": \"What was the percent increase or decrease in the equity of American Tower Corporation from 2015 to 2016 in the United 
States?\",\n    \"output\": \"What was the percentage change in Total American Tower Corporation equity between 2015 and 2016?\"\n  },\n  {\n    \"input\": \"What was the percentage increase or decrease in the equity of American Tower Corporation from 2015 to 2016?\",\n    \"output\": \"What was the percentage change in Total American Tower Corporation equity between 2015 and 2016?\"\n  },\n  {\n    \"input\": \"What was the difference in the overall value of assets from 2017 to 2018?\",\n    \"output\": \"What was the change in total assets between 2017 and 2018?\"\n  },\n  {\n    \"input\": \"What was the total amount received from the sales of senior notes in the year 2019 after deducting any expenses or fees?\",\n    \"output\": \"What were the net Proceeds from issuance of senior notes in 2019?\"\n  },\n  {\n    \"input\": \"What was the total amount of money received from the repayment of credit facilities in 2018?\",\n    \"output\": \"What were the net Proceeds from (repayments of) credit facilities in 2018?\"\n  },\n  {\n    \"input\": \"\\\"What was the total amount received or repaid through credit facilities in 2018?\\\"\",\n    \"output\": \"What were the net Proceeds from (repayments of) credit facilities in 2018?\"\n  },\n  {\n    \"input\": \"What was the value of Equipment in 2018?\",\n    \"output\": \"What was the total monetary worth or market value of Equipment in the year 2018?\"\n  },\n  {\n    \"input\": \"What was the value of Equipment in 2018?\",\n    \"output\": \"What was the monetary worth of the Equipment in the year 2018?\"\n  },\n  {\n    \"input\": \"What was the exact monetary worth of buildings and any related improvements in the year 2019?\",\n    \"output\": \"What was the value of buildings and improvements in 2019?\"\n  },\n  {\n    \"input\": \"What was the difference in the value of land and improvements from 2018 to 2019?\",\n    \"output\": \"What was the change in Land and improvements between 2018 and 2019?\"\n  },\n  
{\n    \"input\": \"What was the percentage increase or decrease in the net value of Property and equipment between the years 2018 and 2019?\",\n    \"output\": \"What was the percentage change in Property and equipment, net between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What are the components that make up redevelopment capital expenditures?\",\n    \"output\": \"What does Redevelopment capital expenditures include?\"\n  },\n  {\n    \"input\": \"What actions are authorized by the company's Buyback Programs?\",\n    \"output\": \"What do the company's Buyback Programs authorize the company to do?\"\n  },\n  {\n    \"input\": \"What was the change in the total number of shares purchased between November and December?\",\n    \"output\": \"How much did the total number of shares purchased increase or decrease from November to December?\"\n  },\n  {\n    \"input\": \"What was the total value of the Property, Plant, and Equipment (PPE) in 2016?\",\n    \"output\": \"What was the gross PPE in 2016?\"\n  },\n  {\n    \"input\": \"What was the total value of personal protective equipment (PPE) in terms of sales or revenue for the year 2016?\",\n    \"output\": \"What was the gross PPE in 2016?\"\n  },\n  {\n    \"input\": \"For how many years has the total value of gross intangibles been above $15,000 million?\",\n    \"output\": \"How many years did gross intangibles exceed $15,000 million?\"\n  },\n  {\n    \"input\": \"How many consecutive years did the value of gross intangibles surpass $15,000 million?\",\n    \"output\": \"How many years did gross intangibles exceed $15,000 million?\"\n  },\n  {\n    \"input\": \"What was the percentage change in gross goodwill between 2018 and 2019?\",\n    \"output\": \"What was the percentage difference in the amount of gross goodwill from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What was the percentage change in gross goodwill between 2018 and 2019?\",\n    \"output\": \"What was the percentage difference in gross 
goodwill from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"\\\"What was the overall amount paid in lease payments for the operating lease agreement?\\\"\",\n    \"output\": \"What were the total lease payments for operating lease?\"\n  },\n  {\n    \"input\": \"What was the sum total of all lease payments made for an operating lease?\",\n    \"output\": \"What were the total lease payments for operating lease?\"\n  },\n  {\n    \"input\": \"\\\"What was the total amount of operating lease expenses incurred during the fiscal years 2020, 2021, and 2022?\\\"\",\n    \"output\": \"What was the sum of operating lease in fiscal years 2020-2022?\"\n  },\n  {\n    \"input\": \"What is the proportion of non-current lease liability in relation to the total lease liability, expressed as a percentage?\",\n    \"output\": \"What is non-current lease liability as a percentage of Total lease liability?\"\n  },\n  {\n    \"input\": \"Which years did the company allocate a valuation allowance?\",\n    \"output\": \"In which years did the company provide a valuation allowance?\"\n  },\n  {\n    \"input\": \"For how many years, starting from January 1, was the balance consistently higher than $100 million?\",\n    \"output\": \"How many years was the balance as of January 1 above $100 million?\"\n  },\n  {\n    \"input\": \"How many years, starting from January 1, had a balance above $100 million?\",\n    \"output\": \"How many years was the balance as of January 1 above $100 million?\"\n  },\n  {\n    \"input\": \"What specific details about the company's taxes can be found in their state tax returns?\",\n    \"output\": \"What do the company's state tax returns reflect?\"\n  },\n  {\n    \"input\": \"In 2017, how many instances were there where the deferred Income tax benefit (provision) exceeded $50 million?\",\n    \"output\": \"How many of the deferred Income tax benefit (provision) were above $50 million in 2017?\"\n  },\n  {\n    \"input\": \"How many instances occurred in 
2017 where the deferred income tax benefit (provision) exceeded $50 million?\",\n    \"output\": \"How many of the deferred Income tax benefit (provision) were above $50 million in 2017?\"\n  },\n  {\n    \"input\": \"In 2019, what is the number of income tax benefits (provisions) that exceeded $(4 million)?\",\n    \"output\": \"How many of the current Income tax benefit (provision) were above $(4 million) in 2019?\"\n  },\n  {\n    \"input\": \"In 2019, what is the total number of income tax benefits (provisions) that exceeded $(4 million)?\",\n    \"output\": \"How many of the current Income tax benefit (provision) were above $(4 million) in 2019?\"\n  },\n  {\n    \"input\": \"What was the change in the net cash provided by (used for) by investing activities between 2017 and 2018?\",\n    \"output\": \"What was the difference in the net cash provided by (used for) investing activities from 2017 to 2018?\"\n  },\n  {\n    \"input\": \"What was the percentage change in diluted net income per share attributable to common stockholders of American Tower Corporation between the Three Months Ended in March and June?\",\n    \"output\": \"What was the change in Diluted net income per share attributable to American Tower Corporation common stockholders between Three Months Ended  March and June?\"\n  },\n  {\n    \"input\": \"What was the specific difference in the value of Prepaids and other current assets between the fiscal years 2018 and 2019?\",\n    \"output\": \"What was the change in Prepaids and other current assets between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What was the difference in the amount of Prepaids and other current assets reported in the financial statements between the years 2018 and 2019?\",\n    \"output\": \"What was the change in Prepaids and other current assets between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What was the total amount of property and real estate taxes that were accumulated or owed in 2018?\",\n    \"output\": 
\"What was the Accrued property and real estate taxes in 2018?\"\n  },\n  {\n    \"input\": \"What was the change in Other accrued expenses between 2018 and 2019?\",\n    \"output\": \"What was the difference in the amount of Other accrued expenses recorded in 2018 compared to 2019?\"\n  },\n  {\n    \"input\": \"What was the change in Other accrued expenses between 2018 and 2019?\",\n    \"output\": \"What was the difference in the amount of Other accrued expenses recorded between the years 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What was the total amount of Net operating loss carryforwards recorded for the financial year 2019?\",\n    \"output\": \"What were the Net operating loss carryforwards in 2019?\"\n  },\n  {\n    \"input\": \"What were the amounts of the liability decreases included in the company's revisions in estimates for both 2019 and 2018?\",\n    \"output\": \"How much were the decreases to the liability included in the revisions in estimates by the company in 2019 and 2018 respectively?\"\n  },\n  {\n    \"input\": \"What was the change in balance as of December 31 between 2018 and 2019?\",\n    \"output\": \"What was the difference in the balance as of December 31st, 2018 and December 31st, 2019?\"\n  },\n  {\n    \"input\": \"What was the value of the Eure-et-Loir interests as of December 31, 2019?\",\n    \"output\": \"What was the monetary worth of the Eure-et-Loir interests on the specific date of December 31, 2019?\"\n  },\n  {\n    \"input\": \"How many of the non-current liabilities components in 2018 were above $500 million?\",\n    \"output\": \"How many components of non-current liabilities in the year 2018 exceeded $500 million in value?\"\n  },\n  {\n    \"input\": \"How many of the non-current liabilities components in 2018 were above $500 million?\",\n    \"output\": \"In 2018, how many components of non-current liabilities exceeded $500 million in value?\"\n  },\n  {\n    \"input\": \"How many years has the expense for 
stock-based compensation for Total exceeded $100 million?\",\n    \"output\": \"How many years did Total stock-based compensation expense exceed $100 million?\"\n  },\n  {\n    \"input\": \"For how many years has the expense for Stock-based compensation - Services been higher than $1 million?\",\n    \"output\": \"How many years did Stock-based compensation expense - Services exceed $1 million?\"\n  },\n  {\n    \"input\": \"What differentiates the characteristics of Federal and State NOLs from 2025 to 2029?\",\n    \"output\": \"What is the difference between Federal and State NOLs in the period 2025 to 2029?\"\n  },\n  {\n    \"input\": \"What differentiates Federal and State NOLs in the 2025-2029 timeframe, and how do they vary during this period?\",\n    \"output\": \"What is the difference between Federal and State NOLs in the period 2025 to 2029?\"\n  },\n  {\n    \"input\": \"What was the approximate percentage of revenue in the company's property segments were attributable to their communication sites in 2018?\",\n    \"output\": \"What was the estimated percentage of the company's total revenue in 2018 that came from their property segments, specifically from their communication sites?\"\n  },\n  {\n    \"input\": \"What was the approximate percentage of revenue in the company's property segments were attributable to their communication sites in 2018?\",\n    \"output\": \"What percentage of the company's total revenue in 2018 can be attributed to their communication sites within their property segments?\"\n  },\n  {\n    \"input\": \"What percentage of the total revenue in the United States in 2019 can be attributed to property segments?\",\n    \"output\": \"How many percent of total revenue in 2019 was accounted for by property segments in the U.S.?\"\n  },\n  {\n    \"input\": \"How many percent of total revenue in 2018 was accounted for by property segments in Asia?\",\n    \"output\": \"What was the percentage of the total revenue in 2018 contributed 
by the property segments specifically in Asia?\"\n  },\n  {\n    \"input\": \"What were the intangible assets related to the Tower and network location specifically in the year 2019?\",\n    \"output\": \"What were the Tower and network location intangible assets in 2019?\"\n  },\n  {\n    \"input\": \"What were the intangible assets related to the tower and network locations observed in 2019?\",\n    \"output\": \"What were the Tower and network location intangible assets in 2019?\"\n  },\n  {\n    \"input\": \"How many years were the total impairment charges above $200 million?\",\n    \"output\": \"How many years did the total impairment charges exceed or surpass $200 million?\"\n  },\n  {\n    \"input\": \"How many years were the total impairment charges above $200 million?\",\n    \"output\": \"For how many years did the total impairment charges exceed $200 million?\"\n  },\n  {\n    \"input\": \"What was the percentage change in Total impairment charges between 2018 and 2019?\",\n    \"output\": \"What percentage represents the difference in Total impairment charges between the years 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What was the percentage change in Total impairment charges between 2018 and 2019?\",\n    \"output\": \"What is the percentage difference in Total impairment charges from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"For how long has the sum of all assets reached or surpassed $40,000 million?\",\n    \"output\": \"How many years did the total assets exceed $40,000 million?\"\n  },\n  {\n    \"input\": \"What was the balance as at January 1, 2019?\",\n    \"output\": \"What was the amount of money in the account on January 1, 2019?\"\n  },\n  {\n    \"input\": \"What was the difference in account balance on January 1st, comparing 2018 to 2019?\",\n    \"output\": \"What was the change in balance as of January 1 between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What was the difference in the account balance between January 1, 2018 
and January 1, 2019?\",\n    \"output\": \"What was the change in balance as of January 1 between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What is the difference in the percentage increase of current year for 2017 and 2018?\",\n    \"output\": \"What was the change in current year increases between 2017 and 2018?\"\n  },\n  {\n    \"input\": \"What was the difference in the yearly increases of current year between 2017 and 2018?\",\n    \"output\": \"What was the change in current year increases between 2017 and 2018?\"\n  },\n  {\n    \"input\": \"What was the total amount spent on depreciation in the year 2019?\",\n    \"output\": \"What was the depreciation cost in 2019?\"\n  },\n  {\n    \"input\": \"What was the change in depreciation between 2018 and 2019?\",\n    \"output\": \"What was the difference in the amount of depreciation incurred in 2018 compared to 2019?\"\n  },\n  {\n    \"input\": \"What was the gross amount of accumulated depreciation at the beginning of 2017 and how did it change by the beginning of 2018?\",\n    \"output\": \"What was the change in Gross amount of accumulated depreciation at beginning between 2017 and 2018?\"\n  },\n  {\n    \"input\": \"Which specific years have the statute of limitations expired on certain unrecognized tax benefits?\",\n    \"output\": \"Which years did the statute of limitations on certain unrecognized tax benefits lapse?\"\n  },\n  {\n    \"input\": \"In 2019, what was the total value of receivables that had not yet been invoiced to customers?\",\n    \"output\": \"What were the unbilled receivables in 2019?\"\n  },\n  {\n    \"input\": \"What was the change in Unbilled receivables between 2018 and 2019?\",\n    \"output\": \"What was the net difference in Unbilled receivables from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What was the change in the investments into Nippon Yttrium Co., Ltd (\\\"NYC\\\") between 2018 and 2019?\",\n    \"output\": \"\\\"What was the difference in the amount of money 
invested in Nippon Yttrium Co., Ltd (\\\"NYC\\\") from 2018 to 2019?\\\"\"\n  },\n  {\n    \"input\": \"What was the change in the investments into Nippon Yttrium Co., Ltd (\\\"NYC\\\") between 2018 and 2019?\",\n    \"output\": \"What was the difference in the amount of investments made into Nippon Yttrium Co., Ltd (\\\"NYC\\\") during the years 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What was the percentage change in total investments between 2018 and 2019?\",\n    \"output\": \"What was the exact percentage difference in the total amount of investments from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What was the percentage increase or decrease in Pro forma revenues from 2017 to 2018?\",\n    \"output\": \"What was the change in Pro forma revenues between 2017 and 2018?\"\n  },\n  {\n    \"input\": \"How many years did net income exceed $100,000 thousand?\",\n    \"output\": \"For how many consecutive years has the net income surpassed $100,000 thousand?\"\n  },\n  {\n    \"input\": \"How many years did net income exceed $100,000 thousand?\",\n    \"output\": \"For how many consecutive years has the net income been greater than $100,000 thousand?\"\n  },\n  {\n    \"input\": \"What was the specific financial difference in adjustments made to the defined benefit post-retirement plan between the years 2017 and 2019?\",\n    \"output\": \"What was the change in Defined benefit post-retirement plan adjustments between 2017 and 2019?\"\n  },\n  {\n    \"input\": \"What was the difference in the adjustments made to Defined Benefit post-retirement plans between the years 2017 and 2019?\",\n    \"output\": \"What was the change in Defined benefit post-retirement plan adjustments between 2017 and 2019?\"\n  },\n  {\n    \"input\": \"What were the total sales revenues in Hong Kong in the year 2018?\",\n    \"output\": \"What was the net sales in Hong Kong in 2018?\"\n  },\n  {\n    \"input\": \"What was the exact amount of net sales generated in Singapore during 
the year 2017?\",\n    \"output\": \"What was the net sales in Singapore in 2017?\"\n  },\n  {\n    \"input\": \"What was the change in net sales in Mexico between 2017 and 2018?\",\n    \"output\": \"What was the difference in net sales in Mexico from 2017 to 2018 and how does it impact the overall sales performance in the country?\"\n  },\n  {\n    \"input\": \"In which specific years did the cumulative net sales in every region surpass one million dollars?\",\n    \"output\": \"Which years did the total net sales in all regions exceed $1,000,000 thousand?\"\n  },\n  {\n    \"input\": \"What was the percentage change in the net sales from Other Countries between 2018 and 2019?\",\n    \"output\": \"What was the percentage difference in net sales from Other Countries in 2019 compared to 2018?\"\n  },\n  {\n    \"input\": \"What was the difference in the amount of Amortization of debt issuance costs between the years 2017 and 2018?\",\n    \"output\": \"What was the change in the Amortization of debt issuance costs between 2017 and 2018?\"\n  },\n  {\n    \"input\": \"For how many years did the interest expense on capital leases surpass $200 thousand?\",\n    \"output\": \"How many years did Interest expense on capital leases exceed $200 thousand?\"\n  },\n  {\n    \"input\": \"By what percentage did the total interest expense increase or decrease from 2018 to 2019?\",\n    \"output\": \"What was the percentage change in the total interest expense between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What was the percentage increase or decrease in the total interest expense from 2018 to 2019?\",\n    \"output\": \"What was the percentage change in the total interest expense between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What was the total net actuarial loss or gain specifically associated with Other Benefits for the year 2018?\",\n    \"output\": \"What was the Net actuarial loss (gain) for Other Benefits in 2018?\"\n  },\n  {\n    \"input\": \"What was the 
change in net sales between 2017 and 2019?\",\n    \"output\": \"What is the difference in net sales from 2017 to 2019?\"\n  },\n  {\n    \"input\": \"For how many years was the cost of sales higher than $800,000 thousand?\",\n    \"output\": \"How many years did cost of sales exceed $800,000 thousand?\"\n  },\n  {\n    \"input\": \"For how many years has the cost of sales been higher than $800,000 thousand?\",\n    \"output\": \"How many years did cost of sales exceed $800,000 thousand?\"\n  },\n  {\n    \"input\": \"What was the precise net value of property, plant, and equipment in Japan specifically for the year 2019?\",\n    \"output\": \"What was the net amount of property, plant and equipment in Japan in 2019?\"\n  },\n  {\n    \"input\": \"What was the net amount of property, plant and equipment in Thailand in 2018?\",\n    \"output\": \"What was the exact net value of property, plant, and equipment in Thailand during the year 2018?\"\n  },\n  {\n    \"input\": \"How many years did Total net property, plant and equipment from Non-United States regions exceed $400,000 thousand?\",\n    \"output\": \"How many years was the total value of property, plant, and equipment from regions outside the United States greater than $400,000 thousand?\"\n  },\n  {\n    \"input\": \"What was the change in the net property, plant and equipment in China between 2018 and 2019?\",\n    \"output\": \"What was the exact difference in the net value of property, plant, and equipment in China during the period from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What was the net interest expense in 2019?\",\n    \"output\": \"What was the amount of interest expenses incurred in 2019, after subtracting any interest income earned during the same year?\"\n  },\n  {\n    \"input\": \"What was the change in the Loss on early extinguishment of debt between 2018 and 2019?\",\n    \"output\": \"What was the difference in the Loss on early extinguishment of debt from 2018 to 2019?\"\n  },\n  
{\n    \"input\": \"What was the percentage increase or decrease in the net interest expense from 2018 to 2019?\",\n    \"output\": \"What was the percentage change in the net interest expense between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What percentage increase or decrease occurred in the net interest expense from 2018 to 2019?\",\n    \"output\": \"What was the percentage change in the net interest expense between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What was the change in the Returns reserves between 2018 and 2019?\",\n    \"output\": \"What was the difference in the amount allocated to Returns reserves between the fiscal years 2018 and 2019?\"\n  },\n  {\n    \"input\": \"How much did the Additions for tax positions change from 2018 to 2019 in the current year?\",\n    \"output\": \"What was the change in the Additions for tax positions of the current year between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What was the reflection of Pension and other post-retirement benefits?\",\n    \"output\": \"What did Pension and other post-retirement benefits reflect?\"\n  },\n  {\n    \"input\": \"What were the interest obligations that had payments due more than 5 years?\",\n    \"output\": \"What were the interest obligations that had payments due for a period longer than 5 years?\"\n  },\n  {\n    \"input\": \"What was the precise difference in the total amount between Employee separation liability and Restructuring liability?\",\n    \"output\": \"What was the difference in the total between Employee separation liability and Restructuring liability?\"\n  },\n  {\n    \"input\": \"What is the discrepancy in the total amount between Employee Separation Liability and Restructuring Liability?\",\n    \"output\": \"What was the difference in the total between Employee separation liability and Restructuring liability?\"\n  },\n  {\n    \"input\": \"What was the distinction in the payment amounts expected in Year 1 for interest obligations compared to 
operating lease obligations?\",\n    \"output\": \"What was the difference between the payments due by Year 1 between Interest obligations and operating lease obligations?\"\n  },\n  {\n    \"input\": \"What were the contrasting payment amounts between Year 1 for interest obligations and operating lease obligations?\",\n    \"output\": \"What was the difference between the payments due by Year 1 between Interest obligations and operating lease obligations?\"\n  },\n  {\n    \"input\": \"What is the percentage of total debt obligations in relation to the overall contractual obligations?\",\n    \"output\": \"What were the total debt obligations as a percentage of the total contractual obligations?\"\n  },\n  {\n    \"input\": \"What was the amount of cash generated or used in financing activities during the year 2017?\",\n    \"output\": \"What was the Net cash provided by (used in) financing activities in 2017?\"\n  },\n  {\n    \"input\": \"What was the percentage change in the amount of cash, cash equivalents, and restricted cash from 2018 to 2019?\",\n    \"output\": \"What was the percentage change in the Net increase in cash, cash equivalents, and restricted cash between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"How much of a discount did Customer Advances receive?\",\n    \"output\": \"What was the amount of discount for Customer Advances?\"\n  },\n  {\n    \"input\": \"What was the total amount of discount provided for Customer Advances?\",\n    \"output\": \"What was the amount of discount for Customer Advances?\"\n  },\n  {\n    \"input\": \"What was the net gain or loss from acquisitions in the year 2018?\",\n    \"output\": \"What was the Acquisition (gain) loss in 2018?\"\n  },\n  {\n    \"input\": \"What were the restructuring charges in 2017?\",\n    \"output\": \"\\\"What were the specific charges related to restructuring expenses incurred during the year 2017?\\\"\"\n  },\n  {\n    \"input\": \"What were the restructuring charges in 2017?\",\n   
 \"output\": \"What were the specific charges related to the corporate restructuring that occurred in 2017?\"\n  },\n  {\n    \"input\": \"For how many years has the income tax expense from continuing operations exceeded $10,000 thousand?\",\n    \"output\": \"How many years did Total current income tax expense from continuing operations exceed $10,000 thousand?\"\n  },\n  {\n    \"input\": \"What was the difference in foreign income tax expense between 2017 and 2018?\",\n    \"output\": \"What was the change in current foreign income tax expense between 2017 and 2018?\"\n  },\n  {\n    \"input\": \"What was the difference in the amount of foreign income tax expense recorded for the years 2017 and 2018?\",\n    \"output\": \"What was the change in current foreign income tax expense between 2017 and 2018?\"\n  },\n  {\n    \"input\": \"What was the percentage change in the deferred federal income tax expense between 2018 and 2019?\",\n    \"output\": \"What was the percentage increase or decrease in the amount of deferred federal income tax expense from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What were the restructuring charges in 2019?\",\n    \"output\": \"What were the specific charges incurred as part of the restructuring efforts in the year 2019?\"\n  },\n  {\n    \"input\": \"What was the change in Personnel reduction costs between 2017 and 2018?\",\n    \"output\": \"What was the difference in the costs associated with reducing personnel between the years 2017 and 2018?\"\n  },\n  {\n    \"input\": \"What is the percentage change in the costs associated with relocation and exit from 2018 to 2019?\",\n    \"output\": \"What was the percentage change in the Relocation and exit costs between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What is the percentage difference in the Relocation and exit costs from 2018 to 2019?\",\n    \"output\": \"What was the percentage change in the Relocation and exit costs between 2018 and 2019?\"\n  },\n  {\n    \"input\": 
\"What was the change in Foreign income between 2017 and 2018?\",\n    \"output\": \"What was the difference in the amount of income from foreign sources recorded for the years 2017 and 2018?\"\n  },\n  {\n    \"input\": \"What were the net sales in 2019?\",\n    \"output\": \"What was the total value of sales generated in 2019?\"\n  },\n  {\n    \"input\": \"What were the net sales in 2019?\",\n    \"output\": \"What was the total amount of revenue generated from sales in the year 2019?\"\n  },\n  {\n    \"input\": \"\\\"What was the gross margin percentage for the quarter ending on September 30th?\\\"\",\n    \"output\": \"What was the gross margin for the Sep-30 quarter?\"\n  },\n  {\n    \"input\": \"\\\"What was the exact gross margin percentage for the quarter ending on September 30th?\\\"\",\n    \"output\": \"What was the gross margin for the Sep-30 quarter?\"\n  },\n  {\n    \"input\": \"\\\"For which specific time periods did the Gross Margin exceed $120,000 thousand?\\\"\",\n    \"output\": \"Which quarters ended did the Gross Margin exceed $120,000 thousand?\"\n  },\n  {\n    \"input\": \"What was the total value of contract assets in 2019?\",\n    \"output\": \"What were the amount of contract assets in 2019?\"\n  },\n  {\n    \"input\": \"What were the specific values of contract assets recorded in the year 2019?\",\n    \"output\": \"What were the amount of contract assets in 2019?\"\n  },\n  {\n    \"input\": \"What was the difference in the amount of prepaid expenses between the fiscal years 2018 and 2019?\",\n    \"output\": \"What was the change in prepaid expenses between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What was the difference in the amount of prepaid expenses between the years 2018 and 2019?\",\n    \"output\": \"What was the change in prepaid expenses between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What was the percentage difference in the variance between Software Solutions and Data and Analytics?\",\n    \"output\": 
\"What was the difference in the percent variance between Software Solutions and Data and Analytics?\"\n  },\n  {\n    \"input\": \"What was the reported amount of net trade receivables in 2017?\",\n    \"output\": \"What were the net trade receivables as reported in 2017?\"\n  },\n  {\n    \"input\": \"What was the adjustmentments for ASC 606 adoption for net computer software?\",\n    \"output\": \"What adjustments were made for the adoption of ASC 606 in relation to net computer software? Please provide a detailed explanation of the adjustments made for net computer software under ASC 606.\"\n  },\n  {\n    \"input\": \"What was the adjustmentments for ASC 606 adoption for net computer software?\",\n    \"output\": \"What were the adjustments made for the adoption of ASC 606 regarding net computer software?\"\n  },\n  {\n    \"input\": \"In which specific years did the total value of Deferred contract costs surpass $30 million?\",\n    \"output\": \"Which years did Deferred contract costs exceed $30 million?\"\n  },\n  {\n    \"input\": \"What was the change in cash equivalents between 2018 and 2019?\",\n    \"output\": \"What was the difference in the amount of cash equivalents reported for the years 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What was the change in cash equivalents between 2018 and 2019?\",\n    \"output\": \"What was the difference in the amount of cash equivalents held between the year 2018 and the year 2019?\"\n  },\n  {\n    \"input\": \"What was the change in Cash between 2018 and 2019?\",\n    \"output\": \"What was the difference in the Cash amount reported in the financial statements of the company between the years 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What was the amount of Equity method investments in 2019?\",\n    \"output\": \"How much money was invested through the equity method in 2019?\"\n  },\n  {\n    \"input\": \"Which years does the table provide information for the EBITDA by segment?\",\n    \"output\": \"For 
which years does the table contain EBITDA information segmented by different categories?\"\n  },\n  {\n    \"input\": \"Which years does the table provide information for the EBITDA by segment?\",\n    \"output\": \"For which specific years does the table provide EBITDA data segmented by different categories?\"\n  },\n  {\n    \"input\": \"What was the total value of EBITDA (Earnings Before Interest, Taxes, Depreciation, and Amortization) for the year 2018?\",\n    \"output\": \"What was the sum of the EBITDA in 2018?\"\n  },\n  {\n    \"input\": \"\\\"What was the specific monetary balance amount for Software Solutions in the year 2017?\\\"\",\n    \"output\": \"What was the balance in 2017 for Software Solutions?\"\n  },\n  {\n    \"input\": \"What was the specific balance amount, in USD, for Software Solutions in the year 2017?\",\n    \"output\": \"What was the balance in 2017 for Software Solutions?\"\n  },\n  {\n    \"input\": \"What was the specific amount of federal income tax expense incurred in the fiscal year of 2019?\",\n    \"output\": \"What was the current federal income tax expense in 2019?\"\n  },\n  {\n    \"input\": \"Which years does the table provide information for depreciation and amortization by segment?\",\n    \"output\": \"For which specific periods does the table offer data regarding depreciation and amortization broken down by segment?\"\n  },\n  {\n    \"input\": \"What was the value of the total assets before the implementation of ASC 606 revenue recognition standards?\",\n    \"output\": \"What were the total assets without the adoption of ASC 606?\"\n  },\n  {\n    \"input\": \"What was the sum of all assets before the implementation of ASC 606?\",\n    \"output\": \"What were the total assets without the adoption of ASC 606?\"\n  },\n  {\n    \"input\": \"How did the adoption of ASC 606 affect the difference between Total Assets and Total Liabilities?\",\n    \"output\": \"What was the difference the effect of ASC 606 Adoption 
between Total Assets and Total Liabilities?\"\n  },\n  {\n    \"input\": \"What is the impact of ASC 606 Adoption on the difference between Total Assets and Total Liabilities?\",\n    \"output\": \"What was the difference the effect of ASC 606 Adoption between Total Assets and Total Liabilities?\"\n  },\n  {\n    \"input\": \"What was the percentage change in cash flows from operating activities in 2019 compared to 2018?\",\n    \"output\": \"What was the cash flows provided by operating activities in 2019 as a percentage of the cash flow in 2018?\"\n  },\n  {\n    \"input\": \"For how many consecutive years did the Net increase in cash and cash equivalents remain positive?\",\n    \"output\": \"How many years was the Net  increase in cash and cash equivalents positive?\"\n  },\n  {\n    \"input\": \"What was the EBITDA difference specifically recorded between the Software Solutions division and the Data and Analytics division?\",\n    \"output\": \"What was the difference in the EBITDA between Software Solutions and Data and Analytics?\"\n  },\n  {\n    \"input\": \"What was the exact difference in EBITDA (earnings before interest, taxes, depreciation, and amortization) between Software Solutions and Data and Analytics?\",\n    \"output\": \"What was the difference in the EBITDA between Software Solutions and Data and Analytics?\"\n  },\n  {\n    \"input\": \"What specific expenses were included in the category of operating expenses for Corporate and Other?\",\n    \"output\": \"What did operating expenses for Corporate and Other include?\"\n  },\n  {\n    \"input\": \"What was the discrepancy or variance between the overall value of assets and the specific intangible asset of goodwill in the context of data and analytics?\",\n    \"output\": \"What was the difference between the total assets and goodwill from data and analytics?\"\n  },\n  {\n    \"input\": \"What was the difference between Operating expenses and Revenues from Software Solutions?\",\n    
\"output\": \"What differentiates Operating expenses and Revenues from Software Solutions? Please provide a detailed explanation comparing the two.\"\n  },\n  {\n    \"input\": \"What was the percentage change in the Net earnings margin between 2017 and 2019?\",\n    \"output\": \"What was the percentage difference in the Net earnings margin from 2017 to 2019?\"\n  },\n  {\n    \"input\": \"What was the percentage change in the Net earnings margin between 2017 and 2019?\",\n    \"output\": \"What was the exact percentage difference in the net earnings margin from 2017 to 2019?\"\n  },\n  {\n    \"input\": \"What was the percentage change in revenues between 2018 and 2019?\",\n    \"output\": \"What was the percentage increase or decrease in revenues from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What was the percentage change in revenues between 2018 and 2019?\",\n    \"output\": \"What was the percentage difference in the amount of money earned in revenue from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What were the specific prepaid expenses incurred during the year 2018?\",\n    \"output\": \"What were the prepaid expenses in 2018?\"\n  },\n  {\n    \"input\": \"What were the specific categories of Other current assets in the financial statements for the year 2019?\",\n    \"output\": \"What were the Other current assets in 2019?\"\n  },\n  {\n    \"input\": \"What was the change in contract assets between 2018 and 2019?\",\n    \"output\": \"What was the difference in the amount of contract assets from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"How many years did prepaid expenses exceed $40.0 million?\",\n    \"output\": \"For how many years was the amount of prepaid expenses higher than $40.0 million?\"\n  },\n  {\n    \"input\": \"How many years did the operating margin exceed 20.0%?\",\n    \"output\": \"For how many years was the operating margin greater than 20.0%?\"\n  },\n  {\n    \"input\": \"What was the amount of Corporate Services in 2018?\",\n 
   \"output\": \"What was the specific amount of funds allocated to Corporate Services during the year 2018?\"\n  },\n  {\n    \"input\": \"What was the amount of Corporate Services in 2018?\",\n    \"output\": \"How much did Corporate Services amount to in the year 2018?\"\n  },\n  {\n    \"input\": \"What specific factors or elements were taken into consideration when calculating the debt?\",\n    \"output\": \"What did the calculation for Debt include?\"\n  },\n  {\n    \"input\": \"What was the difference between Total Debt and Total Interest on Debt?\",\n    \"output\": \"What is the distinction between the total amount of debt and the total interest incurred on that debt?\"\n  },\n  {\n    \"input\": \"How many contracts with a value exceeding $100 million were made during the period of 2021-2022?\",\n    \"output\": \"For the period 2021-2022, how many contractual obligations exceeded $100 million?\"\n  },\n  {\n    \"input\": \"What is the dollar difference between the actual cost and the estimated cost for software solutions?\",\n    \"output\": \"What was the dollar variance for software solutions?\"\n  },\n  {\n    \"input\": \"What was the specific revenue generated from the field of Data and Analytics during the year 2017?\",\n    \"output\": \"What was revenue from Data and Analytics in 2017?\"\n  },\n  {\n    \"input\": \"For how many consecutive years has revenue from Data and Analytics surpassed $150 million?\",\n    \"output\": \"How many years did revenue from Data and Analytics exceed $150 million?\"\n  },\n  {\n    \"input\": \"What was the average amount of money earned between the years 2017 and 2018?\",\n    \"output\": \"What was the average total revenue between 2017 and 2018?\"\n  },\n  {\n    \"input\": \"What was the average revenue from Corporate and Other between 2017 and 2018?\",\n    \"output\": \"What was the average revenue generated from the Corporate and Other category during the two-year period encompassing 2017 and 2018?\"\n  
},\n  {\n    \"input\": \"What was the change in purchased software between 2018 and 2019?\",\n    \"output\": \"What was the difference in the amount of software that was bought between the years 2018 and 2019?\"\n  },\n  {\n    \"input\": \"For how many years has the internally developed software generated revenue exceeding $800 million?\",\n    \"output\": \"How many years did internally developed software exceed $800 million?\"\n  },\n  {\n    \"input\": \"For how long did internally developed software generate revenue exceeding $800 million?\",\n    \"output\": \"How many years did internally developed software exceed $800 million?\"\n  },\n  {\n    \"input\": \"What was the percentage change in net computer software between 2018 and 2019?\",\n    \"output\": \"What was the percentage increase or decrease in net computer software from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"How many years did Percentage of gross lease receivables exceed 2.0%?\",\n    \"output\": \"For how many years was the percentage of gross lease receivables above 2.0%?\"\n  },\n  {\n    \"input\": \"How many years did Percentage of gross lease receivables exceed 2.0%?\",\n    \"output\": \"For how long has the percentage of gross lease receivables been above 2.0%?\"\n  },\n  {\n    \"input\": \"What was the percentage change in Allowance for doubtful accounts between 2018 and 2019?\",\n    \"output\": \"What is the percentage difference in the allowance for doubtful accounts from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What was the percentage change in Allowance for doubtful accounts between 2018 and 2019?\",\n    \"output\": \"What was the percentage increase or decrease in the Allowance for doubtful accounts from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What was the percentage change in revenue from the Europe, Middle East, and Africa (EMEA) region between 2017 and 2018?\",\n    \"output\": \"What was the change for revenue from EMEA between 2017 and 2018?\"\n  },\n  {\n    
\"input\": \"What was the percentage change in revenue from the Europe, Middle East, and Africa (EMEA) region from 2017 to 2018?\",\n    \"output\": \"What was the change for revenue from EMEA between 2017 and 2018?\"\n  },\n  {\n    \"input\": \"Why would actual maturities differ from the contractual maturities?\",\n    \"output\": \"Why do the actual time periods until maturity sometimes vary from the specified contractual time periods?\"\n  },\n  {\n    \"input\": \"What is the distinction in amortized cost between debt investments held for less than 1 year compared to those held for a period ranging from 1 year to 5 years?\",\n    \"output\": \"What was the difference between the amortized cost of debt investments that were within 1 year and after 1 year through 5 years?\"\n  },\n  {\n    \"input\": \"What was the total amount of money allocated to the gradual write-off of acquired intangible assets throughout the year 2018?\",\n    \"output\": \"What was the total amortization of purchased intangible assets in 2018?\"\n  },\n  {\n    \"input\": \"What was the difference in the variance in dollars between Available-for-sale debt investments and net Other gains (losses)?\",\n    \"output\": \"What was the variance in dollars between the variance in Available-for-sale debt investments and the variance in net Other gains (losses)?\"\n  },\n  {\n    \"input\": \"What was the percentage change in the net other gains (losses) between 2017 and 2018?\",\n    \"output\": \"What was the percent change in the net amount of other gains (losses) from 2017 to 2018?\"\n  },\n  {\n    \"input\": \"What were the dividends in 2018?\",\n    \"output\": \"What were the dividend payments made in the year 2018?\"\n  },\n  {\n    \"input\": \"What were the dividends in 2018?\",\n    \"output\": \"What was the total amount of dividends paid out in the year 2018?\"\n  },\n  {\n    \"input\": \"What units are being utilized in the table and what purpose do they serve in the given 
context?\",\n    \"output\": \"What are the units used in the table?\"\n  },\n  {\n    \"input\": \"What was the difference in the balance at the start of the fiscal year in 2017 compared to 2018?\",\n    \"output\": \"What was the change in Balance at beginning of fiscal year between 2017 and 2018?\"\n  },\n  {\n    \"input\": \"What was the difference in the balance at the start of the fiscal year from 2017 to 2018?\",\n    \"output\": \"What was the change in Balance at beginning of fiscal year between 2017 and 2018?\"\n  },\n  {\n    \"input\": \"How does the company typically recommend analyzing their liquidity and capital resources for future periods?\",\n    \"output\": \"How does the company believe that their liquidity and capital resources in future periods should be analyzed?\"\n  },\n  {\n    \"input\": \"What differentiates operating leases with durations of less than 1 year from those lasting 1 to 3 years?\",\n    \"output\": \"What was the difference in operating leases between those that were less than 1 year and 1 to 3 years?\"\n  },\n  {\n    \"input\": \"How did the characteristics of operating leases differ between those with a duration of less than 1 year and those lasting from 1 to 3 years?\",\n    \"output\": \"What was the difference in operating leases between those that were less than 1 year and 1 to 3 years?\"\n  },\n  {\n    \"input\": \"What percentage of the total contractual obligations is represented by the senior notes?\",\n    \"output\": \"What was the total senior notes as a percentage of total contractual obligations?\"\n  },\n  {\n    \"input\": \"What is the period that has the greatest Operating leases?\",\n    \"output\": \"What is the time frame during which Operating leases are most prevalent?\"\n  },\n  {\n    \"input\": \"How does the company manage and minimize the potential financial loss related to credit risk in relation to derivatives?\",\n    \"output\": \"How does the company mitigate credit risk associated with 
derivatives?\"\n  },\n  {\n    \"input\": \"Which years does the table provide information for the company's outstanding derivatives?\",\n    \"output\": \"For which specific years does the table contain information regarding the outstanding derivatives of the company?\"\n  },\n  {\n    \"input\": \"Which years does the table provide information for the company's outstanding derivatives?\",\n    \"output\": \"For which specific years does the table present information about the company's exceptional derivatives?\"\n  },\n  {\n    \"input\": \"What was the change in the Net investment hedging instruments between 2018 and 2019?\",\n    \"output\": \"What was the difference in the amount of net investment hedging instruments between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What was the change in the Net investment hedging instruments between 2018 and 2019?\",\n    \"output\": \"What was the difference in the amount of Net investment hedging instruments between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What was the percentage change in the total amount of outstanding derivatives between 2018 and 2019?\",\n    \"output\": \"What was the percentage difference in the total value of outstanding derivatives from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"How much was the total amount spent on Purchased Considerations specifically for BroadSoft?\",\n    \"output\": \"What was the amount of Purchased Considerations for BroadSoft?\"\n  },\n  {\n    \"input\": \"What were the differences in terms of Purchase Consideration specifically between Springpath and Broadsoft? 
Please provide additional details and insights to help understand this comparison better.\",\n    \"output\": \"What was the difference in Purchase Consideration between Springpath and Broadsoft?\"\n  },\n  {\n    \"input\": \"What was the specific variance in terms of Purchase Consideration when comparing Springpath and Broadsoft?\",\n    \"output\": \"What was the difference in Purchase Consideration between Springpath and Broadsoft?\"\n  },\n  {\n    \"input\": \"What was the amount of unearned income specifically derived from lease receivables in the year 2019?\",\n    \"output\": \"What was unearned income from lease receivables in 2019?\"\n  },\n  {\n    \"input\": \"\\\"What was the amount of unearned income generated specifically from lease receivables during the year 2019?\\\"\",\n    \"output\": \"What was unearned income from lease receivables in 2019?\"\n  },\n  {\n    \"input\": \"What was the difference in the reported total between current and noncurrent financing receivables?\",\n    \"output\": \"How much greater or lesser was the total reported amount of financing receivables between current and noncurrent categories?\"\n  },\n  {\n    \"input\": \"What were the inclusions of foreign exchange transactions and other related activities?\",\n    \"output\": \"What did foreign exchange and other include?\"\n  },\n  {\n    \"input\": \"What was the difference, expressed as a percentage, in the balance of financing receivables at the end of fiscal year 2018 compared to the balance at the end of fiscal year 2019?\",\n    \"output\": \"What was the percentage change in the balance at the end of fiscal year for financing receivables between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What percentage increase or decrease occurred in the balance of financing receivables at the end of the fiscal year from 2018 to 2019?\",\n    \"output\": \"What was the percentage change in the balance at the end of fiscal year for financing receivables between 2018 and 
2019?\"\n  },\n  {\n    \"input\": \"How many years did Revenue exceed $50,000 million?\",\n    \"output\": \"How many consecutive years has the revenue exceeded $50,000 million?\"\n  },\n  {\n    \"input\": \"How many years did Revenue exceed $50,000 million?\",\n    \"output\": \"For how many complete years has the revenue consistently surpassed the $50,000 million mark?\"\n  },\n  {\n    \"input\": \"What is the difference in the amount of Goodwill between Duo and Luxtera and how does it impact their overall value?\",\n    \"output\": \"What was the difference in Goodwill between Duo and Luxtera?\"\n  },\n  {\n    \"input\": \"What were the variations in the level of Goodwill between Duo and Luxtera?\",\n    \"output\": \"What was the difference in Goodwill between Duo and Luxtera?\"\n  },\n  {\n    \"input\": \"What was the discrepancy in the value of purchased intangible assets between Luxtera and other companies?\",\n    \"output\": \"What was the difference in Purchased intangible assets between Luxtera and Others?\"\n  },\n  {\n    \"input\": \"What were the variations in the value of purchased intangible assets specifically between Luxtera and other companies/entities?\",\n    \"output\": \"What was the difference in Purchased intangible assets between Luxtera and Others?\"\n  },\n  {\n    \"input\": \"What is the acquisition with the highest Purchase Consideration?\",\n    \"output\": \"What is the highest purchase consideration acquisition?\"\n  },\n  {\n    \"input\": \"What was the total value of technology products purchased before accounting for any deductions or expenses?\",\n    \"output\": \"What was the amount of gross purchased technology?\"\n  },\n  {\n    \"input\": \"What is the net value difference between Technology and Customer relationships?\",\n    \"output\": \"What was the difference in the net values between Technology and Customer relationships?\"\n  },\n  {\n    \"input\": \"What was the numerical difference in the net values 
specifically attributed to Technology and Customer relationships in relation to a certain context?\",\n    \"output\": \"What was the difference in the net values between Technology and Customer relationships?\"\n  },\n  {\n    \"input\": \"Which years does the table provide information for the company's activity related to their product warranty liability?\",\n    \"output\": \"For which specific years does the table display data regarding the company's product warranty liability?\"\n  },\n  {\n    \"input\": \"Which years does the table provide information for the company's activity related to their product warranty liability?\",\n    \"output\": \"For which years does the table displayed provide information about the company's activity in terms of their product warranty liability?\"\n  },\n  {\n    \"input\": \"What were the Acquisitions and divestitures in 2019?\",\n    \"output\": \"What were the acquisitions and divestitures that took place in the year 2019?\"\n  },\n  {\n    \"input\": \"What was the difference in the amount allocated for warranty provisions issued in 2017 compared to 2018?\",\n    \"output\": \"What was the change in Provisions for warranties issued between 2017 and 2018?\"\n  },\n  {\n    \"input\": \"What was the difference in the amount allocated for warranties issued between the years 2017 and 2018?\",\n    \"output\": \"What was the change in Provisions for warranties issued between 2017 and 2018?\"\n  },\n  {\n    \"input\": \"For which specific years does the table offer data on cash and cash equivalents as well as investments?\",\n    \"output\": \"Which years does the table provide information for  cash and cash equivalents and investments?\"\n  },\n  {\n    \"input\": \"What was the change in the value of available-for-sale debt investments, indicating whether there was an increase or decrease?\",\n    \"output\": \"What was the increase (decrease) in available-for-sale debt investments?\"\n  },\n  {\n    \"input\": \"In 2019, how 
did the distinction between cash and cash equivalents and available-for-sale debt investments manifest and how were they different from each other?\",\n    \"output\": \"What was the difference between cash and cash equivalents and Available-for-sale debt investments in 2019?\"\n  },\n  {\n    \"input\": \"What differentiates cash and cash equivalents from available-for-sale debt investments in the financial records for the year 2019?\",\n    \"output\": \"What was the difference between cash and cash equivalents and Available-for-sale debt investments in 2019?\"\n  },\n  {\n    \"input\": \"What was the change in Interest expense between 2017 and 2018?\",\n    \"output\": \"What is the difference in the amount of interest expense incurred from 2017 to 2018?\"\n  },\n  {\n    \"input\": \"What was the change in Interest expense between 2017 and 2018?\",\n    \"output\": \"What was the difference in the amount of money paid for interest in 2017 compared to 2018?\"\n  },\n  {\n    \"input\": \"What was the difference in the amount of income earned from interest between the years 2018 and 2019?\",\n    \"output\": \"What was the change in Interest income between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What is the amount of federal net operating loss carryforwards that the company had for income tax purposes in the year 2019?\",\n    \"output\": \"What was the company's federal net operating loss carryforwards for income tax purposes in 2019?\"\n  },\n  {\n    \"input\": \"What was the amount of the company's federal net operating loss carryforwards specifically for income tax purposes in the year 2019?\",\n    \"output\": \"What was the company's federal net operating loss carryforwards for income tax purposes in 2019?\"\n  },\n  {\n    \"input\": \"What was the percentage change in total deferred tax assets between 2018 and 2019?\",\n    \"output\": \"What is the percentage increase or decrease in the total amount of deferred tax assets from 2018 to 2019?\"\n  
},\n  {\n    \"input\": \"What was the percentage change in total deferred tax assets between 2018 and 2019?\",\n    \"output\": \"What was the percentage increase or decrease in the total amount of deferred tax assets from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"In fiscal year 2019, what was the total amount of net interest expense recognized by the company?\",\n    \"output\": \"How much net interest expense did the company recognize in fiscal year 2019?\"\n  },\n  {\n    \"input\": \"How much interest expense, after deducting interest income, did the company record as a net amount during fiscal year 2019?\",\n    \"output\": \"How much net interest expense did the company recognize in fiscal year 2019?\"\n  },\n  {\n    \"input\": \"What were the Additions for tax positions of prior years in 2019?\",\n    \"output\": \"What were the additions made to the tax positions of previous years in the year 2019?\"\n  },\n  {\n    \"input\": \"What was the percentage change in net income between 2018 and 2019?\",\n    \"output\": \"What was the percentage difference in net income from 2018 to 2019 and how can it be calculated?\"\n  },\n  {\n    \"input\": \"Why has the direct effect of foreign currency fluctuations on revenue not been material?\",\n    \"output\": \"Why has the direct impact of changes in foreign currency exchange rates on our revenue not had a significant financial effect so far? 
Please provide an explanation for the lack of material impact despite fluctuations in currency values.\"\n  },\n  {\n    \"input\": \"Why has the direct effect of foreign currency fluctuations on revenue not been material?\",\n    \"output\": \"Why has the impact of foreign currency fluctuations on revenue not been significant enough to have a material effect?\"\n  },\n  {\n    \"input\": \"What was the difference in the fair value of forward contracts that were sold between the years 2018 and 2019?\",\n    \"output\": \"What was the change in the fair value of sold forward contracts between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"Why does the company prioritize inventory and supply chain management as a specific area of focus?\",\n    \"output\": \"Why is inventory and supply chain management an area of focus for the company?\"\n  },\n  {\n    \"input\": \"Why does the company prioritize inventory and supply chain management as a focal point for its operations?\",\n    \"output\": \"Why is inventory and supply chain management an area of focus for the company?\"\n  },\n  {\n    \"input\": \"What was the total sum of purchase commitments made in the year 2018?\",\n    \"output\": \"What were the total purchase commitments in 2018?\"\n  },\n  {\n    \"input\": \"What was the change in purchase commitments that were less than 1 year between 2018 and 2019?\",\n    \"output\": \"What was the difference in purchase commitments of less than 1 year between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What was the change in purchase commitments that were less than 1 year between 2018 and 2019?\",\n    \"output\": \"What was the difference in purchase commitments shorter than one year between the year 2018 and the year 2019?\"\n  },\n  {\n    \"input\": \"For how long did contracts ranging from 1 to 3 years surpass a cumulative worth of $700 million?\",\n    \"output\": \"How many years did commitments that were 1 to 3 years exceed $700 million?\"\n  },\n  {\n    
\"input\": \"What was the percentage change in product revenue for the APJC region from 2017 to 2018?\",\n    \"output\": \"What was the change in the product revenue from APJC between 2017 and 2018?\"\n  },\n  {\n    \"input\": \"What was the change in the product revenue from Americas between 2017 and 2018?\",\n    \"output\": \"What was the difference in the amount of revenue generated from products in the Americas region between the years 2017 and 2018?\"\n  },\n  {\n    \"input\": \"What was the numerical difference in the total value of net property and equipment in International regions between the years 2017 and 2018?\",\n    \"output\": \"What was the change in net property and equipment from International regions between 2017 and 2018?\"\n  },\n  {\n    \"input\": \"What is the percentage increase or decrease in the total net property and equipment from 2018 to 2019?\",\n    \"output\": \"What was the percentage change in the total net property and equipment between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What was the percentage change in revenue generated from security services between 2017 and 2018?\",\n    \"output\": \"What was the change in revenue from security between 2017 and 2018?\"\n  },\n  {\n    \"input\": \"What was the total amount paid for the shares during the period from April 28, 2019 to May 25, 2019 and May 26, 2019 to June 22, 2019?\",\n    \"output\": \"What was the total payment for the shares from April 28, 2019 to May 25, 2019 and from May 26, 2019 to June 22, 2019?\"\n  },\n  {\n    \"input\": \"What was the total amount paid for the shares during the period from April 28, 2019 to May 25, 2019 and May 26, 2019 to June 22, 2019?\",\n    \"output\": \"What was the overall payment for the shares between April 28, 2019 and May 25, 2019, as well as between May 26, 2019 and June 22, 2019?\"\n  },\n  {\n    \"input\": \"What was the average total amount paid for the shares during the period from April 28, 2019 to May 25, 2019 and 
May 26, 2019 to June 22, 2019?\",\n    \"output\": \"What was the average total payment for shares from April 28, 2019 to June 22, 2019, and how does it compare between the periods April 28, 2019 to May 25, 2019 and May 26, 2019 to June 22, 2019?\"\n  },\n  {\n    \"input\": \"What was the difference in the balance of Americas between the years 2018 and 2019?\",\n    \"output\": \"What was the change in balance from Americas between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What was the difference in America's balance between 2018 and 2019 and how did it change?\",\n    \"output\": \"What was the change in balance from Americas between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"Which years does the table provide information for the company's available-for-sale debt investments and equity investments?\",\n    \"output\": \"For which time period does the table provide data on the available-for-sale debt investments and equity investments of the company?\"\n  },\n  {\n    \"input\": \"\\\"What were the total provisions, also known as benefits, provided in this context?\",\n    \"output\": \"What were the total provisions (benefits)?\"\n  },\n  {\n    \"input\": \"What was the difference between total provisions (benefits) and net recoveries (write-offs)?\",\n    \"output\": \"What differentiates total provisions, also referred to as benefits, from net recoveries, which are write-offs?\"\n  },\n  {\n    \"input\": \"What was the allowance for credit loss for financed service contracts as a percentage of total allowance for credit loss in 2018?\",\n    \"output\": \"In 2018, what was the proportion or ratio of the allowance for credit loss specifically assigned to financed service contracts compared to the total allowance for credit loss?\"\n  },\n  {\n    \"input\": \"What was the allowance for credit loss for financed service contracts as a percentage of total allowance for credit loss in 2018?\",\n    \"output\": \"What was the percentage of the total 
allowance for credit loss in 2018 that was allocated specifically for financed service contracts?\"\n  },\n  {\n    \"input\": \"What is the percentage difference in Loss from continuing operations before income tax between the years 2018 and 2019?\",\n    \"output\": \"What is the percentage change in the Loss from continuing operations before income tax from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What is the percentage difference in the Loss from continuing operations before income tax between the years 2018 and 2019?\",\n    \"output\": \"What is the percentage change in the Loss from continuing operations before income tax from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What is the percentage change in the income tax benefit from 2018 to 2019?\",\n    \"output\": \"What is the percentage difference in the income tax benefit received between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"In which year did the net loss for the year exceed the net loss of any other year?\",\n    \"output\": \"In which year is there a greater net loss for the year?\"\n  },\n  {\n    \"input\": \"What does the net deferred tax liabilities include?\",\n    \"output\": \"What is included in the calculation of net deferred tax liabilities?\"\n  },\n  {\n    \"input\": \"What is the exact percentage change in the net deferred tax liabilities from 2018 to 2019?\",\n    \"output\": \"What is the percentage change in the net deferred tax liabilities from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What was the parent entity's current ratio in 2019?\",\n    \"output\": \"What is the current ratio of the parent entity in 2019?\"\n  },\n  {\n    \"input\": \"What was the parent entity's current ratio in 2019? Please provide the numerical value.\",\n    \"output\": \"What is the current ratio of the parent entity in 2019?\"\n  },\n  {\n    \"input\": \"What was the debts to assets ratio of the parent entity in the year 2018? 
Can you provide the specific ratio value or percentage?\",\n    \"output\": \"What is the debts to assets ratio of the parent entity in 2018?\"\n  },\n  {\n    \"input\": \"What is the debt-to-assets ratio of the parent company for the year 2018?\",\n    \"output\": \"What is the debts to assets ratio of the parent entity in 2018?\"\n  },\n  {\n    \"input\": \"What is the percentage change in the net cash generated or used for investing activities?\",\n    \"output\": \"What is the percentage change in the net cash provided from investing activities?\"\n  },\n  {\n    \"input\": \"What is the change in the net cash provided from operating activities from 2018 to 2019?\",\n    \"output\": \"What is the difference between the net cash provided from operating activities in 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What is the change in the net cash provided from operating activities from 2018 to 2019?\",\n    \"output\": \"What is the difference in the amount of net cash provided from operating activities between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What is the change in Net change in cash and cash equivalent from 2018 to 2019?\",\n    \"output\": \"What is the difference in the amount of cash and cash equivalents between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What is the change in Net change in cash and cash equivalent from 2018 to 2019?\",\n    \"output\": \"What is the difference in the net change in cash and cash equivalents between the years 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What was the percentage increase or decrease in the value of current assets from 2018 to 2019?\",\n    \"output\": \"What is the percentage change in current assets from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What is the current ratio in the year 2019 and what factors should be considered while determining it?\",\n    \"output\": \"What is the current ratio in 2019?\"\n  },\n  {\n    \"input\": \"What was the debts to assets ratio for the year 2019?\",\n 
   \"output\": \"What is the debts to assets ratio in 2019?\"\n  },\n  {\n    \"input\": \"\\\"What was the debts to assets ratio for the year 2019?\\\"\",\n    \"output\": \"What is the debts to assets ratio in 2019?\"\n  },\n  {\n    \"input\": \"What is the purpose and significance of trial commissions, and how are they defined?\",\n    \"output\": \"What are trial commissions?\"\n  },\n  {\n    \"input\": \"\\\"What is the purpose and definition of trial commissions?\\\"\",\n    \"output\": \"What are trial commissions?\"\n  },\n  {\n    \"input\": \"\\\"How does the Group calculate or determine its total income or financial proceeds?\\\"\",\n    \"output\": \"How does the Group determine its revenue?\"\n  },\n  {\n    \"input\": \"\\\"How does the Group calculate or establish its revenue? Provide details about the methods or criteria used for determining the Group's revenue.\\\"\",\n    \"output\": \"How does the Group determine its revenue?\"\n  },\n  {\n    \"input\": \"What is the percentage difference in the amount of upfront fees charged from 2018 to 2019?\",\n    \"output\": \"What is the percentage change in upfront fees from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What is the percentage difference in upfront fees between 2018 and 2019?\",\n    \"output\": \"What is the percentage change in upfront fees from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What is the change in the Business combination reserve between 2018 and 2019?\",\n    \"output\": \"What is the monetary difference in the Business combination reserve from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What is the percentage change in the total reserves from 2018 to 2019?\",\n    \"output\": \"By what percentage did the total reserves increase or decrease from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What is the percentage increase or decrease in the trail commission asset comparing the values from 2018 and 2019?\",\n    \"output\": \"What is the percentage change in the current 
trail commission asset from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What is the percentage change in the cash receipts from 2018 to 2019?\",\n    \"output\": \"What is the percentage increase or decrease in the cash receipts comparing the amounts in 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What are the payroll tax rates for both 2019 and 2018?\",\n    \"output\": \"What is the payroll tax for 2019 and 2018 respectively?\"\n  },\n  {\n    \"input\": \"\\\"What was the amount of post-tax loss incurred from discontinued operations in the year 2019?\\\"\",\n    \"output\": \"What is the Post-tax loss of  discontinued operations in 2019?\"\n  },\n  {\n    \"input\": \"\\\"In which year did the revenue increase compared to other years?\\\"\",\n    \"output\": \"In which year is there a higher revenue?\"\n  },\n  {\n    \"input\": \"In which specific year did the revenue increase, resulting in a higher amount compared to previous years?\",\n    \"output\": \"In which year is there a higher revenue?\"\n  },\n  {\n    \"input\": \"What is the percentage difference in the amount of lease liabilities between 2018 and 2019?\",\n    \"output\": \"What is the percentage change in the current lease liabilities from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What is the exact percentage difference in the lease liabilities as of 2018 compared to 2019?\",\n    \"output\": \"What is the percentage change in the current lease liabilities from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What method does the Company use to determine and calculate the amount for its provision?\",\n    \"output\": \"How does the Company calculate its provision?\"\n  },\n  {\n    \"input\": \"How does the Company determine the amount of money set aside for provisions and what factors are taken into consideration during the calculation process?\",\n    \"output\": \"How does the Company calculate its provision?\"\n  },\n  {\n    \"input\": \"What is the percentage change in the contract assets 
from 2018 to 2019?\",\n    \"output\": \"What is the percentage change in the value of the contract assets from the year 2018 to the year 2019?\"\n  },\n  {\n    \"input\": \"In which year does the number of outstanding shares have a higher value at the start of the period as compared to other years?\",\n    \"output\": \"In which year is the number of outstanding shares at the beginning of the period higher?\"\n  },\n  {\n    \"input\": \"What is the pre-tax discount rate that was utilized to determine the value of projected cash flows for the year 2019?\",\n    \"output\": \"What is the pre-tax discount rate applied to cash flow projections in 2019?\"\n  },\n  {\n    \"input\": \"What was the pre-tax discount rate used to calculate cash flow projections in 2019?\",\n    \"output\": \"What is the pre-tax discount rate applied to cash flow projections in 2019?\"\n  },\n  {\n    \"input\": \"In which year is the health CGU higher?\",\n    \"output\": \"\\\"In which specific year does the health Comprehensive Grand Unit (CGU) exhibit a higher value compared to other years?\\\"\"\n  },\n  {\n    \"input\": \"In which year is the car CGU higher?\",\n    \"output\": \"\\\"In which specific year does the car CGU (Car Gross Utilization) exhibit a higher value compared to other years? 
Please provide the year.\\\"\"\n  },\n  {\n    \"input\": \"What is the post-employment benefits in 2018?\",\n    \"output\": \"What were the post-employment benefits in 2018, and can you provide more details about them?\"\n  },\n  {\n    \"input\": \"What were the share-based payment transactions and their impact in 2019?\",\n    \"output\": \"What is the share-based payments in 2019?\"\n  },\n  {\n    \"input\": \"What were the share-based payments in 2019 and could you provide further details on this topic?\",\n    \"output\": \"What is the share-based payments in 2019?\"\n  },\n  {\n    \"input\": \"What is the percentage change in the post-employment benefits from 2018 to 2019?\",\n    \"output\": \"What is the percentage difference in post-employment benefits between the years 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What specific expenses or expenditure categories are included in the unallocated corporate costs for the current year?\",\n    \"output\": \"What do the unalloacated corporate costs include in the current year?\"\n  },\n  {\n    \"input\": \"What is the exact percentage change in revenue in Australia from the year 2018 to the year 2019?\",\n    \"output\": \"What is the percentage change in the revenue in Australia from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What is the revenue percentage change experienced in Asia between 2018 and 2019?\",\n    \"output\": \"What is the percentage change in the revenue in Asia from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What was the percentage increase or decrease in revenue in the Asian region from 2018 to 2019?\",\n    \"output\": \"What is the percentage change in the revenue in Asia from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What was the total number of outstanding stock options for VMware in 2017?\",\n    \"output\": \"What was the outstanding number of shares for VMware stock options in 2017?\"\n  },\n  {\n    \"input\": \"What was the total number of shares allotted for VMware 
stock options in the year 2017?\",\n    \"output\": \"What was the outstanding number of shares for VMware stock options in 2017?\"\n  },\n  {\n    \"input\": \"What was the number of granted shares from Pivotal Stock Options in 2018?\",\n    \"output\": \"\\\"How many shares were granted by Pivotal Stock Options during the year 2018?\\\"\"\n  },\n  {\n    \"input\": \"For how long did the Weighted-Average Exercise Price (per share) for VMware stock options remain above $60.00?\",\n    \"output\": \"How many years did the outstanding Weighted-Average Exercise Price (per share) for VMware stock options exceed $60.00?\"\n  },\n  {\n    \"input\": \"What was the difference in the overall amount of money earned by the company in 2018 compared to 2019?\",\n    \"output\": \"What was the change in total revenue between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What was the difference in total revenue between the years 2018 and 2019?\",\n    \"output\": \"What was the change in total revenue between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"How many years did total revenue exceed $500 million?\",\n    \"output\": \"For how many consecutive years has the total revenue been greater than $500 million?\"\n  },\n  {\n    \"input\": \"What was the percentage change in net income between 2019 and 2020?\",\n    \"output\": \"What was the percentage difference in net income from 2019 to 2020, and how can we calculate it accurately?\"\n  },\n  {\n    \"input\": \"\\\"What was the variation in the increase in goodwill associated with mergers and acquisitions from 2019 to 2020?\\\"\",\n    \"output\": \"What was the change in the Increase in goodwill related to business combinations between 2019 and 2020?\"\n  },\n  {\n    \"input\": \"What was the difference in balance at the start of 2019 and 2020?\",\n    \"output\": \"What was the change in balance at the beginning of the year between 2019 and 2020?\"\n  },\n  {\n    \"input\": \"What was the difference in balance at the 
beginning of 2019 and 2020?\",\n    \"output\": \"What was the change in balance at the beginning of the year between 2019 and 2020?\"\n  },\n  {\n    \"input\": \"What did the Senior Notes consists of?\",\n    \"output\": \"What were the contents or components of the Senior Notes?\"\n  },\n  {\n    \"input\": \"What was the difference between total Senior Notes and total Notes payable to Dell?\",\n    \"output\": \"What is the distinction between the combined value of Senior Notes and the combined value of Notes payable to Dell?\"\n  },\n  {\n    \"input\": \"What differentiates the total amount of Term Loans from the total amount of Future Lease Commitments?\",\n    \"output\": \"What was the difference between total Term Loans and total Future Lease Commitments?\"\n  },\n  {\n    \"input\": \"What differentiates the total amount of Term Loans from the total value of Future Lease Commitments? Elaborate on the distinctions between these two financial concepts.\",\n    \"output\": \"What was the difference between total Term Loans and total Future Lease Commitments?\"\n  },\n  {\n    \"input\": \"Which specific time periods are covered in the table containing data on long-lived assets categorized by geographic area? 
These assets primarily consist of net property and equipment.\",\n    \"output\": \"Which years does the table include information for long-lived assets by geographic area, which primarily include property and equipment, net?\"\n  },\n  {\n    \"input\": \"How long has the outstanding balance owed to related parties been surpassing $100 million?\",\n    \"output\": \"How many years did current amounts due to related parties exceed $100 million?\"\n  },\n  {\n    \"input\": \"What was the cumulative amount of sales invoices acknowledged and recorded as revenue throughout the entire fiscal year of 2020?\",\n    \"output\": \"What was the total billings recognized during the year ended 2020?\"\n  },\n  {\n    \"input\": \"\\\"What was the exact amount of billings that were recorded and acknowledged as revenue throughout the entirety of the year that concluded in 2020?\\\"\",\n    \"output\": \"What was the total billings recognized during the year ended 2020?\"\n  },\n  {\n    \"input\": \"What was the change in Unearned software maintenance revenue between 2019 and 2020?\",\n    \"output\": \"\\\"What was the difference in Unearned software maintenance revenue from 2019 to 2020 and how did it change?\\\"\"\n  },\n  {\n    \"input\": \"What was the change in Unearned software maintenance revenue between 2019 and 2020?\",\n    \"output\": \"What was the difference in Unearned software maintenance revenue from 2019 to 2020?\"\n  },\n  {\n    \"input\": \"What is the percentage of unearned revenue in the total revenue for the years 2019 and 2020?\",\n    \"output\": \"What was the percentage of total unearned revenue between 2019 and 2020?\"\n  },\n  {\n    \"input\": \"\\\"What was the percentage of unearned revenue as a proportion of the total revenue generated in the years 2019 and 2020?\\\"\",\n    \"output\": \"What was the percentage of total unearned revenue between 2019 and 2020?\"\n  },\n  {\n    \"input\": \"What was the average price per share in 2018, taking 
into account the weight assigned to each share?\",\n    \"output\": \"What was the Weighted-average price per share in 2018?\"\n  },\n  {\n    \"input\": \"For how many years did the repurchase of Class A common stock surpass a cumulative value of $10,000 million?\",\n    \"output\": \"How many years did Class A common stock repurchased exceed $10,000 million?\"\n  },\n  {\n    \"input\": \"For how many years did the repurchase of Class A common stock amount to more than $10 billion?\",\n    \"output\": \"How many years did Class A common stock repurchased exceed $10,000 million?\"\n  },\n  {\n    \"input\": \"For which years does the table provide details regarding the exclusion of weighted-average common share equivalents of Class A common stock from the calculations of diluted net income per share?\",\n    \"output\": \"Which years does the table provide information for the weighted-average common share equivalents of Class A common stock that were excluded from the diluted net income per share calculations?\"\n  },\n  {\n    \"input\": \"What were the employee stock options for the year 2019?\",\n    \"output\": \"What was the Employee stock options in 2019?\"\n  },\n  {\n    \"input\": \"What were the employee stock options offered in 2019?\",\n    \"output\": \"What was the Employee stock options in 2019?\"\n  },\n  {\n    \"input\": \"How does Dell engage in the procurement of products and services from other companies?\",\n    \"output\": \"How does Dell purchase products and services from the company?\"\n  },\n  {\n    \"input\": \"What was the percentage change in the internal-use revenue between 2019 and 2020?\",\n    \"output\": \"What is the percentage difference in internal-use revenue from 2019 to 2020?\"\n  },\n  {\n    \"input\": \"Which years does the table provide information for accrued expenses and other?\",\n    \"output\": \"For which specific years does the provided table offer information related to accrued expenses and other similar 
items?\"\n  },\n  {\n    \"input\": \"Which years does the table provide information for accrued expenses and other?\",\n    \"output\": \"For which specific years does the table contain data regarding accrued expenses and other items?\"\n  },\n  {\n    \"input\": \"What was the difference in the total amount of expenses that accumulated over time between the years 2019 and 2020?\",\n    \"output\": \"What was the change in the total accrued expenses between 2019 and 2020?\"\n  },\n  {\n    \"input\": \"What is the difference in the total amount of expenses that accumulated over time between 2019 and 2020?\",\n    \"output\": \"What was the change in the total accrued expenses between 2019 and 2020?\"\n  },\n  {\n    \"input\": \"In 2019, what were the specific support and administrative costs incurred by Dell's subsidiary?\",\n    \"output\": \"What was the Dell subsidiary support and administrative costs in 2019?\"\n  },\n  {\n    \"input\": \"What was the change in Purchases and leases of products and purchases of services between 2018 and 2019?\",\n    \"output\": \"What was the difference in the amount spent on purchasing and leasing products as well as purchasing services from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What was the change in Purchases and leases of products and purchases of services between 2018 and 2019?\",\n    \"output\": \"What was the difference in the amount spent on purchasing and leasing products and purchasing services from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"For how many consecutive years did Dell subsidiary incur support and administrative costs exceeding $200 million?\",\n    \"output\": \"How many years did Dell subsidiary support and administrative costs exceed $200 million?\"\n  },\n  {\n    \"input\": \"What was the percentage change in the Dell subsidiary support and administrative costs between 2019 and 2020?\",\n    \"output\": \"What percentage increase or decrease was observed in the support and administrative 
costs of Dell's subsidiary from 2019 to 2020?\"\n  },\n  {\n    \"input\": \"What was the significance or meaning of the aggregate intrinsic value?\",\n    \"output\": \"What did the aggregate intrinsic value represent?\"\n  },\n  {\n    \"input\": \"What differentiates outstanding options that are both exercisable and vested as well as expected to vest?\",\n    \"output\": \"What was the difference between outstanding options that were exercisable and vested and expected to vest?\"\n  },\n  {\n    \"input\": \"What was the specific amount allocated for the federal income tax provision in the year 2019?\",\n    \"output\": \"What was the current federal income tax provision in 2019?\"\n  },\n  {\n    \"input\": \"What was the specific provision for federal income tax in 2019?\",\n    \"output\": \"What was the current federal income tax provision in 2019?\"\n  },\n  {\n    \"input\": \"What was the change in the current federal income tax provision between 2018 and 2019?\",\n    \"output\": \"What was the difference in the amount allocated for federal income tax in 2018 compared to 2019?\"\n  },\n  {\n    \"input\": \"What was the percentage change in the Total income tax provision between 2018 and 2019?\",\n    \"output\": \"What was the precise percentage increase or decrease in the Total income tax provision from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What was the percentage change in the Total income tax provision between 2018 and 2019?\",\n    \"output\": \"What is the percentage difference in the Total income tax provision from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What was the specific monetary value of Restricted cash categorized under other current assets in the financial records for the year 2019?\",\n    \"output\": \"What was the amount of Restricted cash within other current assets in 2019?\"\n  },\n  {\n    \"input\": \"What were the specific additions made to tax positions related to the current year of 2020 in the field of 
taxation?\",\n    \"output\": \"What were the additions to tax positions related to current year in 2020?\"\n  },\n  {\n    \"input\": \"What categories are encompassed within the scope of purchases and leases of products, and what does this also include in terms of purchases?\",\n    \"output\": \"What did Purchases and leases of products and purchases of services include?\"\n  },\n  {\n    \"input\": \"What was the change in Purchases and leases of products and purchases of services between 2018 and 2019?\",\n    \"output\": \"What was the year-on-year difference in the amounts spent on purchasing and leasing products and purchasing services between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What was the change in Purchases and leases of products and purchases of services between 2018 and 2019?\",\n    \"output\": \"What was the difference in the amount spent on purchasing and leasing products, as well as purchasing services, from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"For how long did the subsidiary of Dell exceed $150 million in annual support and administrative costs?\",\n    \"output\": \"How many years did Dell subsidiary support and administrative costs exceed $150 million?\"\n  },\n  {\n    \"input\": \"What specific expenses or liabilities are encompassed within the current balances owed to affiliated individuals or entities?\",\n    \"output\": \"What did the current amounts due to related parties include?\"\n  },\n  {\n    \"input\": \"What specific items or transactions are included in the present outstanding balances owed to related parties?\",\n    \"output\": \"What did the current amounts due to related parties include?\"\n  },\n  {\n    \"input\": \"For how long has the current outstanding amount owed by related parties been over $1,000 million?\",\n    \"output\": \"How many years did current amount due from related parties exceed $1,000 million?\"\n  },\n  {\n    \"input\": \"For how long has the present outstanding balance owed by 
affiliated organizations been over $1,000 million?\",\n    \"output\": \"How many years did current amount due from related parties exceed $1,000 million?\"\n  },\n  {\n    \"input\": \"What was the percentage increase or decrease in the net current amount due from related parties from 2019 to 2020?\",\n    \"output\": \"What was the percentage change in the net current amount due from related parities between 2019 and 2020?\"\n  },\n  {\n    \"input\": \"What was the change in Additions to intangible assets between 2019 and 2020?\",\n    \"output\": \"What was the difference in the amount of intangible assets added between the years 2019 and 2020?\"\n  },\n  {\n    \"input\": \"What is the total amount of lease liabilities for operating leases recorded on the current financial statements?\",\n    \"output\": \"What were the current lease liabilities for operating leases?\"\n  },\n  {\n    \"input\": \"What is the total amount of lease liabilities for operating leases as of the present moment?\",\n    \"output\": \"What were the current lease liabilities for operating leases?\"\n  },\n  {\n    \"input\": \"How many lease liabilities for operating leases exceeded $500 million?\",\n    \"output\": \"How many operating lease liabilities were there that exceeded $500 million in amount?\"\n  },\n  {\n    \"input\": \"What differentiates current lease liabilities from non-current lease liabilities in the context of operating leases?\",\n    \"output\": \"What was the difference between current and non-current lease liabilities for operating leases?\"\n  },\n  {\n    \"input\": \"What differentiates current lease liabilities from non-current lease liabilities for operating leases?\",\n    \"output\": \"What was the difference between current and non-current lease liabilities for operating leases?\"\n  },\n  {\n    \"input\": \"What was the discrepancy in the aggregate lease obligations between operating leases and finance leases?\",\n    \"output\": \"What was the 
difference in total lease liabilities between operating leases and finance leases?\"\n  },\n  {\n    \"input\": \"Which years does the table provide information for unearned revenue?\",\n    \"output\": \"For which specific years does the table contain information regarding unearned revenue?\"\n  },\n  {\n    \"input\": \"Which years does the table provide information for unearned revenue?\",\n    \"output\": \"\\\"For which specific years does the table provide data on unearned revenue?\\\"\"\n  },\n  {\n    \"input\": \"What was the percentage change in Unearned subscription and SaaS revenue from 2019 to 2020?\",\n    \"output\": \"What was the change in Unearned subscription and SaaS revenue between 2019 and 2020?\"\n  },\n  {\n    \"input\": \"What was the foreign income in 2018?\",\n    \"output\": \"What was the total amount of income received from foreign sources during the year 2018?\"\n  },\n  {\n    \"input\": \"What is the percentage difference in the total income prior to income tax between the years 2019 and 2020?\",\n    \"output\": \"What was the percentage change in the total income before income tax between 2019 and 2020?\"\n  },\n  {\n    \"input\": \"What was the total value of construction projects underway in 2020 globally or within a specific country/region?\",\n    \"output\": \"What was the amount of construction in progress in 2020?\"\n  },\n  {\n    \"input\": \"What was the difference in the total value of property and equipment from 2019 to 2020?\",\n    \"output\": \"What was the change in Total property and equipment between 2019 and 2020?\"\n  },\n  {\n    \"input\": \"What was the change in the Unrealized losses on available-for-sale securities between 2017 and 2018?\",\n    \"output\": \"What was the difference in the amount of Unrealized losses on available-for-sale securities from 2017 to 2018?\"\n  },\n  {\n    \"input\": \"What was the percentage change in the Reclassification of realized transactions, net of taxes between 2018 
and 2019?\",\n    \"output\": \"What percentage represents the difference in the Reclassification of realized transactions, net of taxes between the years 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What impact did the alteration in the balance of net accounts receivable have on the financial situation?\",\n    \"output\": \"What was the effect of change in the net accounts receivable?\"\n  },\n  {\n    \"input\": \"What is the ratio of long-term deferred tax liability to accrued liabilities?\",\n    \"output\": \"What was the Long-term deferred tax liability as a ratio of Accrued liabilities?\"\n  },\n  {\n    \"input\": \"How did the Other interest expense change between the years 2017 and 2018?\",\n    \"output\": \"What was the change in Other interest expense between 2017 and 2018?\"\n  },\n  {\n    \"input\": \"What was the difference between the reported net sales and the reported cost of sales in terms of amount?\",\n    \"output\": \"What was the difference in amount as reported between net sales and cost of sales?\"\n  },\n  {\n    \"input\": \"What was the difference between Net income from continuing operations and Income before income taxes?\",\n    \"output\": \"What is the distinction between net income from continuing operations and income before income taxes? Please provide a comprehensive response, retaining all the details provided in the original question. 
Keep the question brief, succinct, and in the same language as the original.\"\n  },\n  {\n    \"input\": \"What was the impact of stock options and RSUs on dilution in 2019?\",\n    \"output\": \"What was the Dilutive effect of stock options and RSUs in 2019?\"\n  },\n  {\n    \"input\": \"How many consecutive years did the balance at the end of each year surpass $2 million?\",\n    \"output\": \"How many years did the balance at end of the year exceed $2 million?\"\n  },\n  {\n    \"input\": \"For how many years was the balance at the end of each year above $2 million?\",\n    \"output\": \"How many years did the balance at end of the year exceed $2 million?\"\n  },\n  {\n    \"input\": \"How many years did Intercompany prepaid tax asset amortization exceed $7 million?\",\n    \"output\": \"For how many years was the amortization of Intercompany prepaid tax asset greater than $7 million?\"\n  },\n  {\n    \"input\": \"What was the service cost in 2018?\",\n    \"output\": \"What was the cost of the service provided in 2018?\"\n  },\n  {\n    \"input\": \"What was the service cost in 2018?\",\n    \"output\": \"What was the cost of the service in the year 2018 specifically?\"\n  },\n  {\n    \"input\": \"What was the total amount of interest incurred in the year 2019?\",\n    \"output\": \"What was the interest cost in 2019?\"\n  },\n  {\n    \"input\": \"What was the total expense incurred as interest in the year 2019?\",\n    \"output\": \"What was the interest cost in 2019?\"\n  },\n  {\n    \"input\": \"What was the percentage difference in the Discount rate from 2018 to 2019?\",\n    \"output\": \"What was the change in the Discount rate between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What was the specific difference in the Discount rate from the year 2018 to the year 2019?\",\n    \"output\": \"What was the change in the Discount rate between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"Why could the sums fail to equal the overall total?\",\n    
\"output\": \"Why might amounts may not add to the total?\"\n  },\n  {\n    \"input\": \"\\\"What was the numerical distinction between the total amount obtained from sales after deducting all applicable costs, and the total revenue generated from sales before deducting any expenses or costs?\\\"\",\n    \"output\": \"What was the difference between the total net sales and gross profit?\"\n  },\n  {\n    \"input\": \"What is the calculated percentage change in the operating income from the third quarter to the fourth quarter?\",\n    \"output\": \"What was the percentage change in the Operating income between the third and fourth quarter?\"\n  },\n  {\n    \"input\": \"What was the change in the cost of sales between 2017 and 2018?\",\n    \"output\": \"How much did the cost of sales change from 2017 to 2018?\"\n  },\n  {\n    \"input\": \"What was the percentage change in the Net deferred tax asset between 2018 and 2019?\",\n    \"output\": \"What was the percentage increase or decrease in the amount of Net deferred tax asset from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What was the percentage change in the Net deferred tax asset between 2018 and 2019?\",\n    \"output\": \"What was the percentage increase or decrease in the Net deferred tax asset from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What is the percentage difference in the total balance from 2018 to 2019? 
Calculate using the formula for percentage change.\",\n    \"output\": \"What was the percentage change in the total balance between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What was the percentage increase or decrease in the total balance from 2018 to 2019?\",\n    \"output\": \"What was the percentage change in the total balance between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"For how long, in terms of number of years, has the cost of sales exceeded 40% of net sales?\",\n    \"output\": \"How many years did cost of sales of net sales exceed 40%?\"\n  },\n  {\n    \"input\": \"What is the percentage increase or decrease in operating income as a proportion of net sales from 2018 to 2019?\",\n    \"output\": \"What was the percentage change in operating income of net sales between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What was the exact percentage increase or decrease in operating income of net sales from 2018 to 2019?\",\n    \"output\": \"What was the percentage change in operating income of net sales between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"How many years have there been increases in tax positions related to the current year that exceeded $30 million?\",\n    \"output\": \"How many years did Increases related to current year tax positions exceed $30 million?\"\n  },\n  {\n    \"input\": \"What was the percentage change in the Ending balance between 2018 and 2019?\",\n    \"output\": \"What was the percentage difference in the Ending balance from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What was the percentage change in the Ending balance between 2018 and 2019?\",\n    \"output\": \"What is the percent increase or decrease in the Ending balance from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"According to what were the plan benefits provided, and to what were they in accordance with?\",\n    \"output\": \"What were plan benefits provided in accordance with?\"\n  },\n  {\n    \"input\": \"In accordance with what, were the plan 
benefits provided? Can you expand on the specific benefits mentioned in the plan?\",\n    \"output\": \"What were plan benefits provided in accordance with?\"\n  },\n  {\n    \"input\": \"What were the total service costs incurred during the year 2018?\",\n    \"output\": \"What were the service costs in 2018?\"\n  },\n  {\n    \"input\": \"What were the specific costs associated with services provided in 2018?\",\n    \"output\": \"What were the service costs in 2018?\"\n  },\n  {\n    \"input\": \"What were the total interest expenses incurred in the year 2017?\",\n    \"output\": \"What were the interest costs in 2017?\"\n  },\n  {\n    \"input\": \"What was the total amount spent on interest expenses in the year 2017?\",\n    \"output\": \"What were the interest costs in 2017?\"\n  },\n  {\n    \"input\": \"What was the difference in the amount of Amortization of actuarial loss between 2018 and 2019?\",\n    \"output\": \"What was the change in the Amortization of actuarial loss between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What was the difference in the amount allocated for the gradual reduction of actuarial loss in the financial statements between the years 2018 and 2019?\",\n    \"output\": \"What was the change in the Amortization of actuarial loss between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What was the percentage change in the net pension period cost between 2018 and 2019?\",\n    \"output\": \"What was the percentage increase or decrease in the net pension period cost from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What was the percentage change in the net pension period cost between 2018 and 2019?\",\n    \"output\": \"What was the percentage difference in the net pension period cost from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"How was the lower of cost and net realizable value determined as the valuation method for inventories? 
Please provide a detailed explanation of the process.\",\n    \"output\": \"How were inventories valued at the lower of cost and net realizable value?\"\n  },\n  {\n    \"input\": \"What was the quantity or volume of raw materials used in the year 2018?\",\n    \"output\": \"What was the amount of raw materials in 2018?\"\n  },\n  {\n    \"input\": \"For how long has the value of Finished goods remained above $200 million?\",\n    \"output\": \"How many years did the amount of Finished goods exceed $200 million?\"\n  },\n  {\n    \"input\": \"What was the total value of both Core and developed technology assets combined?\",\n    \"output\": \"What was the gross amount of Core and developed technology assets?\"\n  },\n  {\n    \"input\": \"What was the value assigned to each share granted on a specific date at March 31, 2017, taking into account their respective weights?\",\n    \"output\": \"What was the Weighted Average Grant Date Fair Value for nonvested shares at March 31, 2017?\"\n  },\n  {\n    \"input\": \"What was the percentage increase or decrease in the number of shares that have not yet vested from 2018 to 2019?\",\n    \"output\": \"What was the percentage change in the number of nonvested shares between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What was the cumulative intrinsic value of options and stock appreciation rights (SARs) exercised throughout the fiscal years ending on March 31, 2019?\",\n    \"output\": \"What was the total intrinsic value of options and SARs exercised during the years ended March 31, 2019?\"\n  },\n  {\n    \"input\": \"In 2019, how many options and SAR (stock appreciation rights) shares were available for exercise?\",\n    \"output\": \"What was the number of option and SAR shares exercisable in 2019?\"\n  },\n  {\n    \"input\": \"Which years does the table provide the number of outstanding shares for?\",\n    \"output\": \"Which specific years are mentioned in the table that provides the information regarding the 
number of outstanding shares?\"\n  },\n  {\n    \"input\": \"What was the difference in the Weighted Average Exercise Price per Share for outstanding shares between the years 2017 and 2018?\",\n    \"output\": \"What was the change in the Weighted Average Exercise Price per Share for outstanding shares between 2017 and 2018?\"\n  },\n  {\n    \"input\": \"\\\"What was the calculated percentage difference in the quantity of outstanding shares from 2018 to 2019?\\\"\",\n    \"output\": \"What was the percentage change in the number of outstanding shares between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"For which specific years does the table provide information on Identifiable long-lived assets (property, plant, and equipment net of accumulated amortization) within different geographic areas?\",\n    \"output\": \"Which years does the table provide information for Identifiable long-lived assets (consisting of property, plant and equipment net of accumulated amortization) by geographic area?\"\n  },\n  {\n    \"input\": \"Which geographic areas are included in the table that provides information on identifiable long-lived assets, specifically property, plant, and equipment (net of accumulated amortization) for different years?\",\n    \"output\": \"Which years does the table provide information for Identifiable long-lived assets (consisting of property, plant and equipment net of accumulated amortization) by geographic area?\"\n  },\n  {\n    \"input\": \"What was the total value of assets held in different countries during the year 2019?\",\n    \"output\": \"What was the amount of assets in Various other countries in 2019?\"\n  },\n  {\n    \"input\": \"What was the total value of assets in various countries in 2019?\",\n    \"output\": \"What was the amount of assets in Various other countries in 2019?\"\n  },\n  {\n    \"input\": \"What percentage increase or decrease occurred in the total value of long-lived assets from 2018 to 2019?\",\n    \"output\": 
\"What was the percentage change in total long-lived assets between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What was the change in the Deferred expense for State between 2018 and 2019?\",\n    \"output\": \"What is the difference in the amount of Deferred expense for the State from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What was the change in the Deferred expense for State between 2018 and 2019?\",\n    \"output\": \"What was the difference in the amount of Deferred expense for State between the years 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What is the cumulative balance of all time deposits?\",\n    \"output\": \"What was the total balance time deposits?\"\n  },\n  {\n    \"input\": \"What is the aggregate amount of money held in time deposits at present?\",\n    \"output\": \"What was the total balance time deposits?\"\n  },\n  {\n    \"input\": \"What was the specific balance of Inventories on April 1, 2018, as recorded in the financial records or statements?\",\n    \"output\": \"What was the balance of Inventories on April 1, 2018?\"\n  },\n  {\n    \"input\": \"What was the adjustment made in ASC 606 for other current assets? 
Please provide details regarding how this adjustment affected the accounting treatment and reporting of other current assets under ASC 606.\",\n    \"output\": \"What was the adjustment from ASC 606 for other current assets?\"\n  },\n  {\n    \"input\": \"How many liabilities had a balance on March 31, 2018 that exceeded $300 million?\",\n    \"output\": \"How many liabilities, as of March 31, 2018, had a balance surpassing $300 million?\"\n  },\n  {\n    \"input\": \"What was the percentage change in Other assets specifically attributed to the adjustments made?\",\n    \"output\": \"What was the percentage change in Other assets due to the adjustments?\"\n  },\n  {\n    \"input\": \"What were the net sales in 2016?\",\n    \"output\": \"What was the total amount of sales generated in the year 2016?\"\n  },\n  {\n    \"input\": \"What was the difference in net sales from 2016 to 2017, and how can this information assist in better answering the question?\",\n    \"output\": \"What was the change in net sales between 2016 and 2017?\"\n  },\n  {\n    \"input\": \"What was the difference in net sales revenue from 2016 to 2017?\",\n    \"output\": \"What was the change in net sales between 2016 and 2017?\"\n  },\n  {\n    \"input\": \"Which years did assets from Thailand exceed $200 million?\",\n    \"output\": \"Which specific years did the total value of assets originating from Thailand surpass $200 million?\"\n  },\n  {\n    \"input\": \"Which years did assets from Thailand exceed $200 million?\",\n    \"output\": \"In which specific years did the total value of assets originating from Thailand surpass $200 million?\"\n  },\n  {\n    \"input\": \"What was the percentage change in the total long-lived assets between 2018 and 2019?\",\n    \"output\": \"What was the percentage difference in the total value of long-lived assets from 2018 to 2019, indicating whether it increased or decreased?\"\n  },\n  {\n    \"input\": \"In which years was the total annual compensation 
calculated in?\",\n    \"output\": \"What are the specific years for which the total annual compensation was calculated?\"\n  },\n  {\n    \"input\": \"Which geographic locations are taken into account for calculating the overall current tax expense as mentioned in the table?\",\n    \"output\": \"What are the geographic locations in the table considered when calculating the total current tax expense?\"\n  },\n  {\n    \"input\": \"What is the highest recorded tax expense in Germany and in which specific year did it occur?\",\n    \"output\": \"In which year was the current tax expense in Germany the largest?\"\n  },\n  {\n    \"input\": \"What is the highest recorded tax expense in Germany, and in which specific year was it observed?\",\n    \"output\": \"In which year was the current tax expense in Germany the largest?\"\n  },\n  {\n    \"input\": \"What was the amount of non-current assets in APJ in 2019?\",\n    \"output\": \"What was the specific value of non-current assets within the Asia-Pacific-Japan (APJ) region during the year 2019?\"\n  },\n  {\n    \"input\": \"During which specific years were the calculations of Non-Current Assets by Region conducted?\",\n    \"output\": \"In which years were the Non-Current Assets by Region calculated?\"\n  },\n  {\n    \"input\": \"In which year was the amount in Rest of Americas larger?\",\n    \"output\": \"In what specific year did the Rest of Americas region experience a higher amount compared to previous years?\"\n  },\n  {\n    \"input\": \"What was the numerical difference in the quantity of Rest of Americas in 2019 compared to 2018?\",\n    \"output\": \"What was the change in the amount in Rest of Americas in 2019 from 2018?\"\n  },\n  {\n    \"input\": \"What was the difference in the quantity of Rest of Americas in 2019 compared to 2018?\",\n    \"output\": \"What was the change in the amount in Rest of Americas in 2019 from 2018?\"\n  },\n  {\n    \"input\": \"How was the Total expense for the share-based 
payment plans of Executive Board members determined?\",\n    \"output\": \"How was the total expense for the share-based payment plans determined specifically for the Executive Board members?\"\n  },\n  {\n    \"input\": \"During which specific years is the Total Expense for Share-Based Payment given or provided?\",\n    \"output\": \"In which years is the Total Expense for Share-Based Payment provided?\"\n  },\n  {\n    \"input\": \"For which specific years is the Total Expense for Share-Based Payment given?\",\n    \"output\": \"In which years is the Total Expense for Share-Based Payment provided?\"\n  },\n  {\n    \"input\": \"What is the specific value or quantity for the EMEA region in 2019?\",\n    \"output\": \"What is the amount for EMEA in 2019?\"\n  },\n  {\n    \"input\": \"What is the amount for APJ in 2018?\",\n    \"output\": \"What was the total amount of APJ in the year 2018?\"\n  },\n  {\n    \"input\": \"When did Germany have the highest amount?\",\n    \"output\": \"In which year was the amount for Germany the largest?\"\n  },\n  {\n    \"input\": \"When did Germany experience its highest amount?\",\n    \"output\": \"In which year was the amount for Germany the largest?\"\n  },\n  {\n    \"input\": \"What is the specific monetary figure spent on employee benefits in the year 2019?\",\n    \"output\": \"What is the amount of employee benefits expenses in 2019?\"\n  },\n  {\n    \"input\": \"What was the percentage difference in the amount spent on pensions in 2019 compared to 2018?\",\n    \"output\": \"What was the percentage change in Pension expenses in 2019 from 2018?\"\n  },\n  {\n    \"input\": \"What was the total cost of services offered in the year 2019?\",\n    \"output\": \"What was the Cost of services in 2019?\"\n  },\n  {\n    \"input\": \"What was the specific cost of services rendered in the year 2019?\",\n    \"output\": \"What was the Cost of services in 2019?\"\n  },\n  {\n    \"input\": \"\\\"When did the Cost of services 
reach its highest value?\\\"\",\n    \"output\": \"In which year was Cost of services largest?\"\n  },\n  {\n    \"input\": \"In which specific year did the Cost of services reach its maximum value, considering all available data?\",\n    \"output\": \"In which year was Cost of services largest?\"\n  },\n  {\n    \"input\": \"What was the change in issued capital in 2019 from 2018?\",\n    \"output\": \"What was the net change in the amount of issued capital between the years 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What was the change in issued capital in 2019 from 2018?\",\n    \"output\": \"What was the difference in the amount of issued capital between 2019 and 2018? How much did the issued capital change from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What is the percentage of unused tax losses in 2019 that can be attributed to state tax loss carryforwards in the United States?\",\n    \"output\": \"How much of unused tax losses relate to U.S. state tax loss carryforwards in 2019?\"\n  },\n  {\n    \"input\": \"In which year was the amount Expiring after the following year the largest?\",\n    \"output\": \"In what year did the highest amount of expenses expire after the subsequent year?\"\n  },\n  {\n    \"input\": \"What is the meaning and scope of the rights associated with Bill McDermott?\",\n    \"output\": \"What do the rights for Bill McDermott refer to? 
\"\n  },\n  {\n    \"input\": \"What specific types of entitlements are included in the table when referring to annual pension entitlements?\",\n    \"output\": \"What kind of entitlements are annual pension entitlements as displayed in the table?\"\n  },\n  {\n    \"input\": \"Which specific years are the annual pension entitlements fully and legally secured to an individual?\",\n    \"output\": \"In which years are the annual pension entitlements vested?\"\n  },\n  {\n    \"input\": \"What was the profit before tax in 2019?\",\n    \"output\": \"What was the pre-tax profit for the fiscal year of 2019?\"\n  },\n  {\n    \"input\": \"What was the profit before tax in 2019?\",\n    \"output\": \"What was the pre-tax profit for the year 2019?\"\n  },\n  {\n    \"input\": \"\\\"What was the specific tax rate applicable in 2018 for individuals or businesses in a particular country?\\\"\",\n    \"output\": \"What was the applicable tax rate in 2018?\"\n  },\n  {\n    \"input\": \"What was the percentage change in Other in 2019 from 2018?\",\n    \"output\": \"What was the percentage difference in the category labeled \\\"Other\\\" between the years 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What market is the information specifically targeting and representing for Airtel?\",\n    \"output\": \"Which market does the information reflect for Airtel?\"\n  },\n  {\n    \"input\": \"What is the percentage increase in the number of mobile customers for AIS over a specific period?\",\n    \"output\": \"What is the % growth of mobile customers for AIS?\"\n  },\n  {\n    \"input\": \"Which associate had the best % growth in mobile customers?\",\n    \"output\": \"\\\"Which associate experienced the highest percentage growth in the number of mobile customers over a specific period of time?\\\"\"\n  },\n  {\n    \"input\": \"What specific components were encompassed within the exceptional items during the fiscal year 2018?\",\n    \"output\": \"What was included in the 
exceptional items in FY2018?\"\n  },\n  {\n    \"input\": \"What specific items were considered exceptional in the financial year 2018?\",\n    \"output\": \"What was included in the exceptional items in FY2018?\"\n  },\n  {\n    \"input\": \"What was the amount of cash generated from operations after accounting for capital expenditures and operating expenses in the specified year?\",\n    \"output\": \"What was the free cash flow for the year?\"\n  },\n  {\n    \"input\": \"What is the % change in EBITDA margin from 2018 to 2019?\",\n    \"output\": \"What is the percentage difference in EBITDA margin between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What is the % change in EBITDA margin from 2018 to 2019?\",\n    \"output\": \"What is the percentage change in EBITDA margin from the year 2018 to the year 2019?\"\n  },\n  {\n    \"input\": \"What is the absolute difference in net profit between 2018 and 2019?\",\n    \"output\": \"What is the change in net profit from 2018 to 2019 in absolute numbers?\"\n  },\n  {\n    \"input\": \"What does the line item labeled as 'Others' provide information about in a given context or dataset?\",\n    \"output\": \"What information does the line item 'Others' relate to?\"\n  },\n  {\n    \"input\": \"What is the average total amount owed in debt for the duration of the two-year period?\",\n    \"output\": \"What is the average gross debt across the 2 years?\"\n  },\n  {\n    \"input\": \"Why does this net deferred gain balance exist?\",\n    \"output\": \"Why is there a net deferred gain balance?\"\n  },\n  {\n    \"input\": \"What is the reason for the continued presence of a net deferred gain balance after Singtel completed the sale of its entire 100% stake in NLT to NetLink NBN Trust in July 2017?\",\n    \"output\": \"Why is there still a balance of net deferred gain since Singtel sold its 100% interest in NLT to NetLink NBN Trust in July 2017?\"\n  },\n  {\n    \"input\": \"What was the year when the net deferred gain 
balance reached its peak?\",\n    \"output\": \"In which year was the net deferred gain balance the highest?\"\n  },\n  {\n    \"input\": \"What is the subject matter or main focus mentioned in note 22?\",\n    \"output\": \"What is the topic of note 22?\"\n  },\n  {\n    \"input\": \"What is included in the \\\"Others\\\" category in the table?\",\n    \"output\": \"What does the line item \\\"Others\\\" in the table encompass?\"\n  },\n  {\n    \"input\": \"What is the Singtel's largest joint venture in terms of the proportion of the Group's ownership?\",\n    \"output\": \"Which is the largest joint venture of Singtel, in terms of the proportion of Group's ownership?\"\n  },\n  {\n    \"input\": \"Do the non-audit services offered by KPMG LLP have an impact on their independence in conducting audits?\",\n    \"output\": \"Does the non-audit services provided by KPMG LLP affect their independence?\"\n  },\n  {\n    \"input\": \"What is included in the compensation for key management personnel?\",\n    \"output\": \"What does key management personnel compensation comprise?\"\n  },\n  {\n    \"input\": \"What is included in the compensation package for key management personnel?\",\n    \"output\": \"What does key management personnel compensation comprise?\"\n  },\n  {\n    \"input\": \"What are the components included in directors' remuneration?\",\n    \"output\": \"What does directors' remuneration comprise of?\"\n  },\n  {\n    \"input\": \"To what topic does the content of note 18.1 correspond?\",\n    \"output\": \"What is the subject matter of note 18.1?\"\n  },\n  {\n    \"input\": \"What percentage of the company's derivative financial liabilities are classified as non-current liabilities disclosed in their financial statements?\",\n    \"output\": \"How many % of the company's derivative financial liabilities are being disclosed as non-current?\"\n  },\n  {\n    \"input\": \"What is Note 21 about?\",\n    \"output\": \"What is the topic of note 21?\"\n  
},\n  {\n    \"input\": \"Can you please provide details on the terms and conditions of the advances made by the shareholders?\",\n    \"output\": \"What are the terms of the shareholders' advances?\"\n  },\n  {\n    \"input\": \"\\\"What are the specific terms and conditions associated with the advances provided to shareholders?\\\"\",\n    \"output\": \"What are the terms of the shareholders' advances?\"\n  },\n  {\n    \"input\": \"How many factors need to be considered when calculating the balance for subsidiaries?\",\n    \"output\": \"How many factors are involved in calculating the balance for subsidiaries?\"\n  },\n  {\n    \"input\": \"\\\"What is typically included in the category of selling and administrative costs?\\\"\",\n    \"output\": \"What does selling and administrative costs include?\"\n  },\n  {\n    \"input\": \"How many distinct categories of operating expenses exist?\",\n    \"output\": \"How many different type of operating expenses are there?\"\n  },\n  {\n    \"input\": \"What is the mean value of the three highest subcategories within operating expenses for the year 2019?\",\n    \"output\": \"What is the average of the top 3 operating expenses subcategories in 2019?\"\n  },\n  {\n    \"input\": \"What is the average value of the three highest subcategories of operating expenses in the year 2019?\",\n    \"output\": \"What is the average of the top 3 operating expenses subcategories in 2019?\"\n  },\n  {\n    \"input\": \"What are the projected maximum future payouts for Barry Litwin and Thomas Clark under the NEO plan of 2019?\",\n    \"output\": \"What is the maximum estimated future payouts under the 2019 NEO plan for Barry Litwin and Thomas Clark?\"\n  },\n  {\n    \"input\": \"What is the maximum estimated future payout amount under the 2019 NEO plan specifically designated for Barry Litwin and Thomas Clark?\",\n    \"output\": \"What is the maximum estimated future payouts under the 2019 NEO plan for Barry Litwin and Thomas 
Clark?\"\n  },\n  {\n    \"input\": \"What are the projected highest expected future payouts under the 2019 NEO plan for both Manoj Shetty and Lawrence Reinhold?\",\n    \"output\": \"What is the maximum estimated future payouts under the 2019 NEO plan for Manoj Shetty and Lawrence Reinhold?\"\n  },\n  {\n    \"input\": \"What are the audit fees incurred by the company in 2018 and 2019?\",\n    \"output\": \"What were the audit fees that the company had to pay in the years 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What is the percentage difference in the total amount of fees paid by the company for all other expenses between the years 2018 and 2019?\",\n    \"output\": \"What is the percentage change in all other fees incurred by the company between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What percentage of common stock does Barry Litwin and Robert D. Rosenthal collectively own?\",\n    \"output\": \"What is the percent of common stock owned by Barry Litwin and Robert D. Rosenthal?\"\n  },\n  {\n    \"input\": \"What is the total compensation received by Robert D. Rosenthal and Chad M. Lindbloom respectively during fiscal 2019?\",\n    \"output\": \"What were the individual total compensations received by Robert D. Rosenthal and Chad M. Lindbloom in fiscal year 2019?\"\n  },\n  {\n    \"input\": \"What is the total compensation received by Paul S. Pearlman and Lawrence Reinhold respectively during fiscal 2019?\",\n    \"output\": \"What were the individual total compensations earned by Paul S. 
Pearlman and Lawrence Reinhold in fiscal year 2019?\"\n  },\n  {\n    \"input\": \"What is the definition and significance of stock awards?\",\n    \"output\": \"What does stock awards refer to?\"\n  },\n  {\n    \"input\": \"What are the Thomas Clark's accelerated vesting of stock options and unvested performance restricted stock units respectively?\",\n    \"output\": \"Can you please provide specific details about the accelerated vesting of stock options of Thomas Clark? Additionally, what are the details regarding the unvested performance restricted stock units held by Thomas Clark?\"\n  },\n  {\n    \"input\": \"What is the meaning or definition of the \\\"other\\\" description in the allowance for sales return in the year 2017?\",\n    \"output\": \"What does the \\\"other\\\" description in allowance for sales return in 2017 refer to?\"\n  },\n  {\n    \"input\": \"What is the cumulative amount of allowances for sales returns write-offs from 2017 to 2019?\",\n    \"output\": \"What is the total allowances for sales returns write-offs between 2017 to 2019?\"\n  },\n  {\n    \"input\": \"What is the total amount of allowances accounted for sales returns and write-offs from 2017 to 2019?\",\n    \"output\": \"What is the total allowances for sales returns write-offs between 2017 to 2019?\"\n  },\n  {\n    \"input\": \"What is the percentage difference in the allowance for deferred tax assets at the end of the period in 2018 compared to 2019?\",\n    \"output\": \"What is the percentage change in the allowance for deferred tax assets at the end of period between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What are Manoj Shetty's respective accelerated vesting of stock options and unvested performance restricted stock units respectively?\",\n    \"output\": \"What is the accelerated vesting schedule for Manoj Shetty's stock options? 
Additionally, how many unvested performance restricted stock units does he currently have?\"\n  },\n  {\n    \"input\": \"What are Manoj Shetty's respective accelerated vesting of stock options and unvested performance restricted stock units respectively?\",\n    \"output\": \"What is the accelerated vesting timeline for Manoj Shetty's stock options? Additionally, what is the status of his unvested performance restricted stock units?\"\n  },\n  {\n    \"input\": \"What is the difference in consolidated gross profits comparing 2019 to 2018, and also 2018 to 2017?\",\n    \"output\": \"What is the change in consolidated gross profits between 2019 vs 2018 and 2018 vs 2017 respectively?\"\n  },\n  {\n    \"input\": \"What is the total consolidated net sales in 2019 and 2018?\",\n    \"output\": \"What were the combined net sales for the fiscal years of 2019 and 2018?\"\n  },\n  {\n    \"input\": \"What is the total consolidated net sales in 2017 and 2018\",\n    \"output\": \"What were the combined net sales for the years 2017 and 2018?\"\n  },\n  {\n    \"input\": \"What is the total consolidated net sales in 2017 and 2018\",\n    \"output\": \"What were the combined net sales for 2017 and 2018?\"\n  },\n  {\n    \"input\": \"What is the change in consolidated gross profit between 2017 and 2018?\",\n    \"output\": \"What is the difference in consolidated gross profit for the company between the years 2017 and 2018, and how does the value change during this period?\"\n  },\n  {\n    \"input\": \"What is the change in consolidated gross profit between 2017 and 2018?\",\n    \"output\": \"What is the specific difference in consolidated gross profit when comparing the financial years of 2017 and 2018?\"\n  },\n  {\n    \"input\": \"What was the percentage of the total population or total something specific (specify) from Singapore in the year 2019?\",\n    \"output\": \"What was the percentage of total from Singapore in 2019?\"\n  },\n  {\n    \"input\": \"What was the 
average amount of money sent from Singapore in the years 2018 and 2019?\",\n    \"output\": \"What was the average amount from Singapore in 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What was the average amount of money, in Singapore currency, that was received from Singapore in the years 2018 and 2019 combined?\",\n    \"output\": \"What was the average amount from Singapore in 2018 and 2019?\"\n  },\n  {\n    \"input\": \"How is the fair value of a financial instrument of a company determined and defined?\",\n    \"output\": \"How is the fair value of the company's financial instrument defined?\"\n  },\n  {\n    \"input\": \"In what specific year did the interest rate swap fall below the threshold of 1,000 thousands?\",\n    \"output\": \"In which year was interest rate swap less than 1,000 thousands?\"\n  },\n  {\n    \"input\": \"In which specific year did the interest rate swap fall below one million in thousands?\",\n    \"output\": \"In which year was interest rate swap less than 1,000 thousands?\"\n  },\n  {\n    \"input\": \"What are the three levels of subjectivity and could you provide more information to assist in providing a comprehensive answer?\",\n    \"output\": \"What are the 3 levels of subjectivity?\"\n  },\n  {\n    \"input\": \"What are the 3 distinct levels of subjectivity and how can they be described or defined?\",\n    \"output\": \"What are the 3 levels of subjectivity?\"\n  },\n  {\n    \"input\": \"\\\"What were the additional features or updates introduced in the year 2019? 
Similarly, what were the new additions or improvements made in 2018?\\\"\",\n    \"output\": \"What were the other additions in 2019 and 2018 respectively?\"\n  },\n  {\n    \"input\": \"\\\"What are the average number of other additions made in 2018 and 2019?\\\"\",\n    \"output\": \"What is the average other additions for 2018 and 2019?\"\n  },\n  {\n    \"input\": \"\\\"What is the average number of non-inclusive additions made in the years 2018 and 2019 combined?\\\"\",\n    \"output\": \"What is the average other additions for 2018 and 2019?\"\n  },\n  {\n    \"input\": \"In what specific year did the transfer of developed technology from IPR&D exceed an amount greater than 4,000 thousands?\",\n    \"output\": \"In which year was Transfers to developed technology from IPR&D greater than 4,000 thousands?\"\n  },\n  {\n    \"input\": \"What were the average Selling, General, and Administrative expenses for the financial years ending on December 31, 2019 and 2018?\",\n    \"output\": \"What is the average Selling, general and administrative for the Years Ended December 31, 2019 to 2018?\"\n  },\n  {\n    \"input\": \"\\\"What is the average amount spent on Selling, general and administrative expenses for the Years Ended December 31, 2019 and 2018?\\\"\",\n    \"output\": \"What is the average Selling, general and administrative for the Years Ended December 31, 2019 to 2018?\"\n  },\n  {\n    \"input\": \"What was the percentage decrease in the Selling, general and administrative expenses during the year 2019?\",\n    \"output\": \"What was the decrease in Selling, general and administrative in 2019?\"\n  },\n  {\n    \"input\": \"What was the number of shares granted in 2019?\",\n    \"output\": \"How many shares were awarded in the year 2019?\"\n  },\n  {\n    \"input\": \"Define research and development activities.\",\n    \"output\": \"What are research and development activities?\"\n  },\n  {\n    \"input\": \"What was the average amount spent on net revenue 
for the years ending on December 31, 2019 and 2018?\",\n    \"output\": \"What is the average Cost of net revenue, for the Years Ended December 31, 2019 to 2018?\"\n  },\n  {\n    \"input\": \"What is the average Cost of net revenue for the period spanning from December 31, 2018, to December 31, 2019?\",\n    \"output\": \"What is the average Cost of net revenue, for the Years Ended December 31, 2019 to 2018?\"\n  },\n  {\n    \"input\": \"In which year was Restructuring expense 0 thousands?\",\n    \"output\": \"In which specific year did the Restructuring expense amount to zero thousands (thousands referring to the unit) without any incurred costs?\"\n  },\n  {\n    \"input\": \"In which year was Restructuring expense 0 thousands?\",\n    \"output\": \"What was the specific year when the Restructuring expense amounted to 0 thousands?\"\n  },\n  {\n    \"input\": \"\\\"What were the transfers of developed technology from IPR&D in the years 2019 and 2018 respectively?\\\"\",\n    \"output\": \"What were the Transfers to developed technology from IPR&D in 2019 and 2018 respectively?\"\n  },\n  {\n    \"input\": \"What was the change in the Transfers to developed technology from IPR&D from 2018 to 2019?\",\n    \"output\": \"What was the difference in the amount of transfers to developed technology from the Intangible Property, Research, and Development (IPR&D) category between the years 2018 and 2019?\"\n  },\n  {\n    \"input\": \"In which year was Transfers to developed technology from IPR&D negative?\",\n    \"output\": \"In what specific year did Transfers to developed technology from IPR&D show a negative value?\"\n  },\n  {\n    \"input\": \"What is the average Short-term restricted cash for 2018 and 2019?\",\n    \"output\": \"What is the average amount of Short-term restricted cash for the years 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What was the average Net cash provided by operating activities from December 31, 2018 to December 31, 2019?\",\n    
\"output\": \"What is the average Net cash provided by operating activities for the year ended December 31, 2019 to 2018?\"\n  },\n  {\n    \"input\": \"What is the average amount of cash generated from operating activities, received or used, for the period spanning from December 31, 2018 to December 31, 2019?\",\n    \"output\": \"What is the average Net cash provided by operating activities for the year ended December 31, 2019 to 2018?\"\n  },\n  {\n    \"input\": \"What was the respective Net cash used in investing activities in 2019 and 2018?\",\n    \"output\": \"What were the net amounts of cash used in investing activities for the years 2019 and 2018 respectively?\"\n  },\n  {\n    \"input\": \"What was the respective Net cash used in investing activities in 2019 and 2018?\",\n    \"output\": \"\\\"What was the amount of net cash utilized in investing activities during the years 2019 and 2018?\\\"\"\n  },\n  {\n    \"input\": \"What was the net value of long-term debt in 2019 after deducting any related expenses or liabilities?\",\n    \"output\": \"What was the Net carrying amount of long-term debt in 2019?\"\n  },\n  {\n    \"input\": \"What was the specific amount of long-term debt after deducting all necessary expenses from the total amount in 2018?\",\n    \"output\": \"What was the Net carrying amount of long-term debt in 2018?\"\n  },\n  {\n    \"input\": \"What was the specific amount of long-term debt, after deducting any reductions or write-offs, recorded on the financial statements for the year 2018?\",\n    \"output\": \"What was the Net carrying amount of long-term debt in 2018?\"\n  },\n  {\n    \"input\": \"What is the mean value of the unamortized debt discount for the fiscal years 2018 and 2019?\",\n    \"output\": \"What is the average Unamortized debt discount for 2018 and 2019?\"\n  },\n  {\n    \"input\": \"When was the year when the net carrying amount of long-term debt was less than 210,000 thousands?\",\n    \"output\": \"In which 
year was Net carrying amount of long-term debt less than 210,000 thousands?\"\n  },\n  {\n    \"input\": \"What is the average working capital for the period from December 31, 2018 to December 31, 2019?\",\n    \"output\": \"What is the average Working capital for December 31, 2019 to 2018?\"\n  },\n  {\n    \"input\": \"What is the average Cash and cash equivalents for December 31, 2019 to 2018?\",\n    \"output\": \"What is the average amount of Cash and cash equivalents for the period from December 31, 2018 to December 31, 2019?\"\n  },\n  {\n    \"input\": \"What is the average Cash and cash equivalents for December 31, 2019 to 2018?\",\n    \"output\": \"What is the average amount of Cash and cash equivalents as of December 31 for the years 2019 and 2018 combined?\"\n  },\n  {\n    \"input\": \"What is the eligibility of Performance-based restricted stock to vest?\",\n    \"output\": \"What criteria must be met for Performance-based restricted stock to become vested and eligible for acquisition?\"\n  },\n  {\n    \"input\": \"What were the adjustments in 2018?\",\n    \"output\": \"\\\"What specific adjustments, if any, occurred during the year 2018?\\\"\"\n  },\n  {\n    \"input\": \"When was the year when the ending balance fell below $240,000,000?\",\n    \"output\": \"In which year was the ending balance less than 240,000 thousands?\"\n  },\n  {\n    \"input\": \"What was the total amount of expenses incurred during the period from January 1 to December 31, 2018, after accounting for all deductions and offsets?\",\n    \"output\": \"What was the net expense in the year ended December 31, 2018?\"\n  },\n  {\n    \"input\": \"What was the total amount of expenditures for the financial year that concluded on December 31, 2018?\",\n    \"output\": \"What was the net expense in the year ended December 31, 2018?\"\n  },\n  {\n    \"input\": \"What was the total amount of expenses incurred during the period from January 1 to December 31, 2019?\",\n    \"output\": 
\"What was the net expense in the year ended December 31, 2019?\"\n  },\n  {\n    \"input\": \"What was the total expenditure for the period that concluded on December 31, 2019?\",\n    \"output\": \"What was the net expense in the year ended December 31, 2019?\"\n  },\n  {\n    \"input\": \"What was the percentage increase in the cost of services in 2019?\",\n    \"output\": \"How much did Cost of services increase in 2019?\"\n  },\n  {\n    \"input\": \"What was the percentage increase in the cost of services in the year 2019 compared to the previous year?\",\n    \"output\": \"How much did Cost of services increase in 2019?\"\n  },\n  {\n    \"input\": \"What was the Less Capital expenditures (including capitalized software) in 2019?\",\n    \"output\": \"What was the total amount spent on Less Capital Expenditures, including expenditure on capitalized software, during the year 2019?\"\n  },\n  {\n    \"input\": \"What was the Less Capital expenditures (including capitalized software) in 2019?\",\n    \"output\": \"\\\"What was the total amount of capital expenditures, including capitalized software, that occurred in the year 2019?\\\"\"\n  },\n  {\n    \"input\": \"What was the free cash flow in 2019?\",\n    \"output\": \"What was the amount of free cash flow generated in the year 2019?\"\n  },\n  {\n    \"input\": \"What was the average free cash flow for 2018 and 2019?\",\n    \"output\": \"What was the average amount of free cash flow generated for the years 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What was the percentage change in the Less Capital expenditures from 2018 to 2019?\",\n    \"output\": \"What was the percentage difference in the amount spent on capital expenditures between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What was the exact total value of the assets owned by a particular entity/company during the year 2019?\",\n    \"output\": \"What was the Total assets in 2019?\"\n  },\n  {\n    \"input\": \"What was the total value of 
assets in the year 2019?\",\n    \"output\": \"What was the Total assets in 2019?\"\n  },\n  {\n    \"input\": \"What was the average amount of operating income recorded during the period from 2015 to 2019?\",\n    \"output\": \"What was the average operating income for 2015-2019?\"\n  },\n  {\n    \"input\": \"What was the change in the Per common share – basic from 2018 to 2019?\",\n    \"output\": \"What was the exact change in the Per common share – basic value from 2018 to 2019? Please provide the specific numerical difference between the two years.\"\n  },\n  {\n    \"input\": \"What were the amounts of Service and other revenues for the years ended December 31, 2019 and 2018?\",\n    \"output\": \"How much did Service and other revenues amounted for   years ended December 31, 2019 and 2018 respectively?\"\n  },\n  {\n    \"input\": \"What were the respective amounts of Service and other revenues for the years ended December 31, 2019 and 2018?\",\n    \"output\": \"How much did Service and other revenues amounted for   years ended December 31, 2019 and 2018 respectively?\"\n  },\n  {\n    \"input\": \"\\\"What were the respective revenue amounts generated from Wireless equipment for the years ending December 31, 2019 and 2018?\\\"\",\n    \"output\": \"How much did Wireless equipment revenues amounted for   years ended December 31, 2019 and 2018 respectively?\"\n  },\n  {\n    \"input\": \"What was the total amount of revenue generated from Wireless equipment during the years ending December 31, 2019 and 2018?\",\n    \"output\": \"How much did Wireless equipment revenues amounted for   years ended December 31, 2019 and 2018 respectively?\"\n  },\n  {\n    \"input\": \"What was the percentage change in the value of Small and Medium Businesses from 2018 to 2019?\",\n    \"output\": \"What is the change in Small and Medium Business value from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What is the Consolidated Net Income in 2019?\",\n    \"output\": \"What is 
the Consolidated Net Income for the year 2019?\"\n  },\n  {\n    \"input\": \"What is the Consolidated Net Income in 2019?\",\n    \"output\": \"What is the exact amount of Consolidated Net Income recorded in the fiscal year of 2019?\"\n  },\n  {\n    \"input\": \"What is the difference in Consolidated Adjusted EBITDA between 2018 and 2019?\",\n    \"output\": \"What is the change in Consolidated Adjusted EBITDA from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What was the net change in accumulated other comprehensive income for the year 2016?\",\n    \"output\": \"What was the net increase to accumulated other comprehensive income in 2016?\"\n  },\n  {\n    \"input\": \"What is the difference in the amount of projected employee benefits that a company is obligated to pay, as of 2018 compared to 2019?\",\n    \"output\": \"What is the change in the projected benefit obligation from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"\\\"What was the specific percentage of income tax that individuals had to pay during the tax year of 2018?\\\"\",\n    \"output\": \"What was the effective income tax rate for the period 2018?\"\n  },\n  {\n    \"input\": \"What is the percentage difference in the effective income tax rate between 2018 and 2019?\",\n    \"output\": \"What is the change in Effective income tax rate from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What specific amount, included in the year 2018, would have a positive impact on the effective income tax rate?\",\n    \"output\": \"What was the amount that would favorably affect the effective income tax rate if included in 2018?\"\n  },\n  {\n    \"input\": \"What specific amount, included in 2017, would positively impact the effective income tax rate?\",\n    \"output\": \"What was the amount that would favorably affect the effective income tax rate if included in 2017?\"\n  },\n  {\n    \"input\": \"What specific amount, if included in the 2017 earnings, would have a positive impact on the effective income tax 
rate?\",\n    \"output\": \"What was the amount that would favorably affect the effective income tax rate if included in 2017?\"\n  },\n  {\n    \"input\": \"How did the Additions based on tax positions related to the current year change from 2018 to 2019?\",\n    \"output\": \"What was the change in the Additions based on tax positions related to the current year from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What was the average Additions for tax positions of prior years for 2017-2019?\",\n    \"output\": \"What were the average additions made for tax positions in the previous years, specifically from 2017 to 2019?\"\n  },\n  {\n    \"input\": \"What was the total long-term debt?\",\n    \"output\": \"\\\"What was the precise amount of long-term debt accrued by the entity in question, taking into account all outstanding obligations over an extended period of time?\\\"\"\n  },\n  {\n    \"input\": \"What was the value of the long-term debt that was payable within less than 1 year?\",\n    \"output\": \"What was the long-term debt less than 1 year?\"\n  },\n  {\n    \"input\": \"What was the exact amount of the finance lease obligation in its entirety?\",\n    \"output\": \"What was the total finance lease obligation?\"\n  },\n  {\n    \"input\": \"What is the difference between the long-term debt due less than 1 year and 1 to 3 years?\",\n    \"output\": \"What distinguishes long-term debt due within less than one year from long-term debt due within one to three years?\"\n  },\n  {\n    \"input\": \"What is the difference between the long-term debt due less than 1 year and 1 to 3 years?\",\n    \"output\": \"What differentiates long-term debt with a maturity of less than 1 year from debt with a maturity ranging from 1 to 3 years?\"\n  },\n  {\n    \"input\": \"By what percentage did Consumer's total operating revenues increase in the year 2019?\",\n    \"output\": \"How much did Consumer’s total operating revenues increase in 2019?\"\n  },\n  {\n    \"input\": 
\"What was the percentage increase in Consumer's total operating revenues in 2019 compared to the previous year?\",\n    \"output\": \"How much did Consumer’s total operating revenues increase in 2019?\"\n  },\n  {\n    \"input\": \"What was the percentage increase in Service Revenue for the year 2019 compared to the previous year?\",\n    \"output\": \"How much did Service Revenue increase in 2019?\"\n  },\n  {\n    \"input\": \"What is the percentage change in revenue from wireless equipment sales between 2018 and 2019?\",\n    \"output\": \"What is the change in Wireless equipment revenue from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"By what percentage did the consolidated revenue increase from 2018 to 2019?\",\n    \"output\": \"What was the increase in the consolidated revenue from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What was the percentage increase/decrease in consumer revenue from 2018 to 2019?\",\n    \"output\": \"What was the change in the consumer revenue from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What is the source of financing for the firm's capital spending requirements?\",\n    \"output\": \"How is the capital spending requirements for the firm financed?\"\n  },\n  {\n    \"input\": \"How does the firm obtain financing for its capital spending needs?\",\n    \"output\": \"How is the capital spending requirements for the firm financed?\"\n  },\n  {\n    \"input\": \"What was the average cash flow used in investing activities for 2018 and 2019?\",\n    \"output\": \"What was the average amount of cash used in investing activities during the years 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What was the percentage change in cash flow used in financing activities from 2018 to 2019?\",\n    \"output\": \"What was the percentage change in cash flow utilized for financing activities between the fiscal years 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What was the percentage change in cash flow used in financing activities from 2018 
to 2019?\",\n    \"output\": \"What was the exact percentage of increase or decrease in cash flow used for financing activities between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What was the percentage difference in the cost of services between 2018 and 2019?\",\n    \"output\": \"What was the change in the cost of services from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What were the adopted standards on January 1, 2018?\",\n    \"output\": \"Which Standards were Adopted on January 1, 2018?\"\n  },\n  {\n    \"input\": \"What is the change in Accumulated other comprehensive income from December 31, 2017 to January 1, 2018?\",\n    \"output\": \"What is the difference in the amount of Accumulated Other Comprehensive Income between December 31, 2017 and January 1, 2018?\"\n  },\n  {\n    \"input\": \"What is the net change in Noncontrolling interests between December 31, 2017, and January 1, 2018?\",\n    \"output\": \"What is the change in Noncontrolling interests from December 31, 2017 to January 1, 2018?\"\n  },\n  {\n    \"input\": \"What was the difference in Noncontrolling interests' value between December 31, 2017, and January 1, 2018?\",\n    \"output\": \"What is the change in Noncontrolling interests from December 31, 2017 to January 1, 2018?\"\n  },\n  {\n    \"input\": \"What was the ratio of the amortized cost due within one year or less to the fair value for the same period?\",\n    \"output\": \"What was the amortized cost that was due in one year or less as a ratio of the fair value for the same period?\"\n  },\n  {\n    \"input\": \"What was the ratio between the amortized cost to be paid within one year or less and the fair value for the same period?\",\n    \"output\": \"What was the amortized cost that was due in one year or less as a ratio of the fair value for the same period?\"\n  },\n  {\n    \"input\": \"What is the specific difference between the amortized cost and fair value, at the end of five years and at the end of ten years?\",\n 
   \"output\": \"What was the difference between the amortized cost and fair value that was due after five years through ten years?\"\n  },\n  {\n    \"input\": \"Which specific years can be found in the table that offer additional details pertaining to the stock options of the company?\",\n    \"output\": \"Which years does the table provide Additional information related to the company's stock options?\"\n  },\n  {\n    \"input\": \"What was the specific intrinsic value associated with exercises in the year 2019?\",\n    \"output\": \"What was the intrinsic value of exercises in 2019?\"\n  },\n  {\n    \"input\": \"What were the Proceeds received from exercises in 2018?\",\n    \"output\": \"What was the total amount of money received from exercises held in 2018?\"\n  },\n  {\n    \"input\": \"What were the Proceeds received from exercises in 2018?\",\n    \"output\": \"What was the total amount of money received from exercises in the year 2018?\"\n  },\n  {\n    \"input\": \"What was the gross income in 2017, prior to deducting income taxes?\",\n    \"output\": \"What was the total income before income taxes in 2017?\"\n  },\n  {\n    \"input\": \"What was the change in foreign income before income taxes between 2017 and 2018?\",\n    \"output\": \"What was the difference in the amount of income from foreign sources prior to income taxes, comparing the years 2017 and 2018? 
Please provide the change in figures for better understanding.\"\n  },\n  {\n    \"input\": \"What was the specific difference in foreign income, without considering income taxes, from 2018 to 2019?\",\n    \"output\": \"What was the change in foreign income before income taxes between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What were the short-term investment options available in the year 2018?\",\n    \"output\": \"What were the short-term investments in 2018?\"\n  },\n  {\n    \"input\": \"What were the types of short-term investments that were prevalent in 2018?\",\n    \"output\": \"What were the short-term investments in 2018?\"\n  },\n  {\n    \"input\": \"What years does the table provide information for total assets?\",\n    \"output\": \"For which time period does the table provide data on total assets?\"\n  },\n  {\n    \"input\": \"What years does the table provide information for total assets?\",\n    \"output\": \"For which years does the table present data regarding the total assets?\"\n  },\n  {\n    \"input\": \"What was the total debt in 2015?\",\n    \"output\": \"What amount of debt, in terms of total outstanding obligations, was recorded in the year 2015?\"\n  },\n  {\n    \"input\": \"What was the change in working capital between 2015 and 2016?\",\n    \"output\": \"What was the difference in the amount of working capital between 2015 and 2016?\"\n  },\n  {\n    \"input\": \"By what percentage did the Total stockholders' equity change from 2018 to 2019?\",\n    \"output\": \"What was the percentage change in Total stockholders' equity between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"Which years does the table provide information for the funded status of the company's postretirement health care and other defined benefit plans?\",\n    \"output\": \"For which specific years does the provided table present data relating to the funded status of the company's postretirement health care and other defined benefit plans?\"\n  },\n  {\n    
\"input\": \"What was the specific amount of fair value attributed to the plan assets during the year 2019?\",\n    \"output\": \"What was the fair value of plan assets in 2019?\"\n  },\n  {\n    \"input\": \"What was the fair value measurements using Level 2 for Corporate Bonds?\",\n    \"output\": \"What are the fair value measurements using Level 2 for Corporate Bonds?\"\n  },\n  {\n    \"input\": \"What was the combined value of cash, cash equivalents, and short-term investments?\",\n    \"output\": \"What were the total cash, cash equivalents and short-term investments?\"\n  },\n  {\n    \"input\": \"What was the sum of all cash, cash equivalents, and short-term investments?\",\n    \"output\": \"What were the total cash, cash equivalents and short-term investments?\"\n  },\n  {\n    \"input\": \"What was the difference in the fair value for U.S. Treasury and government debt securities between Level 1 and Level 2?\",\n    \"output\": \"What was the discrepancy in the fair value of U.S. 
Treasury and government debt securities when classified as Level 1 and Level 2 assets?\"\n  },\n  {\n    \"input\": \"What was the fair value of Level 2 Total cash, cash equivalents and short-term investments as a percentage of the total cash, cash equivalents and short-term investments?\",\n    \"output\": \"What is the fair value, expressed as a percentage, of Level 2 Total cash, cash equivalents, and short-term investments in relation to the overall total of cash, cash equivalents, and short-term investments?\"\n  },\n  {\n    \"input\": \"What were the distinctions between the levels of Total cash, cash equivalents, and short-term investments in terms of Level 1 and Level 2?\",\n    \"output\": \"What was the difference between the Total cash, cash equivalents and short-term investments for Level 1 and Level 2?\"\n  },\n  {\n    \"input\": \"What were the disparities in the amounts of Total Cash, Cash Equivalents, and Short-Term Investments between Level 1 and Level 2?\",\n    \"output\": \"What was the difference between the Total cash, cash equivalents and short-term investments for Level 1 and Level 2?\"\n  },\n  {\n    \"input\": \"On what basis was the calculation for days inventory outstanding determined?\",\n    \"output\": \"What was days inventory outstanding based on?\"\n  },\n  {\n    \"input\": \"What was the Days sales outstanding for three months ended april 2019?\",\n    \"output\": \"\\\"What was the specific value of Days Sales Outstanding (DSO) specifically for the three-month period ending in April 2019?\\\"\"\n  },\n  {\n    \"input\": \"What was the change in day sales outstanding between 2018 and 2019?\",\n    \"output\": \"What was the difference in the number of days it took to collect sales revenue between the years 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What was the change in day sales outstanding between 2018 and 2019?\",\n    \"output\": \"What was the difference in the number of days it took to collect sales revenue between 
2018 and 2019?\"\n  },\n  {\n    \"input\": \"For how long was the days inventory outstanding higher than 20 days ?\",\n    \"output\": \"How many years did days inventory outstanding exceed 20 days?\"\n  },\n  {\n    \"input\": \"What was the total amount spent on purchases in 2018?\",\n    \"output\": \"What was the Aggregate purchase price in 2018?\"\n  },\n  {\n    \"input\": \"What was the difference in the total purchase price for all items combined in 2018 and 2019?\",\n    \"output\": \"What was the change in the Aggregate purchase price between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What was the amount of finished goods in 2019?\",\n    \"output\": \"What was the specific quantity of fully completed and ready-for-sale products in the year 2019?\"\n  },\n  {\n    \"input\": \"What was the exact quantity of inventories during the year 2018?\",\n    \"output\": \"What was the amount of inventories in 2018?\"\n  },\n  {\n    \"input\": \"What was the change in finished goods between 2018 and 2019?\",\n    \"output\": \"How did the amount of finished goods change from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What were the additions made to tax positions related to the current year in 2019, and how do they impact the overall financial situation?\",\n    \"output\": \"What were the Additions based on tax positions related to the current year in 2019?\"\n  },\n  {\n    \"input\": \"What was the change in the Balance at beginning of period between 2017 and 2018?\",\n    \"output\": \"What was the difference in the starting balance from the beginning of the period in 2017 compared to 2018?\"\n  },\n  {\n    \"input\": \"What is the overall difference in the total amount of Additions for tax positions of prior years from 2017 to 2019?\",\n    \"output\": \"What was the total change in Additions for tax positions of prior years between 2017 and 2019?\"\n  },\n  {\n    \"input\": \"What was the difference in the total expenses recorded between 2018 and 
2019?\",\n    \"output\": \"What was the change in expense accrued during the period between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What was the difference in the amount of expenses recorded from 2018 to 2019?\",\n    \"output\": \"What was the change in expense accrued during the period between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What was the percentage change in the Warranty costs incurred between 2018 and 2019?\",\n    \"output\": \"What was the difference in warranty costs incurred between 2018 and 2019, expressed as a percentage change?\"\n  },\n  {\n    \"input\": \"What were the total assets in 2018?\",\n    \"output\": \"What was the total value of assets in the year 2018?\"\n  },\n  {\n    \"input\": \"What was the percentage change in  Total deferred commissions between 2018 and 2019?\",\n    \"output\": \"What was the percentage change in the total amount of deferred commissions from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What was the final balance at the end of the specified period in the year 2018?\",\n    \"output\": \"What was the balance at the end of period in 2018?\"\n  },\n  {\n    \"input\": \"What was the final balance at the end of period in 2018? 
Please provide the balance from the specified period in 2018.\",\n    \"output\": \"What was the balance at the end of period in 2018?\"\n  },\n  {\n    \"input\": \"What was the change in the balance at beginning of period between 2018 and 2019?\",\n    \"output\": \"What was the difference in the balance at the start of the period between the years 2018 and 2019?\"\n  },\n  {\n    \"input\": \"For how many years did the ending balance surpass $3,500 million?\",\n    \"output\": \"How many years did balance at end of period exceed $3,500 million?\"\n  },\n  {\n    \"input\": \"How many years did interest income exceed $50 million?\",\n    \"output\": \"For how many years was the interest income greater than $50 million?\"\n  },\n  {\n    \"input\": \"How many years did interest income exceed $50 million?\",\n    \"output\": \"How many years was the total interest income greater than $50 million?\"\n  },\n  {\n    \"input\": \"What was the percentage change in net revenues between 2018 and 2019?\",\n    \"output\": \"What was the exact percentage difference in net revenues from 2018 to 2019, and how can I calculate it accurately?\"\n  },\n  {\n    \"input\": \"Which years does the table provide information for net property and equipment?\",\n    \"output\": \"For which specific years is information regarding net property and equipment provided in the table?\"\n  },\n  {\n    \"input\": \"Which years does the table provide information for net property and equipment?\",\n    \"output\": \"For which specific years does the table present data regarding net property and equipment?\"\n  },\n  {\n    \"input\": \"What was the total revenue generated from operational activities during the year 2017?\",\n    \"output\": \"What was the income from operations in 2017?\"\n  },\n  {\n    \"input\": \"What was the percentage change in the net income between 2018 and 2019?\",\n    \"output\": \"What was the precise percentage increase or decrease in the net income from 2018 to 
2019?\"\n  },\n  {\n    \"input\": \"What was the percentage change in the net income between 2018 and 2019?\",\n    \"output\": \"What is the percentage difference in the net income from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"How much money was in circulation in the form of physical cash in the year 2019?\",\n    \"output\": \"What was the amount of cash in 2019?\"\n  },\n  {\n    \"input\": \"In 2019, how much cash was there?\",\n    \"output\": \"What was the amount of cash in 2019?\"\n  },\n  {\n    \"input\": \"What was the specific value or quantity of cash equivalents recorded in the financial records for the year 2018?\",\n    \"output\": \"What was the amount of cash equivalents in 2018?\"\n  },\n  {\n    \"input\": \"Which years does the table provide information for property and equipment information for geographic areas based on the physical location of the assets?\",\n    \"output\": \"For which specific years does the table provide information related to property and equipment in geographic areas, considering the assets' physical locations?\"\n  },\n  {\n    \"input\": \"Which years does the table provide information for property and equipment information for geographic areas based on the physical location of the assets?\",\n    \"output\": \"For which years does the table present property and equipment information? This information is specifically related to geographic areas, and is based on the physical location of the assets.\"\n  },\n  {\n    \"input\": \"What was the total combined value of property and equipment in the United States during the year 2019?\",\n    \"output\": \"What was the amount of property and equipment in U.S. 
in 2019?\"\n  },\n  {\n    \"input\": \"How many years did International property and equipment exceed $150 million?\",\n    \"output\": \"For how many consecutive years has the value of International property and equipment been greater than $150 million?\"\n  },\n  {\n    \"input\": \"What was the financed unearned services revenue in 2019?\",\n    \"output\": \"\\\"What was the amount of unearned revenue from financed services in 2019?\\\"\"\n  },\n  {\n    \"input\": \"What was the financed unearned services revenue in 2019?\",\n    \"output\": \"What was the specific amount of unearned services revenue that was financed during the year 2019?\"\n  },\n  {\n    \"input\": \"For how many years has the amount of financed unearned services revenue been greater than $100 million?\",\n    \"output\": \"How many years did financed unearned services revenue exceed $100 million?\"\n  },\n  {\n    \"input\": \"For how many consecutive years has the unearned services revenue in financed exceeded $100 million?\",\n    \"output\": \"How many years did financed unearned services revenue exceed $100 million?\"\n  },\n  {\n    \"input\": \"What was the change in deferred services revenue between 2018 and 2019?\",\n    \"output\": \"What was the difference in the amount of deferred services revenue recorded in 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What was the sum of the net revenues from the last two quarters?\",\n    \"output\": \"What is the total net revenue for the most recent two quarters?\"\n  },\n  {\n    \"input\": \"What was the change in the Dividends per share declared between 2018 and 2019?\",\n    \"output\": \"What was the difference in the amount of Dividends per share declared between the years 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What was the change in the Dividends per share declared between 2018 and 2019?\",\n    \"output\": \"What was the difference in the amount of dividends per share declared for the years 2018 and 2019?\"\n  },\n  {\n    
\"input\": \"What was the change in the Dividend payments allocated to retained earnings (accumulated deficit) between 2017 and 2018?\",\n    \"output\": \"What was the difference in the amounts of dividends allocated to retained earnings (accumulated deficit) from 2017 to 2018?\"\n  },\n  {\n    \"input\": \"What was the change in the Dividend payments allocated to retained earnings (accumulated deficit) between 2017 and 2018?\",\n    \"output\": \"What was the variation in the amount of dividends transferred to retained earnings (accumulated deficit) from 2017 to 2018?\"\n  },\n  {\n    \"input\": \"What factors contributed to the rise in the cash inflows generated from day-to-day business operations during the year 2019?\",\n    \"output\": \"What caused the increase in the cash flow from operating activities in 2019?\"\n  },\n  {\n    \"input\": \"What is the percentage change in the Free Cash Flow, considering both an increase and decrease in value?\",\n    \"output\": \"What is the increase / (decrease) in the Free Cash Flow?\"\n  },\n  {\n    \"input\": \"What is the percentage change in Free Cash Flow?\",\n    \"output\": \"What is the increase / (decrease) in the Free Cash Flow?\"\n  },\n  {\n    \"input\": \"What are the definitions of underlying operating profit and underlying operating margin, and how do they differ from each other?\",\n    \"output\": \"What is the definition of the Underlying operating profit and underlying operating margin?\"\n  },\n  {\n    \"input\": \"What does the underlying effective tax rate measure reflects?\",\n    \"output\": \"What is the definition and significance of the underlying effective tax rate and what does it reflect in terms of a company's financial situation or performance?\"\n  },\n  {\n    \"input\": \"What does the underlying effective tax rate measure reflects?\",\n    \"output\": \"What is the meaning of the underlying effective tax rate measure and what does it reflect?\"\n  },\n  {\n    \"input\": \"What 
is the methodology used to calculate the effective tax rate?\",\n    \"output\": \"How is the underlying effective tax rate calculated?\"\n  },\n  {\n    \"input\": \"What is the exact difference in the underlying effective tax rate between 2018 and 2019?\",\n    \"output\": \"What is the Underlying effective tax rate change from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What is the percentage change in the underlying effective tax rate between 2018 and 2019?\",\n    \"output\": \"What is the Underlying effective tax rate change from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"By what percentage did the Biogas production increase from the year 2018 to 2019?\",\n    \"output\": \"What is the increase in the Biogas from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What is the average total energy?\",\n    \"output\": \"What is the average amount of energy, when considering all types of energy combined?\"\n  },\n  {\n    \"input\": \"\\\"What is the percentage change in the price of Fuel Oils from 2018 to 2019? Please specify if it is an increase or decrease.\\\"\",\n    \"output\": \"What is the percentage increase / (decrease) in Fuel Oils from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What is the percentage change in the price of Fuel Oils between 2018 and 2019?\",\n    \"output\": \"What is the percentage increase / (decrease) in Fuel Oils from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What method is used to calculate ROIC (Return on Invested Capital)? 
Please provide a detailed explanation of the calculation process.\",\n    \"output\": \"How is ROIC calculated?\"\n  },\n  {\n    \"input\": \"What is the difference in the diluted combined average number of share units (in millions) between 2018 and 2019?\",\n    \"output\": \"What is the change in the Diluted combined average number of share units (millions of units) from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What is the difference in the diluted combined average number of share units (measured in millions) between 2018 and 2019?\",\n    \"output\": \"What is the change in the Diluted combined average number of share units (millions of units) from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What was the specific price quotation for investments on 31 December 2018?\",\n    \"output\": \"What was the price quotation for investments as of 31 December 2018?\"\n  },\n  {\n    \"input\": \"What was the average value of investments made by listed companies in the year 2019?\",\n    \"output\": \"What is the average amount of investments of listed companies in 2019?\"\n  },\n  {\n    \"input\": \"What was the average investment amount of listed companies in 2019?\",\n    \"output\": \"What is the average amount of investments of listed companies in 2019?\"\n  },\n  {\n    \"input\": \"What was the average percentage of ownership or voting rights held by shareholders in listed companies during the year 2019?\",\n    \"output\": \"What is the average Percentage of ownership or voting rights of listed companies in 2019?\"\n  },\n  {\n    \"input\": \"What is the average Unsecured other loans?\",\n    \"output\": \"What is the average amount of unsecured loans that fall under the category of \\\"other loans\\\"?\"\n  },\n  {\n    \"input\": \"What is the average value or rating of Integrated Device Manufacturers (IDMs)?\",\n    \"output\": \"What is the average of Integrated device manufacturers?\"\n  },\n  {\n    \"input\": \"What is the average performance or 
profitability of Integrated device manufacturers?\",\n    \"output\": \"What is the average of Integrated device manufacturers?\"\n  },\n  {\n    \"input\": \"What is the increase / (decrease) in the Integrated device manufacturers from 2017 to 2018?\",\n    \"output\": \"What is the net change, either increase or decrease, in the number of Integrated device manufacturers from 2017 to 2018?\"\n  },\n  {\n    \"input\": \"What is the average amount of compensation received by Directors?\",\n    \"output\": \"What is the average Directors’ compensation?\"\n  },\n  {\n    \"input\": \"What is the percentage change in compensation for Directors from the year 2018 to 2019?\",\n    \"output\": \"What is the increase/ (decrease) in Directors’ compensation from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What calculation method is employed to determine the total sum of the Company's shares?\",\n    \"output\": \"What method is used to calculate the aggregate amount of the Company’s share?\"\n  },\n  {\n    \"input\": \"What specific method is employed to calculate the total combined value of the shares held by the Company?\",\n    \"output\": \"What method is used to calculate the aggregate amount of the Company’s share?\"\n  },\n  {\n    \"input\": \"What is the average revenue generated from the sale of goods and services from December 31, 2018 to December 31, 2019?\",\n    \"output\": \"What are the average Sales of goods and services for December 31, 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What is the increase/ (decrease) in Sales of goods and services for December 31, 2018 to 2019?\",\n    \"output\": \"What is the percentage change in the sales of goods and services when comparing December 31, 2018, to December 31, 2019?\"\n  },\n  {\n    \"input\": \"What is the percentage change in the sales of goods and services from January 1, 2018, to December 31, 2018?\",\n    \"output\": \"What is the increase/ (decrease) in Sales of goods and services for January 1, 
2018 to December 31, 2018?\"\n  },\n  {\n    \"input\": \"What is the percentage change in the sales of goods and services from January 1, 2018 to December 31, 2018?\",\n    \"output\": \"What is the increase/ (decrease) in Sales of goods and services for January 1, 2018 to December 31, 2018?\"\n  },\n  {\n    \"input\": \"\\\"What is the average amount of lease obligation for the period Less than 1 Year and 1-3 Years? Please provide the average lease obligations separately for both time periods.\\\"\",\n    \"output\": \"What is the average lease obligation for the period Less than 1 Year and 1-3 Years?\"\n  },\n  {\n    \"input\": \"What does Purchase obligations incorporate?\",\n    \"output\": \"What is included in purchase obligations?\"\n  },\n  {\n    \"input\": \"What is the criteria for an employee to receive additional bonuses?\",\n    \"output\": \"What are the specific requirements or factors that determine if an employee is eligible to receive extra bonuses in addition to their regular compensation?\"\n  },\n  {\n    \"input\": \"What is the difference in Time Deposits: Non-Trading Purpose between Carrying Amount and Fair Amount?\",\n    \"output\": \"What is the distinction between the carrying amount and fair amount of Time Deposits used for non-trading purposes?\"\n  },\n  {\n    \"input\": \"What is the difference between Bonds: Non-Trading Purpose Carrying Amount and Fair Amount?\",\n    \"output\": \"What distinguishes Bonds: Non-Trading Purpose Carrying Amount from Fair Amount in terms of their meaning, significance, and financial implications?\"\n  },\n  {\n    \"input\": \"What is the average quantity of supplies and spare parts in general?\",\n    \"output\": \"What was the average Supplies and spare parts?\"\n  },\n  {\n    \"input\": \"What was the average quantity of supplies and spare parts used?\",\n    \"output\": \"What was the average Supplies and spare parts?\"\n  },\n  {\n    \"input\": \"What was the percentage increase / 
(decrease) in the Finished goods from 2018 to 2019?\",\n    \"output\": \"What was the percentage change in the amount of Finished goods from the year 2018 to 2019, indicating whether it increased or decreased?\"\n  },\n  {\n    \"input\": \"What was the percentage increase / (decrease) in the Finished goods from 2018 to 2019?\",\n    \"output\": \"What was the percentage change in the value of Finished goods from 2018 to 2019, indicating whether it increased or decreased?\"\n  },\n  {\n    \"input\": \"What is the percentage change in the value of financial assets at fair value through profit or loss between 2018 and 2019?\",\n    \"output\": \"What is the increase / (decrease) in the Financial assets at fair value through profit or loss from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What is the increase / (decrease) in the Short-term loans from 2018 to 2019?\",\n    \"output\": \"What was the percentage change in Short-term loans from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What is the increase / (decrease) in the Short-term loans from 2018 to 2019?\",\n    \"output\": \"What was the percentage change in Short-term loans between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What measures does the company implement to effectively manage, maintain, or adapt its capital structure in response to changes in the business environment?\",\n    \"output\": \"What steps does the company take To maintain or adjust the capital structure?\"\n  },\n  {\n    \"input\": \"What is the change in the total liabilities, expressed as a percentage, between 2018 and 2019?\",\n    \"output\": \"What is the increase / (decrease) in the Total liabilities from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What is the percentage change in Total Capital from 2018 to 2019?\",\n    \"output\": \"What is the percentage increase / (decrease) of Total Capital from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"How do non-GAAP measures assist investors in making informed investment decisions 
and assessing a company's financial performance?\",\n    \"output\": \"How do these non-GAAP measures aid investors?\"\n  },\n  {\n    \"input\": \"How much would diluted earnings per share for fiscal year 2018 be without the net charge (benefit) related to the enactment of the TCJA?\",\n    \"output\": \"What would be the diluted earnings per share for fiscal year 2018 if we exclude the net charge (benefit) associated with the enactment of the Tax Cuts and Jobs Act (TCJA)?\"\n  },\n  {\n    \"input\": \"How much would diluted earnings per share for fiscal year 2018 be without the net charge (benefit) related to the enactment of the TCJA?\",\n    \"output\": \"What would be the diluted earnings per share for fiscal year 2018, excluding the impact of the net charge (benefit) resulting from the implementation of the Tax Cuts and Jobs Act (TCJA)?\"\n  },\n  {\n    \"input\": \"What was the average revenue over the 3 year period from 2017 to 2019?\",\n    \"output\": \"What was the average revenue from 2017 to 2019, calculated over the span of three years?\"\n  },\n  {\n    \"input\": \"What factors contributed to the change in the federal statutory rate during fiscal year 2017?\",\n    \"output\": \"Why did the federal statutory rate in fiscal year 2017? \"\n  },\n  {\n    \"input\": \"How many items accounted for the difference between income taxes computed at the US federal statutory rate and the company's effective rate?\",\n    \"output\": \"How many specific items contributed to the variance between the income taxes calculated using the official US federal statutory rate and the actual rate paid by the company?\"\n  },\n  {\n    \"input\": \"What was the average federal statutory rate over the 3 year period from 2017 to 2019? 
\",\n    \"output\": \"What was the average federal statutory rate from 2017 to 2019?\"\n  },\n  {\n    \"input\": \"What was the average effective rate for the years 2017, 2018, and 2019?\",\n    \"output\": \"What was the average effective rate over the 3 year period from 2017 to 2019? \"\n  },\n  {\n    \"input\": \"How much were the top 3 components of property and equipment as a % of the total at cost, property and equipment for 2019?\",\n    \"output\": \"What were the percentages for the top 3 components of property and equipment in relation to the total at cost, property, and equipment in 2019?\"\n  },\n  {\n    \"input\": \"What percentage of the overall revenue in 2019 was not generated by the commercial cloud revenue?\",\n    \"output\": \"How much of the total revenue in 2019 did not come from commercial cloud revenue?\"\n  },\n  {\n    \"input\": \"Which were the bottom 2 revenue items for 2017?\",\n    \"output\": \"\\\"What were the two lowest revenue-generating items in 2017?\\\"\"\n  },\n  {\n    \"input\": \"What was the average EPS (earnings per share) for the years 2017, 2018, and 2019?\",\n    \"output\": \"What was the average basic earnings per share over the 3 year period from 2017 to 2019?\"\n  },\n  {\n    \"input\": \"\\\"Which specific locations does the company have both operating leases and finance leases in place?\\\"\",\n    \"output\": \"Which places does the company have operating and finance leases?\"\n  },\n  {\n    \"input\": \"What is the topic or content covered in note 15?\",\n    \"output\": \"What is note 15 about?\"\n  },\n  {\n    \"input\": \"\\\"What is the ascending order of the years when sorted by their total cost of finance leases?\\\"\",\n    \"output\": \"What are the years sorted by total finance lease cost, in ascending order?\"\n  },\n  {\n    \"input\": \"How many different items are included when calculating operating income (loss)?\",\n    \"output\": \"How many items are there for operating income 
(loss)?\"\n  },\n  {\n    \"input\": \"How many different items are included in the calculation of operating income (loss)?\",\n    \"output\": \"How many items are there for operating income (loss)?\"\n  },\n  {\n    \"input\": \"How is the allocation of amortization and depreciation determined and distributed?\",\n    \"output\": \"How is the portion of amortization and depreciation allocated?\"\n  },\n  {\n    \"input\": \"How is the allocation of the portion of amortization and depreciation determined and distributed?\",\n    \"output\": \"How is the portion of amortization and depreciation allocated?\"\n  },\n  {\n    \"input\": \"How many countries have Long-lived assets classified based on the controlling statutory company's location, excluding financial instruments and tax assets? Also, can you identify the countries where the assets amount to over 10% of the total assets?\",\n    \"output\": \"How many countries have Long-lived assets, excluding financial instruments and tax assets, classified by the location of the controlling statutory company and with countriesover 10% of the total assets?\"\n  },\n  {\n    \"input\": \"What was the percentage change in the total long-lived assets from 2017 to 2018?\",\n    \"output\": \"What was the precise percentage difference in the value of total long-lived assets between the years 2017 and 2018?\"\n  },\n  {\n    \"input\": \"What was the percentage change in the total long-lived assets from 2017 to 2018?\",\n    \"output\": \"What was the percentage increase or decrease in the value of the total long-lived assets between the years 2017 and 2018?\"\n  },\n  {\n    \"input\": \"What is the definition and significance of purchase commitments?\",\n    \"output\": \"What do the purchase commitments represent?\"\n  },\n  {\n    \"input\": \"What is the meaning and significance of purchase commitments?\",\n    \"output\": \"What do the purchase commitments represent?\"\n  },\n  {\n    \"input\": \"Where can the 
long-term debt be found?\",\n    \"output\": \"Where is the location to find the information regarding long-term debt?\"\n  },\n  {\n    \"input\": \"How much do the top 3 contractual obligation terms add up to in 2020?\",\n    \"output\": \"What is the total value of the top three contractual obligation terms in 2020?\"\n  },\n  {\n    \"input\": \"How much do the top 3 contractual obligation terms add up to in 2020?\",\n    \"output\": \"In 2020, what is the sum of the top three contractual obligation terms?\"\n  },\n  {\n    \"input\": \"What was the number of stock awards that were granted in 2019, not including the PSUs granted?\",\n    \"output\": \"Excluding the PSUs, how many stock awards were granted in the year 2019?\"\n  },\n  {\n    \"input\": \"What was the number of stock awards that were granted in 2019, not including the PSUs granted?\",\n    \"output\": \"How many stock awards were granted in 2019, excluding the granted PSUs?\"\n  },\n  {\n    \"input\": \"What was the average fair value of the stock awards that were vested between 2017 and 2019 over a three-year period?\",\n    \"output\": \"What was the average fair value of stock awards vested over the 3 year period from 2017 to 2019?\"\n  },\n  {\n    \"input\": \"What was the intelligent cloud as a percentage of total revenue in 2019?\",\n    \"output\": \"What percentage of the total revenue in 2019 was attributed to the intelligent cloud?\"\n  },\n  {\n    \"input\": \"What was the intelligent cloud as a percentage of total revenue in 2019?\",\n    \"output\": \"What percentage of total revenue in 2019 was attributed to the intelligent cloud?\"\n  },\n  {\n    \"input\": \"How many revenue categories are there?\",\n    \"output\": \"How many different categories of revenue exist in the given context?\"\n  },\n  {\n    \"input\": \"What is the company's net loss in 2019?\",\n    \"output\": \"What was the net financial loss incurred by the company during the year 2019?\"\n  },\n  {\n    
\"input\": \"What was the cumulative unrealized gain on marketable securities for the company from 2017 to 2019?\",\n    \"output\": \"What is the company's total unrealized gain on marketable securities between 2017 to 2019?\"\n  },\n  {\n    \"input\": \"What is the percentage difference in the total stock base compensation, categorized by type of award, from 2018 to 2019?\",\n    \"output\": \"What is the percentage change in total stock base compensation  by type of award between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What is the percentage change in the total stock base compensation for each type of award from 2018 to 2019?\",\n    \"output\": \"What is the percentage change in total stock base compensation  by type of award between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What is the cumulative amount of stock based compensation from 2017 through 2019?\",\n    \"output\": \"What is the total stock based compensation amount between 2017 to 2019?\"\n  },\n  {\n    \"input\": \"What is the sum of stock-based compensation expenses for the years 2017 to 2019?\",\n    \"output\": \"What is the total stock based compensation amount between 2017 to 2019?\"\n  },\n  {\n    \"input\": \"As of December 31, 2018, how many shares were granted and are currently outstanding?\",\n    \"output\": \"What is the number of outstanding shares granted as of December 31, 2018? \"\n  },\n  {\n    \"input\": \"As of December 31, 2018, how many shares were granted and still remain outstanding?\",\n    \"output\": \"What is the number of outstanding shares granted as of December 31, 2018? 
\"\n  },\n  {\n    \"input\": \"What was the change in the number of outstanding shares between December 31, 2018, and December 31, 2019?\",\n    \"output\": \"What is the difference in outstanding shares as of December 31, 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What data is shown in the table?\",\n    \"output\": \"\\\"What specific information does the table display?\\\"\"\n  },\n  {\n    \"input\": \"What is the units for the data shown in the table?\",\n    \"output\": \"What units are being used to represent the information presented in the table?\"\n  },\n  {\n    \"input\": \"What is the total revenue earned by the company between 2017 to 2019?\",\n    \"output\": \"What is the cumulative revenue generated by the company from 2017 to 2019?\"\n  },\n  {\n    \"input\": \"What is the exact amount of revenue generated in the EMEA region from 2017 to 2019?\",\n    \"output\": \"What is the total revenue earned in EMEA between 2017 to 2019?\"\n  },\n  {\n    \"input\": \"What is the change in total revenue between 2018 to 2019?\",\n    \"output\": \"What is the difference in total revenue from 2018 compared to 2019, and how can this information assist in better understanding the financial performance of the company during this period?\"\n  },\n  {\n    \"input\": \"What is the sum of all non-current liabilities excluding total other non-current liabilities as of 31 December 2019?\",\n    \"output\": \"What is the total other non-current liabilities as at 31 December 2019?\"\n  },\n  {\n    \"input\": \"What is the aggregate amount of all non-current liabilities, excluding any current liabilities, as of December 31, 2019?\",\n    \"output\": \"What is the total other non-current liabilities as at 31 December 2019?\"\n  },\n  {\n    \"input\": \"What is the depreciation expense on property and equipment as at December 31, 2018?\",\n    \"output\": \"What is the specific amount of the depreciation expense recorded for property and equipment on the financial 
statement as at the closing date of December 31, 2018?\"\n  },\n  {\n    \"input\": \"What is the depreciation expense on property and equipment as at December 31, 2018?\",\n    \"output\": \"What was the amount of depreciation expense booked for property and equipment up until December 31st, 2018?\"\n  },\n  {\n    \"input\": \"What is the depreciation expense on property and equipment as at December 31, 2019?\",\n    \"output\": \"As of December 31, 2019, what is the recorded depreciation expense amount for property and equipment?\"\n  },\n  {\n    \"input\": \"What is the depreciation expense on property and equipment as at December 31, 2019?\",\n    \"output\": \"What is the specific amount of depreciation expense recorded for property and equipment as on December 31, 2019?\"\n  },\n  {\n    \"input\": \"What is the total depreciation expense on property and equipment from 2017 to 2019?\",\n    \"output\": \"From 2017 to 2019, what was the cumulative amount of depreciation expense incurred on property and equipment?\"\n  },\n  {\n    \"input\": \"What is the total depreciation expense on property and equipment from 2017 to 2019?\",\n    \"output\": \"What is the cumulative depreciation expense on property and equipment for the years 2017, 2018, and 2019?\"\n  },\n  {\n    \"input\": \"What is the allowance for doubtful accounts as at December 31, 2018?\",\n    \"output\": \"What is the amount set aside for doubtful accounts on the balance sheet as of December 31, 2018?\"\n  },\n  {\n    \"input\": \"What are some examples of financial instruments that can subject the company to credit risk?\",\n    \"output\": \"What financial instruments expose companies to credit risk? Can you provide some examples of such instruments?\"\n  },\n  {\n    \"input\": \"What are some examples of financial instruments that can subject the company to credit risk?\",\n    \"output\": \"What financial instruments can expose a company to credit risk? 
Can you provide some examples?\"\n  },\n  {\n    \"input\": \"How does the company effectively manage and reduce its credit risk associated with accounts receivable?\",\n    \"output\": \"How does the company mitigate its credit risk pertaining to accounts receivable?\"\n  },\n  {\n    \"input\": \"How does the company identify and determine its important or major customers in its business operations?\",\n    \"output\": \"How does the company determine its significant customers?\"\n  },\n  {\n    \"input\": \"How does the company identify and assess its significant customers for business purposes?\",\n    \"output\": \"How does the company determine its significant customers?\"\n  },\n  {\n    \"input\": \"What was the monetary value of the company's losses within its own country during the year 2019?\",\n    \"output\": \"What is the company's domestic loss in 2019?\"\n  },\n  {\n    \"input\": \"\\\"What was the total financial loss incurred by the company within its own country during the year 2019?\\\"\",\n    \"output\": \"What is the company's domestic loss in 2019?\"\n  },\n  {\n    \"input\": \"What is the amount of money that the company lost within its own country during the year 2017?\",\n    \"output\": \"What is the company's domestic loss in 2017?\"\n  },\n  {\n    \"input\": \"\\\"What was the fiscal year 2017 domestic financial loss incurred by the company?\\\"\",\n    \"output\": \"What is the company's domestic loss in 2017?\"\n  },\n  {\n    \"input\": \"What is the company's total loss before income taxes between 2017 to 2019?\",\n    \"output\": \"What is the cumulative pre-tax loss of the company from 2017 to 2019?\"\n  },\n  {\n    \"input\": \"What is the company's change in foreign income between 2018 and 2019?\",\n    \"output\": \"What was the percentage change in the company's foreign income from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What is the cumulative amount of loss incurred by the company domestically during the period from 
2017 to 2019?\",\n    \"output\": \"What is the company's total domestic loss between 2017 to 2019?\"\n  },\n  {\n    \"input\": \"What is the aggregate amount of losses incurred by the company within its domestic markets during the period from 2017 to 2019?\",\n    \"output\": \"What is the company's total domestic loss between 2017 to 2019?\"\n  },\n  {\n    \"input\": \"What is the percentage change in total deferred revenue 2018 and 2019?\",\n    \"output\": \"What is the percentage difference in the total deferred revenue between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What is the percentage change in total deferred revenue 2018 and 2019?\",\n    \"output\": \"What is the percentage difference in the total amount of deferred revenue between the years 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What is the proportion of the cost of revenue and research and development expenses as a percentage of the total restructuring expense?\",\n    \"output\": \"What is the ratio of the combined cost of revenue and research and development expenses to the total restructuring expense, expressed as a percentage?\"\n  },\n  {\n    \"input\": \"What is the total legal fees and facilities expenses?\",\n    \"output\": \"What are the combined expenses for legal fees and facilities?\"\n  },\n  {\n    \"input\": \"What is the total legal fees and facilities expenses?\",\n    \"output\": \"What is the cumulative sum of the expenses incurred in legal fees and facilities?\"\n  },\n  {\n    \"input\": \"What is the total increases related to tax positions taken during current year in 2018 and 2019?\",\n    \"output\": \"What is the cumulative increase in tax positions taken during the current year across 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What is the total increases related to tax positions taken during current year in 2018 and 2019?\",\n    \"output\": \"What is the combined amount of increments in relation to tax positions that were assumed during the present year 
in both 2018 and 2019?\"\n  },\n  {\n    \"input\": \"As of 31st December 2018, what is the sum of all liabilities that have accumulated or been incurred by the company up to that date?\",\n    \"output\": \"What is the total accrued liabilities as at 31 December 2018?\"\n  },\n  {\n    \"input\": \"What are the units used to measure the values in the table?\",\n    \"output\": \"What is the units that the values in the table are measured in?\"\n  },\n  {\n    \"input\": \"What is the percentage increase or decrease in the total amount of accrued liabilities from 2018 to 2019?\",\n    \"output\": \"What is the percentage change in total accrued liabilities between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What is the percentage increase or decrease in the total amount of liabilities that have accumulated between the years 2018 and 2019?\",\n    \"output\": \"What is the percentage change in total accrued liabilities between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What is the total amount of long-lived asset in Japan in both 2018 and 2019?\",\n    \"output\": \"What is the total value of long-lived assets in Japan for the years 2018 and 2019 combined?\"\n  },\n  {\n    \"input\": \"What were the capitalized costs of software development in 2019?\",\n    \"output\": \"What was the Capitalized software development costs in 2019?\"\n  },\n  {\n    \"input\": \"What were the average net costs of software development for both the years 2018 and 2019?\",\n    \"output\": \"What was the average Software development costs, net for 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What were the net average costs for software development in both 2018 and 2019?\",\n    \"output\": \"What was the average Software development costs, net for 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What was the Cash and cash equivalents in 2019 and 2018 respectively?\",\n    \"output\": \"What were the cash and cash equivalents figures for the years 2019 and 2018, respectively?\"\n  
},\n  {\n    \"input\": \"What was the Cash and cash equivalents in 2019 and 2018 respectively?\",\n    \"output\": \"What were the amounts of Cash and cash equivalents for the years 2019 and 2018, respectively?\"\n  },\n  {\n    \"input\": \"What are the average amounts of Restricted cash for the years 2018 and 2019?\",\n    \"output\": \"What is the average Restricted cash for 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What was the Compensation and related benefits in 2019 and 2018 respectively?\",\n    \"output\": \"What were the compensation and benefits offered in 2019 and 2018?\"\n  },\n  {\n    \"input\": \"\\\"In what specific year did the expenses for professional and legal services amount to a value below 500 thousand dollars?\\\"\",\n    \"output\": \"In which year was Professional and legal fees less than 500 thousands?\"\n  },\n  {\n    \"input\": \"What was the percentage change in Compensation and related benefits from 2018 to 2019?\",\n    \"output\": \"What is the change in the Compensation and related benefits from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What were the average professional and legal fees in 2018 and 2019?\",\n    \"output\": \"What was the average Professional and legal fees for 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What were the factors that led to the decrease in net pool allocation? 
Can you provide an explanation and further details regarding the decline in net pool allocation?\",\n    \"output\": \"What accounted for the decrease in net pool allocation?\"\n  },\n  {\n    \"input\": \"What are the components of vessel operating and supervision costs?\",\n    \"output\": \"What are the key components that make up the costs associated with vessel operations and supervision?\"\n  },\n  {\n    \"input\": \"How much did the technical maintenance expenses differ between 2017 and 2018?\",\n    \"output\": \"What was the change in technical maintenance expenses from 2017 to 2018?\"\n  },\n  {\n    \"input\": \"What was the exact monetary difference in technical maintenance expenses between the years 2017 and 2018?\",\n    \"output\": \"What was the change in technical maintenance expenses from 2017 to 2018?\"\n  },\n  {\n    \"input\": \"What was the percentage increase or decrease in the combined costs of operating and supervising vessels from 2018 to 2019?\",\n    \"output\": \"What was the percentage change in total vessel operating and supervision costs from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"Why does the Group choose to retain ownership of the trade receivables instead of selling them or assigning them to a third party?\",\n    \"output\": \"Why does the Group hold the trade receivables?\"\n  },\n  {\n    \"input\": \"What does accrued income represent?\",\n    \"output\": \"\\\"What is the meaning and significance of accrued income?\\\"\"\n  },\n  {\n    \"input\": \"What does accrued income represent?\",\n    \"output\": \"What is the meaning and significance of accrued income?\"\n  },\n  {\n    \"input\": \"What was the change in trade receivables from 2018 to 2019?\",\n    \"output\": \"What was the difference in the amount of trade receivables between 2018 and 2019, indicating whether it increased or decreased?\"\n  },\n  {\n    \"input\": \"What was the exact percentage increase or decrease in the total amount of money owed to a 
company from 2018 to 2019?\",\n    \"output\": \"What was the percentage change in total receivables from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"In which years was the fair value of the derivative assets recorded for?\",\n    \"output\": \"For which specific years were the fair values of the derivative assets recorded? Please provide all relevant information regarding the years for accurate response.\"\n  },\n  {\n    \"input\": \"In which specific year did the fair value of interest rate swaps experience an increase compared to other years?\",\n    \"output\": \"In which year was the fair value of  Interest rate swaps  higher?\"\n  },\n  {\n    \"input\": \"When was the fair value of interest rate swaps higher: in which specific year did the fair value of interest rate swaps surpass that of other years?\",\n    \"output\": \"In which year was the fair value of  Interest rate swaps  higher?\"\n  },\n  {\n    \"input\": \"In which year did the company record the highest amount of expenses pertaining to share-based compensation?\",\n    \"output\": \"In which year was the total expense recognized in respect of share-based compensation the highest?\"\n  },\n  {\n    \"input\": \"What was the highest year in terms of total recognized expense for share-based compensation?\",\n    \"output\": \"In which year was the total expense recognized in respect of share-based compensation the highest?\"\n  },\n  {\n    \"input\": \"What was the difference in the amount of money earned by The Cool Pool Limited in 2017 compared to 2018?\",\n    \"output\": \"What was the change in revenues from The Cool Pool Limited  from 2017 to 2018?\"\n  },\n  {\n    \"input\": \"What was the exact difference in the amount of money earned by The Cool Pool Limited in the year 2017 compared to the year 2018?\",\n    \"output\": \"What was the change in revenues from The Cool Pool Limited  from 2017 to 2018?\"\n  },\n  {\n    \"input\": \"For which years were the movements in tangible fixed 
assets and vessels under construction documented?\",\n    \"output\": \"In which years was the movements in tangible fixed assets and vessels under construction recorded for?\"\n  },\n  {\n    \"input\": \"What was the percentage change in the cost of vessels from the end of 2018 to the end of 2019?\",\n    \"output\": \"What was the change in vessel cost from end 2018 to end 2019?\"\n  },\n  {\n    \"input\": \"In which year was the other non-current assets recorded for?\",\n    \"output\": \"What is the specific year in which the recording of non-current assets other than the current ones occurred?\"\n  },\n  {\n    \"input\": \"In which year did the value of long-term assets, other than the current year, surpass that of the current year?\",\n    \"output\": \"In which year was the other long-term assets higher?\"\n  },\n  {\n    \"input\": \"What was the percentage change in the total value of other non-current assets from the year 2018 to the year 2019?\",\n    \"output\": \"What was the percentage change in total other non-current assets from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"For which years were the earnings per share (EPS) or losses per share (LPS) recorded?\",\n    \"output\": \"In which years was the earnings/(losses) per share recorded for?\"\n  },\n  {\n    \"input\": \"What was the amount of ship management client accounts in 2018?\",\n    \"output\": \"How many ship management client accounts were there in the year 2018?\"\n  },\n  {\n    \"input\": \"What was the change in current accounts  from 2018 to 2019?\",\n    \"output\": \"What is the difference between the current accounts in 2018 and 2019?\"\n  },\n  {\n    \"input\": \"\\\"What was the specific amount of compensation or payment given to individuals or employees in the year 2018?\\\"\",\n    \"output\": \"What was the remuneration in 2018?\"\n  },\n  {\n    \"input\": \"What was the amount of compensation received in 2018?\",\n    \"output\": \"What was the remuneration in 
2018?\"\n  },\n  {\n    \"input\": \"Which year was the short-term benefits the highest?\",\n    \"output\": \"What was the peak year for short-term benefits?\"\n  },\n  {\n    \"input\": \"Which year was the short-term benefits the highest?\",\n    \"output\": \"In which year did the short-term benefits reach their peak level?\"\n  },\n  {\n    \"input\": \"For which specific years were the costs of constructing the vessels recorded?\",\n    \"output\": \"In which years was the vessels under construction costs recorded for?\"\n  },\n  {\n    \"input\": \"For which years were the recorded construction costs of the vessels?\",\n    \"output\": \"In which years was the vessels under construction costs recorded for?\"\n  },\n  {\n    \"input\": \"What is the significance or meaning behind vessels that are currently being constructed?\",\n    \"output\": \"What does vessels under construction represent?\"\n  },\n  {\n    \"input\": \"In which year were the installment payments for the progress shipyard lower compared to other years?\",\n    \"output\": \"Which year was the progress shipyard installments lower?\"\n  },\n  {\n    \"input\": \"In which specific year did the progress shipyard experience a decrease in the number of installments being made?\",\n    \"output\": \"Which year was the progress shipyard installments lower?\"\n  },\n  {\n    \"input\": \"What was the difference in the average daily hire rate between 2017 and 2018?\",\n    \"output\": \"What was the change in average daily hire rate from 2017 to 2018?\"\n  },\n  {\n    \"input\": \"What is the change in pool gross revenues from 2017 to 2018?\",\n    \"output\": \"What was the difference in the total revenue generated from pools in 2017 compared to 2018?\"\n  },\n  {\n    \"input\": \"What is the change in pool gross revenues from 2017 to 2018?\",\n    \"output\": \"What is the difference in total gross revenues generated by the pool in 2017 compared to 2018?\"\n  },\n  {\n    \"input\": \"In which 
years was the other payables and accruals recorded for?\",\n    \"output\": \"For which years was the recording of other payables and accruals made?\"\n  },\n  {\n    \"input\": \"How many vessels were hired for employment in the year 2018?\",\n    \"output\": \"How many vessels were hired in 2018?\"\n  },\n  {\n    \"input\": \"In 2018, what was the total number of vessels that were hired?\",\n    \"output\": \"How many vessels were hired in 2018?\"\n  },\n  {\n    \"input\": \"In which year was the accrued interest higher?\",\n    \"output\": \"\\\"In which year did the accumulated interest amount exceed the interest accrued in other years?\\\"\"\n  },\n  {\n    \"input\": \"What is the percentage difference in the combined value of total payables and accruals between 2018 and 2019?\",\n    \"output\": \"What was the percentage change in total payables and  accruals from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What was the difference in the ratio of a company's long-term debt to its equity from 2018 to 2019?\",\n    \"output\": \"What was the change in gearing ratio from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"In 2019, how many operating days were there and what was the total revenue generated during those days?\",\n    \"output\": \"What was the total revenue operating days in 2019?\"\n  },\n  {\n    \"input\": \"How many operating days were there in 2019 and what was the total revenue generated during those days?\",\n    \"output\": \"What was the total revenue operating days in 2019?\"\n  },\n  {\n    \"input\": \"What was the change in average daily hire rate from 2018 to 2019?\",\n    \"output\": \"What was the specific difference in the average rate of daily hiring between 2018 and 2019? 
How did the average daily hire rate change from one year to the other?\"\n  },\n  {\n    \"input\": \"\\\"What was the specific monetary value of the cash distributions made in the year 2019 for common units?\\\"\",\n    \"output\": \"What was the amount of cash distributions in 2019 for common units?\"\n  },\n  {\n    \"input\": \"How much did the size of the ATM Programme change?\",\n    \"output\": \"What was the change in size of the ATM Programme?\"\n  },\n  {\n    \"input\": \"\\\"When was the specific year when the general and administrative expenses were officially documented or accounted for?\\\"\",\n    \"output\": \"In which year was the general and administrative expenses recorded for?\"\n  },\n  {\n    \"input\": \"Where does the management intend to relocate its employees?\",\n    \"output\": \"\\\"What are the specific plans of the management regarding the relocation of its employees?\\\"\"\n  },\n  {\n    \"input\": \"What was the selling price of Methane Julia Louise?\",\n    \"output\": \"How much was Methane Julia Louise sold for?\"\n  },\n  {\n    \"input\": \"Which company was the vessel sold to?\",\n    \"output\": \"To which company was the vessel sold? 
Please provide the name of the company that purchased the vessel.\"\n  },\n  {\n    \"input\": \"Which company was the vessel sold to?\",\n    \"output\": \"\\\"To which company was the vessel sold?\\\"\"\n  },\n  {\n    \"input\": \"What was the percentage change in total right-of use assets  from start to end 2019?\",\n    \"output\": \"What was the percentage change in the total value of right-of-use assets between the beginning and end of 2019?\"\n  },\n  {\n    \"input\": \"How much was the change in the repayment amount for the bank loan?\",\n    \"output\": \"By how much did the bank loan repayment change?\"\n  },\n  {\n    \"input\": \"What year experienced a decrease in audit fees compared to previous years?\",\n    \"output\": \"In which year was the audit fees lower?\"\n  },\n  {\n    \"input\": \"What was the percentage change in audit fees between 2018 and 2019?\",\n    \"output\": \"What was the change in audit fees from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What was the percentage change in audit fees from the year 2018 to the year 2019?\",\n    \"output\": \"What was the change in audit fees from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What are the components subjected to credit risk?\",\n    \"output\": \"What are the various components that are exposed to the risk of credit?\"\n  },\n  {\n    \"input\": \"What is the significance of bunkers' consumption and other voyage expenses in relation to overall expenses for a voyage?\",\n    \"output\": \"What does bunkers’ consumption and other voyage expenses represent?\"\n  },\n  {\n    \"input\": \"What is the highest year for brokers' commissions on revenue?\",\n    \"output\": \"In which year was the brokers' commissions on revenue the highest?\"\n  },\n  {\n    \"input\": \"What was the change in bunkers’ consumption and other voyage expenses from 2018 to 2019?\",\n    \"output\": \"What was the difference in the consumption of bunkers and other voyage expenses between 2018 and 2019?\"\n  
},\n  {\n    \"input\": \"What is the specific monetary value that The Cool Pool Limited is required to pay in 2018?\",\n    \"output\": \"What is the amount due from The Cool Pool Limited in 2018?\"\n  },\n  {\n    \"input\": \"In which years was the dividends receivable and other amounts due from related parties recorded for?\",\n    \"output\": \"For which years were the recorded dividends receivable and other amounts due from related parties?\"\n  },\n  {\n    \"input\": \"In which years was the dividends receivable and other amounts due from related parties recorded for?\",\n    \"output\": \"In which specific years were the dividends receivable and other outstanding amounts due from related parties recorded?\"\n  },\n  {\n    \"input\": \"In which years was the investments recorded for?\",\n    \"output\": \"For which specific years were the investments recorded?\"\n  },\n  {\n    \"input\": \"What was the numerical difference in the number of additions made between 2018 and 2019?\",\n    \"output\": \"What was the change in additions from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What was the difference in the number of additions between 2018 and 2019?\",\n    \"output\": \"What was the change in additions from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What were the specific Level 2 municipal and corporate bonds held on December 31, 2018?\",\n    \"output\": \"What are the respective Level 2 municipal and corporate bonds as at December 31, 2018?\"\n  },\n  {\n    \"input\": \"What is the value of municipal bonds as a percentage of the total marketable securities?\",\n    \"output\": \"What proportion of the overall marketable securities is represented by municipal bonds?\"\n  },\n  {\n    \"input\": \"What are the respective goodwill amounts in 2015 and 2016?\",\n    \"output\": \"What were the goodwill amounts in 2015 and 2016, respectively?\"\n  },\n  {\n    \"input\": \"What were the average amounts of total cash, cash equivalents, and marketable 
securities in the years 2015 and 2016?\",\n    \"output\": \"What is the average total cash, cash equivalents, and marketable securities in 2015 and 2016?\"\n  },\n  {\n    \"input\": \"What is the proportion of shares bought in November out of the total shares purchased in the last three months of 2019?\",\n    \"output\": \"What percentage of the total shares purchased in the last three months of 2019 are bought in November?\"\n  },\n  {\n    \"input\": \"What is the proportion of shares bought in November out of the total shares purchased during the last three months of 2019?\",\n    \"output\": \"What percentage of the total shares purchased in the last three months of 2019 are bought in November?\"\n  },\n  {\n    \"input\": \"What is the percentage change in revenue from term license between 2018 and 2019?\",\n    \"output\": \"What is the percent increase or decrease in revenue from term license for the year 2019 compared to 2018?\"\n  },\n  {\n    \"input\": \"What is the percentage increase or decrease in revenue generated from maintenance services from 2018 to 2019?\",\n    \"output\": \"What is the percentage change in revenue from maintenance between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What were the pre-tax losses in 2018 and 2019 for the company?\",\n    \"output\": \"What are the respective loss before income tax benefits in 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What were the pre-tax domestic and foreign earnings in 2017?\",\n    \"output\": \"What are the respective domestic and foreign income before income taxes in 2017?\"\n  },\n  {\n    \"input\": \"\\\"What were the pre-tax incomes in 2017, separated by domestic and foreign sources?\\\"\",\n    \"output\": \"What are the respective domestic and foreign income before income taxes in 2017?\"\n  },\n  {\n    \"input\": \"\\\"What were the total losses incurred in the domestic market and international markets in 2019, excluding any income tax benefits received?\\\"\",\n    
\"output\": \"What are the respective domestic and foreign losses before income tax benefits in 2019?\"\n  },\n  {\n    \"input\": \"What is the sum of all outstanding receivables that are yet to be invoiced, and are expected to be collected within a period of two years?\",\n    \"output\": \"What is the total unbilled receivables due within 2 years?\"\n  },\n  {\n    \"input\": \"What is the sum of all outstanding amounts receivable that have not been invoiced yet, and are expected to be collected within the next 1 to 5 years?\",\n    \"output\": \"What is the total unbilled receivables due between 1 to 5 years?\"\n  },\n  {\n    \"input\": \"What is the cumulative value of unrealized billings that are yet to be accounted for and are expected to be collected within a period ranging from 1 to 5 years?\",\n    \"output\": \"What is the total unbilled receivables due between 1 to 5 years?\"\n  },\n  {\n    \"input\": \"What is the difference between the company's federal and state net operating losses?\",\n    \"output\": \"What differentiates the federal and state net operating losses of the company?\"\n  },\n  {\n    \"input\": \"What were the amounts that the company is authorised to repurchase in 2019 and 2018 respectively?\",\n    \"output\": \"What were the authorized repurchase amounts for the company in both 2019 and 2018?\"\n  },\n  {\n    \"input\": \"What were the respective amounts of stock repurchases made by the company as at January 1, 2018 and 2019 respectively?\",\n    \"output\": \"What were the specific amounts of stock repurchases made by the company as of January 1, 2018 and January 1, 2019, respectively?\"\n  },\n  {\n    \"input\": \"What was the average amount of stock repurchases as of January 1st in both 2017 and 2018?\",\n    \"output\": \"What is the average stock repurchases as at January 1, 2017 and 2018?\"\n  },\n  {\n    \"input\": \"What is the average amount of stock repurchases as of January 1, 2017, and January 1, 2018?\",\n    
\"output\": \"What is the average stock repurchases as at January 1, 2017 and 2018?\"\n  },\n  {\n    \"input\": \"What is the exact percentage difference in the amount of long-term deferred revenue from 2018 to 2019?\",\n    \"output\": \"What is the percentage change in long-term deferred revenue between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What are the respective values for Term in 2018 and 2019?\",\n    \"output\": \"What are the values for Term in the year 2018? What are the values for Term in the year 2019?\"\n  },\n  {\n    \"input\": \"What are the company's respective stock-based compensation for  cost of revenues in 2019 and 2018?\",\n    \"output\": \"What were the stock-based compensation expenses for the cost of revenues incurred by the company in both 2019 and 2018?\"\n  },\n  {\n    \"input\": \"What are the company's respective stock-based compensation for  cost of revenues in 2019 and 2018?\",\n    \"output\": \"\\\"What were the stock-based compensation expenses for cost of revenues in 2019 and 2018 for the company?\\\"\"\n  },\n  {\n    \"input\": \"What is the average amount of stock-based compensation for the cost of revenue that the company has incurred from 2017 to 2019?\",\n    \"output\": \"What is the company's average stock-based compensation for the cost of revenue between 2017 to 2019?\"\n  },\n  {\n    \"input\": \"What is the average amount of stock-based compensation granted by the company for the selling and marketing department in both 2018 and 2019?\",\n    \"output\": \"What is the company's average stock-based compensation for selling and marketing in 2018 and 2019?\"\n  },\n  {\n    \"input\": \"\\\"What was the average amount of stock-based compensation granted to employees in the selling and marketing department of the company in both 2018 and 2019?\\\"\",\n    \"output\": \"What is the company's average stock-based compensation for selling and marketing in 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What is the 
percentage change in revenue earned from term license from 2015 to 2016?\",\n    \"output\": \"What is the percentage change in the revenue earned from term license between 2015 and 2016?\"\n  },\n  {\n    \"input\": \"What is the total revenue earned from maintenance in 2017 and 2018?\",\n    \"output\": \"What is the combined revenue generated from maintenance services in the years 2017 and 2018?\"\n  },\n  {\n    \"input\": \"What is the total revenue earned from maintenance in 2017 and 2018?\",\n    \"output\": \"What was the combined revenue generated from maintenance services in the years 2017 and 2018?\"\n  },\n  {\n    \"input\": \"How did actuarial calculations impact the resulting changes in parameters?\",\n    \"output\": \"What did the changes in parameters on the basis of actuarial calculations lead to?\"\n  },\n  {\n    \"input\": \"What was the precise amount of the actual return on plan assets in the fiscal year 2019?\",\n    \"output\": \"How much did the actual return on plan assets amount to in FY2019?\"\n  },\n  {\n    \"input\": \"What items or expenses are typically excluded from the expected payments?\",\n    \"output\": \"What is not included in expected payments?\"\n  },\n  {\n    \"input\": \"Explain what expenses or items are typically not encompassed within the estimated payments.\",\n    \"output\": \"What is not included in expected payments?\"\n  },\n  {\n    \"input\": \"What was the change in interest income in FY2019 from FY2018?\",\n    \"output\": \"What was the difference in the amount of income generated from interest in the fiscal year 2019 compared to the fiscal year 2018?\"\n  },\n  {\n    \"input\": \"What was the change in interest income in FY2019 from FY2018?\",\n    \"output\": \"What was the specific difference in interest income between the fiscal year 2019 and the fiscal year 2018?\"\n  },\n  {\n    \"input\": \"In which year did Asia experience a higher year-on-year percentage change in GDP compared to 2018 during 
the period from 2018 to 2019?\",\n    \"output\": \"In which year was the Year-on-year percentage change of GDP in Asia larger from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What was the change in the year-on-year percentage change in GDP for Germany from 2018 to 2019?\",\n    \"output\": \"What was the difference in the percentage change of Germany's GDP from one year to the next, comparing 2018 and 2019?\"\n  },\n  {\n    \"input\": \"In which year did the total sum of taxes collected exceed the amount collected in all other years?\",\n    \"output\": \"In which year was the total amount of taxes larger?\"\n  },\n  {\n    \"input\": \"What was the exact difference in the total amount of taxes paid in the time period from 2018 to 2019 compared to the time period from 2017 to 2018?\",\n    \"output\": \"What was the change in total taxes in  2018/2019 from 2017/2018?\"\n  },\n  {\n    \"input\": \"What was the percentage increase or decrease in the total amount of taxes in 2018/2019 compared to 2017/2018?\",\n    \"output\": \"What was the percentage change in total taxes in  2018/2019 from 2017/2018?\"\n  },\n  {\n    \"input\": \"What factors or criteria are used as a foundation for the measurements being taken?\",\n    \"output\": \"What is the basis for the measurements?\"\n  },\n  {\n    \"input\": \"What is the fundamental basis or principle used to determine the measurements being referred to in the context?\",\n    \"output\": \"What is the basis for the measurements?\"\n  },\n  {\n    \"input\": \"In which specific year did the amount for Belgium surpass or exceed the previous value?\",\n    \"output\": \"In which year was the amount for Belgium larger?\"\n  },\n  {\n    \"input\": \"In which specific year did the total amount (not specified) exceed the amount for Belgium?\",\n    \"output\": \"In which year was the amount for Belgium larger?\"\n  },\n  {\n    \"input\": \"What was the amount of cash and cash equivalents in FY2019?\",\n    \"output\": 
\"How much cash and cash equivalents were reported in the financial statements for the fiscal year 2019?\"\n  },\n  {\n    \"input\": \"What was the exact percentage difference in Cash and cash equivalents between FY2018 and FY2019 after considering adjustments?\",\n    \"output\": \"What was the percentage change in Cash and cash equivalents in FY2019 from FY2018 adjusted?\"\n  },\n  {\n    \"input\": \"What was the calculated percentage difference in Cash and cash equivalents between FY2019 and FY2018, after adjusting for any relevant factors?\",\n    \"output\": \"What was the percentage change in Cash and cash equivalents in FY2019 from FY2018 adjusted?\"\n  },\n  {\n    \"input\": \"In which specific year did the absolute value of the Earnings per share exceed the value in another year?\",\n    \"output\": \"In which year was the absolute value of the Earnings per share larger?\"\n  },\n  {\n    \"input\": \"\\\"What specific factor was used to calculate the net cost of the current service?\\\"\",\n    \"output\": \"What was the current service cost netted against?\"\n  },\n  {\n    \"input\": \"\\\"In which specific year did the total sales exceed the sales of other years in terms of maximum revenue generated?\\\"\",\n    \"output\": \"In which year was the Total sales larger?\"\n  },\n  {\n    \"input\": \"In what year did the headcount in METRO Asia exceed the corresponding headcount in other years?\",\n    \"output\": \"In which year was the amount in METRO Asia headcount larger?\"\n  },\n  {\n    \"input\": \"What was the year-over-year change in the number of employees at METRO AG from 2018 to 2019?\",\n    \"output\": \"What was the change in METRO AG headcount in 2019 from 2018?\"\n  },\n  {\n    \"input\": \"With which companies did METRO maintain business relations with, and who were the parties involved in these relations?\",\n    \"output\": \"Who were the parties to which METRO maintained business relations with related companies?\"\n  },\n  {\n   
 \"input\": \"What were the business relations to related companies listed in the table?\",\n    \"output\": \"What were the business relations between the listed companies in the table and other related entities or organizations?\"\n  },\n  {\n    \"input\": \"What was the difference in the amount and type of services received in the fiscal year 2019 compared to the fiscal year 2018?\",\n    \"output\": \"What was the change in services received in FY2019 from FY2018?\"\n  },\n  {\n    \"input\": \"What is the exact percentage change in the number of services received during fiscal year 2019 compared to fiscal year 2018?\",\n    \"output\": \"What was the percentage change in services received in FY2019 from FY2018?\"\n  },\n  {\n    \"input\": \"What was the exact percentage difference in the quantity of services received during the fiscal year 2019 compared to the fiscal year 2018?\",\n    \"output\": \"What was the percentage change in services received in FY2019 from FY2018?\"\n  },\n  {\n    \"input\": \"In what format is the cash flow statement presented in the table?\",\n    \"output\": \"What version is the cash flow statement in the table in?\"\n  },\n  {\n    \"input\": \"What specific expenses related to income tax were officially acknowledged in the year 2019?\",\n    \"output\": \"What are the recognised income tax expenses in 2019?\"\n  },\n  {\n    \"input\": \"What was the percentage change in recognised income tax expenses in 2018/2019 from 2017/2018?\",\n    \"output\": \"What was the percentage increase or decrease in recognised income tax expenses for the financial year 2018/2019 compared to the financial year 2017/2018?\"\n  },\n  {\n    \"input\": \"What was the percentage change in recognised income tax expenses in 2018/2019 from 2017/2018?\",\n    \"output\": \"What was the percentage increase or decrease in recognised income tax expenses for the fiscal year 2018/2019 compared to the fiscal year 2017/2018?\"\n  },\n  {\n    \"input\": 
\"What was the change in EBT in 2018/2019 from 2017/2018?\",\n    \"output\": \"What was the difference in EBT (Earnings Before Taxes) between 2018 and 2019 when compared to 2017 and 2018?\"\n  },\n  {\n    \"input\": \"What does the other operating income include?\",\n    \"output\": \"What type of income is included under \\\"other operating income\\\"? Please provide a comprehensive explanation of the various sources that fall under this category.\"\n  },\n  {\n    \"input\": \"In which specific year did the amount of Services surpass the quantity in other years?\",\n    \"output\": \"In which year was the amount of Services larger?\"\n  },\n  {\n    \"input\": \"During which year did the quantity of services surpass the quantity in previous years?\",\n    \"output\": \"In which year was the amount of Services larger?\"\n  },\n  {\n    \"input\": \"What was the exact amount of equity during the fiscal year 2019?\",\n    \"output\": \"What was the amount of equity in FY2019?\"\n  },\n  {\n    \"input\": \"What proportion of the total earnings in FY2019 can be attributed to the Noncontrolling interests?\",\n    \"output\": \"How much of earnings does the Noncontrolling interests account for in FY2019?\"\n  },\n  {\n    \"input\": \"What proportion of the FY2019 earnings is represented by the Noncontrolling interests account?\",\n    \"output\": \"How much of earnings does the Noncontrolling interests account for in FY2019?\"\n  },\n  {\n    \"input\": \"What was the exact numerical difference in sales between FY2019 and FY2018?\",\n    \"output\": \"What was the change in Sales in FY2019 from FY2018?\"\n  },\n  {\n    \"input\": \"What was the difference in the total amount of sales between fiscal year 2019 and fiscal year 2018?\",\n    \"output\": \"What was the change in Sales in FY2019 from FY2018?\"\n  },\n  {\n    \"input\": \"When were the calculations of employee numbers by segments performed?\",\n    \"output\": \"When were the employee numbers by segments 
calculated?\"\n  },\n  {\n    \"input\": \"What were the specific segments or categories mentioned under the \\\"METRO\\\" component in the table that accounted for the number of employees?\",\n    \"output\": \"What were the components under METRO in the table when accounting for the employee numbers by segments?\"\n  },\n  {\n    \"input\": \"What was the change in METRO AG in 2019 from 2018?\",\n    \"output\": \"What was the year-on-year change in METRO AG's performance in 2019 compared to 2018?\"\n  },\n  {\n    \"input\": \"What was the percentage difference in METRO AG's performance in 2019 compared to 2018?\",\n    \"output\": \"What was the percentage change in METRO AG in 2019 from 2018?\"\n  },\n  {\n    \"input\": \"What was the percentage difference in the performance of METRO AG in 2019 compared to 2018?\",\n    \"output\": \"What was the percentage change in METRO AG in 2019 from 2018?\"\n  },\n  {\n    \"input\": \"In which year was the amount of total assets larger?\",\n    \"output\": \"\\\"When comparing the amount of total assets, which year had a larger value?\\\"\"\n  },\n  {\n    \"input\": \"What was the percentage change in total assets in FY2019 from FY2018?\",\n    \"output\": \"What was the percentage increase or decrease in the total assets for the fiscal year 2019 compared to the total assets for the fiscal year 2018?\"\n  },\n  {\n    \"input\": \"What is the source of revenue that compensates for the expenses incurred in providing logistics services?\",\n    \"output\": \"What is offset by income from logistics services?\"\n  },\n  {\n    \"input\": \"What are the components under Other operating expenses in the table?\",\n    \"output\": \"What specific components are included in the category of \\\"Other operating expenses\\\" as mentioned in the table?\"\n  },\n  {\n    \"input\": \"What are the components under Other operating expenses in the table?\",\n    \"output\": \"What specifically are the individual components included in 
the category of Other operating expenses in the provided table?\"\n  },\n  {\n    \"input\": \"What was the change in Losses from the disposal of fixed assets in 2018/2019 from 2017/2018?\",\n    \"output\": \"\\\"What is the difference in losses resulting from the sale or disposal of fixed assets between the fiscal years 2017/2018 and 2018/2019?\\\"\"\n  },\n  {\n    \"input\": \"What was the change in Losses from the disposal of fixed assets in 2018/2019 from 2017/2018?\",\n    \"output\": \"What was the difference in the amount of losses incurred from selling fixed assets between the fiscal years 2018/2019 and 2017/2018?\"\n  },\n  {\n    \"input\": \"What was the percentage increase or decrease in losses resulting from the disposal of fixed assets during the period of 2018/2019 compared to the period of 2017/2018?\",\n    \"output\": \"What was the percentage change in Losses from the disposal of fixed assets in 2018/2019 from 2017/2018?\"\n  },\n  {\n    \"input\": \"What was the percentage change in losses incurred from the disposal of fixed assets in the fiscal year 2018/2019 compared to the fiscal year 2017/2018?\",\n    \"output\": \"What was the percentage change in Losses from the disposal of fixed assets in 2018/2019 from 2017/2018?\"\n  },\n  {\n    \"input\": \"What was the change in interest carry-forwards in FY2019 from FY2018?\",\n    \"output\": \"What was the difference in the amount of interest carry-forwards between the fiscal years of 2019 and 2018?\"\n  },\n  {\n    \"input\": \"What are the primary elements within the table that are utilized to determine the outcome of the interest calculation?\",\n    \"output\": \"What are the broad components in the table which are used to calculate the interest result?\"\n  },\n  {\n    \"input\": \"What are the various broad components included in the table that are utilized for calculating the outcome of interest?\",\n    \"output\": \"What are the broad components in the table which are used to 
calculate the interest result?\"\n  },\n  {\n    \"input\": \"What was the percentage change in the amount of income earned from interest during the period between 2018 and 2019 compared to the previous period between 2017 and 2018?\",\n    \"output\": \"What was the percentage change in interest income in 2018/2019 from 2017/2018?\"\n  },\n  {\n    \"input\": \"In which specific year did the total sum of taxes other than general administrative expenses exceed the amount incurred in those administrative expenses?\",\n    \"output\": \"In which year was the amount of other taxes thereof from general administrative expenses larger?\"\n  },\n  {\n    \"input\": \"In which years were the reserves retained from earnings calculated in?\",\n    \"output\": \"During which specific years were the calculations for retained reserves from earnings conducted?\"\n  },\n  {\n    \"input\": \"In which years were the reserves retained from earnings calculated in?\",\n    \"output\": \"During which specific years were the calculations for retaining reserves from earnings carried out?\"\n  },\n  {\n    \"input\": \"What was the change in the Income tax on components of other comprehensive income in FY2019 from FY2018?\",\n    \"output\": \"What was the difference in the income tax related to the components of other comprehensive income from fiscal year 2018 to fiscal year 2019?\"\n  },\n  {\n    \"input\": \"What was the change in operating income in the Communications Solutions segment in 2019?\",\n    \"output\": \"What was the specific amount of increase or decrease in operating income for the Communications Solutions segment during the year 2019?\"\n  },\n  {\n    \"input\": \"What was the change in operating income in the Communications Solutions segment in 2019?\",\n    \"output\": \"What was the percentage change in operating income specifically for the Communications Solutions segment during the year 2019 compared to the previous year?\"\n  },\n  {\n    \"input\": \"Why did 
operating income decrease in fiscal 2019?\",\n    \"output\": \"\\\"What were the reasons for the decrease in operating income during fiscal year 2019?\\\"\"\n  },\n  {\n    \"input\": \"Why did operating income decrease in fiscal 2019?\",\n    \"output\": \"What were the reasons for the decrease in operating income in fiscal 2019?\"\n  },\n  {\n    \"input\": \"In which year did the volume or quantity of raw materials reach its highest point?\",\n    \"output\": \"In which year was Raw materials larger?\"\n  },\n  {\n    \"input\": \"In which specific year did the total volume or quantity of raw materials experience a greater increase or reach a higher value compared to previous years?\",\n    \"output\": \"In which year was Raw materials larger?\"\n  },\n  {\n    \"input\": \"In which specific years was the calculation of the Repurchase value conducted?\",\n    \"output\": \"Which years was the Repurchase value calculated in?\"\n  },\n  {\n    \"input\": \"What was the exact difference in the total number of common shares repurchased in the year 2019 compared to 2018?\",\n    \"output\": \"What was the change in the Number of common shares repurchased in 2019 from 2018?\"\n  },\n  {\n    \"input\": \"What was the calculation for determining the percentage change in the quantity of common shares repurchased during the year 2019 in comparison to 2018?\",\n    \"output\": \"What was the percentage change in the Number of common shares repurchased in 2019 from 2018?\"\n  },\n  {\n    \"input\": \"What are the specific categories or divisions for which the net sales figures are displayed in the provided table?\",\n    \"output\": \"What are the segments for which the net sales are presented in the table?\"\n  },\n  {\n    \"input\": \"What year did Industrial Solutions experience a larger size or growth compared to other years?\",\n    \"output\": \"In which year was Industrial Solutions larger?\"\n  },\n  {\n    \"input\": \"What specific changes occurred in the 
Industrial Solutions sector between 2018 and 2019?\",\n    \"output\": \"What was the change in Industrial Solutions in 2019 from 2018?\"\n  },\n  {\n    \"input\": \"In 2019, what specific changes occurred in the Industrial Solutions sector compared to the previous year of 2018?\",\n    \"output\": \"What was the change in Industrial Solutions in 2019 from 2018?\"\n  },\n  {\n    \"input\": \"In which years was the basic earnings per share calculated for?\",\n    \"output\": \"For which specific years was the metric of basic earnings per share calculated and used for evaluation?\"\n  },\n  {\n    \"input\": \"In which years was the basic earnings per share calculated for?\",\n    \"output\": \"For which years was the calculation of basic earnings per share carried out? Please provide specific details in your answer.\"\n  },\n  {\n    \"input\": \"What was the change in Dilutive impact of share-based compensation arrangements in 2019 from 2018?\",\n    \"output\": \"What was the difference in the dilutive impact of share-based compensation arrangements between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What was the year-on-year percentage change in the dilutive impact caused by share-based compensation arrangements in 2019 compared to 2018?\",\n    \"output\": \"What was the percentage change in Dilutive impact of share-based compensation arrangements in 2019 from 2018?\"\n  },\n  {\n    \"input\": \"What was the exact percentage change in the dilutive impact of share-based compensation arrangements between 2018 and 2019?\",\n    \"output\": \"What was the percentage change in Dilutive impact of share-based compensation arrangements in 2019 from 2018?\"\n  },\n  {\n    \"input\": \"In which specific year did the percentage of industrial solutions reach its lowest point?\",\n    \"output\": \"In which year was the percentage of industrial solutions the lowest in?\"\n  },\n  {\n    \"input\": \"What was the percentage change in the Weighted-Average Grant-Date Fair 
Value for nonvested shares between 2018 and 2019?\",\n    \"output\": \"What was the change in the Weighted-Average Grant-Date Fair Value for nonvested shares in 2019 from 2018?\"\n  },\n  {\n    \"input\": \"In the table, what segments were considered when calculating the backlog?\",\n    \"output\": \"What were the segments for which backlog was calculated in the table?\"\n  },\n  {\n    \"input\": \"For which segments in the table was the backlog calculated?\",\n    \"output\": \"What were the segments for which backlog was calculated in the table?\"\n  },\n  {\n    \"input\": \"What was the change in total backlog in 2019 from 2018?\",\n    \"output\": \"What was the difference in the total backlog between 2019 and 2018?\"\n  },\n  {\n    \"input\": \"What was the percentage difference in the total amount of unfinished work in 2019 compared to 2018?\",\n    \"output\": \"What was the percentage change in total backlog in 2019 from 2018?\"\n  },\n  {\n    \"input\": \"In which specific year did the amount spent on acquisition and integration costs exceed any other year?\",\n    \"output\": \"In which year was Acquisition and integration costs larger?\"\n  },\n  {\n    \"input\": \"In which specific year did the expenses incurred for acquisition and integration costs exceed the expenses in other years?\",\n    \"output\": \"In which year was Acquisition and integration costs larger?\"\n  },\n  {\n    \"input\": \"What are the components under U.S. in the table?\",\n    \"output\": \"Which components are listed under the U.S. category in the table?\"\n  },\n  {\n    \"input\": \"What are the different categories of Income Tax Expense (Benefit) mentioned in the table?\",\n    \"output\": \"What are the types of Income Tax Expense (Benefit) in the table?\"\n  },\n  {\n    \"input\": \"In which specific year did Non-U.S. experience the highest income tax expense (benefit)?\",\n    \"output\": \"Which year was the current income tax expense (benefit) for Non-U.S. 
the largest?\"\n  },\n  {\n    \"input\": \"In which year did the Non-U.S. current income tax expense (benefit) reach its highest amount?\",\n    \"output\": \"Which year was the current income tax expense (benefit) for Non-U.S. the largest?\"\n  },\n  {\n    \"input\": \"What was the change in Current income tax expense (benefit) in 2019 from 2018?\",\n    \"output\": \"What was the difference in Current income tax expense (benefit) between 2019 and 2018?\"\n  },\n  {\n    \"input\": \"What is the company's policy regarding the allocation and management of funds?\",\n    \"output\": \"What is the company's funding policy?\"\n  },\n  {\n    \"input\": \"What is the minimum amount of contributions that individuals are required to make to the pension plan outside of the United States?\",\n    \"output\": \"How much minimum required contributions is expected to be made to the non-U.S. pension plan?\"\n  },\n  {\n    \"input\": \"Which fiscal year between 2020 and 2024 will witness the highest benefit payments under the United States plans?\",\n    \"output\": \"In which Fiscal year from 2020 to 2024 would the benefit payments under the U.S Plans be the largest?\"\n  },\n  {\n    \"input\": \"During which fiscal year from 2020 to 2024 will the benefit payments under the United States Plans reach their maximum extent?\",\n    \"output\": \"In which Fiscal year from 2020 to 2024 would the benefit payments under the U.S Plans be the largest?\"\n  },\n  {\n    \"input\": \"What is typically not included in the net earnings of companies operating outside the United States?\",\n    \"output\": \"What do the Non-U.S. net earnings exclude?\"\n  },\n  {\n    \"input\": \"What is excluded from the net earnings of countries other than the U.S. and why?\",\n    \"output\": \"What do the Non-U.S. net earnings exclude?\"\n  },\n  {\n    \"input\": \"What was the percentage increase or decrease in the expense for U.S. 
federal income tax at the statutory rate in 2019 compared to 2018?\",\n    \"output\": \"What was the percentage change in Notional U.S. federal income tax expense at the statutory rate in 2019 from 2018?\"\n  },\n  {\n    \"input\": \"What are the specific elements or factors taken into account when calculating Restructuring and Other Charges, Net?\",\n    \"output\": \"What are the components considered under Restructuring and Other Charges, Net?\"\n  },\n  {\n    \"input\": \"When did the lowest value of net restructuring charges occur?\",\n    \"output\": \"In which year was Restructuring charges, net the lowest?\"\n  },\n  {\n    \"input\": \"When was the year with the lowest net restructuring charges recorded?\",\n    \"output\": \"In which year was Restructuring charges, net the lowest?\"\n  },\n  {\n    \"input\": \"What was the change in Restructuring charges, net in 2019 from 2018?\",\n    \"output\": \"What was the difference in the amount of Restructuring charges, net between 2019 and 2018?\"\n  },\n  {\n    \"input\": \"What was the change in Restructuring charges, net in 2019 from 2018?\",\n    \"output\": \"What was the net change in restructuring charges in 2019 compared to 2018?\"\n  },\n  {\n    \"input\": \"In 2019, what was the percentage difference in Restructuring charges, net compared to 2018?\",\n    \"output\": \"What was the percentage change in Restructuring charges, net in 2019 from 2018?\"\n  },\n  {\n    \"input\": \"What specific regions do the net sales to external customers correspond to?\",\n    \"output\": \"What are the net sales by geographic region to external customers attributed to?\"\n  },\n  {\n    \"input\": \"In which specific year did the percentage in Americas reach its highest point compared to other years?\",\n    \"output\": \"In which year was the percentage in Americas the largest?\"\n  },\n  {\n    \"input\": \"What is the specific year when the largest percentage was recorded in the Americas?\",\n    \"output\": 
\"In which year was the percentage in Americas the largest?\"\n  },\n  {\n    \"input\": \"What was the average net sales in Asia-Pacific as a percentage of total net sales across 2017, 2018 and 2019?\",\n    \"output\": \"What was the average proportion of net sales generated in the Asia-Pacific region, compared to the total net sales, for the years 2017, 2018, and 2019?\"\n  },\n  {\n    \"input\": \"What was the average net sales in Asia-Pacific as a percentage of total net sales across 2017, 2018 and 2019?\",\n    \"output\": \"What was the average percentage of total net sales in the Asia-Pacific region for the years 2017, 2018, and 2019 combined?\"\n  },\n  {\n    \"input\": \"What was the percentage change in the amount of US dollar-denominated long-term debt from 2018 to 2019?\",\n    \"output\": \"What was the increase / (decrease) in US dollar-denominated long-term debt from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What is the increase/ (decrease) in Numerator (basic) – Net income from 2018 to 2019?\",\n    \"output\": \"What is the change in Numerator (basic) – Net income from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What is the increase/ (decrease) in Numerator (basic) – Net income from 2018 to 2019?\",\n    \"output\": \"What is the absolute change in Numerator (basic) – Net income from the year 2018 to the year 2019?\"\n  },\n  {\n    \"input\": \"What is the percentage change in Basic Earnings per share from 2018 to 2019?\",\n    \"output\": \"What is the increase/ (decrease) in Basic Earnings per share from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What is the percentage change in Diluted Earnings per share from 2018 to 2019?\",\n    \"output\": \"What is the increase/ (decrease) in Diluted Earnings per share from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What is the percentage change in Diluted Earnings per share (EPS) between 2018 and 2019?\",\n    \"output\": \"What is the increase/ (decrease) in Diluted Earnings per share from 2018 
to 2019?\"\n  },\n  {\n    \"input\": \"What was the specific percentage of taxes paid in the year 2019, taking into account all applicable taxes and deductions?\",\n    \"output\": \"What was the effective tax rate in 2019?\"\n  },\n  {\n    \"input\": \"What was the average Computed income tax expense?\",\n    \"output\": \"What was the average expense for income tax computed?\"\n  },\n  {\n    \"input\": \"What was the percentage change in the amount of interest on borrowings from 2018 to 2019?\",\n    \"output\": \"What was the increase / (decrease) in Interest on borrowings from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What was the percentage change in Interest on borrowings from 2018 to 2019?\",\n    \"output\": \"What was the increase / (decrease) in Interest on borrowings from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What was the increase / (decrease) in the Finance costs before interest on lease liabilities from 2018 to 2019?\",\n    \"output\": \"What was the change in the Finance costs before interest on lease liabilities from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What was the percentage change in Adjusted EBITDA between 2018 and 2019?\",\n    \"output\": \"What was the increase / (decrease) in Adjusted EBITDA from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What was the average Depreciation and amortization?\",\n    \"output\": \"What was the average amount of depreciation and amortization expenses incurred over a specific period?\"\n  },\n  {\n    \"input\": \"What factors contributed to the overall rise in both depreciation and amortization expenses?\",\n    \"output\": \"What caused the increase in the total depreciation and amortization?\"\n  },\n  {\n    \"input\": \"What was the Depreciation of right-of-use assets in 2019?\",\n    \"output\": \"What was the amount of depreciation recorded for right-of-use assets during the year 2019?\"\n  },\n  {\n    \"input\": \"What was the Depreciation of right-of-use assets in 2019?\",\n    
\"output\": \"\\\"What was the amount of depreciation for right-of-use assets recorded in the financial statements for the year 2019?\\\"\"\n  },\n  {\n    \"input\": \"What was the increase / (decrease) in the Depreciation from 2018 to 2019?\",\n    \"output\": \"What was the net change in Depreciation, either an increase or decrease, between the years 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What was the increase / (decrease) in the Depreciation from 2018 to 2019?\",\n    \"output\": \"What was the difference in the Depreciation amount from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What was the percentage change in the total depreciation and amortization from 2018 to 2019?\",\n    \"output\": \"What was the increase / (decrease) in the Total depreciation and amortization from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What was the percentage change in Total depreciation and amortization from 2018 to 2019?\",\n    \"output\": \"What was the increase / (decrease) in the Total depreciation and amortization from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What was the change in the coupon rate between November 12, 2019, and April 30, 2019?\",\n    \"output\": \"What was the increase / (decrease) in the coupon rate from November 12, 2019 to April 30, 2019?\"\n  },\n  {\n    \"input\": \"What was the change in the coupon rate from November 12, 2019 to April 30, 2019?\",\n    \"output\": \"What was the increase / (decrease) in the coupon rate from November 12, 2019 to April 30, 2019?\"\n  },\n  {\n    \"input\": \"What was the percentage change in the Equivalent (Cdn$) value from 2018 to November 2019?\",\n    \"output\": \"What was the increase / (decrease) in the Equivalent (Cdn$) between 2018 and November 2019?\"\n  },\n  {\n    \"input\": \"What is the percentage change in the amount of interest on borrowings from 2018 to 2019?\",\n    \"output\": \"What is the increase/ (decrease) in Interest on borrowings from 2018 to 2019?\"\n  },\n  {\n    \"input\": 
\"By what percentage did the Interest on borrowings change from 2018 to 2019? Please state whether there was an increase or decrease.\",\n    \"output\": \"What is the increase/ (decrease) in Interest on borrowings from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What is the purpose of utilizing valuation-related analysis and making capital structure-related decisions?\",\n    \"output\": \"What is used for to conduct valuation-related analysis and make capital structure-related decisions?\"\n  },\n  {\n    \"input\": \"What items are considered when calculating the adjusted net debt?\",\n    \"output\": \"What is included in the adjusted net debt?\"\n  },\n  {\n    \"input\": \"In which context or industry is the valuation of debt derivatives without incorporating credit risk used as an evaluation tool?\",\n    \"output\": \"Where is debt derivatives valued without adjustment for credit risk used for evaluation?\"\n  },\n  {\n    \"input\": \"What is the net change in long-term debt from December 31, 2018, to December 31, 2019?\",\n    \"output\": \"What is the increase/ (decrease) in Long-term debt from December 31, 2018 to December 31, 2019?\"\n  },\n  {\n    \"input\": \"What is the change in the amount of Long-term debt from December 31, 2018 to December 31, 2019?\",\n    \"output\": \"What is the increase/ (decrease) in Long-term debt from December 31, 2018 to December 31, 2019?\"\n  },\n  {\n    \"input\": \"What is the change in the value of net debt derivative assets without credit risk adjustment from December 31, 2018 to December 31, 2019?\",\n    \"output\": \"What is the increase/ (decrease) in Net debt derivative assets valued without any adjustment for credit risk from December 31, 2018 to December 31, 2019?\"\n  },\n  {\n    \"input\": \"What is the method of measuring accounts receivable and what factors are considered in the measurement process?\",\n    \"output\": \"How is accounts receivable measured?\"\n  },\n  {\n    \"input\": \"What is the 
percentage change in the amount of customer accounts receivable from 2018 to 2019?\",\n    \"output\": \"What is the increase/ (decrease) in Customer accounts receivable from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"\\\"What is the method employed to settle the principal amount of Convertible Notes in cash upon conversion, and how does it factor into the computation of diluted earnings?\\\"\",\n    \"output\": \"Which method is used for to settle the principal of the Convertible Notes in cash on conversion and calculates diluted earnings?\"\n  },\n  {\n    \"input\": \"What are the consequences when a loss is incurred that can be attributed to shareholders?\",\n    \"output\": \"What happens if a loss attributable to shareholders has been incurred?\"\n  },\n  {\n    \"input\": \"What was the Convertible Notes that had an anti-dilutive effect on the calculation of diluted earnings per common share in the year end 2019, 2018 and 2017 respectively?\",\n    \"output\": \"What were the Convertible Notes that impacted diluted earnings per common share calculation in 2019, 2018, and 2017, specifically in terms of their anti-dilutive effects?\"\n  },\n  {\n    \"input\": \"What is the change in Loss per common share - basic and diluted from 2018 to 2019?\",\n    \"output\": \"What is the difference in the loss per common share - basic and diluted between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What is the change in Loss per common share - basic and diluted from 2018 to 2019?\",\n    \"output\": \"What is the difference in basic and diluted Loss per common share between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"How are the recognition processes for realized and unrealized losses different?\",\n    \"output\": \"How are the realized and unrealized losses recognized?\"\n  },\n  {\n    \"input\": \"How are both realized and unrealized losses recognized? 
Please provide a detailed explanation of the recognition process for each type of loss.\",\n    \"output\": \"How are the realized and unrealized losses recognized?\"\n  },\n  {\n    \"input\": \"In which specific year did the Gain on sale or write-down of a cost-accounted investment amount to less than 1,000 thousands?\",\n    \"output\": \"In which year is the Gain on sale / (write-down) of cost-accounted investment less than 1,000 thousands?\"\n  },\n  {\n    \"input\": \"\\\"In which fiscal year did the Gain on sale or write-down of a cost-accounted investment amount to less than 1,000 thousands (1 million)?\\\"\",\n    \"output\": \"In which year is the Gain on sale / (write-down) of cost-accounted investment less than 1,000 thousands?\"\n  },\n  {\n    \"input\": \"What were the total fees paid by Teekay Tankers to KPMG LLP in 2019 and 2018, respectively?\",\n    \"output\": \"What fees was paid to KPMG LLP by Teekay Tankers during 2019 and 2018 respectively?\"\n  },\n  {\n    \"input\": \"What were the fees paid by Teekay Tankers to KPMG LLP in 2019 and 2018?\",\n    \"output\": \"What fees was paid to KPMG LLP by Teekay Tankers during 2019 and 2018 respectively?\"\n  },\n  {\n    \"input\": \"What are the components or factors included in the income tax expense calculation?\",\n    \"output\": \"What is included in the income tax expense?\"\n  },\n  {\n    \"input\": \"What items are incorporated into the income tax expense?\",\n    \"output\": \"What is included in the income tax expense?\"\n  },\n  {\n    \"input\": \"What is the difference in the number of positions taken in previous years between December 31, 2019, and December 31, 2018?\",\n    \"output\": \"What is the change in Changes for positions taken in prior years from December 31, 2019 to December 31, 2018?\"\n  },\n  {\n    \"input\": \"What is the net change in the number of positions taken in prior years from December 31, 2019 to December 31, 2018?\",\n    \"output\": \"What is the change 
in Changes for positions taken in prior years from December 31, 2019 to December 31, 2018?\"\n  },\n  {\n    \"input\": \"What led to increase in Consolidated net cash flow from operating activities for the year ended December 31, 2019?\",\n    \"output\": \"What factors contributed to the rise in Consolidated net cash flow from operating activities during the year ending on December 31, 2019?\"\n  },\n  {\n    \"input\": \"How much did Brookfield receive from the sale of its interests in Altera in 2019?\",\n    \"output\": \"How much was received from Brookfield for the sale of interests in Altera during 2019?\"\n  },\n  {\n    \"input\": \"What is the change in Net financing cash flows from Year Ended December 31, 2019 to December 31, 2018?\",\n    \"output\": \"What is the difference in the amount of cash generated or used for financing activities in the financial statements for the years ending December 31, 2019 and December 31, 2018?\"\n  },\n  {\n    \"input\": \"What is the change in Net financing cash flows from Year Ended December 31, 2019 to December 31, 2018?\",\n    \"output\": \"What is the difference in net cash flows from financing activities between the fiscal years ended on December 31, 2019 and December 31, 2018?\"\n  },\n  {\n    \"input\": \"What is the change in Net (loss) income from Year Ended December 31, 2019 to December 31, 2018?\",\n    \"output\": \"What was the difference in the Net (loss) income between the fiscal years ending on December 31, 2019 and December 31, 2018?\"\n  },\n  {\n    \"input\": \"What is the change in Net (loss) income from Year Ended December 31, 2019 to December 31, 2018?\",\n    \"output\": \"What is the difference in the amount of net (loss) income between the fiscal year ending on December 31, 2019, and the fiscal year ending on December 31, 2018?\"\n  },\n  {\n    \"input\": \"What is the change in Income tax expense (recovery) from Year Ended December 31, 2019 to December 31, 2018?\",\n    \"output\": \"What 
is the difference in the amount of income tax expense or recovery reported for the fiscal year ended on December 31, 2019, compared to the fiscal year ended on December 31, 2018?\"\n  },\n  {\n    \"input\": \"What is the change in Income tax expense (recovery) from Year Ended December 31, 2019 to December 31, 2018?\",\n    \"output\": \"How does the income tax expense (recovery) change between the years ended December 31, 2019 and December 31, 2018?\"\n  },\n  {\n    \"input\": \"What is the sum of all minimum lease payments expected to be received in the year 2019?\",\n    \"output\": \"What was the total Total minimum lease payments to be received in 2019？\"\n  },\n  {\n    \"input\": \"What is the total sum of minimum lease payments that were expected to be received in the year 2019?\",\n    \"output\": \"What was the total Total minimum lease payments to be received in 2019？\"\n  },\n  {\n    \"input\": \"What is the change in Initial direct costs and other from December 31, 2019 to December 31, 2018?\",\n    \"output\": \"How did the initial direct costs and other expenses change from December 31, 2018, to December 31, 2019?\"\n  },\n  {\n    \"input\": \"When did Teekay sell to Brookfield all of the Company’s remaining interests in Altera?\",\n    \"output\": \"\\\"When was the complete sale of Teekay's remaining holdings in Altera to Brookfield executed?\\\"\"\n  },\n  {\n    \"input\": \"When did Teekay sell to Brookfield all of the Company’s remaining interests in Altera?\",\n    \"output\": \"\\\"When did Teekay sell its remaining interests in Altera to Brookfield?\\\"\"\n  },\n  {\n    \"input\": \"What were the Fair values at the start of the year for 2019 and 2018, respectively? 
Please provide the Fair value figures for both years separately.\",\n    \"output\": \"What was the Fair value at the beginning of the year for 2019 and 2018 respectively?\"\n  },\n  {\n    \"input\": \"In which year was Fair value at the beginning of the year less than 15,000 thousands?\",\n    \"output\": \"In which year did the fair value at the beginning of the year fall below 15,000,000 (15 million) dollars?\"\n  },\n  {\n    \"input\": \"\\\"What items are typically included in the calculation of current income tax expense, and what factors can affect this expense?\\\"\",\n    \"output\": \"What is included in current income tax expense?\"\n  },\n  {\n    \"input\": \"What components are typically encompassed in the calculation of contemporary income tax expense?\",\n    \"output\": \"What is included in current income tax expense?\"\n  },\n  {\n    \"input\": \"What is the change in Current income tax expense from December 31, 2019 to December 31, 2018?\",\n    \"output\": \"How much has the Current income tax expense changed from December 31, 2019, to December 31, 2018?\"\n  },\n  {\n    \"input\": \"What is the change in Current income tax expense from December 31, 2019 to December 31, 2018?\",\n    \"output\": \"What was the difference in Current income tax expense between December 31, 2019 and December 31, 2018?\"\n  },\n  {\n    \"input\": \"\\\"Between two years, which one exhibits a higher expense for current income tax?\\\"\",\n    \"output\": \"Which year has higher current income tax expense?\"\n  },\n  {\n    \"input\": \"What were the Lease and Non-Lease commitments in the year 2021, and can you provide specific details about each commitment?\",\n    \"output\": \"What was the Lease and Non-Lease commitment in 2021 respectively?\"\n  },\n  {\n    \"input\": \"What was the amount of lease commitment for the fiscal year 2022?\",\n    \"output\": \"What was the Lease commitment in 2022?\"\n  },\n  {\n    \"input\": \"What was the total value of 
lease commitments in the year 2022?\",\n    \"output\": \"What was the Lease commitment in 2022?\"\n  },\n  {\n    \"input\": \"In which year was the Lease commitment less than 10,000 thousands?\",\n    \"output\": \"In which specific year did the Lease commitment amount to less than 10,000 thousand units?\"\n  },\n  {\n    \"input\": \"In which year was the Lease commitment less than 10,000 thousands?\",\n    \"output\": \"In which specific year did the Lease commitment amount fall below 10,000 thousands? (For better answering, please state the year and the unit of the Lease commitment.)\"\n  },\n  {\n    \"input\": \"What is the difference in the lease commitment amount between 2020 and 2021?\",\n    \"output\": \"What is the change in the Lease commitment from 2020 to 2021?\"\n  },\n  {\n    \"input\": \"What is the average duration of lease agreements from 2020 to 2022?\",\n    \"output\": \"What is the average Lease Commitment from 2020 to 2022?\"\n  },\n  {\n    \"input\": \"What is the average duration of lease commitments between 2020 and 2022?\",\n    \"output\": \"What is the average Lease Commitment from 2020 to 2022?\"\n  },\n  {\n    \"input\": \"How much was the equity loss for the year ended December 31, 2019?\",\n    \"output\": \"What was the amount of equity lost during the period from January 1 to December 31, 2019?\"\n  },\n  {\n    \"input\": \"How much was the equity loss for the year ended December 31, 2019?\",\n    \"output\": \"What was the total amount of equity loss recorded for the period of the year ending December 31, 2019?\"\n  },\n  {\n    \"input\": \"What is the difference in the combined amount of cash and restricted cash on December 31, 2019, compared to December 31, 2018?\",\n    \"output\": \"What is the change in Cash and restricted cash from December 31, 2019 to December 31, 2018?\"\n  },\n  {\n    \"input\": \"What was the increase or decrease in the category of Other assets – current on the company's balance sheet from 
December 31, 2019 to December 31, 2018?\",\n    \"output\": \"What is the change in Other assets – current from December 31, 2019 to December 31, 2018?\"\n  },\n  {\n    \"input\": \"What were the values of the other current assets in the years 2019 and 2018?\",\n    \"output\": \"What was the other assets - current in 2019 and 2018 respectively?\"\n  },\n  {\n    \"input\": \"What were the respective amounts of non-current liabilities in 2019 and 2018 aside from the liabilities mentioned earlier?\",\n    \"output\": \"What was the other liabilities - non current in 2019 and 2018 respectively?\"\n  },\n  {\n    \"input\": \"What were the amounts of non-current liabilities in 2019 and 2018, respectively, excluding any other liabilities that may be present on the financial statements?\",\n    \"output\": \"What was the other liabilities - non current in 2019 and 2018 respectively?\"\n  },\n  {\n    \"input\": \"What is the difference in the number of Consolidated LNG carriers between 2020 and 2021?\",\n    \"output\": \"What is the change in Consolidated LNG carriers from 2020 to 2021?\"\n  },\n  {\n    \"input\": \"What is the net difference in the number of Consolidated LNG carriers between 2020 and 2021?\",\n    \"output\": \"What is the change in Consolidated LNG carriers from 2020 to 2021?\"\n  },\n  {\n    \"input\": \"What is the annual growth or decline in the number of Consolidated LNG carriers from 2021 to 2022?\",\n    \"output\": \"What is the change in Consolidated LNG carriers from 2021 to 2022?\"\n  },\n  {\n    \"input\": \"What is the average number of Consolidated LNG carriers for the period from 2020 to 2021?\",\n    \"output\": \"What is the average Consolidated LNG carriers, for the year 2020 to 2021?\"\n  },\n  {\n    \"input\": \"What is the average number of Consolidated LNG carriers between the years 2020 and 2021?\",\n    \"output\": \"What is the average Consolidated LNG carriers, for the year 2020 to 2021?\"\n  },\n  {\n    \"input\": 
\"What is the change in Realized (losses) gains relating to Interest rate swap agreements from Year Ended December 31, 2019 to December 31, 2018?\",\n    \"output\": \"What was the difference in Realized (losses) gains associated with Interest rate swap agreements between December 31, 2019 and December 31, 2018?\"\n  },\n  {\n    \"input\": \"What is the difference in Realized losses and gains from Forward freight agreements between Year Ended December 31, 2019 and December 31, 2018?\",\n    \"output\": \"What is the change in Realized (losses) gains relating to Forward freight agreements from Year Ended December 31, 2019 to December 31, 2018?\"\n  },\n  {\n    \"input\": \"What was the difference in revenue between 2019 and 2018?\",\n    \"output\": \"What is the change in Revenues from, 2019 to 2018?\"\n  },\n  {\n    \"input\": \"What is the change in Voyage expenses from, 2019 to 2018?\",\n    \"output\": \"What is the difference in expenses for voyages comparing 2019 to 2018?\"\n  },\n  {\n    \"input\": \"How is the expense of share-based compensation quantified and evaluated?\",\n    \"output\": \"How is share-based compensation expense measured?\"\n  },\n  {\n    \"input\": \"How is the expense related to share-based compensation quantified and determined?\",\n    \"output\": \"How is share-based compensation expense measured?\"\n  },\n  {\n    \"input\": \"What are the various types of share-based compensation plans in existence?\",\n    \"output\": \"What are the types of share-based compensation plans?\"\n  },\n  {\n    \"input\": \"By what percentage did the stock-based compensation expense for general and administrative operations decrease from 2018 to 2019?\",\n    \"output\": \"How much did the stock-based compensation expense in the general and administrative operations decreased from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What is the ending outstanding number of vested RSUs in 2019?\",\n    \"output\": \"What is the final count of vested 
Restricted Stock Units (RSUs) at the conclusion of 2019?\"\n  },\n  {\n    \"input\": \"What is the ending outstanding number of vested RSUs in 2019?\",\n    \"output\": \"How many RSUs were left as outstanding in the year 2019 once they had vested?\"\n  },\n  {\n    \"input\": \"What was the exact amount of depreciation expense recorded in the financial statements for the year 2018?\",\n    \"output\": \"What was the depreciation expense in 2018?\"\n  },\n  {\n    \"input\": \"What are the accepted accounting methods for handling and recording disposed assets?\",\n    \"output\": \"What are the accounting treatments for disposed assets?\"\n  },\n  {\n    \"input\": \"What are the accounting procedures to be followed specifically for assets that have been disposed of?\",\n    \"output\": \"What are the accounting treatments for disposed assets?\"\n  },\n  {\n    \"input\": \"What is the net total for property, plant and equipment in 2019?\",\n    \"output\": \"What is the total value of property, plant, and equipment after deducting accumulated depreciation in 2019?\"\n  },\n  {\n    \"input\": \"What is the net total for property, plant and equipment in 2019?\",\n    \"output\": \"What is the total value of property, plant, and equipment after deducting accumulated depreciation and any impairment losses, as reported in the financial statements for the year 2019?\"\n  },\n  {\n    \"input\": \"What is the percentage decrease in the net value of Property, Plant, and Equipment between 2018 and 2019?\",\n    \"output\": \"What is the percentage decrease in Net Total Property, Plant and Equipment from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"How much was the increase in Federal R&D tax credits from 2018 to 2019?\",\n    \"output\": \"How much did R&D tax credits from the Federal increased from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What was the percentage increase in R&D tax credits provided by the Federal government from 2018 to 2019?\",\n    \"output\": 
\"How much did R&D tax credits from the Federal increased from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"\\\"What was the amount recorded as a long-term liability in the financial statements for the year 2018?\\\"\",\n    \"output\": \"How much was reflected as a long-term liability in 2018?\"\n  },\n  {\n    \"input\": \"What is the percentage difference in Net deferred tax assets from 2018 to 2019?\",\n    \"output\": \"What was the percentage change in Net deferred tax assets between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What was the total cost of the service provided in the year 2018?\",\n    \"output\": \"What was the service cost in 2018?\"\n  },\n  {\n    \"input\": \"What was the cost of the service provided in the year 2018?\",\n    \"output\": \"What was the service cost in 2018?\"\n  },\n  {\n    \"input\": \"What was the difference in the cost of services from 2018 to 2019?\",\n    \"output\": \"What was the change in service cost between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What was the change in interest cost between 2018 and 2019?\",\n    \"output\": \"What was the difference in the amount of money spent on interest between the years 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What was the change in interest cost between 2018 and 2019?\",\n    \"output\": \"How did the interest cost change from 2018 to 2019, and what is the difference between the two years?\"\n  },\n  {\n    \"input\": \"What was the change in balances at the end of period between 2018 and 2019?\",\n    \"output\": \"What is the difference in balances at the end of the period between 2018 and 2019, and what caused this change?\"\n  },\n  {\n    \"input\": \"What was the specific monetary value or total sum allocated for Machinery and equipment in the year 2018?\",\n    \"output\": \"What was the amount for Machinery and equipment in 2018?\"\n  },\n  {\n    \"input\": \"\\\"What was the specific monetary value allocated to Machinery and equipment in the year 
2018?\\\"\",\n    \"output\": \"What was the amount for Machinery and equipment in 2018?\"\n  },\n  {\n    \"input\": \"What was the change in Net cash provided by (used in) operating activities from continuing operations between 2018 and 2019?\",\n    \"output\": \"What was the difference in Net cash provided by (used in) operating activities from continuing operations between the years 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What was the difference in the amount of Net cash provided by (used in) operating activities between the years 2018 and 2019?\",\n    \"output\": \"What was the change in Net cash provided by (used in) operating activities between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What was the percentage change in Income (loss) from discontinued operations, net of income taxes between 2018 and 2019?\",\n    \"output\": \"What was the percentage change in net income (loss) from discontinued operations, after accounting for income taxes, between the fiscal years 2018 and 2019?\"\n  },\n  {\n    \"input\": \"Which periods does the company's effective tax rates differ from the U.S. federal statutory rate?\",\n    \"output\": \"\\\"During which specific time periods does the effective tax rate of the company deviate from the U.S. 
federal statutory rate?\\\"\"\n  },\n  {\n    \"input\": \"What was the specific framework or arrangement regarding Federal taxes in the year 2018, including any applicable laws, regulations, rates, and deductions?\",\n    \"output\": \"What was the current provision for Federal taxes in 2018?\"\n  },\n  {\n    \"input\": \"What were the existing federal tax regulations and provisions for the year 2018?\",\n    \"output\": \"What was the current provision for Federal taxes in 2018?\"\n  },\n  {\n    \"input\": \"What was the specific provision made for foreign taxes in the year 2019?\",\n    \"output\": \"What was the current provision for Foreign taxes in 2019?\"\n  },\n  {\n    \"input\": \"What was the percentage increase or decrease in the fair value of intangible assets from 2018 to 2019?\",\n    \"output\": \"What was the percentage change in the fair value of intangible assets between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What was the change in Pro Forma total sales between 2018 and 2019?\",\n    \"output\": \"What was the difference in the total sales of Pro Forma from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What was the change in Pro Forma total sales between 2018 and 2019?\",\n    \"output\": \"What was the difference in Pro Forma total sales from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What was the exact fair value of the property and equipment owned by LumaSense?\",\n    \"output\": \"What was the fair value of  Property and equipment from LumaSense?\"\n  },\n  {\n    \"input\": \"What was the fair value of Property and equipment acquired by LumaSense?\",\n    \"output\": \"What was the fair value of  Property and equipment from LumaSense?\"\n  },\n  {\n    \"input\": \"What was the difference in the assumed exercise of dilutive stock options and restricted stock units between the years 2018 and 2019?\",\n    \"output\": \"What was the change in Assumed exercise of dilutive stock options and restricted stock units between 2018 and 
2019?\"\n  },\n  {\n    \"input\": \"What was the disparity in the fair value of cash paid for acquisition between Trek and the Electrostatic Product Line?\",\n    \"output\": \"What was the difference in the fair value of cash paid for acquisition between Trek and Electrostatic Product Line?\"\n  },\n  {\n    \"input\": \"What was the diluted earnings per share (EPS) specifically for Continuing Operations for the quarter ending in September?\",\n    \"output\": \"What was the diluted earnings per share of Continuing Operations in Quarter Ended  September?\"\n  },\n  {\n    \"input\": \"What was the diluted earnings per share of Continuing Operations in the quarter ending in September?\",\n    \"output\": \"What was the diluted earnings per share of Continuing Operations in Quarter Ended  September?\"\n  },\n  {\n    \"input\": \"What was the total Income (loss) from discontinued operations, net of income taxes in 2019?\",\n    \"output\": \"In 2019, what was the net income (loss) from discontinued operations, adjusted for income taxes?\"\n  },\n  {\n    \"input\": \"What was the difference in total operating expense between the fiscal years 2018 and 2019?\",\n    \"output\": \"What was the change in total operating expense between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What was the difference in the amount of money spent on operational expenses from 2018 to 2019?\",\n    \"output\": \"What was the change in total operating expense between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"How much money was owed to the company for goods or services provided in 2019 but not yet invoiced?\",\n    \"output\": \"What was the amount of unbilled receivables in 2019?\"\n  },\n  {\n    \"input\": \"What were the precise amounts that were billed during the year 2018 after considering all deductions, fees, and payments?\",\n    \"output\": \"What were the net amounts billed in 2018?\"\n  },\n  {\n    \"input\": \"What was the change in unbilled receivables between 2018 
and 2019?\",\n    \"output\": \"What was the difference in the amount of unbilled receivables from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What was the difference in the total amount billed between 2018 and 2019?\",\n    \"output\": \"What was the change in net amounts billed between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What was the accumulated amortization of customer relationships in 2018?\",\n    \"output\": \"\\\"What was the total amount of accumulated amortization recorded for customer relationships during the year 2018?\\\"\"\n  },\n  {\n    \"input\": \"What were the top two segments with the highest net carrying amounts in the year 2018?\",\n    \"output\": \"What was the two highest net carrying amounts segments in 2018?\"\n  },\n  {\n    \"input\": \"What were the two segments with the highest net carrying amounts in 2018?\",\n    \"output\": \"What was the two highest net carrying amounts segments in 2018?\"\n  },\n  {\n    \"input\": \"How much did the service cost in 2019 and can you provide more details about the specific service being referred to?\",\n    \"output\": \"What was service cost in 2019?\"\n  },\n  {\n    \"input\": \"What was the total amount spent on interest expenses in the year 2018?\",\n    \"output\": \"What was interest cost in 2018?\"\n  },\n  {\n    \"input\": \"What was the expected rate of return on plan assets for the year 2017?\",\n    \"output\": \"What was the Expected return on plan assets in 2017?\"\n  },\n  {\n    \"input\": \"What was the anticipated return on assets for the plan in 2017?\",\n    \"output\": \"What was the Expected return on plan assets in 2017?\"\n  },\n  {\n    \"input\": \"What was the change in Amortization of actuarial gains and losses between 2017 and 2018?\",\n    \"output\": \"What was the difference in the amount of amortization of actuarial gains and losses recorded between the years 2017 and 2018?\"\n  },\n  {\n    \"input\": \"What was the change in Amortization of actuarial 
gains and losses between 2017 and 2018?\",\n    \"output\": \"What was the difference in the amount of Amortization of actuarial gains and losses between the fiscal years 2017 and 2018?\"\n  },\n  {\n    \"input\": \"What were the specific expenses incurred for severance payments and any associated costs during the year 2019?\",\n    \"output\": \"What were the severance and related charges in 2019?\"\n  },\n  {\n    \"input\": \"What was the amount of severance and associated expenses recorded as charges in the year 2019?\",\n    \"output\": \"What were the severance and related charges in 2019?\"\n  },\n  {\n    \"input\": \"What was the change in Severance and related charges between 2018 and 2019?\",\n    \"output\": \"What was the year-on-year difference in the amount spent on Severance and related charges between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What was the change in Severance and related charges between 2018 and 2019?\",\n    \"output\": \"What was the difference in the amount of Severance and related charges incurred in 2018 compared to 2019?\"\n  },\n  {\n    \"input\": \"What is the proportion of facility relocation and closure charges in the total amount of restructuring charges incurred in 2019?\",\n    \"output\": \"What percentage of total restructuring charges in 2019 consisted of Facility relocation and closure charges?\"\n  },\n  {\n    \"input\": \"What was the contrast between the combined assets in the Level 1 and Level 2 categories for the year 2019?\",\n    \"output\": \"What was the difference in the total assets in the Level 1 Level 2 categories for 2019?\"\n  },\n  {\n    \"input\": \"What was the difference between the total assets in the Level 1 and Level 2 categories for the year 2019?\",\n    \"output\": \"What was the difference in the total assets in the Level 1 Level 2 categories for 2019?\"\n  },\n  {\n    \"input\": \"What was the combined value of the top three assets in the Level 2 Category for the year 2019?\",\n   
 \"output\": \"What was the total of the three highest assets in the Level 2 Category for 2019?\"\n  },\n  {\n    \"input\": \"What percentage of total assets across all categories consisted of assets from the Level One category for 2019?\",\n    \"output\": \"What was the proportion or share, in terms of percentage, of Level One assets out of the total assets across all categories for the year 2019?\"\n  },\n  {\n    \"input\": \"What was the specific value or monetary figure of long-lived assets in the United States during the year 2019?\",\n    \"output\": \"What was the amount of long-lived assets in United States in 2019?\"\n  },\n  {\n    \"input\": \"What was the net difference in the value of long-term assets in the United States from 2018 to 2019?\",\n    \"output\": \"What was the change in the amount of long-lived assets between 2018 and 2019 in United States?\"\n  },\n  {\n    \"input\": \"\\\"What was the combined value of the two longest-lasting assets in the year 2018?\\\"\",\n    \"output\": \"What is the sum of the highest two long-lived assets in 2018?\"\n  },\n  {\n    \"input\": \"\\\"What is the total value, in monetary terms, of the two long-lived assets with the highest worth that were recorded in the year 2018?\\\"\",\n    \"output\": \"What is the sum of the highest two long-lived assets in 2018?\"\n  },\n  {\n    \"input\": \"What was the initial estimated value of property and equipment as of December 31, 2019 in relation to their market worth?\",\n    \"output\": \"What was the Preliminary fair value of property and equipment in December 31, 2019?\"\n  },\n  {\n    \"input\": \"What was the initial estimated value of property and equipment as of December 31, 2019, before any adjustments or final calculations were made?\",\n    \"output\": \"What was the Preliminary fair value of property and equipment in December 31, 2019?\"\n  },\n  {\n    \"input\": \"What was the initial estimated value assigned to inventories as of September 10th, 
2019?\",\n    \"output\": \"What was the Preliminary fair value of inventories in September 10, 2019?\"\n  },\n  {\n    \"input\": \"\\\"What was the initial estimated market value of the inventories as of September 10, 2019?\\\"\",\n    \"output\": \"What was the Preliminary fair value of inventories in September 10, 2019?\"\n  },\n  {\n    \"input\": \"In 2019, what was the specific amount of revenue that was recognized for product sales and the provision of services at a particular point in time?\",\n    \"output\": \"What was the Product and service revenue recognized at point in time in 2019?\"\n  },\n  {\n    \"input\": \"What specific extended warranty and service contracts were acknowledged and recorded throughout the year of 2018?\",\n    \"output\": \"What was the Extended warranty and service contracts recognized over time in 2018?\"\n  },\n  {\n    \"input\": \"What were the specific extended warranty and service contracts that gained recognition throughout the year 2018?\",\n    \"output\": \"What was the Extended warranty and service contracts recognized over time in 2018?\"\n  },\n  {\n    \"input\": \"What was the change in Product and service revenue recognized at point in time between 2018 and 2019?\",\n    \"output\": \"What was the specific difference in revenue from the sales of products and services recognized at a specific moment between the years 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What were the sales figures for the Industrial & Medical sectors in the year 2017?\",\n    \"output\": \"What was the sales for Industrial & Medical in 2017?\"\n  },\n  {\n    \"input\": \"What were the sales figures specifically for Industrial & Medical sectors in the year 2017?\",\n    \"output\": \"What was the sales for Industrial & Medical in 2017?\"\n  },\n  {\n    \"input\": \"What was the percentage change in sales of Semiconductor Equipment from 2017 to 2018?\",\n    \"output\": \"What was the change in sales of Semiconductor Equipment between 
2017 and 2018?\"\n  },\n  {\n    \"input\": \"What were the income taxes for the 2018 tax year, based on the federal statutory tax rates?\",\n    \"output\": \"What was the Income taxes per federal statutory rate in 2018?\"\n  },\n  {\n    \"input\": \"What was the State income taxes, net of federal deduction in 2017?\",\n    \"output\": \"In 2017, what was the amount of state income taxes paid after deducting the federal deduction?\"\n  },\n  {\n    \"input\": \"What were the specific modifications that occurred in the GILTI Tax legislation from 2018 to 2019?\",\n    \"output\": \"What was the change in GILTI Tax between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What was the difference in the GILTI Tax rates from 2018 to 2019, and how did the change impact tax calculations?\",\n    \"output\": \"What was the change in GILTI Tax between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What was the change in Unremitted earnings between 2018 and 2019?\",\n    \"output\": \"What was the difference in Unremitted earnings from 2018 to 2019? 
Please provide the specific amount of change in Unremitted earnings between these two years.\"\n  },\n  {\n    \"input\": \"What was the percentage difference in Withholding taxes from 2018 to 2019?\",\n    \"output\": \"What was the percentage change in Withholding taxes between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What is the estimated rate of forfeiture?\",\n    \"output\": \"What was the forfeiture rate estimated to be?\"\n  },\n  {\n    \"input\": \"What is total non-cash compensation expense related to stock-based awards in 2019?\",\n    \"output\": \"\\\"What was the amount of expenses incurred in 2019 for stock-based awards as a form of total non-cash compensation?\\\"\"\n  },\n  {\n    \"input\": \"What is the range of years covered in the table?\",\n    \"output\": \"What are the years included in the table?\"\n  },\n  {\n    \"input\": \"\\\"What is the final balance as of December 31, 2019?\\\"\",\n    \"output\": \"What is the balance at end of year for 2019?\"\n  },\n  {\n    \"input\": \"In which specific markets does the company focus its marketing efforts and aim to capture a significant share of customers?\",\n    \"output\": \"Which markets does the company target?\"\n  },\n  {\n    \"input\": \"What specific types of research and development (R&D) activities are sponsored by companies? 
Please provide a comprehensive list of these activities as outlined in the table.\",\n    \"output\": \"What are the types of company-sponsored R&D activities in the table?\"\n  },\n  {\n    \"input\": \"What types of research and development (R&D) activities supported by companies are listed in the table?\",\n    \"output\": \"What are the types of company-sponsored R&D activities in the table?\"\n  },\n  {\n    \"input\": \"What was the difference in the total expenditure on research and development sponsored by the company from 2018 to 2019?\",\n    \"output\": \"What is the change in the total company-sponsored research and development expense in 2019 from 2018?\"\n  },\n  {\n    \"input\": \"What is the change in total backlog in 2019?\",\n    \"output\": \"What was the overall increase or decrease in the total backlog during the year 2019?\"\n  },\n  {\n    \"input\": \"What are the segments under Total Backlog in the table?\",\n    \"output\": \"What specific categories or divisions are included within the Total Backlog section of the table?\"\n  },\n  {\n    \"input\": \"How many distinct segments are there included in the Total Backlog?\",\n    \"output\": \"How many segments are there under Total Backlog?\"\n  },\n  {\n    \"input\": \"How many distinct segments are included within the category of Total Backlog?\",\n    \"output\": \"How many segments are there under Total Backlog?\"\n  },\n  {\n    \"input\": \"What is the specific maturity date or time period for the long-term debt that is expected to be due in 2021?\",\n    \"output\": \"What is the maturity of long-term debt for 2021?\"\n  },\n  {\n    \"input\": \"What are the components recorded under income (loss) from continuing operations before income taxes?\",\n    \"output\": \"What specific items are included in the income (loss) from continuing operations category before taxes are deducted?\"\n  },\n  {\n    \"input\": \"In which year was the amount under Foreign the smallest?\",\n    
\"output\": \"\\\"When was the lowest recorded value for the amount under the category 'Foreign' observed?\\\"\"\n  },\n  {\n    \"input\": \"What is the change in the amount under Foreign in 2019 from 2018?\",\n    \"output\": \"What is the difference in the amount under the category \\\"Foreign\\\" between 2019 and 2018?\"\n  },\n  {\n    \"input\": \"What is the change in the amount under Foreign in 2019 from 2018?\",\n    \"output\": \"How much did the Foreign amount change from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"In which year was the amount of sales in Other the largest?\",\n    \"output\": \"In which year did the category labeled as \\\"Other\\\" witness the highest recorded sales volume?\"\n  },\n  {\n    \"input\": \"What is the percentage change in the amount of Other in 2019 from 2018?\",\n    \"output\": \"What is the percentage difference in the amount of Other expenses or income in the year 2019 compared to 2018, and how can it be calculated?\"\n  },\n  {\n    \"input\": \"What is the tax expense for 2019 calculated at the U.S. statutory rate?\",\n    \"output\": \"What is the Tax expense at U.S. statutory rate for 2019?\"\n  },\n  {\n    \"input\": \"What was the tax expense for the year 2019 in the United States at the statutory rate?\",\n    \"output\": \"What is the Tax expense at U.S. 
statutory rate for 2019?\"\n  },\n  {\n    \"input\": \"What is the percentage change in nondeductible expenses in 2019 from 2018?\",\n    \"output\": \"What is the percentage difference in the amount of expenses that cannot be deducted in 2019 compared to 2018?\"\n  },\n  {\n    \"input\": \"What is the percentage change in nondeductible expenses in 2019 from 2018?\",\n    \"output\": \"What is the percentage increase or decrease in expenses that cannot be deducted from taxes in the year 2019 compared to the year 2018?\"\n  },\n  {\n    \"input\": \"\\\"When were the costs associated with purchasing and developing the ERP system recorded?\\\"\",\n    \"output\": \"In which years were the incurred costs related to the purchase and development of the ERP system recorded?\"\n  },\n  {\n    \"input\": \"In which year did the total value of property, plant, and equipment reach its highest level?\",\n    \"output\": \"In which year was the total amount of property, plant and equipment larger?\"\n  },\n  {\n    \"input\": \"In which specific year did the combined value of property, plant, and equipment reach its highest point?\",\n    \"output\": \"In which year was the total amount of property, plant and equipment larger?\"\n  },\n  {\n    \"input\": \"What is the change in leasehold improvements from 2018 to 2019?\",\n    \"output\": \"What was the year-on-year change in the value of leasehold improvements between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What is the percentage change in leasehold improvements from 2018 to 2019?\",\n    \"output\": \"What is the percentage difference in the value of leasehold improvements from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What is the percentage change in CGD sales in 2018?\",\n    \"output\": \"What was the percentage increase or decrease in CGD sales in 2018 compared to the previous year?\"\n  },\n  {\n    \"input\": \"What is the percentage change in CGD sales in 2018?\",\n    \"output\": \"What is the exact 
percentage difference in sales of CGD products between the years 2018 and the previous year?\"\n  },\n  {\n    \"input\": \"For which years is the amortization of purchased intangibles included in the CGD results recorded?\",\n    \"output\": \"In the recorded CGD results, during which specific years is the amortization of purchased intangibles included?\"\n  },\n  {\n    \"input\": \"In which year does the inclusion of the amortization of purchased intangibles result in a higher amount in the Comprehensive Gain or Loss From Discontinued Operations (CGD)?\",\n    \"output\": \"In which year is the amortization of purchased intangibles included in the CGD results larger?\"\n  },\n  {\n    \"input\": \"\\\"What criteria or requirements must be met in order to be eligible for receiving an extra cash payment amounting to $3.0 million?\\\"\",\n    \"output\": \"What is the eligibility to receive an additional cash payment of $3.0 million based on?\"\n  },\n  {\n    \"input\": \"During which years was the method of calculating net income (loss) per common share using the weighted-average number of shares outstanding implemented and documented?\",\n    \"output\": \"For which years was the weighted-average number of shares outstanding used to compute net income (loss) per common share recorded?\"\n  },\n  {\n    \"input\": \"What is the percentage change in adjusted EBITDA in 2019 from 2018?\",\n    \"output\": \"What is the exact percentage change in adjusted EBITDA between the years 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What is the percentage change in adjusted EBITDA in 2019 from 2018?\",\n    \"output\": \"What is the percentage difference in adjusted EBITDA between 2019 and 2018? 
How can this information be used to analyze the financial performance in these two years?\"\n  },\n  {\n    \"input\": \"What resulted in a higher operating income?\",\n    \"output\": \"What factor led to an increase in operating income?\"\n  },\n  {\n    \"input\": \"How many finished products were produced in the year 2019?\",\n    \"output\": \"What is the amount of finished products in 2019?\"\n  },\n  {\n    \"input\": \"What was the total quantity of completed products manufactured in the year 2019?\",\n    \"output\": \"What is the amount of finished products in 2019?\"\n  },\n  {\n    \"input\": \"In which year does the value of finished products surpass the value of finished products in other years, resulting in a higher overall value?\",\n    \"output\": \"In which year is the value of finished products higher?\"\n  },\n  {\n    \"input\": \"What are the differences in the types of materials and purchased parts between 2018 and 2019?\",\n    \"output\": \"What is the change in materials and purchased parts from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"\\\"What factors contributed to the increase in operating profits?\\\"\",\n    \"output\": \"What resulted in the improvement in operating profits?\"\n  },\n  {\n    \"input\": \"For which fiscal year(s) is the amount of amortization of purchased intangibles included in the CGD results recorded?\",\n    \"output\": \"In which fiscal year(s) is the CGD results recorded and does it include the amortization amount of purchased intangibles?\"\n  },\n  {\n    \"input\": \"For which fiscal year(s) is the amount of amortization of purchased intangibles included in the CGD results recorded?\",\n    \"output\": \"In which fiscal year(s) is the amortization amount of purchased intangibles included in the recorded results of the Comprehensive Growth and Development (CGD)?\"\n  },\n  {\n    \"input\": \"Which year, from the given weighted-average assumptions for determining benefit obligation as of September 30, 
exhibits the highest rate of compensation increase among all years?\",\n    \"output\": \"For the weighted-average assumptions used to determine benefit obligation at September 30, which year has the largest rate of compensation increase?\"\n  },\n  {\n    \"input\": \"What was the change in the discount rate between 2018 and 2019 for determining the benefit obligation at September 30?\",\n    \"output\": \"For the weighted-average assumptions used to determine benefit obligation at September 30, what is the change in the discount rate in 2019 from 2018?\"\n  },\n  {\n    \"input\": \"What is the weighted-average discount rate used to calculate the benefit obligation as of September 30 for the years 2017, 2018, and 2019?\",\n    \"output\": \"For the weighted-average assumptions used to determine benefit obligation at September 30, what is the average discount rate across 2017, 2018 and 2019?\"\n  },\n  {\n    \"input\": \"In what specific year did the rate differential for foreign exchange exceed 10.0?\",\n    \"output\": \"In which year was Foreign rate differential greater than 10.0?\"\n  },\n  {\n    \"input\": \"What was the average amount of state taxes, excluding federal benefits, for the years 2017 to 2019?\",\n    \"output\": \"What was the average State taxes net of federal benefit for 2017-2019?\"\n  },\n  {\n    \"input\": \"How much did the Research and Development credits shift between 2018 and 2019?\",\n    \"output\": \"What is the change in the Research and development credits from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What is the difference in the amount of Research and Development credits between 2018 and 2019?\",\n    \"output\": \"What is the change in the Research and development credits from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What were the respective gains in 2019 and 2018?\",\n    \"output\": \"What were the gains achieved in 2019 and 2018, respectively?\"\n  },\n  {\n    \"input\": \"What were the respective gains in 2019 
and 2018?\",\n    \"output\": \"What were the gains in 2019 and 2018, respectively?\"\n  },\n  {\n    \"input\": \"What is the specific year when the Loss from discontinued operations was exactly zero?\",\n    \"output\": \"In which year was Loss from discontinued operations 0?\"\n  },\n  {\n    \"input\": \"What was the average amount of money lost from 2017 to 2019?\",\n    \"output\": \"What was the average Net loss for 2017 to 2019?\"\n  },\n  {\n    \"input\": \"What is the change in the Weighted average common shares outstanding-basic from 2018 to 2019?\",\n    \"output\": \"What is the difference in the number of weighted average common shares outstanding-basic between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What was the depreciation and amortization expense related to property and equipment for 2019?\",\n    \"output\": \"What was the specific amount of depreciation and amortization expense attributed to property and equipment for the fiscal year 2019?\"\n  },\n  {\n    \"input\": \"\\\"What was the precise monetary worth or estimated value, expressed in the currency of the specific country or region, of the category of Machinery and Equipment during the calendar year of 2019?\\\"\",\n    \"output\": \"What was the value of Machinery and equipment in 2019?\"\n  },\n  {\n    \"input\": \"What was the exact monetary value of Machinery and equipment in the year 2019?\",\n    \"output\": \"What was the value of Machinery and equipment in 2019?\"\n  },\n  {\n    \"input\": \"In what year did the construction in process have a value of less than 30,000 thousands?\",\n    \"output\": \"In which year was value of Construction in process less than 30,000 thousands?\"\n  },\n  {\n    \"input\": \"In which specific year did the value of Construction in process fall below 30,000 thousands?\",\n    \"output\": \"In which year was value of Construction in process less than 30,000 thousands?\"\n  },\n  {\n    \"input\": \"What were the average values of Machinery 
and equipment in both 2018 and 2019?\",\n    \"output\": \"What was the average value of Machinery and equipment for 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What is the average telecommunication expenditure during the fiscal years 2018 and 2019?\",\n    \"output\": \"What is the average Telecom for fiscal year 2018 and 2019?\"\n  },\n  {\n    \"input\": \"During which year did the number of Telecom users fall below 200,000 thousand?\",\n    \"output\": \"In which year was Telecom less than 200,000 thousand?\"\n  },\n  {\n    \"input\": \"What was the exact percentage decrease of the revenue in 2019 compared to the previous year?\",\n    \"output\": \"What was the decrease in the revenue in 2019?\"\n  },\n  {\n    \"input\": \"What was the specific percentage decrease in revenue for the fiscal year of 2019?\",\n    \"output\": \"What was the decrease in the revenue in 2019?\"\n  },\n  {\n    \"input\": \"What is the average value of the Telecom industry for the fiscal years 2017 and 2018 combined?\",\n    \"output\": \"What is the average Telecom value for fiscal year 2017 and 2018?\"\n  },\n  {\n    \"input\": \"What was the average value of the telecommunications industry for the fiscal years 2017 and 2018?\",\n    \"output\": \"What is the average Telecom value for fiscal year 2017 and 2018?\"\n  },\n  {\n    \"input\": \"In which year was Telecom value less than 300,000 thousand?\",\n    \"output\": \"In what specific year did the value of Telecom drop below 300,000 thousand?\"\n  },\n  {\n    \"input\": \"What were the values of divestments and acquisitions in 2019, 2018, and 2017?\",\n    \"output\": \"What was the (Divested)/acquired values in 2019, 2018 and 2017 respectively?\"\n  },\n  {\n    \"input\": \"In what specific year did the beginning balance fall below 5,000,000?\",\n    \"output\": \"In which year was Balance — beginning of year less than 5,000 thousands?\"\n  },\n  {\n    \"input\": \"In what specific year did the beginning balance equate 
to less than 5,000 thousands (5 million)?\",\n    \"output\": \"In which year was Balance — beginning of year less than 5,000 thousands?\"\n  },\n  {\n    \"input\": \"What was the difference in the amount of expenses allocated to provisions between the years 2017 and 2018?\",\n    \"output\": \"What was the change in the Provisions/(expense) from 2017 to 2018?\"\n  },\n  {\n    \"input\": \"What was the difference in the Provisions/(expense) between 2017 and 2018?\",\n    \"output\": \"What was the change in the Provisions/(expense) from 2017 to 2018?\"\n  },\n  {\n    \"input\": \"What was the amount of charge-offs recorded in the year 2019?\",\n    \"output\": \"What was the charge-offs in 2019?\"\n  },\n  {\n    \"input\": \"\\\"What was the total amount of charge-offs incurred in 2019?\\\"\",\n    \"output\": \"What was the charge-offs in 2019?\"\n  },\n  {\n    \"input\": \"\\\"What is the average net Provision for the years 2017, 2018, and 2019?\\\"\",\n    \"output\": \"What was the average Provision, net for 2017-2019?\"\n  },\n  {\n    \"input\": \"What was the average net provision for the years 2017, 2018, and 2019 combined?\",\n    \"output\": \"What was the average Provision, net for 2017-2019?\"\n  },\n  {\n    \"input\": \"What factors determine the value that is obtained upon vesting?\",\n    \"output\": \"What is the value realized on vesting based on?\"\n  },\n  {\n    \"input\": \"What were the specific equity awards held by each named executive officer throughout the year 2019?\",\n    \"output\": \"What were the equity awards held by named executives during 2019?\"\n  },\n  {\n    \"input\": \"What were the equity awards granted to the named executives in 2019?\",\n    \"output\": \"What were the equity awards held by named executives during 2019?\"\n  },\n  {\n    \"input\": \"What is the significance of the number of shares obtained upon vesting?\",\n    \"output\": \"What does the number of shares acquired on vesting represent?\"\n  },\n  
{\n    \"input\": \"Where was the reported combined net loss of $82 million disclosed or published?\",\n    \"output\": \"Where was the combined net loss of $82 million reported in?\"\n  },\n  {\n    \"input\": \"What was the percentage difference in the Adjusted EBITDA margin from 2017 to 2019?\",\n    \"output\": \"What is the total change in the Adjusted EBITDA margin between 2019 and 2017?\"\n  },\n  {\n    \"input\": \"What specific expenses are included in the integration and transformation costs that have an impact on the Adjusted EBITDA?\",\n    \"output\": \"What does the integration and transformation costs impacting Adjusted EBITDA include?\"\n  },\n  {\n    \"input\": \"What is the change in total special items impacting adjusted EBITDA between 2018 and 2019?\",\n    \"output\": \"What was the difference in the total number of special items affecting adjusted EBITDA from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What is the change in total special items impacting adjusted EBITDA between 2018 and 2019?\",\n    \"output\": \"How did the total special items affecting adjusted EBITDA change from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What is the percentage change in total special items impacting adjusted EBITDA between 2018 and 2019?\",\n    \"output\": \"What is the percentage difference in the amount of special items affecting adjusted EBITDA from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What is the percentage change in total special items impacting adjusted EBITDA between 2018 and 2019?\",\n    \"output\": \"What is the percentage difference in the total number of special items affecting adjusted EBITDA from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"How is the amortization process for deferred acquisition and fulfillment costs conducted?\",\n    \"output\": \"How are the deferred acquisition and fulfillment costs amortized?\"\n  },\n  {\n    \"input\": \"What is the percentage change for the end of period balance for Fulfillment Costs 
when comparing the beginning of period balance?\",\n    \"output\": \"What is the percentage change in the end of period balance for Fulfillment Costs when comparing it with the beginning of period balance?\"\n  },\n  {\n    \"input\": \"What was the total value of options that were available to be exercised and outstanding as of December 31, 2019?\",\n    \"output\": \"What was the aggregate intrinsic value of options outstanding and exercisable at December 31, 2019? \"\n  },\n  {\n    \"input\": \"Which year has a higher weighted-average exercise price for options outstanding and exercisable?\",\n    \"output\": \"Which year exhibits a greater weighted-average exercise price for options that are currently outstanding and available for exercise?\"\n  },\n  {\n    \"input\": \"What does the deferred income tax benefit (expense) under post-retirement benefit plans currently recognize? \",\n    \"output\": \"What recognition does the deferred income tax benefit (expense) currently provide for under post-retirement benefit plans?\"\n  },\n  {\n    \"input\": \"What is the percentage increase or decrease in the deferred income tax benefit or expense for post-retirement benefit plans between 2017 and 2018?\",\n    \"output\": \"What is the percentage change in the deferred income tax benefit (expense) for post-retirement benefit plans in 2018 from 2017?\"\n  },\n  {\n    \"input\": \"What was the percentage difference in the deferred income tax benefit (expense) for post-retirement benefit plans between 2018 and 2017?\",\n    \"output\": \"What is the percentage change in the deferred income tax benefit (expense) for post-retirement benefit plans in 2018 from 2017?\"\n  },\n  {\n    \"input\": \"How is the Adjusted EBITDA Run Rate determined?\",\n    \"output\": \"How is the Adjusted EBITDA Run Rate calculated and what factors are considered in determining it?\"\n  },\n  {\n    \"input\": \"What are the different performance levels?\",\n    \"output\": \"What are the 
various levels of performance and can you provide more details about each level?\"\n  },\n  {\n    \"input\": \"How many levels of performance would result in a payout exceeding 50% of the target award?\",\n    \"output\": \"How many performance levels would the payout as % of target award be above 50%?\"\n  },\n  {\n    \"input\": \"How many performance levels have a payout as a percentage of the target award that exceeds 50%?\",\n    \"output\": \"How many performance levels would the payout as % of target award be above 50%?\"\n  },\n  {\n    \"input\": \"What is the main factor or aspect that goodwill is specifically connected or associated with?\",\n    \"output\": \"What is goodwill attributable to?\"\n  },\n  {\n    \"input\": \"What is property, plant and equipment expressed as a ratio of the total estimated aggregate consideration under the balance as of December 31, 2017?\",\n    \"output\": \"What is the ratio of property, plant, and equipment to the total estimated aggregate consideration shown in the balance as of December 31, 2017?\"\n  },\n  {\n    \"input\": \"What is property, plant and equipment expressed as a ratio of the total estimated aggregate consideration under the balance as of December 31, 2017?\",\n    \"output\": \"What is the ratio of property, plant and equipment to the total estimated aggregate consideration on the balance sheet as of December 31, 2017?\"\n  },\n  {\n    \"input\": \"What is the percentage change in other non current assets in 2018?\",\n    \"output\": \"What was the percentage change in other non-current assets during the year 2018?\"\n  },\n  {\n    \"input\": \"Under what conditions would officers be entitled to the years of welfare benefits?\",\n    \"output\": \"When would officers have the right to receive welfare benefits for a prolonged period of time?\"\n  },\n  {\n    \"input\": \"Which types of officers are listed in the table?\",\n    \"output\": \"\\\"What types of officers are included in the table 
displayed?\\\"\"\n  },\n  {\n    \"input\": \"Which type of officer has the longest protected period?\",\n    \"output\": \"Which rank of officer within a specific profession or organization is entitled to the longest period of protection?\"\n  },\n  {\n    \"input\": \"Which type of officer has the longest protected period?\",\n    \"output\": \"Which type of officer is granted the longest period of protection against any form of termination or dismissal?\"\n  },\n  {\n    \"input\": \"What was the gross debt amount in the year 2019?\",\n    \"output\": \"What is the gross debt in 2019?\"\n  },\n  {\n    \"input\": \"What was the difference in the total amount of debt in 2019 compared to 2018?\",\n    \"output\": \"What is the change in gross debt in 2019 from 2018?\"\n  },\n  {\n    \"input\": \"What is the percentage change in gross debt in 2019 from 2018?\",\n    \"output\": \"What was the percentage increase/decrease in the gross debt from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What is the percentage change in gross debt in 2019 from 2018?\",\n    \"output\": \"What is the percentage difference in the amount of gross debt from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"On what basis are the estimated benefit payments made, considering the underlying assumptions?\",\n    \"output\": \"What assumptions are the estimated benefit payments based on?\"\n  },\n  {\n    \"input\": \"How many separate periods are highlighted in the table?\",\n    \"output\": \"How many different period segments are highlighted in the table?\"\n  },\n  {\n    \"input\": \"How many distinct period segments are specifically emphasized in the provided table?\",\n    \"output\": \"How many different period segments are highlighted in the table?\"\n  },\n  {\n    \"input\": \"What is the percentage change in combined pension plan in 2021 from 2020?\",\n    \"output\": \"What is the percentage difference in the combined pension plan between 2021 and 2020?\"\n  },\n  {\n    \"input\": 
\"What was the extent or percentage of the business that falls under the category of Operations and Other that was sold?\",\n    \"output\": \"What portion of business under Operations and Other was sold?\"\n  },\n  {\n    \"input\": \"What is the total segment revenue in 2019?\",\n    \"output\": \"What is the exact amount of revenue generated by each segment in 2019 and what is the sum of these revenues overall?\"\n  },\n  {\n    \"input\": \"What components are under operating revenue?\",\n    \"output\": \"What specific components contribute to the total operating revenue of a company or organization?\"\n  },\n  {\n    \"input\": \"What additional information does Note 17 provide?\",\n    \"output\": \"What is the additional information provided in Note 17?\"\n  },\n  {\n    \"input\": \"What additional information does Note 17 provide?\",\n    \"output\": \"What is the content of Note 17 and how does it provide additional information?\"\n  },\n  {\n    \"input\": \"\\\"What was the exact sum of adjusted EBITDA for the entire year of 2019?\\\"\",\n    \"output\": \"What is the total adjusted EBITDA in 2019?\"\n  },\n  {\n    \"input\": \"What is the change in the adjusted EBITDA under Wholesale in 2019 from 2018?\",\n    \"output\": \"What was the difference in adjusted EBITDA for the Wholesale category between 2019 and 2018?\"\n  },\n  {\n    \"input\": \"What is the average total adjusted EBITDA over the three years?\",\n    \"output\": \"What is the mean value of the total adjusted EBITDA for the three-year period?\"\n  },\n  {\n    \"input\": \"How many executives among the present ones in the organization receive a short-term incentive bonus that exceeds $500,000 in amount?\",\n    \"output\": \"How many current executives have a STI bonus amount greater than $500,000?\"\n  },\n  {\n    \"input\": \"How many executives presently receive a Short-Term Incentive (STI) bonus exceeding $500,000?\",\n    \"output\": \"How many current executives have a STI bonus 
amount greater than $500,000?\"\n  },\n  {\n    \"input\": \"What is the salary earned by Indraneel Dev in 2019, and how does it compare to his/her STI bonus amount, expressed as a ratio?\",\n    \"output\": \"What is Indraneel Dev's salary earned during 2019 expressed as a ratio of his/her STI bonus amount?\"\n  },\n  {\n    \"input\": \"What is included in the category of 'Support assets'? Please provide a detailed explanation of the various components that constitute these assets.\",\n    \"output\": \"What do 'Support assets' consist of?\"\n  },\n  {\n    \"input\": \"What is the specific figure for Reported Balances' operating revenue?\",\n    \"output\": \"What is the Reported Balances operating revenue?\"\n  },\n  {\n    \"input\": \"What was the variation in the deferred income tax benefit (expense) related to pension plans between 2019 and 2018?\",\n    \"output\": \"What is the change in the deferred income tax benefit (expense) for pension plans in 2019 from 2018?\"\n  },\n  {\n    \"input\": \"What is the percentage change in the deferred income tax benefit (expense) for pension plans in 2019 from 2018?\",\n    \"output\": \"What is the exact percentage difference in the deferred income tax benefit or expense for pension plans between the years 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What was the outcome of the continuous efforts made to downsize the company's workforce?\",\n    \"output\": \"What was recognized as a result of ongoing efforts to reduce the company's workforce?\"\n  },\n  {\n    \"input\": \"What is the average amount of service costs across 2017, 2018 and 2019?\",\n    \"output\": \"What is the mean service cost from 2017 to 2019?\"\n  },\n  {\n    \"input\": \"What was the total amount of money owed by customers that had not been received as of December 31, 2019?\",\n    \"output\": \"What was the gross customer receivables at December 31, 2019?\"\n  },\n  {\n    \"input\": \"What was the total amount of customer receivables as 
recorded on the balance sheet at the end of December 31, 2019, including all outstanding invoices and amounts owed to the company by its customers?\",\n    \"output\": \"What was the gross customer receivables at December 31, 2019?\"\n  },\n  {\n    \"input\": \"What is included in the total value of long-term debt?\",\n    \"output\": \"What does total long-term debt include?\"\n  },\n  {\n    \"input\": \"What is included in the total amount of long-term debt?\",\n    \"output\": \"What does total long-term debt include?\"\n  },\n  {\n    \"input\": \"What was the approach or method adopted in 2019 to determine the total assets of an entity?\",\n    \"output\": \"Regarding the total assets, what was adopted in 2019?\"\n  },\n  {\n    \"input\": \"How many different dates declared had a dividend per share of $0.250 in 2018 and 2019?\",\n    \"output\": \"In 2018 and 2019, how many dates were declared with a dividend per share of $0.250?\"\n  },\n  {\n    \"input\": \"\\\"What specific categories of expenses were documented in the records for the year 2018?\\\"\",\n    \"output\": \"What types of expenses were recorded during 2018?\"\n  },\n  {\n    \"input\": \"What were the recorded expenses in 2018 and what categories do they fall under?\",\n    \"output\": \"What types of expenses were recorded during 2018?\"\n  },\n  {\n    \"input\": \"\\\"What is the total amount of non-cash goodwill impairment charges that are not tax-deductible, for the years 2019 and 2018?\\\"\",\n    \"output\": \"What is the sum of non-cash, non-tax-deductible goodwill impairment charges for 2019 and 2018?\"\n  },\n  {\n    \"input\": \"How much were the cumulative acquisition-related expenses incurred specifically by Level 3 in the year 2019?\",\n    \"output\": \"What was the amount of cumulative acquisition-related expenses incurred for Level 3 in 2019?\"\n  },\n  {\n    \"input\": \"What was the total amount of expenses related to acquisitions that Level 3 incurred in the year 2019? 
Please provide the specific cumulative value.\",\n    \"output\": \"What was the amount of cumulative acquisition-related expenses incurred for Level 3 in 2019?\"\n  },\n  {\n    \"input\": \"In which year did the company record the smallest total expenditure associated with acquisitions?\",\n    \"output\": \"Which year incurred the lowest amount of total acquisition-related expenses?\"\n  },\n  {\n    \"input\": \"In which specific year did the company experience the least amount of expenses related to acquisition?\",\n    \"output\": \"Which year incurred the lowest amount of total acquisition-related expenses?\"\n  },\n  {\n    \"input\": \"Against what were the estimates offset for the years 2019, 2018, and 2017?\",\n    \"output\": \"What are the 2019, 2018 and 2017 change in estimates offset against?\"\n  },\n  {\n    \"input\": \"What factors were the changes in estimates for 2019, 2018, and 2017 set off against?\",\n    \"output\": \"What are the 2019, 2018 and 2017 change in estimates offset against?\"\n  },\n  {\n    \"input\": \"In what year was the highest accretion expense recorded?\",\n    \"output\": \"Which year has the largest accretion expense?\"\n  },\n  {\n    \"input\": \"What is the year with the highest accretion expense?\",\n    \"output\": \"Which year has the largest accretion expense?\"\n  },\n  {\n    \"input\": \"What is the average accretion expense across 2017, 2018 and 2019?\",\n    \"output\": \"What is the average expense for accretion incurred during the years 2017, 2018, and 2019?\"\n  },\n  {\n    \"input\": \"Where is the recording location for the current portion of the post-retirement benefit obligations?\",\n    \"output\": \"Where is the current portion of the post-retirement benefit obligations recorded?\"\n  },\n  {\n    \"input\": \"In which specific year is the fair value of plan assets under post-retirement benefit plans higher than in other years?\",\n    \"output\": \"In which year is the fair value of plan assets 
under post-retirement benefit plans higher?\"\n  },\n  {\n    \"input\": \"\\\"What were the depreciation and amortization figures for the fiscal year 2019?\\\"\",\n    \"output\": \"What is the Depreciation and amortization for 2019?\"\n  },\n  {\n    \"input\": \"What is the total adjusted EBITDA for each segment in 2019?\",\n    \"output\": \"What is the total segment adjusted EBITDA in 2019?\"\n  },\n  {\n    \"input\": \"What is the total amount of income tax expense (benefit) recorded in the financial statements for the years 2018 and 2019?\",\n    \"output\": \"What is the sum of income tax expense (benefit) in 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What is the percentage change in income tax expense (benefit) in 2019 from 2018?\",\n    \"output\": \"What is the percentage difference in income tax expense (benefit) between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What was the total amount of interest expenses incurred in the year 2019?\",\n    \"output\": \"What is the total interest expense in 2019?\"\n  },\n  {\n    \"input\": \"What was the total amount of interest paid in expenses during the year 2019?\",\n    \"output\": \"What is the total interest expense in 2019?\"\n  },\n  {\n    \"input\": \"What segments of interest expense are presented in the table?\",\n    \"output\": \"Which specific segments of interest expense are displayed in the table provided?\"\n  },\n  {\n    \"input\": \"What is the specific monetary difference in the total interest expense for the year 2019 compared to 2018?\",\n    \"output\": \"What is the change in the gross interest expense in 2019 from 2018?\"\n  },\n  {\n    \"input\": \"What was the difference in the amount of money spent on interest expenses in 2019 compared to 2018?\",\n    \"output\": \"What is the change in the gross interest expense in 2019 from 2018?\"\n  },\n  {\n    \"input\": \"What is the average amount of interest paid over the years 2017, 2018, and 2019?\",\n    \"output\": \"What is the 
average total interest expense for 2017 to 2019?\"\n  },\n  {\n    \"input\": \"\\\"What is the average annual value of unvested restricted stock awards that were excluded from antidilutive considerations in the years 2017, 2018, and 2019?\\\"\",\n    \"output\": \"What is the average annual amount of unvested restricted stock awards that are antidilutive excluded in 2017, 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What were the respective average annual amounts of unvested restricted stock awards that were excluded due to being antidilutive in 2017, 2018, and 2019?\",\n    \"output\": \"What is the average annual amount of unvested restricted stock awards that are antidilutive excluded in 2017, 2018 and 2019?\"\n  },\n  {\n    \"input\": \"Which year has a larger amount of contract acquisition costs?\",\n    \"output\": \"In which specific year do the contract acquisition costs exhibit a greater magnitude or quantity compared to other years?\"\n  },\n  {\n    \"input\": \"What is the change in contract assets in 2019?\",\n    \"output\": \"What is the net increase or decrease in contract assets during the year 2019?\"\n  },\n  {\n    \"input\": \"What percentage of Level 3 goodwill is specifically allocated to the consumer segment?\",\n    \"output\": \"How much of Level 3 goodwill is allocated to consumer?\"\n  },\n  {\n    \"input\": \"What is the ratio of goodwill to the total amount as of December 31, 2018, in the context of Business?\",\n    \"output\": \"What is the amount of goodwill under Business as a ratio of the Total amount as of December 31, 2018?\"\n  },\n  {\n    \"input\": \"As of December 31, 2018, what is the ratio of goodwill to the total amount in Business?\",\n    \"output\": \"What is the amount of goodwill under Business as a ratio of the Total amount as of December 31, 2018?\"\n  },\n  {\n    \"input\": \"What is the combined goodwill figure for the years 2017 and 2018?\",\n    \"output\": \"What is the sum of the total amount of 
goodwill for 2017 and 2018?\"\n  },\n  {\n    \"input\": \"What is the combined value of goodwill for both 2017 and 2018?\",\n    \"output\": \"What is the sum of the total amount of goodwill for 2017 and 2018?\"\n  },\n  {\n    \"input\": \"\\\"What specific items are encompassed within the current liabilities section of the consolidated balance sheets?\\\"\",\n    \"output\": \"What do the current liabilities reflected in the consolidated balance sheets include?\"\n  },\n  {\n    \"input\": \"What specific items are included under current liabilities in the consolidated balance sheets?\",\n    \"output\": \"What do the current liabilities reflected in the consolidated balance sheets include?\"\n  },\n  {\n    \"input\": \"What items are typically categorized as Other current liabilities in financial statements?\",\n    \"output\": \"What are the items included under Other current liabilities?\"\n  },\n  {\n    \"input\": \"In which specific year do capital expenditures account for a greater value of accounts payable?\",\n    \"output\": \"Which year has a larger amount of accounts payable associated with capital expenditures?\"\n  },\n  {\n    \"input\": \"Which specific year indicates a higher level of accounts payable specifically linked to capital expenditures?\",\n    \"output\": \"Which year has a larger amount of accounts payable associated with capital expenditures?\"\n  },\n  {\n    \"input\": \"What distinguishes the total grant values of Indraneel Dev and Stacey W. Goff?\",\n    \"output\": \"What is the difference between Indraneel Dev and Stacey W. Goff's total grant values?\"\n  },\n  {\n    \"input\": \"What is the disparity in total grant amounts between Indraneel Dev and Stacey W. Goff?\",\n    \"output\": \"What is the difference between Indraneel Dev and Stacey W. 
Goff's total grant values?\"\n  },\n  {\n    \"input\": \"What is the monthly average for the number of shares that are typically withheld for tax purposes?\",\n    \"output\": \"What is the average number of shares withheld for taxes per month?\"\n  },\n  {\n    \"input\": \"\\\"What is the monthly average amount of shares that are typically withheld for tax purposes?\\\"\",\n    \"output\": \"What is the average number of shares withheld for taxes per month?\"\n  },\n  {\n    \"input\": \"What is the total amount of minimum lease payments expected for non-cancellable operating leases in 2019?\",\n    \"output\": \"How much is the 2019 total future minimum lease payments under non-cancellable operating leases?\"\n  },\n  {\n    \"input\": \"What was the percentage change in future minimum lease payments due within one year between 2018 and 2019?\",\n    \"output\": \"How much did future minimum lease payments due within one year change by between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"Which year, 2018 or 2019, had a higher value of capital?\",\n    \"output\": \"Between 2018 and 2019, which year had a greater amount of capital?\"\n  },\n  {\n    \"input\": \"In terms of owned assets, which year, 2018 or 2019, had a higher overall value or quantity of assets?\",\n    \"output\": \"Between 2018 and 2019, which year had a greater amount of owned assets?\"\n  },\n  {\n    \"input\": \"Which year, 2018 or 2019, had a higher total value of assets owned?\",\n    \"output\": \"Between 2018 and 2019, which year had a greater amount of owned assets?\"\n  },\n  {\n    \"input\": \"What were the average values of leased assets for the years 2018 and 2019?\",\n    \"output\": \"What is the average leased assets for 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What was the average value of leased assets for the years 2018 and 2019?\",\n    \"output\": \"What is the average leased assets for 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What is the average trade 
receivables included within non-current assets?\",\n    \"output\": \"What is the average amount of trade receivables included within the category of non-current assets?\"\n  },\n  {\n    \"input\": \"What is the average prepayments included in non-current assets?\",\n    \"output\": \"What is the average amount of prepayments that are included in non-current assets?\"\n  },\n  {\n    \"input\": \"What financial items does guidance basis comprise of?\",\n    \"output\": \"What specific components does the guidance basis in financial terms consist of?\"\n  },\n  {\n    \"input\": \"Which year, either 2018 or 2019, had a higher adjusted EBITDA on a guidance basis?\",\n    \"output\": \"Between 2018 and 2019, which year had higher adjusted EBITDA, guidance basis?\"\n  },\n  {\n    \"input\": \"Which year, 2018 or 2019, had a higher adjusted EBITDA on a guidance basis? Please provide the comparison between the two specified years.\",\n    \"output\": \"Between 2018 and 2019, which year had higher adjusted EBITDA, guidance basis?\"\n  },\n  {\n    \"input\": \"What is the 2019 average adjusted EBITDA, guidance basis?\",\n    \"output\": \"\\\"What was the average adjusted EBITDA on a guidance basis for the year 2019?\\\"\"\n  },\n  {\n    \"input\": \"What is the average amount of cash and cash equivalents that are typically reported in the statement of cash flows?\",\n    \"output\": \"What is the average cash and cash equivalents presented in the statement of cash flows?\"\n  },\n  {\n    \"input\": \"What is the average amount of cash and cash equivalents reported in the statement of cash flows?\",\n    \"output\": \"What is the average cash and cash equivalents presented in the statement of cash flows?\"\n  },\n  {\n    \"input\": \"What are the different types of financial items that are included in the data of a consolidated income statement?\",\n    \"output\": \"What financial items does the consolidated income statement data comprise of?\"\n  },\n  {\n    
\"input\": \"\\\"What is the mean total income for the fiscal years 2018 and 2019?\\\"\",\n    \"output\": \"What is the average revenue for 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What were the average revenues for the years 2018 and 2019?\",\n    \"output\": \"What is the average revenue for 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What was the average revenue in 2017 and 2018?\",\n    \"output\": \"What is the average revenue for 2017 and 2018?\"\n  },\n  {\n    \"input\": \"\\\"What was the average revenue in both 2017 and 2018? Please provide the specific revenue figures for each year.\\\"\",\n    \"output\": \"What is the average revenue for 2017 and 2018?\"\n  },\n  {\n    \"input\": \"\\\"What is the revenue change reported specifically for Europe?\\\"\",\n    \"output\": \"What is the reported change in revenue - europe?\"\n  },\n  {\n    \"input\": \"What is the reported change in revenue for Europe? How has the revenue in Europe been affected according to the reports?\",\n    \"output\": \"What is the reported change in revenue - europe?\"\n  },\n  {\n    \"input\": \"\\\"Which country, Germany or Italy, has experienced a greater overall growth in the organic sector over a specified period of time?\\\"\",\n    \"output\": \"Between Germany and Italy, which one has a higher organic change?\"\n  },\n  {\n    \"input\": \"What does cash consideration paid comprise of?\",\n    \"output\": \"What is included in the cash consideration paid?\"\n  },\n  {\n    \"input\": \"\\\"In the table, what is the timeframe for the financial years whose information is displayed?\\\"\",\n    \"output\": \"Which financial years' information is shown in the table?\"\n  },\n  {\n    \"input\": \"What is the total value of acquisitions made in 2019 throughout the year?\",\n    \"output\": \"How much is the 2019 acquisitions during the year ?\"\n  },\n  {\n    \"input\": \"What was the total value of acquisitions made in 2019 throughout the year?\",\n    \"output\": 
\"How much is the 2019 acquisitions during the year ?\"\n  },\n  {\n    \"input\": \"Which year, 2018 or 2019, witnessed a higher number of acquisitions throughout the entire year?\",\n    \"output\": \"Between 2018 and 2019, which year had a greater amount of acquisitions during the year?\"\n  },\n  {\n    \"input\": \"What are the total operating lease commitments?\",\n    \"output\": \"What is the sum of all the commitments related to operating leases?\"\n  },\n  {\n    \"input\": \"What are the total operating lease commitments?\",\n    \"output\": \"What is the sum of all commitments made for operating leases?\"\n  },\n  {\n    \"input\": \"What is the total outstanding amount of financial liabilities that need to be paid within the shortest timeframe?\",\n    \"output\": \"How much financial liabilities are due for payment soonest?\"\n  },\n  {\n    \"input\": \"What is the proportion of financial liabilities in relation to the overall sum of contractual obligations and commitments?\",\n    \"output\": \"What percentage of total contractual obligations and commitments is the financial liabilities?\"\n  },\n  {\n    \"input\": \"\\\"What is the average amount of revenue generated from services according to IAS 18 for the years 2018 and 2019?\\\"\",\n    \"output\": \"What is the average service revenue between 2018 and 2019 IAS 18?\"\n  },\n  {\n    \"input\": \"What is the difference between average service revenue and average other revenue?\",\n    \"output\": \"What distinguishes average service revenue from average other revenue?\"\n  },\n  {\n    \"input\": \"Which specific financial years are being represented in the provided table?\",\n    \"output\": \"Which financial years' information is shown in the table?\"\n  },\n  {\n    \"input\": \"What type of short-term investments are shown in the table?\",\n    \"output\": \"\\\"What specific types of short-term investments are displayed in the provided table?\\\"\"\n  },\n  {\n    \"input\": \"What is the 
percentage change in the value of managed investment funds from 2018 to 2019?\",\n    \"output\": \"What is the change in managed investment funds between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What is the difference in the value of managed investment funds from 2018 to 2019?\",\n    \"output\": \"What is the change in managed investment funds between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What type of long-term borrowings are shown in the table?\",\n    \"output\": \"What are the specific types of long-term borrowings that are presented in the table?\"\n  },\n  {\n    \"input\": \"What type of long-term borrowings are shown in the table?\",\n    \"output\": \"What is the specific category of long-term borrowings that is displayed in the table?\"\n  },\n  {\n    \"input\": \"What is the percentage change in the number of shares acquired from February to March 2019?\",\n    \"output\": \"What is the percentage change between shares purchased in February and March 2019?\"\n  },\n  {\n    \"input\": \"Which financial years' information is shown in the table?\",\n    \"output\": \"Which fiscal years' data is displayed in the given table?\"\n  },\n  {\n    \"input\": \"Which financial years' information is shown in the table?\",\n    \"output\": \"Which specific financial year's information is depicted in the table?\"\n  },\n  {\n    \"input\": \"How much is the 2019 audit fees ?\",\n    \"output\": \"What is the exact cost of the audit fees for the year 2019?\"\n  },\n  {\n    \"input\": \"What were the average fees for audits in 2018 and 2019?\",\n    \"output\": \"What is the average audit fees for 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What were the average audit fees in both 2018 and 2019?\",\n    \"output\": \"What is the average audit fees for 2018 and 2019?\"\n  },\n  {\n    \"input\": \"\\\"What is the average total fees for the years 2018 and 2019 combined?\\\"\",\n    \"output\": \"What is the average total fees for 2018 and 2019?\"\n  
},\n  {\n    \"input\": \"What is the average sum of fees for the years 2018 and 2019 combined?\",\n    \"output\": \"What is the average total fees for 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What is the discrepancy between the average fees charged for audits and the average total fees incurred in 2018 and 2019?\",\n    \"output\": \"What is the difference between the average audit fees and the average total fees for 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What is the disparity between the mean charges for audits and the mean overall charges, comparing the years 2018 and 2019?\",\n    \"output\": \"What is the difference between the average audit fees and the average total fees for 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What is the amount of the non-current liabilities' 2018 other payables?\",\n    \"output\": \"How much is the 2018 other payables included within non-current liabilities?\"\n  },\n  {\n    \"input\": \"What is the value of the 2019 other payables included in current liabilities, excluding the amount payable for the share buyback?\",\n    \"output\": \"How much is the 2019 other payables included within current liabilities excluding the amount payable in relation to the share buyback?\"\n  },\n  {\n    \"input\": \"What is the value of the 2019 other payables included in the current liabilities, except for the amount payable related to the share buyback?\",\n    \"output\": \"How much is the 2019 other payables included within current liabilities excluding the amount payable in relation to the share buyback?\"\n  },\n  {\n    \"input\": \"What is the total amount of money that is owed to subsidiaries within one year as of 2019?\",\n    \"output\": \"What is the 2019 amounts owed to subsidiaries within one year?\"\n  },\n  {\n    \"input\": \"Which year experiences a greater number of bank loans and other loans that are due within one year?\",\n    \"output\": \"Which year has a higher amount of bank loans and other loans under 
amounts falling due within one year?\"\n  },\n  {\n    \"input\": \"How much is 2019 cash at bank and in hand ?\",\n    \"output\": \"What is the total amount of cash that was available in the bank and on hand in 2019?\"\n  },\n  {\n    \"input\": \"Between 2018 and 2019, which year had a greater amount of money market funds?\",\n    \"output\": \"Which year, 2018 or 2019, had a higher value of money market funds?\"\n  },\n  {\n    \"input\": \"Between 2018 and 2019, which year had a greater amount of money market funds?\",\n    \"output\": \"Which year had a higher volume of funds invested in money market funds: 2018 or 2019?\"\n  },\n  {\n    \"input\": \"Which year, 2018 or 2019, witnessed a higher quantity of government securities being issued?\",\n    \"output\": \"Between 2018 and 2019, which year had a greater amount of government securities?\"\n  },\n  {\n    \"input\": \"Between 2018 and 2019, which year had more ordinary shares allotted?\",\n    \"output\": \"In the period spanning from 2018 to 2019, which year witnessed a higher allocation of ordinary shares?\"\n  },\n  {\n    \"input\": \"What is the current cost of servicing for the year 2019?\",\n    \"output\": \"How much is the 2019 current service cost?\"\n  },\n  {\n    \"input\": \"What specific types of investments that are no longer held are displayed in the table?\",\n    \"output\": \"What type of non-current investments are shown in the table?\"\n  },\n  {\n    \"input\": \"In the table, which types of investments that are not currently being utilized or held are depicted?\",\n    \"output\": \"What type of non-current investments are shown in the table?\"\n  },\n  {\n    \"input\": \"How much is the 2018 debt securities?\",\n    \"output\": \"What is the value or price of the debt securities issued in 2018?\"\n  },\n  {\n    \"input\": \"How much is the 2018 debt securities?\",\n    \"output\": \"What is the cost of the 2018 debt securities?\"\n  },\n  {\n    \"input\": \"Which financial 
years' information is shown in the table?\",\n    \"output\": \"Which financial years are displayed in the provided table?\"\n  },\n  {\n    \"input\": \"What is the total projected income and expenditure for the year 2018?\",\n    \"output\": \"How much is the 2018 aggregated expected income expense ?\"\n  },\n  {\n    \"input\": \"What is the projected total income expense for 2018 after aggregating all relevant information?\",\n    \"output\": \"How much is the 2018 aggregated expected income expense ?\"\n  },\n  {\n    \"input\": \"What is the 2018 deferred tax on overseas earnings, excluding the 15€m charge relating to the combination of Vodafone India with Idea Cellular?\",\n    \"output\": \"What is the amount of deferred tax on overseas earnings in 2018, excluding the charge of 15€m related to the merger of Vodafone India and Idea Cellular?\"\n  },\n  {\n    \"input\": \"What is the difference in average income tax expense between the years 2017-2018 and 2018-2019?\",\n    \"output\": \"What is the change between 2017-2018 and 2018-2019 average income tax expense?\"\n  },\n  {\n    \"input\": \"How much is the excluded depreciation and amortisation for 2019 adjusted EBITDA?\",\n    \"output\": \"What is the amount of depreciation and amortization that is excluded from the adjusted EBITDA calculation for the year 2019?\"\n  },\n  {\n    \"input\": \"How much is the excluded depreciation and amortisation for 2018 adjusted EBITDA?\",\n    \"output\": \"What is the total amount of depreciation and amortization that were not included in calculating the adjusted EBITDA for the year 2018?\"\n  },\n  {\n    \"input\": \"\\\"What is the exact amount of the adjusted profit before tax for the year 2018?\\\"\",\n    \"output\": \"How much is the adjusted profit before tax in 2018?\"\n  },\n  {\n    \"input\": \"Which specific financial year's information is displayed in the provided table?\",\n    \"output\": \"Which financial years' information is shown in the 
table?\"\n  },\n  {\n    \"input\": \"What is the distinction between the average net book value and the average total costs of shares in Group undertakings as of March 31, 2019? How can analyzing this difference assist in better understanding financial performance?\",\n    \"output\": \"What is the difference between 2019 average net book value and 2019 average total costs of shares in Group undertakings as at 31 March?\"\n  },\n  {\n    \"input\": \"What is the significance of the recorded amount for developed technology?\",\n    \"output\": \"What does the amount recorded for developed technology represent?\"\n  },\n  {\n    \"input\": \"What does the recorded amount for developed technology signify, and what is its purpose or significance?\",\n    \"output\": \"What does the amount recorded for developed technology represent?\"\n  },\n  {\n    \"input\": \"What is the useful life for Developed technology?\",\n    \"output\": \"What is the average duration of usefulness or lifespan for technology that has been fully developed and is in practical use?\"\n  },\n  {\n    \"input\": \"What is the difference in useful life between developed technology and customer relationships?\",\n    \"output\": \"What distinguishes the lifespan of developed technology from that of customer relationships?\"\n  },\n  {\n    \"input\": \"What is the difference in fair value between developed technology and customer relationships?\",\n    \"output\": \"What distinguishes the fair value of developed technology from that of customer relationships?\"\n  },\n  {\n    \"input\": \"What identifiable intangible assets have a useful life exceeding five years?\",\n    \"output\": \"What are the identifiable intangible assets with a useful life above 5 years?\"\n  },\n  {\n    \"input\": \"What intangible assets can be identified and have a useful life of more than 5 years?\",\n    \"output\": \"What are the identifiable intangible assets with a useful life above 5 years?\"\n  },\n  {\n    
\"input\": \"What does the amount recorded for developed technology represent?\",\n    \"output\": \"\\\"What is the meaning or significance of the recorded amount for developed technology?\\\"\"\n  },\n  {\n    \"input\": \"What is the useful life (in years) of developed technology?\",\n    \"output\": \"What is the average duration, measured in years, of the lifespan of technology that has been developed?\"\n  },\n  {\n    \"input\": \"What is the difference in fair value between developed technology and customer relationships?\",\n    \"output\": \"What distinguishes the fair value of developed technology from that of customer relationships, and how do they differ from each other?\"\n  },\n  {\n    \"input\": \"What are the intangible assets that have a fair value of above $20,000 thousands?\",\n    \"output\": \"What are the intangible assets with a fair value exceeding $20,000 thousands? Please provide a comprehensive list of these assets.\"\n  },\n  {\n    \"input\": \"What are the intangible assets that have a fair value of above $20,000 thousands?\",\n    \"output\": \"What are the intangible assets with a fair value exceeding $20 million dollars?\"\n  },\n  {\n    \"input\": \"What is the distinction between the total amount invested in debt and equity, when considering amortized cost as the basis for evaluation?\",\n    \"output\": \"What is the difference in the total debt investments and the total equity investments, based on amortized cost?\"\n  },\n  {\n    \"input\": \"What is the percentage composition of U.S. treasury securities in relation to the total debt investments when considering fair value?\",\n    \"output\": \"Based on fair value, what is the percentage constitution of U.S. treasury securities among the total debt investments?\"\n  },\n  {\n    \"input\": \"What is the number of potentially anti-dilutive shares that were excluded for fiscal years ended June 30, 2017, 2018 and 2019 respectively? 
\",\n    \"output\": \"What was the excluded number of potentially anti-dilutive shares for the fiscal years ending June 30, 2017, 2018, and 2019?\"\n  },\n  {\n    \"input\": \"What is the number of potentially anti-dilutive shares that were excluded for fiscal years ended June 30, 2017, 2018 and 2019 respectively? \",\n    \"output\": \"What is the total count of potentially anti-dilutive shares excluded for the fiscal years ending on June 30, 2017, 2018, and 2019, respectively?\"\n  },\n  {\n    \"input\": \"What is the variance in the weighted-average ordinary shares outstanding for basic and diluted calculations in the fiscal year that ended on June 30, 2019?\",\n    \"output\": \"In fiscal year ended June 30, 2019, what is the difference in the weighted-average ordinary shares outstanding between the basic and the diluted?\"\n  },\n  {\n    \"input\": \"What is the average total non-current operating assets for fiscal years ended June 30, 2018 and 2019?\",\n    \"output\": \"\\\"What is the average total value of non-current operating assets for the fiscal years that ended on June 30, 2018 and June 30, 2019?\\\"\"\n  },\n  {\n    \"input\": \"How many geographic regions had non-current operating assets exceeding $10,000 thousand in the fiscal year ending on June 30, 2019?\",\n    \"output\": \"In fiscal year ended June 30, 2019, how many geographic regions have non-current operating assets of more than $10,000 thousand?\"\n  },\n  {\n    \"input\": \"What percentage of the total non-current operating assets owned by the Group in the United States constituted the non-current operating assets in the fiscal year ending June 30, 2018?\",\n    \"output\": \"In fiscal year ended June 30, 2018, what is the percentage constitution of the non-current operating assets in the United States among the total non-current operating assets owned by the Group?\"\n  },\n  {\n    \"input\": \"\\\"What specific adjustments were made to figures in the year 2018?\\\"\",\n    
\"output\": \"What was the adjustment done to figures in 2018?\"\n  },\n  {\n    \"input\": \"How much did the value of security deposits change from fiscal year 2018 to 2019?\",\n    \"output\": \"What is the change in value between security deposits between fiscal years 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What is the  Balance at the beginning of 2019?\",\n    \"output\": \"What is the balance amount in the accounts at the start of the year 2019?\"\n  },\n  {\n    \"input\": \"What is the average deferred tax expense for fiscal years 2018 and 2019?\",\n    \"output\": \"What is the average amount of deferred tax expense incurred during fiscal years 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What is the percentage change of deferred tax expenses between fiscal year 2018 to 2019?\",\n    \"output\": \"What is the percentage difference in deferred tax expenses between fiscal year 2018 and fiscal year 2019?\"\n  },\n  {\n    \"input\": \"What is the amount recorded for short-term investments as of June 30, 2018, on the consolidated statements of financial position for the Group?\",\n    \"output\": \"As of June 30, 2018, what is the value of short-term investments on the Group's consolidated statements of financial position?\"\n  },\n  {\n    \"input\": \"Based on fair value, what is the difference in value between U.S. treasury securities and agency securities?\",\n    \"output\": \"What is the quantifiable difference between U.S. 
treasury securities and agency securities in terms of their fair market value?\"\n  },\n  {\n    \"input\": \"What are the specific revenue figures for the United Kingdom for the fiscal years that concluded in 2017, 2018, and 2019?\",\n    \"output\": \"What are the revenue amounts from the United Kingdom for fiscal years ended 2017, 2018 and 2019 respectively?\"\n  },\n  {\n    \"input\": \"What is the difference in the revenues from Asia Pacific between fiscal years ended 2018 and 2019?\",\n    \"output\": \"How does the revenue for the Asia Pacific region differ between the fiscal years ending in 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What is the balance of capped call as of June 30, 2019?\",\n    \"output\": \"What is the specific capped call balance as of June 30, 2019?\"\n  },\n  {\n    \"input\": \"What is the difference in the capped call balance between the fiscal year ending on June 30, 2018, and the fiscal year ending on June 30, 2019?\",\n    \"output\": \"What is the change in the balance of capped call between fiscal year ended June 30, 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What is the difference in balance of capped call and non-marketable investments as of June 30, 2019?\",\n    \"output\": \"What is the difference in the balance of capped call and non-marketable investments as of June 30, 2019, and how do these two types of investments vary in terms of their marketability?\"\n  },\n  {\n    \"input\": \"What is the difference in balance of capped call and non-marketable investments as of June 30, 2019?\",\n    \"output\": \"What is the balance difference between capped call and non-marketable investments as of June 30, 2019?\"\n  },\n  {\n    \"input\": \"\\\"What is the chronological order of the share capital amounts from June 30, 2015 to 2019?\\\"\",\n    \"output\": \"What are the share capital for the years as of June 30, 2015 to 2019 in chronological order?\"\n  },\n  {\n    \"input\": \"List the short-term investments from 
June 30, 2015 to 2019 in chronological order.\",\n    \"output\": \"What are the short-term investments for the years as of June 30, 2015 to 2019 in chronological order?\"\n  },\n  {\n    \"input\": \"What is the difference in the value of short-term investments between fiscal years 2018 and 2019?\",\n    \"output\": \"What is the variance in the monetary worth of short-term investments from fiscal year 2018 to fiscal year 2019?\"\n  },\n  {\n    \"input\": \"What is the average  Cash and cash equivalents for 2015-2019?\",\n    \"output\": \"What is the average amount of Cash and cash equivalents from 2015 to 2019?\"\n  },\n  {\n    \"input\": \"What is the definition of other fees?\",\n    \"output\": \"What does the term \\\"other fees\\\" mean? Please provide a clear definition of this term, including any relevant details, to facilitate a more accurate response.\"\n  },\n  {\n    \"input\": \"What is the average amount of taxes paid in fees for the fiscal years 2018 and 2019?\",\n    \"output\": \"What is the average tax fees for fiscal years 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What is the disparity in the aggregate of obligations between operating leases and other types of contractual commitments?\",\n    \"output\": \"What is the difference in the total commitments between that of operating leases and other contractual commitments?\"\n  },\n  {\n    \"input\": \"What percentage of the total commitments for operating leases for the year ending in 2020 is constituted by the commitments specifically for operating leases?\",\n    \"output\": \"What is the percentage constitution of the commitments for operating leases for year ending 2020 among the total commitments for operating leases?\"\n  },\n  {\n    \"input\": \"What portion of the total commitments for operating leases for the year ending 2020 represents the percentage constitution?\",\n    \"output\": \"What is the percentage constitution of the commitments for operating leases for year ending 
2020 among the total commitments for operating leases?\"\n  },\n  {\n    \"input\": \"What was the percentage increase in maintenance revenues from the fiscal year ending in 2018 to the fiscal year ending in 2019?\",\n    \"output\": \"What was the increase of maintenance revenues from fiscal year ended 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What was the exact percentage increase in perpetual license revenues from the fiscal year ended 2018 to the fiscal year ended 2019?\",\n    \"output\": \"What was the increase of perpetual license revenues from fiscal year ended 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What percentage of total revenue for the fiscal year ending in 2018 is made up of perpetual license sales?\",\n    \"output\": \"For fiscal year ended 2018, what is the percentage constitution of perpetual license among the total revenue?\"\n  },\n  {\n    \"input\": \"What percentage of the total revenue for the fiscal year ended 2018 was generated from perpetual licenses?\",\n    \"output\": \"For fiscal year ended 2018, what is the percentage constitution of perpetual license among the total revenue?\"\n  },\n  {\n    \"input\": \"\\\"What are the average maintenance revenues for the fiscal years ending in 2018 and 2019, respectively?\\\"\",\n    \"output\": \"What is the average maintenance revenues for fiscal year ended 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What is the specific provision for dilapidation expenses allocated for the fiscal year 2019?\",\n    \"output\": \"What is the dilapidation provision for fiscal year 2019?\"\n  },\n  {\n    \"input\": \"What is the formula to calculate free cash flow, and can you further explain the steps involved in deriving this measure?\",\n    \"output\": \"How is free cash flow calculated?\"\n  },\n  {\n    \"input\": \"What was the percentage increase in free cash flow from fiscal year 2018 to 2019?\",\n    \"output\": \"What was the increase in free cash flow between fiscal year 2018 and 2019?\"\n 
 },\n  {\n    \"input\": \"What is the average net cash flow generated by operating activities for the years 2017, 2018, and 2019?\",\n    \"output\": \"What is the average net cash provided by operating activities from 2017-2019?\"\n  },\n  {\n    \"input\": \"\\\"What is the average amount of net cash generated from operating activities for the years 2017, 2018, and 2019?\\\"\",\n    \"output\": \"What is the average net cash provided by operating activities from 2017-2019?\"\n  },\n  {\n    \"input\": \"What is the percentage difference in the amount of available cash generated by the company between the years 2017 and 2018?\",\n    \"output\": \"What is the percentage change in free cash flow between 2017 and 2018?\"\n  },\n  {\n    \"input\": \"What is the change in capital expenditures between 2018 and 2019?\",\n    \"output\": \"What was the difference in capital expenditures from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What is the change in capital expenditures between 2018 and 2019?\",\n    \"output\": \"What was the difference in the amount of money spent on capital expenditures from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"As of June 30, 2019, what is the total value or monetary amount of outstanding payments owed by customers or business partners for goods or services provided?\",\n    \"output\": \"As of June 30, 2019, what is the amount of trade receivables?\"\n  },\n  {\n    \"input\": \"How does the net cash provided by operating activities compare between the fiscal years ending in 2018 and 2019?\",\n    \"output\": \"What is the difference in net cash provided by operating activities between fiscal year ended 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What is the change in net cash provided by operating activities between the fiscal years ending in 2018 and 2019?\",\n    \"output\": \"What is the difference in net cash provided by operating activities between fiscal year ended 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What are the 
specific components included in the \\\"other current assets\\\" category in the table?\",\n    \"output\": \"What are the components under other current assets in the table?\"\n  },\n  {\n    \"input\": \"What was the difference in the total value of other current assets between 2019 and 2018?\",\n    \"output\": \"What was the change in total other current assets in 2019 from 2018?\"\n  },\n  {\n    \"input\": \"Which items in the table include the cumulative effect of the 2017 Tax Reform Act in their 2017 amounts?\",\n    \"output\": \"For which items in the table does the amount for 2017 includes the sum related to the impact of the 2017 Tax Reform Act?\"\n  },\n  {\n    \"input\": \"Which items in the table include the sum related to the impact of the 2017 Tax Reform Act in their amount for 2017?\",\n    \"output\": \"For which items in the table does the amount for 2017 includes the sum related to the impact of the 2017 Tax Reform Act?\"\n  },\n  {\n    \"input\": \"What was the year when the Diluted Net Income per Common Share reached its highest value?\",\n    \"output\": \"In which year was the Diluted Net Income per Common Share largest?\"\n  },\n  {\n    \"input\": \"In which year did the exclusion of anti-dilutive shares from the calculation of diluted earnings per share reach its highest magnitude?\",\n    \"output\": \"In which year was the Anti-dilutive shares excluded from the diluted earnings per share calculation largest?\"\n  },\n  {\n    \"input\": \"What was the year when the exclusion of anti-dilutive shares from the calculation of diluted earnings per share reached its highest value?\",\n    \"output\": \"In which year was the Anti-dilutive shares excluded from the diluted earnings per share calculation largest?\"\n  },\n  {\n    \"input\": \"Where is Income tax benefit included?\",\n    \"output\": \"Where is the income tax benefit typically included in financial statements or accounting records?\"\n  },\n  {\n    \"input\": \"Where is 
Income tax benefit included?\",\n    \"output\": \"Where can income tax benefit be found in financial statements or reports?\"\n  },\n  {\n    \"input\": \"When is the calculation of income tax benefit typically performed annually?\",\n    \"output\": \"In which years is income tax benefit calculated?\"\n  },\n  {\n    \"input\": \"What does restricted cash include?\",\n    \"output\": \"What is included in restricted cash and can you provide further details about its components?\"\n  },\n  {\n    \"input\": \"In which year was Restricted cash included in \\\"Other current assets\\\" the lowest?\",\n    \"output\": \"In which year did the inclusion of Restricted cash in the category of \\\"Other current assets\\\" reach its lowest level?\"\n  },\n  {\n    \"input\": \"What was the difference in the amount of Restricted cash included in the category of \\\"Deferred charges and other assets\\\" in the year 2017 compared to 2016?\",\n    \"output\": \"What was the change in Restricted cash included in \\\"Deferred charges and other assets\\\" in 2017 from 2016?\"\n  },\n  {\n    \"input\": \"What is the largest recorded year for Net periodic benefit cost?\",\n    \"output\": \"In which year was Net periodic benefit cost the largest?\"\n  },\n  {\n    \"input\": \"What was the difference in the amount of money spent on interest in 2019 compared to 2018?\",\n    \"output\": \"What was the change in interest cost in 2019 from 2018?\"\n  },\n  {\n    \"input\": \"What was the difference in the cost of interest between 2019 and 2018?\",\n    \"output\": \"What was the change in interest cost in 2019 from 2018?\"\n  },\n  {\n    \"input\": \"\\\"In which year did the amount of deferred revenue reach its highest value and why?\\\"\",\n    \"output\": \"In which year was deferred revenue larger?\"\n  },\n  {\n    \"input\": \"During which year was the accumulated income received in advance, known as deferred revenue, higher?\",\n    \"output\": \"In which year was deferred 
revenue larger?\"\n  },\n  {\n    \"input\": \"What was the specific dollar amount of the change in deferred revenue between 2019 and 2018?\",\n    \"output\": \"What was the change in deferred revenue in 2019 from 2018?\"\n  },\n  {\n    \"input\": \"What was the percentage change in deferred revenue in 2019 from 2018?\",\n    \"output\": \"What is the percentage increase or decrease in deferred revenue from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What was the change in Fair value of share awards vested in 2019 from 2018?\",\n    \"output\": \"What was the variation in the fair value of share awards that were vested in the year 2019 compared to 2018?\"\n  },\n  {\n    \"input\": \"What was the change in Fair value of share awards vested in 2019 from 2018?\",\n    \"output\": \"What was the difference in fair value of share awards that were granted in 2018 but vested in 2019?\"\n  },\n  {\n    \"input\": \"During which years was information regarding the credit agreements provided?\",\n    \"output\": \"In which years is information related to the credit agreements provided?\"\n  },\n  {\n    \"input\": \"What is the least year in which the Interest expense reached its minimum amount?\",\n    \"output\": \"In which year was the amount of Interest expense the smallest?\"\n  },\n  {\n    \"input\": \"When was the least amount of interest expense recorded?\",\n    \"output\": \"In which year was the amount of Interest expense the smallest?\"\n  },\n  {\n    \"input\": \"What was the change in Interest expense in 2019 from 2018?\",\n    \"output\": \"What was the difference in the amount of money spent on interest payments in 2019 compared to 2018?\"\n  },\n  {\n    \"input\": \"What was the percentage increase or decrease in the amount spent on interest in 2019 compared to 2018?\",\n    \"output\": \"What was the percentage change in Interest expense in 2019 from 2018?\"\n  },\n  {\n    \"input\": \"What was the value of purchases that were accumulated or 
outstanding as of the end of 2019?\",\n    \"output\": \"What was the amount of Accrued purchases in 2019?\"\n  },\n  {\n    \"input\": \"What was the specific amount of Accrued legal and professional fees incurred during the year 2018?\",\n    \"output\": \"What was the amount of Accrued legal and professional fees  in 2018?\"\n  },\n  {\n    \"input\": \"\\\"How much money was recorded for Accrued legal and professional fees during the year 2018?\\\"\",\n    \"output\": \"What was the amount of Accrued legal and professional fees  in 2018?\"\n  },\n  {\n    \"input\": \"What was the percentage difference in Accrued roadside assistance claim costs between 2019 and 2018?\",\n    \"output\": \"What was the change in Accrued roadside assistance claim costs in 2019 from 2018?\"\n  },\n  {\n    \"input\": \"\\\"What was the specific amount charged for the service in the year 2019?\\\"\",\n    \"output\": \"What was the service cost in 2019?\"\n  },\n  {\n    \"input\": \"\\\"What was the cost of the service provided in the year 2019?\\\"\",\n    \"output\": \"What was the service cost in 2019?\"\n  },\n  {\n    \"input\": \"What was the total cost of interest expenses in the year 2018?\",\n    \"output\": \"What was the  Interest cost  in 2018?\"\n  },\n  {\n    \"input\": \"What was the total amount spent on interest expenses in 2018?\",\n    \"output\": \"What was the  Interest cost  in 2018?\"\n  },\n  {\n    \"input\": \"\\\"In which specific year did the cost of services exceed the cost of services in the other years?\\\"\",\n    \"output\": \"In which year was service cost larger?\"\n  },\n  {\n    \"input\": \"Which year had a greater service cost compared to other years?\",\n    \"output\": \"In which year was service cost larger?\"\n  },\n  {\n    \"input\": \"What was the difference in the amount spent on interest in 2019 compared to 2018?\",\n    \"output\": \"What was the change in interest cost in 2019 from 2018?\"\n  },\n  {\n    \"input\": \"\\\"What was 
the gross revenue generated by overseas operations, excluding any income taxes, in the fiscal year 2018?\\\"\",\n    \"output\": \"What was the  Foreign  income before income taxes in 2018?\"\n  },\n  {\n    \"input\": \"What was the total income from foreign sources in 2018, before any deductions for income taxes were made?\",\n    \"output\": \"What was the  Foreign  income before income taxes in 2018?\"\n  },\n  {\n    \"input\": \"What are the components under income before income taxes?\",\n    \"output\": \"What are the individual components that make up the category of income before income taxes?\"\n  },\n  {\n    \"input\": \"In which specific year, prior to the implementation of income taxes, did individuals and/or organizations experience the highest level of income?\",\n    \"output\": \"In which year was income before income taxes the largest?\"\n  },\n  {\n    \"input\": \"What is the largest recorded income before income taxes, and in which specific year was it achieved?\",\n    \"output\": \"In which year was income before income taxes the largest?\"\n  },\n  {\n    \"input\": \"In which specific year did the quantity of an item categorized as \\\"Other\\\" exceed the quantity of another item, considering all provided information?\",\n    \"output\": \"In which year was the amount of Other larger?\"\n  },\n  {\n    \"input\": \"In which year did the quantity of a different item exceed the quantity of the rest?\",\n    \"output\": \"In which year was the amount of Other larger?\"\n  },\n  {\n    \"input\": \"What was the year-over-year difference in accrued severance and associated expenses between 2018 and 2019?\",\n    \"output\": \"What was the change in Accrued severance and related costs in 2019 from 2018?\"\n  },\n  {\n    \"input\": \"What was the total payment made by the Company in the year 2019?\",\n    \"output\": \"How much has the Company paid in total in 2019?\"\n  },\n  {\n    \"input\": \"What was the total amount paid by the Company 
in 2019?\",\n    \"output\": \"How much has the Company paid in total in 2019?\"\n  },\n  {\n    \"input\": \"When did the amount of non-cash impairment charges exceed previous years?\",\n    \"output\": \"In which year was Non-cash impairment charges larger?\"\n  },\n  {\n    \"input\": \"Which year had a greater amount of non-cash impairment charges compared to other years?\",\n    \"output\": \"In which year was Non-cash impairment charges larger?\"\n  },\n  {\n    \"input\": \"During which years is the fair value of restricted shares or restricted stock units (RSUs) vested determined?\",\n    \"output\": \"In which years is the Fair value of restricted shares/RSUs vested calculated?\"\n  },\n  {\n    \"input\": \"What was the change in Number of restricted shares/ RSUs granted in 2019 from 2018?\",\n    \"output\": \"What was the difference in the number of restricted shares/RSUs granted in 2019 compared to 2018?\"\n  },\n  {\n    \"input\": \"What was the change in Number of restricted shares/ RSUs granted in 2019 from 2018?\",\n    \"output\": \"What was the difference in the number of restricted shares/ RSUs granted in 2019 compared to 2018?\"\n  },\n  {\n    \"input\": \"What was the percentage change in Number of restricted shares/ RSUs granted in 2019 from 2018?\",\n    \"output\": \"What was the percentage change in the number of restricted shares/ RSUs granted from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"As of July 1, 2019, what was the total value of stapled securities of NSR listed on the Australian Securities Exchange?\",\n    \"output\": \"How much did NSR had stapled securities quoted on the Australian Securities Exchange as at 1 July 2019?\"\n  },\n  {\n    \"input\": \"As of July 1, 2019, what was the total value of NSR stapled securities quoted on the Australian Securities Exchange?\",\n    \"output\": \"How much did NSR had stapled securities quoted on the Australian Securities Exchange as at 1 July 2019?\"\n  },\n  {\n    \"input\": 
\"What would happen if the price of a security dropped by 2.5% and how would this affect the overall market?\",\n    \"output\": \"What would be the impact if security price decreased by 2.5%?\"\n  },\n  {\n    \"input\": \"What is the change in Other intangible assets Opening net book value from 2018 to 2019?\",\n    \"output\": \"What is the difference in the net book value of Other intangible assets at the beginning of 2018 compared to the beginning of 2019?\"\n  },\n  {\n    \"input\": \"What is the change in Other intangible assets Opening net book value from 2018 to 2019?\",\n    \"output\": \"What is the difference in the opening net book value of Other intangible assets between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"How many distinct business segments were there in the years 2019 and 2018?\",\n    \"output\": \"How many business segments were present in 2019 and 2018?\"\n  },\n  {\n    \"input\": \"\\\"What is the average revenue generated from New Zealand in both 2018 and 2019?\\\"\",\n    \"output\": \"What is the average revenue from New Zealand for 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What is the average revenue generated from New Zealand in both 2018 and 2019?\",\n    \"output\": \"What is the average revenue from New Zealand for 2018 and 2019?\"\n  },\n  {\n    \"input\": \"In which specific year did the revenue from New Zealand amount to less than 10,000 thousands?\",\n    \"output\": \"In which year was revenue from New Zealand under 10,000 thousands?\"\n  },\n  {\n    \"input\": \"In what specific year did the revenue generated from New Zealand fall below 10,000 thousands (in the same currency)?\",\n    \"output\": \"In which year was revenue from New Zealand under 10,000 thousands?\"\n  },\n  {\n    \"input\": \"What was the total NLA in Brisbane?\",\n    \"output\": \"What was the total number of Newly Listed Apartments (NLA) in Brisbane?\"\n  },\n  {\n    \"input\": \"What is the difference in the NLA between Sunshine Cost and 
Brisbane?\",\n    \"output\": \"What are the NLA differences between the Sunshine Coast and Brisbane, and how do they vary from each other in terms of specific aspects?\"\n  },\n  {\n    \"input\": \"In which year is the Net investment hedge negative?\",\n    \"output\": \"Which specific year does the Net investment hedge demonstrate a negative value?\"\n  },\n  {\n    \"input\": \"What is the change in the Taxation impact on revaluation from 2018 to 2019?\",\n    \"output\": \"What is the difference in the taxation impact on revaluation between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What is the change in the Taxation impact on revaluation from 2018 to 2019?\",\n    \"output\": \"What is the difference in the effect of taxation on revaluation between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What was the share of profit from associates representing NSRs in 2019 and 2018?\",\n    \"output\": \"What were the percentages of profit from associates, specifically those representing NSRs, for the years 2019 and 2018?\"\n  },\n  {\n    \"input\": \"What is the difference in the percentage of profit attributed to associates between 2018 and 2019?\",\n    \"output\": \"What is the change in the Share of profit from associates from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What is the difference in the percentage of profit allocated to associates between 2018 and 2019?\",\n    \"output\": \"What is the change in the Share of profit from associates from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"When was the year when the closing balance on June 30th was below 11,000 thousand units?\",\n    \"output\": \"In which year was the Closing balance at 30 June less than 11,000 thousands?\"\n  },\n  {\n    \"input\": \"What is the year-over-year change in face value for New Zealand Dollar (NZD) interest rate swaps from 2018 to 2019?\",\n    \"output\": \"What is the change in Interest rate swaps (NZD) at face value for Current interest rate swaps from 2018 to 2019?\"\n  
},\n  {\n    \"input\": \"What was the percentage change in face value for current New Zealand Dollar (NZD) interest rate swaps from 2018 to 2019?\",\n    \"output\": \"What is the change in Interest rate swaps (NZD) at face value for Current interest rate swaps from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What was the assets held for sale under Level 1, Level 2 and Level 3 in 2018?\",\n    \"output\": \"In 2018, what were the assets classified as held for sale and valued at Level 1, Level 2, and Level 3 according to their respective classifications?\"\n  },\n  {\n    \"input\": \"What was the amount of senior secured term loan for fiscal years 2019 and 2018?\",\n    \"output\": \"What is the value of senior secured term loan for fiscal years 2019 and 2018 respectively?\"\n  },\n  {\n    \"input\": \"What is the percentage shift in the amount of finance leases and other financing obligations from fiscal year 2018 to fiscal year 2019?\",\n    \"output\": \"What is the percentage change in the finance leases and other financing obligations between fiscal years 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What is the average finance leases and other financing obligations for fiscal years 2018 and 2019?\",\n    \"output\": \"What is the mean value of finance leases and other financing obligations for fiscal years 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What are the income tax expenses for the years 2019, 2018, and 2017?\",\n    \"output\": \"What is the total current income tax expense for 2019, 2018 and 2017 respectively?\"\n  },\n  {\n    \"input\": \"What is the amount of deferred income tax expense (benefit) recorded for the years 2019, 2018, and 2017, respectively?\",\n    \"output\": \"What is the Total deferred income tax expense (benefit) for 2019, 2018 and 2017 respectively?\"\n  },\n  {\n    \"input\": \"What is the average total income tax expense for the years 2017, 2018, and 2019?\",\n    \"output\": \"What is the average Total income tax 
expense for 2019, 2018 and 2017?\"\n  },\n  {\n    \"input\": \"What is the average aggregate size of the facilities in Arizona and Washington?\",\n    \"output\": \"What is the average total size of the facilities located in both Arizona and Washington?\"\n  },\n  {\n    \"input\": \"\\\"How many states in the United States have a total of more than 5 facilities?\\\"\",\n    \"output\": \"How many states have more than 5 facilities?\"\n  },\n  {\n    \"input\": \"What percentage of the total contractual obligations does the total inventory purchase commitments represent?\",\n    \"output\": \"What is the percentage constitution of total inventory purchase commitments among the total contractual obligations?\"\n  },\n  {\n    \"input\": \"What percentage of the total contractual obligations is constituted by the inventory purchase commitments in the overall inventory?\",\n    \"output\": \"What is the percentage constitution of total inventory purchase commitments among the total contractual obligations?\"\n  },\n  {\n    \"input\": \"\\\"What specific component within a contractual obligation holds the highest overall value?\\\"\",\n    \"output\": \"Which component of contractual obligation has the highest total value?\"\n  },\n  {\n    \"input\": \"At December 29, 2017, how many shares were still unvested?\",\n    \"output\": \"What is the number of shares that are Unvested at December 29, 2017?\"\n  },\n  {\n    \"input\": \"What is the length of time that buildings are typically considered useful for?\",\n    \"output\": \"What is the useful lives of buildings?\"\n  },\n  {\n    \"input\": \"What are the estimated useful lives or lifespans of machinery and equipment commonly used in various industries or sectors?\",\n    \"output\": \"What is the useful lives of Machinery and equipment?\"\n  },\n  {\n    \"input\": \"What are the estimated useful lifespans of machinery and equipment? 
How long can machinery and equipment be expected to remain functional and productive?\",\n    \"output\": \"What is the useful lives of Machinery and equipment?\"\n  },\n  {\n    \"input\": \"What is the average monetary worth of buildings in the years 2018 and 2019?\",\n    \"output\": \"What is the average value of buildings for 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What is the average monetary worth of buildings in the years 2018 and 2019 combined?\",\n    \"output\": \"What is the average value of buildings for 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What is the average adjusted net income from 2017-2019?\",\n    \"output\": \"\\\"What is the average adjusted net income for the years 2017, 2018, and 2019?\\\"\"\n  },\n  {\n    \"input\": \"What were the values of receivables and inventory in the years 2019 and 2018, respectively?\",\n    \"output\": \"What is the value of Receivables and inventory for 2019 and 2018 respectively?\"\n  },\n  {\n    \"input\": \"What is the value of Accrued expenses for 2019 and 2018 respectively?\",\n    \"output\": \"What is the amount of Accrued expenses in 2019 and 2018?\"\n  },\n  {\n    \"input\": \"What is the value of Self-insurance reserves for 2019 and 2018 respectively?\",\n    \"output\": \"What are the self-insurance reserve values for the years 2019 and 2018?\"\n  },\n  {\n    \"input\": \"What are the average values of receivables and inventory for the years 2019 and 2018?\",\n    \"output\": \"What is the average value of Receivables and inventory for 2019 and 2018?\"\n  },\n  {\n    \"input\": \"What are the average values of Receivables and inventory for the years 2019 and 2018?\",\n    \"output\": \"What is the average value of Receivables and inventory for 2019 and 2018?\"\n  },\n  {\n    \"input\": \"In which year did the Accrued expenses reach the highest level?\",\n    \"output\": \"Which year has the highest Accrued expenses?\"\n  },\n  {\n    \"input\": \"In which specific year did the 
Accrued expenses reach their highest value?\",\n    \"output\": \"Which year has the highest Accrued expenses?\"\n  },\n  {\n    \"input\": \"Which year recorded the highest Senior Secured Term Loan amount?\",\n    \"output\": \"Which year has the highest Senior secured term loan?\"\n  },\n  {\n    \"input\": \"What information does the table present?\",\n    \"output\": \"What data is being shown in the table and what details can be derived from it?\"\n  },\n  {\n    \"input\": \"What is the change in long-term portion of contingent earn-out liabilities for 2018 and 2019?\",\n    \"output\": \"What is the difference in the long-term portion of contingent earn-out liabilities between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What were the statutory U.S. Federal tax rates for the years 2019, 2018, and 2017?\",\n    \"output\": \"What is the Statutory U.S. Federal tax for 2019, 2018 and 2017 respectively?\"\n  },\n  {\n    \"input\": \"What are the respective statutory federal tax rates for the years 2019, 2018, and 2017 in the United States?\",\n    \"output\": \"What is the Statutory U.S. Federal tax for 2019, 2018 and 2017 respectively?\"\n  },\n  {\n    \"input\": \"How much did the valuation allowance change in 2017?\",\n    \"output\": \"What is the change in valuation allowance for 2017?\"\n  },\n  {\n    \"input\": \"What was the difference in valuation allowance between 2016 and 2017?\",\n    \"output\": \"What is the change in valuation allowance for 2017?\"\n  },\n  {\n    \"input\": \"What has been the average amount of Statutory U.S. Federal tax paid annually for the years 2017, 2018, and 2019?\",\n    \"output\": \"What is the average Statutory U.S. Federal tax from 2017-2019?\"\n  },\n  {\n    \"input\": \"What was the average statutory U.S. federal tax rate for the years 2017-2019?\",\n    \"output\": \"What is the average Statutory U.S. 
Federal tax from 2017-2019?\"\n  },\n  {\n    \"input\": \"What is the Working capital, excluding cash and cash equivalents for fiscal years 2019, 2018 and 2017 respectively?\",\n    \"output\": \"What is the year-end working capital, excluding cash and cash equivalents, for the fiscal years 2019, 2018, and 2017, respectively?\"\n  },\n  {\n    \"input\": \"What is the Working capital, excluding cash and cash equivalents for fiscal years 2019, 2018 and 2017 respectively?\",\n    \"output\": \"What is the Working Capital, excluding cash and cash equivalents, for the fiscal years 2019, 2018, and 2017?\"\n  },\n  {\n    \"input\": \"What are the average values of Cash and cash equivalents for fiscal years 2019, 2018, and 2017?\",\n    \"output\": \"What is the average value for the Cash and cash equivalents for fiscal years 2019, 2018 and 2017?\"\n  },\n  {\n    \"input\": \"What are the average values of the Availability under asset-based loan facility for the fiscal years 2019, 2018, and 2017?\",\n    \"output\": \"What is the average value for the Availability under asset-based loan facility for fiscal years 2019, 2018 and 2017?\"\n  },\n  {\n    \"input\": \"What were the net income (or net losses) for the years 2019, 2018, and 2017 respectively?\",\n    \"output\": \"What was the Net (loss) income in 2019, 2018 and 2017 respectively?\"\n  },\n  {\n    \"input\": \"What were the respective net losses or incomes for the years 2019, 2018, and 2017?\",\n    \"output\": \"What was the Net (loss) income in 2019, 2018 and 2017 respectively?\"\n  },\n  {\n    \"input\": \"In which year was Adjusted EBITDA less than 20,000 thousands?\",\n    \"output\": \"\\\"In which specific year, out of the given years, was the Adjusted EBITDA figure lower than 20,000 thousands?\\\"\"\n  },\n  {\n    \"input\": \"In which year was Adjusted EBITDA less than 20,000 thousands?\",\n    \"output\": \"In which specific year was the Adjusted EBITDA, expressed in thousands, less than 
20,000?\"\n  },\n  {\n    \"input\": \"What was the percentage increase in the expense incurred for Research and Development in the year 2018 compared to the previous period?\",\n    \"output\": \"What was the increase in Research and development expense in 2018?\"\n  },\n  {\n    \"input\": \"\\\"What was the percentage increase in Research and Development (R&D) expense during the year 2018 compared to the previous year?\\\"\",\n    \"output\": \"What was the increase in Research and development expense in 2018?\"\n  },\n  {\n    \"input\": \"What were the average research and development expenses in both 2017 and 2018?\",\n    \"output\": \"What was the average Research and development expenses in 2017 and 2018?\"\n  },\n  {\n    \"input\": \"What were the respective average amounts spent on research and development in 2017 and 2018?\",\n    \"output\": \"What was the average Research and development expenses in 2017 and 2018?\"\n  },\n  {\n    \"input\": \"What was the change in the Net operating loss carryforwards from 2018 to 2019?\",\n    \"output\": \"What was the numerical difference between the Net operating loss carryforwards in 2018 and 2019?\"\n  },\n  {\n    \"input\": \"\\\"In which specific year did the value of Deferred revenue fall below 2,000?\\\"\",\n    \"output\": \"In which year was Deferred revenue less than 2,000?\"\n  },\n  {\n    \"input\": \"What is the difference in the amount of accumulated depreciation between March 31, 2018, and March 31, 2019?\",\n    \"output\": \"What is the change in Accumulated depreciation from March 31, 2018 to March 31, 2019?\"\n  },\n  {\n    \"input\": \"How much did the Accumulated depreciation increase between March 31, 2018 and March 31, 2019?\",\n    \"output\": \"What is the change in Accumulated depreciation from March 31, 2018 to March 31, 2019?\"\n  },\n  {\n    \"input\": \"What was the percentage change in the General and administrative expenses in 2019 compared to the previous year?\",\n    
\"output\": \"What was the increase in the General and administrative expenses in 2019?\"\n  },\n  {\n    \"input\": \"What was the percentage increase in the General and administrative expenses from 2018 to 2019?\",\n    \"output\": \"What was the increase in the General and administrative expenses in 2019?\"\n  },\n  {\n    \"input\": \"\\\"Who has the authority over the management and administration of the company's operations in a specific country?\\\"\",\n    \"output\": \"Which country controls the company's affairs?\"\n  },\n  {\n    \"input\": \"\\\"Who has jurisdiction over the overall management and operations of the company?\\\"\",\n    \"output\": \"Which country controls the company's affairs?\"\n  },\n  {\n    \"input\": \"What was the change in the U.S. state taxes, net of federal from 2018 to 2019?\",\n    \"output\": \"What was the difference in the amount of state taxes paid in the United States, after deducting federal taxes, from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What was the change in the U.S. 
state taxes, net of federal from 2018 to 2019?\",\n    \"output\": \"What was the difference in the amount of state taxes, after deducting federal taxes, between the years 2018 and 2019 in the United States?\"\n  },\n  {\n    \"input\": \"In which year was Tax credits less than 10.0?\",\n    \"output\": \"When was the year when tax credits were less than 10.0 units?\"\n  },\n  {\n    \"input\": \"In which year was Tax credits less than 10.0?\",\n    \"output\": \"In which specific year did the amount of Tax credits fall below 10.0?\"\n  },\n  {\n    \"input\": \"What was the exact amount of share-based compensation expense incurred during the year 2019?\",\n    \"output\": \"What was the Total share-based compensation expense in 2019?\"\n  },\n  {\n    \"input\": \"\\\"What changes, if any, took place once the IPO concluded?\\\"\",\n    \"output\": \"What became effective upon the closing of the IPO?\"\n  },\n  {\n    \"input\": \"What were the initial balances in the years 2019 and 2018, respectively?\",\n    \"output\": \"What was the Beginning balance in 2019 and 2018 respectively?\"\n  },\n  {\n    \"input\": \"What were the starting balances for the year 2019 and 2018?\",\n    \"output\": \"What was the Beginning balance in 2019 and 2018 respectively?\"\n  },\n  {\n    \"input\": \"How much did the Company recorded amortization expense for the year ended March 31, 2018?\",\n    \"output\": \"What was the amount of amortization expense recorded by the Company for the fiscal year ending on March 31, 2018?\"\n  },\n  {\n    \"input\": \"How much did the Company recorded amortization expense for the year ended March 31, 2018?\",\n    \"output\": \"What was the amount of amortization expense recorded by the Company for the fiscal year that ended on March 31, 2018?\"\n  },\n  {\n    \"input\": \"How much did the Company recorded amortization expense for the year ended March 31, 2017?\",\n    \"output\": \"What was the amount of amortization expense recorded by the 
Company for the fiscal year that ended on March 31, 2017?\"\n  },\n  {\n    \"input\": \"What are the main sources of liquidity in financial markets and institutions?\",\n    \"output\": \"What are the principal sources of liquidity?\"\n  },\n  {\n    \"input\": \"What was the total value of outstanding letters of credit associated with specific operating leases as of March 31, 2018?\",\n    \"output\": \"How much was the outstanding letters of credit related to certain operating leases as of March 31, 2018?\"\n  },\n  {\n    \"input\": \"What were the revenue retention rates for the years 2019, 2018, and 2017 in chronological order?\",\n    \"output\": \"What was the Revenue retention rate in 2019, 2018 and 2017 respectively?\"\n  },\n  {\n    \"input\": \"What is the average number of customers in total from 2017 to 2019?\",\n    \"output\": \"What is the average Total customers between 2017-2019?\"\n  },\n  {\n    \"input\": \"\\\"In which specific year did the Adjusted EBITDA amount to less than 20,000 thousands?\\\"\",\n    \"output\": \"In which year was Adjusted EBITDA less than 20,000 thousands?\"\n  },\n  {\n    \"input\": \"In which specific year did the Adjusted EBITDA amount to less than 20,000 thousands?\",\n    \"output\": \"In which year was Adjusted EBITDA less than 20,000 thousands?\"\n  },\n  {\n    \"input\": \"What was the amount of Depreciation and amortization expense recorded for the fiscal year that ended on March 31, 2017?\",\n    \"output\": \"How much was Depreciation and amortization expense for the years ended March 31, 2017?\"\n  },\n  {\n    \"input\": \"What was the total amount of Depreciation and amortization expense recorded for the fiscal years ending on March 31, 2017?\",\n    \"output\": \"How much was Depreciation and amortization expense for the years ended March 31, 2017?\"\n  },\n  {\n    \"input\": \"What is the net increase or decrease in the Accumulated Amortization balance from March 31, 2018, to March 31, 2019?\",\n    
\"output\": \"What is the change in Accumulated amortization from March 31, 2018 to March 31, 2019?\"\n  },\n  {\n    \"input\": \"What is the net increase or decrease in the amount of Accumulated Amortization between March 31, 2018, and March 31, 2019?\",\n    \"output\": \"What is the change in Accumulated amortization from March 31, 2018 to March 31, 2019?\"\n  },\n  {\n    \"input\": \"What was the percentage change in the Risk-free interest rate from 2018 to 2019?\",\n    \"output\": \"What was the change in the Risk-free interest rate from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What was the average level of expected volatility recorded between 2017 and 2019?\",\n    \"output\": \"What was the average Expected volatility between 2017-2019?\"\n  },\n  {\n    \"input\": \"What was the average expected volatility during the period of 2017-2019?\",\n    \"output\": \"What was the average Expected volatility between 2017-2019?\"\n  },\n  {\n    \"input\": \"In which specific year did the grant date fair value per ordinary share fall below 30.0?\",\n    \"output\": \"In which year was the Grant date fair value per ordinary share less than 30.0?\"\n  },\n  {\n    \"input\": \"When was the year when the grant date fair value per ordinary share fell below 30.0?\",\n    \"output\": \"In which year was the Grant date fair value per ordinary share less than 30.0?\"\n  },\n  {\n    \"input\": \"\\\"What was the specific quantity or value of work that was at the intermediate stage of completion in the year 2018?\\\"\",\n    \"output\": \"What was the amount of work in process in 2018?\"\n  },\n  {\n    \"input\": \"What was the change in finished goods between 2018 and 2019?\",\n    \"output\": \"What was the numerical difference in the amount of finished goods produced between the years 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What was the specific amount of interest incurred in the year 2018?\",\n    \"output\": \"What was the interest cost in 2018?\"\n  },\n  
{\n    \"input\": \"What was the change in interest cost between 2018 and 2019?\",\n    \"output\": \"By how much did the interest cost change from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"For how many years did the actuarial loss surpass $1,000 thousand and receive recognition?\",\n    \"output\": \"How many years did the recognized actuarial loss exceed $1,000 thousand?\"\n  },\n  {\n    \"input\": \"What was the total amount of insurance proceeds received during the fiscal year that ended on August 31, 2019?\",\n    \"output\": \"What were the insurance proceeds for the fiscal year ended August 31, 2019?\"\n  },\n  {\n    \"input\": \"\\\"What was the total amount of insurance claims paid out during the fiscal year that concluded on August 31, 2019?\\\"\",\n    \"output\": \"What were the insurance proceeds for the fiscal year ended August 31, 2019?\"\n  },\n  {\n    \"input\": \"What was the specific difference in the amount of Restructuring and related charges incurred by the company in the years 2018 and 2019?\",\n    \"output\": \"What was the change in Restructuring and related charges between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What were the reductions for tax positions  primarily related to?\",\n    \"output\": \"\\\"What was the primary reason for the reductions in tax positions?\\\"\"\n  },\n  {\n    \"input\": \"What is the company's definition of working capital and how is it calculated?\",\n    \"output\": \"How does the company define working capital?\"\n  },\n  {\n    \"input\": \"What was the amount of assets in 2019?\",\n    \"output\": \"What were the total assets in 2019?\"\n  },\n  {\n    \"input\": \"What was the value of the total assets in the year 2019?\",\n    \"output\": \"What were the total assets in 2019?\"\n  },\n  {\n    \"input\": \"What is the range of years covered in the table?\",\n    \"output\": \"What are the years included in the table?\"\n  },\n  {\n    \"input\": \"What was the change in Current installments 
of notes payable and long-term debt between 2018 and 2019?\",\n    \"output\": \"What was the difference in the amount of current installments of notes payable and long-term debt between the year 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What was the change in Current installments of notes payable and long-term debt between 2018 and 2019?\",\n    \"output\": \"What is the difference in the amount of current installments of notes payable and long-term debt between the years 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What was the percentage increase or decrease in the stockholders' equity of Total Jabil Inc. from 2018 to 2019?\",\n    \"output\": \"What was the percentage change in Total Jabil Inc. stockholders’ equity between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What was the specific net revenue figure reported?\",\n    \"output\": \"What was the net revenue as reported?\"\n  },\n  {\n    \"input\": \"What was the operating income as reported?\",\n    \"output\": \"What was the reported operating income amount?\"\n  },\n  {\n    \"input\": \"What was the specific distinction between the reported net revenue and cost of revenue? 
Please elaborate on the differences observed between these two financial measures.\",\n    \"output\": \"What was the difference between net revenue and cost of revenue as reported?\"\n  },\n  {\n    \"input\": \"What was the distinction between the net revenue and cost of revenue as stated in the report, and can you provide further details to assist in answering?\",\n    \"output\": \"What was the difference between net revenue and cost of revenue as reported?\"\n  },\n  {\n    \"input\": \"What was the operating income balance expressed as a ratio of the amount reported, before considering the implementation of ASU 2014-09?\",\n    \"output\": \"What was the operating income balance without the adoption of ASU 2014-09 as a ratio of the amount as reported?\"\n  },\n  {\n    \"input\": \"What was the ratio of the operating income balance without the adoption of ASU 2014-09 to the amount as reported?\",\n    \"output\": \"What was the operating income balance without the adoption of ASU 2014-09 as a ratio of the amount as reported?\"\n  },\n  {\n    \"input\": \"By what amount did the gross profit increase or decrease from May 2018 to August 2018?\",\n    \"output\": \"What was the change in gross profit between May 2018 and August 2018?\"\n  },\n  {\n    \"input\": \"What are the distinctions between the September 2018 balances of contract assets and contract liabilities? 
Please provide a comprehensive explanation while keeping all the details intact.\",\n    \"output\": \"What is the difference between the balance in September 2018 for contract assets and contract liabilities?\"\n  },\n  {\n    \"input\": \"What was the percentage change for Other accrued expenses due to adjustments by the new standard?\",\n    \"output\": \"What was the percentage change specifically for Other accrued expenses as a result of adjustments made under the new standard?\"\n  },\n  {\n    \"input\": \"What was the percentage change for Other accrued expenses due to adjustments by the new standard?\",\n    \"output\": \"What was the percentage change in Other accrued expenses after incorporating adjustments made by the new standard? Please provide the specific percentage change.\"\n  },\n  {\n    \"input\": \"What is the exact amount of net revenue generated in the fiscal year 2019?\",\n    \"output\": \"What was the net revenue in 2019?\"\n  },\n  {\n    \"input\": \"What was the total revenue generated during the year 2019 after deducting all expenses and costs?\",\n    \"output\": \"What was the net revenue in 2019?\"\n  },\n  {\n    \"input\": \"What was the gross profit in 2018?\",\n    \"output\": \"What was the gross profit generated in the year 2018?\"\n  },\n  {\n    \"input\": \"What was the operating income in 2017?\",\n    \"output\": \"What was the operating income for the year 2017?\"\n  },\n  {\n    \"input\": \"What was the change in gross profit between 2017 and 2018?\",\n    \"output\": \"What was the difference in gross profit from 2017 to 2018?\"\n  },\n  {\n    \"input\": \"What was the change in Prepaid expenses and other current assets due to the adoption of ASU 2014-09?\",\n    \"output\": \"\\\"What was the impact on Prepaid expenses and other current assets resulting from the adoption of ASU 2014-09? 
Please provide details and explain the specific changes that occurred as a result of this accounting standard.\\\"\"\n  },\n  {\n    \"input\": \"How many reported segments have a value exceeding $2,000,000?\",\n    \"output\": \"How many segments as reported exceed $2,000,000 thousand?\"\n  },\n  {\n    \"input\": \"What were the items incorporated within the amounts for the fiscal year that concluded on August 31, 2017?\",\n    \"output\": \"What did the amounts in fiscal year ended August 31, 2017 include?\"\n  },\n  {\n    \"input\": \"What expenses or revenues were included in the amounts reported for the fiscal year that ended on August 31, 2017?\",\n    \"output\": \"What did the amounts in fiscal year ended August 31, 2017 include?\"\n  },\n  {\n    \"input\": \"What was the difference in the costs of severance and employee benefits for the company from 2017 to 2018?\",\n    \"output\": \"What was the change in Employee severance and benefit costs between 2017 and 2018?\"\n  },\n  {\n    \"input\": \"Which periods does the table include?\",\n    \"output\": \"What time periods are encompassed in the table's content?\"\n  },\n  {\n    \"input\": \"\\\"What specific items or transactions were encompassed within the category of acquisitions and adjustments?\\\"\",\n    \"output\": \"What did Acquisitions and adjustments include?\"\n  },\n  {\n    \"input\": \"What were the variations in acquisitions and adjustments made in fiscal year 2018 between the Electronic Manufacturing Services (EMS) and Document Management Systems (DMS)?\",\n    \"output\": \"What is the difference in Acquisitions and adjustments in fiscal 2018 between EMS and DMS?\"\n  },\n  {\n    \"input\": \"What are the distinctions in acquisitions and adjustments, specifically pertaining to fiscal year 2018, when comparing EMS (Emergency Medical Services) and DMS (Document Management Systems)?\",\n    \"output\": \"What is the difference in Acquisitions and adjustments in fiscal 2018 between EMS 
and DMS?\"\n  },\n  {\n    \"input\": \"What was the balance of EMS as in 2018 as a percentage of the total balance at the end of fiscal 2018?\",\n    \"output\": \"What percentage of the total balance at the end of fiscal 2018 represented the balance of EMS in 2018?\"\n  },\n  {\n    \"input\": \"What categories of income or loss are included in the table's income (loss) figures before accounting for income tax expense?\",\n    \"output\": \"What types of income (loss) before income tax expense is provided by the table?\"\n  },\n  {\n    \"input\": \"What was the foreign income (loss) in 2018?\",\n    \"output\": \"What was the amount of foreign income or loss for the year 2018?\"\n  },\n  {\n    \"input\": \"What was the change in Foreign income (loss) between 2018 and 2019?\",\n    \"output\": \"What was the difference in the amount of income or loss generated from foreign sources, comparing the years 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What was the change in Foreign income (loss) between 2018 and 2019?\",\n    \"output\": \"What was the difference in the amount of foreign income or loss reported between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"\\\"How many annual periods experienced a Foreign income loss that was greater than $800,000 thousand dollars?\\\"\",\n    \"output\": \"How many years did Foreign income (loss) exceed $800,000 thousand?\"\n  },\n  {\n    \"input\": \"For how many years was the amount of Foreign income (loss) greater than $800,000 thousand?\",\n    \"output\": \"How many years did Foreign income (loss) exceed $800,000 thousand?\"\n  },\n  {\n    \"input\": \"What was the change in Accumulated benefit obligation between 2018 and 2019?\",\n    \"output\": \"What was the specific numerical difference in the Accumulated Benefit Obligation (ABO) amount between the fiscal years of 2018 and 2019?\"\n  },\n  {\n    \"input\": \"\\\"What were the specific expenses incurred by the company for employee severance and benefits during 
the year 2018?\\\"\",\n    \"output\": \"What were the Employee severance and benefit costs in 2018?\"\n  },\n  {\n    \"input\": \"What were the severance and benefit costs for employees in the year 2018?\",\n    \"output\": \"What were the Employee severance and benefit costs in 2018?\"\n  },\n  {\n    \"input\": \"What was the precise difference in terms of employee severance and benefit costs from 2018 to 2019?\",\n    \"output\": \"What was the change in Employee severance and benefit costs between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"How many years did cash proceeds received exceed $5,000 million?\",\n    \"output\": \"How many years have the cash proceeds received been above $5,000 million?\"\n  },\n  {\n    \"input\": \"How many years did cash proceeds received exceed $5,000 million?\",\n    \"output\": \"For how many years did the cash proceeds received exceed $5,000 million?\"\n  },\n  {\n    \"input\": \"What was the percentage increase or decrease in pre-tax losses on the sale of receivables from 2017 to 2018?\",\n    \"output\": \"What was the percentage change in Pre-tax losses on sale of receivables between 2017 and 2018?\"\n  },\n  {\n    \"input\": \"What was the difference in the amount of money spent on repurchasing shares in 2016 compared to 2017?\",\n    \"output\": \"What was the change in share repurchases between 2016 and 2017?\"\n  },\n  {\n    \"input\": \"What was the percentage increase or decrease in dividend payments from 2018 to 2019?\",\n    \"output\": \"What was the percentage change in dividends paid between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What was the percentage increase or decrease in dividends paid from 2018 to 2019?\",\n    \"output\": \"What was the percentage change in dividends paid between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What was the specific topic of discussion in the DMS segment?\",\n    \"output\": \"What was the DMS segment focused on?\"\n  },\n  {\n    \"input\": \"What was the 
change in the net revenue from EMS between 2018 and 2019?\",\n    \"output\": \"What was the difference in net revenue generated by EMS between the years 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What was the change in the net revenue from EMS between 2018 and 2019?\",\n    \"output\": \"What was the exact difference in net revenue for EMS between the years 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What was the average year-on-year percentage change in total net revenue from 2017-2019?\",\n    \"output\": \"What was the average annual percentage increase or decrease in the total net revenue from 2017 to 2019?\"\n  },\n  {\n    \"input\": \"What was the average year-on-year percentage change in total net revenue from 2017-2019?\",\n    \"output\": \"What was the average percentage change in net revenue from year to year for the total period between 2017 and 2019?\"\n  },\n  {\n    \"input\": \"What is the difference in the balance of Employee Severance and Benefit Costs as of August between 2017 and 2018?\",\n    \"output\": \"What is the change in the balance as of August in Employee Severance and Benefit Costs between 2017 and 2018?\"\n  },\n  {\n    \"input\": \"What were the lease costs in 2017 as a percentage of the total balance in 2017?\",\n    \"output\": \"\\\"What was the percentage of lease costs in 2017 compared to the total balance in 2017?\\\"\"\n  },\n  {\n    \"input\": \"What were the lease costs in 2017 as a percentage of the total balance in 2017?\",\n    \"output\": \"\\\"What proportion of the total balance in 2017 was allocated towards lease costs?\\\"\"\n  },\n  {\n    \"input\": \"\\\"What was the amount of the domestic-federal income tax expense (benefit) recorded for the year 2017?\\\"\",\n    \"output\": \"What was the current domestic-federal income tax expense (benefit) in 2017?\"\n  },\n  {\n    \"input\": \"What was the amount of domestic-federal income tax expense (benefit) recorded in the financial statements for the year 
2017?\",\n    \"output\": \"What was the current domestic-federal income tax expense (benefit) in 2017?\"\n  },\n  {\n    \"input\": \"What was the amount of income tax expense or benefit incurred for domestic operations in 2019?\",\n    \"output\": \"What was the current domestic-state income tax expense (benefit) in 2019?\"\n  },\n  {\n    \"input\": \"In 2019, what was the amount of income tax expense or benefit incurred at the domestic state level?\",\n    \"output\": \"What was the current domestic-state income tax expense (benefit) in 2019?\"\n  },\n  {\n    \"input\": \"What is the change in the company's domestic-state income tax expense (benefit) between 2018 and 2019?\",\n    \"output\": \"What is the difference in the domestic-state income tax expense (benefit) for the company from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What is the change in the company's domestic-state income tax expense (benefit) between 2018 and 2019?\",\n    \"output\": \"What was the difference in the domestic-state income tax expense (benefit) of the company from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What were the types of land and the associated improvements that existed in the year 2019?\",\n    \"output\": \"What were the land and improvements in 2019?\"\n  },\n  {\n    \"input\": \"What was the total monetary worth of all buildings in the year 2018?\",\n    \"output\": \"What was the value of buildings in 2018?\"\n  },\n  {\n    \"input\": \"What was the percentage change in total property, plant and equipment between 2018 and 2019?\",\n    \"output\": \"What was the percentage increase or decrease in the total value of property, plant, and equipment from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"Based on what factors or criteria were the fair value estimates for Level 3 determined?\",\n    \"output\": \"What were the Level 3 fair value estimates based on?\"\n  },\n  {\n    \"input\": \"What factors or criteria were taken into consideration when determining the 
Level 3 fair value estimates?\",\n    \"output\": \"What were the Level 3 fair value estimates based on?\"\n  },\n  {\n    \"input\": \"What was the change in the fair value for the 3.950% Senior Notes between 2018 and 2019?\",\n    \"output\": \"What was the difference in fair value for the 3.950% Senior Notes from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What is the significance of the amount awarded as shares, which is contingent upon meeting specific performance criteria?\",\n    \"output\": \"What does the amount for shares granted that are based on the achievement of certain performance criteria represent?\"\n  },\n  {\n    \"input\": \"What was the percentage difference in the number of shares from 2018 to 2019 and how do I calculate it accurately?\",\n    \"output\": \"What was the percentage change in the number of shares between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What was the percentage change in Expected dividend yield between 2017 and 2018?\",\n    \"output\": \"What was the percentage adjustment in the expected dividend yield from 2017 to 2018?\"\n  },\n  {\n    \"input\": \"What were the restricted stock units in 2019?\",\n    \"output\": \"What is the significance of restricted stock units in 2019, and can you provide more details about them?\"\n  },\n  {\n    \"input\": \"What were the details and features of the Employee stock purchase plan (ESPP) implemented in 2018?\",\n    \"output\": \"What was the Employee stock purchase plan in 2018?\"\n  },\n  {\n    \"input\": \"What was the change in Other stock-based compensation expenses between 2017 and 2018?\",\n    \"output\": \"What was the difference in the expenses related to stock-based compensation for Other category between the years 2017 and 2018?\"\n  },\n  {\n    \"input\": \"\\\"What was the difference in the number of restricted stock units awarded by a company in 2018 compared to 2019?\\\"\",\n    \"output\": \"What was the change in the restricted stock units between 2018 
and 2019?\"\n  },\n  {\n    \"input\": \"What was the net change in the number of restricted stock units from 2018 to 2019?\",\n    \"output\": \"What was the change in the restricted stock units between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What does the key management comprise of?\",\n    \"output\": \"What is included in key management?\"\n  },\n  {\n    \"input\": \"What is the combined value of all shares held in joint ventures as of December 31, 2019?\",\n    \"output\": \"What is the total value of shares at 31 December 2019 for each joint venture?\"\n  },\n  {\n    \"input\": \"What is the precise percentage change in revenue losses experienced by the United Kingdom from the year 2018 to 2019?\",\n    \"output\": \"What is the percentage change in the revenue losses in UK from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What is the percentage increase or decrease in the revenue losses experienced in the United Kingdom from 2018 to 2019?\",\n    \"output\": \"What is the percentage change in the revenue losses in UK from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What was the net amount of debt in 2019?\",\n    \"output\": \"What is the Net debt in 2019?\"\n  },\n  {\n    \"input\": \"What was the net debt amount in the year 2019?\",\n    \"output\": \"What is the Net debt in 2019?\"\n  },\n  {\n    \"input\": \"What is the percentage change in the net debt from 2018 to 2019?\",\n    \"output\": \"What is the percentage increase or decrease in the net debt from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What were the fees payable by the Group's joint ventures in 2019?\",\n    \"output\": \"What were the specific fees that the Group's joint ventures had to pay in the year 2019?\"\n  },\n  {\n    \"input\": \"What were the fees payable by the Group's joint ventures in 2019?\",\n    \"output\": \"What were the fees that the Group's joint ventures had to pay in 2019? 
Could you provide details on the specific amounts payable by each joint venture?\"\n  },\n  {\n    \"input\": \"What proportion of the total borrowings in 2019, including both the principal and interest, will be due within a period of 5 years?\",\n    \"output\": \"What is the percentage of borrowings (including interest) that matures over 5 years in the total borrowings in 2019?\"\n  },\n  {\n    \"input\": \"What was the net asset value (NAV) of EPRA in 2018?\",\n    \"output\": \"What is the EPRA NAV in 2018?\"\n  },\n  {\n    \"input\": \"What is the precise percentage difference in the overall investment made in joint ventures for the years 2018 and 2019?\",\n    \"output\": \"What is the percentage change in the total investment in joint ventures from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What was the interest rate protection provided in 2019?\",\n    \"output\": \"What is the interest rate protection in 2019?\"\n  },\n  {\n    \"input\": \"What is the current interest rate protection in 2019 and how does it work?\",\n    \"output\": \"What is the interest rate protection in 2019?\"\n  },\n  {\n    \"input\": \"What is the percentage difference in the fair value of unallocated swaps between 2018 and 2019?\",\n    \"output\": \"What is the percentage change in the fair value of unallocated swaps from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What was the difference in the level of interest rate protection from 2018 to 2019?\",\n    \"output\": \"What is the change in interest rate protection between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What is the year-on-year percentage change in the calculation of contingent rents based on tenants' turnover from 2018 to 2019?\",\n    \"output\": \"What is the percentage change in the amount of contingent rents calculated by reference to tenants' turnover from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"In what year will the future minimum lease amounts receivable be higher than usual?\",\n    \"output\": 
\"In which year is there  higher future minimum lease amounts receivable?\"\n  },\n  {\n    \"input\": \"What were the dividends in respect of these shares have been waived by agreement in 2018?\",\n    \"output\": \"What was the specific agreement regarding the waiver of dividends on these shares in 2018?\"\n  },\n  {\n    \"input\": \"What is the percentage change in the disposals value from 2018 to 2019?\",\n    \"output\": \"What is the percentage difference in the value of disposals from 2018 to 2019, and how can it be calculated?\"\n  },\n  {\n    \"input\": \"What is the current total quantity of units that the Company possesses?\",\n    \"output\": \"What is the total number of units the Company has?\"\n  },\n  {\n    \"input\": \"What is the weighted average exercise price for the outstanding options in the Save As You Earn Scheme as of 31 December 2019?\",\n    \"output\": \"What is the weighted average exercise prices of the outstanding options exercisable at 31 December 2019 for the Save As You Earn Scheme?\"\n  },\n  {\n    \"input\": \"What is the calculated average exercise prices of the options that are currently outstanding and can be exercised as of December 31, 2019, under the Save As You Earn Scheme?\",\n    \"output\": \"What is the weighted average exercise prices of the outstanding options exercisable at 31 December 2019 for the Save As You Earn Scheme?\"\n  },\n  {\n    \"input\": \"What is the percentage change in the total provided deferred tax provision from 1 January 2018 to 31 December 2019?\",\n    \"output\": \"What is the precise percentage change in the total amount of deferred tax provision that was provided from 1 January 2018 to 31 December 2019?\"\n  },\n  {\n    \"input\": \"What was the total amount of administration expenses incurred in the year 2019?\",\n    \"output\": \"What is the administration expense in 2019?\"\n  },\n  {\n    \"input\": \"What is encompassed in diluted shares?\",\n    \"output\": \"What do the diluted 
shares include?\"\n  },\n  {\n    \"input\": \"What is the percentage of counterparty #5 exposure in the total exposure in 2019?\",\n    \"output\": \"What is the proportion of the exposure to counterparty #5 compared to the total exposure in the year 2019?\"\n  },\n  {\n    \"input\": \"What is the percentage of counterparty #5 exposure in the total exposure in 2019?\",\n    \"output\": \"What is the proportion of counterparty #5's exposure compared to the total exposure in the year 2019?\"\n  },\n  {\n    \"input\": \"What is the percentage difference in the combined value of cash deposits and derivative financial instrument assets between 2018 and 2019?\",\n    \"output\": \"What is the percentage change in the sum of cash deposits and derivative financial instrument assets from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What is the percentage difference in the loss before tax, including joint ventures and associates, from 2018 to 2019?\",\n    \"output\": \"What is the percentage change in the loss before tax, joint ventures and associates from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What is the net exposure to foreign exchange risk (euro) in 2018?\",\n    \"output\": \"\\\"What is the total amount of foreign exchange risk (in euros) faced by a company in the year 2018?\\\"\"\n  },\n  {\n    \"input\": \"What is the impact on equity attributable to owners of the Group when there is a 10% depreciation in foreign exchange rates from 2018 to 2019? 
Specifically, how does this affect the negative movement of money to equity?\",\n    \"output\": \"What is the change in the negative movement of money to equity attributable to owners of the Group when there is a 10 per cent depreciation in foreign exchange rates from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What is the impact on the Group's owners' equity when foreign exchange rates depreciate by 10% between 2018 and 2019?\",\n    \"output\": \"What is the change in the negative movement of money to equity attributable to owners of the Group when there is a 10 per cent depreciation in foreign exchange rates from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What is the percentage difference in the euro amount drawn from 2018 to 2019?\",\n    \"output\": \"What is the percentage change in the amount drawn in euros from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What is the percentage increase or decrease in the euro amount withdrawn between 2018 and 2019?\",\n    \"output\": \"What is the percentage change in the amount drawn in euros from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"\\\"In which specific year did the total debt reach its highest level, and is it greater than the total debt in any other year?\\\"\",\n    \"output\": \"In which year is there a greater total debt?\"\n  },\n  {\n    \"input\": \"In which year does the total debt amount exceed or surpass the debt levels of other years?\",\n    \"output\": \"In which year is there a greater total debt?\"\n  },\n  {\n    \"input\": \"What is the percentage change in the total debt from 2018 to 2019?\",\n    \"output\": \"What is the percentage increase or decrease in the total debt from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"\\\"What were the average salaries for administrative positions in fiscal years 2019 and 2018?\",\n    \"output\": \"What is the average Administrative salaries for fiscal 2019 and 2018?\"\n  },\n  {\n    \"input\": \"\\\"What was the average annual salary for 
administrative employees in fiscal years 2019 and 2018?\",\n    \"output\": \"What is the average Administrative salaries for fiscal 2019 and 2018?\"\n  },\n  {\n    \"input\": \"What is the weekly processing capacity comparison between the plants located in Laurel, Mississippi and Collins, Mississippi?\",\n    \"output\": \"What is the difference in capacity per week between the processing plants at Laurel, Mississippi and Collins, Mississippi? \"\n  },\n  {\n    \"input\": \"What are the average amounts for prepaid insurance in both fiscal years 2019 and 2018?\",\n    \"output\": \"What is the average Prepaid insurance for fiscal years 2019 and 2018?\"\n  },\n  {\n    \"input\": \"What is the difference in the amount of Total prepaid expenses recorded in the financial statements for fiscal years 2019 and 2018? I need to determine the change in prepaid expenses for these two specific years.\",\n    \"output\": \"What is the change in Total prepaid expenses between fiscal years 2019 and 2018?\"\n  },\n  {\n    \"input\": \"What is the difference in the total amount of prepaid expenses between fiscal years 2019 and 2018?\",\n    \"output\": \"What is the change in Total prepaid expenses between fiscal years 2019 and 2018?\"\n  },\n  {\n    \"input\": \"What is the difference in the statutory income tax rates between fiscal years 2019 and 2018, and how does this impact income taxes?\",\n    \"output\": \"What is the change in Income taxes at statutory rate between fiscal years 2019 and 2018?\"\n  },\n  {\n    \"input\": \"What is the net sales from Fresh, vacuum-sealed chicken for fiscal years 2019 to 2017 respectively?\",\n    \"output\": \"What were the net sales of Fresh, vacuum-sealed chicken for fiscal years 2019, 2018, and 2017?\"\n  },\n  {\n    \"input\": \"What is the net sales from Fresh, chill-packed chicken for fiscal years 2019 to 2017 respectively?\",\n    \"output\": \"What are the net sales figures for Fresh, chill-packed chicken for fiscal years 
2019, 2018, and 2017, respectively?\"\n  },\n  {\n    \"input\": \"What is the net sales from Fresh, chill-packed chicken for fiscal years 2019 to 2017 respectively?\",\n    \"output\": \"How much revenue was generated from the sales of Fresh, chill-packed chicken for the fiscal years of 2019, 2018, and 2017, respectively?\"\n  },\n  {\n    \"input\": \"What is the net sales from Fresh, ice-packed chicken for fiscal years 2019 to 2017 respectively?\",\n    \"output\": \"What were the net sales of Fresh, ice-packed chicken for fiscal years 2019, 2018, and 2017?\"\n  },\n  {\n    \"input\": \"What was the purpose of the company's operating leases, and how did they contribute to the company's operations or financial performance?\",\n    \"output\": \"What was the company's operating leases for?\"\n  },\n  {\n    \"input\": \"What is the discrepancy in the overall amount when comparing revenues gained from revolving loan facility fees to expenses incurred from operating leases?\",\n    \"output\": \"What was the difference in the total between Revolving loan facility fees and operating leases? 
\"\n  },\n  {\n    \"input\": \"What was the change in the Net cash used in investing activities between 2017 and 2019?\",\n    \"output\": \"What was the difference in the amount of net cash used in investing activities from 2017 to 2019?\"\n  },\n  {\n    \"input\": \"What was the percentage change in the Net cash used in financing activities between 2018 and 2019?\",\n    \"output\": \"What was the percentage difference in the amount of cash used in financing activities from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"Which years does the table provide information for the company's Selected Consolidated Statements of Operations Data?\",\n    \"output\": \"For which specific time periods does the table present the company's Selected Consolidated Statements of Operations Data?\"\n  },\n  {\n    \"input\": \"What was the exact amount of revenue generated in the year 2015?\",\n    \"output\": \"What was the total revenue in 2015?\"\n  },\n  {\n    \"input\": \"What was the specific amount of operating profit generated in the year 2017?\",\n    \"output\": \"What was the operating profit in 2017?\"\n  },\n  {\n    \"input\": \"What was the exact amount of operating profit earned during the year 2017?\",\n    \"output\": \"What was the operating profit in 2017?\"\n  },\n  {\n    \"input\": \"What is the precise percentage difference in operating profit from 2015 to 2016?\",\n    \"output\": \"What was the percentage change in operating profit between 2015 and 2016?\"\n  },\n  {\n    \"input\": \"What was the percentage increase or decrease in operating profit from 2015 to 2016?\",\n    \"output\": \"What was the percentage change in operating profit between 2015 and 2016?\"\n  },\n  {\n    \"input\": \"What was the cash used in Software in 2019?\",\n    \"output\": \"How much money was spent on Software in the year 2019?\"\n  },\n  {\n    \"input\": \"What was the net income in 2019?\",\n    \"output\": \"What was the exact net income amount for the fiscal year of 
2019?\"\n  },\n  {\n    \"input\": \"What was the specific amount of interest that was recorded as an expense in the year 2018?\",\n    \"output\": \"What was the interest expense in 2018?\"\n  },\n  {\n    \"input\": \"What is the amount of money paid as interest in 2018?\",\n    \"output\": \"What was the interest expense in 2018?\"\n  },\n  {\n    \"input\": \"For how many consecutive years has the net income exceeded $100,000 thousand?\",\n    \"output\": \"How many years did net income exceed $100,000 thousand?\"\n  },\n  {\n    \"input\": \"What was the change in the interest expense between 2017 and 2018?\",\n    \"output\": \"What were the differences in the amount of interest expense incurred in 2017 and 2018?\"\n  },\n  {\n    \"input\": \"How much was the change in fair value of the company's servicing asset that was included in the fees charged for servicing?\",\n    \"output\": \"How much was the included change in fair value of the company's servicing asset included in its servicing fees?\"\n  },\n  {\n    \"input\": \"For how many consecutive years have Interchange fees surpassed $50,000?\",\n    \"output\": \"How many years did Interchange fees exceed $50,000 thousand?\"\n  },\n  {\n    \"input\": \"How many years have the Interchange fees been over $50,000 thousand?\",\n    \"output\": \"How many years did Interchange fees exceed $50,000 thousand?\"\n  },\n  {\n    \"input\": \"What was the difference in the fees charged for transactions in 2017 compared to 2019? 
How did the transaction fees change between 2017 and 2019?\",\n    \"output\": \"What was the change in the transaction fees between 2017 and 2019?\"\n  },\n  {\n    \"input\": \"What was the percentage change in the total revenue between 2018 and 2019?\",\n    \"output\": \"What is the percentage increase or decrease in the total revenue from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What was the percentage change in the total revenue between 2018 and 2019?\",\n    \"output\": \"What was the percentage difference in the overall revenue from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"How much did the company invest in partnerships during the year 2018?\",\n    \"output\": \"What was the company's investment in partnership in 2018?\"\n  },\n  {\n    \"input\": \"How much money did the company invest in partnership during the year 2018?\",\n    \"output\": \"What was the company's investment in partnership in 2018?\"\n  },\n  {\n    \"input\": \"What is the significance and meaning of the number of Settlements?\",\n    \"output\": \"What does the amount of Settlements represent?\"\n  },\n  {\n    \"input\": \"What is the significance or meaning of the number of Settlements?\",\n    \"output\": \"What does the amount of Settlements represent?\"\n  },\n  {\n    \"input\": \"How many years did the face value of term loan exceed $300,000 thousand?\",\n    \"output\": \"For how many years has the term loan been valued above $300,000 thousand?\"\n  },\n  {\n    \"input\": \"How many years did the face value of term loan exceed $300,000 thousand?\",\n    \"output\": \"How many years has the face value of the term loan been consistently higher than $300,000 thousand?\"\n  },\n  {\n    \"input\": \"What was the change in the Loan Servicing Portfolio between 2017 and 2018?\",\n    \"output\": \"What was the difference in the size of the Loan Servicing Portfolio from 2017 to 2018?\"\n  },\n  {\n    \"input\": \"What was the total costs and expenses?\",\n    \"output\": 
\"What was the overall amount of costs and expenses associated with the given context or situation?\"\n  },\n  {\n    \"input\": \"What was the exact value of the restricted cash in the year 2017?\",\n    \"output\": \"What was the amount of restricted cash in 2017?\"\n  },\n  {\n    \"input\": \"\\\"What is the range of years covered in the table?\\\"\",\n    \"output\": \"Which years does the table provide?\"\n  },\n  {\n    \"input\": \"What was the change in Cash and cash equivalents between 2018 and 2019?\",\n    \"output\": \"What was the difference in the amount of Cash and cash equivalents between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"For how long did the amount of Restricted Cash surpass $200,000 thousand?\",\n    \"output\": \"How many years did Restricted Cash exceed $200,000 thousand?\"\n  },\n  {\n    \"input\": \"What was the total quantity of Furniture sold or produced in the year 2018?\",\n    \"output\": \"What was the amount of Furniture in 2018?\"\n  },\n  {\n    \"input\": \"What was the total quantity or number of furniture items in the year 2018?\",\n    \"output\": \"What was the amount of Furniture in 2018?\"\n  },\n  {\n    \"input\": \"What was the total quantity of computer hardware devices manufactured or sold globally in the year 2019?\",\n    \"output\": \"What was the amount of Computer hardware in 2019?\"\n  },\n  {\n    \"input\": \"How much computer hardware was present in the year 2019?\",\n    \"output\": \"What was the amount of Computer hardware in 2019?\"\n  },\n  {\n    \"input\": \"What was the change in the amount of Software between 2018 and 2019?\",\n    \"output\": \"What was the difference in the quantity of Software from 2018 to 2019, specifically looking at the change in amount?\"\n  },\n  {\n    \"input\": \"What was the change in the amount of Software between 2018 and 2019?\",\n    \"output\": \"How much did the amount of Software change from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What was the amount 
of Receipts in 2017?\",\n    \"output\": \"How many receipts were recorded in the year 2017?\"\n  },\n  {\n    \"input\": \"What was the amount of Receipts in 2017?\",\n    \"output\": \"\\\"How many total receipts were recorded in the year 2017?\\\"\"\n  },\n  {\n    \"input\": \"What was the change in the amount of Settlements between 2017 and 2019?\",\n    \"output\": \"What was the net difference in the number of Settlements from 2017 to 2019?\"\n  },\n  {\n    \"input\": \"What was the change in the amount of Settlements between 2017 and 2019?\",\n    \"output\": \"What was the difference in the number of Settlements from 2017 to 2019 and how did it change over the two-year period?\"\n  },\n  {\n    \"input\": \"What was the exact amount of revenue generated in the year 2018?\",\n    \"output\": \"What was the total revenue in 2018?\"\n  },\n  {\n    \"input\": \"What is the total amount of money earned in 2018?\",\n    \"output\": \"What was the total revenue in 2018?\"\n  },\n  {\n    \"input\": \"\\\"What was the specific financial figure measuring earnings from regular business operations, known as operating profit, during the year 2019?\\\"\",\n    \"output\": \"What was the operating profit in 2019?\"\n  },\n  {\n    \"input\": \"What was the operating profit in the first quarter?\",\n    \"output\": \"What is the exact amount of operating profit recorded in the first quarter of the fiscal year?\"\n  },\n  {\n    \"input\": \"What was the specific amount of net income generated in the third quarter?\",\n    \"output\": \"What was the net income in the third quarter?\"\n  },\n  {\n    \"input\": \"What was the percentage increase or decrease in the basic earnings per share of Class A common stock from the first quarter to the second quarter?\",\n    \"output\": \"What was the percentage change in the basic earnings per share of Class A common stock between the first and second quarter?\"\n  },\n  {\n    \"input\": \"\\\"What is the range of years covered 
in the table?\\\"\",\n    \"output\": \"Which years does the table provide?\"\n  },\n  {\n    \"input\": \"What was the difference in the amount of net income recorded between the years 2017 and 2018?\",\n    \"output\": \"What was the change in net income between 2017 and 2018?\"\n  },\n  {\n    \"input\": \"\\\"What is the meaning and significance of net fair value changes recognized in other gains (losses)?\\\"\",\n    \"output\": \"What does the net Fair value changes recognized in other gains (losses) represent?\"\n  },\n  {\n    \"input\": \"What is the representation of net fair value changes recognized in other gains (losses)?\",\n    \"output\": \"What does the net Fair value changes recognized in other gains (losses) represent?\"\n  },\n  {\n    \"input\": \"What was the beginning balance in 2018?\",\n    \"output\": \"\\\"The starting balance for the year 2018, specifically referring to financial records or accounts, is requested. Please provide the initial amount present at the beginning of that year.\\\"\"\n  },\n  {\n    \"input\": \"Which years did the ending balance exceed $3,000 thousand?\",\n    \"output\": \"In which years was the ending balance higher than $3,000 thousand?\"\n  },\n  {\n    \"input\": \"What is the percentage of fair value time deposits compared to the overall fair value of marketable securities?\",\n    \"output\": \"What percentage of the total fair value marketable securities is made up of fair value time deposits?\"\n  },\n  {\n    \"input\": \"\\\"What is the current interest rate specified in the credit facility agreement that governs our credit arrangement?\\\"\",\n    \"output\": \"What is the interest rate on the credit facility agreement?\"\n  },\n  {\n    \"input\": \"\\\"What is the specific interest rate specified in the credit facility agreement that governs our borrowing terms?\\\"\",\n    \"output\": \"What is the interest rate on the credit facility agreement?\"\n  },\n  {\n    \"input\": \"What factors 
contributed to the increase in research and development expenses in 2019?\",\n    \"output\": \"What are the reasons for higher research and development expense in 2019?\"\n  },\n  {\n    \"input\": \"What is the net difference in research and development expense between 2019 and 2017?\",\n    \"output\": \"What is the exact amount of change in research and development expense between the fiscal years of 2019 and 2017?\"\n  },\n  {\n    \"input\": \"How much were the marketable securities worth in 2019?\",\n    \"output\": \"What was the amount of marketable securities in 2019?\"\n  },\n  {\n    \"input\": \"What was the precise value of marketable securities held in the year 2019?\",\n    \"output\": \"What was the amount of marketable securities in 2019?\"\n  },\n  {\n    \"input\": \"What is the difference in the total amount of shareholders' equity between the years 2018 and 2019?\",\n    \"output\": \"What is the change in total shareholders' equity from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"How much did the total shareholders' equity change between 2018 and 2019?\",\n    \"output\": \"What is the change in total shareholders' equity from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"How does the net (loss) income of 2019 compare to the net (loss) income of 2018?\",\n    \"output\": \"What is the difference between Net (loss) income in 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What was the exact percentage increase in gross profit from the year 2018 to the year 2019?\",\n    \"output\": \"What was the percentage increase in gross profit from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What are the different components of expenses that need to be considered when starting production?\",\n    \"output\": \"What are components of production start-up expense?\"\n  },\n  {\n    \"input\": \"What is the total change in expenditure for initiating production between the years 2019 and 2017?\",\n    \"output\": \"What is the net difference in production 
start-up expense between 2019 and 2017?\"\n  },\n  {\n    \"input\": \"What is the difference in net sales amount in 2019 and 2018?\",\n    \"output\": \"What is the difference in the net sales amount between the years 2019 and 2018? Please provide details and figures to compare the net sales amounts of these two specific years.\"\n  },\n  {\n    \"input\": \"What is the value of foreign government obligations as of December 31, 2019?\",\n    \"output\": \"What is the total amount of foreign government obligations recorded as of December 31, 2019?\"\n  },\n  {\n    \"input\": \"What is the comparative value of money market funds and foreign debt as of December 31, 2019, and how do they differ?\",\n    \"output\": \"What is the difference in the value of money market funds and foreign debt as of December 31, 2019?\"\n  },\n  {\n    \"input\": \"What is the increase in cash provided by operating activities from 2018 to 2019?\",\n    \"output\": \"How much did the cash provided by operating activities increase from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What is the difference in net cash and cash equivalents between 2019 and 2018?\",\n    \"output\": \"What is the change in net cash and cash equivalents between the fiscal years 2019 and 2018?\"\n  },\n  {\n    \"input\": \"\\\"What specific items or responsibilities are typically considered as part of other obligations?\\\"\",\n    \"output\": \"What are included in other obligations?\"\n  },\n  {\n    \"input\": \"What does the category of \\\"other obligations\\\" encompass and include?\",\n    \"output\": \"What are included in other obligations?\"\n  },\n  {\n    \"input\": \"Why was $72.2 million of unrecognized tax benefits excluded?\",\n    \"output\": \"Why were the $72.2 million in unrecognized tax benefits excluded from something?\"\n  },\n  {\n    \"input\": \"What percentage of the total contractual obligations is made up of total long-term debt obligations?\",\n    \"output\": \"What is the 
proportion of the overall contractual obligations that consists specifically of the total amount owed in long-term debt obligations?\"\n  },\n  {\n    \"input\": \"What is the distinction between the total amount paid in interest and the total sum of operating lease obligations?\",\n    \"output\": \"What difference between total interest payments and total operating lease obligations?\"\n  },\n  {\n    \"input\": \"What is the interest rate for the term loan that I am inquiring about?\",\n    \"output\": \"What is the interest rate of the term loan?\"\n  },\n  {\n    \"input\": \"What are the differences in the amounts of notes receivable recorded in 2018 and 2019?\",\n    \"output\": \"What is the difference between notes receivables from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What is the precise percentage by which the total prepaid expenses and other current assets increased from 2018 to 2019?\",\n    \"output\": \"What is the percentage increase in total prepaid expenses and other current assets from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"How are the accounting procedures used to record changes in fair value of hedging instruments?\",\n    \"output\": \"How are changes in fair value of hedging instruments accounted for?\"\n  },\n  {\n    \"input\": \"What are the types of derivative instruments included in the total for other liabilities on a financial statement?\",\n    \"output\": \"What is the total derivate instruments for other liabilities?\"\n  },\n  {\n    \"input\": \"What are the types of derivative instruments used to measure the total value of other liabilities?\",\n    \"output\": \"What is the total derivate instruments for other liabilities?\"\n  },\n  {\n    \"input\": \"What is the ratio of the total derivatives classified as hedging instruments to the total derivatives not classified as hedging instruments under prepaid expenses and other current assets?\",\n    \"output\": \"Under prepaid expenses and other current assets, what is 
the ratio of the total derivates designated as hedging instruments to those not designated as hedging instruments?\"\n  },\n  {\n    \"input\": \"What was the increase in machinery and equipment from 2018 to 2019?\",\n    \"output\": \"What was the percentage change in the value of machinery and equipment between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What was the increase in machinery and equipment from 2018 to 2019?\",\n    \"output\": \"What percentage growth was observed in the value of machinery and equipment between the years 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What are the differences in land statistics comparing the year 2018 and 2019?\",\n    \"output\": \"What is the difference between land from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What are the discrepancies in land conditions between the years 2018 and 2019?\",\n    \"output\": \"What is the difference between land from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What is the net change in selling, general, and administrative expense between the years 2019 and 2017?\",\n    \"output\": \"What is the net difference in selling, general and administrative expense between 2019 and 2017?\"\n  },\n  {\n    \"input\": \"What is the current percentage shareholding of Tata Sons Private Limited and Life Insurance Corporation of India, and how does it differ between the two entities?\",\n    \"output\": \"What is the difference in percentage shareholding between Tata Sons Private Limited and Life Insurance Corporation of India?\"\n  },\n  {\n    \"input\": \"What is the disparity in the quantity of equity shares possessed by Promoters and Insurance Companies?\",\n    \"output\": \"What is the difference between number of equity shares held between Promoters and Insurance Companies?\"\n  },\n  {\n    \"input\": \"How many shareholding accounts were there as of March 31, 2019?\",\n    \"output\": \"What is the number of existing shareholding accounts as on March 31, 2019?  
\"\n  },\n  {\n    \"input\": \"As of March 31, 2019, what is the overall sum of all assets owned by someone or something?\",\n    \"output\": \"What is the grand total of holdings as on March 31, 2019?\"\n  },\n  {\n    \"input\": \"What is the overall value of assets and investments held as of March 31, 2019?\",\n    \"output\": \"What is the grand total of holdings as on March 31, 2019?\"\n  },\n  {\n    \"input\": \"What is the difference in the number of holdings within the share categories of '1-100' and '101-500'? How do the share categories of '1-100' and '101-500' differ in terms of the number of holdings they encompass?\",\n    \"output\": \"What is the difference in holdings between the share categories of '1-100' and '101-500'?\"\n  },\n  {\n    \"input\": \"What is the basis for consolidating shareholdings and how does it affect the overall company structure?\",\n    \"output\": \"On what basis is shareholding consolidated?\"\n  },\n  {\n    \"input\": \"What is the basis for the consolidation of shareholding?\",\n    \"output\": \"On what basis is shareholding consolidated?\"\n  },\n  {\n    \"input\": \"\\\"What is the proportion of unsecured loans out of the total debt at the start of the financial year?\\\"\",\n    \"output\": \"At the beginning of the financial year, what percentage of total indebtedness is made up of unsecured loans?\"\n  },\n  {\n    \"input\": \"What was the change in the quantity of shares transferred from 2011 to 2012 into the Investor Education and Protection Fund (IEPF)?\",\n    \"output\": \"What is the difference in number of shares transferred to IEPF from 2011 to 2012?\"\n  },\n  {\n    \"input\": \"What is the total remuneration for Ramakrishnan V?\",\n    \"output\": \"What is the complete amount of compensation that Ramakrishnan V receives?\"\n  },\n  {\n    \"input\": \"What is the total remuneration for Ramakrishnan V?\",\n    \"output\": \"What is the total amount of money received as payment or compensation for 
services rendered by Ramakrishnan V?\"\n  },\n  {\n    \"input\": \"What is the monetary worth of the stock option that the Company Secretary receives?\",\n    \"output\": \"What is the value of Stock Option given to the Company Secretary?\"\n  },\n  {\n    \"input\": \"What is the monetary worth or amount of Stock Option that has been granted or allocated to the Company Secretary as part of their compensation or benefits package?\",\n    \"output\": \"What is the value of Stock Option given to the Company Secretary?\"\n  },\n  {\n    \"input\": \"What is the specific value or percentage of commission that is usually given to the Chief Financial Officer (CFO) of a company?\",\n    \"output\": \"What is the value of Commission given to the Chief Financial Officer?\"\n  },\n  {\n    \"input\": \"\\\"What is the specific amount or percentage of commission that is typically awarded to the Chief Financial Officer (CFO) for their role?\\\"\",\n    \"output\": \"What is the value of Commission given to the Chief Financial Officer?\"\n  },\n  {\n    \"input\": \"What distinguishes the CFO from the Company Secretary in terms of Others and Allowances? 
Clarify and elaborate on the differences between these two roles regarding their responsibilities, remunerations, and any additional benefits they may receive.\",\n    \"output\": \"What is the difference in Others, Allowances between the CFO and Company Secretary?\"\n  },\n  {\n    \"input\": \"Which key managerial personnel had the highest total remuneration?\",\n    \"output\": \"\\\"Who among the key managerial personnel received the highest total remuneration?\\\"\"\n  },\n  {\n    \"input\": \"How does the ratio of median remuneration between the two Executive Directors differ?\",\n    \"output\": \"What is the difference in ratio to median remuneration between the two Executive Directors?\"\n  },\n  {\n    \"input\": \"\\\"What is the difference in the amount of money received from sales or services between the years 2018 and 2019?\\\"\",\n    \"output\": \"What is the change in revenue between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What is the difference in the amount of money earned by the company between the years 2018 and 2019?\",\n    \"output\": \"What is the change in revenue between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What portion of total unconsolidated revenue in FY 2019 constitutes of export revenue?\",\n    \"output\": \"What percentage of the total unconsolidated revenue for the fiscal year 2019 is comprised of revenue from exports?\"\n  },\n  {\n    \"input\": \"What portion of total unconsolidated revenue in FY 2018 constitutes of export revenue?\",\n    \"output\": \"What percentage of the overall unconsolidated revenue for the fiscal year 2018 is derived from export sales?\"\n  },\n  {\n    \"input\": \"What is the difference in the amount of money earned from foreign exchanges between the fiscal years of 2018 and 2019?\",\n    \"output\": \"What is the change in foreign exchange earnings between FY 2018 and FY 2019?\"\n  },\n  {\n    \"input\": \"What is the change in Cost, Insurance and Freight (CIF) value of imports from 
FY 2018 to FY 2019?\",\n    \"output\": \"How has the Cost, Insurance and Freight (CIF) value of imports changed from FY 2018 to FY 2019?\"\n  },\n  {\n    \"input\": \"What is the specific amount of money charged as a sitting fee for N Chandrasekaran?\",\n    \"output\": \"What is the sitting fee of N Chandrasekaran?\"\n  },\n  {\n    \"input\": \"What is the difference in sitting fees between O P Bhatt and Aarthi Subramanian?\",\n    \"output\": \"What is the difference in sitting fees between O P Bhatt and Aarthi Subramanian, and in what context are these fees being compared or analyzed?\"\n  },\n  {\n    \"input\": \"What is the difference in sitting fees between O P Bhatt and Aarthi Subramanian?\",\n    \"output\": \"What is the difference in the sitting fees charged by O P Bhatt and Aarthi Subramanian?\"\n  },\n  {\n    \"input\": \"What is the percentage change in pre-tax margin between 2017 and 2018?\",\n    \"output\": \"What is the increase/ (decrease) in Pre-tax margin from 2017 to 2018\"\n  },\n  {\n    \"input\": \"What is the percentage change in Pre-tax margin between 2017 and 2018?\",\n    \"output\": \"What is the increase/ (decrease) in Pre-tax margin from 2017 to 2018\"\n  },\n  {\n    \"input\": \"What does Net Income include?\",\n    \"output\": \"\\\"What elements are included in the calculation of Net Income?\\\"\"\n  },\n  {\n    \"input\": \"What is the increase / (decrease) in revenue from 2018 to 2019?\",\n    \"output\": \"How much did the revenue change from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What is the percentage increase / (decrease) in Current liabilities from 2018 to 2019?\",\n    \"output\": \"What is the percentage change in Current Liabilities between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What caused the decrease in the actual shares?\",\n    \"output\": \"What factors led to the decrease in the current number of shares held by individuals or entities?\"\n  },\n  {\n    \"input\": \"What was the percentage 
change in Earnings per Share of common stock from continuing operations (basic) compared to the previous period?\",\n    \"output\": \"What was the increase / (decrease) from the Earnings per share of common stock from continuing operations basic?\"\n  },\n  {\n    \"input\": \"What was the percentage change in the basic earnings per share of common stock from continuing operations?\",\n    \"output\": \"What was the increase / (decrease) from the Earnings per share of common stock from continuing operations basic?\"\n  },\n  {\n    \"input\": \"How did the performance of hardware platforms compare from year to year?\",\n    \"output\": \"How was the performance of hardware platforms year to year\"\n  },\n  {\n    \"input\": \"What does the Pre-tax income performance implied? \",\n    \"output\": \"What is the meaning of Pre-tax income performance?\"\n  },\n  {\n    \"input\": \"What was the pre-tax margin of 2019?\",\n    \"output\": \"\\\"What was the pre-tax margin percentage for the year 2019?\\\"\"\n  },\n  {\n    \"input\": \"What was the pre-tax margin of 2019?\",\n    \"output\": \"\\\"What was the specific pre-tax margin for the fiscal year of 2019?\\\"\"\n  },\n  {\n    \"input\": \"What was the External gross profit margin in 2019?\",\n    \"output\": \"What was the gross profit margin from sources external to the company for the year 2019?\"\n  },\n  {\n    \"input\": \"What is the average of Balance at January 1?\",\n    \"output\": \"What is the average balance as of January 1st for all accounts (or customers, if applicable)?\"\n  },\n  {\n    \"input\": \"What is the average balance amount as of December 31st?\",\n    \"output\": \"What is the average of Balance at December 31?\"\n  },\n  {\n    \"input\": \"What is the average balance amount recorded on December 31st for a specific period of time?\",\n    \"output\": \"What is the average of Balance at December 31?\"\n  },\n  {\n    \"input\": \"What is the increase / (decrease) in the net 
capitalized software from 2018 to 2019?\",\n    \"output\": \"What is the change in the net capitalized software amount from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What was the percentage change in pre-tax income from 2018 to 2019?\",\n    \"output\": \"What was the increase / (decrease) in the Pre-tax income from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What is the percentage change in gross profit for External Operating Systems Software from 2018 to 2019?\",\n    \"output\": \"What is the increase / (decrease) in the External Operating Systems Software gross profit from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What was the percentage increase / (decrease) in total short term debt?\",\n    \"output\": \"What was the percentage change in the total amount of short term debt?\"\n  },\n  {\n    \"input\": \"What was the percentage change in the pre-tax margin from 2018 to 2019?\",\n    \"output\": \"What was the increase / (decrease) in the Pre-tax margin from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What percentage change in the pre-tax margin occurred between 2018 and 2019?\",\n    \"output\": \"What was the increase / (decrease) in the Pre-tax margin from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What is the meaning or significance of the launch expense?\",\n    \"output\": \"What does launch expense represent?\"\n  },\n  {\n    \"input\": \"How many types of expenses are listed in the table?\",\n    \"output\": \"How many categories of expenses are included in the table?\"\n  },\n  {\n    \"input\": \"What was the average adjusted EBITDA for 2018 and 2019?\",\n    \"output\": \"What was the average amount of adjusted EBITDA for both the years 2018 and 2019?\"\n  },\n  {\n    \"input\": \"\\\"What is the chronological order of the gross profit for each financial year end as depicted in the table?\\\"\",\n    \"output\": \"What is the gross profit for each financial year end shown in the table (in chronological order)?\"\n  },\n  {\n    
\"input\": \"What was the year with the highest value for other financial expenses?\",\n    \"output\": \"In which year was the amount of other financial expenses the largest?\"\n  },\n  {\n    \"input\": \"What was the net difference in the overall financial expenses incurred during the year 2019 as compared to 2018?\",\n    \"output\": \"What was the change in the total financial expenses in 2019 from 2018?\"\n  },\n  {\n    \"input\": \"What was the sum of all financial costs incurred in the year 2019?\",\n    \"output\": \"What is the total amount of financial expenses in 2019?\"\n  },\n  {\n    \"input\": \"What was the specific year when the amount of Current lease liabilities was greater than in any other year?\",\n    \"output\": \"In which year was the amount of Current lease liabilities larger?\"\n  },\n  {\n    \"input\": \"When did the amount of Current lease liabilities exceed the previous year's amount?\",\n    \"output\": \"In which year was the amount of Current lease liabilities larger?\"\n  },\n  {\n    \"input\": \"What is the total financial income for 2019?\",\n    \"output\": \"What was the overall financial income for the year 2019?\"\n  },\n  {\n    \"input\": \"What was the total number of financial items in the year 2019?\",\n    \"output\": \"What is the amount of total financial items in 2019?\"\n  },\n  {\n    \"input\": \"What is the total number of financial items recorded in the year 2019?\",\n    \"output\": \"What is the amount of total financial items in 2019?\"\n  },\n  {\n    \"input\": \"What are the primary categories that are examined within the Financial Items section of the table?\",\n    \"output\": \"What are the main categories analyzed under Financial Items in the table?\"\n  },\n  {\n    \"input\": \"In which year was the amount of financial income the smallest?\",\n    \"output\": \"What is the year with the minimum financial income amount?\"\n  },\n  {\n    \"input\": \"What was the change in the Average invested 
capital less average impairment in 2019 from 2018?\",\n    \"output\": \"What was the difference in the average amount of capital invested excluding average impairment between 2019 and 2018?\"\n  },\n  {\n    \"input\": \"What was the change in the Average invested capital less average impairment in 2019 from 2018?\",\n    \"output\": \"What was the difference in the average amount of money invested minus the average amount of impairments in 2019 compared to 2018?\"\n  },\n  {\n    \"input\": \"What was the percentage change in retained earnings in 2019 from 2018?\",\n    \"output\": \"What was the precise percentage difference in retained earnings between 2018 and 2019, specifically referring to the change observed in 2019?\"\n  },\n  {\n    \"input\": \"What was the percentage change in retained earnings in 2019 from 2018?\",\n    \"output\": \"What was the percentage increase or decrease in the amount of retained earnings from the year 2018 to 2019?\"\n  },\n  {\n    \"input\": \"How is Total Cost of Ownership (TCE) earnings utilized as a widely accepted performance measure for the shipping industry?\",\n    \"output\": \"How is TCE earnings used as a standard shipping industry performance measure?\"\n  },\n  {\n    \"input\": \"What is the specific year when the revenue reached its maximum value?\",\n    \"output\": \"In which year was Revenue the largest?\"\n  },\n  {\n    \"input\": \"What items were included in the freight receivables as of 2019?\",\n    \"output\": \"As of 2019, what did freight receivables include?\"\n  },\n  {\n    \"input\": \"What was included in the category of freight receivables as of 2019?\",\n    \"output\": \"As of 2019, what did freight receivables include?\"\n  },\n  {\n    \"input\": \"What is the determining basis for the calculation and establishment of an allowance for expected credit loss?\",\n    \"output\": \"What is the making of allowance for expected credit loss based on?\"\n  },\n  {\n    \"input\": \"What is the 
basis for determining the provision for expected credit loss?\",\n    \"output\": \"What is the making of allowance for expected credit loss based on?\"\n  },\n  {\n    \"input\": \"What is the specific year when the total gross freight receivables reached their highest amount?\",\n    \"output\": \"In which year was the amount of total gross freight receivables the largest?\"\n  },\n  {\n    \"input\": \"What was the net increase or decrease in the total gross freight receivables between 2018 and 2019?\",\n    \"output\": \"What was the change in the total net gross freight receivables in 2019 from 2018?\"\n  },\n  {\n    \"input\": \"What information can be derived in note 16?\",\n    \"output\": \"What data or details can be obtained from note 16?\"\n  },\n  {\n    \"input\": \"What information can be derived in note 16?\",\n    \"output\": \"What kind of information can be obtained from note 16?\"\n  },\n  {\n    \"input\": \"What was the change in the carrying amount as of 31 December from 2018 to 2019?\",\n    \"output\": \"What was the difference in the carrying amount as of December 31 between the years 2018 and 2019? 
Please provide the specific change in the carrying amount.\"\n  },\n  {\n    \"input\": \"What are the specific components present in the table that are utilized for liquidity calculations?\",\n    \"output\": \"What are the components in the table used to calculate liquidity?\"\n  },\n  {\n    \"input\": \"What was the difference in liquidity levels between 2019 and 2018, and how did it change over the course of the year?\",\n    \"output\": \"What was the change in liquidity in 2019 from 2018?\"\n  },\n  {\n    \"input\": \"What does TORM have leases for?\",\n    \"output\": \"What types of leases does TORM currently have?\"\n  },\n  {\n    \"input\": \"How are leases represented on the balance sheet and what specific details are included for each lease?\",\n    \"output\": \"How is each lease reflected on the balance sheet?\"\n  },\n  {\n    \"input\": \"\\\"What is the asset type that experiences the highest rate of depreciation in a year?\\\"\",\n    \"output\": \"Which asset type has the largest depreciation of the year?\"\n  },\n  {\n    \"input\": \"What was the difference in the amount of gross profit in 2019 compared to 2018?\",\n    \"output\": \"What was the change in gross profit in 2019 from 2018?\"\n  },\n  {\n    \"input\": \"What was the exact percentage increase or decrease in the gross profit from 2018 to 2019?\",\n    \"output\": \"What was the percentage change in gross profit in 2019 from 2018?\"\n  },\n  {\n    \"input\": \"What was the percentage increase or decrease in gross profit from 2018 to 2019?\",\n    \"output\": \"What was the percentage change in gross profit in 2019 from 2018?\"\n  },\n  {\n    \"input\": \"\\\"What specific details or data can be found in Note 21 of the provided information?\\\"\",\n    \"output\": \"What information is provided in Note 21?\"\n  },\n  {\n    \"input\": \"What was the percentage change in interest rate swaps between the year 2018 and 2019?\",\n    \"output\": \"What was the change in interest rate 
swaps from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"How did interest rate swaps change from 2018 to 2019?\",\n    \"output\": \"What was the change in interest rate swaps from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What was the percentage difference in the interest rate swaps between 2018 and 2019? I would like to know the rate at which the interest rate swaps changed between these two years.\",\n    \"output\": \"What was the percentage change in interest rate swaps from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What was the precise percentage difference in the interest rate swap market between 2018 and 2019?\",\n    \"output\": \"What was the percentage change in interest rate swaps from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What is TORM's belief or estimation of the amount of net interest-bearing debt they possess?\",\n    \"output\": \"What does TORM believe net interest-bearing debt to be?\"\n  },\n  {\n    \"input\": \"What is the methodology used to calculate the net asset value (NAV) per share in investment funds?\",\n    \"output\": \"How is NAV/share calculated?\"\n  },\n  {\n    \"input\": \"What are the types of audit fees in the table?\",\n    \"output\": \"What are the different types of audit fees listed in the table?\"\n  },\n  {\n    \"input\": \"In what year did the auditor receive the highest overall compensation?\",\n    \"output\": \"In which year was the total remuneration to the auditor the largest?\"\n  },\n  {\n    \"input\": \"For which specific years were the calculations of Other Liabilities amounts conducted?\",\n    \"output\": \"In which years were the amounts of Other Liabilities calculated for?\"\n  },\n  {\n    \"input\": \"In which specific year did the amount allocated to Partners and commercial managements surpass the amount allocated in other years?\",\n    \"output\": \"In which year was the amount under Partners and commercial managements larger?\"\n  },\n  {\n    \"input\": \"\\\"What specific expenses 
are included in the category of operating expenses that are related to staff costs?\\\"\",\n    \"output\": \"What are the staff costs included in operating expenses related to?\"\n  },\n  {\n    \"input\": \"How is the average number of employees determined and what is the methodology behind its calculation?\",\n    \"output\": \"How is the average number of employees calculated?\"\n  },\n  {\n    \"input\": \"What was the change in the total number of permanent employees from 2018 to 2019?\",\n    \"output\": \"How many permanent employees were added or lost between 2018 and 2019, resulting in the change in the total number of permanent employees?\"\n  },\n  {\n    \"input\": \"What does note 21 include in terms of information?\",\n    \"output\": \"What information does note 21 contain?\"\n  },\n  {\n    \"input\": \"What does accounts receivables in the table include?\",\n    \"output\": \"What specific components are included in the accounts receivables section of the table?\"\n  },\n  {\n    \"input\": \"What specific items or costs are included in the income statement as Port expenses, including bunkers and commissions?\",\n    \"output\": \"What is recognized in the income statement under Port expenses, bunkers and commissions?\"\n  },\n  {\n    \"input\": \"How is allowance for expected credit loss of freight receivables calculated?\",\n    \"output\": \"How is the calculation for allowance for expected credit loss determined specifically for freight receivables?\"\n  },\n  {\n    \"input\": \"\\\"What specific components are accounted for in the carrying amount designated for 'Vessels and capitalized dry-docking'?\\\"\",\n    \"output\": \"What is included in the carrying amount for \\\"Vessels and capitalized dry-docking\\\"?\"\n  },\n  {\n    \"input\": \"What specific sub-elements, denoted in capital letters, can be found under the category of \\\"Vessels\\\" in the table related to dry-docking?\",\n    \"output\": \"What are the sub-elements under 
Vessels and capitalized dry-docking in the table?\"\n  },\n  {\n    \"input\": \"What specific components or sub-elements fall under the category of Vessels and capitalized dry-docking in the table?\",\n    \"output\": \"What are the sub-elements under Vessels and capitalized dry-docking in the table?\"\n  },\n  {\n    \"input\": \"\\\"What is the meaning of the phrase 'company's retirement obligations' and what specific aspects does it encompass?\\\"\",\n    \"output\": \"What does the company's retirement obligations refer to?\"\n  },\n  {\n    \"input\": \"\\\"What are the retirement obligations of the company and what do they encompass?\\\"\",\n    \"output\": \"What does the company's retirement obligations refer to?\"\n  },\n  {\n    \"input\": \"What is the company's total contractual cash obligations due in more than 5 years?\",\n    \"output\": \"What is the total amount of cash that the company is obligated to pay according to its contracts, and when are these payments due, specifically for obligations that extend beyond a period of 5 years?\"\n  },\n  {\n    \"input\": \"What is the company's total contractual cash obligations due in more than 5 years?\",\n    \"output\": \"\\\"What is the total amount of cash that the company owes based on contractual agreements with a maturity period of more than 5 years?\\\"\"\n  },\n  {\n    \"input\": \"How much does operating leases account for total contractual cash obligations for period of less than 1 year?\",\n    \"output\": \"What is the percentage of total contractual cash obligations attributable to operating leases with a duration of less than one year?\"\n  },\n  {\n    \"input\": \"What is the specific monetary difference in the company's operating income between the years 2018 and 2019?\",\n    \"output\": \"What is the company's increase in income from operating activities between 2018 and 2019? 
\"\n  },\n  {\n    \"input\": \"What is the cash balance and the amount of cash equivalents held by the company as of August 31, 2019?\",\n    \"output\": \"What is the company's cash and cash equivalents as at 31 August 2019?\"\n  },\n  {\n    \"input\": \"What is the change in cash flow from investing activities from 2018 to 2019, specifically referring to any increase or decrease in the amount of cash generated or used in investing activities during this period?\",\n    \"output\": \"What is the increase in cash flow from investing activities between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What is the total cashflow from investing activities in both 2018 and 2019?\",\n    \"output\": \"What is the combined amount of cash inflows and outflows from investing activities in 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What is the total cashflow from investing activities in both 2018 and 2019?\",\n    \"output\": \"What was the net cash flow from investing activities in 2018 and 2019 combined?\"\n  },\n  {\n    \"input\": \"What is the total effect of exchange rate changes on cash and cash equivalents in both 2018 and 2019?\",\n    \"output\": \"What is the cumulative impact of fluctuations in exchange rates on the combined balances of cash and cash equivalents for the years 2018 and 2019?\"\n  },\n  {\n    \"input\": \"When was the approval by Accenture's Board of Directors granted for the implementation of an open-market share purchase program specifically designed for the acquisition of Accenture's Class A ordinary shares?\",\n    \"output\": \"When did Accenture's Board of Directors authorize and confirm an open-market share purchase program for acquiring Accenture pls Class A ordinary shares?\"\n  },\n  {\n    \"input\": \"How much of the shares purchased in July were part of publicly announced plan or programs?\",\n    \"output\": \"What proportion of the shares acquired in the month of July were specifically disclosed as part of publicly announced plan 
or programs?\"\n  },\n  {\n    \"input\": \"How much of the shares purchased in July were part of publicly announced plan or programs?\",\n    \"output\": \"What was the proportion of shares acquired in July that were part of publicly disclosed plans or programs?\"\n  },\n  {\n    \"input\": \"What was the primary source of the company's operational revenue in the year 2019?\",\n    \"output\": \"What is the company's main source of operating income in 2019?\"\n  },\n  {\n    \"input\": \"What was the specific amount spent on expenses directly related to the production or provision of goods and services in the year 2019?\",\n    \"output\": \"What was the cost of revenue in 2019?\"\n  },\n  {\n    \"input\": \"What was the precise amount spent on expenses directly related to generating revenue in the year 2019?\",\n    \"output\": \"What was the cost of revenue in 2019?\"\n  },\n  {\n    \"input\": \"What was the total count of shares that were approved and allocated?\",\n    \"output\": \"What was the number of granted shares?\"\n  },\n  {\n    \"input\": \"How many shares were approved and given to individuals or entities as stocks?\",\n    \"output\": \"What was the number of granted shares?\"\n  },\n  {\n    \"input\": \"What is the disparity in the weighted-average grant date fair value between shares that have been allotted and subsequently forfeited?\",\n    \"output\": \"What is the difference in weighted-average grant date fair value between granted and forfeited shares?\"\n  },\n  {\n    \"input\": \"What is the difference in the weighted-average grant date fair value between unvested shares and vested shares in 2018?\",\n    \"output\": \"What is the difference in weighted-average grant date fair value for unvested shares in 2018 and vested shares?\"\n  },\n  {\n    \"input\": \"What is the discrepancy in the weighted-average grant date fair value between unvested shares and vested shares in the year 2018?\",\n    \"output\": \"What is the difference in 
weighted-average grant date fair value for unvested shares in 2018 and vested shares?\"\n  },\n  {\n    \"input\": \"What was the previously reported amount of total current assets?\",\n    \"output\": \"What was the total current assets as previously reported?\"\n  },\n  {\n    \"input\": \"What is the total value of the deferred commissions, including the current portion, and the total current assets as reported in the previous financial statement?\",\n    \"output\": \"What was the sum of deferred commissions, current portion and total current assets as previously reported?\"\n  },\n  {\n    \"input\": \"What was the total amount reported for deferred commissions, the current portion of deferred commissions, and the overall total for current assets in the previous report?\",\n    \"output\": \"What was the sum of deferred commissions, current portion and total current assets as previously reported?\"\n  },\n  {\n    \"input\": \"What is the revised percentage of accrued expenses in relation to the total sum of liabilities and stockholders' equity?\",\n    \"output\": \"What was accrued expenses as revised as a percentage of total liabilities and stockholders' equity?\"\n  },\n  {\n    \"input\": \"What was the company's bad debt expense for the year ending December 31, 2018?\",\n    \"output\": \"What was the exact amount recorded as bad debt expense by the company for the entire year ending on December 31, 2018?\"\n  },\n  {\n    \"input\": \"What was the company's bad debt expense for the year ending December 31, 2018?\",\n    \"output\": \"What was the amount of bad debts that the company recorded as an expense for the fiscal year that ended on December 31, 2018?\"\n  },\n  {\n    \"input\": \"What was the total amount of interest earned as income in the year 2017?\",\n    \"output\": \"What was interest income in 2017?\"\n  },\n  {\n    \"input\": \"What was the net income of others in 2019?\",\n    \"output\": \"What was others, net in 2019?\"\n  },\n  {\n  
  \"input\": \"What was the percentage change in Interest expense between 2017 and 2018?\",\n    \"output\": \"What was the percentage increase or decrease in Interest expense from 2017 to 2018?\"\n  },\n  {\n    \"input\": \"What is the average interest income from 2017-2019?\",\n    \"output\": \"What is the average income generated from interest for the years 2017 to 2019 inclusive?\"\n  },\n  {\n    \"input\": \"What percentage increase or decrease in interest income occurred from 2018 to 2019?\",\n    \"output\": \"What was the percentage change in interest income between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What is the average amount of net cash provided by operating activities for the years 2017, 2018, and 2019?\",\n    \"output\": \"What was the average net cash provided by operating activities from 2017-2019?\"\n  },\n  {\n    \"input\": \"What was the percentage change in the free flow cash margin from 2017 to 2018?\",\n    \"output\": \"What was the change in free flow cash margin between 2017 and 2018?\"\n  },\n  {\n    \"input\": \"What was the total amount of federal net operating losses incurred by the company in the year 2019?\",\n    \"output\": \"What was the company's federal net operating losses in 2019?\"\n  },\n  {\n    \"input\": \"What was the amount of federal net operating losses incurred by the company during the year 2019 for tax purposes?\",\n    \"output\": \"What was the company's federal net operating losses in 2019?\"\n  },\n  {\n    \"input\": \"What was the company's state net operating losses in 2019?\",\n    \"output\": \"What was the total amount of net operating losses incurred by the company in the state during the year 2019?\"\n  },\n  {\n    \"input\": \"What is the percentage change in Deferred revenue between 2018 and 2019?\",\n    \"output\": \"What is the exact percentage difference in Deferred revenue from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What was the precise amount spent on sales and marketing in 
the year 2018?\",\n    \"output\": \"What was the sales and marketing expense in 2018?\"\n  },\n  {\n    \"input\": \"What was the specific fair value amount of cash that is being referred to in the context of a certain situation or event?\",\n    \"output\": \"What was the fair value amount of cash?\"\n  },\n  {\n    \"input\": \"What was the specific fair value figure assigned to the amount of cash? Please provide the determined fair value amount for the cash in question.\",\n    \"output\": \"What was the fair value amount of cash?\"\n  },\n  {\n    \"input\": \"What percentage of fair value total cash equivalents consist of agency bonds?\",\n    \"output\": \"What is the proportion of agency bonds, in terms of percentage, in the total cash equivalents that make up the fair value?\"\n  },\n  {\n    \"input\": \"What updates and advancements were made in computer equipment and software from 2018 to 2019?\",\n    \"output\": \"What is the change in computer equipment and software between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What are the differences in computer equipment and software between the years 2018 and 2019?\",\n    \"output\": \"What is the change in computer equipment and software between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What was the percentage change in revenue from research and development from 2017 to 2018?\",\n    \"output\": \"What was the change in percentage of revenue of research and development between 2017 and 2018?\"\n  },\n  {\n    \"input\": \"What are the main promises or obligations that the company is committed to?\",\n    \"output\": \"What are the company's principal commitments?\"\n  },\n  {\n    \"input\": \"What are the main commitments of the company?\",\n    \"output\": \"What are the company's principal commitments?\"\n  },\n  {\n    \"input\": \"What is the sum of total operating lease obligations and Long-term debt obligations including interest?\",\n    \"output\": \"What is the combined amount of operating 
lease obligations and long-term debt obligations, including accrued interest?\"\n  },\n  {\n    \"input\": \"What portion of the total obligations is attributable to payments that are due within a time frame of 1-3 years?\",\n    \"output\": \"What is the percentage of the total obligations that consists of payments due in 1-3 years?\"\n  },\n  {\n    \"input\": \"What is the specific percentage of the overall obligations that are categorized as payments to be made within a period of 1-3 years?\",\n    \"output\": \"What is the percentage of the total obligations that consists of payments due in 1-3 years?\"\n  },\n  {\n    \"input\": \"\\\"What are the regulations or guidelines regarding the payment of Foreign income tax applicable for the year 2019?\\\"\",\n    \"output\": \"What is the provision for Foreign income tax in 2019?\"\n  },\n  {\n    \"input\": \"\\\"What are the regulations or rules implemented in 2019 regarding the taxation of foreign income?\\\"\",\n    \"output\": \"What is the provision for Foreign income tax in 2019?\"\n  },\n  {\n    \"input\": \"What is the disparity in the weighted-average grant date fair value when comparing shares that have been granted to those that have been forfeited?\",\n    \"output\": \"What is the difference in weighted-average grant date fair value between granted and forfeited shares?\"\n  },\n  {\n    \"input\": \"What is the disparity in weighted-average grant date fair value when comparing shares that have been granted with those that have been forfeited?\",\n    \"output\": \"What is the difference in weighted-average grant date fair value between granted and forfeited shares?\"\n  },\n  {\n    \"input\": \"What is the  Net sales for 2019?\",\n    \"output\": \"What was the total global revenue generated from sales in the year 2019?\"\n  },\n  {\n    \"input\": \"What was the percentage change in diluted net income per share from continuing operations from 2017 to 2018?\",\n    \"output\": \"What was the change 
in Diluted Net income per share from continuing operations in 2018 from 2017?\"\n  },\n  {\n    \"input\": \"What was the percentage increase or decrease in the Diluted Net income per share from continuing operations between 2018 and 2017?\",\n    \"output\": \"What was the percentage change in Diluted Net income per share from continuing operations in 2018 from 2017?\"\n  },\n  {\n    \"input\": \"What does the Euro Term Loan due 2024 consist of?\",\n    \"output\": \"What is included in the Euro Term Loan due in 2024?\"\n  },\n  {\n    \"input\": \"What does the Euro Term Loan due 2024 consist of?\",\n    \"output\": \"\\\"What are the components and details of the Euro Term Loan due in 2024?\\\"\"\n  },\n  {\n    \"input\": \"In which year did the value of the 1.0% State of Connecticut term loan, which has a maturity date in 2023, surpass its previous size?\",\n    \"output\": \"In which year was the 1.0% State of Connecticut term loan due 2023 larger?\"\n  },\n  {\n    \"input\": \"In which year did the size of the 1.0% State of Connecticut term loan due in 2023 increase?\",\n    \"output\": \"In which year was the 1.0% State of Connecticut term loan due 2023 larger?\"\n  },\n  {\n    \"input\": \"What was the change in Capital lease obligations from 2018 to 2019?\",\n    \"output\": \"What was the exact difference in the amount of Capital lease obligations between the years 2018 and 2019?\"\n  },\n  {\n    \"input\": \"In which year was ILS a higher percentage of total net sales?\",\n    \"output\": \"In which specific year did the Incremental Lifetime Sales (ILS) constitute a larger proportion of the overall net sales?\"\n  },\n  {\n    \"input\": \"What were the net sales figures for ILS in both 2018 and 2019, and what was the average amount for each year?\",\n    \"output\": \"What was the average amount of net sales for ILS in 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What were the average net sales for ILS for the years 2018 and 2019?\",\n    
\"output\": \"What was the average amount of net sales for ILS in 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What is the estimated duration of a building and any associated enhancements before they are deemed no longer useful?\",\n    \"output\": \"What is the useful life of Buildings and improvements?\"\n  },\n  {\n    \"input\": \"How is property and equipment stated in the table?\",\n    \"output\": \"In what manner is property and equipment presented and described within the table?\"\n  },\n  {\n    \"input\": \"In which year was the amount of Land larger?\",\n    \"output\": \"\\\"When comparing the years, which year had a larger amount of land?\\\"\"\n  },\n  {\n    \"input\": \"What was the change in Leasehold improvements from 2018 to 2019?\",\n    \"output\": \"What was the difference in Leasehold improvements value between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What was the percentage change in Short-term investments in 2019 from 2018?\",\n    \"output\": \"What was the percentage difference in the value of Short-term investments between 2019 and 2018?\"\n  },\n  {\n    \"input\": \"What was the percentage change in Short-term investments in 2019 from 2018?\",\n    \"output\": \"What was the exact percentage difference in the value of Short-term investments between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What was the net income for the year 2019?\",\n    \"output\": \"What was Net income in 2019?\"\n  },\n  {\n    \"input\": \"\\\"In which specific year did the company report the highest amount of comprehensive income?\\\"\",\n    \"output\": \"In which year was Comprehensive income largest?\"\n  },\n  {\n    \"input\": \"What is the largest year for Comprehensive income?\",\n    \"output\": \"In which year was Comprehensive income largest?\"\n  },\n  {\n    \"input\": \"What was the difference in the amount of money earned in 2018 compared to 2017 after all expenses and taxes were accounted for?\",\n    \"output\": \"What was the change 
in Net income in 2018 from 2017?\"\n  },\n  {\n    \"input\": \"What was the Projected Benefit Obligation (PBO) amount for the year 2019? Please provide the numerical value for the PBO as of December 31st, 2019.\",\n    \"output\": \"What was the Projected benefit obligation in 2019?\"\n  },\n  {\n    \"input\": \"In which years was information on defined benefit plans provided?\",\n    \"output\": \"During which specific years was information regarding defined benefit plans provided?\"\n  },\n  {\n    \"input\": \"In which year was the amount of Research and development largest?\",\n    \"output\": \"In which specific year did Research and development reach its highest amount or expenditure?\"\n  },\n  {\n    \"input\": \"What was the specific amount allocated for income taxes in the year 2019?\",\n    \"output\": \"What was the Provision for income taxes in 2019?\"\n  },\n  {\n    \"input\": \"What specific amount was set aside for income taxes in the fiscal year of 2019?\",\n    \"output\": \"What was the Provision for income taxes in 2019?\"\n  },\n  {\n    \"input\": \"What has been the modification in the amount of state income taxes that individuals are required to pay in 2019 compared to 2018?\",\n    \"output\": \"What was the change in state income taxes that is currently payable in 2019 from 2018?\"\n  },\n  {\n    \"input\": \"How much Microelectronics was present in the year 2019?\",\n    \"output\": \"What is the amount of Microelectronics in 2019?\"\n  },\n  {\n    \"input\": \"What is the total volume or quantity of Microelectronics produced or used worldwide in the year 2019?\",\n    \"output\": \"What is the amount of Microelectronics in 2019?\"\n  },\n  {\n    \"input\": \"In which years is net sales calculated?\",\n    \"output\": \"In which specific years does the calculation of net sales occur?\"\n  },\n  {\n    \"input\": \"In which years is net sales calculated?\",\n    \"output\": \"During which specific years is the calculation of net 
sales undertaken?\"\n  },\n  {\n    \"input\": \"What was the percentage difference in the quantity of OEM components and instrumentation between 2018 and 2019?\",\n    \"output\": \"What was the percentage change in the amount of OEM components and instrumentation in 2019 from 2018?\"\n  },\n  {\n    \"input\": \"What was the Net cash provided by operating activities in 2019?\",\n    \"output\": \"What was the amount of net cash generated from operating activities during the year 2019?\"\n  },\n  {\n    \"input\": \"What was the  Purchases of property and equipment  in 2018?\",\n    \"output\": \"What was the total value of purchases made for property and equipment in the year 2018?\"\n  },\n  {\n    \"input\": \"In which specific years are the instances or applications of physical currency emphasized or showcased in the table?\",\n    \"output\": \"In which years are the uses of cash highlighted in the table?\"\n  },\n  {\n    \"input\": \"What years are specified in the table to illustrate the purpose of utilizing cash?\",\n    \"output\": \"In which years are the uses of cash highlighted in the table?\"\n  },\n  {\n    \"input\": \"In which year was the Issuance of shares under employee stock plans larger?\",\n    \"output\": \"When comparing the issuance of shares under employee stock plans, in which specific year was the volume of shares issued found to be greater?\"\n  },\n  {\n    \"input\": \"In which year were the Additions charged to expenses the largest?\",\n    \"output\": \"What is the specific year when the largest Additions were charged to expenses?\"\n  },\n  {\n    \"input\": \"In which year were the Additions charged to expenses the largest?\",\n    \"output\": \"What was the year when the largest amount of Additions were charged to expenses?\"\n  },\n  {\n    \"input\": \"In which year did tax fees have a higher value or amount than in other years?\",\n    \"output\": \"In which year were tax fees larger?\"\n  },\n  {\n    \"input\": \"What was 
the difference in the amount of all other fees charged in 2019 compared to 2018?\",\n    \"output\": \"What was the change in All other fees in 2019 from 2018?\"\n  },\n  {\n    \"input\": \"What was Long-term taxes payable in 2019?\",\n    \"output\": \"What was the amount of Long-term taxes payable in the year 2019?\"\n  },\n  {\n    \"input\": \"In which year was Other long-term liabilities larger?\",\n    \"output\": \"\\\"In which specific year did Other long-term liabilities exhibit a greater amount compared to the other years?\\\"\"\n  },\n  {\n    \"input\": \"In 2019, what was the exact amount of deferred compensation liability?\",\n    \"output\": \"What was the Total deferred compensation liability in 2019?\"\n  },\n  {\n    \"input\": \"For which years was the calculation of Total deferred compensation liability performed?\",\n    \"output\": \"In which years was Total deferred compensation liability calculated?\"\n  },\n  {\n    \"input\": \"How was the calculation for diluted earnings per share derived?\",\n    \"output\": \"How was diluted earnings per share computed?\"\n  },\n  {\n    \"input\": \"In which specific years can we find the earnings per share data that was included in the table?\",\n    \"output\": \"In which years was earnings per share provided in the table?\"\n  },\n  {\n    \"input\": \"What year experienced the highest dilutive impact from employee stock awards?\",\n    \"output\": \"In which year was the Dilutive effect of employee stock awards largest?\"\n  },\n  {\n    \"input\": \"In what specific year did employee stock awards have the most significant dilutive effect?\",\n    \"output\": \"In which year was the Dilutive effect of employee stock awards largest?\"\n  },\n  {\n    \"input\": \"What was the change in Dilutive effect of employee stock awards in 2019 from 2018?\",\n    \"output\": \"What was the difference in the dilutive impact caused by employee stock awards between 2019 and 2018?\"\n  },\n  {\n    \"input\": 
\"What was the change in Dilutive effect of employee stock awards in 2019 from 2018?\",\n    \"output\": \"What was the difference in how much employee stock awards had a dilutive effect in 2019 compared to 2018?\"\n  },\n  {\n    \"input\": \"What was the percentage change in Dilutive effect of employee stock awards in 2019 from 2018?\",\n    \"output\": \"What was the percentage change in the dilutive effect of employee stock awards from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"In which specific year did the Amortization of intangible assets account for a larger proportion of the overall net sales?\",\n    \"output\": \"In which year was Amortization of intangible assets a higher percentage of total net sales?\"\n  },\n  {\n    \"input\": \"What was the change in the amount of Amortization of intangible assets in 2019 from 2018?\",\n    \"output\": \"What was the difference in the amount of Amortization of intangible assets between the fiscal years 2019 and 2018?\"\n  },\n  {\n    \"input\": \"What was the value of Net deferred tax assets in the financial year of 2019?\",\n    \"output\": \"What was the Net deferred tax assets in 2019?\"\n  },\n  {\n    \"input\": \"What was the difference in the initial balance for 2019 compared to 2018?\",\n    \"output\": \"What was the change in Beginning balance in 2019 from 2018?\"\n  },\n  {\n    \"input\": \"What was the difference in the starting balance in 2019 compared to 2018?\",\n    \"output\": \"What was the change in Beginning balance in 2019 from 2018?\"\n  },\n  {\n    \"input\": \"What was the balance in the accounts as of the start of the year in 2018?\",\n    \"output\": \"What was the  Balance as of the beginning of the year  in 2018?\"\n  },\n  {\n    \"input\": \"\\\"What was the exact monetary balance in the accounts as of January 1st, 2018, at the start of the year?\\\"\",\n    \"output\": \"What was the  Balance as of the beginning of the year  in 2018?\"\n  },\n  {\n    \"input\": \"What was the 
percentage change in Balance as of the beginning of the year in 2019 from 2018?\",\n    \"output\": \"What is the percentage change in the Balance as of the start of 2019 compared to the Balance at the start of 2018?\"\n  },\n  {\n    \"input\": \"What was the percentage change in Prepaid and refundable income taxes from 2018 to 2019?\",\n    \"output\": \"What was the percentage difference in the amount of income taxes that were prepaid and refundable between the years 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What was the percentage change in Prepaid and refundable income taxes from 2018 to 2019?\",\n    \"output\": \"What was the percentage difference in the amount of prepaid and refundable income taxes between 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What does the Current portion of Euro Term Loan consist of?\",\n    \"output\": \"What is included in the current portion of the Euro Term Loan?\"\n  },\n  {\n    \"input\": \"What was the net change in the amount of outstanding Capital lease obligations between the years 2018 and 2019?\",\n    \"output\": \"What was the change in Capital lease obligations from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What was the percentage increase or decrease in Capital lease obligations from 2018 to 2019?\",\n    \"output\": \"What was the percentage change in Capital lease obligations from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What was the percentage increase or decrease in Capital lease obligations between 2018 and 2019?\",\n    \"output\": \"What was the percentage change in Capital lease obligations from 2018 to 2019?\"\n  },\n  {\n    \"input\": \"What are the respective values of the company's work in process inventories on March 31, 2018 and 2019?\",\n    \"output\": \"What were the specific values of the company's work in process inventories as of March 31 in both 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What are the respective values of the company's work in process inventories on March 31, 
2018 and 2019?\",\n    \"output\": \"What is the dollar amount of the work in process inventories for the company on March 31 of both 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What was the value of the company's finished goods on March 31, 2018 and March 31, 2019?\",\n    \"output\": \"What are the respective values of the company's finished goods on March 31, 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What is the change in the company's raw materials on March 31, 2018 and 2019?\",\n    \"output\": \"What is the difference in the amount of raw materials held by the company as of March 31, 2018 and March 31, 2019?\"\n  },\n  {\n    \"input\": \"What was the mean value of the company's raw materials on March 31 in both 2018 and 2019?\",\n    \"output\": \"What is the average value of the company's raw materials on March 31, 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What is the average worth of the company's raw materials specifically on March 31st, both in the years 2018 and 2019?\",\n    \"output\": \"What is the average value of the company's raw materials on March 31, 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What is the mean amount of the company's completed products on March 31, in both the years 2018 and 2019?\",\n    \"output\": \"What is the average value of the company's finished goods on March 31, 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What is the average value of the finished goods held by the company on March 31 in both 2018 and 2019?\",\n    \"output\": \"What is the average value of the company's finished goods on March 31, 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What are the company's respective foreign income before taxes in 2018 and 2019?\",\n    \"output\": \"What were the pre-tax foreign incomes of the company in 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What is the company's average loss before income taxes from the United States in 2018 and 2019?\",\n    \"output\": \"\\\"What was the average pre-tax loss incurred 
by the company in the United States for the years 2018 and 2019?\\\"\"\n  },\n  {\n    \"input\": \"What is the company's average loss before income taxes from the United States in 2018 and 2019?\",\n    \"output\": \"What was the average pre-tax income loss for the company in the United States for both 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What is the company's average foreign income before taxes in 2018 and 2019?\",\n    \"output\": \"What was the average amount of foreign income before taxes for the company in both 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What are the average pre-tax earnings of the company for the fiscal years 2018 and 2019?\",\n    \"output\": \"What is the company's average total income before income taxes in 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What are the specific cost of goods sold for the company in both 2018 and 2019?\",\n    \"output\": \"What are the company's respective cost of goods sold in 2018 and 2019?\"\n  },\n  {\n    \"input\": \"What were the cost of goods sold for the company in 2018 and 2019?\",\n    \"output\": \"What are the company's respective cost of goods sold in 2018 and 2019?\"\n  }\n]\n"
  },
  {
    "path": "libs/ktem/ktem/reasoning/prompt_optimization/rewrite_question.py",
    "content": "from ktem.llms.manager import llms\n\nfrom kotaemon.base import BaseComponent, Document, HumanMessage, Node, SystemMessage\nfrom kotaemon.llms import ChatLLM, PromptTemplate\n\nDEFAULT_REWRITE_PROMPT = (\n    \"Given the following question, rephrase and expand it \"\n    \"to help you do better answering. Maintain all information \"\n    \"in the original question. Keep the question as concise as possible. \"\n    \"Only output the rephrased question without additional information. \"\n    \"Give answer in {lang}\\n\"\n    \"Original question: {question}\\n\"\n    \"Rephrased question: \"\n)\n\n\nclass RewriteQuestionPipeline(BaseComponent):\n    \"\"\"Rewrite user question\n\n    Args:\n        llm: the language model to rewrite question\n        rewrite_template: the prompt template for llm to paraphrase a text input\n        lang: the language of the answer. Currently support English and Japanese\n    \"\"\"\n\n    llm: ChatLLM = Node(default_callback=lambda _: llms.get_default())\n    rewrite_template: str = DEFAULT_REWRITE_PROMPT\n\n    lang: str = \"English\"\n\n    def run(self, question: str) -> Document:  # type: ignore\n        prompt_template = PromptTemplate(self.rewrite_template)\n        prompt = prompt_template.populate(question=question, lang=self.lang)\n        messages = [\n            SystemMessage(content=\"You are a helpful assistant\"),\n            HumanMessage(content=prompt),\n        ]\n        return self.llm(messages)\n"
  },
  {
    "path": "libs/ktem/ktem/reasoning/prompt_optimization/suggest_conversation_name.py",
    "content": "import logging\n\nfrom ktem.llms.manager import llms\n\nfrom kotaemon.base import AIMessage, BaseComponent, Document, HumanMessage, Node\nfrom kotaemon.llms import ChatLLM, PromptTemplate\n\nlogger = logging.getLogger(__name__)\n\n\nclass SuggestConvNamePipeline(BaseComponent):\n    \"\"\"Suggest a good conversation name based on the chat history.\"\"\"\n\n    llm: ChatLLM = Node(default_callback=lambda _: llms.get_default())\n    SUGGEST_NAME_PROMPT_TEMPLATE = (\n        \"You are an expert at suggesting good and memorable conversation name. \"\n        \"Based on the chat history above, \"\n        \"suggest a good conversation name (max 10 words). \"\n        \"Give answer in {lang}. Just output the conversation \"\n        \"name without any extra.\"\n    )\n    prompt_template: str = SUGGEST_NAME_PROMPT_TEMPLATE\n    lang: str = \"English\"\n\n    def run(self, chat_history: list[tuple[str, str]]) -> Document:  # type: ignore\n        prompt_template = PromptTemplate(self.prompt_template)\n        prompt = prompt_template.populate(lang=self.lang)\n\n        messages = []\n        for human, ai in chat_history:\n            messages.append(HumanMessage(content=human))\n            messages.append(AIMessage(content=ai))\n\n        messages.append(HumanMessage(content=prompt))\n\n        return self.llm(messages)\n"
  },
  {
    "path": "libs/ktem/ktem/reasoning/prompt_optimization/suggest_followup_chat.py",
    "content": "import logging\n\nfrom ktem.llms.manager import llms\n\nfrom kotaemon.base import AIMessage, BaseComponent, Document, HumanMessage, Node\nfrom kotaemon.llms import ChatLLM, PromptTemplate\n\nlogger = logging.getLogger(__name__)\n\n\nclass SuggestFollowupQuesPipeline(BaseComponent):\n    \"\"\"Suggest a list of follow-up questions based on the chat history.\"\"\"\n\n    llm: ChatLLM = Node(default_callback=lambda _: llms.get_default())\n    SUGGEST_QUESTIONS_PROMPT_TEMPLATE = (\n        \"Based on the chat history above. \"\n        \"your task is to generate 3 to 5 relevant follow-up questions. \"\n        \"These questions should be simple, very concise, \"\n        \"and designed to guide the conversation further. \"\n        \"Respond in JSON format with 'questions' key. \"\n        \"Answer using the language {lang} same as the question. \"\n    )\n    prompt_template: str = SUGGEST_QUESTIONS_PROMPT_TEMPLATE\n    extra_prompt: str = \"\"\"Example of valid response:\n```json\n{\n    \"questions\": [\"the weather is good\", \"what's your favorite city\"]\n}\n```\"\"\"\n    lang: str = \"English\"\n\n    def run(self, chat_history: list[tuple[str, str]]) -> Document:\n        prompt_template = PromptTemplate(self.prompt_template)\n        prompt = prompt_template.populate(lang=self.lang) + self.extra_prompt\n\n        messages = []\n        for human, ai in chat_history[-3:]:\n            messages.append(HumanMessage(content=human))\n            messages.append(AIMessage(content=ai))\n\n        messages.append(HumanMessage(content=prompt))\n\n        return self.llm(messages)\n"
  },
  {
    "path": "libs/ktem/ktem/reasoning/react.py",
    "content": "import html\nimport logging\nfrom typing import AnyStr, Optional, Type\n\nfrom ktem.llms.manager import llms\nfrom ktem.mcp.manager import mcp_manager\nfrom ktem.reasoning.base import BaseReasoning\nfrom ktem.utils.generator import Generator\nfrom ktem.utils.render import Render\nfrom langchain.text_splitter import CharacterTextSplitter\nfrom pydantic import BaseModel, Field\n\nfrom kotaemon.agents import (\n    BaseTool,\n    GoogleSearchTool,\n    LLMTool,\n    ReactAgent,\n    WikipediaTool,\n)\nfrom kotaemon.agents.tools.mcp import create_tools_from_config\nfrom kotaemon.base import BaseComponent, Document, HumanMessage, Node, SystemMessage\nfrom kotaemon.llms import ChatLLM, PromptTemplate\n\nfrom ..utils import SUPPORTED_LANGUAGE_MAP\n\nlogger = logging.getLogger(__name__)\nDEFAULT_AGENT_STEPS = 4\n\n\nclass DocSearchArgs(BaseModel):\n    query: str = Field(..., description=\"a search query as input to the doc search\")\n\n\nclass DocSearchTool(BaseTool):\n    name: str = \"docsearch\"\n    description: str = (\n        \"A storage that contains internal documents. If you lack any specific \"\n        \"private information to answer the question, you can search in this \"\n        \"document storage. Furthermore, if you are unsure about which document that \"\n        \"the user refers to, likely the user already selects the target document in \"\n        \"this document storage, you just need to do normal search. 
If possible, \"\n        \"formulate the search query as specific as possible.\"\n    )\n    args_schema: Optional[Type[BaseModel]] = DocSearchArgs\n    retrievers: list[BaseComponent] = []\n\n    def _run_tool(self, query: AnyStr) -> AnyStr:\n        docs = []\n        doc_ids = []\n        for retriever in self.retrievers:\n            for doc in retriever(text=query):\n                if doc.doc_id not in doc_ids:\n                    docs.append(doc)\n                    doc_ids.append(doc.doc_id)\n\n        return self.prepare_evidence(docs)\n\n    def prepare_evidence(self, docs, trim_len: int = 4000):\n        evidence = \"\"\n        table_found = 0\n\n        for _id, retrieved_item in enumerate(docs):\n            retrieved_content = \"\"\n            page = retrieved_item.metadata.get(\"page_label\", None)\n            source = filename = retrieved_item.metadata.get(\"file_name\", \"-\")\n            if page:\n                source += f\" (Page {page})\"\n            if retrieved_item.metadata.get(\"type\", \"\") == \"table\":\n                if table_found < 5:\n                    retrieved_content = retrieved_item.metadata.get(\"table_origin\", \"\")\n                    if retrieved_content not in evidence:\n                        table_found += 1\n                        evidence += (\n                            f\"<br><b>Table from {source}</b>\\n\"\n                            + retrieved_content\n                            + \"\\n<br>\"\n                        )\n            elif retrieved_item.metadata.get(\"type\", \"\") == \"chatbot\":\n                retrieved_content = retrieved_item.metadata[\"window\"]\n                evidence += (\n                    f\"<br><b>Chatbot scenario from {filename} (Row {page})</b>\\n\"\n                    + retrieved_content\n                    + \"\\n<br>\"\n                )\n            elif retrieved_item.metadata.get(\"type\", \"\") == \"image\":\n                retrieved_content = 
retrieved_item.metadata.get(\"image_origin\", \"\")\n                retrieved_caption = html.escape(retrieved_item.get_content())\n                evidence += (\n                    f\"<br><b>Figure from {source}</b>\\n\" + retrieved_caption + \"\\n<br>\"\n                )\n            else:\n                if \"window\" in retrieved_item.metadata:\n                    retrieved_content = retrieved_item.metadata[\"window\"]\n                else:\n                    retrieved_content = retrieved_item.text\n                retrieved_content = retrieved_content.replace(\"\\n\", \" \")\n                if retrieved_content not in evidence:\n                    evidence += (\n                        f\"<br><b>Content from {source}: </b> \"\n                        + retrieved_content\n                        + \" \\n<br>\"\n                    )\n\n            print(\"Retrieved #{}: {}\".format(_id, retrieved_content[:100]))\n            print(\"Score\", retrieved_item.metadata.get(\"reranking_score\", None))\n\n        # trim context by trim_len\n        if evidence:\n            text_splitter = CharacterTextSplitter.from_tiktoken_encoder(\n                chunk_size=trim_len,\n                chunk_overlap=0,\n                separator=\" \",\n                model_name=\"gpt-3.5-turbo\",\n            )\n            texts = text_splitter.split_text(evidence)\n            evidence = texts[0]\n\n        return Document(content=evidence)\n\n\nTOOL_REGISTRY = {\n    \"Google\": GoogleSearchTool(),\n    \"Wikipedia\": WikipediaTool(),\n    \"LLM\": LLMTool(),\n    \"SearchDoc\": DocSearchTool(),\n}\n\nDEFAULT_QA_PROMPT = (\n    \"Answer the following questions as best you can. Give answer in {lang}. 
\"\n    \"You have access to the following tools:\\n\"\n    \"{tool_description}\\n\"\n    \"Use the following format:\\n\\n\"\n    \"Question: the input question you must answer\\n\"\n    \"Thought: you should always think about what to do\\n\\n\"\n    \"Action: the action to take, should be one of [{tool_names}]\\n\\n\"\n    \"Action Input: the input to the action, should be different from the action input \"\n    \"of the same action in previous steps.\\n\\n\"\n    \"Observation: the result of the action\\n\\n\"\n    \"... (this Thought/Action/Action Input/Observation can repeat N times)\\n\"\n    \"#Thought: I now know the final answer\\n\"\n    \"Final Answer: the final answer to the original input question\\n\\n\"\n    \"Begin! After each Action Input.\\n\\n\"\n    \"Question: {instruction}\\n\"\n    \"Thought: {agent_scratchpad}\\n\"\n)\n\nDEFAULT_REWRITE_PROMPT = (\n    \"Given the following question, rephrase and expand it \"\n    \"to help you do better answering. Maintain all information \"\n    \"in the original question. Keep the question as concise as possible. \"\n    \"Give answer in {lang}\\n\"\n    \"Original question: {question}\\n\"\n    \"Rephrased question: \"\n)\n\n\nclass RewriteQuestionPipeline(BaseComponent):\n    \"\"\"Rewrite user question\n\n    Args:\n        llm: the language model to rewrite question\n        rewrite_template: the prompt template for llm to paraphrase a text input\n        lang: the language of the answer. 
Currently support English and Japanese\n    \"\"\"\n\n    llm: ChatLLM = Node(default_callback=lambda _: llms.get_default())\n    rewrite_template: str = DEFAULT_REWRITE_PROMPT\n\n    lang: str = \"English\"\n\n    def run(self, question: str) -> Document:  # type: ignore\n        prompt_template = PromptTemplate(self.rewrite_template)\n        prompt = prompt_template.populate(question=question, lang=self.lang)\n        messages = [\n            SystemMessage(content=\"You are a helpful assistant\"),\n            HumanMessage(content=prompt),\n        ]\n        return self.llm(messages)\n\n\nclass ReactAgentPipeline(BaseReasoning):\n    \"\"\"Question answering pipeline using ReAct agent.\"\"\"\n\n    class Config:\n        allow_extra = True\n\n    retrievers: list[BaseComponent]\n    agent: ReactAgent = ReactAgent.withx()\n    rewrite_pipeline: RewriteQuestionPipeline = RewriteQuestionPipeline.withx()\n    use_rewrite: bool = False\n\n    def prepare_citation(self, step_id, step, output, status) -> Document:\n        header = \"<b>Step {id}</b>: {log}\".format(id=step_id, log=step.log)\n        content = (\n            \"<b>Action</b>: <em>{tool}[{input}]</em>\\n\\n<b>Output</b>: {output}\"\n        ).format(\n            tool=step.tool if status == \"thinking\" else \"\",\n            input=step.tool_input.replace(\"\\n\", \"\").replace('\"', \"\")\n            if status == \"thinking\"\n            else \"\",\n            output=output if status == \"thinking\" else \"Finished\",\n        )\n        return Document(\n            channel=\"info\",\n            content=Render.collapsible(\n                header=header,\n                content=Render.table(content),\n                open=True,\n            ),\n        )\n\n    async def ainvoke(  # type: ignore\n        self, message, conv_id: str, history: list, **kwargs  # type: ignore\n    ) -> Document:\n        if self.use_rewrite:\n            rewrite = await self.rewrite_pipeline(question=message)\n     
       message = rewrite.text\n\n        answer = self.agent(message)\n        self.report_output(Document(content=answer.text, channel=\"chat\"))\n\n        intermediate_steps = answer.intermediate_steps\n        for _, step_output in intermediate_steps:\n            self.report_output(Document(content=step_output, channel=\"info\"))\n\n        self.report_output(None)\n        return answer\n\n    def stream(self, message, conv_id: str, history: list, **kwargs):\n        if self.use_rewrite:\n            rewrite = self.rewrite_pipeline(question=message)\n            message = rewrite.text\n            yield Document(\n                channel=\"info\",\n                content=f\"Rewrote the message to: {rewrite.text}\",\n            )\n\n        output_stream = Generator(self.agent.stream(message))\n        idx = 0\n        for item in output_stream:\n            idx += 1\n            if item.status == \"thinking\":\n                step, step_output = item.intermediate_steps\n                yield Document(\n                    channel=\"info\",\n                    content=self.prepare_citation(idx, step, step_output, item.status),\n                )\n            else:\n                yield Document(\n                    channel=\"chat\",\n                    content=item.text,\n                )\n                step, step_output = item.intermediate_steps\n                yield Document(\n                    channel=\"info\",\n                    content=self.prepare_citation(idx, step, step_output, item.status),\n                )\n\n        return output_stream.value\n\n    @classmethod\n    def get_pipeline(\n        cls, settings: dict, states: dict, retrievers: list | None = None\n    ) -> BaseReasoning:\n        _id = cls.get_info()[\"id\"]\n        prefix = f\"reasoning.options.{_id}\"\n\n        llm_name = settings[f\"{prefix}.llm\"]\n        llm = llms.get(llm_name, llms.get_default())\n\n        max_context_length_setting = 
settings.get(\"reasoning.max_context_length\", None)\n\n        pipeline = ReactAgentPipeline(retrievers=retrievers)\n        pipeline.agent.llm = llm\n        pipeline.agent.max_iterations = settings[f\"{prefix}.max_iterations\"]\n\n        if max_context_length_setting:\n            pipeline.agent.max_context_length = (\n                max_context_length_setting // DEFAULT_AGENT_STEPS\n            )\n\n        tools = []\n        for tool_name in settings[f\"reasoning.options.{_id}.tools\"]:\n            if tool_name.startswith(\"[MCP] \"):\n                server_name = tool_name[len(\"[MCP] \") :]\n                entry = mcp_manager.get(server_name)\n                if entry:\n                    config = entry[\"config\"]\n                    enabled_tools = config.pop(\"enabled_tools\", None)\n                    mcp_tools = create_tools_from_config(config, enabled_tools)\n                    tools.extend(mcp_tools)\n            else:\n                tool = TOOL_REGISTRY[tool_name]\n                if tool_name == \"SearchDoc\":\n                    tool.retrievers = retrievers\n                elif tool_name == \"LLM\":\n                    tool.llm = llm\n                tools.append(tool)\n        pipeline.agent.plugins = tools\n        pipeline.agent.output_lang = SUPPORTED_LANGUAGE_MAP.get(\n            settings[\"reasoning.lang\"], \"English\"\n        )\n        pipeline.use_rewrite = states.get(\"app\", {}).get(\"regen\", False)\n        pipeline.agent.prompt_template = PromptTemplate(settings[f\"{prefix}.qa_prompt\"])\n\n        return pipeline\n\n    @classmethod\n    def get_user_settings(cls) -> dict:\n        llm = \"\"\n        llm_choices = [(\"(default)\", \"\")]\n        try:\n            llm_choices += [(_, _) for _ in llms.options().keys()]\n        except Exception as e:\n            logger.exception(f\"Failed to get LLM options: {e}\")\n\n        tool_choices = [\"Wikipedia\", \"Google\", \"LLM\", \"SearchDoc\"]\n        try:\n         
   tool_choices += mcp_manager.get_enabled_tools()\n        except Exception as e:\n            logger.exception(f\"Failed to get MCP tool options: {e}\")\n\n        return {\n            \"llm\": {\n                \"name\": \"Language model\",\n                \"value\": llm,\n                \"component\": \"dropdown\",\n                \"choices\": llm_choices,\n                \"special_type\": \"llm\",\n                \"info\": (\n                    \"The language model to use for generating the answer. If None, \"\n                    \"the application default language model will be used.\"\n                ),\n            },\n            \"tools\": {\n                \"name\": \"Tools for knowledge retrieval\",\n                \"value\": [\"SearchDoc\", \"LLM\"],\n                \"component\": \"checkboxgroup\",\n                \"choices\": tool_choices,\n            },\n            \"max_iterations\": {\n                \"name\": \"Maximum number of iterations the LLM can go through\",\n                \"value\": 5,\n                \"component\": \"number\",\n            },\n            \"qa_prompt\": {\n                \"name\": \"QA Prompt\",\n                \"value\": DEFAULT_QA_PROMPT,\n            },\n        }\n\n    @classmethod\n    def get_info(cls) -> dict:\n        return {\n            \"id\": \"ReAct\",\n            \"name\": \"ReAct Agent\",\n            \"description\": (\n                \"Implementing ReAct paradigm: https://arxiv.org/abs/2210.03629. \"\n                \"ReAct agent answers the user's request by iteratively formulating \"\n                \"plan and executing it. The agent can use multiple tools to gather \"\n                \"information and generate the final answer.\"\n            ),\n        }\n"
  },
  {
    "path": "libs/ktem/ktem/reasoning/rewoo.py",
    "content": "import html\nimport logging\nfrom difflib import SequenceMatcher\nfrom typing import AnyStr, Generator, Optional, Type\n\nfrom ktem.llms.manager import llms\nfrom ktem.mcp.manager import mcp_manager\nfrom ktem.reasoning.base import BaseReasoning\nfrom ktem.utils.generator import Generator as GeneratorWrapper\nfrom ktem.utils.render import Render\nfrom langchain.text_splitter import CharacterTextSplitter\nfrom pydantic import BaseModel, Field\n\nfrom kotaemon.agents import (\n    BaseTool,\n    GoogleSearchTool,\n    LLMTool,\n    RewooAgent,\n    WikipediaTool,\n)\nfrom kotaemon.agents.tools.mcp import create_tools_from_config\nfrom kotaemon.base import BaseComponent, Document, HumanMessage, Node, SystemMessage\nfrom kotaemon.llms import ChatLLM, PromptTemplate\n\nfrom ..utils import SUPPORTED_LANGUAGE_MAP\n\nlogger = logging.getLogger(__name__)\nDEFAULT_AGENT_STEPS = 4\n\n\nDEFAULT_PLANNER_PROMPT = (\n    \"You are an AI agent who makes step-by-step plans to solve a problem under the \"\n    \"help of external tools. For each step, make one plan followed by one tool-call, \"\n    \"which will be executed later to retrieve evidence for that step.\\n\"\n    \"You should store each evidence into a distinct variable #E1, #E2, #E3 ... that \"\n    \"can be referred to in later tool-call inputs.\\n\\n\"\n    \"##Available Tools##\\n\"\n    \"{tool_description}\\n\\n\"\n    \"##Output Format (Replace '<...>')##\\n\"\n    \"#Plan1: <describe your plan here>\\n\"\n    \"#E1: <toolname>[<input here>] (eg. Search[What is Python])\\n\"\n    \"#Plan2: <describe next plan>\\n\"\n    \"#E2: <toolname>[<input here, you can use #E1 to represent its expected output>]\\n\"\n    \"And so on...\\n\\n\"\n    \"##Your Task##\\n\"\n    \"{task}\\n\\n\"\n    \"##Now Begin##\\n\"\n)\n\nDEFAULT_SOLVER_PROMPT = (\n    \"You are an AI agent who solves a problem with my assistance. 
I will provide \"\n    \"step-by-step plans(#Plan) and evidences(#E) that could be helpful.\\n\"\n    \"Your task is to briefly summarize each step, then make a short final conclusion \"\n    \"for your task. Give answer in {lang}.\\n\\n\"\n    \"##My Plans and Evidences##\\n\"\n    \"{plan_evidence}\\n\\n\"\n    \"##Example Output##\\n\"\n    \"First, I <did something> , and I think <...>; Second, I <...>, \"\n    \"and I think <...>; ....\\n\"\n    \"So, <your conclusion>.\\n\\n\"\n    \"##Your Task##\\n\"\n    \"{task}\\n\\n\"\n    \"##Now Begin##\\n\"\n)\n\n\nclass DocSearchArgs(BaseModel):\n    query: str = Field(..., description=\"a search query as input to the doc search\")\n\n\nclass DocSearchTool(BaseTool):\n    name: str = \"docsearch\"\n    description: str = (\n        \"A storage that contains internal documents. If you lack any specific \"\n        \"private information to answer the question, you can search in this \"\n        \"document storage. Furthermore, if you are unsure about which document that \"\n        \"the user refers to, likely the user already selects the target document in \"\n        \"this document storage, you just need to do normal search. 
If possible, \"\n        \"formulate the search query as specific as possible.\"\n    )\n    args_schema: Optional[Type[BaseModel]] = DocSearchArgs\n    retrievers: list[BaseComponent] = []\n\n    def _run_tool(self, query: AnyStr) -> AnyStr:\n        docs = []\n        doc_ids = []\n        for retriever in self.retrievers:\n            for doc in retriever(text=query):\n                if doc.doc_id not in doc_ids:\n                    docs.append(doc)\n                    doc_ids.append(doc.doc_id)\n\n        return self.prepare_evidence(docs)\n\n    def prepare_evidence(self, docs, trim_len: int = 3000):\n        evidence = \"\"\n        table_found = 0\n\n        for _id, retrieved_item in enumerate(docs):\n            retrieved_content = \"\"\n            page = retrieved_item.metadata.get(\"page_label\", None)\n            source = filename = retrieved_item.metadata.get(\"file_name\", \"-\")\n            if page:\n                source += f\" (Page {page})\"\n            if retrieved_item.metadata.get(\"type\", \"\") == \"table\":\n                if table_found < 5:\n                    retrieved_content = retrieved_item.metadata.get(\"table_origin\", \"\")\n                    if retrieved_content not in evidence:\n                        table_found += 1\n                        evidence += (\n                            f\"<br><b>Table from {source}</b>\\n\"\n                            + retrieved_content\n                            + \"\\n<br>\"\n                        )\n            elif retrieved_item.metadata.get(\"type\", \"\") == \"chatbot\":\n                retrieved_content = retrieved_item.metadata[\"window\"]\n                evidence += (\n                    f\"<br><b>Chatbot scenario from {filename} (Row {page})</b>\\n\"\n                    + retrieved_content\n                    + \"\\n<br>\"\n                )\n            elif retrieved_item.metadata.get(\"type\", \"\") == \"image\":\n                retrieved_content = 
retrieved_item.metadata.get(\"image_origin\", \"\")\n                retrieved_caption = html.escape(retrieved_item.get_content())\n                # PWS doesn't support VLM for images, we will just store the caption\n                evidence += (\n                    f\"<br><b>Figure from {source}</b>\\n\" + retrieved_caption + \"\\n<br>\"\n                )\n            else:\n                if \"window\" in retrieved_item.metadata:\n                    retrieved_content = retrieved_item.metadata[\"window\"]\n                else:\n                    retrieved_content = retrieved_item.text\n                retrieved_content = retrieved_content.replace(\"\\n\", \" \")\n                if retrieved_content not in evidence:\n                    evidence += (\n                        f\"<br><b>Content from {source}: </b> \"\n                        + retrieved_content\n                        + \" \\n<br>\"\n                    )\n\n            print(\"Retrieved #{}: {}\".format(_id, retrieved_content))\n            print(\"Score\", retrieved_item.metadata.get(\"reranking_score\", None))\n\n        # trim context by trim_len\n        if evidence:\n            text_splitter = CharacterTextSplitter.from_tiktoken_encoder(\n                chunk_size=trim_len,\n                chunk_overlap=0,\n                separator=\" \",\n                model_name=\"gpt-3.5-turbo\",\n            )\n            texts = text_splitter.split_text(evidence)\n            evidence = texts[0]\n\n        return Document(content=evidence)\n\n\nTOOL_REGISTRY = {\n    \"Google\": GoogleSearchTool(),\n    \"Wikipedia\": WikipediaTool(),\n    \"LLM\": LLMTool(),\n    \"SearchDoc\": DocSearchTool(),\n}\n\nDEFAULT_REWRITE_PROMPT = (\n    \"Given the following question, rephrase and expand it \"\n    \"to help you do better answering. Maintain all information \"\n    \"in the original question. Keep the question as concise as possible. 
\"\n    \"Give answer in {lang}\\n\"\n    \"Original question: {question}\\n\"\n    \"Rephrased question: \"\n)\n\n\nclass RewriteQuestionPipeline(BaseComponent):\n    \"\"\"Rewrite user question\n\n    Args:\n        llm: the language model to rewrite question\n        rewrite_template: the prompt template for llm to paraphrase a text input\n        lang: the language of the answer. Currently support English and Japanese\n    \"\"\"\n\n    llm: ChatLLM = Node(default_callback=lambda _: llms.get_default())\n    rewrite_template: str = DEFAULT_REWRITE_PROMPT\n\n    lang: str = \"English\"\n\n    def run(self, question: str) -> Document:  # type: ignore\n        prompt_template = PromptTemplate(self.rewrite_template)\n        prompt = prompt_template.populate(question=question, lang=self.lang)\n        messages = [\n            SystemMessage(content=\"You are a helpful assistant\"),\n            HumanMessage(content=prompt),\n        ]\n        return self.llm(messages)\n\n\ndef find_text(llm_output, context):\n    sentence_list = llm_output.split(\"\\n\")\n    matches = []\n    for sentence in sentence_list:\n        match = SequenceMatcher(\n            None, sentence, context, autojunk=False\n        ).find_longest_match()\n        matches.append((match.b, match.b + match.size))\n    return matches\n\n\nclass RewooAgentPipeline(BaseReasoning):\n    \"\"\"Question answering pipeline using ReWOO Agent.\"\"\"\n\n    class Config:\n        allow_extra = True\n\n    retrievers: list[BaseComponent]\n    agent: RewooAgent = RewooAgent.withx()\n    rewrite_pipeline: RewriteQuestionPipeline = RewriteQuestionPipeline.withx()\n    use_rewrite: bool = False\n    enable_citation: bool = False\n\n    def format_info_panel_evidence(self, worker_log):\n        header = \"\"\n        content = []\n\n        for line in worker_log.splitlines():\n            if line.startswith(\"#Plan\"):\n                # line starts with #Plan should be marked as a new segment\n                
header = line\n            elif line.startswith(\"#Action\"):\n                # small fix for markdown output\n                line = \"\\\\\" + line + \"<br>\"\n                content.append(line)\n            elif line.startswith(\"#\"):\n                # stop markdown from rendering big headers\n                line = \"\\\\\" + line\n                content.append(line)\n            else:\n                content.append(line)\n\n        if not header:\n            return\n\n        return Document(\n            channel=\"info\",\n            content=Render.collapsible(\n                header=header,\n                content=Render.table(\"\\n\".join(content)),\n                open=False,\n            ),\n        )\n\n    def format_info_panel_planner(self, planner_output):\n        planner_output = planner_output.replace(\"\\n\", \"<br>\")\n        return Document(\n            channel=\"info\",\n            content=Render.collapsible(\n                header=\"Planner Output\",\n                content=planner_output,\n                open=True,\n            ),\n        )\n\n    def prepare_citation(self, answer) -> list[Document]:\n        \"\"\"Prepare citation to show on the UI\"\"\"\n        segments = []\n        split_indices = [\n            0,\n        ]\n        start_indices = set()\n        text = \"\"\n\n        if \"citation\" in answer.metadata and answer.metadata[\"citation\"] is not None:\n            context = answer.metadata[\"worker_log\"]\n            for evidence in answer.metadata[\"citation\"].evidences:\n                matches = find_text(evidence, context)\n                for match in matches:\n                    split_indices.append(match[0])\n                    split_indices.append(match[1])\n                    start_indices.add(match[0])\n            split_indices = sorted(list(set(split_indices)))\n            spans = []\n            prev = 0\n            for index in split_indices:\n                if index > prev:\n     
               spans.append(context[prev:index])\n                    prev = index\n            spans.append(context[split_indices[-1] :])\n\n            prev = 0\n            for span, start_idx in list(zip(spans, split_indices)):\n                if start_idx in start_indices:\n                    text += Render.highlight(span)\n                else:\n                    text += span\n\n        else:\n            text = answer.metadata[\"worker_log\"]\n\n        # separate text by detect header: #Plan\n        for line in text.splitlines():\n            if line.startswith(\"#Plan\"):\n                # line starts with #Plan should be marked as a new segment\n                new_segment = [line]\n                segments.append(new_segment)\n            elif line.startswith(\"#Action\"):\n                # small fix for markdown output\n                line = \"\\\\\" + line + \"<br>\"\n                segments[-1].append(line)\n            elif line.startswith(\"#\"):\n                # stop markdown from rendering big headers\n                line = \"\\\\\" + line\n                segments[-1].append(line)\n            else:\n                if segments:\n                    segments[-1].append(line)\n                else:\n                    segments.append([line])\n\n        outputs = []\n        for segment in segments:\n            outputs.append(\n                Document(\n                    channel=\"info\",\n                    content=Render.collapsible(\n                        header=segment[0],\n                        content=Render.table(\"\\n\".join(segment[1:])),\n                        open=True,\n                    ),\n                )\n            )\n\n        return outputs\n\n    async def ainvoke(  # type: ignore\n        self, message, conv_id: str, history: list, **kwargs  # type: ignore\n    ) -> Document:\n        answer = self.agent(message, use_citation=True)\n        self.report_output(Document(content=answer.text, 
channel=\"chat\"))\n\n        refined_citations = self.prepare_citation(answer)\n        for _ in refined_citations:\n            self.report_output(_)\n\n        self.report_output(None)\n        return answer\n\n    def stream(  # type: ignore\n        self, message, conv_id: str, history: list, **kwargs  # type: ignore\n    ) -> Generator[Document, None, Document] | None:\n        if self.use_rewrite:\n            rewrite = self.rewrite_pipeline(question=message)\n            message = rewrite.text\n            yield Document(\n                channel=\"info\",\n                content=f\"Rewrote the message to: {rewrite.text}\",\n            )\n\n        output_stream = GeneratorWrapper(\n            self.agent.stream(message, use_citation=self.enable_citation)\n        )\n        for item in output_stream:\n            if item.intermediate_steps:\n                for step in item.intermediate_steps:\n                    if \"planner_log\" in step:\n                        yield Document(\n                            channel=\"info\",\n                            content=self.format_info_panel_planner(step[\"planner_log\"]),\n                        )\n                    else:\n                        yield Document(\n                            channel=\"info\",\n                            content=self.format_info_panel_evidence(step[\"worker_log\"]),\n                        )\n            if item.text:\n                # final answer\n                yield Document(channel=\"chat\", content=item.text)\n\n        answer = output_stream.value\n        yield Document(channel=\"info\", content=None)\n        yield from self.prepare_citation(answer)\n\n        return answer\n\n    @classmethod\n    def get_pipeline(\n        cls, settings: dict, states: dict, retrievers: list | None = None\n    ) -> BaseReasoning:\n        _id = cls.get_info()[\"id\"]\n        prefix = f\"reasoning.options.{_id}\"\n        pipeline = 
RewooAgentPipeline(retrievers=retrievers)\n\n        max_context_length_setting = settings.get(\"reasoning.max_context_length\", None)\n\n        planner_llm_name = settings[f\"{prefix}.planner_llm\"]\n        planner_llm = llms.get(planner_llm_name, llms.get_default())\n        solver_llm_name = settings[f\"{prefix}.solver_llm\"]\n        solver_llm = llms.get(solver_llm_name, llms.get_default())\n\n        pipeline.agent.planner_llm = planner_llm\n        pipeline.agent.solver_llm = solver_llm\n        if max_context_length_setting:\n            pipeline.agent.max_context_length = (\n                max_context_length_setting // DEFAULT_AGENT_STEPS\n            )\n\n        tools = []\n        for tool_name in settings[f\"{prefix}.tools\"]:\n            if tool_name.startswith(\"[MCP] \"):\n                server_name = tool_name[len(\"[MCP] \") :]\n                entry = mcp_manager.get(server_name)\n                if entry:\n                    config = entry[\"config\"]\n                    enabled_tools = config.pop(\"enabled_tools\", None)\n                    mcp_tools = create_tools_from_config(config, enabled_tools)\n                    tools.extend(mcp_tools)\n            else:\n                tool = TOOL_REGISTRY[tool_name]\n                if tool_name == \"SearchDoc\":\n                    tool.retrievers = retrievers\n                elif tool_name == \"LLM\":\n                    tool.llm = solver_llm\n                tools.append(tool)\n        pipeline.agent.plugins = tools\n        pipeline.agent.output_lang = SUPPORTED_LANGUAGE_MAP.get(\n            settings[\"reasoning.lang\"], \"English\"\n        )\n        pipeline.agent.prompt_template[\"Planner\"] = PromptTemplate(\n            settings[f\"{prefix}.planner_prompt\"]\n        )\n        pipeline.agent.prompt_template[\"Solver\"] = PromptTemplate(\n            settings[f\"{prefix}.solver_prompt\"]\n        )\n\n        pipeline.enable_citation = 
settings[f\"{prefix}.highlight_citation\"]\n        pipeline.use_rewrite = states.get(\"app\", {}).get(\"regen\", False)\n        pipeline.rewrite_pipeline.llm = (\n            planner_llm  # TODO: separate llm for rewrite if needed\n        )\n\n        return pipeline\n\n    @classmethod\n    def get_user_settings(cls) -> dict:\n\n        llm = \"\"\n        llm_choices = [(\"(default)\", \"\")]\n        try:\n            llm_choices += [(_, _) for _ in llms.options().keys()]\n        except Exception as e:\n            logger.exception(f\"Failed to get LLM options: {e}\")\n\n        tool_choices = [\"Wikipedia\", \"Google\", \"LLM\", \"SearchDoc\"]\n        try:\n            tool_choices += mcp_manager.get_enabled_tools()\n        except Exception as e:\n            logger.exception(f\"Failed to get MCP tool options: {e}\")\n\n        return {\n            \"planner_llm\": {\n                \"name\": \"Language model for Planner\",\n                \"value\": llm,\n                \"component\": \"dropdown\",\n                \"choices\": llm_choices,\n                \"special_type\": \"llm\",\n                \"info\": (\n                    \"The language model to use for planning. \"\n                    \"This model will generate a plan based on the \"\n                    \"instruction to find the answer.\"\n                ),\n            },\n            \"solver_llm\": {\n                \"name\": \"Language model for Solver\",\n                \"value\": llm,\n                \"component\": \"dropdown\",\n                \"choices\": llm_choices,\n                \"special_type\": \"llm\",\n                \"info\": (\n                    \"The language model to use for solving. 
\"\n                    \"This model will generate the answer based on the \"\n                    \"plan generated by the planner and evidences found by the tools.\"\n                ),\n            },\n            \"highlight_citation\": {\n                \"name\": \"Highlight Citation\",\n                \"value\": False,\n                \"component\": \"checkbox\",\n            },\n            \"tools\": {\n                \"name\": \"Tools for knowledge retrieval\",\n                \"value\": [\"SearchDoc\", \"LLM\"],\n                \"component\": \"checkboxgroup\",\n                \"choices\": tool_choices,\n            },\n            \"planner_prompt\": {\n                \"name\": \"Planner Prompt\",\n                \"value\": DEFAULT_PLANNER_PROMPT,\n            },\n            \"solver_prompt\": {\n                \"name\": \"Solver Prompt\",\n                \"value\": DEFAULT_SOLVER_PROMPT,\n            },\n        }\n\n    @classmethod\n    def get_info(cls) -> dict:\n        return {\n            \"id\": \"ReWOO\",\n            \"name\": \"ReWOO Agent\",\n            \"description\": (\n                \"Implementing ReWOO paradigm: https://arxiv.org/abs/2305.18323. \"\n                \"The ReWOO agent makes a step by step plan in the first stage, \"\n                \"then solves each step in the second stage. The agent can use \"\n                \"external tools to help in the reasoning process. Once all stages \"\n                \"are completed, the agent will summarize the answer.\"\n            ),\n        }\n"
  },
  {
    "path": "libs/ktem/ktem/reasoning/simple.py",
    "content": "import logging\nimport threading\nfrom textwrap import dedent\nfrom typing import Generator\n\nfrom decouple import config\nfrom ktem.embeddings.manager import embedding_models_manager as embeddings\nfrom ktem.llms.manager import llms\nfrom ktem.reasoning.prompt_optimization import (\n    DecomposeQuestionPipeline,\n    RewriteQuestionPipeline,\n)\nfrom ktem.utils.render import Render\nfrom ktem.utils.visualize_cited import CreateCitationVizPipeline\nfrom plotly.io import to_json\n\nfrom kotaemon.base import (\n    AIMessage,\n    BaseComponent,\n    Document,\n    HumanMessage,\n    Node,\n    RetrievedDocument,\n    SystemMessage,\n)\nfrom kotaemon.indices.qa.citation_qa import (\n    CONTEXT_RELEVANT_WARNING_SCORE,\n    DEFAULT_QA_TEXT_PROMPT,\n    AnswerWithContextPipeline,\n)\nfrom kotaemon.indices.qa.citation_qa_inline import AnswerWithInlineCitation\nfrom kotaemon.indices.qa.format_context import PrepareEvidencePipeline\nfrom kotaemon.indices.qa.utils import replace_think_tag_with_details\nfrom kotaemon.llms import ChatLLM\n\nfrom ..utils import SUPPORTED_LANGUAGE_MAP\nfrom .base import BaseReasoning\n\nlogger = logging.getLogger(__name__)\n\n\nclass AddQueryContextPipeline(BaseComponent):\n\n    n_last_interactions: int = 5\n    llm: ChatLLM = Node(default_callback=lambda _: llms.get_default())\n\n    def run(self, question: str, history: list) -> Document:\n        messages = [\n            SystemMessage(\n                content=\"Below is a history of the conversation so far, and a new \"\n                \"question asked by the user that needs to be answered by searching \"\n                \"in a knowledge base.\\nYou have access to a Search index \"\n                \"with 100's of documents.\\nGenerate a search query based on the \"\n                \"conversation and the new question.\\nDo not include cited source \"\n                \"filenames and document names e.g info.txt or doc.pdf in the search \"\n                \"query 
terms.\\nDo not include any text inside [] or <<>> in the \"\n                \"search query terms.\\nDo not include any special characters like \"\n                \"'+'.\\nIf the question is not in English, rewrite the query in \"\n                \"the language used in the question.\\n If the question contains enough \"\n                \"information, return just the number 1\\n If it's unnecessary to do \"\n                \"the searching, return just the number 0.\"\n            ),\n            HumanMessage(content=\"How did crypto do last year?\"),\n            AIMessage(\n                content=\"Summarize Cryptocurrency Market Dynamics from last year\"\n            ),\n            HumanMessage(content=\"What are my health plans?\"),\n            AIMessage(content=\"Show available health plans\"),\n        ]\n        for human, ai in history[-self.n_last_interactions :]:\n            messages.append(HumanMessage(content=human))\n            messages.append(AIMessage(content=ai))\n\n        messages.append(HumanMessage(content=f\"Generate search query for: {question}\"))\n\n        resp = self.llm(messages).text\n        if resp == \"0\":\n            return Document(content=\"\")\n\n        if resp == \"1\":\n            return Document(content=question)\n\n        return Document(content=resp)\n\n\nclass FullQAPipeline(BaseReasoning):\n    \"\"\"Question answering pipeline. 
Handle from question to answer\"\"\"\n\n    class Config:\n        allow_extra = True\n\n    # configuration parameters\n    trigger_context: int = 150\n    use_rewrite: bool = False\n\n    retrievers: list[BaseComponent]\n\n    evidence_pipeline: PrepareEvidencePipeline = PrepareEvidencePipeline.withx()\n    answering_pipeline: AnswerWithContextPipeline\n    rewrite_pipeline: RewriteQuestionPipeline | None = None\n    create_citation_viz_pipeline: CreateCitationVizPipeline = Node(\n        default_callback=lambda _: CreateCitationVizPipeline(\n            embedding=embeddings.get_default()\n        )\n    )\n    add_query_context: AddQueryContextPipeline = AddQueryContextPipeline.withx()\n\n    def retrieve(\n        self, message: str, history: list\n    ) -> tuple[list[RetrievedDocument], list[Document]]:\n        \"\"\"Retrieve the documents based on the message\"\"\"\n        # if len(message) < self.trigger_context:\n        #     # prefer adding context for short user questions, avoid adding context for\n        #     # long questions, as they are likely to contain enough information\n        #     # plus, avoid the situation where the original message is already too long\n        #     # for the model to handle\n        #     query = self.add_query_context(message, history).content\n        # else:\n        #     query = message\n        # print(f\"Rewritten query: {query}\")\n        query = None\n        if not query:\n            # TODO: previously return [], [] because we think this message as something\n            # like \"Hello\", \"I need help\"...\n            query = message\n\n        docs, doc_ids = [], []\n        plot_docs = []\n\n        for idx, retriever in enumerate(self.retrievers):\n            retriever_node = self._prepare_child(retriever, f\"retriever_{idx}\")\n            retriever_docs = retriever_node(text=query)\n\n            retriever_docs_text = []\n            retriever_docs_plot = []\n\n            for doc in 
retriever_docs:\n                if doc.metadata.get(\"type\", \"\") == \"plot\":\n                    retriever_docs_plot.append(doc)\n                else:\n                    retriever_docs_text.append(doc)\n\n            for doc in retriever_docs_text:\n                if doc.doc_id not in doc_ids:\n                    docs.append(doc)\n                    doc_ids.append(doc.doc_id)\n\n            plot_docs.extend(retriever_docs_plot)\n\n        info = [\n            Document(\n                channel=\"info\",\n                content=Render.collapsible_with_header(doc, open_collapsible=True),\n            )\n            for doc in docs\n        ] + [\n            Document(\n                channel=\"plot\",\n                content=doc.metadata.get(\"data\", \"\"),\n            )\n            for doc in plot_docs\n        ]\n\n        return docs, info\n\n    def prepare_mindmap(self, answer) -> Document | None:\n        mindmap = answer.metadata[\"mindmap\"]\n        if mindmap:\n            mindmap_text = mindmap.text\n            mindmap_svg = dedent(\n                \"\"\"\n                <div class=\"markmap\">\n                <script type=\"text/template\">\n                ---\n                markmap:\n                    colorFreezeLevel: 2\n                    activeNode:\n                        placement: center\n                    initialExpandLevel: 4\n                    maxWidth: 200\n                ---\n                {}\n                </script>\n                </div>\n                \"\"\"\n            ).format(mindmap_text)\n\n            mindmap_content = Document(\n                channel=\"info\",\n                content=Render.collapsible(\n                    header=\"\"\"\n                    <i>Mindmap</i>\n                    <a href=\"#\" id='mindmap-toggle'>\n                        [Expand]</a>\n                    <a href=\"#\" id='mindmap-export'>\n                        [Export]</a>\"\"\",\n                    
content=mindmap_svg,\n                    open=True,\n                ),\n            )\n        else:\n            mindmap_content = None\n\n        return mindmap_content\n\n    def prepare_citation_viz(self, answer, question, docs) -> Document | None:\n        doc_texts = [doc.text for doc in docs]\n        citation_plot = None\n        plot_content = None\n\n        if answer.metadata[\"citation_viz\"] and len(docs) > 1:\n            try:\n                citation_plot = self.create_citation_viz_pipeline(doc_texts, question)\n            except Exception as e:\n                print(\"Failed to create citation plot:\", e)\n\n            if citation_plot:\n                plot = to_json(citation_plot)\n                plot_content = Document(channel=\"plot\", content=plot)\n\n        return plot_content\n\n    def show_citations_and_addons(self, answer, docs, question):\n        # show the evidence\n        with_citation, without_citation = self.answering_pipeline.prepare_citations(\n            answer, docs\n        )\n        mindmap_output = self.prepare_mindmap(answer)\n        citation_plot_output = self.prepare_citation_viz(answer, question, docs)\n\n        if not with_citation and not without_citation:\n            yield Document(channel=\"info\", content=\"<h5><b>No evidence found.</b></h5>\")\n        else:\n            # clear the Info panel\n            max_llm_rerank_score = max(\n                doc.metadata.get(\"llm_trulens_score\", 0.0) for doc in docs\n            )\n            has_llm_score = any(\"llm_trulens_score\" in doc.metadata for doc in docs)\n            # clear previous info\n            yield Document(channel=\"info\", content=None)\n\n            # yield mindmap output\n            if mindmap_output:\n                yield mindmap_output\n\n            # yield citation plot output\n            if citation_plot_output:\n                yield citation_plot_output\n\n            # yield warning message\n            if has_llm_score 
and max_llm_rerank_score < CONTEXT_RELEVANT_WARNING_SCORE:\n                yield Document(\n                    channel=\"info\",\n                    content=(\n                        \"<h5>WARNING! Context relevance score is low. \"\n                        \"Double check the model answer for correctness.</h5>\"\n                    ),\n                )\n\n            # show QA score\n            qa_score = (\n                round(answer.metadata[\"qa_score\"], 2)\n                if answer.metadata.get(\"qa_score\")\n                else None\n            )\n            if qa_score:\n                yield Document(\n                    channel=\"info\",\n                    content=f\"<h5>Answer confidence: {qa_score}</h5>\",\n                )\n\n            yield from with_citation\n            if without_citation:\n                yield from without_citation\n\n    async def ainvoke(  # type: ignore\n        self, message: str, conv_id: str, history: list, **kwargs  # type: ignore\n    ) -> Document:  # type: ignore\n        raise NotImplementedError\n\n    def stream(  # type: ignore\n        self, message: str, conv_id: str, history: list, **kwargs  # type: ignore\n    ) -> Generator[Document, None, Document]:\n        if self.use_rewrite and self.rewrite_pipeline:\n            print(\"Chosen rewrite pipeline\", self.rewrite_pipeline)\n            message = self.rewrite_pipeline(question=message).text\n            print(\"Rewrite result\", message)\n\n        print(f\"Retrievers {self.retrievers}\")\n        # should populate the context\n        docs, infos = self.retrieve(message, history)\n        print(f\"Got {len(docs)} retrieved documents\")\n        yield from infos\n\n        evidence_mode, evidence, images = self.evidence_pipeline(docs).content\n\n        def generate_relevant_scores():\n            nonlocal docs\n            docs = self.retrievers[0].generate_relevant_scores(message, docs)\n\n        # generate relevant score using\n        if 
evidence and self.retrievers:\n            scoring_thread = threading.Thread(target=generate_relevant_scores)\n            scoring_thread.start()\n        else:\n            scoring_thread = None\n\n        answer = yield from self.answering_pipeline.stream(\n            question=message,\n            history=history,\n            evidence=evidence,\n            evidence_mode=evidence_mode,\n            images=images,\n            conv_id=conv_id,\n            **kwargs,\n        )\n\n        # check <think> tag from reasoning models\n        processed_answer = replace_think_tag_with_details(answer.text)\n        if processed_answer != answer.text:\n            # clear the chat message and render again\n            yield Document(channel=\"chat\", content=None)\n            yield Document(channel=\"chat\", content=processed_answer)\n\n        # show the evidence\n        if scoring_thread:\n            scoring_thread.join()\n\n        yield from self.show_citations_and_addons(answer, docs, message)\n\n        return answer\n\n    @classmethod\n    def prepare_pipeline_instance(cls, settings, retrievers):\n        return cls(\n            retrievers=retrievers,\n            rewrite_pipeline=None,\n        )\n\n    @classmethod\n    def get_pipeline(cls, settings, states, retrievers):\n        \"\"\"Get the reasoning pipeline\n\n        Args:\n            settings: the settings for the pipeline\n            retrievers: the retrievers to use\n        \"\"\"\n        max_context_length_setting = settings.get(\"reasoning.max_context_length\", 32000)\n\n        pipeline = cls.prepare_pipeline_instance(settings, retrievers)\n\n        prefix = f\"reasoning.options.{cls.get_info()['id']}\"\n        llm_name = settings.get(f\"{prefix}.llm\", None)\n        llm = llms.get(llm_name, llms.get_default())\n\n        # prepare evidence pipeline configuration\n        evidence_pipeline = pipeline.evidence_pipeline\n        evidence_pipeline.max_context_length = 
max_context_length_setting\n\n        # answering pipeline configuration\n        use_inline_citation = settings[f\"{prefix}.highlight_citation\"] == \"inline\"\n\n        if use_inline_citation:\n            answer_pipeline = pipeline.answering_pipeline = AnswerWithInlineCitation()\n        else:\n            answer_pipeline = pipeline.answering_pipeline = AnswerWithContextPipeline()\n\n        answer_pipeline.llm = llm\n        answer_pipeline.citation_pipeline.llm = llm\n        answer_pipeline.n_last_interactions = settings[f\"{prefix}.n_last_interactions\"]\n        answer_pipeline.enable_citation = (\n            settings[f\"{prefix}.highlight_citation\"] != \"off\"\n        )\n        answer_pipeline.enable_mindmap = settings[f\"{prefix}.create_mindmap\"]\n        answer_pipeline.enable_citation_viz = settings[f\"{prefix}.create_citation_viz\"]\n        answer_pipeline.use_multimodal = settings[f\"{prefix}.use_multimodal\"]\n        answer_pipeline.system_prompt = settings[f\"{prefix}.system_prompt\"]\n        answer_pipeline.qa_template = settings[f\"{prefix}.qa_prompt\"]\n        answer_pipeline.lang = SUPPORTED_LANGUAGE_MAP.get(\n            settings[\"reasoning.lang\"], \"English\"\n        )\n\n        pipeline.add_query_context.llm = llm\n        pipeline.add_query_context.n_last_interactions = settings[\n            f\"{prefix}.n_last_interactions\"\n        ]\n\n        pipeline.trigger_context = settings[f\"{prefix}.trigger_context\"]\n        pipeline.use_rewrite = states.get(\"app\", {}).get(\"regen\", False)\n        if pipeline.rewrite_pipeline:\n            pipeline.rewrite_pipeline.llm = llm\n            pipeline.rewrite_pipeline.lang = SUPPORTED_LANGUAGE_MAP.get(\n                settings[\"reasoning.lang\"], \"English\"\n            )\n        return pipeline\n\n    @classmethod\n    def get_user_settings(cls) -> dict:\n        from ktem.llms.manager import llms\n\n        llm = \"\"\n        choices = [(\"(default)\", \"\")]\n        try:\n 
           choices += [(_, _) for _ in llms.options().keys()]\n        except Exception as e:\n            logger.exception(f\"Failed to get LLM options: {e}\")\n\n        return {\n            \"llm\": {\n                \"name\": \"Language model\",\n                \"value\": llm,\n                \"component\": \"dropdown\",\n                \"choices\": choices,\n                \"special_type\": \"llm\",\n                \"info\": (\n                    \"The language model to use for generating the answer. If None, \"\n                    \"the application default language model will be used.\"\n                ),\n            },\n            \"highlight_citation\": {\n                \"name\": \"Citation style\",\n                \"value\": (\n                    \"highlight\"\n                    if not config(\"USE_LOW_LLM_REQUESTS\", default=False, cast=bool)\n                    else \"off\"\n                ),\n                \"component\": \"radio\",\n                \"choices\": [\n                    (\"citation: highlight\", \"highlight\"),\n                    (\"citation: inline\", \"inline\"),\n                    (\"no citation\", \"off\"),\n                ],\n            },\n            \"create_mindmap\": {\n                \"name\": \"Create Mindmap\",\n                \"value\": False,\n                \"component\": \"checkbox\",\n            },\n            \"create_citation_viz\": {\n                \"name\": \"Create Embeddings Visualization\",\n                \"value\": False,\n                \"component\": \"checkbox\",\n            },\n            \"use_multimodal\": {\n                \"name\": \"Use Multimodal Input\",\n                \"value\": False,\n                \"component\": \"checkbox\",\n            },\n            \"system_prompt\": {\n                \"name\": \"System Prompt\",\n                \"value\": (\"This is a question answering system.\"),\n            },\n            \"qa_prompt\": {\n                
\"name\": \"QA Prompt (contains {context}, {question}, {lang})\",\n                \"value\": DEFAULT_QA_TEXT_PROMPT,\n            },\n            \"n_last_interactions\": {\n                \"name\": \"Number of interactions to include\",\n                \"value\": 5,\n                \"component\": \"number\",\n                \"info\": \"The maximum number of chat interactions to include in the LLM\",\n            },\n            \"trigger_context\": {\n                \"name\": \"Maximum message length for context rewriting\",\n                \"value\": 150,\n                \"component\": \"number\",\n                \"info\": (\n                    \"The maximum length of the message to trigger context addition. \"\n                    \"Exceeding this length, the message will be used as is.\"\n                ),\n            },\n        }\n\n    @classmethod\n    def get_info(cls) -> dict:\n        return {\n            \"id\": \"simple\",\n            \"name\": \"Simple QA\",\n            \"description\": (\n                \"Simple RAG-based question answering pipeline. This pipeline can \"\n                \"perform both keyword search and similarity search to retrieve the \"\n                \"context. 
After that it includes that context to generate the answer.\"\n            ),\n        }\n\n\nclass FullDecomposeQAPipeline(FullQAPipeline):\n    def answer_sub_questions(\n        self, messages: list, conv_id: str, history: list, **kwargs\n    ):\n        output_str = \"\"\n        for idx, message in enumerate(messages):\n            yield Document(\n                channel=\"chat\",\n                content=f\"<br><b>Sub-question {idx + 1}</b>\"\n                f\"<br>{message}<br><b>Answer</b><br>\",\n            )\n            # should populate the context\n            docs, infos = self.retrieve(message, history)\n            print(f\"Got {len(docs)} retrieved documents\")\n\n            yield from infos\n\n            evidence_mode, evidence, images = self.evidence_pipeline(docs).content\n            answer = yield from self.answering_pipeline.stream(\n                question=message,\n                history=history,\n                evidence=evidence,\n                evidence_mode=evidence_mode,\n                images=images,\n                conv_id=conv_id,\n                **kwargs,\n            )\n\n            output_str += (\n                f\"Sub-question {idx + 1}-th: '{message}'\\nAnswer: '{answer.text}'\\n\\n\"\n            )\n\n        return output_str\n\n    def stream(  # type: ignore\n        self, message: str, conv_id: str, history: list, **kwargs  # type: ignore\n    ) -> Generator[Document, None, Document]:\n        sub_question_answer_output = \"\"\n        if self.rewrite_pipeline:\n            print(\"Chosen rewrite pipeline\", self.rewrite_pipeline)\n            result = self.rewrite_pipeline(question=message)\n            print(\"Rewrite result\", result)\n            if isinstance(result, Document):\n                message = result.text\n            elif (\n                isinstance(result, list)\n                and len(result) > 0\n                and isinstance(result[0], Document)\n            ):\n                yield 
Document(\n                    channel=\"chat\",\n                    content=\"<h4>Sub questions and their answers</h4>\",\n                )\n                sub_question_answer_output = yield from self.answer_sub_questions(\n                    [r.text for r in result], conv_id, history, **kwargs\n                )\n\n        yield Document(\n            channel=\"chat\",\n            content=f\"<h4>Main question</h4>{message}<br><b>Answer</b><br>\",\n        )\n\n        # should populate the context\n        docs, infos = self.retrieve(message, history)\n        print(f\"Got {len(docs)} retrieved documents\")\n        yield from infos\n\n        evidence_mode, evidence, images = self.evidence_pipeline(docs).content\n        answer = yield from self.answering_pipeline.stream(\n            question=message,\n            history=history,\n            evidence=evidence + \"\\n\" + sub_question_answer_output,\n            evidence_mode=evidence_mode,\n            images=images,\n            conv_id=conv_id,\n            **kwargs,\n        )\n\n        # show the evidence\n        with_citation, without_citation = self.answering_pipeline.prepare_citations(\n            answer, docs\n        )\n        if not with_citation and not without_citation:\n            yield Document(channel=\"info\", content=\"<h5><b>No evidence found.</b></h5>\")\n        else:\n            yield Document(channel=\"info\", content=None)\n            yield from with_citation\n            yield from without_citation\n\n        return answer\n\n    @classmethod\n    def get_user_settings(cls) -> dict:\n        user_settings = super().get_user_settings()\n        user_settings[\"decompose_prompt\"] = {\n            \"name\": \"Decompose Prompt\",\n            \"value\": DecomposeQuestionPipeline.DECOMPOSE_SYSTEM_PROMPT_TEMPLATE,\n        }\n        return user_settings\n\n    @classmethod\n    def prepare_pipeline_instance(cls, settings, retrievers):\n        prefix = 
f\"reasoning.options.{cls.get_info()['id']}\"\n        pipeline = cls(\n            retrievers=retrievers,\n            rewrite_pipeline=DecomposeQuestionPipeline(\n                prompt_template=settings.get(f\"{prefix}.decompose_prompt\")\n            ),\n        )\n        return pipeline\n\n    @classmethod\n    def get_info(cls) -> dict:\n        return {\n            \"id\": \"complex\",\n            \"name\": \"Complex QA\",\n            \"description\": (\n                \"Use multi-step reasoning to decompose a complex question into \"\n                \"multiple sub-questions. This pipeline can \"\n                \"perform both keyword search and similarity search to retrieve the \"\n                \"context. After that it includes that context to generate the answer.\"\n            ),\n        }\n"
  },
  {
    "path": "libs/ktem/ktem/rerankings/__init__.py",
    "content": ""
  },
  {
    "path": "libs/ktem/ktem/rerankings/db.py",
    "content": "from typing import Type\n\nfrom ktem.db.engine import engine\nfrom sqlalchemy import JSON, Boolean, Column, String\nfrom sqlalchemy.orm import DeclarativeBase\nfrom theflow.settings import settings as flowsettings\nfrom theflow.utils.modules import import_dotted_string\n\n\nclass Base(DeclarativeBase):\n    pass\n\n\nclass BaseRerankingTable(Base):\n    \"\"\"Base table to store rerankings model\"\"\"\n\n    __abstract__ = True\n\n    name = Column(String, primary_key=True, unique=True)\n    spec = Column(JSON, default={})\n    default = Column(Boolean, default=False)\n\n\n__base_reranking: Type[BaseRerankingTable] = (\n    import_dotted_string(flowsettings.KH_TABLE_RERANKING, safe=False)\n    if hasattr(flowsettings, \"KH_TABLE_RERANKING\")\n    else BaseRerankingTable\n)\n\n\nclass RerankingTable(__base_reranking):  # type: ignore\n    __tablename__ = \"reranking\"\n\n\nif not getattr(flowsettings, \"KH_ENABLE_ALEMBIC\", False):\n    RerankingTable.metadata.create_all(engine)\n"
  },
  {
    "path": "libs/ktem/ktem/rerankings/manager.py",
    "content": "from typing import Optional, Type\n\nfrom sqlalchemy import select\nfrom sqlalchemy.orm import Session\nfrom theflow.settings import settings as flowsettings\nfrom theflow.utils.modules import deserialize\n\nfrom kotaemon.rerankings.base import BaseReranking\n\nfrom .db import RerankingTable, engine\n\n\nclass RerankingManager:\n    \"\"\"Represent a pool of rerankings models\"\"\"\n\n    def __init__(self):\n        self._models: dict[str, BaseReranking] = {}\n        self._info: dict[str, dict] = {}\n        self._default: str = \"\"\n        self._vendors: list[Type] = []\n\n        # populate the pool if empty\n        if hasattr(flowsettings, \"KH_RERANKINGS\"):\n            with Session(engine) as sess:\n                count = sess.query(RerankingTable).count()\n            if not count:\n                for name, model in flowsettings.KH_RERANKINGS.items():\n                    self.add(\n                        name=name,\n                        spec=model[\"spec\"],\n                        default=model.get(\"default\", False),\n                    )\n\n        self.load()\n        self.load_vendors()\n\n    def load(self):\n        \"\"\"Load the model pool from database\"\"\"\n        self._models, self._info, self._default = {}, {}, \"\"\n        with Session(engine) as sess:\n            stmt = select(RerankingTable)\n            items = sess.execute(stmt)\n\n            for (item,) in items:\n                self._models[item.name] = deserialize(item.spec, safe=False)\n                self._info[item.name] = {\n                    \"name\": item.name,\n                    \"spec\": item.spec,\n                    \"default\": item.default,\n                }\n                if item.default:\n                    self._default = item.name\n\n    def load_vendors(self):\n        from kotaemon.rerankings import (\n            CohereReranking,\n            TeiFastReranking,\n            VoyageAIReranking,\n        )\n\n        
self._vendors = [TeiFastReranking, CohereReranking, VoyageAIReranking]\n\n    def __getitem__(self, key: str) -> BaseReranking:\n        \"\"\"Get model by name\"\"\"\n        return self._models[key]\n\n    def __contains__(self, key: str) -> bool:\n        \"\"\"Check if model exists\"\"\"\n        return key in self._models\n\n    def get(\n        self, key: str, default: Optional[BaseReranking] = None\n    ) -> Optional[BaseReranking]:\n        \"\"\"Get model by name with default value\"\"\"\n        return self._models.get(key, default)\n\n    def settings(self) -> dict:\n        \"\"\"Present model pools option for gradio\"\"\"\n        return {\n            \"label\": \"Reranking\",\n            \"choices\": list(self._models.keys()),\n            \"value\": self.get_default_name(),\n        }\n\n    def options(self) -> dict:\n        \"\"\"Present a dict of models\"\"\"\n        return self._models\n\n    def get_random_name(self) -> str:\n        \"\"\"Get the name of random model\n\n        Returns:\n            str: random model name in the pool\n        \"\"\"\n        import random\n\n        if not self._models:\n            raise ValueError(\"No models in pool\")\n\n        return random.choice(list(self._models.keys()))\n\n    def get_default_name(self) -> str:\n        \"\"\"Get the name of default model\n\n        In case there is no default model, choose random model from pool. 
In\n        case there are multiple default models, choose random from them.\n\n        Returns:\n            str: model name\n        \"\"\"\n        if not self._models:\n            raise ValueError(\"No models in pool\")\n\n        if not self._default:\n            return self.get_random_name()\n\n        return self._default\n\n    def get_random(self) -> BaseReranking:\n        \"\"\"Get random model\"\"\"\n        return self._models[self.get_random_name()]\n\n    def get_default(self) -> BaseReranking:\n        \"\"\"Get default model\n\n        In case there is no default model, choose random model from pool. In\n        case there are multiple default models, choose random from them.\n\n        Returns:\n            BaseReranking: model\n        \"\"\"\n        return self._models[self.get_default_name()]\n\n    def info(self) -> dict:\n        \"\"\"List all models\"\"\"\n        return self._info\n\n    def add(self, name: str, spec: dict, default: bool):\n        if not name:\n            raise ValueError(\"Name must not be empty\")\n\n        try:\n            with Session(engine) as sess:\n                if default:\n                    # turn all models to non-default\n                    sess.query(RerankingTable).update({\"default\": False})\n                    sess.commit()\n\n                item = RerankingTable(name=name, spec=spec, default=default)\n                sess.add(item)\n                sess.commit()\n        except Exception as e:\n            raise ValueError(f\"Failed to add model {name}: {e}\")\n\n        self.load()\n\n    def delete(self, name: str):\n        \"\"\"Delete a model from the pool\"\"\"\n        try:\n            with Session(engine) as sess:\n                item = sess.query(RerankingTable).filter_by(name=name).first()\n                sess.delete(item)\n                sess.commit()\n        except Exception as e:\n            raise ValueError(f\"Failed to delete model {name}: {e}\")\n\n        
self.load()\n\n    def update(self, name: str, spec: dict, default: bool, new_name: str = \"\"):\n        \"\"\"Update a model in the pool, optionally renaming it.\"\"\"\n        if not name:\n            raise ValueError(\"Name must not be empty\")\n\n        if new_name and new_name != name:\n            if new_name in self._info:\n                raise ValueError(\n                    f\"Model '{new_name}' already exists. Use a unique name.\"\n                )\n            self.delete(name)\n            self.add(new_name, spec=spec, default=default)\n            return\n\n        try:\n            with Session(engine) as sess:\n\n                if default:\n                    # turn all models to non-default\n                    sess.query(RerankingTable).update({\"default\": False})\n                    sess.commit()\n\n                item = sess.query(RerankingTable).filter_by(name=name).first()\n                if not item:\n                    raise ValueError(f\"Model {name} not found\")\n                item.spec = spec\n                item.default = default\n                sess.commit()\n        except Exception as e:\n            raise ValueError(f\"Failed to update model {name}: {e}\")\n\n        self.load()\n\n    def vendors(self) -> dict:\n        \"\"\"Return list of vendors\"\"\"\n        return {vendor.__qualname__: vendor for vendor in self._vendors}\n\n\nreranking_models_manager = RerankingManager()\n"
  },
  {
    "path": "libs/ktem/ktem/rerankings/ui.py",
    "content": "from copy import deepcopy\n\nimport gradio as gr\nimport pandas as pd\nimport yaml\nfrom ktem.app import BasePage\nfrom ktem.utils.file import YAMLNoDateSafeLoader\nfrom theflow.utils.modules import deserialize\n\nfrom kotaemon.base import Document\n\nfrom .manager import reranking_models_manager\n\n\ndef format_description(cls):\n    params = cls.describe()[\"params\"]\n    params_lines = [\"| Name | Type | Description |\", \"| --- | --- | --- |\"]\n    for key, value in params.items():\n        if isinstance(value[\"auto_callback\"], str):\n            continue\n        params_lines.append(f\"| {key} | {value['type']} | {value['help']} |\")\n    return f\"{cls.__doc__}\\n\\n\" + \"\\n\".join(params_lines)\n\n\nclass RerankingManagement(BasePage):\n    def __init__(self, app):\n        self._app = app\n        self.spec_desc_default = (\n            \"# Spec description\\n\\nSelect a model to view the spec description.\"\n        )\n        self.on_building_ui()\n\n    def on_building_ui(self):\n        with gr.Tab(label=\"View\"):\n            self.rerank_list = gr.DataFrame(\n                headers=[\"name\", \"vendor\", \"default\"],\n                interactive=False,\n                column_widths=[30, 40, 30],\n            )\n\n            with gr.Column(visible=False) as self._selected_panel:\n                self.selected_rerank_name = gr.Textbox(value=\"\", visible=False)\n                with gr.Row():\n                    with gr.Column():\n                        self.edit_default = gr.Checkbox(\n                            label=\"Set default\",\n                            info=(\n                                \"Set this Reranking model as default. 
This default \"\n                                \"Reranking will be used by other components by default \"\n                                \"if no Reranking is specified for such components.\"\n                            ),\n                        )\n                        self.edit_name = gr.Textbox(\n                            label=\"Name\",\n                            info=\"Edit to rename this Reranking model.\",\n                        )\n                        self.edit_spec = gr.Textbox(\n                            label=\"Specification\",\n                            info=\"Specification of the Reranking model in YAML format\",\n                            lines=10,\n                        )\n\n                        with gr.Accordion(\n                            label=\"Test connection\", visible=False, open=False\n                        ) as self._check_connection_panel:\n                            with gr.Row():\n                                with gr.Column(scale=4):\n                                    self.connection_logs = gr.HTML(\n                                        \"Logs\",\n                                    )\n\n                                with gr.Column(scale=1):\n                                    self.btn_test_connection = gr.Button(\"Test\")\n\n                        with gr.Row(visible=False) as self._selected_panel_btn:\n                            with gr.Column():\n                                self.btn_edit_save = gr.Button(\n                                    \"Save\", min_width=10, variant=\"primary\"\n                                )\n                            with gr.Column():\n                                self.btn_delete = gr.Button(\n                                    \"Delete\", min_width=10, variant=\"stop\"\n                                )\n                                with gr.Row():\n                                    self.btn_delete_yes = gr.Button(\n                                
        \"Confirm Delete\",\n                                        variant=\"stop\",\n                                        visible=False,\n                                        min_width=10,\n                                    )\n                                    self.btn_delete_no = gr.Button(\n                                        \"Cancel\", visible=False, min_width=10\n                                    )\n                            with gr.Column():\n                                self.btn_close = gr.Button(\"Close\", min_width=10)\n\n                    with gr.Column():\n                        self.edit_spec_desc = gr.Markdown(\"# Spec description\")\n\n        with gr.Tab(label=\"Add\"):\n            with gr.Row():\n                with gr.Column(scale=2):\n                    self.name = gr.Textbox(\n                        label=\"Name\",\n                        info=(\n                            \"Must be unique and non-empty. \"\n                            \"The name will be used to identify the reranking model.\"\n                        ),\n                    )\n                    self.rerank_choices = gr.Dropdown(\n                        label=\"Vendors\",\n                        info=(\n                            \"Choose the vendor of the Reranking model. Each vendor \"\n                            \"has different specification.\"\n                        ),\n                    )\n                    self.spec = gr.Textbox(\n                        label=\"Specification\",\n                        info=\"Specification of the Embedding model in YAML format.\",\n                    )\n                    self.default = gr.Checkbox(\n                        label=\"Set default\",\n                        info=(\n                            \"Set this Reranking model as default. 
This default \"\n                            \"Reranking will be used by other components by default \"\n                            \"if no Reranking is specified for such components.\"\n                        ),\n                    )\n                    self.btn_new = gr.Button(\"Add\", variant=\"primary\")\n\n                with gr.Column(scale=3):\n                    self.spec_desc = gr.Markdown(self.spec_desc_default)\n\n    def _on_app_created(self):\n        \"\"\"Called when the app is created\"\"\"\n        self._app.app.load(\n            self.list_rerankings,\n            inputs=[],\n            outputs=[self.rerank_list],\n        )\n        self._app.app.load(\n            lambda: gr.update(choices=list(reranking_models_manager.vendors().keys())),\n            outputs=[self.rerank_choices],\n        )\n\n    def on_rerank_vendor_change(self, vendor):\n        vendor = reranking_models_manager.vendors()[vendor]\n\n        required: dict = {}\n        desc = vendor.describe()\n        for key, value in desc[\"params\"].items():\n            if value.get(\"required\", False):\n                required[key] = value.get(\"default\", None)\n\n            return yaml.dump(required), format_description(vendor)\n\n    def on_register_events(self):\n        self.rerank_choices.select(\n            self.on_rerank_vendor_change,\n            inputs=[self.rerank_choices],\n            outputs=[self.spec, self.spec_desc],\n        )\n        self.btn_new.click(\n            self.create_rerank,\n            inputs=[self.name, self.rerank_choices, self.spec, self.default],\n            outputs=None,\n        ).success(self.list_rerankings, inputs=[], outputs=[self.rerank_list]).success(\n            lambda: (\"\", None, \"\", False, self.spec_desc_default),\n            outputs=[\n                self.name,\n                self.rerank_choices,\n                self.spec,\n                self.default,\n                self.spec_desc,\n            ],\n        )\n 
       self.rerank_list.select(\n            self.select_rerank,\n            inputs=self.rerank_list,\n            outputs=[self.selected_rerank_name],\n            show_progress=\"hidden\",\n        )\n        self.selected_rerank_name.change(\n            self.on_selected_rerank_change,\n            inputs=[self.selected_rerank_name],\n            outputs=[\n                self._selected_panel,\n                self._selected_panel_btn,\n                # delete section\n                self.btn_delete,\n                self.btn_delete_yes,\n                self.btn_delete_no,\n                # edit section\n                self.edit_name,\n                self.edit_spec,\n                self.edit_spec_desc,\n                self.edit_default,\n            ],\n            show_progress=\"hidden\",\n        ).success(lambda: gr.update(value=\"\"), outputs=[self.connection_logs])\n\n        self.btn_delete.click(\n            self.on_btn_delete_click,\n            inputs=[],\n            outputs=[self.btn_delete, self.btn_delete_yes, self.btn_delete_no],\n            show_progress=\"hidden\",\n        )\n        self.btn_delete_yes.click(\n            self.delete_rerank,\n            inputs=[self.selected_rerank_name],\n            outputs=[self.selected_rerank_name],\n            show_progress=\"hidden\",\n        ).then(\n            self.list_rerankings,\n            inputs=[],\n            outputs=[self.rerank_list],\n        )\n        self.btn_delete_no.click(\n            lambda: (\n                gr.update(visible=True),\n                gr.update(visible=False),\n                gr.update(visible=False),\n            ),\n            inputs=[],\n            outputs=[self.btn_delete, self.btn_delete_yes, self.btn_delete_no],\n            show_progress=\"hidden\",\n        )\n        self.btn_edit_save.click(\n            self.save_rerank,\n            inputs=[\n                self.selected_rerank_name,\n                self.edit_name,\n                
self.edit_default,\n                self.edit_spec,\n            ],\n            outputs=[self.selected_rerank_name],\n            show_progress=\"hidden\",\n        ).then(\n            self.list_rerankings,\n            inputs=[],\n            outputs=[self.rerank_list],\n        )\n        self.btn_close.click(lambda: \"\", outputs=[self.selected_rerank_name])\n\n        self.btn_test_connection.click(\n            self.check_connection,\n            inputs=[self.selected_rerank_name, self.edit_spec],\n            outputs=[self.connection_logs],\n        )\n\n    def create_rerank(self, name, choices, spec, default):\n        try:\n            name = name.strip()\n            spec = yaml.load(spec, Loader=YAMLNoDateSafeLoader)\n            spec[\"__type__\"] = (\n                reranking_models_manager.vendors()[choices].__module__\n                + \".\"\n                + reranking_models_manager.vendors()[choices].__qualname__\n            )\n\n            reranking_models_manager.add(name, spec=spec, default=default)\n            gr.Info(f'Reranking model \"{name}\" created successfully')\n        except ValueError as e:\n            raise gr.Error(str(e))\n        except Exception as e:\n            raise gr.Error(f\"Failed to create Reranking model '{name}': {e}\")\n\n    def list_rerankings(self):\n        \"\"\"List the Reranking models\"\"\"\n        items = []\n        for item in reranking_models_manager.info().values():\n            record = {}\n            record[\"name\"] = item[\"name\"]\n            record[\"vendor\"] = item[\"spec\"].get(\"__type__\", \"-\").split(\".\")[-1]\n            record[\"default\"] = item[\"default\"]\n            items.append(record)\n\n        if items:\n            rerank_list = pd.DataFrame.from_records(items)\n        else:\n            rerank_list = pd.DataFrame.from_records(\n                [{\"name\": \"-\", \"vendor\": \"-\", \"default\": \"-\"}]\n            )\n\n        return rerank_list\n\n    def 
select_rerank(self, rerank_list, ev: gr.SelectData):\n        if ev.value == \"-\" and ev.index[0] == 0:\n            gr.Info(\"No reranking model is loaded. Please add first\")\n            return \"\"\n\n        if not ev.selected:\n            return \"\"\n\n        return rerank_list[\"name\"][ev.index[0]]\n\n    def on_selected_rerank_change(self, selected_rerank_name):\n        if selected_rerank_name == \"\":\n            _selected_panel = gr.update(visible=False)\n            _selected_panel_btn = gr.update(visible=False)\n            btn_delete = gr.update(visible=True)\n            btn_delete_yes = gr.update(visible=False)\n            btn_delete_no = gr.update(visible=False)\n            edit_name = gr.update(value=\"\")\n            edit_spec = gr.update(value=\"\")\n            edit_spec_desc = gr.update(value=\"\")\n            edit_default = gr.update(value=False)\n        else:\n            _selected_panel = gr.update(visible=True)\n            _selected_panel_btn = gr.update(visible=True)\n            btn_delete = gr.update(visible=True)\n            btn_delete_yes = gr.update(visible=False)\n            btn_delete_no = gr.update(visible=False)\n\n            info = deepcopy(reranking_models_manager.info()[selected_rerank_name])\n            vendor_str = info[\"spec\"].pop(\"__type__\", \"-\").split(\".\")[-1]\n            vendor = reranking_models_manager.vendors()[vendor_str]\n\n            edit_name = selected_rerank_name\n            edit_spec = yaml.dump(info[\"spec\"])\n            edit_spec_desc = format_description(vendor)\n            edit_default = info[\"default\"]\n\n        return (\n            _selected_panel,\n            _selected_panel_btn,\n            btn_delete,\n            btn_delete_yes,\n            btn_delete_no,\n            edit_name,\n            edit_spec,\n            edit_spec_desc,\n            edit_default,\n        )\n\n    def on_btn_delete_click(self):\n        btn_delete = gr.update(visible=False)\n        
btn_delete_yes = gr.update(visible=True)\n        btn_delete_no = gr.update(visible=True)\n\n        return btn_delete, btn_delete_yes, btn_delete_no\n\n    def check_connection(self, selected_rerank_name, selected_spec):\n        log_content: str = \"\"\n        try:\n            log_content += f\"- Testing model: {selected_rerank_name}<br>\"\n            yield log_content\n\n            # Parse content & init model\n            info = deepcopy(reranking_models_manager.info()[selected_rerank_name])\n\n            # Parse content & create dummy response\n            spec = yaml.load(selected_spec, Loader=YAMLNoDateSafeLoader)\n            info[\"spec\"].update(spec)\n\n            rerank = deserialize(info[\"spec\"], safe=False)\n\n            if rerank is None:\n                raise Exception(f\"Cannot find model: {selected_rerank_name}\")\n\n            log_content += \"- Sending a message ([`Hello`], `Hi`)<br>\"\n            yield log_content\n            _ = rerank([Document(content=\"Hello\")], \"Hi\")\n\n            log_content += (\n                \"<mark style='background: green; color: white'>- Connection success. \"\n                \"</mark><br>\"\n            )\n            yield log_content\n\n            gr.Info(f\"Reranking model {selected_rerank_name} connected successfully\")\n        except Exception as e:\n            print(e)\n            log_content += (\n                f\"<mark style='color: yellow; background: red'>- Connection failed. 
\"\n                f\"Got error:\\n {str(e)}</mark>\"\n            )\n            yield log_content\n\n        return log_content\n\n    def save_rerank(self, selected_rerank_name, edit_name, default, spec):\n        try:\n            new_name = edit_name.strip()\n            spec = yaml.load(spec, Loader=YAMLNoDateSafeLoader)\n            spec[\"__type__\"] = reranking_models_manager.info()[selected_rerank_name][\n                \"spec\"\n            ][\"__type__\"]\n            reranking_models_manager.update(\n                selected_rerank_name, spec=spec, default=default, new_name=new_name\n            )\n            final_name = (\n                new_name if new_name != selected_rerank_name else selected_rerank_name\n            )\n            gr.Info(f'Reranking model \"{final_name}\" saved successfully')\n            return final_name\n        except ValueError as e:\n            raise gr.Error(str(e))\n        except Exception as e:\n            raise gr.Error(\n                f'Failed to save Reranking model \"{selected_rerank_name}\": {e}'\n            )\n\n    def delete_rerank(self, selected_rerank_name):\n        try:\n            reranking_models_manager.delete(selected_rerank_name)\n        except Exception as e:\n            gr.Error(f'Failed to delete Reranking model \"{selected_rerank_name}\": {e}')\n            return selected_rerank_name\n\n        return \"\"\n"
  },
  {
    "path": "libs/ktem/ktem/settings.py",
    "content": "from typing import Any\n\nfrom pydantic import BaseModel, Field\n\n\nclass SettingItem(BaseModel):\n    \"\"\"Represent a setting item\n\n    Args:\n        name: the name of the setting item\n        value: the default value of the setting item\n        choices: the list of choices of the setting item, if any\n        metadata: the metadata of the setting item\n        component: the expected UI component to render the setting\n    \"\"\"\n\n    name: str\n    value: Any\n    choices: list = Field(default_factory=list)\n    metadata: dict = Field(default_factory=dict)\n    component: str = \"text\"\n    special_type: str = \"\"\n\n\nclass BaseSettingGroup(BaseModel):\n    settings: dict[str, \"SettingItem\"] = Field(default_factory=dict)\n    options: dict[str, \"BaseSettingGroup\"] = Field(default_factory=dict)\n\n    def _get_options(self) -> dict:\n        return {}\n\n    def finalize(self):\n        \"\"\"Finalize the setting group\"\"\"\n\n    def flatten(self) -> dict:\n        \"\"\"Render the setting group into value\"\"\"\n        output = {}\n        for key, value in self.settings.items():\n            output[key] = value.value\n\n        output.update({f\"options.{k}\": v for k, v in self._get_options().items()})\n\n        return output\n\n    def get_setting_item(self, path: str) -> SettingItem:\n        \"\"\"Get the item based on dot notation\"\"\"\n        path = path.strip(\".\")\n        if \".\" not in path:\n            return self.settings[path]\n\n        key, sub_path = path.split(\".\", 1)\n        if key != \"options\":\n            raise ValueError(f\"Invalid key {path}. 
Should start with `options.*`\")\n\n        option_id, sub_path = sub_path.split(\".\", 1)\n        option = self.options[option_id]\n        return option.get_setting_item(sub_path)\n\n    def __bool__(self):\n        return bool(self.settings) or bool(self.options)\n\n\nclass SettingReasoningGroup(BaseSettingGroup):\n    def _get_options(self) -> dict:\n        output = {}\n        for ex_name, ex_setting in self.options.items():\n            for key, value in ex_setting.flatten().items():\n                output[f\"{ex_name}.{key}\"] = value\n\n        return output\n\n    def finalize(self):\n        \"\"\"Finalize the setting\"\"\"\n        options = list(self.options.keys())\n        if options:\n            self.settings[\"use\"].choices = [(x, x) for x in options]\n            self.settings[\"use\"].value = options[0]\n\n\nclass SettingIndexOption(BaseSettingGroup):\n    \"\"\"Temporarily keep it here to see if we need this setting template\n    for the index component\n    \"\"\"\n\n    indexing: BaseSettingGroup\n    retrieval: BaseSettingGroup\n\n    def flatten(self) -> dict:\n        \"\"\"Render the setting group into value\"\"\"\n        output = {}\n        for key, value in self.indexing.flatten():\n            output[f\"indexing.{key}\"] = value\n\n        for key, value in self.retrieval.flatten():\n            output[f\"retrieval.{key}\"] = value\n\n        return output\n\n    def get_setting_item(self, path: str) -> SettingItem:\n        \"\"\"Get the item based on dot notation\"\"\"\n        path = path.strip(\".\")\n\n        key, sub_path = path.split(\".\", 1)\n        if key not in [\"indexing\", \"retrieval\"]:\n            raise ValueError(\n                f\"Invalid key {path}. 
Should start with `indexing.*` or `retrieval.*`\"\n            )\n\n        value = getattr(self, key)\n        return value.get_setting_item(sub_path)\n\n\nclass SettingIndexGroup(BaseSettingGroup):\n    def _get_options(self) -> dict:\n        output = {}\n        for name, setting in self.options.items():\n            for key, value in setting.flatten().items():\n                output[f\"{name}.{key}\"] = value\n\n        return output\n\n\nclass SettingGroup(BaseModel):\n    application: BaseSettingGroup = Field(default_factory=BaseSettingGroup)\n    index: SettingIndexGroup = Field(default_factory=SettingIndexGroup)\n    reasoning: SettingReasoningGroup = Field(default_factory=SettingReasoningGroup)\n\n    def flatten(self) -> dict:\n        \"\"\"Render the setting group into value\"\"\"\n        output = {}\n        for key, value in self.application.flatten().items():\n            output[f\"application.{key}\"] = value\n\n        for key, value in self.index.flatten().items():\n            output[f\"index.{key}\"] = value\n\n        for key, value in self.reasoning.flatten().items():\n            output[f\"reasoning.{key}\"] = value\n\n        return output\n\n    def get_setting_item(self, path: str) -> SettingItem:\n        \"\"\"Get the item based on dot notation\"\"\"\n        path = path.strip(\".\")\n\n        key, sub_path = path.split(\".\", 1)\n        if key not in [\"application\", \"index\", \"reasoning\"]:\n            raise ValueError(\n                f\"Invalid key {path}. Should start with `application.*`, \"\n                \"`index.*` or `reasoning.*`\"\n            )\n\n        value = getattr(self, key)\n        return value.get_setting_item(sub_path)\n"
  },
  {
    "path": "libs/ktem/ktem/utils/__init__.py",
    "content": "from .conversation import get_file_names_regex, get_urls\nfrom .lang import SUPPORTED_LANGUAGE_MAP\n\n__all__ = [\"SUPPORTED_LANGUAGE_MAP\", \"get_file_names_regex\", \"get_urls\"]\n"
  },
  {
    "path": "libs/ktem/ktem/utils/commands.py",
    "content": "WEB_SEARCH_COMMAND = \"web\"\n"
  },
  {
    "path": "libs/ktem/ktem/utils/conversation.py",
    "content": "import re\r\n\r\n\r\ndef sync_retrieval_n_message(\r\n    messages: list[list[str]],\r\n    retrievals: list[str],\r\n) -> list[str]:\r\n    \"\"\"Ensure len of  messages history and retrieval history are equal\r\n    Empty string/Truncate will be used in case any difference exist\r\n    \"\"\"\r\n    n_message = len(messages)  # include previous history\r\n    n_retrieval = min(n_message, len(retrievals))\r\n\r\n    diff = n_message - n_retrieval\r\n    retrievals = retrievals[:n_retrieval] + [\"\" for _ in range(diff)]\r\n\r\n    assert len(retrievals) == n_message\r\n\r\n    return retrievals\r\n\r\n\r\ndef get_file_names_regex(input_str: str) -> tuple[list[str], str]:\r\n    # get all file names with pattern @\"filename\" in input_str\r\n    # also remove these file names from input_str\r\n    pattern = r'@\"([^\"]*)\"'\r\n    matches = re.findall(pattern, input_str)\r\n    input_str = re.sub(pattern, \"\", input_str).strip()\r\n\r\n    return matches, input_str\r\n\r\n\r\ndef get_urls(input_str: str) -> tuple[list[str], str]:\r\n    # get all urls in input_str\r\n    # also remove these urls from input_str\r\n    pattern = r\"https?://[^\\s]+\"\r\n    matches = re.findall(pattern, input_str)\r\n    input_str = re.sub(pattern, \"\", input_str).strip()\r\n\r\n    return matches, input_str\r\n\r\n\r\nif __name__ == \"__main__\":\r\n    print(sync_retrieval_n_message([[\"\"], [\"\"], [\"\"]], []))\r\n"
  },
  {
    "path": "libs/ktem/ktem/utils/file.py",
    "content": "import yaml\n\n\nclass YAMLNoDateSafeLoader(yaml.SafeLoader):\n    \"\"\"Load datetime as strings, not dates\"\"\"\n\n    @classmethod\n    def remove_implicit_resolver(cls, tag_to_remove):\n        \"\"\"Remove implicit resolvers for a particular tag\n\n        Args:\n            tag_to_remove (str): YAML tag to remove\n        \"\"\"\n        if \"yaml_implicit_resolvers\" not in cls.__dict__:\n            cls.yaml_implicit_resolvers = cls.yaml_implicit_resolvers.copy()\n\n        for first_letter, mappings in cls.yaml_implicit_resolvers.items():\n            cls.yaml_implicit_resolvers[first_letter] = [\n                (tag, regexp) for tag, regexp in mappings if tag != tag_to_remove\n            ]\n\n\nYAMLNoDateSafeLoader.remove_implicit_resolver(\"tag:yaml.org,2002:timestamp\")\n"
  },
  {
    "path": "libs/ktem/ktem/utils/generator.py",
    "content": "class Generator:\n    \"\"\"A generator that stores return value from another generator\"\"\"\n\n    def __init__(self, gen):\n        self.gen = gen\n\n    def __iter__(self):\n        self.value = yield from self.gen\n        return self.value\n"
  },
  {
    "path": "libs/ktem/ktem/utils/hf_papers.py",
    "content": "from datetime import datetime, timedelta\n\nimport requests\nfrom cachetools import TTLCache, cached\n\nHF_API_URL = \"https://huggingface.co/api/daily_papers\"\nARXIV_URL = \"https://arxiv.org/abs/{paper_id}\"\nSEMANTIC_SCHOLAR_QUERY_URL = \"https://api.semanticscholar.org/graph/v1/paper/search/match?query={paper_name}\"  # noqa\nSEMANTIC_SCHOLAR_RECOMMEND_URL = (\n    \"https://api.semanticscholar.org/recommendations/v1/papers/\"  # noqa\n)\nCACHE_TIME = 60 * 60 * 6  # 6 hours\n\n\n# Function to parse the date string\ndef parse_date(date_str):\n    return datetime.strptime(date_str, \"%Y-%m-%dT%H:%M:%S.%fZ\")\n\n\n@cached(cache=TTLCache(maxsize=500, ttl=CACHE_TIME))\ndef get_recommendations_from_semantic_scholar(semantic_scholar_id: str):\n    try:\n        r = requests.post(\n            SEMANTIC_SCHOLAR_RECOMMEND_URL,\n            json={\n                \"positivePaperIds\": [semantic_scholar_id],\n            },\n            params={\"fields\": \"externalIds,title,year\", \"limit\": 14},  # type: ignore\n        )\n        return r.json()[\"recommendedPapers\"]\n    except KeyError as e:\n        print(e)\n        return []\n\n\ndef filter_recommendations(recommendations, max_paper_count=5):\n    # include only arxiv papers\n    arxiv_paper = [\n        r for r in recommendations if r[\"externalIds\"].get(\"ArXiv\", None) is not None\n    ]\n    if len(arxiv_paper) > max_paper_count:\n        arxiv_paper = arxiv_paper[:max_paper_count]\n    return arxiv_paper\n\n\ndef format_recommendation_into_markdown(recommendations):\n    comment = \"(recommended by the Semantic Scholar API)\\n\\n\"\n    for r in recommendations:\n        hub_paper_url = f\"https://arxiv.org/abs/{r['externalIds']['ArXiv']}\"\n        comment += f\"* [{r['title']}]({hub_paper_url}) ({r['year']})\\n\"\n\n    return comment\n\n\ndef get_paper_id_from_name(paper_name):\n    try:\n        response = requests.get(\n            
SEMANTIC_SCHOLAR_QUERY_URL.format(paper_name=paper_name)\n        )\n        response.raise_for_status()\n        items = response.json()\n        paper_id = items.get(\"data\", [])[0].get(\"paperId\")\n    except Exception as e:\n        print(e)\n        return None\n\n    return paper_id\n\n\ndef get_recommended_papers(paper_name):\n    paper_id = get_paper_id_from_name(paper_name)\n    recommended_content = \"\"\n    if paper_id is None:\n        return recommended_content\n\n    recommended_papers = get_recommendations_from_semantic_scholar(paper_id)\n    filtered_recommendations = filter_recommendations(recommended_papers)\n\n    recommended_content = format_recommendation_into_markdown(filtered_recommendations)\n    return recommended_content\n\n\ndef fetch_papers(top_n=5):\n    try:\n        response = requests.get(f\"{HF_API_URL}?limit=100\")\n        response.raise_for_status()\n        items = response.json()\n\n        # Calculate the date 3 days ago from now\n        three_days_ago = datetime.now() - timedelta(days=3)\n\n        # Filter items from the last 3 days\n        recent_items = [\n            item\n            for item in items\n            if parse_date(item.get(\"publishedAt\")) >= three_days_ago\n        ]\n\n        recent_items.sort(\n            key=lambda x: x.get(\"paper\", {}).get(\"upvotes\", 0), reverse=True\n        )\n        output_items = [\n            {\n                \"title\": item.get(\"paper\", {}).get(\"title\"),\n                \"url\": ARXIV_URL.format(paper_id=item.get(\"paper\", {}).get(\"id\")),\n                \"upvotes\": item.get(\"paper\", {}).get(\"upvotes\"),\n            }\n            for item in recent_items[:top_n]\n        ]\n    except Exception as e:\n        print(e)\n        return []\n\n    return output_items\n"
  },
  {
    "path": "libs/ktem/ktem/utils/lang.py",
    "content": "SUPPORTED_LANGUAGE_MAP = {\n    \"en\": \"English\",\n    \"ja\": \"Japanese\",\n    \"vi\": \"Vietnamese\",\n    \"es\": \"Spanish\",\n    \"fr\": \"French\",\n    \"de\": \"German\",\n    \"zh\": \"Chinese\",\n    \"ru\": \"Russian\",\n    \"ar\": \"Arabic\",\n    \"pt\": \"Portuguese\",\n    \"hi\": \"Hindi\",\n    \"bn\": \"Bengali\",\n    \"pa\": \"Punjabi\",\n    \"ko\": \"Korean\",\n    \"it\": \"Italian\",\n    \"nl\": \"Dutch\",\n    \"tr\": \"Turkish\",\n    \"pl\": \"Polish\",\n    \"uk\": \"Ukrainian\",\n    \"ro\": \"Romanian\",\n    \"el\": \"Greek\",\n    \"hu\": \"Hungarian\",\n    \"sv\": \"Swedish\",\n    \"cs\": \"Czech\",\n    \"fi\": \"Finnish\",\n    \"da\": \"Danish\",\n    \"no\": \"Norwegian\",\n    \"he\": \"Hebrew\",\n    \"th\": \"Thai\",\n    \"id\": \"Indonesian\",\n    \"ms\": \"Malay\",\n}\n"
  },
  {
    "path": "libs/ktem/ktem/utils/plantuml.py",
    "content": "#!/usr/bin/env python\n\nfrom __future__ import print_function\n\nimport base64\nimport string\nfrom zlib import compress\n\nimport httplib2\nimport six  # type: ignore\n\nif six.PY2:\n    from string import maketrans\nelse:\n    maketrans = bytes.maketrans\n\n\nplantuml_alphabet = (\n    string.digits + string.ascii_uppercase + string.ascii_lowercase + \"-_\"\n)\nbase64_alphabet = string.ascii_uppercase + string.ascii_lowercase + string.digits + \"+/\"\nb64_to_plantuml = maketrans(\n    base64_alphabet.encode(\"utf-8\"), plantuml_alphabet.encode(\"utf-8\")\n)\n\n\nclass PlantUMLError(Exception):\n    \"\"\"\n    Error in processing.\n    \"\"\"\n\n\nclass PlantUMLConnectionError(PlantUMLError):\n    \"\"\"\n    Error connecting or talking to PlantUML Server.\n    \"\"\"\n\n\nclass PlantUMLHTTPError(PlantUMLConnectionError):\n    \"\"\"\n    Request to PlantUML server returned HTTP Error.\n    \"\"\"\n\n    def __init__(self, response, content, *args, **kwdargs):\n        self.response = response\n        self.content = content\n        message = \"%d: %s\" % (self.response.status, self.response.reason)\n        if not getattr(self, \"message\", None):\n            self.message = message\n        super(PlantUMLHTTPError, self).__init__(message, *args, **kwdargs)\n\n\ndef deflate_and_encode(plantuml_text):\n    \"\"\"zlib compress the plantuml text and encode it for the plantuml server.\"\"\"\n    zlibbed_str = compress(plantuml_text.encode(\"utf-8\"))\n    compressed_string = zlibbed_str[2:-4]\n    return (\n        base64.b64encode(compressed_string).translate(b64_to_plantuml).decode(\"utf-8\")\n    )\n\n\nclass PlantUML(object):\n    \"\"\"Connection to a PlantUML server with optional authentication.\n\n    All parameters are optional.\n\n    :param str url: URL to the PlantUML server image CGI. 
defaults to\n                    http://www.plantuml.com/plantuml/svg/\n    :param dict request_opts: Extra options to be passed off to the\n                    httplib2.Http().request() call. Defaults to no extra\n                    options.\n    \"\"\"\n\n    def __init__(self, url=\"http://www.plantuml.com/plantuml/svg/\", request_opts=None):\n        self.HttpLib2Error = httplib2.HttpLib2Error\n        self.http = httplib2.Http()\n\n        self.url = url\n        # None instead of a `{}` default so instances never share one dict\n        self.request_opts = {} if request_opts is None else request_opts\n\n    def get_url(self, plantuml_text):\n        \"\"\"Return the server URL for the image.\n        You can use this URL in an IMG HTML tag.\n\n        :param str plantuml_text: The plantuml markup to render\n        :returns: the plantuml server image URL\n        \"\"\"\n        return self.url + deflate_and_encode(plantuml_text)\n\n    def process(self, plantuml_text):\n        \"\"\"Render the plantuml text on the server and return it as HTML.\n\n        The response body is decoded as UTF-8, its `<svg ` tag gets the id\n        `mindmap`, and the result is wrapped in a fixed-height\n        `mindmap-wrapper` div.\n\n        :param str plantuml_text: The plantuml markup to render\n        :returns: the HTML snippet (str) containing the rendered markup\n        :raises PlantUMLConnectionError: if the HTTP request fails\n        :raises PlantUMLHTTPError: if the server does not answer with 200\n        \"\"\"\n        url = self.get_url(plantuml_text)\n        try:\n            response, content = self.http.request(url, **self.request_opts)\n        except self.HttpLib2Error as e:\n            raise PlantUMLConnectionError(e)\n        if response.status != 200:\n            raise PlantUMLHTTPError(response, content)\n\n        svg_content = content.decode(\"utf-8\")\n        svg_content = svg_content.replace(\"<svg \", \"<svg id='mindmap' \")\n\n        # wrap in fixed height div\n        svg_content = (\n            \"<div id='mindmap-wrapper' \"\n            \"style='height: 400px; overflow: hidden;'>\"\n            f\"{svg_content}</div>\"\n        )\n\n        return svg_content\n"
  },
  {
    "path": "libs/ktem/ktem/utils/rate_limit.py",
    "content": "from collections import defaultdict\nfrom datetime import datetime, timedelta\n\nimport gradio as gr\nfrom decouple import config\n\n# In-memory store for rate limiting (for demonstration purposes)\nrate_limit_store: dict[str, dict] = defaultdict(dict)\n\n# Rate limit configuration\nRATE_LIMIT = config(\"RATE_LIMIT\", default=20, cast=int)\nRATE_LIMIT_PERIOD = timedelta(hours=24)\n\n\ndef check_rate_limit(limit_type: str, request: gr.Request):\n    if request is None:\n        raise ValueError(\"This feature is not available\")\n\n    user_id = None\n    try:\n        import gradiologin as grlogin\n\n        user = grlogin.get_user(request)\n        if user:\n            user_id = user.get(\"email\")\n    except (ImportError, AssertionError):\n        pass\n\n    if not user_id:\n        raise ValueError(\"Please sign-in to use this feature\")\n\n    now = datetime.now()\n    user_data = rate_limit_store[limit_type].get(\n        user_id, {\"count\": 0, \"reset_time\": now + RATE_LIMIT_PERIOD}\n    )\n\n    if now >= user_data[\"reset_time\"]:\n        # Reset the rate limit for the user\n        user_data = {\"count\": 0, \"reset_time\": now + RATE_LIMIT_PERIOD}\n\n    if user_data[\"count\"] >= RATE_LIMIT:\n        raise ValueError(\"Rate limit exceeded. Please try again later.\")\n\n    # Increment the request count\n    user_data[\"count\"] += 1\n    rate_limit_store[limit_type][user_id] = user_data\n\n    return user_id\n"
  },
  {
    "path": "libs/ktem/ktem/utils/render.py",
    "content": "import os\n\nimport markdown\nfrom fast_langdetect import detect\n\nfrom kotaemon.base import RetrievedDocument\n\nBASE_PATH = os.environ.get(\"GR_FILE_ROOT_PATH\", \"\")\n\n\ndef is_close(val1, val2, tolerance=1e-9):\n    return abs(val1 - val2) <= tolerance\n\n\ndef replace_mardown_header(text: str) -> str:\n    textlines = text.splitlines()\n    newlines = []\n    for line in textlines:\n        if line.startswith(\"#\"):\n            line = \"<strong>\" + line.replace(\"#\", \"\") + \"</strong>\"\n        if line.startswith(\"==\"):\n            line = \"\"\n        newlines.append(line)\n\n    return \"\\n\".join(newlines)\n\n\ndef get_header(doc: RetrievedDocument) -> str:\n    \"\"\"Get the header for the document\"\"\"\n    header = \"\"\n    if \"page_label\" in doc.metadata:\n        header += f\" [Page {doc.metadata['page_label']}]\"\n\n    header += f\" {doc.metadata.get('file_name', '<evidence>')}\"\n    return header.strip()\n\n\nclass Render:\n    \"\"\"Default text rendering into HTML for the UI\"\"\"\n\n    @staticmethod\n    def collapsible(header, content, open: bool = False) -> str:\n        \"\"\"Render an HTML friendly collapsible section\"\"\"\n        o = \" open\" if open else \"\"\n        return (\n            f\"<details class='evidence' {o}><summary>\"\n            f\"{header}</summary>{content}\"\n            \"</details><br>\"\n        )\n\n    @staticmethod\n    def table(text: str) -> str:\n        \"\"\"Render table from markdown format into HTML\"\"\"\n        text = replace_mardown_header(text)\n        return markdown.markdown(\n            text,\n            extensions=[\n                \"markdown.extensions.tables\",\n                \"markdown.extensions.fenced_code\",\n            ],\n        )\n\n    @staticmethod\n    def table_preserve_linebreaks(text: str) -> str:\n        \"\"\"Render table from markdown format into HTML\"\"\"\n        return markdown.markdown(\n            text,\n            
extensions=[\n                \"markdown.extensions.tables\",\n                \"markdown.extensions.fenced_code\",\n            ],\n        ).replace(\"\\n\", \"<br>\")\n\n    @staticmethod\n    def preview(\n        html_content: str,\n        doc: RetrievedDocument,\n        highlight_text: str | None = None,\n    ) -> str:\n        text = doc.content\n        pdf_path = doc.metadata.get(\"file_path\", \"\")\n\n        if not os.path.isfile(pdf_path):\n            print(f\"pdf-path: {pdf_path} does not exist\")\n            return html_content\n\n        is_pdf = doc.metadata.get(\"file_type\", \"\") == \"application/pdf\"\n        page_idx = int(doc.metadata.get(\"page_label\", 1))\n\n        if not is_pdf:\n            print(\"Document is not pdf\")\n            return html_content\n\n        if page_idx < 0:\n            print(\"Fail to extract page number\")\n            return html_content\n\n        if not highlight_text:\n            phrase = \"false\"\n            try:\n                lang = detect(text.replace(\"\\n\", \" \"))[\"lang\"]\n                if lang not in [\"ja\", \"cn\"]:\n                    highlight_words = [\n                        t[:-1] if t.endswith(\"-\") else t for t in text.split(\"\\n\")\n                    ]\n                    highlight_text = highlight_words[0]\n                    phrase = \"true\"\n\n                highlight_text = (\n                    text.replace(\"\\n\", \"\").replace('\"', \"\").replace(\"'\", \"\")\n                )\n            except Exception as e:\n                print(e)\n                highlight_text = text\n        else:\n            phrase = \"true\"\n\n        return f\"\"\"\n        {html_content}\n        <a href=\"#\" class=\"pdf-link\" data-src=\"{BASE_PATH}/file={pdf_path}\" data-page=\"{page_idx}\" data-search=\"{highlight_text}\" data-phrase=\"{phrase}\">\n            [Preview]\n        </a>\n        \"\"\"  # noqa\n\n    @staticmethod\n    def highlight(text: str, elem_id: 
str | None = None) -> str:\n        \"\"\"Highlight text\"\"\"\n        id_text = f\" id='mark-{elem_id}'\" if elem_id else \"\"\n        return f\"<mark{id_text}>{text}</mark>\"\n\n    @staticmethod\n    def image(url: str, text: str = \"\") -> str:\n        \"\"\"Render an image\"\"\"\n        img = f'<img src=\"{url}\"><br>'\n        if text:\n            caption = f\"<p>{text}</p>\"\n            return f\"<figure>{img}{caption}</figure><br>\"\n        return img\n\n    @staticmethod\n    def collapsible_with_header(\n        doc: RetrievedDocument,\n        open_collapsible: bool = False,\n    ) -> str:\n        header = f\"<i>{get_header(doc)}</i>\"\n        if doc.metadata.get(\"type\", \"\") == \"image\":\n            doc_content = Render.image(url=doc.metadata[\"image_origin\"], text=doc.text)\n        elif doc.metadata.get(\"type\", \"\") == \"table_raw\":\n            doc_content = Render.table_preserve_linebreaks(doc.text)\n        else:\n            doc_content = Render.table(doc.text)\n\n        return Render.collapsible(\n            header=Render.preview(header, doc),\n            content=doc_content,\n            open=open_collapsible,\n        )\n\n    @staticmethod\n    def collapsible_with_header_score(\n        doc: RetrievedDocument,\n        override_text: str | None = None,\n        highlight_text: str | None = None,\n        open_collapsible: bool = False,\n    ) -> str:\n        \"\"\"Format the retrieval score and the document\"\"\"\n        # score from doc_store (Elasticsearch)\n        if is_close(doc.score, -1.0):\n            vectorstore_score = \"\"\n            text_search_str = \" (full-text search)<br>\"\n        else:\n            vectorstore_score = str(round(doc.score, 2))\n            text_search_str = \"<br>\"\n\n        llm_reranking_score = (\n            round(doc.metadata[\"llm_trulens_score\"], 2)\n            if doc.metadata.get(\"llm_trulens_score\") is not None\n            else 0.0\n        )\n        reranking_score 
= (\n            round(doc.metadata[\"reranking_score\"], 2)\n            if doc.metadata.get(\"reranking_score\") is not None\n            else 0.0\n        )\n        item_type_prefix = doc.metadata.get(\"type\", \"\")\n        item_type_prefix = item_type_prefix.capitalize()\n        if item_type_prefix:\n            item_type_prefix += \" from \"\n\n        if \"raw\" in item_type_prefix:\n            item_type_prefix = \"\"\n\n        if llm_reranking_score > 0:\n            relevant_score = llm_reranking_score\n        elif reranking_score > 0:\n            relevant_score = reranking_score\n        else:\n            relevant_score = 0.0\n\n        rendered_score = Render.collapsible(\n            header=f\"<b>&emsp;Relevance score</b>: {relevant_score:.1f}\",\n            content=\"<b>&emsp;&emsp;Vectorstore score:</b>\"\n            f\" {vectorstore_score}\"\n            f\"{text_search_str}\"\n            \"<b>&emsp;&emsp;LLM relevant score:</b>\"\n            f\" {llm_reranking_score}<br>\"\n            \"<b>&emsp;&emsp;Reranking score:</b>\"\n            f\" {reranking_score}<br>\",\n        )\n\n        text = doc.text if not override_text else override_text\n        if doc.metadata.get(\"type\", \"\") == \"image\":\n            rendered_doc_content = Render.image(\n                url=doc.metadata[\"image_origin\"],\n                text=text,\n            )\n        elif doc.metadata.get(\"type\", \"\") == \"table_raw\":\n            rendered_doc_content = Render.table_preserve_linebreaks(doc.text)\n        else:\n            rendered_doc_content = Render.table(text)\n\n        rendered_header = Render.preview(\n            f\"<i>{item_type_prefix}{get_header(doc)}</i>\"\n            f\" [score: {llm_reranking_score}]\",\n            doc,\n            highlight_text=highlight_text,\n        )\n        rendered_doc_content = (\n            f\"<div class='evidence-content'>{rendered_doc_content}</div>\"\n        )\n\n        return Render.collapsible(\n 
           header=rendered_header,\n            content=rendered_score + rendered_doc_content,\n            open=open_collapsible,\n        )\n"
  },
  {
    "path": "libs/ktem/ktem/utils/visualize_cited.py",
    "content": "\"\"\"\nThis module aims to project high-dimensional embeddings\ninto a lower-dimensional space for visualization.\n\nRefs:\n1. [RAGxplorer](https://github.com/gabrielchua/RAGxplorer)\n2. [RAGVizExpander](https://github.com/KKenny0/RAGVizExpander)\n\"\"\"\nfrom typing import List, Tuple\n\nimport numpy as np\nimport pandas as pd\nimport plotly.graph_objs as go\nimport umap\n\nfrom kotaemon.base import BaseComponent\nfrom kotaemon.embeddings import BaseEmbeddings\n\nVISUALIZATION_SETTINGS = {\n    \"Original Query\": {\"color\": \"red\", \"opacity\": 1, \"symbol\": \"cross\", \"size\": 15},\n    \"Retrieved\": {\"color\": \"green\", \"opacity\": 1, \"symbol\": \"circle\", \"size\": 10},\n    \"Chunks\": {\"color\": \"blue\", \"opacity\": 0.4, \"symbol\": \"circle\", \"size\": 10},\n    \"Sub-Questions\": {\"color\": \"purple\", \"opacity\": 1, \"symbol\": \"star\", \"size\": 15},\n}\n\n\nclass CreateCitationVizPipeline(BaseComponent):\n    \"\"\"Creating PlotData for visualizing query results\"\"\"\n\n    embedding: BaseEmbeddings\n    projector: umap.UMAP = None\n\n    def _set_up_umap(self, embeddings: np.ndarray):\n        umap_transform = umap.UMAP().fit(embeddings)\n        return umap_transform\n\n    def _project_embeddings(self, embeddings, umap_transform) -> np.ndarray:\n        umap_embeddings = np.empty((len(embeddings), 2))\n        for i, embedding in enumerate(embeddings):\n            umap_embeddings[i] = umap_transform.transform([embedding])\n        return umap_embeddings\n\n    def _get_projections(self, embeddings, umap_transform):\n        projections = self._project_embeddings(embeddings, umap_transform)\n        x = projections[:, 0]\n        y = projections[:, 1]\n        return x, y\n\n    def _prepare_projection_df(\n        self,\n        document_projections: Tuple[np.ndarray, np.ndarray],\n        document_text: List[str],\n        plot_size: int = 3,\n    ) -> pd.DataFrame:\n        \"\"\"Prepares a DataFrame for 
visualization from projections and texts.\n\n        Args:\n            document_projections (Tuple[np.ndarray, np.ndarray]):\n                Tuple of X and Y coordinates of document projections.\n            document_text (List[str]): List of document texts.\n        \"\"\"\n        df = pd.DataFrame({\"x\": document_projections[0], \"y\": document_projections[1]})\n        df[\"document\"] = document_text\n        df[\"document_cleaned\"] = df.document.str.wrap(50).apply(\n            lambda x: x.replace(\"\\n\", \"<br>\")[:512] + \"...\"\n        )\n        df[\"size\"] = plot_size\n        df[\"category\"] = \"Retrieved\"\n        return df\n\n    def _plot_embeddings(self, df: pd.DataFrame) -> go.Figure:\n        \"\"\"\n        Creates a Plotly figure to visualize the embeddings.\n\n        Args:\n            df (pd.DataFrame): DataFrame containing the data to visualize.\n\n        Returns:\n            go.Figure: A Plotly figure object for visualization.\n        \"\"\"\n        fig = go.Figure()\n\n        for category in df[\"category\"].unique():\n            category_df = df[df[\"category\"] == category]\n            settings = VISUALIZATION_SETTINGS.get(\n                category,\n                {\"color\": \"grey\", \"opacity\": 1, \"symbol\": \"circle\", \"size\": 10},\n            )\n            fig.add_trace(\n                go.Scatter(\n                    x=category_df[\"x\"],\n                    y=category_df[\"y\"],\n                    mode=\"markers\",\n                    name=category,\n                    marker=dict(\n                        color=settings[\"color\"],\n                        opacity=settings[\"opacity\"],\n                        symbol=settings[\"symbol\"],\n                        size=settings[\"size\"],\n                        line_width=0,\n                    ),\n                    hoverinfo=\"text\",\n                    text=category_df[\"document_cleaned\"],\n                )\n            )\n\n        
fig.update_layout(\n            height=500,\n            legend=dict(y=100, x=0.5, xanchor=\"center\", yanchor=\"top\", orientation=\"h\"),\n        )\n        return fig\n\n    def run(self, context: List[str], question: str):\n        embed_contexts = self.embedding(context)\n        context_embeddings = np.array([d.embedding for d in embed_contexts])\n\n        self.projector = self._set_up_umap(embeddings=context_embeddings)\n\n        embed_query = self.embedding(question)\n        query_projection = self._get_projections(\n            embeddings=[embed_query[0].embedding], umap_transform=self.projector\n        )\n        viz_query_df = pd.DataFrame(\n            {\n                \"x\": [query_projection[0][0]],\n                \"y\": [query_projection[1][0]],\n                \"document_cleaned\": question,\n                \"category\": \"Original Query\",\n                \"size\": 5,\n            }\n        )\n\n        context_projections = self._get_projections(\n            embeddings=context_embeddings, umap_transform=self.projector\n        )\n        viz_base_df = self._prepare_projection_df(\n            document_projections=context_projections, document_text=context\n        )\n\n        visualization_df = pd.concat([viz_base_df, viz_query_df], axis=0)\n        fig = self._plot_embeddings(visualization_df)\n        return fig\n"
  },
  {
    "path": "libs/ktem/ktem_tests/__init__.py",
    "content": ""
  },
  {
    "path": "libs/ktem/ktem_tests/resources/embedding_openai.json",
    "content": "{\n  \"object\": \"list\",\n  \"data\": [\n    {\n      \"object\": \"embedding\",\n      \"index\": 0,\n      \"embedding\": [\n        0.006555966101586819,\n        0.003670461941510439,\n        -0.011642491444945335,\n        -0.026776473969221115,\n        -0.012383491732180119,\n        -0.0014341175556182861,\n        -0.013375678099691868,\n        0.009356695227324963,\n        -0.006364436354488134,\n        -0.0294390507042408,\n        0.023950627073645592,\n        0.0029859787318855524,\n        -0.023234745487570763,\n        -0.009205983020365238,\n        0.006744355894625187,\n        0.0011790062999352813,\n        0.02607315219938755,\n        -0.018437083810567856,\n        0.008904559537768364,\n        0.009620440192520618,\n        -0.01306169480085373,\n        -0.0011358336778357625,\n        0.007253008428961039,\n        0.00875384733080864,\n        -0.012710033915936947,\n        0.0037206991109997034,\n        0.005419347435235977,\n        -0.017243949696421623,\n        0.036246202886104584,\n        -0.0266759991645813,\n        0.012647237628698349,\n        -0.008552898652851582,\n        -0.00762350857257843,\n        -0.012546762824058533,\n        0.007083457428961992,\n        -0.014078999869525433,\n        0.0048761568032205105,\n        -0.013689660467207432,\n        0.018211016431450844,\n        -0.014367864467203617,\n        0.008307991549372673,\n        0.006022194866091013,\n        0.005457025486975908,\n        -0.006320478860288858,\n        -0.03609549254179001,\n        0.012986338697373867,\n        0.00017592862423043698,\n        -0.016427593305706978,\n        -0.004734864458441734,\n        0.0257717277854681,\n        0.01944182999432087,\n        -0.00211938563734293,\n        -0.0147195253521204,\n        0.01332544069737196,\n        -0.017670966684818268,\n        0.0013956546317785978,\n        -0.04024006798863411,\n        0.024754423648118973,\n        0.032051388174295425,\n     
   -0.018713390454649925,\n        0.011265711858868599,\n        0.012370931915938854,\n        -0.013576626777648926,\n        0.0040974789299070835,\n        -0.002153923735022545,\n        0.009915584698319435,\n        0.006637601647526026,\n        0.011353626847267151,\n        -0.019919084385037422,\n        0.030142372474074364,\n        0.01966789737343788,\n        -0.008213796652853489,\n        -0.004213652573525906,\n        -0.009162025526165962,\n        0.012446288019418716,\n        0.008979915641248226,\n        -0.02098662778735161,\n        0.003113141981884837,\n        0.0018838982796296477,\n        -0.0008289152756333351,\n        0.02722861059010029,\n        -0.03355850651860237,\n        -0.004408321809023619,\n        0.017859356477856636,\n        0.019893966615200043,\n        -0.0014137086691334844,\n        -0.0021602034103125334,\n        0.017959831282496452,\n        -0.0114603815600276,\n        -0.020069796591997147,\n        0.007604669313877821,\n        0.001281835837289691,\n        0.01132222916930914,\n        0.006668999791145325,\n        -0.01743233948945999,\n        0.0048761568032205105,\n        0.0016923686489462852,\n        0.02356128767132759,\n        -0.005796127021312714,\n        -0.041470881551504135,\n        -0.01292354241013527,\n        -0.005824385676532984,\n        -0.010700542479753494,\n        -0.017005322501063347,\n        -0.007950050756335258,\n        0.011560855433344841,\n        -0.005485283676534891,\n        -0.0002590360236354172,\n        0.028434304520487785,\n        0.007447678130120039,\n        -0.026198744773864746,\n        0.009306457825005054,\n        -0.00500488979741931,\n        -0.03107176162302494,\n        -0.0007555217016488314,\n        -0.008056805469095707,\n        0.007479076273739338,\n        0.003978165332227945,\n        -0.005984516814351082,\n        -0.02778122015297413,\n        0.015272135846316814,\n        0.029338575899600983,\n        
0.010964288376271725,\n        -0.023548727855086327,\n        0.007014381233602762,\n        0.0014137086691334844,\n        -0.03380969539284706,\n        -0.016741575673222542,\n        -0.005230957642197609,\n        -0.004794521257281303,\n        0.0315992534160614,\n        0.0041100382804870605,\n        0.010644025169312954,\n        -0.0014294078573584557,\n        -0.035367049276828766,\n        0.0246916264295578,\n        -0.007096016779541969,\n        0.02096150815486908,\n        -0.017030440270900726,\n        -0.011906237341463566,\n        0.006273381412029266,\n        0.021237812936306,\n        0.01757049188017845,\n        -0.013224965892732143,\n        -0.005092805251479149,\n        0.01288586389273405,\n        0.004486817866563797,\n        -0.016339678317308426,\n        0.008917118422687054,\n        -0.015083746053278446,\n        0.008377067744731903,\n        0.0185626782476902,\n        0.012961219996213913,\n        -0.002535413019359112,\n        0.0050143091939389706,\n        0.016025694087147713,\n        0.004618690814822912,\n        0.0205470509827137,\n        0.008000288158655167,\n        -0.013074253685772419,\n        -0.0007264782907441258,\n        -0.0004474258457776159,\n        0.0033125211484730244,\n        -0.03084569424390793,\n        0.009400652721524239,\n        0.015787066891789436,\n        0.02607315219938755,\n        0.003689300734549761,\n        -0.013940847478806973,\n        -0.02604803442955017,\n        -0.011454101651906967,\n        0.007340923883020878,\n        -0.043178949505090714,\n        0.02660064399242401,\n        -0.01593777909874916,\n        0.00514304218813777,\n        -0.001305384561419487,\n        0.022820288315415382,\n        -0.012697474099695683,\n        -0.017269067466259003,\n        -0.03024284727871418,\n        0.011617372743785381,\n        0.008992474526166916,\n        0.025897322222590446,\n        0.00629850011318922,\n        -0.00014511904737446457,\n        
0.009890465997159481,\n        -0.006364436354488134,\n        0.00384629238396883,\n        -0.017030440270900726,\n        0.016666220501065254,\n        0.03333244100213051,\n        0.021011745557188988,\n        0.01844964362680912,\n        -0.6892555952072144,\n        -0.007196491584181786,\n        0.025319593027234077,\n        0.02024562656879425,\n        0.006245122756808996,\n        0.009909304790198803,\n        0.012176262214779854,\n        0.027806337922811508,\n        0.006518288049846888,\n        0.0274546779692173,\n        -0.02107454277575016,\n        0.013011457398533821,\n        -0.015083746053278446,\n        -0.010719381272792816,\n        -0.0002598209830466658,\n        -0.00823263544589281,\n        0.0055763390846550465,\n        -0.012339534237980843,\n        -0.011881118640303612,\n        0.015196779742836952,\n        -0.010141652077436447,\n        0.03129782900214195,\n        -0.022104406729340553,\n        -0.014568813145160675,\n        0.009928143583238125,\n        0.011730406433343887,\n        0.0025102945510298014,\n        -0.009739753790199757,\n        -0.006964143831282854,\n        0.030192609876394272,\n        -0.0021272350568324327,\n        0.0114603815600276,\n        9.394961671205238e-05,\n        0.014154355973005295,\n        0.06611227244138718,\n        0.007610949221998453,\n        -0.005463304929435253,\n        0.008088203147053719,\n        0.009827669709920883,\n        0.028986915946006775,\n        -0.019077610224485397,\n        -0.022267676889896393,\n        0.012559321708977222,\n        -0.01662854291498661,\n        -0.002763050841167569,\n        0.009205983020365238,\n        0.012659796513617039,\n        -0.0024051100481301546,\n        0.0022543983068317175,\n        0.0017849936848506331,\n        0.014568813145160675,\n        -0.004851038102060556,\n        0.010857533663511276,\n        0.019504627212882042,\n        -0.005343991331756115,\n        0.0071588135324418545,\n     
   0.02151411771774292,\n        0.011962753720581532,\n        -0.002935741562396288,\n        0.004521355964243412,\n        0.0005718416068702936,\n        0.017168592661619186,\n        -0.0016280021518468857,\n        0.0038494321051985025,\n        -0.022443508729338646,\n        0.02381247468292713,\n        -0.021313169971108437,\n        0.01400364376604557,\n        -0.008929678238928318,\n        -0.012050669640302658,\n        -0.0018368008313700557,\n        -0.007994008250534534,\n        -0.018047746270895004,\n        -0.014744644053280354,\n        0.013965966179966927,\n        0.028107762336730957,\n        0.01681693270802498,\n        -0.0033219405449926853,\n        -0.006160347256809473,\n        0.01966789737343788,\n        0.010926609858870506,\n        0.013237525708973408,\n        -0.014066440984606743,\n        -0.01764584705233574,\n        0.02349849045276642,\n        -0.019780931994318962,\n        -0.030067017301917076,\n        -0.016276881098747253,\n        0.010430516675114632,\n        0.011479220353066921,\n        0.030192609876394272,\n        0.0033250804990530014,\n        -0.012370931915938854,\n        -0.01980605162680149,\n        0.02883620373904705,\n        0.0005384809337556362,\n        -0.011548296548426151,\n        0.003821173682808876,\n        0.027303965762257576,\n        0.0004238771216478199,\n        0.014933033846318722,\n        -0.0028038686141371727,\n        0.0003752097545657307,\n        0.00652456795796752,\n        0.010279805399477482,\n        0.006656440440565348,\n        -0.00809448305517435,\n        0.033156611025333405,\n        0.02444044128060341,\n        -0.030142372474074364,\n        0.006317338906228542,\n        0.004094338975846767,\n        -0.04071732237935066,\n        0.02121269516646862,\n        0.008471262641251087,\n        -0.032001152634620667,\n        0.0013022447237744927,\n        0.016013136133551598,\n        0.026374576613307,\n        -0.014694406650960445,\n 
       0.023159390315413475,\n        0.0020424597896635532,\n        0.013664542697370052,\n        0.0057458896189928055,\n        0.0041539957746863365,\n        0.005303173791617155,\n        0.0003883577883243561,\n        0.004342385567724705,\n        -0.01690484769642353,\n        0.001676669460721314,\n        0.020697763189673424,\n        0.005133622791618109,\n        0.00046273251064121723,\n        -0.005651694722473621,\n        0.009808829985558987,\n        0.00202676048502326,\n        0.009896745905280113,\n        -0.017884474247694016,\n        0.00862825382500887,\n        0.020308423787355423,\n        -0.007994008250534534,\n        0.005127343349158764,\n        -0.0029388812836259604,\n        -0.0006303209811449051,\n        0.011818322353065014,\n        -0.018399406224489212,\n        -0.012025550939142704,\n        0.008678491227328777,\n        0.00016974708705674857,\n        -0.013677101582288742,\n        0.01983116939663887,\n        -0.007918652147054672,\n        -0.03245328739285469,\n        0.021162457764148712,\n        -0.008044245652854443,\n        -0.0002005566784646362,\n        0.0036076651886105537,\n        -0.029187863692641258,\n        -0.03569359332323074,\n        -0.028961796313524246,\n        0.0027096737176179886,\n        0.016364796087145805,\n        -0.0071022966876626015,\n        -0.005174440797418356,\n        -0.01400364376604557,\n        -0.023787355050444603,\n        -0.024566033855080605,\n        0.011648771353065968,\n        0.005579478573054075,\n        -0.029012033715844154,\n        0.010857533663511276,\n        -0.0049420930445194244,\n        -0.025093525648117065,\n        -0.0024961652234196663,\n        -0.0030016780365258455,\n        0.010694262571632862,\n        -0.010681703686714172,\n        -0.015724271535873413,\n        -0.003249724628403783,\n        -0.01731930486857891,\n        0.006876228842884302,\n        -0.009601601399481297,\n        -0.009821389801800251,\n       
 0.012176262214779854,\n        0.02582196518778801,\n        -0.010185610502958298,\n        -0.0005722341011278331,\n        0.02747979573905468,\n        -0.019793491810560226,\n        -0.0016405613860115409,\n        0.017005322501063347,\n        0.0072027710266411304,\n        -0.02220488153398037,\n        0.0006103045307099819,\n        -0.007912373170256615,\n        -0.004559034015983343,\n        -0.0010808866936713457,\n        0.0038023346569389105,\n        0.01679181307554245,\n        0.01718115247786045,\n        0.018713390454649925,\n        0.004100618418306112,\n        0.017859356477856636,\n        -0.026022914797067642,\n        -0.016013136133551598,\n        -0.028811084106564522,\n        -0.0023705719504505396,\n        -0.030343322083353996,\n        0.003858851734548807,\n        0.013890610076487064,\n        0.011969033628702164,\n        -0.002329754177480936,\n        -0.0014223431935533881,\n        0.01764584705233574,\n        0.016276881098747253,\n        0.02692718617618084,\n        0.0021821821574121714,\n        -0.020923830568790436,\n        -0.02855989895761013,\n        -0.007510474417358637,\n        -0.002681415295228362,\n        0.008326830342411995,\n        -0.01458137296140194,\n        -0.0007727907504886389,\n        0.01720627024769783,\n        0.032604001462459564,\n        0.004417741671204567,\n        0.03554287925362587,\n        0.003953046631067991,\n        -0.0322021022439003,\n        -0.02505584806203842,\n        0.009231101721525192,\n        0.012659796513617039,\n        0.009563923813402653,\n        -0.016842050477862358,\n        -0.005513542331755161,\n        0.0041414364241063595,\n        -0.014405542053282261,\n        0.02916274592280388,\n        0.012163703329861164,\n        -0.008502661250531673,\n        0.010185610502958298,\n        0.028710609301924706,\n        -0.0177839994430542,\n        0.00950112659484148,\n        0.028384067118167877,\n        0.014744644053280354,\n  
      -0.009444610215723515,\n        0.005055127199739218,\n        0.02803240716457367,\n        -0.0009089809027500451,\n        0.0004933458403684199,\n        -0.014405542053282261,\n        -0.008433585055172443,\n        0.0012111896649003029,\n        -0.031498778611421585,\n        0.0004556679050438106,\n        -0.021388525143265724,\n        0.019605102017521858,\n        0.03569359332323074,\n        0.013689660467207432,\n        0.01161109283566475,\n        -0.0064680506475269794,\n        -0.005783567670732737,\n        0.009205983020365238,\n        -0.013413355685770512,\n        -0.001551076304167509,\n        -0.013488711789250374,\n        0.003160239430144429,\n        -0.005114783998578787,\n        -0.012408610433340073,\n        -0.003158669453114271,\n        0.0335836261510849,\n        -0.026274101808667183,\n        0.009319016709923744,\n        0.0005424057017080486,\n        -0.0011326938401907682,\n        -0.008841762319207191,\n        -0.005205838941037655,\n        0.0030566249042749405,\n        -0.013036576099693775,\n        -0.022443508729338646,\n        -0.0017865635454654694,\n        0.011830881237983704,\n        0.007096016779541969,\n        -0.01605081371963024,\n        -0.024038542062044144,\n        -0.005039427895098925,\n        -0.0011122849537059665,\n        -0.0032779830507934093,\n        -0.008571737445890903,\n        0.0041194576770067215,\n        0.014292508363723755,\n        -0.02342313528060913,\n        0.015485644340515137,\n        -0.002646877197548747,\n        0.020722880959510803,\n        0.00680087273940444,\n        -0.009865347295999527,\n        -0.00600649556145072,\n        0.021036865189671516,\n        0.020371221005916595,\n        -0.0016107329865917563,\n        -0.019291117787361145,\n        0.013287762179970741,\n        -0.004006423521786928,\n        -0.00762350857257843,\n        -0.015334932133555412,\n        0.0008681631297804415,\n        -0.008295431733131409,\n        
-0.0007543442770838737,\n        0.011523177847266197,\n        -0.0027818898670375347,\n        0.0037332584615796804,\n        0.01304913591593504,\n        -0.005237237084656954,\n        -0.00340043636970222,\n        0.016691338270902634,\n        0.02612338960170746,\n        -0.002295216079801321,\n        -0.008333110250532627,\n        -0.009683237411081791,\n        -0.022305356338620186,\n        -0.00935041531920433,\n        0.06972935795783997,\n        -0.001001605880446732,\n        -0.0031319810077548027,\n        0.019617659971117973,\n        -0.019014813005924225,\n        -0.012848186306655407,\n        -0.04051637277007103,\n        -0.018223576247692108,\n        -0.01665366068482399,\n        -0.0031979172490537167,\n        -0.004436580464243889,\n        0.004458559211343527,\n        -0.013802695088088512,\n        0.01994420401751995,\n        0.013551508076488972,\n        0.009080389514565468,\n        0.00340043636970222,\n        -0.027052778750658035,\n        -0.011341067962348461,\n        0.0048667374067008495,\n        -0.0151088647544384,\n        0.010110254399478436,\n        0.00046077012666501105,\n        0.009990940801799297,\n        0.0205470509827137,\n        0.008307991549372673,\n        0.00277247023768723,\n        0.01720627024769783,\n        -0.007460237015038729,\n        -0.0038965295534580946,\n        0.008483821526169777,\n        -0.00610383041203022,\n        0.01745745725929737,\n        0.008778966031968594,\n        0.013312880881130695,\n        0.014078999869525433,\n        0.0029043431859463453,\n        0.03780356049537659,\n        0.008464982733130455,\n        -0.005384809337556362,\n        0.011868558824062347,\n        0.005287474486976862,\n        -0.006179186515510082,\n        -0.006009635515511036,\n        0.0142673896625638,\n        0.0023344638757407665,\n        -0.0057710083201527596,\n        0.028660371899604797,\n        -0.0014239131705835462,\n        
-0.028685491532087326,\n        0.023121710866689682,\n        0.007742822170257568,\n        -0.009218541905283928,\n        -0.023071475327014923,\n        -0.01187483873218298,\n        0.018072864040732384,\n        0.00027355772908777,\n        -0.013928287662565708,\n        -0.010844974778592587,\n        -0.017796559259295464,\n        -0.014744644053280354,\n        -0.01513398252427578,\n        -0.014694406650960445,\n        -0.001071467180736363,\n        -0.031172236427664757,\n        -0.019190644845366478,\n        -0.02369944006204605,\n        0.002376851625740528,\n        -0.007466516923159361,\n        0.001561280689202249,\n        -0.0048981355503201485,\n        -0.03863247483968735,\n        -0.0125404829159379,\n        -0.0036516229156404734,\n        0.01909017004072666,\n        0.0004517431079875678,\n        -0.001898027490824461,\n        -0.006486889906227589,\n        0.004044101573526859,\n        0.013802695088088512,\n        -0.006047313567250967,\n        -0.029012033715844154,\n        0.011271991766989231,\n        -0.006292220205068588,\n        0.015611236914992332,\n        0.0031727987807244062,\n        0.00666272034868598,\n        0.009111788123846054,\n        -0.01690484769642353,\n        0.029589762911200523,\n        0.008521500043570995,\n        0.015071186237037182,\n        0.026299219578504562,\n        0.0003149642434436828,\n        0.018650593236088753,\n        0.004537055268883705,\n        0.0050582666881382465,\n        0.001783423707820475,\n        -0.019906524568796158,\n        -1.6802998288767412e-05,\n        -0.008307991549372673,\n        -0.01179320365190506,\n        -0.0020785678643733263,\n        0.004810220096260309,\n        -0.0034695127978920937,\n        0.01676669530570507,\n        0.01690484769642353,\n        0.01568659394979477,\n        -0.015372609719634056,\n        -0.0016327118501067162,\n        0.034211594611406326,\n        -0.024088779464364052,\n        
0.016251763328909874,\n        -0.0064806099981069565,\n        0.0001965337578440085,\n        0.013438474386930466,\n        0.021564355120062828,\n        0.03504050895571709,\n        -0.004694046452641487,\n        -0.03443766012787819,\n        -0.005557499825954437,\n        -0.025281915441155434,\n        0.008490101434290409,\n        0.0157996267080307,\n        0.009130626916885376,\n        0.0012418029364198446,\n        -0.0018619195325300097,\n        -0.022192321717739105,\n        -0.006907626986503601,\n        -0.0032748430967330933,\n        -0.005548080429434776,\n        0.019542304798960686,\n        -0.006304779555648565,\n        -0.0023250444792211056,\n        -0.015309813432395458,\n        -0.006499449256807566,\n        -0.018324051052331924,\n        0.005968817975372076,\n        -0.006851110141724348,\n        -0.017859356477856636,\n        -0.02425205148756504,\n        -0.0063236188143491745,\n        -0.006568525452166796,\n        -0.013890610076487064,\n        -0.04551498219370842,\n        -0.044786542654037476,\n        -0.016163846477866173,\n        0.00394362723454833,\n        -0.017444897443056107,\n        0.016364796087145805,\n        -0.003949906677007675,\n        -0.000322813808452338,\n        -0.019981881603598595,\n        -0.010625186376273632,\n        -0.006342457607388496,\n        -0.01692996546626091,\n        -0.0006311059114523232,\n        -0.006116389762610197,\n        0.027002541348338127,\n        0.027931932359933853,\n        0.025369830429553986,\n        0.013978525064885616,\n        0.0114603815600276,\n        0.006474330555647612,\n        -0.008559177629649639,\n        -0.01662854291498661,\n        0.004559034015983343,\n        -0.012094627134501934,\n        -0.014204593375325203,\n        0.012710033915936947,\n        0.020999185740947723,\n        0.028459424152970314,\n        -0.007026940584182739,\n        0.009595322422683239,\n        0.0048447586596012115,\n        
-0.008188677951693535,\n        -0.020873593166470528,\n        0.0010173050686717033,\n        -0.013928287662565708,\n        -0.014405542053282261,\n        -0.020974067971110344,\n        -0.0010471334680914879,\n        0.002153923735022545,\n        0.010958008468151093,\n        0.011234313249588013,\n        -0.015548440627753735,\n        0.011812042444944382,\n        0.008596856147050858,\n        0.02079823799431324,\n        0.003050345228984952,\n        0.026299219578504562,\n        -0.01969301700592041,\n        0.02742955833673477,\n        0.007265567779541016,\n        0.020597288385033607,\n        -0.013162169605493546,\n        0.007711423560976982,\n        -0.006794593296945095,\n        0.016917407512664795,\n        0.009262500330805779,\n        0.021451322361826897,\n        0.02770586498081684,\n        -0.012270457111299038,\n        -0.010593787766993046,\n        0.005884042475372553,\n        0.026173627004027367,\n        0.0031162817031145096,\n        0.007510474417358637,\n        0.004194813314825296,\n        -0.007862135767936707,\n        0.0027096737176179886,\n        -0.020195389166474342,\n        -0.004998610354959965,\n        -0.013501270674169064,\n        0.004712885711342096,\n        -0.012201380915939808,\n        -0.01872594840824604,\n        0.018625473603606224,\n        -0.011881118640303612,\n        -0.029087388888001442,\n        -0.015787066891789436,\n        -0.0003167304093949497,\n        0.048001728951931,\n        0.005959398113191128,\n        0.012100907042622566,\n        0.01939159259200096,\n        0.0028556757606565952,\n        -0.005940559320151806,\n        0.02068520337343216,\n        -0.018675711005926132,\n        -0.005522961728274822,\n        0.039813049137592316,\n        0.0015455815009772778,\n        -0.0063236188143491745,\n        -0.02416413463652134,\n        0.004684627056121826,\n        0.004078639671206474,\n        -0.015071186237037182,\n        
-0.024051101878285408,\n        0.008050525560975075,\n        0.028760846704244614,\n        -0.002221429953351617,\n        -0.014619050547480583,\n        0.006113249808549881,\n        -0.033131491392850876,\n        0.02471674606204033,\n        0.003482071915641427,\n        -0.021363407373428345,\n        -0.015058627352118492,\n        0.012440008111298084,\n        -0.023071475327014923,\n        0.0088919997215271,\n        -0.0305191520601511,\n        0.02720349095761776,\n        0.03413623571395874,\n        -0.005836945027112961,\n        -0.006668999791145325,\n        0.003796054981648922,\n        0.00045880774268880486,\n        -0.0013477721950039268,\n        -0.007686304859817028,\n        0.02833382971584797,\n        -0.007058338727802038,\n        0.0036641822662204504,\n        0.01891433820128441,\n        -0.0021272350568324327,\n        -0.02079823799431324,\n        -0.005208978895097971,\n        -0.012320694513618946,\n        0.022581661120057106,\n        -0.017331864684820175,\n        0.005482144188135862,\n        -0.011899957433342934,\n        0.011328508146107197,\n        -0.003271703375503421,\n        0.012075788341462612,\n        -0.013790135271847248,\n        -0.00652456795796752,\n        -0.006210584659129381,\n        -0.0016672499477863312,\n        0.02665088139474392,\n        2.0819775272684637e-06,\n        -0.001816391944885254,\n        -0.01485767774283886,\n        -0.017984949052333832,\n        -0.024503236636519432,\n        -0.04712257534265518,\n        -0.004483677912503481,\n        0.0015801197150722146,\n        -0.029589762911200523,\n        -0.004659508354961872,\n        -0.005466444883495569,\n        0.009551363997161388,\n        0.02565869502723217,\n        0.016779253259301186,\n        -0.012358373031020164,\n        0.012553042732179165,\n        0.023247305303812027,\n        -0.0181733388453722,\n        -0.0057301907800138,\n        0.018688270822167397,\n        
0.015975456684827805,\n        -0.02584708482027054,\n        0.016251763328909874,\n        -0.0035699873697012663,\n        -0.019178085029125214,\n        0.004577872809022665,\n        -0.014104118570685387,\n        -0.004160275217145681,\n        0.011585974134504795,\n        0.013388236984610558,\n        0.00026119465474039316,\n        -0.016113610938191414,\n        0.006348737049847841,\n        0.02936369553208351,\n        0.016867170110344887,\n        0.009036432020366192,\n        -0.007322084624320269,\n        -0.003296822076663375,\n        0.037050001323223114,\n        -0.014217152260243893,\n        0.0041539957746863365,\n        -0.007334643974900246,\n        -0.012697474099695683,\n        0.019655339419841766,\n        -0.007052059285342693,\n        -0.007667466066777706,\n        -0.0007700434071011841,\n        -0.03576894849538803,\n        -0.003962466027587652,\n        -0.0019985020626336336,\n        0.008138440549373627,\n        0.014342745766043663,\n        -0.015837304294109344,\n        -0.03210162743926048,\n        -0.0056108771823346615,\n        -0.016703898087143898,\n        0.00034538135514594615,\n        0.009984660893678665,\n        -0.01095172856003046,\n        0.001340707647614181,\n        0.013840372674167156,\n        0.016716457903385162,\n        0.00038973146001808345,\n        0.005940559320151806,\n        -0.011535737663507462,\n        -0.01093916967511177,\n        -0.016314558684825897,\n        -0.02427716925740242,\n        -0.012634677812457085,\n        -0.0012151143746450543,\n        0.041244812309741974,\n        0.006957864388823509,\n        -0.018110541626811028,\n        -0.0088919997215271,\n        -0.005127343349158764,\n        -0.02747979573905468,\n        -0.03227745741605759,\n        -0.002741072094067931,\n        -0.0029655699618160725,\n        -0.0056234365329146385,\n        0.016741575673222542,\n        -0.008527779951691628,\n        0.022016491740942,\n        
0.006263962015509605,\n        -0.0022748070769011974,\n        -0.00584008451551199,\n        -0.019630219787359238,\n        0.011824601329863071,\n        0.004703465849161148,\n        0.01513398252427578,\n        -0.000919970334507525,\n        -0.02328498288989067,\n        0.002163343131542206,\n        0.0026296081487089396,\n        0.011912517249584198,\n        -0.0042073726654052734,\n        0.00936297420412302,\n        -0.009331576526165009,\n        -0.010487033985555172,\n        -0.014355304650962353,\n        -0.0040284027345478535,\n        -0.002763050841167569,\n        0.007253008428961039,\n        0.016364796087145805,\n        -0.0157996267080307,\n        0.001935705542564392,\n        0.0050582666881382465,\n        -0.003422415116801858,\n        0.003422415116801858,\n        -0.00468148710206151,\n        0.008433585055172443,\n        -0.015360050834715366,\n        0.015837304294109344,\n        -0.019981881603598595,\n        -0.006320478860288858,\n        0.003088023280724883,\n        -0.02336033806204796,\n        0.015159101225435734,\n        -0.025420067831873894,\n        0.00652456795796752,\n        0.0294390507042408,\n        0.01068798266351223,\n        0.00756699126213789,\n        -0.0008603135356679559,\n        -0.0018933177925646305,\n        0.0024160996545106173,\n        0.005488423630595207,\n        -0.00935041531920433,\n        -0.011315949261188507,\n        -2.2285437808022834e-05,\n        -0.007372322026640177,\n        -0.021149897947907448,\n        -0.02966511808335781,\n        0.0021617733873426914,\n        -0.025721492245793343,\n        -0.014330185949802399,\n        0.001420773332938552,\n        0.03476420417428017,\n        0.015787066891789436,\n        -0.026550406590104103,\n        -0.01319984719157219,\n        -0.023159390315413475,\n        0.013011457398533821,\n        0.009406931698322296,\n        -0.013388236984610558,\n        0.001562850666232407,\n        
-0.00962672010064125,\n        -0.0042324913665652275,\n        0.021916016936302185,\n        -0.01762072928249836,\n        0.005290614441037178,\n        0.018537558615207672,\n        0.016176406294107437,\n        -0.004251330625265837,\n        0.039260439574718475,\n        0.2337038666009903,\n        -0.012791668996214867,\n        0.0035228899214416742,\n        0.027354203164577484,\n        0.02217976190149784,\n        0.013551508076488972,\n        0.010210729204118252,\n        0.0017080678371712565,\n        0.006982983089983463,\n        0.0028289873152971268,\n        -0.01159853395074606,\n        0.0012826207093894482,\n        -0.00470974575728178,\n        0.006756915245205164,\n        0.005450745578855276,\n        0.00941949151456356,\n        -0.02107454277575016,\n        -0.019793491810560226,\n        -0.030921051278710365,\n        -0.024503236636519432,\n        0.006279660854488611,\n        -0.03295566141605377,\n        -0.025256795808672905,\n        -0.014305067248642445,\n        0.017495134845376015,\n        -0.0072969659231603146,\n        -0.008446143940091133,\n        0.0011868559522554278,\n        0.03363386541604996,\n        0.009746033698320389,\n        0.004763122648000717,\n        -0.011573415249586105,\n        0.011228034272789955,\n        0.011246873065829277,\n        -0.014367864467203617,\n        -0.019567424431443214,\n        0.03604525327682495,\n        0.013363118283450603,\n        0.03084569424390793,\n        0.003871411085128784,\n        -0.016553185880184174,\n        -0.025068406015634537,\n        -0.004518216010183096,\n        -0.017633287236094475,\n        0.0027238030452281237,\n        0.011096160858869553,\n        0.007246728986501694,\n        -0.026173627004027367,\n        0.018123101443052292,\n        0.033985525369644165,\n        0.004656368400901556,\n        0.007014381233602762,\n        0.02300867810845375,\n        0.044032983481884,\n        0.00041406514355912805,\n       
 -0.015950338914990425,\n        0.008653372526168823,\n        0.016666220501065254,\n        -0.005259216297417879,\n        -0.000553002639207989,\n        0.0005902881384827197,\n        0.03185044229030609,\n        -2.5854542400338687e-05,\n        0.03943626955151558,\n        -0.023598965257406235,\n        0.007472796365618706,\n        -0.02519400045275688,\n        -0.002959290286526084,\n        0.003968745935708284,\n        -0.0035166102461516857,\n        -0.01667878031730652,\n        -0.0014144936576485634,\n        0.00603475421667099,\n        -0.00840846635401249,\n        -0.029514405876398087,\n        -0.009519966319203377,\n        0.010499592870473862,\n        0.0036798813380301,\n        0.03335756063461304,\n        0.0274546779692173,\n        -0.009388092905282974,\n        -0.0015463664894923568,\n        -0.009859067387878895,\n        -0.02010747417807579,\n        -0.038657594472169876,\n        -0.029037151485681534,\n        0.0022512583527714014,\n        -0.00729068648070097,\n        -0.012107186019420624,\n        -0.023372897878289223,\n        -0.014531135559082031,\n        -0.010254686698317528,\n        0.001805402571335435,\n        -0.006069292314350605,\n        0.023686882108449936,\n        0.026198744773864746,\n        -0.0034569534473121166,\n        0.010612627491354942,\n        -0.02091127075254917,\n        0.012364652007818222,\n        -0.005510402377694845,\n        -0.00246790680103004,\n        7.820140308467671e-05,\n        0.0090866694226861,\n        0.0022292796056717634,\n        0.009903025813400745,\n        -0.0071148560382425785,\n        0.0007563066901639104,\n        -0.00300324778072536,\n        -0.01563635654747486,\n        -0.022280236706137657,\n        -0.0016735296230763197,\n        -0.006807152647525072,\n        -0.01108988095074892,\n        -0.00680087273940444,\n        0.00976487249135971,\n        -0.005080245900899172,\n        -0.00265472661703825,\n        
-0.0003416528052184731,\n        -0.008885719813406467,\n        -0.003959326073527336,\n        -0.013915728777647018,\n        -0.014794881455600262,\n        0.002414529677480459,\n        -0.006169767118990421,\n        -0.014807440340518951,\n        -0.006888788193464279,\n        0.00915574561804533,\n        0.014531135559082031,\n        -0.030016779899597168,\n        0.00854661874473095,\n        -0.0002739502233453095,\n        0.021237812936306,\n        0.008113321848213673,\n        -0.006989262532442808,\n        0.007334643974900246,\n        0.016992762684822083,\n        -0.0028666651342064142,\n        -0.016879728063941002,\n        0.0035448686685413122,\n        -0.0021131059620529413,\n        -0.010003499686717987,\n        0.0088919997215271,\n        -0.0073534827679395676,\n        -0.014920474030077457,\n        -0.01649039052426815,\n        0.03112199902534485,\n        -0.027555152773857117,\n        -0.010970567353069782,\n        0.002863525412976742,\n        -0.04272681474685669,\n        -0.018185898661613464,\n        -0.013903168961405754,\n        -0.0035385889932513237,\n        0.027931932359933853,\n        -0.020735440775752068,\n        -0.019680457189679146,\n        -0.02485489845275879,\n        -0.006210584659129381,\n        0.009896745905280113,\n        -0.012502805329859257,\n        0.009645558893680573,\n        0.02996654249727726,\n        -0.007862135767936707,\n        -0.038657594472169876,\n        -0.013149609789252281,\n        -0.16045789420604706,\n        0.014430660754442215,\n        0.011523177847266197,\n        -0.012760271318256855,\n        0.02066008374094963,\n        0.015171661041676998,\n        0.024641389027237892,\n        0.004643809515982866,\n        -0.009067830629646778,\n        0.018575238063931465,\n        0.009859067387878895,\n        -0.01288586389273405,\n        -0.014945592731237411,\n        -0.014305067248642445,\n        0.005290614441037178,\n        
-0.01612616889178753,\n        -0.008182398043572903,\n        0.012352093122899532,\n        0.029740475118160248,\n        0.028685491532087326,\n        0.035115864127874374,\n        -0.007334643974900246,\n        -0.0009168304968625307,\n        -0.00477882195264101,\n        0.013714779168367386,\n        -0.007811898365616798,\n        0.0007111715967766941,\n        0.03393528610467911,\n        -0.010053737089037895,\n        -0.007020661141723394,\n        -0.016000576317310333,\n        -0.02888644114136696,\n        0.02582196518778801,\n        0.004559034015983343,\n        0.021966254338622093,\n        -0.006430373061448336,\n        0.012446288019418716,\n        -0.042601220309734344,\n        -0.01635223627090454,\n        0.024930253624916077,\n        0.032855186611413956,\n        0.018625473603606224,\n        0.019228322431445122,\n        -0.013526389375329018,\n        -0.015410288237035275,\n        -0.005224677734076977,\n        0.020609848201274872,\n        0.015083746053278446,\n        0.01621408388018608,\n        -0.0070897373370826244,\n        0.008119601756334305,\n        -0.003028366481885314,\n        -0.0005110073834657669,\n        -0.00622942391782999,\n        0.019981881603598595,\n        0.029489288106560707,\n        0.006888788193464279,\n        0.02452835626900196,\n        0.0024773261975497007,\n        -0.008458703756332397,\n        -0.0002564848982729018,\n        -0.012370931915938854,\n        -0.006254542153328657,\n        -0.004706605803221464,\n        0.006050453521311283,\n        -0.005224677734076977,\n        0.0014686556532979012,\n        0.009689517319202423,\n        -0.00024942029267549515,\n        0.018349168822169304,\n        -0.008910838514566422,\n        -0.023084033280611038,\n        -0.01039911899715662,\n        -0.02554566040635109,\n        0.006825991440564394,\n        0.008389626629650593,\n        -0.03378457576036453,\n        0.03109688125550747,\n        
-0.0114603815600276,\n        -0.014832559041678905,\n        -0.008295431733131409,\n        0.021313169971108437,\n        -0.0024836058728396893,\n        0.007070898078382015,\n        -0.023121710866689682,\n        0.00038973146001808345,\n        0.0032465846743434668,\n        0.012653516605496407,\n        -0.03134806826710701,\n        -0.027002541348338127,\n        0.007453957572579384,\n        -0.017984949052333832,\n        -0.014166914857923985,\n        -0.018022626638412476,\n        0.00543504673987627,\n        0.015598678030073643,\n        0.012534203007817268,\n        0.01623920351266861,\n        -0.01571171171963215,\n        -0.019893966615200043,\n        -0.002260677982121706,\n        0.017143474891781807,\n        -0.016013136133551598,\n        0.011190355755388737,\n        0.03049403429031372,\n        -0.005469584837555885,\n        0.017532814294099808,\n        0.007981449365615845,\n        0.022405831143260002,\n        -0.03169972822070122,\n        -0.03217698261141777,\n        0.006631322205066681,\n        0.014694406650960445,\n        0.026148507371544838,\n        0.01623920351266861,\n        0.026449931785464287,\n        -0.013262644410133362,\n        -0.01734442450106144,\n        0.0048321993090212345,\n        -0.013099372386932373,\n        0.052045829594135284,\n        0.006223144009709358,\n        -0.04420881345868111,\n        -0.0010887362295761704,\n        0.0028792244847863913,\n        0.0002376459160586819,\n        -0.08409722149372101,\n        -0.03438742458820343,\n        0.004712885711342096,\n        0.014405542053282261,\n        -0.001269276486709714,\n        0.03835617005825043,\n        0.004216792527586222,\n        -0.0017881334060803056,\n        0.0019875126890838146,\n        0.005535521078854799,\n        0.008923398330807686,\n        -0.03049403429031372,\n        -0.01787191443145275,\n        -0.010568669065833092,\n        0.04071732237935066,\n        -0.02720349095761776,\n   
     0.012779110111296177,\n        0.0010651875054463744,\n        -0.033960405737161636,\n        0.011604813858866692,\n        -0.008678491227328777,\n        0.007868414744734764,\n        0.0028478263411670923,\n        -0.001828951295465231,\n        -0.02222999930381775,\n        0.012201380915939808,\n        -0.022971000522375107,\n        0.03127271309494972,\n        0.0016358516877517104,\n        -0.004926394205540419,\n        -0.016754135489463806,\n        -0.024063661694526672,\n        -0.014166914857923985,\n        -0.008307991549372673,\n        -0.004449139814823866,\n        0.013626864179968834,\n        -0.04099362716078758,\n        0.03102152608335018,\n        0.013413355685770512,\n        -0.04106898233294487,\n        0.027253728359937668,\n        0.010593787766993046,\n        -0.006461771205067635,\n        -0.05581362545490265,\n        -0.012659796513617039,\n        0.017168592661619186,\n        -0.0019137266790494323,\n        0.03777844086289406,\n        0.01870083063840866,\n        -0.009048991836607456,\n        -0.03222722187638283,\n        -0.03139830380678177,\n        -0.021916016936302185,\n        0.010832414962351322,\n        0.01048075407743454,\n        -0.0017535953084006906,\n        0.008056805469095707,\n        0.028911558911204338,\n        -0.009921864606440067,\n        0.011962753720581532,\n        0.013375678099691868,\n        0.006147787906229496,\n        -0.02038377895951271,\n        0.01704300008714199,\n        -0.012559321708977222,\n        0.018575238063931465,\n        -0.012697474099695683,\n        0.0232221856713295,\n        0.0201577115803957,\n        0.00969579629600048,\n        -0.01563635654747486,\n        0.028409186750650406,\n        0.006245122756808996,\n        0.010053737089037895,\n        -0.03792915120720863,\n        -0.005469584837555885,\n        -0.015837304294109344,\n        -0.020258186385035515,\n        0.006606203503906727,\n        -0.005811826325953007,\n  
      -0.03310637176036835,\n        -0.020735440775752068,\n        0.009218541905283928,\n        -0.008182398043572903,\n        0.026022914797067642,\n        0.01360174547880888,\n        -0.01054983027279377,\n        0.0057710083201527596,\n        -0.015347491018474102,\n        -0.03544240817427635,\n        -0.03054427169263363,\n        0.01035516057163477,\n        0.006989262532442808,\n        -0.011510618962347507,\n        0.007529313676059246,\n        0.014757202938199043,\n        -0.01180576253682375,\n        -0.0010895212180912495,\n        0.01607593148946762,\n        0.010807296261191368,\n        -0.02485489845275879,\n        -0.011824601329863071,\n        -0.0650070533156395,\n        0.032352812588214874,\n        -0.016113610938191414,\n        -0.019353915005922318,\n        0.0009600031771697104,\n        -0.022016491740942,\n        0.006687839049845934,\n        -0.01512142363935709,\n        0.002095836913213134,\n        -0.00881036464124918,\n        -0.018675711005926132,\n        -0.0048635974526405334,\n        -0.002117815660312772,\n        0.012615839019417763,\n        -0.02798216976225376,\n        0.0032685634214431047,\n        0.020182831212878227,\n        0.014192033559083939,\n        0.005880902521312237,\n        -0.002144504338502884,\n        0.00037756460369564593,\n        -0.03245328739285469,\n        -0.016502948477864265,\n        0.001036928966641426,\n        -0.023774797096848488,\n        0.011177796870470047,\n        -0.0314234234392643,\n        0.0005577123956754804,\n        -0.002411389723420143,\n        -0.00874128844588995,\n        0.0064366525039076805,\n        -0.042576100677251816,\n        0.0064021144062280655,\n        0.022682135924696922,\n        0.0008108612382784486,\n        -0.010122813284397125,\n        0.006794593296945095,\n        0.015083746053278446,\n        0.007366042118519545,\n        0.020333541557192802,\n        -0.030895931646227837,\n        
-0.033181726932525635,\n        0.015611236914992332,\n        -0.010832414962351322,\n        0.0036013855133205652,\n        0.004634389653801918,\n        -0.005340851843357086,\n        -0.0033941566944122314,\n        0.030418677255511284,\n        0.009808829985558987,\n        -0.003227745648473501,\n        0.029263220727443695,\n        -0.028409186750650406,\n        -0.013036576099693775,\n        -0.012841906398534775,\n        -0.008351949043571949,\n        -0.017143474891781807,\n        -0.0003757984668482095,\n        -0.008333110250532627,\n        -0.04493725299835205,\n        0.02248118631541729,\n        0.00018220828496851027,\n        0.02966511808335781,\n        -0.007868414744734764,\n        0.005246656946837902,\n        0.010813576169312,\n        -0.0205470509827137,\n        -0.002692404668778181,\n        -0.014619050547480583,\n        -0.02234303392469883,\n        -0.006920186337083578,\n        -0.00593741936609149,\n        0.00517130084335804,\n        0.005312593188136816,\n        0.019630219787359238,\n        0.020647525787353516,\n        -0.003180648200213909,\n        0.0170932374894619,\n        -0.040616847574710846,\n        0.037728201597929,\n        0.020258186385035515,\n        0.021112220361828804,\n        -0.030318202450871468,\n        -0.0004332966054789722,\n        0.034563254565000534,\n        0.006088131107389927,\n        -0.006254542153328657,\n        -0.01731930486857891,\n        -0.009205983020365238,\n        0.00310058263130486,\n        -0.03607037290930748,\n        -0.0266759991645813,\n        0.006939025595784187,\n        0.024289729073643684,\n        -0.0038368727546185255,\n        0.00473800441250205,\n        0.017520254477858543,\n        0.00783073715865612,\n        0.017658406868577003,\n        0.023372897878289223,\n        0.0023721419274806976,\n        0.009438330307602882,\n        -0.016000576317310333,\n        -0.004552754107862711,\n        -0.008785245940089226,\n      
  0.005924860015511513,\n        -0.006289080251008272,\n        -0.037878915667533875,\n        -0.007328364532440901,\n        0.012898423708975315,\n        0.011812042444944382,\n        0.029790712520480156,\n        -0.006361296400427818,\n        0.015046067535877228,\n        -0.02499305084347725,\n        -0.00584008451551199,\n        0.02609827183187008,\n        -0.0014945593429729342,\n        -0.028434304520487785,\n        0.04242539033293724,\n        -0.008973635733127594,\n        0.0035794067662209272,\n        0.02770586498081684,\n        -0.015071186237037182,\n        0.008433585055172443,\n        0.002970279660075903,\n        -0.007793059106916189,\n        -0.0035228899214416742,\n        0.01905249059200287,\n        -0.0050017498433589935,\n        0.011717847548425198,\n        -0.010041178204119205,\n        -0.016452711075544357,\n        -0.015410288237035275,\n        0.0009136906592175364,\n        0.01372733898460865,\n        -0.008351949043571949,\n        0.024339966475963593,\n        -0.0161889661103487,\n        0.05631599947810173,\n        -0.012722592800855637,\n        -0.0041634151712059975,\n        0.02287052571773529,\n        0.014292508363723755,\n        0.020735440775752068,\n        0.024427881464362144,\n        0.018072864040732384,\n        -0.008559177629649639,\n        -0.02024562656879425,\n        -0.0038494321051985025,\n        0.006756915245205164,\n        -0.0008289152756333351,\n        -0.02259422093629837,\n        -0.011127559468150139,\n        0.004289008677005768,\n        -0.01591266132891178,\n        0.004398902412503958,\n        -0.019630219787359238,\n        0.01292354241013527,\n        0.018324051052331924,\n        0.008942237123847008,\n        0.0243902038782835,\n        0.013212407007813454,\n        -0.018072864040732384,\n        -0.019931644201278687,\n        0.00423877127468586,\n        0.0012331684119999409,\n        -0.018349168822169304,\n        
-0.016151288524270058,\n        0.017243949696421623,\n        0.0007649412145838141,\n        -0.0012857605470344424,\n        -0.01260955911129713,\n        0.006656440440565348,\n        0.002959290286526084,\n        -0.007108576130121946,\n        -0.009513686411082745,\n        -0.0036422032862901688,\n        0.010562390089035034,\n        0.02289564348757267,\n        0.0006240413058549166,\n        -0.018097983673214912,\n        -0.03295566141605377,\n        0.006813432089984417,\n        0.002750491490587592,\n        -0.02936369553208351,\n        -0.005384809337556362,\n        -0.02317194826900959\n      ]\n    }\n  ],\n  \"model\": \"ada\",\n  \"usage\": {\n    \"prompt_tokens\": 3,\n    \"total_tokens\": 3\n  }\n}\n"
  },
  {
    "path": "libs/ktem/ktem_tests/test_qa.py",
    "content": "import json\nfrom pathlib import Path\nfrom unittest.mock import patch\n\nimport pytest\nfrom index import ReaderIndexingPipeline\nfrom openai.resources.embeddings import Embeddings\nfrom openai.types.chat.chat_completion import ChatCompletion\n\nfrom kotaemon.llms import AzureChatOpenAI\n\nwith open(Path(__file__).parent / \"resources\" / \"embedding_openai.json\") as f:\n    openai_embedding = json.load(f)\n\n\n_openai_chat_completion_response = ChatCompletion.parse_obj(\n    {\n        \"id\": \"chatcmpl-7qyuw6Q1CFCpcKsMdFkmUPUa7JP2x\",\n        \"object\": \"chat.completion\",\n        \"created\": 1692338378,\n        \"model\": \"gpt-35-turbo\",\n        \"system_fingerprint\": None,\n        \"choices\": [\n            {\n                \"index\": 0,\n                \"finish_reason\": \"stop\",\n                \"message\": {\n                    \"role\": \"assistant\",\n                    \"content\": \"Hello! How can I assist you today?\",\n                    \"function_call\": None,\n                    \"tool_calls\": None,\n                },\n            }\n        ],\n        \"usage\": {\"completion_tokens\": 9, \"prompt_tokens\": 10, \"total_tokens\": 19},\n    }\n)\n\n\n@pytest.fixture(scope=\"function\")\ndef mock_openai_embedding(monkeypatch):\n    monkeypatch.setattr(Embeddings, \"create\", lambda *args, **kwargs: openai_embedding)\n\n\n@patch(\n    \"openai.resources.chat.completions.Completions.create\",\n    side_effect=lambda *args, **kwargs: _openai_chat_completion_response,\n)\ndef test_ingest_pipeline(patch, mock_openai_embedding, tmp_path):\n    indexing_pipeline = ReaderIndexingPipeline(\n        storage_path=tmp_path,\n    )\n    indexing_pipeline.indexing_vector_pipeline.embedding.openai_api_key = \"some-key\"\n    input_file_path = Path(__file__).parent / \"resources/dummy.pdf\"\n\n    # call ingestion pipeline\n    indexing_pipeline(input_file_path, force_reindex=True)\n    retrieving_pipeline = 
indexing_pipeline.to_retrieving_pipeline()\n\n    results = retrieving_pipeline(\"This is a query\")\n    assert len(results) == 1\n\n    # create llm\n    llm = AzureChatOpenAI(\n        api_key=\"dummy\",\n        api_version=\"2024-05-01-preview\",\n        azure_deployment=\"gpt-4o\",\n        azure_endpoint=\"https://test.openai.azure.com/\",\n    )\n    qa_pipeline = indexing_pipeline.to_qa_pipeline(llm=llm, openai_api_key=\"some-key\")\n    response = qa_pipeline(\"Summarize this document.\")\n    assert response\n"
  },
  {
    "path": "libs/ktem/migrations/README",
    "content": "Generic single-database configuration.\n\nTo enable database migration, please set `KH_ENABLE_ALEMBIC` to True in the\nsetting file.\n"
  },
  {
    "path": "libs/ktem/migrations/env.py",
    "content": "from logging.config import fileConfig\n\nfrom alembic import context\nfrom ktem.db.models import *  # noqa\nfrom sqlalchemy import engine_from_config, pool\nfrom sqlmodel import SQLModel\nfrom theflow.settings import settings\n\n# this is the Alembic Config object, which provides\n# access to the values within the .ini file in use.\nconfig = context.config\n\n# Interpret the config file for Python logging.\n# This line sets up loggers basically.\nif config.config_file_name is not None:\n    fileConfig(config.config_file_name)\n\n# add your model's MetaData object here\n# for 'autogenerate' support\n# from myapp import mymodel\n# target_metadata = mymodel.Base.metadata\ntarget_metadata = SQLModel.metadata\n\n# other values from the config, defined by the needs of env.py,\n# can be acquired:\n# my_important_option = config.get_main_option(\"my_important_option\")\n# ... etc.\n\n\ndef run_migrations_offline() -> None:\n    \"\"\"Run migrations in 'offline' mode.\n\n    This configures the context with just a URL\n    and not an Engine, though an Engine is acceptable\n    here as well.  
By skipping the Engine creation\n    we don't even need a DBAPI to be available.\n\n    Calls to context.execute() here emit the given string to the\n    script output.\n\n    \"\"\"\n    context.configure(\n        url=settings.KH_DATABASE,\n        target_metadata=target_metadata,\n        literal_binds=True,\n        dialect_opts={\"paramstyle\": \"named\"},\n    )\n\n    with context.begin_transaction():\n        context.run_migrations()\n\n\ndef run_migrations_online() -> None:\n    \"\"\"Run migrations in 'online' mode.\n\n    In this scenario we need to create an Engine\n    and associate a connection with the context.\n\n    \"\"\"\n    configuration = config.get_section(config.config_ini_section, {})\n    configuration[\"sqlalchemy.url\"] = settings.KH_DATABASE\n    connectable = engine_from_config(\n        configuration, prefix=\"sqlalchemy.\", poolclass=pool.NullPool\n    )\n\n    with connectable.connect() as connection:\n        context.configure(connection=connection, target_metadata=target_metadata)\n\n        with context.begin_transaction():\n            context.run_migrations()\n\n\nif context.is_offline_mode():\n    run_migrations_offline()\nelse:\n    run_migrations_online()\n"
  },
  {
    "path": "libs/ktem/migrations/script.py.mako",
    "content": "\"\"\"${message}\n\nRevision ID: ${up_revision}\nRevises: ${down_revision | comma,n}\nCreate Date: ${create_date}\n\n\"\"\"\nfrom typing import Sequence, Union\n\nfrom alembic import op\nimport sqlalchemy as sa\nimport sqlmodel\n${imports if imports else \"\"}\n\n# revision identifiers, used by Alembic.\nrevision: str = ${repr(up_revision)}\ndown_revision: Union[str, None] = ${repr(down_revision)}\nbranch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}\ndepends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}\n\n\ndef upgrade() -> None:\n    ${upgrades if upgrades else \"pass\"}\n\n\ndef downgrade() -> None:\n    ${downgrades if downgrades else \"pass\"}\n"
  },
  {
    "path": "libs/ktem/migrations/versions/.keep",
    "content": ""
  },
  {
    "path": "libs/ktem/pyproject.toml",
    "content": "[build-system]\nrequires = [\"setuptools >= 61.0\", \"wheel\", \"setuptools-git-versioning>=2.0,<3\"]\nbuild-backend = \"setuptools.build_meta\"\n\n[tool.setuptools]\ninclude-package-data = true\npackages.find.exclude = [\"ktem_tests*\", \"env*\"]\npackages.find.include = [\"ktem*\"]\n\n[tool.setuptools-git-versioning]\nenabled = true\ndev_template = \"{tag}\"\ndirty_template = \"{tag}\"\ntag_filter = \"v?\\\\d+(\\\\.\\\\d+)*.*\"\n\n[project]\nname = \"ktem\"\ndynamic = [\"version\"]\nrequires-python = \">= 3.10\"\ndescription = \"RAG-based Question and Answering Application\"\ndependencies = [\n    \"click>=8.1.7,<9\",\n    \"platformdirs>=4.2.1,<5\",\n    \"pluggy>=1.5.0,<2\",\n    \"python-decouple>=3.8,<4\",\n    \"SQLAlchemy>=2.0.29,<3\",\n    \"sqlmodel>=0.0.16,<0.1\",\n    \"tiktoken>=0.6.0,<1\",\n    \"gradio>=4.31.0,<5\",\n    \"gradiologin\",\n    \"python-multipart==0.0.12\", # required for gradio, pinning to avoid yanking issues with micropip (fixed in gradio >= 5.4.0)\n    \"markdown>=3.6,<4\",\n    \"tzlocal>=5.0\",\n    \"mcp>=1.0.0\",\n]\nauthors = [\n    { name = \"@trducng\", email = \"john@cinnamon.is\" },\n    { name = \"@lone17\", email = \"ian@cinnamon.is\" },\n    { name = \"@taprosoft\", email = \"tadashi@cinnamon.is\" },\n    { name = \"@cin-albert\", email = \"albert@cinnamon.is\" },\n]\nclassifiers = [\n    \"Programming Language :: Python :: 3\",\n    \"Operating System :: OS Independent\",\n]\n"
  },
  {
    "path": "libs/ktem/requirements.txt",
    "content": "platformdirs\ntzlocal\n"
  },
  {
    "path": "mkdocs.yml",
    "content": "repo_name: Cinnamon/kotaemon\nrepo_url: https://github.com/Cinnamon/kotaemon\nsite_name: kotaemon Docs\nedit_uri: edit/main/docs/\n\nnav:\n  - Getting Started:\n      - Quick Start: index.md\n      - Basic Usage: usage.md\n  # - Application:\n  #     - Customize UI: pages/app/customize-ui.md\n  # - Functional description: pages/app/functional-description.md\n  - Development:\n      - development/index.md\n      # - Data & Data Structure Components: development/data-components.md\n      # - Features: pages/app/features.md\n      - Customize flow logic: pages/app/customize-flows.md\n      - Creating a Component: development/create-a-component.md\n      - Components:\n          - Index:\n              - File index: pages/app/index/file.md\n          - Settings:\n              - pages/app/settings/overview.md\n              - pages/app/settings/user-settings.md\n          - Extension:\n              - User management: pages/app/ext/user-management.md\n      - Contributing: development/contributing.md\n  # generated using gen-files + literate-nav\n  - API Reference: reference/\n  - Changelogs: https://github.com/Cinnamon/kotaemon/releases\n  - Issue Tracker: https://github.com/Cinnamon/kotaemon/issues\n  - Live Demo: https://huggingface.co/spaces/cin-model/kotaemon-demo\n\nmarkdown_extensions:\n  - admonition\n  - md_in_html\n  - pymdownx.highlight:\n      use_pygments: true\n      anchor_linenums: true\n      line_spans: __span\n      linenums: true\n      pygments_lang_class: true\n  - pymdownx.inlinehilite\n  - pymdownx.snippets\n  - pymdownx.details\n  - pymdownx.extra\n  - pymdownx.tabbed:\n      alternate_style: true\n  - pymdownx.superfences:\n      custom_fences:\n        - name: mermaid\n          class: mermaid\n          format: !!python/name:pymdownx.superfences.fence_code_format\n  - toc:\n      permalink: true\n      title: Page contents\n  - mdx_truly_sane_lists\n\nplugins:\n  - search\n  - gen-files:\n      scripts:\n        - 
docs/scripts/generate_reference_docs.py\n  - literate-nav:\n      nav_file: Summary.md\n  - mkdocstrings:\n      handlers:\n        python:\n          options:\n            docstring_options:\n              ignore_init_summary: false\n            filters:\n              - \"!^_\"\n            members_order: source\n            separate_signature: true\n          paths: [libs/kotaemon/kotaemon]\n  - git-revision-date-localized:\n      enable_creation_date: true\n      type: timeago\n      fallback_to_build_date: true\n  - section-index\n  - mkdocs-video\n  - include-markdown\n\ntheme:\n  features:\n    - content.action.edit\n    - content.tabs.link\n    - content.code.annotate\n    - content.code.annotations\n    - content.code.copy\n    - navigation.tabs\n    - navigation.top\n    - navigation.instant\n    - navigation.indexes\n    - toc.follow\n    - search.share\n    - search.highlight\n    - search.suggest\n  name: material\n  custom_dir: docs/theme\n  palette:\n    scheme: dracula\n    primary: deep purple\n    accent: deep purple\n  icon:\n    repo: fontawesome/brands/github\n    edit: material/pencil\n    view: material/eye\n\nextra_css:\n  - extra/css/code_select.css\n  - assets/pymdownx-extras/extra-fb5a2a1c86.css\n\nextra_javascript:\n  - assets/pymdownx-extras/extra-loader-MCFnu0Wd.js\n\nvalidation:\n  absolute_links: warn\n  omitted_files: warn\n  unrecognized_links: warn\n"
  },
  {
    "path": "pyproject.toml",
    "content": "[build-system]\nrequires = [\"setuptools >= 61.0\", \"wheel\", \"setuptools-git-versioning>=2.0,<3\"]\nbuild-backend = \"setuptools.build_meta\"\n\n[tool.setuptools]\ninclude-package-data = false\npackages.find.include = []\n\n[tool.setuptools-git-versioning]\nenabled = true\ndev_template = \"{tag}\"\ndirty_template = \"{tag}\"\ntag_filter = \"v?\\\\d+(\\\\.\\\\d+)*.*\"\n\n[project]\nname = \"kotaemon-app\"\ndynamic = [\"version\"]\nrequires-python = \">= 3.10\"\ndescription = \"Kotaemon App\"\ndependencies = [\n    \"kotaemon[all]\",\n    \"ktem\"\n]\nauthors = [\n    { name = \"@trducng\", email = \"john@cinnamon.is\" },\n    { name = \"@lone17\", email = \"ian@cinnamon.is\" },\n    { name = \"@taprosoft\", email = \"tadashi@cinnamon.is\" },\n    { name = \"@cin-albert\", email = \"albert@cinnamon.is\" },\n]\nclassifiers = [\n    \"Programming Language :: Python :: 3\",\n    \"Operating System :: OS Independent\",\n]\n\n[tool.uv.sources]\nkotaemon = { workspace = true }\nktem = { workspace = true }\n\n# uv workspace configuration\n[tool.uv.workspace]\nmembers = [\"libs/kotaemon\", \"libs/ktem\"]\n\n[dependency-groups]\ndev = [\n    \"black\",\n    \"coverage\",\n    \"flake8\",\n    \"ipython\",\n    \"pre-commit\",\n    \"pytest\",\n    \"pytest-mock\",\n    \"sphinx\"\n]\n\n[project.urls]\nHomepage = \"https://cinnamon.github.io/kotaemon/\"\nRepository = \"https://github.com/Cinnamon/kotaemon/\"\nDocumentation = \"https://cinnamon.github.io/kotaemon/\"\n\n[tool.codespell]\nskip = \"*.js,*.css,*.map\"\n# `llm` abbreviation for large language models\nignore-words-list = \"llm,fo\"\nquiet-level = 3\ncheck-filenames = \"\"\n\n[tool.isort]\nknown_first_party = [\"kotaemon\"]\n"
  },
  {
    "path": "scripts/download_pdfjs.sh",
    "content": "#!/bin/bash\n\nset -eo pipefail\n\n# Check and capture input argument for PDFJS_VERSION_DIST\nif [ -z \"$1\" ]; then\n    echo \"Usage: $0 <pdfjs_version_dist>\"\n    exit 1\nfi\n\npdfjs_version_dist=$1\n\nfunction check_path_for_spaces() {\n    if [[ $PWD =~ \\  ]]; then\n        echo \"The current workdir has whitespace which can lead to unintended behaviour. Please modify your path and continue later.\"\n        exit 1\n    fi\n}\n\nfunction download_and_unzip() {\n    local url=$1\n    local dest_dir=$2\n\n    if [ -d \"$dest_dir\" ]; then\n        echo \"Destination directory $dest_dir already exists. Skipping download.\"\n        return\n    fi\n\n    mkdir -p \"$dest_dir\"\n\n    local zip_file=\"${dest_dir}/downloaded.zip\"\n    echo \"Downloading $url to $zip_file\"\n    curl -L -o \"$zip_file\" \"$url\"\n\n    echo \"Unzipping $zip_file to $dest_dir\"\n    unzip -o \"$zip_file\" -d \"$dest_dir\"\n\n    rm \"$zip_file\"\n    echo \"Download and unzip completed successfully.\"\n}\n\n# Main script execution\n\npdf_js_version=\"4.0.379\"\npdf_js_dist_name=\"pdfjs-${pdf_js_version}-dist\"\npdf_js_dist_url=\"https://github.com/mozilla/pdf.js/releases/download/v${pdf_js_version}/${pdf_js_dist_name}.zip\"\n\ncheck_path_for_spaces\n\necho \"Downloading and unzipping PDF.js\"\ndownload_and_unzip \"$pdf_js_dist_url\" \"$pdfjs_version_dist\"\n\necho \"PDF.js has been set up in $pdfjs_version_dist\"\n"
  },
  {
    "path": "scripts/migrate/__init__.py",
    "content": ""
  },
  {
    "path": "scripts/migrate/migrate_chroma_db.py",
    "content": "import uuid\nfrom datetime import datetime\n\nimport chromadb\nfrom ktem.index.models import Index\nfrom sqlalchemy import (\n    JSON,\n    Column,\n    DateTime,\n    Integer,\n    String,\n    UniqueConstraint,\n    create_engine,\n    select,\n)\nfrom sqlalchemy.ext.declarative import declarative_base\nfrom sqlalchemy.ext.mutable import MutableDict\nfrom sqlalchemy.orm import Session\nfrom tzlocal import get_localzone\n\n\ndef _init_resource(private: bool = True, id: int = 1):\n    \"\"\"Init schemas. Hard-code\"\"\"\n    Base = declarative_base()\n\n    if private:\n        Source = type(\n            \"Source\",\n            (Base,),\n            {\n                \"__tablename__\": f\"index__{id}__source\",\n                \"__table_args__\": (\n                    UniqueConstraint(\"name\", \"user\", name=\"_name_user_uc\"),\n                ),\n                \"id\": Column(\n                    String,\n                    primary_key=True,\n                    default=lambda: str(uuid.uuid4()),\n                    unique=True,\n                ),\n                \"name\": Column(String),\n                \"path\": Column(String),\n                \"size\": Column(Integer, default=0),\n                \"date_created\": Column(\n                    DateTime(timezone=True), default=datetime.now(get_localzone())\n                ),\n                \"user\": Column(Integer, default=1),\n                \"note\": Column(\n                    MutableDict.as_mutable(JSON),  # type: ignore\n                    default={},\n                ),\n            },\n        )\n    else:\n        Source = type(\n            \"Source\",\n            (Base,),\n            {\n                \"__tablename__\": f\"index__{id}__source\",\n                \"id\": Column(\n                    String,\n                    primary_key=True,\n                    default=lambda: str(uuid.uuid4()),\n                    unique=True,\n                ),\n          
      \"name\": Column(String, unique=True),\n                \"path\": Column(String),\n                \"size\": Column(Integer, default=0),\n                \"date_created\": Column(\n                    DateTime(timezone=True), default=datetime.now(get_localzone())\n                ),\n                \"user\": Column(Integer, default=1),\n                \"note\": Column(\n                    MutableDict.as_mutable(JSON),  # type: ignore\n                    default={},\n                ),\n            },\n        )\n    Index = type(\n        \"IndexTable\",\n        (Base,),\n        {\n            \"__tablename__\": f\"index__{id}__index\",\n            \"id\": Column(Integer, primary_key=True, autoincrement=True),\n            \"source_id\": Column(String),\n            \"target_id\": Column(String),\n            \"relation_type\": Column(String),\n            \"user\": Column(Integer, default=1),\n        },\n    )\n\n    return {\"Source\": Source, \"Index\": Index}\n\n\ndef get_chromadb_collection(\n    db_dir: str = \"../ktem_app_data/user_data/vectorstore\",\n    collection_name: str = \"index_1\",\n):\n    \"\"\"Extract collection from chromadb\"\"\"\n    client = chromadb.PersistentClient(path=db_dir)\n    collection = client.get_or_create_collection(collection_name)\n\n    return collection\n\n\ndef update_metadata(metadata, file_id):\n    \"\"\"Update file_id\"\"\"\n    metadata[\"file_id\"] = file_id\n    return metadata\n\n\ndef migrate_chroma_db(\n    chroma_db_dir: str, sqlite_path: str, is_private: bool = True, int_index: int = 1\n):\n    chroma_collection_name = f\"index_{int_index}\"\n\n    \"\"\"Update chromadb with metadata.file_id\"\"\"\n    engine = create_engine(sqlite_path)\n    resource = _init_resource(private=is_private, id=int_index)\n    print(\"Load sqlalchemy engine successfully!\")\n\n    chroma_db_collection = get_chromadb_collection(\n        db_dir=chroma_db_dir, collection_name=chroma_collection_name\n    )\n    print(\n   
     f\"Load chromadb collection: {chroma_collection_name}, \"\n        f\"path: {chroma_db_dir} successfully!\"\n    )\n\n    # Load docs id of user\n    with Session(engine) as session:\n        stmt = select(resource[\"Source\"])\n        results = session.execute(stmt)\n        doc_ids = [r[0].id for r in results.all()]\n    print(f\"Retrieve n-docs: {len(doc_ids)}\")\n    print(doc_ids)\n\n    for doc_id in doc_ids:\n        print(\"-\")\n        # Find corresponding vector ids\n        with Session(engine) as session:\n            stmt = select(resource[\"Index\"]).where(\n                resource[\"Index\"].relation_type == \"vector\",\n                resource[\"Index\"].source_id.in_([doc_id]),\n            )\n            results = session.execute(stmt)\n            vs_ids = [r[0].target_id for r in results.all()]\n\n        print(f\"Got {len(vs_ids)} vs_ids for doc {doc_id}\")\n\n        # Update file_id\n        if len(vs_ids) > 0:\n            batch = chroma_db_collection.get(ids=vs_ids, include=[\"metadatas\"])\n            batch.update(\n                ids=batch[\"ids\"],\n                metadatas=[\n                    update_metadata(metadata, doc_id) for metadata in batch[\"metadatas\"]\n                ],\n            )\n\n        # Assert file_id. 
Skip\n        print(f\"doc-{doc_id} got updated\")\n\n\ndef main(chroma_db_dir: str, sqlite_path: str):\n    engine = create_engine(sqlite_path)\n\n    with Session(engine) as session:\n        stmt = select(Index)\n\n        results = session.execute(stmt)\n        file_indices = [r[0] for r in results.all()]\n\n        for file_index in file_indices:\n            _id = file_index.id\n            _is_private = file_index.config[\"private\"]\n\n            print(f\"Migrating for Index id: {_id}, is_private: {_is_private}\")\n\n            migrate_chroma_db(\n                chroma_db_dir=chroma_db_dir,\n                sqlite_path=sqlite_path,\n                is_private=_is_private,\n                int_index=_id,\n            )\n\n\nif __name__ == \"__main__\":\n    chrome_db_dir: str = \"./vectorstore/kan_db\"\n    sqlite_path: str = \"sqlite:///../ktem_app_data/user_data/sql.db\"\n\n    main(chrome_db_dir, sqlite_path)\n"
  },
  {
    "path": "scripts/run_linux.sh",
    "content": "#!/bin/bash\n\n# functions for better code organization\nfunction check_path_for_spaces() {\n    if [[ $PWD =~ \\  ]]; then\n        echo \"The current workdir has whitespace which can lead to unintended behaviour. Please modify your path and continue later.\"\n        exit 1\n    fi\n}\n\nfunction install_miniconda() {\n    # Miniconda installer is limited to two main architectures: x86_64 and arm64\n    local sys_arch=$(uname -m)\n    case \"${sys_arch}\" in\n    x86_64*) sys_arch=\"x86_64\" ;;\n    arm64*) sys_arch=\"aarch64\" ;;\n    aarch64*) sys_arch=\"aarch64\" ;;\n    *) {\n        echo \"Unknown system architecture: ${sys_arch}! This script runs only on x86_64 or arm64\"\n        exit 1\n    } ;;\n    esac\n\n    # if miniconda has not been installed, download and install it\n    if ! \"${conda_root}/bin/conda\" --version &>/dev/null; then\n        if [ ! -d \"$install_dir/miniconda_installer.sh\" ]; then\n            echo \"Downloading Miniconda from $miniconda_url\"\n            local miniconda_url=\"https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-${sys_arch}.sh\"\n\n            mkdir -p \"$install_dir\"\n            curl -Lk \"$miniconda_url\" >\"$install_dir/miniconda_installer.sh\"\n        fi\n\n        echo \"Installing Miniconda to $conda_root\"\n        chmod u+x \"$install_dir/miniconda_installer.sh\"\n        bash \"$install_dir/miniconda_installer.sh\" -b -p \"$conda_root\"\n        rm -rf \"$install_dir/miniconda_installer.sh\"\n    fi\n    echo \"Miniconda is installed at $conda_root\"\n\n    # test conda\n    echo \"Conda version: \"\n    \"$conda_root/bin/conda\" --version || {\n        echo \"Conda not found. Will exit now...\"\n        exit 1\n    }\n}\n\nfunction create_conda_env() {\n    local python_version=\"${1}\"\n\n    if [ ! 
-d \"${env_dir}\" ]; then\n        echo \"Creating conda environment with python=$python_version in $env_dir\"\n        \"${conda_root}/bin/conda\" create -y -k --prefix \"$env_dir\" python=\"$python_version\" || {\n            echo \"Failed to create conda environment.\"\n            echo \"Will delete the ${env_dir} (if exist) and exit now...\"\n            rm -rf $env_dir\n            exit 1\n        }\n    else\n        echo \"Conda environment exists at $env_dir\"\n    fi\n}\n\nfunction activate_conda_env() {\n    # deactivate the current env(s) to avoid conflicts\n    { conda deactivate && conda deactivate && conda deactivate; } 2>/dev/null\n\n    # check if conda env is broken (because of interruption during creation)\n    if [ ! -f \"$env_dir/bin/python\" ]; then\n        echo \"Conda environment appears to be broken. You may need to remove $env_dir and run the installer again.\"\n        exit 1\n    fi\n\n    source \"$conda_root/etc/profile.d/conda.sh\" # conda init\n    conda activate \"$env_dir\" || {\n        echo \"Failed to activate environment. Please remove $env_dir and run the installer again.\"\n        exit 1\n    }\n    echo \"Activate conda environment at $CONDA_PREFIX\"\n}\n\nfunction deactivate_conda_env() {\n    # Conda deactivate if we are in the right env\n    if [ \"$CONDA_PREFIX\" == \"$env_dir\" ]; then\n        conda deactivate\n        echo \"Deactivate conda environment at $env_dir\"\n    fi\n}\n\nfunction install_dependencies() {\n    if pip list 2>/dev/null | grep -q \"kotaemon\"; then\n        echo \"Requirements are already installed\"\n    else\n        local kotaemon_root=\"$(pwd)/libs/kotaemon\"\n        local ktem_root=\"$(pwd)/libs/ktem/\"\n\n        if [ -f \"$(pwd)/VERSION\" ]; then\n            local app_version=$(<\"$(pwd)/VERSION\")\n        else\n            local app_version=\"latest\"\n        fi\n\n        if [ -f \"pyproject.toml\" ]; then\n            echo \"Found pyproject.toml. 
Installing from source\"\n            echo \"\" && echo \"Installing libs/kotaemon\"\n            python -m pip install -e \"$kotaemon_root\"\n            echo \"\" && echo \"Installing libs/ktem\"\n            python -m pip install -e \"$ktem_root\"\n\n            python -m pip install --no-deps -e .\n        else\n            echo \"Installing Kotaemon $app_version\"\n            # Work around for versioning control\n            python -m pip install \"git+https://github.com/Cinnamon/kotaemon.git@$app_version#subdirectory=libs/kotaemon\"\n            python -m pip install \"git+https://github.com/Cinnamon/kotaemon.git@$app_version#subdirectory=libs/ktem\"\n            python -m pip install --no-deps \"git+https://github.com/Cinnamon/kotaemon.git@$app_version\"\n        fi\n\n        if ! pip list 2>/dev/null | grep -q \"kotaemon\"; then\n            echo \"Installation failed. You may need to run the installer again.\"\n            deactivate_conda_env\n            exit 1\n        else\n            print_highlight \"Install finished successfully. Clear cache...\"\n            conda clean --all -y\n            python -m pip cache purge\n\n            print_highlight \"Do you want to launch the web UI? [Y/N]\"\n            read -p \"Input> \" launch\n            local launch=${launch,,}\n            if [[ \"$launch\" != \"yes\" && \"$launch\" != \"y\" && \"$launch\" != \"true\" ]]; then\n                echo \"Will exit now...\"\n                deactivate_conda_env\n                echo \"Please run the installer again to launch the UI.\"\n                exit 0\n            fi\n        fi\n    fi\n}\n\nfunction setup_local_model() {\n    python $(pwd)/scripts/serve_local.py\n}\n\nfunction download_and_unzip() {\n    local url=$1\n    local dest_dir=$2\n\n    # Check if the destination directory exists, create if it doesn't\n    if [ -d \"$dest_dir\" ]; then\n        echo \"Destination directory $dest_dir already exists. 
Skipping download.\"\n        return\n    fi\n\n    mkdir -p \"$dest_dir\"\n\n    # Download the ZIP file\n    local zip_file=\"${dest_dir}/downloaded.zip\"\n    echo \"Downloading $url to $zip_file\"\n    curl -L -o \"$zip_file\" \"$url\"\n\n    # Unzip the file to the destination directory\n    echo \"Unzipping $zip_file to $dest_dir\"\n    unzip -o \"$zip_file\" -d \"$dest_dir\"\n\n    # Clean up the downloaded ZIP file\n    rm \"$zip_file\"\n    echo \"Download and unzip completed successfully.\"\n}\n\nfunction launch_ui() {\n    local pdfjs_prebuilt_dir=$1\n    PDFJS_PREBUILT_DIR=\"$pdfjs_prebuilt_dir\" python $(pwd)/app.py || {\n        echo \"\" && echo \"Will exit now...\"\n        exit 1\n    }\n}\n\nfunction print_highlight() {\n    local message=\"${1}\"\n    echo \"\" && echo \"******************************************************\"\n    echo $message\n    echo \"******************************************************\" && echo \"\"\n}\n\n# Main script execution\n\n# move two levels up from the dir where this script resides\ncd \"$(dirname \"${BASH_SOURCE[0]}\")\" && cd ..\n\ninstall_dir=\"$(pwd)/install_dir\"\nconda_root=\"${install_dir}/conda\"\nenv_dir=\"${install_dir}/env\"\npython_version=\"3.10\"\n\npdf_js_version=\"4.0.379\"\npdf_js_dist_name=\"pdfjs-${pdf_js_version}-dist\"\npdf_js_dist_url=\"https://github.com/mozilla/pdf.js/releases/download/v${pdf_js_version}/${pdf_js_dist_name}.zip\"\ntarget_pdf_js_dir=\"$(pwd)/libs/ktem/ktem/assets/prebuilt/${pdf_js_dist_name}\"\n\ncheck_path_for_spaces\n\nprint_highlight \"Setting up Miniconda\"\ninstall_miniconda\n\nprint_highlight \"Creating conda environment\"\ncreate_conda_env \"$python_version\"\nactivate_conda_env\n\nprint_highlight \"Installing requirements\"\ninstall_dependencies\n\nprint_highlight \"Downloading and unzipping PDF.js\"\ndownload_and_unzip $pdf_js_dist_url $target_pdf_js_dir\n\nprint_highlight \"Setting up a local model\"\nsetup_local_model\n\nprint_highlight \"Launching Kotaemon in 
your browser, please wait...\"\nlaunch_ui $target_pdf_js_dir\n\ndeactivate_conda_env\n\nread -p \"Press enter to continue\"\n"
  },
  {
    "path": "scripts/run_macos.sh",
    "content": "#!/bin/bash\n\n# functions for better code organization\nfunction check_path_for_spaces() {\n    if [[ $PWD =~ \\  ]]; then\n        echo \"The current workdir has whitespace which can lead to unintended behaviour. Please modify your path and continue later.\"\n        exit 1\n    fi\n}\n\nfunction install_miniconda() {\n    # Miniconda installer is limited to two main architectures: x86_64 and arm64\n    local sys_arch=$(uname -m)\n    case \"${sys_arch}\" in\n    x86_64*) sys_arch=\"x86_64\" ;;\n    arm64*) sys_arch=\"arm64\" ;;\n    *) {\n        echo \"Unknown system architecture: ${sys_arch}! This script runs only on x86_64 or arm64\"\n        exit 1\n    } ;;\n    esac\n\n    # if miniconda has not been installed, download and install it\n    if ! \"${conda_root}/bin/conda\" --version &>/dev/null; then\n        # the installer is a regular file, so test it with -f (the former -d test never matched)\n        if [ ! -f \"$install_dir/miniconda_installer.sh\" ]; then\n            local miniconda_url=\"https://repo.anaconda.com/miniconda/Miniconda3-latest-MacOSX-${sys_arch}.sh\"\n            echo \"Downloading Miniconda from $miniconda_url\"\n\n            mkdir -p \"$install_dir\"\n            # download to a temporary name so that only a complete installer is ever reused\n            curl -Lk \"$miniconda_url\" >\"$install_dir/miniconda_installer.sh.part\" &&\n                mv \"$install_dir/miniconda_installer.sh.part\" \"$install_dir/miniconda_installer.sh\"\n        fi\n\n        echo \"Installing Miniconda to $conda_root\"\n        chmod u+x \"$install_dir/miniconda_installer.sh\"\n        bash \"$install_dir/miniconda_installer.sh\" -b -p \"$conda_root\"\n        rm -rf \"$install_dir/miniconda_installer.sh\"\n    fi\n    echo \"Miniconda is installed at $conda_root\"\n\n    # test conda\n    echo \"Conda version: \"\n    \"$conda_root/bin/conda\" --version || {\n        echo \"Conda not found. Will exit now...\"\n        exit 1\n    }\n}\n\nfunction create_conda_env() {\n    local python_version=\"${1}\"\n\n    if [ ! -d \"${env_dir}\" ]; then\n        echo \"Creating conda environment with python=$python_version in $env_dir\"\n        \"${conda_root}/bin/conda\" create -y -k --prefix \"$env_dir\" python=\"$python_version\" || {\n            echo \"Failed to create conda environment.\"\n            echo \"Will delete the ${env_dir} (if exist) and exit now...\"\n            rm -rf \"$env_dir\"\n            exit 1\n        }\n    else\n        echo \"Conda environment exists at $env_dir\"\n    fi\n}\n\nfunction activate_conda_env() {\n    # deactivate the current env(s) to avoid conflicts\n    { conda deactivate && conda deactivate && conda deactivate; } 2>/dev/null\n\n    # check if conda env is broken (because of interruption during creation)\n    if [ ! -f \"$env_dir/bin/python\" ]; then\n        echo \"Conda environment appears to be broken. You may need to remove $env_dir and run the installer again.\"\n        exit 1\n    fi\n\n    source \"$conda_root/etc/profile.d/conda.sh\" # conda init\n    conda activate \"$env_dir\" || {\n        echo \"Failed to activate environment. Please remove $env_dir and run the installer again.\"\n        exit 1\n    }\n    echo \"Activate conda environment at $CONDA_PREFIX\"\n}\n\nfunction deactivate_conda_env() {\n    # Conda deactivate if we are in the right env\n    if [[ \"$CONDA_PREFIX\" == \"$env_dir\" ]]; then\n        conda deactivate\n        echo \"Deactivate conda environment at $env_dir\"\n    fi\n}\n\nfunction install_dependencies() {\n    # check if the env is already setup by finding 'kotaemon' in 'pip list'\n    if pip list 2>/dev/null | grep -q \"kotaemon\"; then\n        echo \"Requirements are already installed\"\n    else\n        local kotaemon_root=\"$(pwd)/libs/kotaemon\"\n        local ktem_root=\"$(pwd)/libs/ktem/\"\n\n        if [ -f \"$(pwd)/VERSION\" ]; then\n            local app_version=$(<\"$(pwd)/VERSION\")\n        else\n            local app_version=\"latest\"\n        fi\n\n        if [ -f \"pyproject.toml\" ]; then\n            echo \"Found pyproject.toml. Installing from source\"\n            echo \"\" && echo \"Installing libs/kotaemon\"\n            python -m pip install -e \"$kotaemon_root\"\n            echo \"\" && echo \"Installing libs/ktem\"\n            python -m pip install -e \"$ktem_root\"\n\n            python -m pip install --no-deps -e .\n        else\n            echo \"Installing Kotaemon $app_version\"\n            # Work around for versioning control\n            python -m pip install \"git+https://github.com/Cinnamon/kotaemon.git@$app_version#subdirectory=libs/kotaemon\"\n            python -m pip install \"git+https://github.com/Cinnamon/kotaemon.git@$app_version#subdirectory=libs/ktem\"\n            python -m pip install --no-deps \"git+https://github.com/Cinnamon/kotaemon.git@$app_version\"\n        fi\n\n        if ! pip list 2>/dev/null | grep -q \"kotaemon\"; then\n            echo \"Installation failed. You may need to run the installer again.\"\n            deactivate_conda_env\n            exit 1\n        else\n            print_highlight \"Install finished successfully. Clear cache...\"\n            \"$conda_root/bin/conda\" clean --all -y\n            python -m pip cache purge\n\n            print_highlight \"Do you want to launch the web UI? [Y/N]\"\n            local launch\n            read -p \"Input (yes/no)> \" launch\n            # Convert user input to lowercase. `${launch:l}` is zsh syntax (in bash it is a\n            # substring expansion that leaves the text unchanged) and `${launch,,}` needs\n            # bash >= 4, so use tr, which also works with the bash 3.2 shipped on macOS\n            launch=$(printf '%s' \"$launch\" | tr '[:upper:]' '[:lower:]')\n            if [[ \"$launch\" != \"yes\" && \"$launch\" != \"y\" && \"$launch\" != \"true\" ]]; then\n                echo \"Will exit now...\"\n                deactivate_conda_env\n                echo \"Please run the installer again to launch the UI.\"\n                exit 0\n            fi\n        fi\n    fi\n}\n\nfunction setup_local_model() {\n    python \"$(pwd)/scripts/serve_local.py\"\n}\n\nfunction download_and_unzip() {\n    local url=$1\n    local dest_dir=$2\n\n    # Skip the download when the destination directory already exists\n    if [ -d \"$dest_dir\" ]; then\n        echo \"Destination directory $dest_dir already exists. Skipping download.\"\n        return\n    fi\n\n    mkdir -p \"$dest_dir\"\n\n    # Download the ZIP file (-f makes curl fail on HTTP errors instead of saving the error page)\n    local zip_file=\"${dest_dir}/downloaded.zip\"\n    echo \"Downloading $url to $zip_file\"\n    curl -fL -o \"$zip_file\" \"$url\" || {\n        echo \"Failed to download $url\"\n        # remove the directory, otherwise the next run would skip the download\n        rm -rf \"$dest_dir\"\n        return 1\n    }\n\n    # Unzip the file to the destination directory\n    echo \"Unzipping $zip_file to $dest_dir\"\n    unzip -o \"$zip_file\" -d \"$dest_dir\"\n\n    # Clean up the downloaded ZIP file\n    rm \"$zip_file\"\n    echo \"Download and unzip completed successfully.\"\n}\n\nfunction launch_ui() {\n    local pdfjs_prebuilt_dir=$1\n    PDFJS_PREBUILT_DIR=\"$pdfjs_prebuilt_dir\" python \"$(pwd)/app.py\" || {\n        echo \"\" && echo \"Will exit now...\"\n        exit 1\n    }\n}\n\nfunction print_highlight() {\n    local message=\"${1}\"\n    echo \"\" && echo \"******************************************************\"\n    # quoted so that messages such as '... UI? [Y/N]' are not glob-expanded\n    echo \"$message\"\n    echo \"******************************************************\" && echo \"\"\n}\n\n# Main script execution\n\n# move one level up from the dir where this script resides\ncd \"$(\n    cd -- \"$(dirname \"$0\")\" >/dev/null 2>&1\n    pwd -P\n)\" && cd ..\n\ninstall_dir=\"$(pwd)/install_dir\"\nconda_root=\"${install_dir}/conda\"\nenv_dir=\"${install_dir}/env\"\npython_version=\"3.10\"\n\npdf_js_version=\"4.0.379\"\npdf_js_dist_name=\"pdfjs-${pdf_js_version}-dist\"\npdf_js_dist_url=\"https://github.com/mozilla/pdf.js/releases/download/v${pdf_js_version}/${pdf_js_dist_name}.zip\"\ntarget_pdf_js_dir=\"$(pwd)/libs/ktem/ktem/assets/prebuilt/${pdf_js_dist_name}\"\n\ncheck_path_for_spaces\n\nprint_highlight \"Setting up Miniconda\"\ninstall_miniconda\n\nprint_highlight \"Creating conda environment\"\ncreate_conda_env \"$python_version\"\nactivate_conda_env\n\nprint_highlight \"Installing requirements\"\ninstall_dependencies\n\nprint_highlight \"Downloading and unzipping PDF.js\"\ndownload_and_unzip \"$pdf_js_dist_url\" \"$target_pdf_js_dir\"\n\nprint_highlight \"Setting up a local model\"\nsetup_local_model\n\nprint_highlight \"Launching Kotaemon in your browser, please wait...\"\nlaunch_ui \"$target_pdf_js_dir\"\n\ndeactivate_conda_env\n\nread -p \"Press enter to continue\"\n"
  },
  {
    "path": "scripts/run_windows.bat",
    "content": "@ECHO off\r\n\r\n:: Main script execution\r\nCD /D \"%~dp0\\..\"\r\n\r\nSET /p app_version=<\"%CD%\\VERSION\" || SET app_version=latest\r\nSET install_dir=%CD%\\install_dir\r\nSET conda_root=%install_dir%\\conda\r\nSET env_dir=%install_dir%\\env\r\nSET python_version=3.10\r\nSET miniconda_download_url=https://repo.anaconda.com/miniconda/Miniconda3-latest-Windows-x86_64.exe\r\n\r\nSET git_install_dir=%install_dir%\\Git\r\nSET seven_zip_dir=%install_dir%\\7zip\r\n:: Determine if the machine is 32-bit or 64-bit\r\nIF \"%PROCESSOR_ARCHITECTURE%\"==\"x86\" (\r\n    SET seven_zip_url=https://7-zip.org/a/7z2408.exe\r\n    SET git_download_url=https://github.com/git-for-windows/git/releases/download/v2.46.0.windows.1/PortableGit-2.46.0-32-bit.7z.exe\r\n) ELSE (\r\n    SET seven_zip_url=https://7-zip.org/a/7z2408-x64.exe\r\n    SET git_download_url=https://github.com/git-for-windows/git/releases/download/v2.46.0.windows.1/PortableGit-2.46.0-64-bit.7z.exe\r\n)\r\n\r\nECHO %CD%| FINDSTR /C:\" \" >nul 2>&1\r\nIF %ERRORLEVEL% EQU 0 (\r\n    ECHO The current workdir has whitespace which can lead to unintended behaviour. Please modify your path and continue later.\r\n    GOTO :end\r\n)\r\n\r\nIF NOT EXIST \"%install_dir%\" ( MKDIR \"%install_dir%\" )\r\n\r\nCALL :print_highlight \"Setting up Git\"\r\nCALL :download_and_install_git\r\nIF ERRORLEVEL 1 GOTO :end\r\n\r\n:: Temporarily add Portable Git to PATH\r\nSET \"PATH=%git_install_dir%\\bin;%PATH%\"\r\n\r\nCALL :print_highlight \"Setting up Miniconda\"\r\nCALL :download_and_install_miniconda\r\nIF ERRORLEVEL 1 GOTO :end\r\n\r\nCALL :print_highlight \"Creating conda environment\"\r\nCALL :create_conda_environment\r\nIF ERRORLEVEL 1 GOTO :end\r\n\r\nCALL :activate_environment\r\nIF ERRORLEVEL 1 GOTO :end\r\n\r\nCALL :print_highlight \"Installing Kotaemon\"\r\nCALL :install_dependencies\r\nIF ERRORLEVEL 1 GOTO :end\r\n\r\nCALL :print_highlight \"Setting up a local model\"\r\nCALL :setup_local_model\r\nIF ERRORLEVEL 1 GOTO :end\r\n\r\nCALL :print_highlight \"Downloading and extracting PDF.js\"\r\nCALL :download_and_extract_pdf_js\r\nIF ERRORLEVEL 1 GOTO :end\r\n\r\nCALL :print_highlight \"Launching Kotaemon in your browser, please wait...\"\r\nCALL :launch_ui\r\n\r\nCALL :deactivate_environment\r\nGOTO :end_success\r\n\r\n:download_and_install_7zip\r\n:: Check if 7-Zip is installed\r\nIF NOT EXIST \"%seven_zip_dir%\\7z.exe\" (\r\n    ECHO Downloading 7-Zip from %seven_zip_url%\r\n    CALL curl -Lk \"%seven_zip_url%\" -o \"%install_dir%\\7zip_installer.exe\" || (\r\n        ECHO. && ECHO Failed to download 7-Zip. Aborting...\r\n        GOTO :exit_func_with_error\r\n    )\r\n    ECHO Installing 7-Zip to %seven_zip_dir%\r\n    CALL \"%install_dir%\\7zip_installer.exe\" /S /D=%seven_zip_dir%\r\n    DEL \"%install_dir%\\7zip_installer.exe\"\r\n)\r\nECHO 7-Zip is installed at %seven_zip_dir%\r\n\r\nGOTO :eof\r\n\r\n:uninstall_7zip\r\nIF EXIST \"%seven_zip_dir%\\Uninstall.exe\" (\r\n    CALL \"%seven_zip_dir%\\Uninstall.exe\" /S\r\n) ELSE (\r\n    ECHO. && ECHO Uninstaller not found. Manually deleting 7-Zip directory...\r\n    RMDIR /S /Q \"%seven_zip_dir%\"\r\n)\r\n\r\nGOTO :eof\r\n\r\n:download_and_install_git\r\n:: Check if Git is already installed\r\nCALL \"%git_install_dir%\\bin\\git.exe\" --version >nul 2>&1\r\nIF %ERRORLEVEL% NEQ 0 (\r\n    ECHO Install 7-Zip to extract Portable Git. It will be uninstalled automatically after Git installation. && ECHO.\r\n    CALL :download_and_install_7zip\r\n    IF ERRORLEVEL 1 GOTO :end\r\n\r\n    ECHO. && ECHO Downloading Portable Git from %git_download_url%\r\n    CALL curl -Lk \"%git_download_url%\" -o \"%install_dir%\\portable_git.7z.exe\" || (\r\n        ECHO. && ECHO Failed to download Git. Aborting...\r\n        GOTO :exit_func_with_error\r\n    )\r\n\r\n    ECHO Extracting Git to %git_install_dir%...\r\n    CALL \"%seven_zip_dir%\\7z.exe\" x \"%install_dir%\\portable_git.7z.exe\" -o\"%git_install_dir%\" -y >nul || (\r\n        ECHO. && ECHO Failed to extract Git. Aborting...\r\n        GOTO :exit_func_with_error\r\n    )\r\n    DEL \"%install_dir%\\portable_git.7z.exe\"\r\n\r\n    ECHO. && ECHO Uninstalling 7-Zip...\r\n    CALL :uninstall_7zip\r\n    IF ERRORLEVEL 1 GOTO :end\r\n)\r\nECHO Git is installed at %git_install_dir%\r\n:: Recheck Git installation\r\nCALL \"%git_install_dir%\\bin\\git.exe\" --version || (\r\n    ECHO. && ECHO Git not found. Aborting...\r\n    GOTO :exit_func_with_error\r\n)\r\n\r\nSET \"PATH=%git_install_dir%\\bin;%PATH%\"\r\nECHO Git is added to PATH for this session\r\n\r\nGOTO :eof\r\n\r\n:download_and_install_miniconda\r\n:: If conda has been installed at the %conda_root%, don't need to reinstall it\r\nCALL \"%conda_root%\\_conda.exe\" --version >nul 2>&1\r\nIF %ERRORLEVEL% NEQ 0 (\r\n    IF NOT EXIST \"%install_dir%\\miniconda_installer.exe\" (\r\n        ECHO Downloading Miniconda from %miniconda_download_url%\r\n        CALL curl -Lk \"%miniconda_download_url%\" -o \"%install_dir%\\miniconda_installer.exe\" || (\r\n            ECHO. && ECHO Failed to download Miniconda. Aborting...\r\n            GOTO :exit_func_with_error\r\n        )\r\n    )\r\n    ECHO Installing Miniconda to %conda_root%\r\n    START /wait \"\" \"%install_dir%\\miniconda_installer.exe\" /InstallationType=JustMe /NoShortcuts=1 /AddToPath=0 /RegisterPython=0 /NoRegistry=1 /S /D=%conda_root%\r\n    DEL \"%install_dir%\\miniconda_installer.exe\"\r\n)\r\nECHO Conda is installed at %conda_root%\r\n\r\n:: Recheck conda\r\nECHO Conda version:\r\nCALL \"%conda_root%\\_conda.exe\" --version || ( ECHO. && ECHO Conda not found. Aborting... && GOTO :exit_func_with_error )\r\n\r\nGOTO :eof\r\n\r\n:create_conda_environment\r\n:: Create new conda environment if it doesn't exist\r\nIF NOT EXIST %env_dir% (\r\n    ECHO Creating conda environment with python=%python_version% in %env_dir%\r\n    REM Create conda environment. If the interruption happens, rollback and remove the env_dir\r\n    CALL \"%conda_root%\\_conda.exe\" create --no-shortcuts -y -k --prefix %env_dir% python=%python_version% || (\r\n        ECHO. && ECHO Failed to create conda environment. Will delete the %env_dir% and abort now...\r\n        RMDIR /s /q %env_dir%\r\n        GOTO :exit_func_with_error\r\n    )\r\n    ECHO Conda environment created successfully\r\n) ELSE (\r\n    ECHO Conda environment exists at %env_dir%\r\n)\r\nGOTO :eof\r\n\r\n:activate_environment\r\n:: Deactivate existing conda env(s) to avoid conflicts\r\nIF EXIST \"%conda_root%\\condabin\\conda.bat\" (\r\n    CALL \"%conda_root%\\condabin\\conda.bat\" deactivate\r\n    CALL \"%conda_root%\\condabin\\conda.bat\" deactivate\r\n    CALL \"%conda_root%\\condabin\\conda.bat\" deactivate\r\n)\r\n\r\nCALL \"%env_dir%\\python.exe\" --version >nul 2>&1 || (\r\n    ECHO The environment appears to be broken. You may need to remove %env_dir% and run the installer again.\r\n    GOTO :exit_func_with_error\r\n)\r\n\r\nCALL \"%conda_root%\\condabin\\conda.bat\" activate %env_dir% || (\r\n    ECHO Failed to activate environment. You may need to remove %env_dir% and run the installer again.\r\n    GOTO :exit_func_with_error\r\n)\r\nECHO Activate conda environment at %env_dir%\r\n\r\nGOTO :eof\r\n\r\n:deactivate_environment\r\n:: Conda deactivate if we are in the right env\r\nIF \"%CONDA_PREFIX%\" == \"%env_dir%\" (\r\n    CALL \"%conda_root%\\condabin\\conda.bat\" deactivate\r\n    ECHO Deactivate conda environment at %env_dir%\r\n)\r\nGOTO :eof\r\n\r\n:install_dependencies\r\npip list | findstr /C:\"kotaemon\" >NUL 2>&1\r\nIF %ERRORLEVEL% == 0  (\r\n    ECHO Dependencies are already installed\r\n) ELSE (\r\n    IF EXIST \"pyproject.toml\" (\r\n        ECHO Found pyproject.toml. Installing from source...\r\n\r\n        ECHO Installing libs\\kotaemon\r\n        python -m pip install -e \"%CD%\\libs\\kotaemon\"\r\n\r\n        ECHO Installing libs\\ktem\r\n        python -m pip install -e \"%CD%\\libs\\ktem\"\r\n\r\n        python -m pip install --no-deps -e .\r\n    ) ELSE (\r\n        ECHO Installing Kotaemon %app_version%\r\n        @REM Work around for versioning control\r\n        python -m pip install git+https://github.com/Cinnamon/kotaemon.git@\"%app_version%\"#subdirectory=libs/kotaemon\r\n        python -m pip install git+https://github.com/Cinnamon/kotaemon.git@\"%app_version%\"#subdirectory=libs/ktem\r\n        python -m pip install --no-deps git+https://github.com/Cinnamon/kotaemon.git@\"%app_version%\"\r\n    )\r\n\r\n    ( CALL pip list | findstr /C:\"kotaemon\" >NUL 2>&1 ) || (\r\n        ECHO. && ECHO Installation failed. You may need to run the installer again.\r\n        CALL :deactivate_environment\r\n        GOTO :exit_func_with_error\r\n    )\r\n\r\n    CALL :print_highlight \"Install successfully. Clear cache...\"\r\n    @REM conda.bat is a batch file: without CALL, control would not return to this script\r\n    CALL \"%conda_root%\\condabin\\conda.bat\" clean --all -y\r\n    python -m pip cache purge\r\n)\r\nGOTO :eof\r\n\r\n:download_and_extract_pdf_js\r\n:: Download and extract a ZIP file from a URL to a destination directory\r\n\r\nREM Define variables\r\nset \"pdf_js_version=4.0.379\"\r\nset \"pdf_js_dist_name=pdfjs-%pdf_js_version%-dist\"\r\nset \"pdf_js_dist_url=https://github.com/mozilla/pdf.js/releases/download/v%pdf_js_version%/%pdf_js_dist_name%.zip\"\r\nfor /f \"delims=\" %%i in ('cd') do set \"current_dir=%%i\"\r\nset \"target_pdf_js_dir=%current_dir%\\libs\\ktem\\ktem\\assets\\prebuilt\\%pdf_js_dist_name%\"\r\n\r\nREM Create the target directory if it does not exist (including parent folders)\r\nif not exist \"%target_pdf_js_dir%\" (\r\n    echo Creating directory %target_pdf_js_dir%\r\n    mkdir \"%target_pdf_js_dir%\"\r\n) else (\r\n    echo Directory already exists: %target_pdf_js_dir%\r\n    GOTO :eof\r\n)\r\n\r\nREM Download the ZIP file using PowerShell\r\nset \"zip_file=%temp%\\downloaded.zip\"\r\necho Downloading %pdf_js_dist_url% to %zip_file%\r\npowershell -Command \"Invoke-WebRequest -Uri '%pdf_js_dist_url%' -OutFile '%zip_file%'\" || (\r\n    ECHO. && ECHO Failed to download PDF.js. Aborting...\r\n    REM Remove the directory, otherwise the next run would skip the download\r\n    RMDIR /S /Q \"%target_pdf_js_dir%\"\r\n    GOTO :exit_func_with_error\r\n)\r\n\r\nREM Extract the ZIP file using PowerShell\r\necho Extracting %zip_file% to %target_pdf_js_dir%\r\npowershell -Command \"Expand-Archive -Path '%zip_file%' -DestinationPath '%target_pdf_js_dir%'\" || (\r\n    ECHO. && ECHO Failed to extract PDF.js. Aborting...\r\n    RMDIR /S /Q \"%target_pdf_js_dir%\"\r\n    GOTO :exit_func_with_error\r\n)\r\n\r\nREM Clean up the downloaded ZIP file\r\ndel \"%zip_file%\"\r\necho Download and extraction completed successfully.\r\n\r\ngoto :eof\r\n\r\n:setup_local_model\r\npython \"%CD%\\scripts\\serve_local.py\"\r\nGOTO :eof\r\n\r\n:launch_ui\r\n:: Workaround for diskcache path with folder start with .\r\nSET THEFLOW_TEMP_PATH=flow_tmp\r\nSET PDFJS_PREBUILT_DIR=%target_pdf_js_dir%\r\nECHO Starting Kotaemon UI... (prebuilt PDF.js is at %PDFJS_PREBUILT_DIR%)\r\nCALL python -Xutf8 \"%CD%\\app.py\" || ( ECHO. && ECHO Will exit now... && GOTO :exit_func_with_error )\r\nGOTO :eof\r\n\r\n:print_highlight\r\nECHO. && ECHO ******************************************************\r\nECHO %~1\r\nECHO ****************************************************** && ECHO.\r\nGOTO :eof\r\n\r\n:exit_func_with_error\r\n:: Called inside functions when error happens, then back to the main routine with error code 1\r\nEXIT /B 1\r\n\r\n:end_success\r\n:: Exit the script main routine with error code 0 (success)\r\nECHO Script completed successfully.\r\nPAUSE\r\nEXIT /B 0\r\n\r\n:end\r\n:: Exit the script main routine with error code 1 (fail)\r\nPAUSE\r\nEXIT /B 1\r\n"
  },
  {
    "path": "scripts/serve_local.py",
    "content": "import platform\nimport shlex\nimport subprocess\nfrom inspect import currentframe, getframeinfo\nfrom pathlib import Path\n\nfrom decouple import config\n\nsystem_name = platform.system()\n\ncur_frame = currentframe()\nif cur_frame is None:\n    raise ValueError(\"Cannot get the current frame.\")\nthis_file = getframeinfo(cur_frame).filename\nthis_dir = Path(this_file).parent\n\n\ndef serve_llamacpp_python(local_model_file: Path, **kwargs):\n    \"\"\"Start the llama-cpp server script of the current platform in the background.\n\n    Args:\n        local_model_file: path of the model file, passed to the script as `--model`\n        **kwargs: extra options, each forwarded as `--<name> <value>`; `port`\n            defaults to 31415\n\n    Raises:\n        ValueError: if the operating system is not Windows, Linux or macOS\n    \"\"\"\n\n    def guess_chat_format(local_model_file):\n        # compare in lower case so that names such as \"Qwen1.5-...\" also match\n        model_name = local_model_file.stem.lower()\n\n        # handle known cases that the server backends handle incorrectly\n        # this is highly heuristic, should be expanded later\n        # server backends usually have logic for this but they could still be wrong\n        if \"qwen\" in model_name:\n            return \"qwen\"\n\n        return None\n\n    # default port\n    if \"port\" not in kwargs:\n        kwargs[\"port\"] = 31415\n\n    chat_format = guess_chat_format(local_model_file)\n    if chat_format:\n        kwargs = {**kwargs, \"chat_format\": chat_format}\n\n    # these scripts create a separate conda env and run the server\n    if system_name == \"Windows\":\n        script_file = this_dir / \"server_llamacpp_windows.bat\"\n    elif system_name == \"Linux\":\n        script_file = this_dir / \"server_llamacpp_linux.sh\"\n    elif system_name == \"Darwin\":\n        script_file = this_dir / \"server_llamacpp_macos.sh\"\n    else:\n        raise ValueError(f\"Unsupported system: {system_name}\")\n\n    cmd_parts = [str(script_file), \"--model\", str(local_model_file)]\n    for key, value in kwargs.items():\n        cmd_parts += [f\"--{key}\", str(value)]\n\n    # quote every argument so that paths with spaces or shell metacharacters\n    # are not split or interpreted by the shell\n    if system_name == \"Windows\":\n        # on Windows, Popen converts a sequence into a quoted command line\n        cmd = cmd_parts\n    else:\n        cmd = \" \".join(shlex.quote(part) for part in cmd_parts)\n    subprocess.Popen(cmd, shell=True)\n\n\ndef main():\n    \"\"\"Offer to serve the model named by the LOCAL_MODEL setting, if it exists.\"\"\"\n    local_model_file = config(\"LOCAL_MODEL\", default=\"\")\n\n    if not local_model_file:\n        print(\"LOCAL_MODEL not set in the `.env` file.\")\n        return\n\n    local_model_file = Path(local_model_file)\n    if not local_model_file.exists():\n        print(f\"Local model not found: {local_model_file}\")\n        return\n\n    print(f\"Local model found: {local_model_file}\")\n    will_start_server = input(\"Do you want to use this local model ? (y/n): \")\n\n    if will_start_server.lower().strip() not in [\"y\", \"yes\"]:\n        return\n\n    print(\"Starting the local server...\")\n    # only GGUF files (served through llama-cpp-python) are supported\n    if local_model_file.suffix == \".gguf\":\n        serve_llamacpp_python(local_model_file)\n    else:\n        raise ValueError(f\"Unsupported model file type: {local_model_file.suffix}\")\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "scripts/server_llamacpp_linux.sh",
    "content": "#!/bin/bash\n\n# functions used in the main code execution\nfunction print_highlight() {\n    local message=\"${1}\"\n    echo \"\" && echo \"******************************************************\"\n    # quoted to prevent word splitting and glob expansion of the message\n    echo \"$message\"\n    echo \"******************************************************\" && echo \"\"\n}\n\nfunction path_sanity_check() {\n    echo \"Path sanity checking\"\n    if [[ $PWD =~ \\  ]]; then\n        print_highlight \"This script relies on Miniconda which can't be silently installed under a path with spaces. Please run it from a path without spaces.\"\n        exit 1\n    fi\n}\n\nfunction deactivate_environment() {\n    echo \"Deactivate existing environment(s)\"\n    # deactivate existing conda envs as needed to avoid conflicts\n    { conda deactivate && conda deactivate && conda deactivate; } 2>/dev/null\n}\n\nfunction check_conda_existence() {\n    echo \"Check for conda existence\"\n    conda_exists=\"F\"\n\n    # figure out whether conda exists\n    if \"$CONDA_ROOT_PREFIX/bin/conda\" --version &>/dev/null; then conda_exists=\"T\"; fi\n\n    # verify if conda is installed by the main app, if not then raise error\n    if [ \"$conda_exists\" == \"F\" ]; then\n        # test the conda binary\n        print_highlight \"conda is not installed, seems like the app wasn't installed correctly.\"\n        # a bare `exit` would return the status of print_highlight, i.e. success\n        exit 1\n    fi\n}\n\nfunction create_conda_environment() {\n    # create the environment if needed\n    if [ ! -e \"$INSTALL_ENV_DIR\" ]; then\n        echo \"Create conda environment\"\n        \"$CONDA_ROOT_PREFIX/bin/conda\" create -y -k --prefix \"$INSTALL_ENV_DIR\" python=\"$PYTHON_VERSION\" || {\n            echo && print_highlight \"Conda environment creation failed.\" && exit 1\n        }\n    fi\n\n    # check if conda environment was actually created\n    if [ ! -e \"$INSTALL_ENV_DIR/bin/python\" ]; then\n        print_highlight \"Conda environment was not correctly created.\"\n        exit 1\n    fi\n}\n\nfunction isolate_environment() {\n    echo \"Isolate environment\"\n    export PYTHONNOUSERSITE=1\n    unset PYTHONPATH\n    unset PYTHONHOME\n}\n\nfunction activate_environment() {\n    echo \"Activate conda environment\"\n    source \"$CONDA_ROOT_PREFIX/etc/profile.d/conda.sh\" # otherwise conda complains about 'shell not initialized' (needed when running in a script)\n    conda activate \"$INSTALL_ENV_DIR\"\n}\n\n# main code execution\n\ncd \"$(dirname \"${BASH_SOURCE[0]}\")/..\"\necho \"Changed the current directory to: $(pwd)\"\n\npath_sanity_check\ndeactivate_environment\n\n# config\nENV_NAME=\"llama-cpp-python-server\"\nPYTHON_VERSION=\"3.10\"\nCONDA_ROOT_PREFIX=\"$(pwd)/install_dir/conda\"\nINSTALL_ENV_DIR=\"$(pwd)/install_dir/server_envs/${ENV_NAME}\"\n\ncheck_conda_existence\ncreate_conda_environment\nisolate_environment\nactivate_environment\n\n# install dependencies\n# ver 0.2.56 produces segment error for /embeddings on MacOS\n# the requirement is quoted because the square brackets are a glob pattern for the shell\npython -m pip install \"llama-cpp-python[server]==0.2.55\"\n\n# start the server with passed params (\"$@\" keeps arguments containing spaces intact)\npython -m llama_cpp.server \"$@\"\n\nconda deactivate\n"
  },
  {
    "path": "scripts/server_llamacpp_macos.sh",
    "content": "#!/bin/bash\n\n# functions used in the main code execution\nfunction print_highlight() {\n    local message=\"${1}\"\n    echo \"\" && echo \"******************************************************\"\n    # quoted to prevent word splitting and glob expansion of the message\n    echo \"$message\"\n    echo \"******************************************************\" && echo \"\"\n}\n\nfunction path_sanity_check() {\n    echo \"Path sanity checking\"\n    if [[ \"$(pwd)\" =~ \" \" ]]; then\n        print_highlight \"This script relies on Miniconda which can't be silently installed under a path with spaces. Please run it from a path without spaces.\"\n        exit 1\n    fi\n}\n\nfunction deactivate_environment() {\n    echo \"Deactivate existing environment(s)\"\n    # deactivate existing conda envs as needed to avoid conflicts\n    { conda deactivate && conda deactivate && conda deactivate; } 2>/dev/null\n}\n\nfunction check_conda_existence() {\n    echo \"Check for conda existence\"\n    conda_exists=\"F\"\n\n    # figure out whether conda exists\n    if \"$CONDA_ROOT_PREFIX/bin/conda\" --version &>/dev/null; then conda_exists=\"T\"; fi\n\n    # verify if conda is installed by the main app, if not then raise error\n    if [ \"$conda_exists\" == \"F\" ]; then\n        # test the conda binary\n        print_highlight \"conda is not installed, seems like the app wasn't installed correctly.\"\n        # a bare `exit` would return the status of print_highlight, i.e. success\n        exit 1\n    fi\n}\n\nfunction create_conda_environment() {\n    # create the environment if needed\n    if [ ! -d \"${INSTALL_ENV_DIR}\" ]; then\n        echo \"Create conda environment\"\n        # use a { } group: `exit` inside a ( ) subshell would not stop this script\n        \"${CONDA_ROOT_PREFIX}/bin/conda\" create -y -k --prefix \"$INSTALL_ENV_DIR\" python=\"$PYTHON_VERSION\" || {\n            echo && print_highlight \"Conda environment creation failed.\" && exit 1\n        }\n    fi\n\n    # check if conda environment was actually created\n    if [ ! -f \"$INSTALL_ENV_DIR/bin/python\" ]; then\n        print_highlight \"Conda environment was not correctly created.\"\n        exit 1\n    fi\n}\n\nfunction isolate_environment() {\n    echo \"Isolate environment\"\n    export PYTHONNOUSERSITE=1\n    unset PYTHONPATH\n    unset PYTHONHOME\n}\n\nfunction activate_environment() {\n    echo \"Activate conda environment\"\n    source \"$CONDA_ROOT_PREFIX/etc/profile.d/conda.sh\" # otherwise conda complains about 'shell not initialized' (needed when running in a script)\n    conda activate \"$INSTALL_ENV_DIR\"\n}\n\n# main code execution\n\ncd \"$(\n    cd -- \"$(dirname \"$0\")\" >/dev/null 2>&1\n    pwd -P\n)\" && cd ..\necho \"Changed the current directory to: $(pwd)\"\n\npath_sanity_check\ndeactivate_environment\n\n# config\nENV_NAME=\"llama-cpp-python-server\"\nPYTHON_VERSION=\"3.10\"\nCONDA_ROOT_PREFIX=\"$(pwd)/install_dir/conda\"\nINSTALL_ENV_DIR=\"$(pwd)/install_dir/server_envs/${ENV_NAME}\"\n\ncheck_conda_existence\ncreate_conda_environment\nisolate_environment\nactivate_environment\n\n# install dependencies\n# ver 0.2.56 produces segment error for /embeddings on MacOS\n# the requirement is quoted because the square brackets are a glob pattern for the shell\npython -m pip install \"llama-cpp-python[server]==0.2.55\"\n\n# start the server with passed params (\"$@\" keeps arguments containing spaces intact)\npython -m llama_cpp.server \"$@\"\n\nconda deactivate\n"
  },
  {
    "path": "scripts/server_llamacpp_windows.bat",
    "content": "@echo off\r\n\r\n@rem main code execution\r\n\r\ncall :print_highlight \"Starting inference server for llama-cpp\"\r\n\r\ncd /D \"%~dp0\\..\"\r\necho \"Change the current directory to: %cd%\"\r\n\r\ncall :path_sanity_check\r\ncall :deactivate_environment\r\n\r\n@rem config\r\nset ENV_NAME=llama-cpp-python-server\r\nset PYTHON_VERSION=3.10\r\nset CONDA_ROOT_PREFIX=%cd%\\install_dir\\conda\r\nset INSTALL_ENV_DIR=%cd%\\install_dir\\server_envs\\%ENV_NAME%\r\n\r\necho \"Python version: %PYTHON_VERSION%\"\r\necho \"Conda prefix: %CONDA_ROOT_PREFIX%\"\r\necho \"Environment path: %INSTALL_ENV_DIR%\"\r\n\r\n@rem handle conda environment\r\ncall :check_conda_existence\r\ncall :create_conda_environment\r\ncall :isolate_environment\r\ncall :activate_environment\r\n\r\n@rem install dependencies\r\n@rem ver 0.2.56 produces segment error for /embeddings on MacOS\r\ncall python -m pip install llama-cpp-python[server]==0.2.55\r\n\r\n@REM @rem start the server with passed params\r\ncall python -m llama_cpp.server %*\r\ncall conda deactivate\r\n\r\ngoto :end\r\n@rem the end of main code execution\r\n\r\n\r\n@rem below are the functions used in the above execution\r\n\r\n\r\n:print_highlight\r\necho.\r\necho ******************************************************\r\necho %~1\r\necho ******************************************************\r\necho.\r\ngoto :eof\r\n\r\n\r\n:path_sanity_check\r\necho \"Path sanity checking\"\r\necho \"%cd%\"| findstr /C:\" \" >nul ^\r\n&& (call :print_highlight \"This script relies on Miniconda which can not be silently installed under a path with spaces.\" ^\r\n&& goto :end)\r\ngoto :eof\r\n\r\n\r\n:deactivate_environment\r\necho \"Deactivate existing environment(s)\"\r\n(call conda deactivate && call conda deactivate && call conda deactivate) 2>nul\r\ngoto :eof\r\n\r\n\r\n:check_conda_existence\r\necho \"Check for conda existence\"\r\nset conda_exists=F\r\n\r\n@rem figure out whether conda exists\r\ncall 
\"%CONDA_ROOT_PREFIX%\\_conda.exe\" --version >nul 2>&1\r\nif \"%ERRORLEVEL%\" EQU \"0\" set conda_exists=T\r\n\r\n@rem verify if conda is installed by the main app, if not then raise error\r\nif \"%conda_exists%\" == \"F\" (\r\n\tcall :print_highlight \"conda is not installed, seems like the app wasn't installed correctly.\"\r\n    goto :end\r\n)\r\ngoto :eof\r\n\r\n\r\n:create_conda_environment\r\n@rem create the environment if needed\r\nif not exist \"%INSTALL_ENV_DIR%\" (\r\n    echo \"Create conda environment\"\r\n\tcall \"%CONDA_ROOT_PREFIX%\\_conda.exe\" create ^\r\n        --no-shortcuts -y -k --prefix \"%INSTALL_ENV_DIR%\" python=\"%PYTHON_VERSION%\" || ^\r\n    ( echo. && call :print_highlight \"Conda environment creation failed.\" && goto :end )\r\n)\r\n\r\n@rem check if conda environment was actually created\r\nif not exist \"%INSTALL_ENV_DIR%\\python.exe\" (\r\n    call :print_highlight \"Conda environment was not correctly created.\"\r\n    goto :end\r\n)\r\ngoto :eof\r\n\r\n\r\n:isolate_environment\r\necho \"Isolate environment\"\r\nset PYTHONNOUSERSITE=1\r\nset PYTHONPATH=\r\nset PYTHONHOME=\r\ngoto :eof\r\n\r\n\r\n:activate_environment\r\necho \"Activate conda environment\"\r\ncall \"%CONDA_ROOT_PREFIX%\\condabin\\conda.bat\" activate \"%INSTALL_ENV_DIR%\" || ^\r\n( echo. && call :print_highlight \"Miniconda hook not found.\" && goto :end )\r\ngoto :eof\r\n\r\n\r\n:end\r\n"
  },
  {
    "path": "scripts/update_linux.sh",
    "content": "#!/bin/bash\n\n# functions for better code organization\nfunction check_path_for_spaces() {\n    if [[ $PWD =~ \\  ]]; then\n        echo \"The current workdir has whitespace which can lead to unintended behaviour. Please modify your path and continue later.\"\n        exit 1\n    fi\n}\n\nfunction activate_conda_env() {\n    # deactivate the current env(s) to avoid conflicts\n    { conda deactivate && conda deactivate && conda deactivate; } 2>/dev/null\n\n    # check if conda env is broken (because of interruption during creation)\n    if [ ! -f \"$env_dir/bin/python\" ]; then\n        echo \"Conda environment appears to be broken. You may need to remove $env_dir and run the installer again.\"\n        exit 1\n    fi\n\n    source \"$conda_root/etc/profile.d/conda.sh\" # conda init\n    conda activate \"$env_dir\" || {\n        echo \"Failed to activate environment. Please remove $env_dir and run the installer again\"\n        exit 1\n    }\n    echo \"Activate conda environment at $CONDA_PREFIX\"\n}\n\nfunction deactivate_conda_env() {\n    # Conda deactivate if we are in the right env\n    if [ \"$CONDA_PREFIX\" == \"$env_dir\" ]; then\n        conda deactivate\n        echo \"Deactivate conda environment at $env_dir\"\n    fi\n}\n\nfunction update_latest() {\n    current_version=$(pip list | awk '/kotaemon-app/ {print $2}')\n    echo \"Current version $current_version\"\n\n    if [ -f \"pyproject.toml\" ]; then\n        echo \"Source files detected. 
Please perform git pull manually.\"\n        deactivate_conda_env\n        exit 1\n    else\n        echo \"Installing version: $app_version\"\n        # Work around for versioning control\n        python -m pip install \"git+https://github.com/Cinnamon/kotaemon.git@$app_version#subdirectory=libs/kotaemon\"\n        python -m pip install \"git+https://github.com/Cinnamon/kotaemon.git@$app_version#subdirectory=libs/ktem\"\n        python -m pip install --no-deps git+https://github.com/Cinnamon/kotaemon.git@$app_version\n        if [ $? -ne 0 ]; then\n            echo\n            echo \"Update failed. You may need to run the update again.\"\n            deactivate_conda_env\n            exit 1\n        fi\n    fi\n}\n\nfunction print_highlight() {\n    local message=\"${1}\"\n    echo \"\" && echo \"******************************************************\"\n    echo $message\n    echo \"******************************************************\" && echo \"\"\n}\n\n# Main script execution\n\n# move to the parent directory of the dir where this script resides\ncd \"$(dirname \"${BASH_SOURCE[0]}\")\" && cd ..\n\napp_version=\"latest\"\ninstall_dir=\"$(pwd)/install_dir\"\nconda_root=\"${install_dir}/conda\"\nenv_dir=\"${install_dir}/env\"\n\ncheck_path_for_spaces\n\nprint_highlight \"Activating conda environment\"\nactivate_conda_env\n\nprint_highlight \"Updating Kotaemon to latest\"\nupdate_latest\n\ndeactivate_conda_env\n\nread -p \"Press enter to continue\"\n"
  },
  {
    "path": "scripts/update_macos.sh",
    "content": "#!/bin/bash\n\n# functions for better code organization\nfunction check_path_for_spaces() {\n    if [[ $PWD =~ \\  ]]; then\n        echo \"The current workdir has whitespace which can lead to unintended behaviour. Please modify your path and continue later.\"\n        exit 1\n    fi\n}\n\nfunction activate_conda_env() {\n    # deactivate the current env(s) to avoid conflicts\n    { conda deactivate && conda deactivate && conda deactivate; } 2>/dev/null\n\n    # check if conda env is broken (because of interruption during creation)\n    if [ ! -f \"$env_dir/bin/python\" ]; then\n        echo \"Conda environment appears to be broken. You may need to remove $env_dir and run the installer again.\"\n        exit 1\n    fi\n\n    source \"$conda_root/etc/profile.d/conda.sh\" # conda init\n    conda activate \"$env_dir\" || {\n        echo \"Failed to activate environment. Please remove $env_dir and run the installer again\"\n        exit 1\n    }\n    echo \"Activate conda environment at $CONDA_PREFIX\"\n}\n\nfunction deactivate_conda_env() {\n    # Conda deactivate if we are in the right env\n    if [ \"$CONDA_PREFIX\" == \"$env_dir\" ]; then\n        conda deactivate\n        echo \"Deactivate conda environment at $env_dir\"\n    fi\n}\n\nfunction update_latest() {\n    current_version=$(pip list | awk '/kotaemon-app/ {print $2}')\n    echo \"Current version $current_version\"\n\n    if [ -f \"pyproject.toml\" ]; then\n        echo \"Source files detected. 
Please perform git pull manually.\"\n        deactivate_conda_env\n        exit 1\n    else\n        echo \"Installing version: $app_version\"\n        # Work around for versioning control\n        python -m pip install \"git+https://github.com/Cinnamon/kotaemon.git@$app_version#subdirectory=libs/kotaemon\"\n        python -m pip install \"git+https://github.com/Cinnamon/kotaemon.git@$app_version#subdirectory=libs/ktem\"\n        python -m pip install --no-deps git+https://github.com/Cinnamon/kotaemon.git@$app_version\n        if [ $? -ne 0 ]; then\n            echo\n            echo \"Update failed. You may need to run the update again.\"\n            deactivate_conda_env\n            exit 1\n        fi\n    fi\n}\n\nfunction print_highlight() {\n    local message=\"${1}\"\n    echo \"\" && echo \"******************************************************\"\n    echo $message\n    echo \"******************************************************\" && echo \"\"\n}\n\n# Main script execution\n\n# move to the parent directory of the dir where this script resides\ncd \"$(dirname \"${BASH_SOURCE[0]}\")\" && cd ..\n\napp_version=\"latest\"\ninstall_dir=\"$(pwd)/install_dir\"\nconda_root=\"${install_dir}/conda\"\nenv_dir=\"${install_dir}/env\"\n\ncheck_path_for_spaces\n\nprint_highlight \"Activating conda environment\"\nactivate_conda_env\n\nprint_highlight \"Updating Kotaemon to latest\"\nupdate_latest\n\ndeactivate_conda_env\n\nread -p \"Press enter to continue\"\n"
  },
  {
    "path": "scripts/update_windows.bat",
    "content": "@ECHO off\r\n\r\n:: Main script execution\r\nCD /D \"%~dp0\\..\"\r\n\r\nSET app_version=latest\r\nSET install_dir=%CD%\\install_dir\r\nSET conda_root=%install_dir%\\conda\r\nSET env_dir=%install_dir%\\env\r\n\r\nECHO %CD%| FINDSTR /C:\" \" >nul 2>&1\r\nIF %ERRORLEVEL% EQU 0 (\r\n    ECHO The current workdir has whitespace which can lead to unintended behaviour. Please modify your path and continue later.\r\n    GOTO :end\r\n)\r\n\r\nCALL :print_highlight \"Activating conda environment\"\r\nCALL :activate_environment\r\nIF ERRORLEVEL 1 GOTO :end\r\n\r\nCALL :print_highlight \"Updating Kotaemon to latest\"\r\nCALL :update_latest\r\nIF ERRORLEVEL 1 GOTO :end\r\n\r\nCALL :deactivate_environment\r\nGOTO :end_success\r\n\r\n\r\n:activate_environment\r\n:: deactivate existing conda env(s) to avoid conflicts\r\n( CALL conda deactivate && CALL conda deactivate && CALL conda deactivate ) 2> nul\r\n\r\nCALL \"%env_dir%\\python.exe\" --version >nul 2>&1 || (\r\n    ECHO The environment appears to be broken. You may need to remove %env_dir% and run the installer again.\r\n    GOTO :exit_func_with_error\r\n)\r\n\r\nCALL \"%conda_root%\\condabin\\conda.bat\" activate %env_dir% || (\r\n    ECHO Failed to activate environment. You may need to remove %env_dir% and run the installer again.\r\n    GOTO :exit_func_with_error\r\n)\r\nECHO Activate conda environment at %env_dir%\r\n\r\nGOTO :eof\r\n\r\n:deactivate_environment\r\n:: Conda deactivate if we are in the right env\r\nIF \"%CONDA_PREFIX%\" == \"%env_dir%\" (\r\n    CALL \"%conda_root%\\condabin\\conda.bat\" deactivate\r\n    ECHO Deactivate conda environment at %env_dir%\r\n)\r\nGOTO :eof\r\n\r\n:update_latest\r\nFOR /F \"tokens=1,2\" %%a in ('pip list') do if \"%%a\"==\"kotaemon-app\" set current_version=%%b\r\nECHO Current version %current_version%\r\n\r\nIF EXIST \"pyproject.toml\" (\r\n    ECHO Source files detected. 
Please perform git pull manually.\r\n    CALL :deactivate_environment\r\n    GOTO :exit_func_with_error\r\n) ELSE (\r\n    ECHO Installing version: %app_version%\r\n    @REM Work around for versioning control\r\n    python -m pip install git+https://github.com/Cinnamon/kotaemon.git@\"%app_version%\"#subdirectory=libs/kotaemon\r\n    python -m pip install git+https://github.com/Cinnamon/kotaemon.git@\"%app_version%\"#subdirectory=libs/ktem\r\n    python -m pip install --no-deps git+https://github.com/Cinnamon/kotaemon.git@\"%app_version%\"\r\n) || (\r\n    ECHO. && ECHO Update failed. You may need to run the update again.\r\n    CALL :deactivate_environment\r\n    GOTO :exit_func_with_error\r\n)\r\n\r\nCALL :print_highlight \"Update successfully.\"\r\nFOR /F \"tokens=1,2\" %%a in ('pip list') do if \"%%a\"==\"kotaemon-app\" set updated_version=%%b\r\nECHO Updated version %updated_version%\r\nECHO %updated_version% > VERSION\r\nGOTO :eof\r\n\r\n:print_highlight\r\nECHO. && ECHO ******************************************************\r\nECHO %~1\r\nECHO ****************************************************** && ECHO.\r\nGOTO :eof\r\n\r\n:exit_func_with_error\r\n:: Called inside functions when error happens, then back to the main routine with error code 1\r\nEXIT /B 1\r\n\r\n:end_success\r\n:: Exit the script main routine with error code 0 (success)\r\nECHO Script completed successfully.\r\nPAUSE\r\nEXIT /B 0\r\n\r\n:end\r\n:: Exit the script main routine with error code 1 (fail)\r\nPAUSE\r\nEXIT /B 1\r\n"
  },
  {
    "path": "settings.yaml.example",
    "content": "# This is a sample GraphRAG settings.yaml file that allows users to run the GraphRAG index process with their customized parameters.\n# The parameters in this file will only take effect when the USE_CUSTOMIZED_GRAPHRAG_SETTING is true in .env file.\n# For a comprehensive understanding of GraphRAG parameters, please refer to: https://microsoft.github.io/graphrag/config/json_yaml/.\n\nencoding_model: cl100k_base\nskip_workflows: []\nllm:\n  api_key: ${GRAPHRAG_API_KEY}\n  type: openai_chat # or azure_openai_chat\n  api_base: http://127.0.0.1:11434/v1\n  model: qwen2\n  model_supports_json: true # recommended if this is available for your model.\n  # max_tokens: 4000\n  request_timeout: 1800.0\n  # api_base: https://<instance>.openai.azure.com\n  # api_version: 2024-02-15-preview\n  # organization: <organization_id>\n  # deployment_name: <azure_model_deployment_name>\n  # tokens_per_minute: 150_000 # set a leaky bucket throttle\n  # requests_per_minute: 10_000 # set a leaky bucket throttle\n  # max_retries: 10\n  # max_retry_wait: 10.0\n  # sleep_on_rate_limit_recommendation: true # whether to sleep when azure suggests wait-times\n  concurrent_requests: 5 # the number of parallel inflight requests that may be made\n  # temperature: 0 # temperature for sampling\n  # top_p: 1 # top-p sampling\n  # n: 1 # Number of completions to generate\n\nparallelization:\n  stagger: 0.3\n  # num_threads: 50 # the number of threads to use for parallel processing\n\nasync_mode: threaded # or asyncio\n\nembeddings:\n  ## parallelization: override the global parallelization settings for embeddings\n  async_mode: threaded # or asyncio\n  # target: required # or all\n  # batch_size: 16 # the number of documents to send in a single request\n  # batch_max_tokens: 8191 # the maximum number of tokens to send in a single request\n  llm:\n    api_base: http://localhost:11434/v1\n    api_key: ${GRAPHRAG_API_KEY}\n    model: nomic-embed-text\n    type: openai_embedding\n    # 
api_base: https://<instance>.openai.azure.com\n    # api_version: 2024-02-15-preview\n    # organization: <organization_id>\n    # deployment_name: <azure_model_deployment_name>\n    # tokens_per_minute: 150_000 # set a leaky bucket throttle\n    # requests_per_minute: 10_000 # set a leaky bucket throttle\n    # max_retries: 10\n    # max_retry_wait: 10.0\n    # sleep_on_rate_limit_recommendation: true # whether to sleep when azure suggests wait-times\n    # concurrent_requests: 25 # the number of parallel inflight requests that may be made\n\nchunks:\n  size: 1200\n  overlap: 100\n  group_by_columns: [id] # by default, we don't allow chunks to cross documents\n\ninput:\n  type: file # or blob\n  file_type: text # or csv\n  base_dir: \"input\"\n  file_encoding: utf-8\n  file_pattern: \".*\\\\.txt$\"\n\ncache:\n  type: file # or blob\n  base_dir: \"cache\"\n  # connection_string: <azure_blob_storage_connection_string>\n  # container_name: <azure_blob_storage_container_name>\n\nstorage:\n  type: file # or blob\n  base_dir: \"output\"\n  # connection_string: <azure_blob_storage_connection_string>\n  # container_name: <azure_blob_storage_container_name>\n\nreporting:\n  type: file # or console, blob\n  base_dir: \"output\"\n  # connection_string: <azure_blob_storage_connection_string>\n  # container_name: <azure_blob_storage_container_name>\n\nentity_extraction:\n  ## strategy: fully override the entity extraction strategy.\n  ##   type: one of graph_intelligence, graph_intelligence_json and nltk\n  ## llm: override the global llm settings for this task\n  ## parallelization: override the global parallelization settings for this task\n  ## async_mode: override the global async_mode settings for this task\n  prompt: \"prompts/entity_extraction.txt\"\n  entity_types: [organization,person,geo,event]\n  max_gleanings: 1\n\nsummarize_descriptions:\n  ## llm: override the global llm settings for this task\n  ## parallelization: override the global parallelization settings 
for this task\n  ## async_mode: override the global async_mode settings for this task\n  prompt: \"prompts/summarize_descriptions.txt\"\n  max_length: 500\n\nclaim_extraction:\n  ## llm: override the global llm settings for this task\n  ## parallelization: override the global parallelization settings for this task\n  ## async_mode: override the global async_mode settings for this task\n  # enabled: true\n  prompt: \"prompts/claim_extraction.txt\"\n  description: \"Any claims or facts that could be relevant to information discovery.\"\n  max_gleanings: 1\n\ncommunity_reports:\n  ## llm: override the global llm settings for this task\n  ## parallelization: override the global parallelization settings for this task\n  ## async_mode: override the global async_mode settings for this task\n  prompt: \"prompts/community_report.txt\"\n  max_length: 2000\n  max_input_length: 8000\n\ncluster_graph:\n  max_cluster_size: 10\n\nembed_graph:\n  enabled: false # if true, will generate node2vec embeddings for nodes\n  # num_walks: 10\n  # walk_length: 40\n  # window_size: 2\n  # iterations: 3\n  # random_seed: 597832\n\numap:\n  enabled: false # if true, will generate UMAP embeddings for nodes\n\nsnapshots:\n  graphml: false\n  raw_entities: false\n  top_level_nodes: false\n\nlocal_search:\n  # text_unit_prop: 0.5\n  # community_prop: 0.1\n  # conversation_history_max_turns: 5\n  # top_k_mapped_entities: 10\n  # top_k_relationships: 10\n  # llm_temperature: 0 # temperature for sampling\n  # llm_top_p: 1 # top-p sampling\n  # llm_n: 1 # Number of completions to generate\n  # max_tokens: 12000\n\nglobal_search:\n  # llm_temperature: 0 # temperature for sampling\n  # llm_top_p: 1 # top-p sampling\n  # llm_n: 1 # Number of completions to generate\n  # max_tokens: 12000\n  # data_max_tokens: 12000\n  # map_max_tokens: 1000\n  # reduce_max_tokens: 2000\n  # concurrency: 32\n"
  },
  {
    "path": "sso_app.py",
    "content": "import os\n\nimport gradiologin as grlogin\nfrom decouple import config\nfrom fastapi import FastAPI\nfrom fastapi.responses import FileResponse\nfrom theflow.settings import settings as flowsettings\n\nKH_APP_DATA_DIR = getattr(flowsettings, \"KH_APP_DATA_DIR\", \".\")\nGRADIO_TEMP_DIR = os.getenv(\"GRADIO_TEMP_DIR\", None)\nAUTHENTICATION_METHOD = config(\"AUTHENTICATION_METHOD\", \"GOOGLE\")\n\n# override GRADIO_TEMP_DIR if it's not set\nif GRADIO_TEMP_DIR is None:\n    GRADIO_TEMP_DIR = os.path.join(KH_APP_DATA_DIR, \"gradio_tmp\")\n    os.environ[\"GRADIO_TEMP_DIR\"] = GRADIO_TEMP_DIR\n\n# for authentication with Google\nGOOGLE_CLIENT_ID = config(\"GOOGLE_CLIENT_ID\", default=\"\")\nGOOGLE_CLIENT_SECRET = config(\"GOOGLE_CLIENT_SECRET\", default=\"\")\n\n# for authentication with Open ID by keycloak\nKEYCLOAK_SERVER_URL = config(\"KEYCLOAK_SERVER_URL\", default=\"\")\nKEYCLOAK_REALM = config(\"KEYCLOAK_REALM\", default=\"\")\nKEYCLOAK_CLIENT_ID = config(\"KEYCLOAK_CLIENT_ID\", default=\"\")\nKEYCLOAK_CLIENT_SECRET = config(\"KEYCLOAK_CLIENT_SECRET\", default=\"\")\n\nfrom ktem.main import App  # noqa\n\ngradio_app = App()\ndemo = gradio_app.make()\n\napp = FastAPI()\n\nif AUTHENTICATION_METHOD == \"KEYCLOAK\":\n    # for authentication with Open ID by keycloak\n    grlogin.register(\n        name=\"keycloak\",\n        server_metadata_url=(\n            f\"{KEYCLOAK_SERVER_URL}/realms/{KEYCLOAK_REALM}/\"\n            \".well-known/openid-configuration\"\n        ),\n        client_id=KEYCLOAK_CLIENT_ID,\n        client_secret=KEYCLOAK_CLIENT_SECRET,\n        client_kwargs={\n            \"scope\": \"openid email profile\",\n        },\n    )\n\nelse:\n    # for authentication with Google\n    grlogin.register(\n        name=\"google\",\n        server_metadata_url=(\n            \"https://accounts.google.com/.well-known/openid-configuration\"\n        ),\n        client_id=GOOGLE_CLIENT_ID,\n        client_secret=GOOGLE_CLIENT_SECRET,\n        
client_kwargs={\n            \"scope\": \"openid email profile\",\n        },\n    )\n\n\n@app.get(\"/favicon.ico\", include_in_schema=False)\nasync def favicon():\n    return FileResponse(gradio_app._favicon)\n\n\ngrlogin.mount_gradio_app(\n    app,\n    demo,\n    \"/app\",\n    allowed_paths=[\n        \"libs/ktem/ktem/assets\",\n        GRADIO_TEMP_DIR,\n    ],\n)\n"
  },
  {
    "path": "sso_app_demo.py",
    "content": "import os\n\nimport gradio as gr\nfrom authlib.integrations.starlette_client import OAuth, OAuthError\nfrom decouple import config\nfrom fastapi import FastAPI, Request\nfrom fastapi.responses import FileResponse\nfrom starlette.config import Config\nfrom starlette.middleware.sessions import SessionMiddleware\nfrom starlette.responses import RedirectResponse\nfrom theflow.settings import settings as flowsettings\n\nKH_DEMO_MODE = getattr(flowsettings, \"KH_DEMO_MODE\", False)\nKH_APP_DATA_DIR = getattr(flowsettings, \"KH_APP_DATA_DIR\", \".\")\nGRADIO_TEMP_DIR = os.getenv(\"GRADIO_TEMP_DIR\", None)\n# override GRADIO_TEMP_DIR if it's not set\nif GRADIO_TEMP_DIR is None:\n    GRADIO_TEMP_DIR = os.path.join(KH_APP_DATA_DIR, \"gradio_tmp\")\n    os.environ[\"GRADIO_TEMP_DIR\"] = GRADIO_TEMP_DIR\n\n\nGOOGLE_CLIENT_ID = config(\"GOOGLE_CLIENT_ID\", default=\"\")\nGOOGLE_CLIENT_SECRET = config(\"GOOGLE_CLIENT_SECRET\", default=\"\")\nSECRET_KEY = config(\"SECRET_KEY\", default=\"default-secret-key\")\n\n\ndef add_session_middleware(app):\n    config_data = {\n        \"GOOGLE_CLIENT_ID\": GOOGLE_CLIENT_ID,\n        \"GOOGLE_CLIENT_SECRET\": GOOGLE_CLIENT_SECRET,\n    }\n    starlette_config = Config(environ=config_data)\n    oauth = OAuth(starlette_config)\n    oauth.register(\n        name=\"google\",\n        server_metadata_url=(\n            \"https://accounts.google.com/\" \".well-known/openid-configuration\"\n        ),\n        client_kwargs={\"scope\": \"openid email profile\"},\n    )\n\n    app.add_middleware(SessionMiddleware, secret_key=SECRET_KEY)\n    return oauth\n\n\nfrom ktem.main import App  # noqa\n\ngradio_app = App()\nmain_demo = gradio_app.make()\n\napp = FastAPI()\noauth = add_session_middleware(app)\n\n\n@app.get(\"/\")\ndef public(request: Request):\n    root_url = gr.route_utils.get_root_url(request, \"/\", None)\n    return RedirectResponse(url=f\"{root_url}/app/\")\n\n\n@app.get(\"/favicon.ico\", include_in_schema=False)\nasync 
def favicon():\n    return FileResponse(gradio_app._favicon)\n\n\n@app.route(\"/logout\")\nasync def logout(request: Request):\n    request.session.pop(\"user\", None)\n    return RedirectResponse(url=\"/\")\n\n\n@app.route(\"/login\")\nasync def login(request: Request):\n    root_url = gr.route_utils.get_root_url(request, \"/login\", None)\n    redirect_uri = f\"{root_url}/auth\"\n    return await oauth.google.authorize_redirect(request, redirect_uri)\n\n\n@app.route(\"/auth\")\nasync def auth(request: Request):\n    try:\n        access_token = await oauth.google.authorize_access_token(request)\n    except OAuthError:\n        return RedirectResponse(url=\"/\")\n    request.session[\"user\"] = dict(access_token)[\"userinfo\"]\n    return RedirectResponse(url=\"/\")\n\n\napp = gr.mount_gradio_app(\n    app,\n    main_demo,\n    path=\"/app\",\n    allowed_paths=[\n        \"libs/ktem/ktem/assets\",\n        GRADIO_TEMP_DIR,\n    ],\n)\n"
  },
  {
    "path": "templates/component-default/README.md",
    "content": ""
  },
  {
    "path": "templates/project-default/cookiecutter.json",
    "content": "{\n    \"project_name\": \"prj_kotaemon\",\n    \"ptl\": \"john\"\n}\n"
  },
  {
    "path": "templates/project-default/{{cookiecutter.project_name}}/.gitattributes",
    "content": ".gitattributes text eol=lf\n.gitignore text eol=lf\n*.build text eol=lf\n*.c text eol=lf\n*.cmake text eol=lf\n*.cpp text eol=lf\n*.csv text eol=lf\n*.f text eol=lf\n*.f90 text eol=lf\n*.for text eol=lf\n*.grc text eol=lf\n*.h text eol=lf\n*.ipynb text eol=lf\n*.m text eol=lf\n*.md text eol=lf\n*.pas text eol=lf\n*.py text eol=lf\n*.rst text eol=lf\n*.sh text eol=lf\n*.txt text eol=lf\n*.yml text eol=lf\nMakefile text eol=lf\n*.html linguist-documentation\n"
  },
  {
    "path": "templates/project-default/{{cookiecutter.project_name}}/.gitignore",
    "content": "# Created by https://www.toptal.com/developers/gitignore/api/python,linux,macos,windows,vim,emacs,visualstudiocode,pycharm\n# Edit at https://www.toptal.com/developers/gitignore?templates=python,linux,macos,windows,vim,emacs,visualstudiocode,pycharm\n\n### Emacs ###\n# -*- mode: gitignore; -*-\n*~\n\\#*\\#\n/.emacs.desktop\n/.emacs.desktop.lock\n*.elc\nauto-save-list\ntramp\n.\\#*\n\n# Org-mode\n.org-id-locations\n*_archive\n\n# flymake-mode\n*_flymake.*\n\n# eshell files\n/eshell/history\n/eshell/lastdir\n\n# elpa packages\n/elpa/\n\n# reftex files\n*.rel\n\n# AUCTeX auto folder\n/auto/\n\n# cask packages\n.cask/\ndist/\n\n# Flycheck\nflycheck_*.el\n\n# server auth directory\n/server/\n\n# projectiles files\n.projectile\n\n# directory configuration\n.dir-locals.el\n\n# network security\n/network-security.data\n\n### Linux ###\n\n# temporary files which can be created if a process still has a handle open of a deleted file\n.fuse_hidden*\n\n# KDE directory preferences\n.directory\n\n# Linux trash folder which might appear on any partition or disk\n.Trash-*\n\n# .nfs files are created when an open file is removed but is still being accessed\n.nfs*\n\n### macOS ###\n# General\n.DS_Store\n.AppleDouble\n.LSOverride\n\n# Icon must end with two \\r\nIcon\n\n# Thumbnails\n._*\n\n# Files that might appear in the root of a volume\n.DocumentRevisions-V100\n.fseventsd\n.Spotlight-V100\n.TemporaryItems\n.Trashes\n.VolumeIcon.icns\n.com.apple.timemachine.donotpresent\n\n# Directories potentially created on remote AFP share\n.AppleDB\n.AppleDesktop\nNetwork Trash Folder\nTemporary Items\n.apdisk\n\n### macOS Patch ###\n# iCloud generated files\n*.icloud\n\n### PyCharm ###\n# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider\n# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839\n\n# User-specific 
stuff\n.idea/**/workspace.xml\n.idea/**/tasks.xml\n.idea/**/usage.statistics.xml\n.idea/**/dictionaries\n.idea/**/shelf\n\n# AWS User-specific\n.idea/**/aws.xml\n\n# Generated files\n.idea/**/contentModel.xml\n\n# Sensitive or high-churn files\n.idea/**/dataSources/\n.idea/**/dataSources.ids\n.idea/**/dataSources.local.xml\n.idea/**/sqlDataSources.xml\n.idea/**/dynamic.xml\n.idea/**/uiDesigner.xml\n.idea/**/dbnavigator.xml\n\n# Gradle\n.idea/**/gradle.xml\n.idea/**/libraries\n\n# Gradle and Maven with auto-import\n# When using Gradle or Maven with auto-import, you should exclude module files,\n# since they will be recreated, and may cause churn.  Uncomment if using\n# auto-import.\n# .idea/artifacts\n# .idea/compiler.xml\n# .idea/jarRepositories.xml\n# .idea/modules.xml\n# .idea/*.iml\n# .idea/modules\n# *.iml\n# *.ipr\n\n# CMake\ncmake-build-*/\n\n# Mongo Explorer plugin\n.idea/**/mongoSettings.xml\n\n# File-based project format\n*.iws\n\n# IntelliJ\nout/\n\n# mpeltonen/sbt-idea plugin\n.idea_modules/\n\n# JIRA plugin\natlassian-ide-plugin.xml\n\n# Cursive Clojure plugin\n.idea/replstate.xml\n\n# SonarLint plugin\n.idea/sonarlint/\n\n# Crashlytics plugin (for Android Studio and IntelliJ)\ncom_crashlytics_export_strings.xml\ncrashlytics.properties\ncrashlytics-build.properties\nfabric.properties\n\n# Editor-based Rest Client\n.idea/httpRequests\n\n# Android studio 3.1+ serialized cache file\n.idea/caches/build_file_checksums.ser\n\n### PyCharm Patch ###\n# Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721\n\n# *.iml\n# modules.xml\n# .idea/misc.xml\n# *.ipr\n\n# Sonarlint plugin\n# https://plugins.jetbrains.com/plugin/7973-sonarlint\n.idea/**/sonarlint/\n\n# SonarQube Plugin\n# https://plugins.jetbrains.com/plugin/7238-sonarqube-community-plugin\n.idea/**/sonarIssues.xml\n\n# Markdown Navigator plugin\n# 
https://plugins.jetbrains.com/plugin/7896-markdown-navigator-enhanced\n.idea/**/markdown-navigator.xml\n.idea/**/markdown-navigator-enh.xml\n.idea/**/markdown-navigator/\n\n# Cache file creation bug\n# See https://youtrack.jetbrains.com/issue/JBR-2257\n.idea/$CACHE_FILE$\n\n# CodeStream plugin\n# https://plugins.jetbrains.com/plugin/12206-codestream\n.idea/codestream.xml\n\n# Azure Toolkit for IntelliJ plugin\n# https://plugins.jetbrains.com/plugin/8053-azure-toolkit-for-intellij\n.idea/**/azureSettings.xml\n\n### Python ###\n# Byte-compiled / optimized / DLL files\n__pycache__/\n*.py[cod]\n*$py.class\n\n# C extensions\n*.so\n\n# Distribution / packaging\n.Python\nbuild/\ndevelop-eggs/\ndownloads/\neggs/\n.eggs/\nlib/\nlib64/\nparts/\nsdist/\nvar/\nwheels/\nshare/python-wheels/\n*.egg-info/\n.installed.cfg\n*.egg\nMANIFEST\n\n# PyInstaller\n#  Usually these files are written by a python script from a template\n#  before PyInstaller builds the exe, so as to inject date/other infos into it.\n*.manifest\n*.spec\n\n# Installer logs\npip-log.txt\npip-delete-this-directory.txt\n\n# Unit test / coverage reports\nhtmlcov/\n.tox/\n.nox/\n.coverage\n.coverage.*\n.cache\nnosetests.xml\ncoverage.xml\n*.cover\n*.py,cover\n.hypothesis/\n.pytest_cache/\ncover/\n\n# Translations\n*.mo\n*.pot\n\n# Django stuff:\n*.log\nlocal_settings.py\ndb.sqlite3\ndb.sqlite3-journal\n\n# Flask stuff:\ninstance/\n.webassets-cache\n\n# Scrapy stuff:\n.scrapy\n\n# Sphinx documentation\ndocs/_build/\n\n# PyBuilder\n.pybuilder/\ntarget/\n\n# Jupyter Notebook\n.ipynb_checkpoints\n\n# IPython\nprofile_default/\nipython_config.py\n\n# pyenv\n#   For a library or package, you might want to ignore these files since the code is\n#   intended to run in multiple environments; otherwise, check them in:\n# .python-version\n\n# pipenv\n#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.\n#   However, in case of collaboration, if having platform-specific dependencies or 
dependencies\n#   having no cross-platform support, pipenv may install dependencies that don't work, or not\n#   install all needed dependencies.\n#Pipfile.lock\n\n# poetry\n#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.\n#   This is especially recommended for binary packages to ensure reproducibility, and is more\n#   commonly ignored for libraries.\n#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control\n#poetry.lock\n\n# pdm\n#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.\n#pdm.lock\n#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it\n#   in version control.\n#   https://pdm.fming.dev/#use-with-ide\n.pdm.toml\n\n# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm\n__pypackages__/\n\n# Celery stuff\ncelerybeat-schedule\ncelerybeat.pid\n\n# SageMath parsed files\n*.sage.py\n\n# Environments\n.env\n.venv\nenv/\nvenv/\nENV/\nenv.bak/\nvenv.bak/\n\n# Spyder project settings\n.spyderproject\n.spyproject\n\n# Rope project settings\n.ropeproject\n\n# mkdocs documentation\n/site\n\n# mypy\n.mypy_cache/\n.dmypy.json\ndmypy.json\n\n# Pyre type checker\n.pyre/\n\n# pytype static type analyzer\n.pytype/\n\n# Cython debug symbols\ncython_debug/\n\n# PyCharm\n#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can\n#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore\n#  and can be added to the global gitignore or merged into this file.  
For a more nuclear\n#  option (not recommended) you can uncomment the following to ignore the entire idea folder.\n#.idea/\n\n### Python Patch ###\n# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration\npoetry.toml\n\n# ruff\n.ruff_cache/\n\n# LSP config files\npyrightconfig.json\n\n### Vim ###\n# Swap\n[._]*.s[a-v][a-z]\n!*.svg # comment out if you don't need vector files\n[._]*.sw[a-p]\n[._]s[a-rt-v][a-z]\n[._]ss[a-gi-z]\n[._]sw[a-p]\n\n# Session\nSession.vim\nSessionx.vim\n\n# Temporary\n.netrwhist\n# Auto-generated tag files\ntags\n# Persistent undo\n[._]*.un~\n\n### VisualStudioCode ###\n.vscode/*\n!.vscode/settings.json\n!.vscode/tasks.json\n!.vscode/launch.json\n!.vscode/extensions.json\n!.vscode/*.code-snippets\n\n# Local History for Visual Studio Code\n.history/\n\n# Built Visual Studio Code Extensions\n*.vsix\n\n### VisualStudioCode Patch ###\n# Ignore all local history of files\n.history\n.ionide\n\n### Windows ###\n# Windows thumbnail cache files\nThumbs.db\nThumbs.db:encryptable\nehthumbs.db\nehthumbs_vista.db\n\n# Dump file\n*.stackdump\n\n# Folder config file\n[Dd]esktop.ini\n\n# Recycle Bin used on file shares\n$RECYCLE.BIN/\n\n# Windows Installer files\n*.cab\n*.msi\n*.msix\n*.msm\n*.msp\n\n# Windows shortcuts\n*.lnk\n\n.theflow/\n\n# End of https://www.toptal.com/developers/gitignore/api/python,linux,macos,windows,vim,emacs,visualstudiocode,pycharm\n\nlogs/\n.gitsecret/keys/random_seed\n!*.secret\ncredentials.txt\n\nS.gpg-agent*\n.vscode/settings.json\n"
  },
  {
    "path": "templates/project-default/{{cookiecutter.project_name}}/.pre-commit-config.yaml",
    "content": "repos:\n  - repo: https://github.com/pre-commit/pre-commit-hooks\n    rev: v4.3.0\n    hooks:\n      - id: check-yaml\n      - id: check-toml\n      - id: end-of-file-fixer\n      - id: trailing-whitespace\n      - id: detect-aws-credentials\n        args: [\"--allow-missing-credentials\"]\n      - id: detect-private-key\n      - id: check-added-large-files\n  - repo: https://github.com/ambv/black\n    rev: 22.3.0\n    hooks:\n      - id: black\n        language_version: python3\n  - repo: https://github.com/pycqa/isort\n    rev: 5.12.0\n    hooks:\n      - id: isort\n        args: [\"--profile\", \"black\"]\n        language_version: python3.10\n  - repo: https://github.com/pycqa/flake8\n    rev: 4.0.1\n    hooks:\n      - id: flake8\n        args: [\"--max-line-length\", \"88\", \"--extend-ignore\", \"E203\"]\n  - repo: https://github.com/myint/autoflake\n    rev: v1.4\n    hooks:\n      - id: autoflake\n        args:\n          [\n            \"--in-place\",\n            \"--remove-unused-variables\",\n            \"--remove-all-unused-imports\",\n            \"--ignore-init-module-imports\",\n            \"--exclude=tests/*\",\n          ]\n  - repo: https://github.com/pre-commit/mirrors-prettier\n    rev: v2.7.1\n    hooks:\n      - id: prettier\n        types_or: [markdown, yaml]\n  - repo: https://github.com/pre-commit/mirrors-mypy\n    rev: \"v1.5.1\"\n    hooks:\n      - id: mypy\n        additional_dependencies: [types-PyYAML==6.0.12.11, \"types-requests\"]\n        args: [\"--check-untyped-defs\", \"--ignore-missing-imports\"]\n"
  },
  {
    "path": "templates/project-default/{{cookiecutter.project_name}}/README.md",
    "content": "<div align=\"center\">\n\n# Project {{ cookiecutter.project_name }}\n\n[![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white)](https://github.com/Cinnamon/kotaemon)\n\n</div>\n\n# Install\n\n```bash\n# Create new conda env (optional)\nconda create -n {{ cookiecutter.project_name }} python=3.10\nconda activate {{ cookiecutter.project_name }}\n\n# Clone and install the project\ngit clone \"<{{ cookiecutter.project_name }}-repo>\"\ncd \"<{{ cookiecutter.project_name }}-repo>\"\npip install -e .\n\n# Generate the project structure\ncd ..\nkh start-project\n```\n\n# Usage\n\n- Build the pipeline in `pipeline.py`\n\nFor supported utilities and tools, refer: https://github.com/Cinnamon/kotaemon/wiki/Utilities\n\n# Contribute\n\n- For project issues and errors, please report in this repo issues.\n- For kotaemon issues and errors, please report or make PR fixes in https://github.com/Cinnamon/kotaemon.git\n- If the template for this project has issues and errors, please report or make\n  PR fixes in https://github.com/Cinnamon/kotaemon/tree/main/templates/project-default\n"
  },
  {
    "path": "templates/project-default/{{cookiecutter.project_name}}/setup.py",
    "content": "import setuptools\n\nsetuptools.setup(\n    name=\"{{ cookiecutter.project_name }}\",\n    version=\"0.0.1\",\n    author=\"{{ cookiecutter.ptl }}\",\n    author_email=\"{{ cookiecutter.ptl }}@cinnamon.is\",\n    description=\"Project {{ cookiecutter.project_name }}\",\n    long_description=\"Project {{ cookiecutter.project_name }}\",\n    url=\"https://github.com/Cinnamon/kotaemon\",\n    python_requires=\">=3\",\n    classifiers=[\n        \"Programming Language :: Python :: 3\",\n        \"License :: OSI Approved :: MIT License\",\n        \"Operating System :: OS Independent\",\n    ],\n    install_requires=[\n        \"kotaemon@git+ssh://git@github.com/Cinnamon/kotaemon.git\",\n    ],\n)\n"
  },
  {
    "path": "templates/project-default/{{cookiecutter.project_name}}/tests/__init__.py",
    "content": ""
  },
  {
    "path": "templates/project-default/{{cookiecutter.project_name}}/{{cookiecutter.project_name}}/__init__.py",
    "content": ""
  },
  {
    "path": "templates/project-default/{{cookiecutter.project_name}}/{{cookiecutter.project_name}}/pipeline.py",
    "content": "import os\nfrom typing import List\n\nfrom kotaemon.base import BaseComponent, Document, LLMInterface, Node, Param, lazy\nfrom kotaemon.contribs.promptui.logs import ResultLog\nfrom kotaemon.embeddings import LCAzureOpenAIEmbeddings\nfrom kotaemon.indices import VectorIndexing, VectorRetrieval\nfrom kotaemon.llms import LCAzureChatOpenAI\nfrom kotaemon.storages import ChromaVectorStore, SimpleFileDocumentStore\n\n\nclass QAResultLog(ResultLog):\n    @staticmethod\n    def _get_prompt(obj):\n        return obj[\"prompt\"]\n\n\nclass QuestionAnsweringPipeline(BaseComponent):\n\n    _promptui_resultlog = QAResultLog\n    _promptui_outputs: list = [\n        {\n            \"step\": \".prompt\",\n            \"getter\": \"_get_prompt\",\n            \"component\": \"text\",\n            \"params\": {\"label\": \"Constructed prompt to LLM\"},\n        },\n        {\n            \"step\": \".\",\n            \"getter\": \"_get_output\",\n            \"component\": \"text\",\n            \"params\": {\"label\": \"Answer\"},\n        },\n    ]\n\n    retrieval_top_k: int = 1\n    llm: LCAzureChatOpenAI = LCAzureChatOpenAI.withx(\n        azure_endpoint=\"https://bleh-dummy-2.openai.azure.com/\",\n        openai_api_key=os.environ.get(\"OPENAI_API_KEY\", \"default-key\"),\n        openai_api_version=\"2023-03-15-preview\",\n        deployment_name=\"dummy-q2-gpt35\",\n        temperature=0,\n        request_timeout=60,\n    )\n\n    retrieving_pipeline: VectorRetrieval = Node(\n        VectorRetrieval.withx(\n            vector_store=lazy(ChromaVectorStore).withx(path=\"./tmp\"),\n            doc_store=lazy(SimpleFileDocumentStore).withx(path=\"docstore.json\"),\n            embedding=LCAzureOpenAIEmbeddings.withx(\n                model=\"text-embedding-ada-002\",\n                deployment=\"dummy-q2-text-embedding\",\n                azure_endpoint=\"https://bleh-dummy-2.openai.azure.com/\",\n                
openai_api_key=os.environ.get(\"OPENAI_API_KEY\", \"default-key\"),\n            ),\n        ),\n        ignore_ui=True,\n    )\n\n    def run(self, text: str) -> LLMInterface:\n        # retrieve relevant documents as context\n        matched_texts: List[str] = [\n            _.text\n            for _ in self.retrieving_pipeline(text, top_k=int(self.retrieval_top_k))\n        ]\n        context = \"\\n\".join(matched_texts)\n\n        # generate the answer\n        prompt = f'Answer the following question: \"{text}\". The context is: \\n{context}'\n        self.log_progress(\".prompt\", prompt=prompt)\n\n        return self.llm(prompt).text\n\n\nclass IndexingPipeline(VectorIndexing):\n\n    vector_store: ChromaVectorStore = Param(\n        lazy(ChromaVectorStore).withx(path=\"./tmp\"),\n        ignore_ui=True,\n    )\n    doc_store: SimpleFileDocumentStore = Param(\n        lazy(SimpleFileDocumentStore).withx(path=\"docstore.json\"),\n        ignore_ui=True,\n    )\n    embedding: LCAzureOpenAIEmbeddings = LCAzureOpenAIEmbeddings.withx(\n        model=\"text-embedding-ada-002\",\n        deployment=\"dummy-q2-text-embedding\",\n        azure_endpoint=\"https://bleh-dummy-2.openai.azure.com/\",\n        openai_api_key=os.environ.get(\"OPENAI_API_KEY\", \"default-key\"),\n    )\n\n    def run(self, text: str) -> Document:\n        \"\"\"Normally, this indexing pipeline returns nothing. For demonstration,\n        we want it to return something, so let's return the number of documents\n        in the vector store\n        \"\"\"\n        super().run(text)\n\n        if self.doc_store is not None:\n            # persist to local anytime an indexing is created\n            # this can be bypassed when we have a FileDocumentStore\n            self.doc_store.save(\"docstore.json\")\n\n        return Document(self.vector_store._collection.count())\n"
  }
]